## ----"installation", eval=FALSE----------------------------------------------- # if (!requireNamespace("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # # BiocManager::install("dorothea") ## ----"load packages", message = FALSE----------------------------------------- ## We load the required packages library(dorothea) library(dplyr) library(Seurat) library(tibble) library(pheatmap) library(tidyr) library(viper) ## ----"load data", eval=FALSE-------------------------------------------------- # ## Load the PBMC dataset # pbmc.data <- Read10X(data.dir = "filtered_gene_bc_matrices/hg19/") # # ## Initialize the Seurat object with the raw (non-normalized data). # pbmc <- CreateSeuratObject(counts = pbmc.data, project = "pbmc3k", # min.cells = 3, min.features = 200) ## ----"load data2", eval=TRUE , include=FALSE---------------------------------- load(file="../inst/extdata/for_vignette/seurat_object.RData", verbose = FALSE) ## ----"preprocessing", message=FALSE------------------------------------------- ## Identification of mithocondrial genes pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-") ## Filtering cells following standard QC criteria. pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5) ## Normalizing the data pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000) pbmc <- NormalizeData(pbmc) ## Identify the 2000 most highly variable genes pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000) ## In addition we scale the data all.genes <- rownames(pbmc) pbmc <- ScaleData(pbmc, features = all.genes) ## ----"clustering", message=FALSE, warning=FALSE------------------------------- pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc), verbose = FALSE) pbmc <- FindNeighbors(pbmc, dims = 1:10, verbose = FALSE) pbmc <- FindClusters(pbmc, resolution = 0.5, verbose = FALSE) pbmc <- RunUMAP(pbmc, dims = 1:10, umap.method = "uwot", metric = "cosine") pbmc.markers <- FindAllMarkers(pbmc, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25, verbose = FALSE) ## Assigning cell type identity to clusters new.cluster.ids <- c("Naive CD4 T", "Memory CD4 T", "CD14+ Mono", "B", "CD8 T", "FCGR3A+ Mono", "NK", "DC", "Platelet") names(new.cluster.ids) <- levels(pbmc) pbmc <- RenameIdents(pbmc, new.cluster.ids) ## ----"pca", message = FALSE, warning = FALSE---------------------------------- DimPlot(pbmc, reduction = "umap", label = TRUE, pt.size = 0.5) + NoLegend() ## ----"viper", message=FALSE--------------------------------------------------- ## We read Dorothea Regulons for Human: dorothea_regulon_human <- get(data("dorothea_hs", package = "dorothea")) ## We obtain the regulons based on interactions with confidence level A, B and C regulon <- dorothea_regulon_human %>% dplyr::filter(confidence %in% c("A","B","C")) ## We compute Viper Scores pbmc <- run_viper(pbmc, regulon, options = list(method = "scale", minsize = 4, eset.filter = FALSE, cores = 1, verbose = FALSE)) ## ----"tf clustering", message=FALSE------------------------------------------- ## We compute the Nearest Neighbours to perform cluster DefaultAssay(object = pbmc) <- "dorothea" pbmc <- ScaleData(pbmc) pbmc <- RunPCA(pbmc, features = rownames(pbmc), verbose = FALSE) pbmc <- FindNeighbors(pbmc, dims = 1:10, verbose = FALSE) pbmc <- FindClusters(pbmc, resolution = 0.5, verbose = FALSE) pbmc <- RunUMAP(pbmc, dims = 1:10, umap.method = "uwot", metric = "cosine") pbmc.markers <- FindAllMarkers(pbmc, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25, verbose = FALSE) ## Assigning cell type identity to clusters new.cluster.ids <- c("Naive CD4 T", "Memory CD4 T", "CD14+ Mono", "B", "CD8 T", "FCGR3A+ Mono", "NK", "DC", "Platelet") names(new.cluster.ids) <- levels(pbmc) pbmc <- RenameIdents(pbmc, new.cluster.ids) ## ----"tf_umap", message = FALSE, warning = FALSE------------------------------ DimPlot(pbmc, reduction = "umap", label = TRUE, pt.size = 0.5) + NoLegend() ## ----"tf activity", message = FALSE------------------------------------------- ## We transform Viper scores, scaled by seurat, into a data frame to better ## handling the results viper_scores_df <- GetAssayData(pbmc, slot = "scale.data", assay = "dorothea") %>% data.frame() %>% t() ## We create a data frame containing the cells and their clusters CellsClusters <- data.frame(cell = names(Idents(pbmc)), cell_type = as.character(Idents(pbmc)), stringsAsFactors = FALSE) ## We create a data frame with the Viper score per cell and its clusters viper_scores_clusters <- viper_scores_df %>% data.frame() %>% rownames_to_column("cell") %>% gather(tf, activity, -cell) %>% inner_join(CellsClusters) ## We summarize the Viper scores by cellpopulation summarized_viper_scores <- viper_scores_clusters %>% group_by(tf, cell_type) %>% summarise(avg = mean(activity), std = sd(activity)) ## ----"highly variable tfs", message=FALSE------------------------------------- ## We select the 20 most variable TFs. (20*9 populations = 180) highly_variable_tfs <- summarized_viper_scores %>% group_by(tf) %>% mutate(var = var(avg)) %>% ungroup() %>% top_n(180, var) %>% distinct(tf) ## We prepare the data for the plot summarized_viper_scores_df <- summarized_viper_scores %>% semi_join(highly_variable_tfs, by = "tf") %>% dplyr::select(-std) %>% spread(tf, avg) %>% data.frame(row.names = 1, check.names = FALSE, stringsAsFactors = FALSE) ## ----"tf_heatmap"------------------------------------------------------------- palette_length = 100 my_color = colorRampPalette(c("Darkblue", "white","red"))(palette_length) my_breaks <- c(seq(min(summarized_viper_scores_df), 0, length.out=ceiling(palette_length/2) + 1), seq(max(summarized_viper_scores_df)/palette_length, max(summarized_viper_scores_df), length.out=floor(palette_length/2))) viper_hmap <- pheatmap(t(summarized_viper_scores_df),fontsize=14, fontsize_row = 10, color=my_color, breaks = my_breaks, main = "DoRothEA (ABC)", angle_col = 45, treeheight_col = 0, border_color = NA) ## ----sessionInfo, echo=FALSE-------------------------------------------------- sessionInfo()