## ----style, echo = FALSE, results = 'asis'---------------- BiocStyle::markdown() options(width=60, max.print=1000) knitr::opts_chunk$set( eval=as.logical(Sys.getenv("KNITR_EVAL", "TRUE")), cache=as.logical(Sys.getenv("KNITR_CACHE", "TRUE")), tidy.opts=list(width.cutoff=60), tidy=TRUE) ## ----setup, echo=FALSE, message=FALSE, warning=FALSE, eval=FALSE---- # suppressPackageStartupMessages({ # library(systemPipeR) # }) ## ----create_workflow, message=FALSE, eval=FALSE----------- # library(systemPipeR) # sal <- SPRproject() # sal ## ----load_packages, eval=FALSE, spr=TRUE------------------ # cat(crayon::blue$bold("To use this workflow, following R packages are expected:\n")) # cat(c("'Seurat", "ggplot2", "ggpubr", "patchwork", "dplyr", "tibble", "readr'\n"), sep = "', '") # ###pre-end # appendStep(sal) <- LineWise( # code = { # library(systemPipeR) # library(Seurat) # library(dplyr) # library(ggplot2) # library(ggpubr) # library(patchwork) # }, # step_name = "load_packages" # ) ## ----load_data, eval=FALSE, spr=TRUE---------------------- # appendStep(sal) <- LineWise( # code = { # # unzip the data # untar("data/pbmc3k_filtered_gene_bc_matrices.tar.gz", exdir = "data") # # load data # pbmc.data <- Read10X(data.dir = "data/filtered_gene_bc_matrices/hg19/") # # Use dim to see the size of dataset, example data has 2700 cells x 32738 genes # dim(pbmc.data) # }, # step_name = "load_data", # dependency = "load_packages" # ) ## ----count_plot, eval=FALSE, spr=TRUE--------------------- # appendStep(sal) <- LineWise( # code = { # at_least_one <- apply(pbmc.data, 2, function(x) sum(x>0)) # count_p1 <- tibble::as_tibble(at_least_one) %>% # ggplot() + # geom_histogram(aes(x = value), binwidth = floor(nrow(pbmc.data)/400), fill = "#6b97c2", color = "white") + # theme_pubr(16) + # scale_y_continuous(expand = c(0, 0)) + # scale_x_continuous(expand = c(0, 0)) + # labs(title = "Distribution of detected genes", x = "Genes with at least one tag") # # count_p2 <- tibble::as_tibble(BiocGenerics::colSums(pbmc.data)) %>% # ggplot() + # geom_histogram(aes(x = value), bins = floor(ncol(pbmc.data)/50), fill = "#6b97c2", color = "white") + # theme_pubr(16) + # scale_y_continuous(expand = c(0, 0)) + # scale_x_continuous(expand = c(0, 0)) + # labs(title = "Expression sum per cell", x = "Sum expression") # # png("results/count_plots.png", 1000, 700) # count_p1 + count_p2 + # patchwork::plot_annotation(tag_levels = "A") # dev.off() # }, # step_name = "count_plot", # dependency = "load_data" # ) ## ----create_seurat, eval=FALSE, spr=TRUE------------------ # appendStep(sal) <- LineWise( # code = { # sce <- CreateSeuratObject(counts = pbmc.data, project = "pbmc3k", min.cells = 3, min.features = 200) # # calculate mitochondria gene ratio # sce[['percent.mt']] <- PercentageFeatureSet(sce,pattern='^MT-') # }, # step_name = "create_seurat", # dependency = "load_data" # ) ## ----qc_seurat, eval=FALSE, spr=TRUE---------------------- # appendStep(sal) <- LineWise( # code = { # # png("results/qc1.png", 700, 700) # VlnPlot(sce, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3) # dev.off() # # qc_p1 <- FeatureScatter(sce, feature1 = "nCount_RNA", feature2 = "percent.mt") # qc_p2 <- FeatureScatter(sce, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") # png("results/qc2.png", 700, 450) # qc_p1 + qc_p2 + patchwork::plot_annotation(tag_levels = "A") # dev.off() # }, # step_name = "qc_seurat", # dependency = "create_seurat" # ) ## ----filter_cells, eval=FALSE, spr=TRUE------------------- # appendStep(sal) <- LineWise( # code = { # # Based on the QC plots, please change the settings accordingly # sce <- subset( # sce, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & # nCount_RNA < 10000 & percent.mt <= 5 # ) # }, # step_name = "filter_cells", # dependency = "create_seurat" # ) ## ----normalize, eval=FALSE, spr=TRUE---------------------- # appendStep(sal) <- LineWise( # code = { # # scale.factor = 10000 is a convenient number for plotting, so the # # normalized counts is ranged between 0.xx to 10. # sce <- NormalizeData(sce, normalization.method = "LogNormalize", scale.factor = 10000) # # # compare counts before and after # count_p_norm <- tibble::as_tibble(BiocGenerics::colSums(sce$RNA@data)) %>% # ggplot() + # geom_histogram(aes(x = value), bins = floor(ncol(pbmc.data)/50), fill = "#6b97c2", color = "white") + # theme_pubr(16) + # scale_y_continuous(expand = c(0, 0)) + # scale_x_continuous(expand = c(0, 0)) + # labs(title = "Total expression after normalization", x = "Sum expression") # png("results/normalize_count_compare.png", 1000, 700) # count_p2 + count_p_norm + patchwork::plot_annotation(tag_levels = "A") # dev.off() # }, # step_name = "normalize", # dependency = "filter_cells" # ) ## ----find_var_genes, eval=FALSE, spr=TRUE----------------- # appendStep(sal) <- LineWise( # code = { # # 2000 is default # sce <- FindVariableFeatures(sce, selection.method = "vst", nfeatures = 2000) # # top 10 variable genes # top10_var <- head(VariableFeatures(sce), 10) # # plot the top 2000 variable genes and mark top 10 # png("results/variable_genes.png", 700, 600) # VariableFeaturePlot(sce) %>% # LabelPoints(points = top10_var, repel = TRUE) + # theme_pubr(16) # dev.off() # }, # step_name = "find_var_genes", # dependency = "normalize" # ) ## ----scaling, eval=FALSE, spr=TRUE------------------------ # appendStep(sal) <- LineWise( # code = { # sce <- ScaleData(sce, features = rownames(sce)) # }, # step_name = "scaling", # dependency = "find_var_genes" # ) ## ----pca, eval=FALSE, spr=TRUE---------------------------- # appendStep(sal) <- LineWise( # code = { # # only use the top 2000 genes, first 50 PCs (default) # sce <- RunPCA(sce,features = VariableFeatures(object = sce), npcs = 50) # # we can use following command to see first 5 genes in each PC # print(sce$pca, dims = 1:5, nfeatures = 5) # }, # step_name = "pca", # dependency = "scaling" # ) ## ----pca_plots, eval=FALSE, spr=TRUE---------------------- # appendStep(sal) <- LineWise( # code = { # # plot PCA overview # png("results/pca_overview.png", 500, 500) # DimPlot(sce , reduction = "pca") # dev.off() # # plot top contributed genes in PC 1 and 2 # png("results/pca_loadings.png", 700, 550) # VizDimLoadings(sce , dims = 1:2, reduction = "pca") # dev.off() # # we can also use heatmap to show top genes in different PCs # png("results/pca_heatmap.png", 700, 700) # DimHeatmap(sce,dims = 1:6, cells = ncol(sce)/5, balanced = TRUE, slot = "scale.data") # dev.off() # }, # step_name = "pca_plots", # dependency = "pca" # ) ## ----choose_pcs, eval=FALSE, spr=TRUE--------------------- # appendStep(sal) <- LineWise( # code = { # # for demo purposes, only a few replicates are used to speed up the calculation # # in your real data, use larger number like 100, etc. # sce <- JackStraw(sce, num.replicate = 30) # sce <- ScoreJackStraw(sce, dims = 1:20) # png("results/jackstraw.png", 660, 750) # JackStrawPlot(sce, dims = 1:20) # dev.off() # # png("results/elbow.png", 500, 500) # ElbowPlot(sce) # dev.off() # }, # step_name = "choose_pcs", # dependency = "pca" # ) ## ----find_clusters, eval=FALSE, spr=TRUE------------------ # appendStep(sal) <- LineWise( # code = { # sce<- FindNeighbors(sce, dims = 1:10) # # resolution 0.4-1.2 good for 3000 cells, if you have more cells, increase # # the number will give you more clusters # sce<- FindClusters(sce, resolution = 0.5) # }, # step_name = "find_clusters", # dependency = "pca" # ) ## ----plot_cluster, eval=FALSE, spr=TRUE------------------- # appendStep(sal) <- LineWise( # code = { # sce <- RunUMAP(sce,dims = 1:20) # png("results/") # p_umap <- DimPlot(sce,reduction = 'umap', label = TRUE) # sce <- RunTSNE(sce,dims = 1:20) # p_tsne <- DimPlot(sce,reduction = 'tsne', label = TRUE) # png("results/plot_clusters.png", 1000, 570) # p_umap + p_tsne # dev.off() # }, # step_name = "plot_cluster", # dependency = "find_clusters" # ) ## ----find_markers, eval=FALSE, spr=TRUE------------------- # appendStep(sal) <- LineWise( # code = { # # find markers for every cluster compared to all remaining cells, report only the positive ones # sce.markers <- FindAllMarkers(sce, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25) # sce.markers %>% group_by(cluster) %>% top_n(n = 2, wt = avg_log2FC) # }, # step_name = "find_markers", # dependency = "find_clusters" # ) ## ----plot_markers, eval=FALSE, spr=TRUE------------------- # appendStep(sal) <- LineWise( # code = { # png("results/vlnplot.png", 600, 600) # VlnPlot(sce, features = c("MS4A1", "CD79A")) # dev.off() # # png("results/marker_features.png", 700, 500) # FeaturePlot(sce, features = c("MS4A1", "GNLY", "CD3E", "CD14")) # dev.off() # # # plot top 10 DEG genes in each cluster as a heatmap # top10_markers <- sce.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC) # png("results/marker_heatmap.png", 1100, 700) # DoHeatmap(sce, features = top10_markers$gene) + NoLegend() # dev.off() # }, # step_name = "plot_markers", # dependency = "find_markers" # ) ## ----label_cell_type, eval=FALSE, spr=TRUE---------------- # appendStep(sal) <- LineWise( # code = { # new.cluster.ids <- c("Naive CD4 T", "CD14+ Mono", "Memory CD4 T", "B", # "CD8 T", "FCGR3A+ Mono","NK", "DC", "Platelet") # names(new.cluster.ids) <- levels(sce) # sce<- RenameIdents(sce, new.cluster.ids) # png("results/marker_labels.png", 700, 700) # DimPlot(sce, reduction = "umap", label = TRUE, pt.size = 0.5) + NoLegend() # dev.off() # # }, # step_name = "label_cell_type", # dependency = c("plot_cluster", "find_markers") # ) ## ----wf_session, eval=FALSE, spr=TRUE--------------------- # appendStep(sal) <- LineWise( # code = { # sessionInfo() # }, # step_name = "wf_session", # dependency = "label_cell_type") ## ----runWF, eval=FALSE------------------------------------ # sal <- runWF(sal, run_step = "mandatory") # remove `run_step` to run all steps to include optional steps ## ----list_tools------------------------------------------- if(file.exists(file.path(".SPRproject", "SYSargsList.yml"))) { local({ sal <- systemPipeR::SPRproject(resume = TRUE) systemPipeR::listCmdTools(sal) systemPipeR::listCmdModules(sal) }) } else { cat(crayon::blue$bold("Tools and modules required by this workflow are:\n")) cat(c("NA"), sep = "\n") } ## ----report_session_info, eval=TRUE----------------------- sessionInfo()