## ----v1, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", eval = TRUE ) ## ----install, eval=FALSE--------------------------------------------------- # if (!requireNamespace("BiocManager")) # install.packages("BiocManager") # BiocManager::install("CellMixS") ## ----load, message=FALSE--------------------------------------------------- library(CellMixS) ## ----warning=FALSE--------------------------------------------------------- # load required packages suppressPackageStartupMessages({ library(SingleCellExperiment) library(cowplot) library(limma) library(magrittr) library(dplyr) library(purrr) library(ggplot2) }) ## ----data------------------------------------------------------------------ # load sim_list example data sim_list <- readRDS(system.file(file.path("extdata", "sim50.rds"), package = "CellMixS")) names(sim_list) sce50 <- sim_list[["batch50"]] class(sce50) table(sce50[["batch"]]) ## ----vis batch 50---------------------------------------------------------- #visualize batch distribution in sce50 visGroup(sce50, group = "batch") ## ----vis batch all, fig.wide=TRUE------------------------------------------ #visualize batch distribution in other elements of sim_list batch_names <- c("batch0", "batch20") vis_batch <- lapply(batch_names, function(name){ sce <- sim_list[[name]] visGroup(sce, "batch") + ggtitle(paste0("sim_", name)) }) plot_grid(plotlist = vis_batch, ncol = 2) ## ----cms------------------------------------------------------------------- #call cell-specific mixing score for sce50 # Note cell_min is set to 4 due to the low number of cells and small k. # Usually default params are recommended!! sce50 <- cms(sce50, k = 30, group = "batch", res_name = "unaligned", n_dim = 3, cell_min = 4) head(colData(sce50)) #call cell-specific mixing score for all sim_list <- lapply(batch_names, function(name){ sce <- sim_list[[name]] sce <- cms(sce, k = 30, group = "batch", res_name = "unaligned", n_dim = 3, cell_min = 4) }) %>% set_names(batch_names) #append cms50 sim_list[["batch50"]] <- sce50 ## ----hist, fig.wide=TRUE--------------------------------------------------- #pval hist of cms50 visHist(sce50) #pval hist sim30 #combine cms results in one matrix batch_names <- names(sim_list) cms_mat <- batch_names %>% map(function(name) sim_list[[name]]$cms.unaligned) %>% bind_cols() %>% set_colnames(batch_names) visHist(cms_mat, n_col = 3) ## ----single plots, fig.wide= TRUE------------------------------------------ #cms only cms10 sce20 <- sim_list[["batch20"]] metric_plot <- visMetric(sce20, metric_var = "cms_smooth.unaligned") #group only cms10 group_plot <- visGroup(sce20, group = "batch") plot_grid(metric_plot, group_plot, ncol = 2) ## ----overview-------------------------------------------------------------- #add random celltype assignments as new metadata sce20[["celltype"]] <- rep(c("CD4+", "CD8+", "CD3"), ncol(sce20)/3) %>% as.factor visOverview(sce20, "batch", other_var = "celltype") ## ----compare cluster, fig.small= TRUE-------------------------------------- visCluster(sce20, metric_var = "cms.unaligned", cluster_var = "celltype") ## ----batch correction methods---------------------------------------------- #MNN - embeddings are stored in the reducedDims slot of sce reducedDimNames(sce20) sce20 <- cms(sce20, k = 30, group = "batch", dim_red = "MNN", res_name = "MNN", n_dim = 3, cell_min = 4) # run limma limma_corrected <- removeBatchEffect(counts(sce20), batch = sce20$batch) #add corrected counts to sce assay(sce20, "lim_corrected") <- limma_corrected #run cms sce20 <- cms(sce20, k = 30, group = "batch", assay_name = "lim_corrected", res_name = "limma", n_dim = 3, cell_min = 4) names(colData(sce20)) ## ----batch correction methods vis------------------------------------------ # As pvalue histograms visHist(sce20, metric_prefix = "cms.", n_col = 3) ## ----ldfDiff, warning=FALSE------------------------------------------------ #Prepare input # list with single SingleCellExperiment objects #(Important: List names need to correspond to batch levels! See ?ldfDiff) sce_pre_list <- list("1" = sce20[,sce20$batch == "1"], "2" = sce20[,sce20$batch == "2"], "3" = sce20[,sce20$batch == "3"]) sce20 <- ldfDiff(sce_pre_list, sce_combined = sce20, group = "batch", k = 70, dim_red = "PCA", dim_combined = "MNN", assay_pre = "counts", n_dim = 3, res_name = "MNN") sce20 <- ldfDiff(sce_pre_list, sce_combined = sce20, group = "batch", k = 70, dim_red = "PCA", dim_combined = "PCA", assay_pre = "counts", assay_combined = "lim_corrected", n_dim = 3, res_name = "limma") names(colData(sce20)) ## ----vis ldfDiff----------------------------------------------------------- #ldfDiff score summarized visIntegration(sce20, metric_prefix = "diff_ldf") ## ----session info---------------------------------------------------------- sessionInfo()