## ----knitr-options, echo=FALSE, message=FALSE, warning=FALSE------------- ## To render an HTML version that works nicely with github and web pages, do: ## rmarkdown::render("vignettes/vignette.Rmd", "all") library(knitr) opts_chunk$set(fig.align = 'center', fig.width = 6, fig.height = 5, dev = 'png') library(ggplot2) theme_set(theme_bw(12)) ## ----quickstart-load-data, message=FALSE, warning=FALSE------------------ library(scater, quietly = TRUE) data("sc_example_counts") data("sc_example_cell_info") ## ----quickstart-make-sceset, results='hide'------------------------------ pd <- new("AnnotatedDataFrame", data = sc_example_cell_info) rownames(pd) <- pd$Cell example_sceset <- newSCESet(countData = sc_example_counts, phenoData = pd) ## ----filter-no-exprs----------------------------------------------------- keep_feature <- rowSums(exprs(example_sceset) > 0) > 0 example_sceset <- example_sceset[keep_feature,] ## ----quick-start-calc-qc-metrics, eval=FALSE----------------------------- ## example_sceset <- calculateQCMetrics(example_sceset, feature_controls = 1:40) ## ----quick-start-gui, eval=FALSE----------------------------------------- ## scater_gui(example_sceset) ## ----sceset-make-sceset-counts-only-------------------------------------- pd <- new("AnnotatedDataFrame", data = sc_example_cell_info) rownames(pd) <- pd$Cell gene_df <- data.frame(Gene = rownames(sc_example_counts)) rownames(gene_df) <- gene_df$Gene fd <- new("AnnotatedDataFrame", data = gene_df) example_sceset <- newSCESet(countData = sc_example_counts, phenoData = pd, featureData = fd) example_sceset ## ----sceset-make-sceset-exprs-only--------------------------------------- example2 <- newSCESet(exprsData = edgeR::cpm.default(sc_example_counts)) pData(example2) fData(example2) ## ----counts-accessor----------------------------------------------------- counts(example2)[1:3, 1:6] ## ----exprs-accessor------------------------------------------------------ exprs(example2)[1:3, 1:6] ## ----get-exprs----------------------------------------------------------- get_exprs(example_sceset, "counts")[1:3, 1:6] ## ----set-exprs----------------------------------------------------------- set_exprs(example2, "counts") <- counts(example_sceset) ## ----sceset-add-count-data----------------------------------------------- counts(example2) <- sc_example_counts example2 counts(example2)[1:3, 1:6] ## ----sceset-demo-replacement--------------------------------------------- gene_df <- data.frame(Gene = rownames(sc_example_counts)) rownames(gene_df) <- gene_df$Gene fd <- new("AnnotatedDataFrame", data = gene_df) ## replace featureData fData(example_sceset) <- fd ## replace phenotype data pData(example_sceset) <- pd ## replace expression data to be used exprs(example_sceset) <- edgeR::cpm.default(counts(example_sceset), prior.count = 5, log = TRUE) ## ----plot-sceset-blocking------------------------------------------------ plot(example_sceset, block1 = "Mutation_Status", block2 = "Treatment", colour_by = "Cell_Cycle", nfeatures = 300, exprs_values = "counts") ## ----plot-expression----------------------------------------------------- plotExpression(example_sceset, rownames(example_sceset)[1:6], x = "Mutation_Status", exprs_values = "exprs", colour = "Treatment") ## ----plot-expression-theme-bw-------------------------------------------- plotExpression(example_sceset, rownames(example_sceset)[7:12], x = "Mutation_Status", exprs_values = "counts", colour = "Cell_Cycle", show_median = TRUE, show_violin = FALSE, xlab = "Mutation Status", log = TRUE) ## ----calc-qc-metrics----------------------------------------------------- example_sceset <- calculateQCMetrics(example_sceset, feature_controls = 1:20) varLabels(example_sceset) ## ----calc-qc-metrics-multi-feature-controls------------------------------ example_sceset <- calculateQCMetrics( example_sceset, feature_controls = list(controls1 = 1:20, controls2 = 500:1000), cell_controls = list(set_1 = 1:5, set_2 = 31:40)) varLabels(example_sceset) ## ----list-fdata-qc------------------------------------------------------- names(fData(example_sceset)) ## ----plot-qc-expression, fig.height=7.5, fig.width=8.5------------------- keep_feature <- rowSums(counts(example_sceset) > 0) > 4 example_sceset <- example_sceset[keep_feature,] ## Plot QC plotQC(example_sceset, type = "highest-expression", exprs_values = "counts") ## ----plot-qc-expression-cell-controls, fig.height=7.5, fig.width=8.5, fig.show=FALSE---- p1 <- plotQC(example_sceset[, !example_sceset$is_cell_control], type = "highest-expression") p2 <- plotQC(example_sceset[, example_sceset$is_cell_control], type = "highest-expression") multiplot(p1, p2, cols = 2) ## ----plot-qc-exprs-freq-vs-mean-default---------------------------------- plotQC(example_sceset, type = "exprs-freq-vs-mean") ## ----plot-qc-exprs-mean-vs-freq-defined-feature-set, results = 'hide', fig.show = FALSE---- feature_set_1 <- fData(example_sceset)$is_feature_control_controls1 plotQC(example_sceset, type = "exprs-freq-vs-mean", feature_set = feature_set_1) ## ----plot-fdata---------------------------------------------------------- plotFeatureData(example_sceset, aes(x = n_cells_exprs, y = pct_total_counts)) ## ----plot-pdata, echo=FALSE, fig.show=FALSE, results='hide'-------------- plotPhenoData(example_sceset, aes(x = total_counts, y = total_features, colour = Mutation_Status)) ## ----plot-pdata-cont-col, fig.show = TRUE-------------------------------- plotPhenoData(example_sceset, aes(x = Mutation_Status, y = total_features, colour = log10_total_counts)) ## ----plot-pdata-col-gene-exprs------------------------------------------- plotPhenoData(example_sceset, aes(x = total_counts, y = total_features, colour = Gene_1000)) ## ----plot-pdatacol-gene-exprs-2, fig.show = FALSE------------------------ plotPhenoData(example_sceset, aes(x = pct_exprs_feature_controls, y = total_features, colour = Gene_0500)) ## ----plot-pdatacol-gene-exprs-3, fig.show = FALSE------------------------ plotPhenoData(example_sceset, aes(x = pct_exprs_feature_controls, y = pct_exprs_top_50_features, colour = Gene_0001)) ## ----plot-pdata-pct-exprs-controls--------------------------------------- plotPhenoData(example_sceset, aes(x = total_features, y = pct_exprs_feature_controls, colour = Mutation_Status)) + theme(legend.position = "top") + stat_smooth(method = "lm", se = FALSE, size = 2, fullrange = TRUE) ## ----plot-pca-default---------------------------------------------------- plotPCA(example_sceset) ## ----plot-pca-cpm, fig.show=FALSE---------------------------------------- plotPCA(example_sceset, exprs_values = "cpm") ## ----plot-pca-feature-controls, fig.show = FALSE------------------------- plotPCA(example_sceset, feature_set = fData(example_sceset)$is_feature_control) ## ----plot-pca-4comp-colby-shapeby, fig.height=5.5------------------------ plotPCA(example_sceset, ncomponents = 4, colour_by = "Treatment", shape_by = "Mutation_Status") ## ----plot-pca-4comp-colby-sizeby-exprs, fig.height=5.5------------------- plotPCA(example_sceset, colour_by = "Gene_0001", size_by = "Gene_1000") ## ----plot-tsne-1comp-colby-sizeby-exprs, fig.height=5.5------------------ plotTSNE(example_sceset, colour_by = "Gene_0001", size_by = "Gene_1000") ## ----plot-difmap-1comp-colby-sizeby-exprs, fig.height=5.5---------------- plotDiffusionMap(example_sceset, colour_by = "Gene_0001", size_by = "Gene_1000") ## ----plot-pca-4comp-colby-shapeby-save-pcs, fig.show = FALSE------------- example_sceset <- plotPCA(example_sceset, ncomponents = 4, colour_by = "Treatment", shape_by = "Mutation_Status", return_SCESet = TRUE, theme_size = 12) head(reducedDimension(example_sceset)) ## ----plot-reduceddim-4comp-colby-shapeby, fig.show=FALSE----------------- plotReducedDim(example_sceset, ncomponents = 4, colour_by = "Treatment", shape_by = "Mutation_Status") ## ----plot-reduceddim-4comp-colby-sizeby-exprs, fig.show = FALSE---------- plotReducedDim(example_sceset, ncomponents = 4, colour_by = "Gene_1000", size_by = "Gene_0500") ## ----plot-pca-outlier---------------------------------------------------- example_sceset <- plotPCA(example_sceset, pca_data_input = "pdata", detect_outliers = TRUE, return_SCESet = TRUE) ## ----plot-qc-expl-variables-all, warning=FALSE--------------------------- plotQC(example_sceset, type = "expl") ## ----plot-qc-expl-variables-select-variables----------------------------- plotQC(example_sceset, type = "expl", variables = c("total_features", "total_counts", "Mutation_Status", "Treatment", "Cell_Cycle")) ## ----plot-qc-pairs-pc---------------------------------------------------- plotQC(example_sceset, type = "expl", method = "pairs", theme_size = 6) ## ----plot-qc-find-pcs-pcs-vs-vars, fig.width=8, fig.height=7------------- p1 <- plotQC(example_sceset, type = "find-pcs", variable = "total_features", plot_type = "pcs-vs-vars") p2 <- plotQC(example_sceset, type = "find-pcs", variable = "Cell_Cycle", plot_type = "pcs-vs-vars") multiplot(p1, p2, cols = 2) ## ----plot-qc-find-pcs-pairs, fig.width=10, fig.height=7------------------ plotQC(example_sceset, type = "find-pcs", variable = "total_features", plot_type = "pairs-pcs") ## ----plot-qc-find-pcs-pairs-2, fig.show=FALSE---------------------------- plotQC(example_sceset, type = "find-pcs", variable = "Cell_Cycle", plot_type = "pairs-pcs") ## ----cell-pairwise-distance-matrices-euclidean, eval=TRUE---------------- cell_dist <- dist(t(exprs(example_sceset))) cellPairwiseDistances(example_sceset) <- cell_dist plotMDS(example_sceset) ## ----cell-pairwise-distance-matrices-canberra, eval=TRUE, fig.show = FALSE---- cell_dist <- dist(t(counts(example_sceset)), method = "canberra") cellPairwiseDistances(example_sceset) <- cell_dist plotMDS(example_sceset, colour_by = "Mutation_Status") ## ----feature-pairwise-distance-matrices, eval=FALSE---------------------- ## feature_dist <- dist(exprs(example_sceset)) ## featurePairwiseDistances(example_sceset) <- feature_dist ## limma::plotMDS(featDist(example_sceset)) ## ----kallisto-demo-kallisto-test-data, eval=FALSE------------------------ ## ################################################################################ ## ### Tests and Examples ## ## # Example if in the kallisto/test directory ## setwd("/home/davis/kallisto/test") ## kallisto_log <- runKallisto("targets.txt", "transcripts.idx", single_end=FALSE, ## output_prefix="output", verbose=TRUE, n_bootstrap_samples=10) ## ## sce_test <- readKallistoResults(kallisto_log, read_h5=TRUE) ## sce_test ## ----kallisto-cell-cycle-example, eval=FALSE----------------------------- ## setwd("/home/davis/021_Cell_Cycle/data/fastq") ## system("wc -l targets.txt") ## ave_frag_len <- mean(c(855, 860, 810, 760, 600, 690, 770, 690)) ## ## kallisto_test <- runKallisto("targets.txt", ## "Mus_musculus.GRCm38.rel79.cdna.all.ERCC.idx", ## output_prefix="kallisto_output_Mmus", n_cores=12, ## fragment_length=ave_frag_len, verbose=TRUE) ## sce_kall_mmus <- readKallistoResults(kallisto_test, read_h5=TRUE) ## sce_kall_mmus ## ## sce_kall_mmus <- readKallistoResults(kallisto_test) ## ## sce_kall_mmus <- getBMFeatureAnnos(sce_kall_mmus) ## ## head(fData(sce_kall_mmus)) ## head(pData(sce_kall_mmus)) ## sce_kall_mmus[["start_time"]] ## ## counts(sce_kall_mmus)[sample(nrow(sce_kall_mmus), size=15), 1:6] ## ## ## Summarise expression at the gene level ## sce_kall_mmus_gene <- summariseExprsAcrossFeatures( ## sce_kall_mmus, exprs_values="tpm", summarise_by="feature_id") ## ## datatable(fData(sce_kall_mmus_gene)) ## ## sce_kall_mmus_gene <- getBMFeatureAnnos( ## sce_kall_mmus_gene, filters="ensembl_gene_id", ## attributes=c("ensembl_gene_id", "mgi_symbol", "chromosome_name", ## "gene_biotype", "start_position", "end_position", ## "percentage_gc_content", "description"), ## feature_symbol="mgi_symbol", feature_id="ensembl_gene_id", ## biomart="ensembl", dataset="mmusculus_gene_ensembl") ## ## datatable(fData(sce_kall_mmus_gene)) ## ## ## Add gene symbols to featureNames to make them more intuitive ## new_feature_names <- featureNames(sce_kall_mmus_gene) ## notna_mgi_symb <- !is.na(fData(sce_kall_mmus_gene)$mgi_symbol) ## new_feature_names[notna_mgi_symb] <- fData(sce_kall_mmus_gene)$mgi_symbol[notna_mgi_symb] ## notna_ens_gid <- !is.na(fData(sce_kall_mmus_gene)$ensembl_gene_id) ## new_feature_names[notna_ens_gid] <- paste(new_feature_names[notna_ens_gid], ## fData(sce_kall_mmus_gene)$ensembl_gene_id[notna_ens_gid], sep="_") ## sum(duplicated(new_feature_names)) ## featureNames(sce_kall_mmus_gene) <- new_feature_names ## head(featureNames(sce_kall_mmus_gene)) ## tail(featureNames(sce_kall_mmus_gene)) ## sum(duplicated(fData(sce_kall_mmus_gene)$mgi_symbol)) ##