## ---- include = FALSE--------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----vignetteSetup, echo=FALSE, message=FALSE, warning = FALSE---------------- ## For links library("BiocStyle") ## Track time spent on making the vignette startTime <- Sys.time() ## Bib setup library("knitcitations") ## Load knitcitations with a clean bibliography cleanbib() cite_options(hyperlink = "to.doc", citation_format = "text", style = "html") # Note links won't show for now due to the following issue # https://github.com/cboettig/knitcitations/issues/63 ## Write bibliography information bib <- c( R = citation(), AnnotationHub = citation("AnnotationHub"), benchmarkme = citation("benchmarkme"), BiocStyle = citation("BiocStyle"), cowplot = citation("cowplot"), DT = citation("DT"), ExperimentHub = citation("ExperimentHub"), fields = citation("fields"), ggplot2 = citation("ggplot2"), golem = citation("golem"), IRanges = citation("IRanges"), knitcitations = citation("knitcitations"), knitr = citation("knitr")[3], plotly = citation("plotly"), Polychrome = citation("Polychrome"), RColorBrewer = citation("RColorBrewer"), rmarkdown = citation("rmarkdown")[1], S4Vectors = citation("S4Vectors"), scater = citation("scater"), sessioninfo = citation("sessioninfo"), SingleCellExperiment = citation("SingleCellExperiment"), shiny = citation("shiny"), SummarizedExperiment = citation("SummarizedExperiment"), testthat = citation("testthat"), viridisLite = citation("viridisLite"), spatialLIBD = citation("spatialLIBD")[1], spatialLIBDpaper = citation("spatialLIBD")[2] ) ## ----'install', eval = FALSE-------------------------------------------------- # if (!requireNamespace("BiocManager", quietly = TRUE)) { # install.packages("BiocManager") # } # # BiocManager::install("spatialLIBD") # # ## Check that you have a valid Bioconductor installation # BiocManager::valid() ## ----'citation'--------------------------------------------------------------- ## Citation info citation("spatialLIBD") ## ----setup, message = FALSE, warning = FALSE---------------------------------- library("spatialLIBD") ## ----'experiment_hub'--------------------------------------------------------- ## Connect to ExperimentHub ehub <- ExperimentHub::ExperimentHub() ## ----'download_data'---------------------------------------------------------- ## Downloa small example sce data if (!exists("sce")) sce <- fetch_data(type = "sce_example", eh = ehub) ## If you want to download the full real data (about 2.1 GB in RAM) use: if (FALSE) { if (!exists("sce")) sce <- fetch_data(type = "sce", eh = ehub) } ## Query ExperimentHub and download the data if (!exists("sce_layer")) sce_layer <- fetch_data(type = "sce_layer", eh = ehub) modeling_results <- fetch_data("modeling_results", eh = ehub) ## ----'explore data'----------------------------------------------------------- ## spot-level data sce ## layer-level data sce_layer ## list of modeling result tables sapply(modeling_results, class) sapply(modeling_results, dim) sapply(modeling_results, function(x) { head(colnames(x)) }) ## ----'create_sig_genes'------------------------------------------------------- ## Convert to a long format the modeling results ## This takes a few seconds to run system.time( sig_genes <- sig_genes_extract_all( n = nrow(sce_layer), modeling_results = modeling_results, sce_layer = sce_layer ) ) ## Explore the result class(sig_genes) dim(sig_genes) ## ----------------------------------------------------------------------------- if (interactive()) { run_app( sce = sce, sce_layer = sce_layer, modeling_results = modeling_results, sig_genes = sig_genes ) } ## ----'sce_image_clus', fig.small = TRUE--------------------------------------- ## View our LIBD layers for one sample sce_image_clus( sce = sce, clustervar = "layer_guess_reordered", sampleid = "151673", colors = libd_layer_colors, ... = " LIBD Layers" ) ## ----'sce_image_clus_variables'----------------------------------------------- ## This is not fully precise, but gives you a rough idea ## Some integer columns are actually continuous variables table(sapply(colData(sce), class) %in% c("factor", "integer")) ## This is more precise (one cluster has 28 unique values) table(sapply(colData(sce), function(x) length(unique(x))) < 29) ## ----'sce_image_clus_no_spatial', fig.small = TRUE---------------------------- ## View our LIBD layers for one sample ## without spatial information sce_image_clus( sce = sce, clustervar = "layer_guess_reordered", sampleid = "151673", colors = libd_layer_colors, ... = " LIBD Layers", spatial = FALSE ) ## ----'libd_layer_colors'------------------------------------------------------ ## Color palette designed by Lukas M. Weber with feedback from the team. libd_layer_colors ## ----'sce_image_gene', fig.small = TRUE--------------------------------------- ## Available gene expression assays assayNames(sce) ## Not all of these make sense to visualize ## In particular, barcode, row, col, imagerow, imagecol, key are not ## useful to visualize. colnames(colData(sce))[sapply(colData(sce), function(x) { length(unique(x)) }) > 29] ## Visualize a gene sce_image_gene( sce = sce, sampleid = "151673", viridis = FALSE ) ## Visualize the estimated number of cells per spot sce_image_gene( sce = sce, sampleid = "151673", geneid = "cell_count" ) ## Visualize the fraction of chrM expression per spot ## without the spatial layer sce_image_gene( sce = sce, sampleid = "151673", geneid = "expr_chrM_ratio", spatial = FALSE ) ## ----'sig_genes_detail'------------------------------------------------------- head(sig_genes) ## ----'layer_boxplot'---------------------------------------------------------- ## Note that we recommend setting the random seed so the jittering of the ## points will be reproducible. Given the requirements by BiocCheck this ## cannot be done inside the layer_boxplot() function. ## Create a boxplot of the first gene in `sig_genes`. set.seed(20200206) layer_boxplot(sig_genes = sig_genes, sce_layer = sce_layer) ## Viridis colors displayed in the shiny app ## showing the first pairwise model result ## (which illustrates the background colors used for the layers not ## involved in the pairwise comparison) set.seed(20200206) layer_boxplot( i = which(sig_genes$model_type == "pairwise")[1], sig_genes = sig_genes, sce_layer = sce_layer, col_low_box = viridisLite::viridis(4)[2], col_low_point = viridisLite::viridis(4)[1], col_high_box = viridisLite::viridis(4)[3], col_high_point = viridisLite::viridis(4)[4] ) ## Paper colors displayed in the shiny app set.seed(20200206) layer_boxplot( sig_genes = sig_genes, sce_layer = sce_layer, short_title = FALSE, col_low_box = "palegreen3", col_low_point = "springgreen2", col_high_box = "darkorange2", col_high_point = "orange1" ) ## ----'matt_t_stats'----------------------------------------------------------- ## Explore the enrichment t-statistics derived from Tran et al's snRNA-seq ## DLPFC data dim(tstats_Human_DLPFC_snRNAseq_Nguyen_topLayer) tstats_Human_DLPFC_snRNAseq_Nguyen_topLayer[seq_len(3), seq_len(6)] ## ----'layer_stat_cor'--------------------------------------------------------- ## Compute the correlation matrix of enrichment t-statistics between our data ## and Tran et al's snRNA-seq data cor_stats_layer <- layer_stat_cor( tstats_Human_DLPFC_snRNAseq_Nguyen_topLayer, modeling_results, "enrichment" ) ## Explore the correlation matrix head(cor_stats_layer[, seq_len(3)]) ## ----'layer_stat_cor_plot', fig.wide = TRUE----------------------------------- ## Visualize the correlation matrix layer_stat_cor_plot(cor_stats_layer, max = max(cor_stats_layer)) ## ----'read_sfari'------------------------------------------------------------- ## Read in the SFARI gene sets included in the package asd_sfari <- utils::read.csv( system.file( "extdata", "SFARI-Gene_genes_01-03-2020release_02-04-2020export.csv", package = "spatialLIBD" ), as.is = TRUE ) ## Format them appropriately asd_sfari_geneList <- list( Gene_SFARI_all = asd_sfari$ensembl.id, Gene_SFARI_high = asd_sfari$ensembl.id[asd_sfari$gene.score < 3], Gene_SFARI_syndromic = asd_sfari$ensembl.id[asd_sfari$syndromic == 1] ) ## ----'sfari_enrichment'------------------------------------------------------- ## Compute the gene set enrichment results asd_sfari_enrichment <- gene_set_enrichment( gene_list = asd_sfari_geneList, modeling_results = modeling_results, model_type = "enrichment" ) ## Explore the results head(asd_sfari_enrichment) ## ----'sfari_enrichment_plot'-------------------------------------------------- ## Visualize gene set enrichment results gene_set_enrichment_plot( asd_sfari_enrichment, xlabs = gsub(".*_", "", unique(asd_sfari_enrichment$ID)), layerHeights = c(0, 40, 55, 75, 85, 110, 120, 135) ) ## ----'required_structure'----------------------------------------------------- ## Images data stored under metadata(sce)$image head(metadata(sce)$image) ## Ensembl gene IDs are row names with the symbol (gene_name) and Ensembl ## ID (gene_name) pasted into `gene_search` ## gene_search <- paste0(gene_name, '; ', gene_id) rowData(sce)[, c("gene_id", "gene_name", "gene_search")] ## This information also has to be present in sce_layer stopifnot(all( c("gene_id", "gene_name", "gene_search") %in% colnames(rowData(sce_layer)) )) ## Information about the image coordinates stored in imagerow and imagecol colData(sce)[, c("imagerow", "imagecol")] ## The sample names stored under sce$sample_name stopifnot("sample_name" %in% colnames(colData(sce))) ## A unique spot-level ID (such as barcode) stored under sce$key ## The 'key' column is necessary for the plotly code to work. stopifnot("key" %in% colnames(colData(sce))) stopifnot(length(unique(sce$key)) == ncol(sce)) ## Here's how we made those unique keys identical(sce$key, paste0(sce$sample_name, "_", sce$barcode)) ## Some cluster variables that you can specify with run_app(). ## For example: cluster_variables <- c( "GraphBased", "Layer", "Maynard", "Martinowich", paste0("SNN_k50_k", 4:28), "layer_guess_reordered_short", "SpatialDE_PCA", "SpatialDE_pool_PCA", "HVG_PCA", "pseudobulk_PCA", "markers_PCA", "SpatialDE_UMAP", "SpatialDE_pool_UMAP", "HVG_UMAP", "pseudobulk_UMAP", "markers_UMAP", "SpatialDE_PCA_spatial", "SpatialDE_pool_PCA_spatial", "HVG_PCA_spatial", "pseudobulk_PCA_spatial", "markers_PCA_spatial", "SpatialDE_UMAP_spatial", "SpatialDE_pool_UMAP_spatial", "HVG_UMAP_spatial", "pseudobulk_UMAP_spatial", "markers_UMAP_spatial" ) stopifnot(all(cluster_variables %in% colnames(colData(sce)))) ## The last one gets renamed to spatialLIBD in spatialLIBD::app_server() ## Some continuous variables that you can specify in run_app() ## as well as the already mentioned rowData(sce)$gene_search ## For example: continuous_variables <- c( "cell_count", "sum_umi", "sum_gene", "expr_chrM", "expr_chrM_ratio" ) stopifnot(all(continuous_variables %in% colnames(colData(sce)))) ## None of the values of rowData(sce)$gene_search should be re-used in ## colData(sce) stopifnot(!all(rowData(sce)$gene_search %in% colnames(colData(sce)))) ## No column named COUNT as that's used by sce_image_gene_p() and related ## functions. stopifnot(!"COUNT" %in% colnames(colData(sce))) ## The counts and logcounts assays stopifnot(all(c("counts", "logcounts") %in% assayNames(sce))) ## For the layer-level data we also need layer_guess_reordered ## although this is a parameter you can change in run_app() stopifnot("layer_guess_reordered" %in% colnames(colData(sce_layer))) stopifnot(all( c("enrichment", "pairwise", "anova") %in% names(modeling_results) )) ## All the model tables contain the ensembl column stopifnot(all(sapply(modeling_results, function(x) { "ensembl" %in% colnames(x) }))) ## The following column prefixes are present in each of the model tables expected_column_name_starts <- c("^[f|t]_stat_", "^p_value_", "^fdr_") stopifnot(all(sapply(modeling_results, function(model_table) { all(sapply(expected_column_name_starts, function(expected_prefix) { any(grepl( expected_prefix, colnames(model_table) )) })) }))) ## ----'run_check_functions'---------------------------------------------------- check_sce(sce) check_sce_layer(sce_layer) ## The output here is too long to print xx <- check_modeling_results(modeling_results) identical(xx, modeling_results) ## ----createVignette, eval=FALSE----------------------------------------------- # ## Create the vignette # library("rmarkdown") # system.time(render("spatialLIBD.Rmd")) # # ## Extract the R code # library("knitr") # knit("spatialLIBD.Rmd", tangle = TRUE) ## ----reproduce1, echo=FALSE--------------------------------------------------- ## Date the vignette was generated Sys.time() ## ----reproduce2, echo=FALSE--------------------------------------------------- ## Processing time in seconds totalTime <- diff(c(startTime, Sys.time())) round(totalTime, digits = 3) ## ----reproduce3, echo=FALSE------------------------------------------------------------------------------------------- ## Session info library("sessioninfo") options(width = 120) session_info() ## ----vignetteBiblio, results = 'asis', echo = FALSE, warning = FALSE, message = FALSE--------------------------------- ## Print bibliography bibliography()