--- title: "Multi-sample analysis (10x Visium Human DLPFC)" output: BiocStyle::html_document # output: pdf_document vignette: > %\VignetteIndexEntry{Multi-sample analysis (10x Visium Human DLPFC)} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.path = "figures/", dpi = 36 ) ``` Here, we demonstrate BANKSY analysis on 10x Visium data of the human dorsolateral prefrontal cortex from Maynard et al (2018). The data comprise 12 samples obtained from 3 subjects, with manual annotation of the layers in each sample. We will focus on 4 of the 12 samples from subject 3, demonstrating multi-sample analysis with BANKSY. ```{r, eval=TRUE, include=FALSE} start.time <- Sys.time() ``` ```{r, eval=TRUE, message=FALSE, warning=FALSE} library(Banksy) library(SummarizedExperiment) library(SpatialExperiment) library(Seurat) library(scater) library(cowplot) library(ggplot2) ``` # Loading the data We fetch the data for all 12 DLPFC samples with the [*spatialLIBD*](https://bioconductor.org/packages/release/data/experiment/html/spatialLIBD.html) package. This might take awhile. ```{r, eval=TRUE, message=FALSE, warning=FALSE} library(spatialLIBD) library(ExperimentHub) ehub <- ExperimentHub::ExperimentHub() spe <- spatialLIBD::fetch_data(type = "spe", eh = ehub) ``` After the download is completed, we trim the *SpatialExperiment* object, retaining only the counts and some metadata such as the sample identifier and pathology annotations. This saves some memory. ```{r, eval=TRUE, message=FALSE, warning=FALSE} imgData(spe) <- NULL assay(spe, "logcounts") <- NULL reducedDims(spe) <- NULL rowData(spe) <- NULL colData(spe) <- DataFrame( sample_id = spe$sample_id, clust_annotation = factor( addNA(spe$layer_guess_reordered_short), exclude = NULL, labels = seq(8) ), in_tissue = spe$in_tissue, row.names = colnames(spe) ) invisible(gc()) ``` Next, subset `spe` to samples from the last subject (samples `151673`, `151674`, `151675`, `151676`). This stores each sample in its own *SpatialExperiment* object, all placed in a list. ```{r, eval=TRUE, message=FALSE, warning=FALSE} sample_names <- as.character(151673:151676) spe_list <- lapply(sample_names, function(x) spe[, spe$sample_id == x]) rm(spe) invisible(gc()) ``` # Data preprocessing Using Seurat, we perform basic normalisation of the data, and select the top 2000 highly variable features from each sample. Other methods for normalisation and feature selection may also be used. We take the union of these features for downstream analysis. ```{r, eval=TRUE, message=FALSE, warning=FALSE} #' Normalize data seu_list <- lapply(spe_list, function(x) { x <- as.Seurat(x, data = NULL) NormalizeData(x, scale.factor = 5000, normalization.method = 'RC') }) #' Compute HVGs hvgs <- lapply(seu_list, function(x) { VariableFeatures(FindVariableFeatures(x, nfeatures = 2000)) }) hvgs <- Reduce(union, hvgs) #' Add data to SpatialExperiment and subset to HVGs aname <- "normcounts" spe_list <- Map(function(spe, seu) { assay(spe, aname) <- GetAssayData(seu) spe[hvgs,] }, spe_list, seu_list) rm(seu_list) invisible(gc()) ``` # Running BANKSY To run BANKSY across multiple samples, we first compute the BANKSY neighborhood feature matrices for each sample separately. We use `k_geom=6` corresponding to the first-order neighbors in 10x Visium assays (`k_geom=18` corresponding to first and second-order neighbors may also be used). ```{r, eval=TRUE, message=FALSE, warning=FALSE} compute_agf <- FALSE k_geom <- 6 spe_list <- lapply(spe_list, computeBanksy, assay_name = aname, compute_agf = compute_agf, k_geom = k_geom) ``` We then merge the samples to perform joint dimensional reduction and clustering: ```{r, eval=TRUE, message=FALSE, warning=FALSE} spe_joint <- do.call(cbind, spe_list) rm(spe_list) invisible(gc()) ``` When running multi-sample BANKSY PCA, the `group` argument may be provided. This specifies the grouping variable for the cells or spots across the samples. Features belonging to cells or spots corresponding to each level of the grouping variable will be z-scaled separately. In this case, `sample_id` in `colData(spe_joint)` gives the grouping based on the sample of origin. ```{r, eval=TRUE, message=FALSE, warning=FALSE} lambda <- 0.2 use_agf <- FALSE spe_joint <- runBanksyPCA(spe_joint, use_agf = use_agf, lambda = lambda, group = "sample_id", seed = 1000) ``` Run UMAP on the BANKSY embedding: ```{r, eval=TRUE, message=FALSE, warning=FALSE} spe_joint <- runBanksyUMAP(spe_joint, use_agf = use_agf, lambda = lambda, seed = 1000) ``` Finally, obtain cluster labels for spots across all 4 samples. We use `connectClusters` for visual comparison of the manual annotations and BANKSY clusters. ```{r, eval=TRUE, message=FALSE, warning=FALSE} res <- 0.7 spe_joint <- clusterBanksy(spe_joint, use_agf = use_agf, lambda = lambda, resolution = res, seed = 1000) cnm <- sprintf("clust_M%s_lam%s_k50_res%s", as.numeric(use_agf), lambda, res) spe_joint <- connectClusters(spe_joint) ``` Once joint clustering is performed, we split the samples into their own `SpatialExperiment` objects: ```{r, eval=TRUE, message=FALSE, warning=FALSE} spe_list <- lapply(sample_names, function(x) spe_joint[, spe_joint$sample_id == x]) rm(spe_joint) invisible(gc()) ``` As an optional step, we smooth the cluster labels of each sample separately. This can be done if smooth spatial domains are expected in the biological sample or tissue in question. ```{r, eval=TRUE, message=FALSE, warning=FALSE} spe_list <- lapply(spe_list, smoothLabels, cluster_names = cnm, k = 6L, verbose = FALSE) names(spe_list) <- paste0("sample_", sample_names) ``` The raw and smoothed cluster labels are stored in the `colData` slot of each `SingleCellExperiment` or `SpatialExperiment` object. ```{r, eval=TRUE, echo=FALSE} head(colData(spe_list$sample_151673)) ``` # Parsing BANKSY output We can compare BANKSY clusters to pathology annotations using several cluster comparison measures such as the adjusted Rand index (ARI) or normalized mutual information (NMI) with `compareClusters`. The function computes the selected comparison measure between all pairs of cluster labels: ```{r, eval=TRUE} compareClusters(spe_list$sample_151673, func = 'ARI') ``` We evaluate the ARI and NMI for each sample: ```{r, eval=TRUE} ari <- sapply(spe_list, function(x) as.numeric(tail(compareClusters(x, func = "ARI")[, 1], n = 1))) ari ``` ```{r, eval=TRUE} nmi <- sapply(spe_list, function(x) as.numeric(tail(compareClusters(x, func = "NMI")[, 1], n = 1))) nmi ``` Visualise pathology annotation and BANKSY cluster on spatial coordinates with the [*ggspavis*](https://bioconductor.org/packages/ggspavis) package: ```{r multi-sample-spatial, eval=TRUE, fig.height=4, out.width='90%'} # Use scater:::.get_palette('tableau10medium') pal <- c( "#729ECE", "#FF9E4A", "#67BF5C", "#ED665D", "#AD8BC9", "#A8786E", "#ED97CA", "#A2A2A2", "#CDCC5D", "#6DCCDA" ) plot_bank <- lapply(spe_list, function(x) { df <- cbind.data.frame( clust=colData(x)[[sprintf("%s_smooth", cnm)]], spatialCoords(x)) ggplot(df, aes(x=pxl_row_in_fullres, y=pxl_col_in_fullres, col=clust)) + geom_point(size = 0.5) + scale_color_manual(values = pal) + theme_classic() + theme( legend.position = "none", axis.text.x=element_blank(), axis.text.y=element_blank(), axis.ticks=element_blank(), axis.title.x=element_blank(), axis.title.y=element_blank()) + labs(title = "BANKSY clusters") + coord_equal() }) plot_anno <- lapply(spe_list, function(x) { df <- cbind.data.frame( clust=colData(x)[['clust_annotation']], spatialCoords(x)) ggplot(df, aes(x=pxl_row_in_fullres, y=pxl_col_in_fullres, col=clust)) + geom_point(size = 0.5) + scale_color_manual(values = pal) + theme_classic() + theme( legend.position = "none", axis.text.x=element_blank(), axis.text.y=element_blank(), axis.ticks=element_blank(), axis.title.x=element_blank(), axis.title.y=element_blank()) + labs(title = sprintf("Sample %s", x$sample_id[1])) + coord_equal() }) plot_list <- c(plot_anno, plot_bank) plot_grid(plotlist = plot_list, ncol = 4, byrow = TRUE) ``` Visualize joint UMAPs for each sample: ```{r multi-sample-umap, eval=TRUE, fig.height=5, out.width='90%'} umap_bank <- lapply(spe_list, function(x) { plotReducedDim(x, "UMAP_M0_lam0.2", colour_by = sprintf("%s_smooth", cnm), point_size = 0.5 ) + theme(legend.position = "none") + labs(title = "BANKSY clusters") }) umap_anno <- lapply(spe_list, function(x) { plotReducedDim(x, "UMAP_M0_lam0.2", colour_by = "clust_annotation", point_size = 0.5 ) + theme(legend.position = "none") + labs(title = sprintf("Sample %s", x$sample_id[1])) }) umap_list <- c(umap_anno, umap_bank) plot_grid(plotlist = umap_list, ncol = 4, byrow = TRUE) ``` # Session information Vignette runtime: ```{r, eval=TRUE, echo=FALSE} Sys.time() - start.time ```
```{r, sess} sessionInfo() ```