In the examples below, we assume the input data are provided as a SpatialExperiment (SPE) object. The outputs for spot-level outliers and artifacts are stored in the colData
of the SPE object.
This is an example workflow showing how to detect and visualize local outliers in 10X Genomics Visium data.
library(SpotSweeper)
# load Maynard et al DLPFC daatset
spe <- STexampleData::Visium_humanDLPFC()
# change from gene id to gene names
rownames(spe) <- rowData(spe)$gene_name
# show column data before SpotSweeper
colnames(colData(spe))
#> [1] "barcode_id" "sample_id" "in_tissue" "array_row" "array_col"
#> [6] "ground_truth" "cell_count"
# drop out-of-tissue spots
spe <- spe[, spe$in_tissue == 1]
spe <- spe[, !is.na(spe$ground_truth)]
SpotSweeper can be run on an SPE object with the following code. This outputs the local_outliers
in the colData of the SPE object. Selecting data_output=TRUE
exports z-transformed QC metrics as well.
# Identifying the mitochondrial transcripts in our SpatialExperiment.
is.mito <- rownames(spe)[grepl("^MT-", rownames(spe))]
# Calculating QC metrics for each spot using scuttle
spe<- scuttle::addPerCellQCMetrics(spe, subsets=list(Mito=is.mito))
colnames(colData(spe))
#> [1] "barcode_id" "sample_id" "in_tissue"
#> [4] "array_row" "array_col" "ground_truth"
#> [7] "cell_count" "sum" "detected"
#> [10] "subsets_Mito_sum" "subsets_Mito_detected" "subsets_Mito_percent"
#> [13] "total"
# Identifying local outliers using SpotSweeper
features <- c('sum' ,'detected', "subsets_Mito_percent")
spe<- localOutliers(spe,
features=features,
n_neighbors=18,
data_output=TRUE,
method="multivariate"
)
# show column data after SpotSweeper
colnames(colData(spe))
#> [1] "barcode_id" "sample_id"
#> [3] "in_tissue" "array_row"
#> [5] "array_col" "ground_truth"
#> [7] "cell_count" "sum"
#> [9] "detected" "subsets_Mito_sum"
#> [11] "subsets_Mito_detected" "subsets_Mito_percent"
#> [13] "total" "sum_log2"
#> [15] "detected_log2" "subsets_Mito_percent_log2"
#> [17] "coords" "local_outliers"
#> [19] "sum_z" "detected_z"
#> [21] "subsets_Mito_percent_z" "LOF"
We can now visualize local_outliers
vs one of the QC metrics, sum_log2
, with help from the escheR
package.
library(escheR)
library(ggpubr)
# plotting using escheR
p1 <- make_escheR(spe) |>
add_fill(var = "sum_log2", point_size=1.25) +
scale_fill_gradient(low ="white",high = "darkgreen")
p2 <- make_escheR(spe) |>
add_fill(var = "sum_log2", point_size=1.25) |>
add_ground(var = "local_outliers", stroke = 1) +
scale_color_manual(
name = "", # turn off legend name for ground_truth
values = c(
"TRUE" = "red",
"FALSE" = "transparent")
) +
scale_fill_gradient(low ="white",high = "darkgreen")
plot_list <- list(p1, p2)
ggarrange(
plotlist = plot_list,
ncol = 2, nrow = 1,
common.legend = FALSE
)
utils::sessionInfo()
#> R Under development (unstable) (2024-01-16 r85808)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 22.04.3 LTS
#>
#> Matrix products: default
#> BLAS: /home/biocbuild/bbs-3.19-bioc/R/lib/libRblas.so
#> LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
#>
#> locale:
#> [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
#> [3] LC_TIME=en_GB LC_COLLATE=C
#> [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
#> [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
#> [9] LC_ADDRESS=C LC_TELEPHONE=C
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
#>
#> time zone: America/New_York
#> tzcode source: system (glibc)
#>
#> attached base packages:
#> [1] stats4 stats graphics grDevices utils datasets methods
#> [8] base
#>
#> other attached packages:
#> [1] ggpubr_0.6.0 escheR_1.3.2
#> [3] ggplot2_3.4.4 STexampleData_1.11.0
#> [5] SpatialExperiment_1.13.0 SingleCellExperiment_1.25.0
#> [7] SummarizedExperiment_1.33.3 Biobase_2.63.0
#> [9] GenomicRanges_1.55.3 GenomeInfoDb_1.39.6
#> [11] IRanges_2.37.1 S4Vectors_0.41.3
#> [13] MatrixGenerics_1.15.0 matrixStats_1.2.0
#> [15] ExperimentHub_2.11.1 AnnotationHub_3.11.1
#> [17] BiocFileCache_2.11.1 dbplyr_2.4.0
#> [19] BiocGenerics_0.49.1 SpotSweeper_0.99.1
#>
#> loaded via a namespace (and not attached):
#> [1] DBI_1.2.1 bitops_1.0-7
#> [3] rlang_1.1.3 magrittr_2.0.3
#> [5] compiler_4.4.0 RSQLite_2.3.5
#> [7] DelayedMatrixStats_1.25.1 png_0.1-8
#> [9] vctrs_0.6.5 pkgconfig_2.0.3
#> [11] crayon_1.5.2 fastmap_1.1.1
#> [13] backports_1.4.1 magick_2.8.2
#> [15] XVector_0.43.1 labeling_0.4.3
#> [17] scuttle_1.13.0 utf8_1.2.4
#> [19] rmarkdown_2.25 purrr_1.0.2
#> [21] bit_4.0.5 xfun_0.42
#> [23] zlibbioc_1.49.0 cachem_1.0.8
#> [25] beachmat_2.19.1 jsonlite_1.8.8
#> [27] blob_1.2.4 highr_0.10
#> [29] DelayedArray_0.29.1 BiocParallel_1.37.0
#> [31] broom_1.0.5 parallel_4.4.0
#> [33] R6_2.5.1 bslib_0.6.1
#> [35] car_3.1-2 jquerylib_0.1.4
#> [37] Rcpp_1.0.12 knitr_1.45
#> [39] Matrix_1.6-5 tidyselect_1.2.0
#> [41] abind_1.4-5 yaml_2.3.8
#> [43] codetools_0.2-19 curl_5.2.0
#> [45] lattice_0.22-5 tibble_3.2.1
#> [47] withr_3.0.0 KEGGREST_1.43.0
#> [49] evaluate_0.23 Biostrings_2.71.2
#> [51] pillar_1.9.0 BiocManager_1.30.22
#> [53] filelock_1.0.3 carData_3.0-5
#> [55] generics_0.1.3 dbscan_1.1-12
#> [57] RCurl_1.98-1.14 BiocVersion_3.19.1
#> [59] sparseMatrixStats_1.15.0 munsell_0.5.0
#> [61] scales_1.3.0 glue_1.7.0
#> [63] tools_4.4.0 BiocNeighbors_1.21.2
#> [65] ggsignif_0.6.4 cowplot_1.1.3
#> [67] grid_4.4.0 tidyr_1.3.1
#> [69] AnnotationDbi_1.65.2 colorspace_2.1-0
#> [71] GenomeInfoDbData_1.2.11 cli_3.6.2
#> [73] rappdirs_0.3.3 fansi_1.0.6
#> [75] viridisLite_0.4.2 S4Arrays_1.3.3
#> [77] dplyr_1.1.4 gtable_0.3.4
#> [79] rstatix_0.7.2 sass_0.4.8
#> [81] digest_0.6.34 SparseArray_1.3.4
#> [83] farver_2.1.1 rjson_0.2.21
#> [85] memoise_2.0.1 htmltools_0.5.7
#> [87] lifecycle_1.0.4 httr_1.4.7
#> [89] mime_0.12 bit64_4.0.5
#> [91] MASS_7.3-60.2