## ----include = FALSE----------------------------------------------------------
## Global knitr chunk defaults for this document: collapse source and output
## into one block, prefix printed output with "#>", evaluate chunks, and set
## the `crop` option to NULL.
knitr::opts_chunk$set(collapse = TRUE, comment = "#>",
                      eval = TRUE, crop = NULL)
library(BiocStyle)

## -----------------------------------------------------------------------------
#  if (!require("BiocManager", quietly = TRUE))
#      install.packages("BiocManager")
#  
#  BiocManager::install("sketchR")

## ----setup--------------------------------------------------------------------
suppressPackageStartupMessages({
    library(sketchR)
    library(TENxPBMCData)
    library(scuttle)
    library(scran)
    library(scater)
    library(SingleR)
    library(celldex)
    library(cowplot)
    library(SummarizedExperiment)
    library(SingleCellExperiment)
    library(beachmat.hdf5)
})

## -----------------------------------------------------------------------------
## Retrieve the "pbmc3k" dataset via TENxPBMCData
pbmc3k <- TENxPBMCData::TENxPBMCData(dataset = "pbmc3k")

## Name the columns Cell1, Cell2, ... and derive the row names from the
## ENSEMBL_ID and Symbol_TENx columns of the row annotation
colnames(pbmc3k) <- sprintf("Cell%d", seq_len(ncol(pbmc3k)))
rownames(pbmc3k) <- local({
    gene_info <- SummarizedExperiment::rowData(pbmc3k)
    scuttle::uniquifyFeatureNames(ID = gene_info$ENSEMBL_ID,
                                  names = gene_info$Symbol_TENx)
})

## Add log-normalized expression values to the object
pbmc3k <- scuttle::logNormCounts(x = pbmc3k)

## Model the per-gene variance, then keep the 2000 top-ranked genes
dec <- scran::modelGeneVar(x = pbmc3k)
top.hvgs <- scran::getTopHVGs(stats = dec, n = 2000)

## Fix the RNG seed, then run PCA on the selected genes followed by a t-SNE
## computed from the "PCA" reduced dimension
set.seed(100)
pbmc3k <- scater::runTSNE(
    scater::runPCA(pbmc3k, subset_row = top.hvgs),
    dimred = "PCA"
)

## Annotate the cells with SingleR, using the `label.main` labels of the
## Monaco immune reference, and store the result as `labels_main`
ref_monaco <- celldex::MonacoImmuneData()
pred_monaco_main <- SingleR::SingleR(
    test = pbmc3k,
    ref = ref_monaco,
    labels = SummarizedExperiment::colData(ref_monaco)$label.main
)
SummarizedExperiment::colData(pbmc3k)$labels_main <- pred_monaco_main$labels

## Show the dimensions of the processed object
dim(pbmc3k)

## -----------------------------------------------------------------------------
## Select 800 cells with geosketch, working on the PCA coordinates; the
## returned indices are used later to subset the columns of `pbmc3k`
idx800gs <- sketchR::geosketch(
    SingleCellExperiment::reducedDim(pbmc3k, "PCA"),
    N = 800,
    seed = 123
)
utils::head(idx800gs, n = 6L)
length(idx800gs)

## -----------------------------------------------------------------------------
## Select 800 cells with scSampler, working on the PCA coordinates; the
## returned indices are used later to subset the columns of `pbmc3k`
idx800scs <- sketchR::scsampler(
    SingleCellExperiment::reducedDim(pbmc3k, "PCA"),
    N = 800,
    seed = 123
)
utils::head(idx800scs, n = 6L)
length(idx800scs)

## ----fig.width = 10, fig.height = 8-------------------------------------------
## t-SNE panels, in order: all cells, the geosketch subset, the scSampler
## subset; every panel is coloured by `labels_main`
cowplot::plot_grid(
    plotlist = lapply(
        list(pbmc3k, pbmc3k[, idx800gs], pbmc3k[, idx800scs]),
        scater::plotTSNE,
        colour_by = "labels_main"
    )
)

## ----fig.width = 6, fig.height = 8--------------------------------------------
## Composition plot of the `labels_main` column for the two index sets
## (geosketch and scSampler), based on the column data of the full object
sketchR::compareCompositionPlot(
    SummarizedExperiment::colData(pbmc3k),
    idx = list(geosketch = idx800gs, scSampler = idx800scs),
    column = "labels_main"
)

## -----------------------------------------------------------------------------
## Fix the RNG seed, then draw the Hausdorff distance plot on the PCA
## coordinates for three subset sizes, three replicates and three methods
set.seed(123)
sketchR::hausdorffDistPlot(
    mat = SingleCellExperiment::reducedDim(pbmc3k, "PCA"),
    methods = c("geosketch", "scsampler", "uniform"),
    Nvec = c(400, 800, 2000),
    Nrep = 3
)

## -----------------------------------------------------------------------------
## Record the R version and the attached/loaded packages
utils::sessionInfo()