## ----setup, include = FALSE------------------------------------------------ knitr::opts_chunk$set( collapse = TRUE, comment = "#>", error = FALSE, warning = FALSE, message = FALSE ) ## ---- eval=FALSE----------------------------------------------------------- # if (!requireNamespace("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # BiocManager::install("HCAData") ## -------------------------------------------------------------------------- library("HCAData") ## -------------------------------------------------------------------------- HCAData() ## -------------------------------------------------------------------------- suppressPackageStartupMessages({ library(ExperimentHub) library(SingleCellExperiment) }) eh <- ExperimentHub() query(eh, "HCAData") # these three are the components to the bone marrow dataset bonemarrow_h5densematrix <- eh[["EH2047"]] bonemarrow_coldata <- eh[["EH2048"]] bonemarrow_rowdata <- eh[["EH2049"]] # and are put together when calling... sce_bonemarrow <- HCAData("ica_bone_marrow") sce_bonemarrow # similarly, to access the umbilical cord blood dataset sce_cordblood <- HCAData("ica_cord_blood") sce_cordblood ## -------------------------------------------------------------------------- library(scran) library(scater) set.seed(42) sce <- sce_bonemarrow[, sample(seq_len(ncol(sce_bonemarrow)), 1000, replace = FALSE)] ## -------------------------------------------------------------------------- rownames(sce) <- uniquifyFeatureNames(rowData(sce)$ID, rowData(sce)$Symbol) head(rownames(sce)) counts(sce) <- as.matrix(counts(sce)) sce <- scater::calculateQCMetrics(sce) ## -------------------------------------------------------------------------- set.seed(42) clusters <- quickCluster(sce, method="igraph", min.mean=0.1, irlba.args=list(maxit=1000)) table(clusters) sce <- computeSumFactors(sce, min.mean=0.1, cluster=clusters) summary(sizeFactors(sce)) sce <- normalize(sce) ## -------------------------------------------------------------------------- new.trend <- makeTechTrend(x=sce) fit <- trendVar(sce, use.spikes=FALSE, loess.args=list(span=0.05)) # plot(fit$mean, fit$var, pch=16) # curve(fit$trend(x), col="dodgerblue", add=TRUE) # curve(new.trend(x), col="red", add=TRUE) fit0 <- fit fit$trend <- new.trend dec <- decomposeVar(fit=fit) top.dec <- dec[order(dec$bio, decreasing=TRUE),] head(top.dec) plotExpression(sce, features=rownames(top.dec)[1:10]) ## -------------------------------------------------------------------------- set.seed(42) sce <- denoisePCA(sce, technical=new.trend, approximate=TRUE) ncol(reducedDim(sce, "PCA")) plot(attr(reducedDim(sce), "percentVar"), xlab="PC", ylab="Proportion of variance explained") abline(v=ncol(reducedDim(sce, "PCA")), lty=2, col="red") plotPCA(sce, ncomponents=3, colour_by="log10_total_features_by_counts") set.seed(42) sce <- runTSNE(sce, use_dimred="PCA", perplexity=30) plotTSNE(sce, colour_by="log10_total_features_by_counts") ## -------------------------------------------------------------------------- snn.gr <- buildSNNGraph(sce, use.dimred="PCA") clusters <- igraph::cluster_walktrap(snn.gr) sce$Cluster <- factor(clusters$membership) table(sce$Cluster) plotTSNE(sce, colour_by="Cluster") ## ---- eval=FALSE----------------------------------------------------------- # if (require(iSEE)) { # iSEE(sce) # } ## ---- eval=FALSE----------------------------------------------------------- # destination <- "where/to/store/the/processed/data.rds" # saveRDS(sce, file = destination) ## -------------------------------------------------------------------------- sessionInfo()