## ---- include = FALSE--------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 5L, fig.height = 5L ) ## ----message=FALSE, warning=FALSE--------------------------------------------- options(parallelly.fork.enable = TRUE) library(COTAN) library(zeallot) library(data.table) library(factoextra) library(Rtsne) library(qpdf) library(GEOquery) ## ----eval=TRUE, include=TRUE-------------------------------------------------- dataDir <- tempdir() dataSetFile <- file.path(dataDir, "GSM2861514/GSM2861514_E175_Only_Cortical_Cells_DGE.txt.gz") if (!file.exists(dataSetFile)) { getGEOSuppFiles("GSM2861514", makeDirectory = TRUE, baseDir = dataDir, fetch_files = TRUE, filter_regex = "GSM2861514_E175_Only_Cortical_Cells_DGE.txt.gz") sample.dataset <- read.csv(dataSetFile, sep = "\t", row.names = 1L) } ## ----------------------------------------------------------------------------- outDir <- tempdir() # Log-level 2 was chosen to showcase better how the package works # In normal usage a level of 0 or 1 is more appropriate setLoggingLevel(2) # This file will contain all the logs produced by the package # as if at the highest logging level setLoggingFile(file.path(outDir, "vignette_v1.log")) ## ----------------------------------------------------------------------------- cond = "mouse_cortex_E17.5" #cond = "test" #obj = COTAN(raw = sampled.dataset) obj = COTAN(raw = sample.dataset) obj = initializeMetaDataset(obj, GEO = "GSM2861514", sequencingMethod = "Drop_seq", sampleCondition = cond) logThis(paste0("Condition ", getMetadataElement(obj, datasetTags()[["cond"]])), logLevel = 1) ## ----------------------------------------------------------------------------- ECDPlot(obj, yCut = 700) ## ----------------------------------------------------------------------------- cellSizePlot(obj) ## ----------------------------------------------------------------------------- genesSizePlot(obj) ## ----------------------------------------------------------------------------- mit <- mitochondrialPercentagePlot(obj, genePrefix = "^Mt") mit[["plot"]] ## ----------------------------------------------------------------------------- cells_to_rem <- getCells(obj)[getCellsSize(obj) > 6000] obj <- dropGenesCells(obj, cells = cells_to_rem) ## ----------------------------------------------------------------------------- cellGeneNumber <- sort(colSums(as.data.frame(getRawData(obj) > 0)), decreasing = FALSE) cells_to_rem <- names(cellGeneNumber)[cellGeneNumber > 3000] obj <- dropGenesCells(obj, cells = cells_to_rem) genesSizePlot(obj) ## ----------------------------------------------------------------------------- to_rem <- mit[["sizes"]][["mit.percentage"]] > 1.5 cells_to_rem <- rownames(mit[["sizes"]])[to_rem] obj <- dropGenesCells(obj, cells = cells_to_rem) mit <- mitochondrialPercentagePlot(obj, genePrefix = "^Mt") mit[["plot"]] ## ----------------------------------------------------------------------------- genes_to_rem = getGenes(obj)[grep('^Mt', getGenes(obj))] cells_to_rem = getCells(obj)[which(getCellsSize(obj) == 0)] obj = dropGenesCells(obj, genes_to_rem, cells_to_rem) ## ----------------------------------------------------------------------------- logThis(paste("n cells", getNumCells(obj)), logLevel = 1) ## ----------------------------------------------------------------------------- n_it <- 1 obj <- clean(obj) c(pcaCellsPlot, pcaCellsData, genesPlot, UDEPlot, nuPlot) %<-% cleanPlots(obj) pcaCellsPlot ## ----------------------------------------------------------------------------- genesPlot ## ----eval=TRUE, include=TRUE-------------------------------------------------- cells_to_rem <- rownames(pcaCellsData)[pcaCellsData[["groups"]] == "B"] obj <- dropGenesCells(obj, cells = cells_to_rem) n_it <- 2 obj <- clean(obj) c(pcaCellsPlot, pcaCellsData, genesPlot, UDEPlot, nuPlot) %<-% cleanPlots(obj) pcaCellsPlot ## ----------------------------------------------------------------------------- UDEPlot ## ----------------------------------------------------------------------------- plot(nuPlot) ## ----------------------------------------------------------------------------- nuDf = data.frame("nu" = sort(getNu(obj)), "n" = seq_along(getNu(obj))) yset = 0.35 # the threshold to remove low UDE cells plot.ude <- ggplot(nuDf, aes(x = n, y = nu)) + geom_point(colour = "#8491B4B2", size = 1) + xlim(0, 400) + ylim(0, 1) + geom_hline(yintercept = yset, linetype = "dashed", color = "darkred") + annotate(geom = "text", x = 200, y = 0.25, label = paste0("to remove cells with nu < ", yset), color = "darkred", size = 4.5) plot.ude ## ----------------------------------------------------------------------------- obj <- addElementToMetaDataset(obj, "Threshold low UDE cells:", yset) cells_to_rem = rownames(nuDf)[nuDf[["nu"]] < yset] obj <- dropGenesCells(obj, cells = cells_to_rem) ## ----------------------------------------------------------------------------- n_it <- 3 obj <- clean(obj) c(pcaCellsPlot, pcaCellsData, genesPlot, UDEPlot, nuPlot) %<-% cleanPlots(obj) pcaCellsPlot ## ----------------------------------------------------------------------------- logThis(paste("n cells", getNumCells(obj)), logLevel = 1) ## ----------------------------------------------------------------------------- obj = estimateDispersionBisection(obj, cores = 10) ## ----------------------------------------------------------------------------- obj <- calculateCoex(obj) ## ----eval=TRUE, include=TRUE-------------------------------------------------- # saving the structure saveRDS(obj, file = file.path(outDir, paste0(cond, ".cotan.RDS"))) ## ----eval=FALSE, include=TRUE------------------------------------------------- # obj2 <- automaticCOTANObjectCreation( # raw = sample.dataset, # GEO = "GSM2861514", # sequencingMethod = "Drop_seq", # sampleCondition = cond, # saveObj = TRUE, outDir = outDir, cores = 10) ## ----------------------------------------------------------------------------- quant.p = calculateGDI(obj) head(quant.p) ## ----------------------------------------------------------------------------- genesList <- list( "NPGs" = c("Nes", "Vim", "Sox2", "Sox1", "Notch1", "Hes1", "Hes5", "Pax6"), "PNGs" = c("Map2", "Tubb3", "Neurod1", "Nefm", "Nefl", "Dcx", "Tbr1"), "hk" = c("Calm1", "Cox6b1", "Ppia", "Rpl18", "Cox7c", "Erh", "H3f3a", "Taf1", "Taf2", "Gapdh", "Actb", "Golph3", "Zfr", "Sub1", "Tars", "Amacr") ) # needs to be recalculated after the changes in nu/dispersion above #obj <- calculateCoex(obj, actOnCells = FALSE) GDIPlot(obj, cond = cond, genes = genesList) ## ----------------------------------------------------------------------------- print(cond) ## ----------------------------------------------------------------------------- heatmapPlot(genesLists = genesList, sets = c(1:3), conditions = c(cond), dir = outDir) ## ----------------------------------------------------------------------------- genesHeatmapPlot(obj, primaryMarkers = c("Satb2", "Bcl11b", "Vim", "Hes1"), pValueThreshold = 0.001, symmetric = TRUE) ## ----------------------------------------------------------------------------- genesHeatmapPlot(obj, primaryMarkers = c("Satb2", "Bcl11b", "Fezf2"), secondaryMarkers = c("Gabra3", "Meg3", "Cux1", "Neurod6"), pValueThreshold = 0.001, symmetric = FALSE) ## ----------------------------------------------------------------------------- c(observedCT, expectedCT) %<-% contingencyTables(obj, g1 = "Satb2", g2 = "Bcl11b") print("Observed CT") observedCT print("Expected CT") expectedCT ## ----------------------------------------------------------------------------- # For the whole matrix coex <- getGenesCoex(obj, zeroDiagonal = FALSE) coex[1:5, 1:5] ## ----------------------------------------------------------------------------- # For a partial matrix coex <- getGenesCoex(obj, genes = c("Satb2","Bcl11b","Fezf2")) head(coex) ## ----------------------------------------------------------------------------- layersGenes = list( "L1" = c("Reln", "Lhx5"), "L2/3" = c("Satb2", "Cux1"), "L4" = c("Rorb", "Sox5"), "L5/6" = c("Bcl11b", "Fezf2"), "Prog" = c("Vim", "Hes1") ) c(gSpace, eigPlot, pcaClustersDF, treePlot) %<-% establishGenesClusters(obj, groupMarkers = layersGenes, numGenesPerMarker = 25, kCuts = 6) plot(eigPlot) ## ----------------------------------------------------------------------------- plot(treePlot) ## ----------------------------------------------------------------------------- UMAPPlot(pcaClustersDF[, 1:10], clusters = pcaClustersDF[["hclust"]], elements = layersGenes, title = "Genes' clusters UMAP Plot") ## ----eval=FALSE, include=TRUE------------------------------------------------- # fineClusters <- cellsUniformClustering(obj, GDIThreshold = 1.4, cores = 10, # saveObj = TRUE, outDir = outDir) # obj <- addClusterization(obj, clName = "FineClusters", clusters = fineClusters) ## ----eval=FALSE, include=TRUE------------------------------------------------- # c(coexDF, pValueDF) %<-% DEAOnClusters(obj, clusters = fineClusters) # obj <- addClusterizationCoex(obj, clName = "FineClusters", # coexDF = coexDF) ## ----eval=FALSE, include=TRUE------------------------------------------------- # c(mergedClusters, coexDF, pValueDF) %<-% # mergeUniformCellsClusters(obj, GDIThreshold = 1.4, cores = 10, # saveObj = TRUE, outDir = outDir) # obj <- addClusterization(obj, clName = "MergedClusters", # clusters = mergedClusters, coexDF = coexDF) ## ----eval=FALSE, include=TRUE------------------------------------------------- # mergedUMAPPlot <- UMAPPlot(coexDF, elements = layersGenes, # title = "Fine Cluster UMAP Plot") # plot(mergedUMAPPlot) ## ----------------------------------------------------------------------------- if (file.exists(file.path(outDir, paste0(cond, ".cotan.RDS")))) { #Delete file if it exists file.remove(file.path(outDir, paste0(cond, ".cotan.RDS"))) } unlink(file.path(outDir, cond), recursive = TRUE) file.remove(dataSetFile) # stop logging to file setLoggingFile("") file.remove(file.path(outDir, "vignette_v1.log")) options(parallelly.fork.enable = FALSE) sessionInfo()