## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk[["set"]]( collapse = TRUE, comment = "#>", fig.width = 5L, fig.height = 5L ) ## ----message=FALSE, warning=FALSE--------------------------------------------- library(COTAN) library(zeallot) library(data.table) library(Rtsne) library(qpdf) library(GEOquery) options(parallelly.fork.enable = TRUE) ## ----eval=TRUE, include=TRUE-------------------------------------------------- dataDir <- tempdir() GEO <- "GSM2861514" fName <- "GSM2861514_E175_Only_Cortical_Cells_DGE.txt.gz" dataSetFile <- file.path(dataDir, GEO, fName) if (!file.exists(dataSetFile)) { getGEOSuppFiles(GEO, makeDirectory = TRUE, baseDir = dataDir, fetch_files = TRUE, filter_regex = fName) sample.dataset <- read.csv(dataSetFile, sep = "\t", row.names = 1L) } ## ----------------------------------------------------------------------------- outDir <- tempdir() # Log-level 2 was chosen to showcase better how the package works # In normal usage a level of 0 or 1 is more appropriate setLoggingLevel(2L) # This file will contain all the logs produced by the package # as if at the highest logging level setLoggingFile(file.path(outDir, "vignette_v2.log")) ## ----------------------------------------------------------------------------- cond <- "mouse_cortex_E17.5" #cond <- "test" #obj = COTAN(raw = sampled.dataset) obj <- COTAN(raw = sample.dataset) obj <- initializeMetaDataset(obj, GEO = GEO, sequencingMethod = "Drop_seq", sampleCondition = cond) logThis(paste0("Condition ", getMetadataElement(obj, datasetTags()[["cond"]])), logLevel = 1L) ## ----------------------------------------------------------------------------- ECDPlot(obj, yCut = 700L) ## ----------------------------------------------------------------------------- cellSizePlot(obj) ## ----------------------------------------------------------------------------- genesSizePlot(obj) ## ----------------------------------------------------------------------------- mit <- mitochondrialPercentagePlot(obj, genePrefix = "^Mt") mit[["plot"]] ## ----------------------------------------------------------------------------- cells_to_rem <- getCells(obj)[getCellsSize(obj) > 6000L] obj <- dropGenesCells(obj, cells = cells_to_rem) ## ----------------------------------------------------------------------------- cells_to_rem <- getCells(obj)[getNumExpressedGenes(obj) > 3000L] obj <- dropGenesCells(obj, cells = cells_to_rem) genesSizePlot(obj) ## ----------------------------------------------------------------------------- to_rem <- mit[["sizes"]][["mit.percentage"]] > 1.5 cells_to_rem <- rownames(mit[["sizes"]])[to_rem] obj <- dropGenesCells(obj, cells = cells_to_rem) mit <- mitochondrialPercentagePlot(obj, genePrefix = "^Mt") mit[["plot"]] ## ----------------------------------------------------------------------------- genes_to_rem <- getGenes(obj)[grep("^Mt", getGenes(obj))] cells_to_rem <- getCells(obj)[which(getCellsSize(obj) == 0L)] obj <- dropGenesCells(obj, genes_to_rem, cells_to_rem) ## ----------------------------------------------------------------------------- logThis(paste("n cells", getNumCells(obj)), logLevel = 1L) ## ----------------------------------------------------------------------------- n_it <- 1L obj <- clean(obj) c(pcaCellsPlot, pcaCellsData, genesPlot, UDEPlot, nuPlot, zoomedNuPlot) %<-% cleanPlots(obj) pcaCellsPlot ## ----------------------------------------------------------------------------- genesPlot ## ----eval=TRUE, include=TRUE-------------------------------------------------- cells_to_rem <- rownames(pcaCellsData)[pcaCellsData[["groups"]] == "B"] obj <- dropGenesCells(obj, cells = cells_to_rem) n_it <- 2L obj <- clean(obj) c(pcaCellsPlot, pcaCellsData, genesPlot, UDEPlot, nuPlot, zoomedNuPlot) %<-% cleanPlots(obj) pcaCellsPlot ## ----------------------------------------------------------------------------- UDEPlot ## ----------------------------------------------------------------------------- plot(nuPlot) ## ----------------------------------------------------------------------------- plot(zoomedNuPlot) ## ----------------------------------------------------------------------------- yset <- 0.28 obj <- addElementToMetaDataset(obj, "Threshold low UDE cells:", yset) cells_to_rem <- getCells(obj)[getNu(obj) < yset] obj <- dropGenesCells(obj, cells = cells_to_rem) ## ----------------------------------------------------------------------------- n_it <- 3L obj <- clean(obj) c(pcaCellsPlot, pcaCellsData, genesPlot, UDEPlot, nuPlot, zoomedNuPlot) %<-% cleanPlots(obj) pcaCellsPlot ## ----------------------------------------------------------------------------- logThis(paste("n cells", getNumCells(obj)), logLevel = 1L) ## ----------------------------------------------------------------------------- obj <- proceedToCoex(obj, calcCoex = TRUE, cores = 10L, saveObj = TRUE, outDir = outDir) ## ----eval=TRUE, include=TRUE-------------------------------------------------- # saving the structure saveRDS(obj, file = file.path(outDir, paste0(cond, ".cotan.RDS"))) ## ----eval=FALSE, include=TRUE------------------------------------------------- # obj2 <- automaticCOTANObjectCreation( # raw = sample.dataset, # GEO = GEO, # sequencingMethod = "Drop_seq", # sampleCondition = cond, # calcCoex = TRUE, saveObj = TRUE, outDir = outDir, cores = 10L) ## ----------------------------------------------------------------------------- quantP <- calculateGDI(obj) head(quantP) ## ----------------------------------------------------------------------------- genesList <- list( "NPGs" = c("Nes", "Vim", "Sox2", "Sox1", "Notch1", "Hes1", "Hes5", "Pax6"), "PNGs" = c("Map2", "Tubb3", "Neurod1", "Nefm", "Nefl", "Dcx", "Tbr1"), "hk" = c("Calm1", "Cox6b1", "Ppia", "Rpl18", "Cox7c", "Erh", "H3f3a", "Taf1", "Taf2", "Gapdh", "Actb", "Golph3", "Zfr", "Sub1", "Tars", "Amacr") ) # needs to be recalculated after the changes in nu/dispersion above #obj <- calculateCoex(obj, actOnCells = FALSE) GDIPlot(obj, GDIIn = quantP, cond = cond, genes = genesList) ## ----------------------------------------------------------------------------- print(cond) ## ----------------------------------------------------------------------------- heatmapPlot(genesLists = genesList, sets = (1L:3L), conditions = c(cond), dir = outDir) ## ----------------------------------------------------------------------------- genesHeatmapPlot(obj, primaryMarkers = c("Satb2", "Bcl11b", "Vim", "Hes1"), pValueThreshold = 0.001, symmetric = TRUE) ## ----------------------------------------------------------------------------- genesHeatmapPlot(obj, primaryMarkers = c("Satb2", "Bcl11b", "Fezf2"), secondaryMarkers = c("Gabra3", "Meg3", "Cux1", "Neurod6"), pValueThreshold = 0.001, symmetric = FALSE) ## ----------------------------------------------------------------------------- c(observedCT, expectedCT) %<-% contingencyTables(obj, g1 = "Satb2", g2 = "Bcl11b") print("Observed CT") observedCT print("Expected CT") expectedCT ## ----------------------------------------------------------------------------- # For the whole matrix coex <- getGenesCoex(obj, zeroDiagonal = FALSE) coex[1L:5L, 1L:5L] ## ----------------------------------------------------------------------------- # For a partial matrix coex <- getGenesCoex(obj, genes = c("Satb2", "Bcl11b", "Fezf2")) head(coex) ## ----------------------------------------------------------------------------- layersGenes <- list( "L1" = c("Reln", "Lhx5"), "L2/3" = c("Satb2", "Cux1"), "L4" = c("Rorb", "Sox5"), "L5/6" = c("Bcl11b", "Fezf2"), "Prog" = c("Vim", "Hes1") ) c(gSpace, eigPlot, pcaClustersDF, treePlot) %<-% establishGenesClusters(obj, groupMarkers = layersGenes, numGenesPerMarker = 25L, kCuts = 6L) plot(eigPlot) ## ----------------------------------------------------------------------------- plot(treePlot) ## ----------------------------------------------------------------------------- UMAPPlot(pcaClustersDF[, 1L:10L], clusters = pcaClustersDF[["hclust"]], elements = layersGenes, title = "Genes' clusters UMAP Plot") ## ----eval=FALSE, include=TRUE------------------------------------------------- # fineClusters <- cellsUniformClustering(obj, GDIThreshold = 1.4, # initialResolution = 0.8, # cores = 10L, saveObj = TRUE, # outDir = outDir)[["clusters"]] # obj <- addClusterization(obj, clName = "FineClusters", clusters = fineClusters) ## ----eval=FALSE, include=TRUE------------------------------------------------- # coexDF <- DEAOnClusters(obj, clusters = fineClusters) # obj <- addClusterizationCoex(obj, clName = "FineClusters", # coexDF = coexDF) ## ----eval=FALSE, include=TRUE------------------------------------------------- # c(mergedClusters, coexDF) %<-% # mergeUniformCellsClusters(obj, GDIThreshold = 1.4, cores = 10L, # saveObj = TRUE, outDir = outDir) # obj <- addClusterization(obj, clName = "MergedClusters", # clusters = mergedClusters, coexDF = coexDF) ## ----eval=FALSE, include=TRUE------------------------------------------------- # mergedUMAPPlot <- UMAPPlot(coexDF, elements = layersGenes, # title = "Merged Clusters UMAP Plot") # plot(mergedUMAPPlot) ## ----------------------------------------------------------------------------- if (file.exists(file.path(outDir, paste0(cond, ".cotan.RDS")))) { #Delete file if it exists file.remove(file.path(outDir, paste0(cond, ".cotan.RDS"))) } unlink(file.path(outDir, cond), recursive = TRUE) file.remove(dataSetFile) # stop logging to file setLoggingFile("") file.remove(file.path(outDir, "vignette_v1.log")) options(parallelly.fork.enable = FALSE) sessionInfo()