## ---- echo = FALSE, results = 'hide', warning = FALSE, message = FALSE----- suppressPackageStartupMessages(library(BiocStyle)) ## ----load_files1----------------------------------------------------------- # Run scruff on example dataset # NOTE: Requires Rsubread index and TxDb objects for the reference genome. # For generation of these files, please refer to the Stepwise Tutorial. library(scruff) # Get the paths to example FASTQ, FASTA, and GTF files. # Please note that because the following files are included in # scruff R package, we use system.file() function to extract the paths # to these files. If the user is running scruff on real data, the # paths to the input FASTQ, FASTA, and GTF files should be provided, # and there is no need to call system.file() function. For example, # v1h1R1 <- "/PATH/TO/vandenBrink_1h1_L001_R1_001.fastq.gz" # fasta <- "/Path/TO/GRCm38_MT.fa" v1h1R1 <- system.file("extdata", "vandenBrink_1h1_L001_R1_001.fastq.gz", package = "scruff") v1h1R2 <- system.file("extdata", "vandenBrink_1h1_L001_R2_001.fastq.gz", package = "scruff") vb1R1 <- system.file("extdata", "vandenBrink_b1_L001_R1_001.fastq.gz", package = "scruff") vb1R2 <- system.file("extdata", "vandenBrink_b1_L001_R2_001.fastq.gz", package = "scruff") fasta <- system.file("extdata", "GRCm38_MT.fa", package = "scruff") gtf <- system.file("extdata", "GRCm38_MT.gtf", package = "scruff") ## ----buildindex1, results = 'hide'----------------------------------------- # NOTE: Rsubread package does not support Windows environment. if (!requireNamespace("Rsubread", quietly = TRUE)) { message("Package \"Rsubread\" needed for \"alignRsubread\"", " function to work. ", "Please install it if you are using Linux or macOS systems. ", "The function is not available in Windows environment.") } else { # Create index files for GRCm38_MT. # For details, please refer to Rsubread user manual. # Specify the basename for Rsubread index indexBase <- "GRCm38_MT" Rsubread::buildindex(basename = indexBase, reference = fasta, indexSplit = FALSE) } ## ----scruff, results = 'hide', warning = FALSE, message = FALSE------------ data(barcodeExample, package = "scruff") if (!requireNamespace("Rsubread", quietly = TRUE)) { message("Package \"Rsubread\" needed. ", "Please install it if you are using Linux or macOS systems. ", "The function is not available in Windows environment.") } else { sce <- scruff(project = "example", experiment = c("1h1", "b1"), lane = c("L001", "L001"), read1Path = c(v1h1R1, vb1R1), read2Path = c(v1h1R2, vb1R2), bc = barcodeExample, index = indexBase, unique = FALSE, nBestLocations = 1, reference = gtf, bcStart = 1, bcStop = 8, bcEdit = 1, umiStart = 9, umiStop = 12, umiEdit = 1, keep = 75, cellPerWell = c(rep(1, 94), 0, 0, rep(1, 94), 300, 1), cores = 2, verbose = TRUE) } ## ----qc1, warning = FALSE, message = FALSE, results = 'hide'--------------- data(sceExample, package = "scruff") qc <- qcplots(sceExample) ## ----loadfiles2------------------------------------------------------------ library(scruff) # Get the paths to example FASTQ files. # Please note that because the following files are included in # scruff R package, we use system.file() function to extract the paths # to these files. If the user is running scruff on real data, the # paths to the input FASTQ, FASTA, and GTF files should be provided, # and there is no need to call system.file() function. For example, # v1h1R1 <- "/PATH/TO/vandenBrink_1h1_L001_R1_001.fastq.gz" # fasta <- "/Path/TO/GRCm38_MT.fa" v1h1R1 <- system.file("extdata", "vandenBrink_1h1_L001_R1_001.fastq.gz", package = "scruff") v1h1R2 <- system.file("extdata", "vandenBrink_1h1_L001_R2_001.fastq.gz", package = "scruff") vb1R1 <- system.file("extdata", "vandenBrink_b1_L001_R1_001.fastq.gz", package = "scruff") vb1R2 <- system.file("extdata", "vandenBrink_b1_L001_R2_001.fastq.gz", package = "scruff") ## ----de, results = 'hide', warning = FALSE, message = FALSE---------------- data(barcodeExample, package = "scruff") de <- demultiplex(project = "example", experiment = c("1h1", "b1"), lane = c("L001", "L001"), read1Path = c(v1h1R1, vb1R1), read2Path = c(v1h1R2, vb1R2), barcodeExample, bcStart = 1, bcStop = 8, bcEdit = 1, umiStart = 9, umiStop = 12, keep = 75, minQual = 10, yieldReads = 1e+06, verbose = TRUE, overwrite = TRUE, cores = 2) ## ----al_files, results = 'hide'-------------------------------------------- # Create index files for GRCm38_MT. For details, please refer to Rsubread # user manual. fasta <- system.file("extdata", "GRCm38_MT.fa", package = "scruff") # NOTE: Rsubread package does not support Windows environment. if (!requireNamespace("Rsubread", quietly = TRUE)) { message("Package \"Rsubread\" needed.", " Please install it if you are using Linux or macOS systems.", " The function is not available in Windows environment.\n") } else { # Create index files for GRCm38_MT. # For details, please refer to Rsubread user manual. # Specify the basename for Rsubread index indexBase <- "GRCm38_MT" Rsubread::buildindex(basename = indexBase, reference = fasta, indexSplit = FALSE) } ## ----al, results = 'hide', warning = FALSE, message = FALSE---------------- # Align the reads using Rsubread if (requireNamespace("Rsubread", quietly = TRUE)) { al <- alignRsubread(de, indexBase, unique = FALSE, nBestLocations = 1, format = "BAM", overwrite = TRUE, verbose = TRUE, cores = 2) } ## ----co, results = 'hide', warning = FALSE, message = FALSE---------------- gtf <- system.file("extdata", "GRCm38_MT.gtf", package = "scruff") # get the molecular counts of trancsripts for each cell # In experiment 1h1, cell barcodes 95 and 96 are empty well controls. # In experiment b1, cell barcode 95 is bulk sample containing 300 cells. if (requireNamespace("Rsubread", quietly = TRUE)) { sce = countUMI(al, gtf, umiEdit = 1, format = "BAM", cellPerWell = c(rep(1, 94), 0, 0, rep(1, 94), 300, 1), verbose = TRUE, cores = 2) } ## ----qc2, warning = FALSE, message = FALSE, echo = TRUE, results = "hide"---- data(sceExample, package = "scruff") qc <- qcplots(sceExample) qc ## ----gtfEG, results = 'hide', warning = FALSE, message = FALSE------------- # Visualize the reads mapped to gene "mt-Rnr2" in # cell "vandenBrink_b1_cell_0095". # bamExample is generated by GenomicAlignments::GAlignments function data(bamExample, package = "scruff") ## ----visualize_reads------------------------------------------------------- # gene mt-Rnr2 starts at 1094 and ends at 2675 start <- 1094 end <- 2675 g1 <- rview(bamExample, chr = "MT", start = start, end = end) g2 <- gview(gtf, chr = "MT", start = start, end = end) g <- ggbio::tracks(g1, g2, heights = c(4,1), xlab = "chr MT") g ## ----10x_bam_qc, warning = FALSE, message = FALSE-------------------------- # The following example BAM file is the first 5000 BAM file records extracted # from sample 01 of the 1.3 Million Brain Cells dataset from E18 Mice. # (https://support.10xgenomics.com/single-cell-gene-expression/datasets/1.3.0/ # 1M_neurons) # The BAM file for sample 01 is downloaded from here: # http://sra-download.ncbi.nlm.nih.gov/srapub_files/ # SRR5167880_E18_20160930_Neurons_Sample_01.bam # see details here: # https://trace.ncbi.nlm.nih.gov/Traces/sra/?study=SRP096558 # and here: # https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE93421 bamfile10x <- system.file("extdata", "SRR5167880_E18_20160930_Neurons_Sample_01_5000.bam", package = "scruff") # The filtered cell barcodes are generated using the following script: # library(TENxBrainData) # library(data.table) # tenx <- TENxBrainData() # # get filtered barcodes for sample 01 # filteredBcIndex <- tstrsplit(colData(tenx)[, "Barcode"], "-")[[2]] == 1 # filteredBc <- colData(tenx)[filteredBcIndex, ][["Barcode"]] filteredBc <- system.file("extdata", "SRR5167880_E18_20160930_Neurons_Sample_01_filtered_barcode.tsv", package = "scruff") # QC results are saved to current working directory qcDt <- tenxBamqc(bam = bamfile10x, experiment = "Neurons_Sample_01", filter = filteredBc) qcDt ## ----bamqc1, warning = FALSE, message = FALSE, echo = TRUE, results = "hide"---- g <- qcplots(qcDt) g ## ----session, echo = FALSE------------------------------------------------- sessionInfo()