## ---- include = FALSE----------------------------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", width = 100 ) options(width=100) # devtools::load_all(".") # delete later ## ------------------------------------------------------------------------------------------------- library(epialleleR) capture.bam <- system.file("extdata", "capture.bam", package="epialleleR") bam.data <- preprocessBam(capture.bam) ## ------------------------------------------------------------------------------------------------- # data.table::data.table object for # CpG VEF report cg.vef.report <- generateCytosineReport(bam.data) head(cg.vef.report[order(meth+unmeth, decreasing=TRUE)]) # CpG cytosine report cg.report <- generateCytosineReport(bam.data, threshold.reads=FALSE) head(cg.report[order(meth+unmeth, decreasing=TRUE)]) # CX cytosine report cx.report <- generateCytosineReport(bam.data, threshold.reads=FALSE, report.context="CX") head(cx.report[order(meth+unmeth, decreasing=TRUE)]) ## ------------------------------------------------------------------------------------------------- # report for amplicon-based data # matching is done by exact start or end positions plus/minus tolerance amplicon.report <- generateAmpliconReport( bam=system.file("extdata", "amplicon010meth.bam", package="epialleleR"), bed=system.file("extdata", "amplicon.bed", package="epialleleR") ) amplicon.report # report for capture-based data # matching is done by overlap capture.report <- generateCaptureReport( bam=system.file("extdata", "capture.bam", package="epialleleR"), bed=system.file("extdata", "capture.bed", package="epialleleR") ) head(capture.report) # generateBedReport is a generic function for BED-guided reports bed.report <- generateBedReport( bam=system.file("extdata", "capture.bam", package="epialleleR"), bed=system.file("extdata", "capture.bed", package="epialleleR"), bed.type="capture" ) identical(capture.report, bed.report) ## ---- fig.width=10, fig.height=6, out.width="100%", out.height="100%"----------------------------- # First, let's visualise eCDFs for within- and out-of-context beta values # for all four amplicons and unmatched reads. Note that within-the-context eCDF # of 0.5 is very close to the expected 1-VEF value (0.1) for all amplicons # produced from this 1:9 mix of methylated and non-methylated control DNA # let's compute eCDF amplicon.ecdfs <- generateBedEcdf( bam=system.file("extdata", "amplicon010meth.bam", package="epialleleR"), bed=system.file("extdata", "amplicon.bed", package="epialleleR"), bed.rows=NULL ) # there are 5 items in amplicon.ecdfs, let's plot all of them par(mfrow=c(1,length(amplicon.ecdfs))) # cycle through items for (x in 1:length(amplicon.ecdfs)) { # four of them have names corresponding to genomic regions of amplicon.bed # fifth - NA for all the reads that don't match to any of those regions main <- if (is.na(names(amplicon.ecdfs[x]))) "unmatched" else names(amplicon.ecdfs[x]) # plotting eCDF for within-the-context per-read beta values (in red) plot(amplicon.ecdfs[[x]]$context, col="red", verticals=TRUE, do.points=FALSE, xlim=c(0,1), xlab="per-read beta value", ylab="cumulative density", main=main) # adding eCDF for out-of-context per-read beta values (in blue) plot(amplicon.ecdfs[[x]]$out.of.context, add=TRUE, col="blue", verticals=TRUE, do.points=FALSE) } # Second, let's compare eCDFs for within-the-context beta values for only one # amplicon but all three sequenced samples: pure non-methylated DNA, 1:9 mix of # methylated and non-methylated DNA, and fully methylated DNA # our files bam.files <- c("amplicon000meth.bam", "amplicon010meth.bam", "amplicon100meth.bam") # let's plot all of them par(mfrow=c(1,length(bam.files))) # cycle through items for (f in bam.files) { # let's compute eCDF amplicon.ecdfs <- generateBedEcdf( bam=system.file("extdata", f, package="epialleleR"), bed=system.file("extdata", "amplicon.bed", package="epialleleR"), # only the second amplicon bed.rows=2, verbose=FALSE ) # plotting eCDF for within-the-context per-read beta values (in red) plot(amplicon.ecdfs[[1]]$context, col="red", verticals=TRUE, do.points=FALSE, xlim=c(0,1), xlab="per-read beta value", ylab="cumulative density", main=f) # adding eCDF for out-of-context per-read beta values (in blue) plot(amplicon.ecdfs[[1]]$out.of.context, add=TRUE, col="blue", verticals=TRUE, do.points=FALSE) } ## ------------------------------------------------------------------------------------------------- # VCF report vcf.report <- generateVcfReport( bam=system.file("extdata", "amplicon010meth.bam", package="epialleleR"), bed=system.file("extdata", "amplicon.bed", package="epialleleR"), vcf=system.file("extdata", "amplicon.vcf.gz", package="epialleleR"), # thresholds on alignment and base quality min.mapq=30, min.baseq=13, # when VCF seqlevels are different from BED and BAM it is possible # to convert them internally vcf.style="NCBI" ) # NA values are shown for the C->T variants on the "+" and G->A on the "-" # strands, because bisulfite conversion makes their counting impossible head(vcf.report) # let's sort the report by increasing Fisher's exact test's p-values. # the p-values are given separately for reads that map to the "+" head(vcf.report[order(`FEp-`, na.last=TRUE)]) # and to the "-" strand head(vcf.report[order(`FEp+`, na.last=TRUE)]) ## ----session-------------------------------------------------------------------------------------- sessionInfo()