## ----style, echo = FALSE, results = 'asis'--------------------------------- knitr::opts_chunk$set( eval=as.logical(Sys.getenv("KNITR_EVAL", "TRUE")), cache=as.logical(Sys.getenv("KNITR_CACHE", "TRUE"))) ## ----setup, echo=FALSE----------------------------------------------------- options(digits=3) suppressPackageStartupMessages({ library(csaw) library(edgeR) library(GenomicRanges) library(ChIPseeker) library(genefilter) library(org.Mm.eg.db) library(TxDb.Mmusculus.UCSC.mm10.knownGene) }) ## ----null-p, cache=TRUE---------------------------------------------------- ## 100,000 t-tests under the null, n = 6 n <- 6; m <- matrix(rnorm(n * 100000), ncol=n) P <- genefilter::rowttests(m, factor(rep(1:2, each=3)))$p.value quantile(P, c(.001, .01, .05)) hist(P, breaks=20) ## ----csaw-preprocess, eval=FALSE------------------------------------------- # file.path("ChIPSeq", "NFYA", "scripts", "preprocess.R") ## ----csaw-setup------------------------------------------------------------ acc <- c(es_1="SRR074398", es_2="SRR074399", tn_1="SRR074417", tn_2="SRR074418") files <- data.frame(Treatment=sub("_.*", "", names(acc)), Replicate=sub(".*_", "", names(acc)), sra=sprintf("%s.sra", acc), fastq=sprintf("%s.fastq.gz", acc), bam=sprintf("%s.fastq.gz.subread.BAM", acc), row.names=acc, stringsAsFactors=FALSE) ## ----csaw-setwd, eval=FALSE------------------------------------------------ # setwd("ChIPSeq/NFYA/bam") ## ----csaw-reduction, eval=FALSE-------------------------------------------- # library(csaw) # library(GenomicRanges) # frag.len <- 110 # system.time({ # data <- windowCounts(files$bam, width=10, ext=frag.len) # }) # 156 seconds # acc <- sub(".fastq.*", "", data$bam.files) # colData(data) <- cbind(files[acc,], colData(data)) ## ----csaw-data-load, echo=FALSE-------------------------------------------- frag.len <- 110 fl <- file.path("ChIPSeq", "NFYA", "extdata", "csaw-data.Rds") data <- readRDS(fl) ## ----csaw-filter, eval=FALSE----------------------------------------------- # library(edgeR) # for aveLogCPM() # keep <- aveLogCPM(assay(data)) >= -1 # data <- data[keep,] ## ----csaw-normalize, eval=FALSE-------------------------------------------- # system.time({ # binned <- windowCounts(files$bam, bin=TRUE, width=10000) # }) #139 second # normfacs <- normalize(binned) ## ----csaw-normacs-load, echo=FALSE----------------------------------------- fl <- file.path("ChIPSeq", "NFYA", "extdata", "csaw-normfacs.Rds") normfacs <- readRDS(fl) ## ----csaw-experimental-design---------------------------------------------- design <- model.matrix(~Treatment, colData(data)) ## ----csaw-de--------------------------------------------------------------- y <- asDGEList(data, norm.factors=normfacs) y <- estimateDisp(y, design) fit <- glmQLFit(y, design, robust=TRUE) results <- glmQLFTest(fit, contrast=c(0, 1)) head(results$table) ## ----csaw-merge-windows---------------------------------------------------- merged <- mergeWindows(rowRanges(data), tol=1000L) ## ----csaw-combine-merged-tests--------------------------------------------- tabcom <- combineTests(merged$id, results$table) head(tabcom) ## ----csaw-combined-merged-------------------------------------------------- final <- merged$region mcols(final) <- as(tabcom, "DataFrame") ## -------------------------------------------------------------------------- library(org.Mm.eg.db) library(TxDb.Mmusculus.UCSC.mm10.knownGene) anno <- detailRanges( final, txdb=TxDb.Mmusculus.UCSC.mm10.knownGene, orgdb=org.Mm.eg.db, promoter=c(3000, 1000), dist=5000 ) mcols(final) <- cbind(mcols(final), DataFrame(anno)) ## -------------------------------------------------------------------------- annotated <- final[nzchar(final$overlap)] annotated[order(annotated$PValue)]