### R code from vignette source 'CountAndMeasure-lab.Rnw'

###################################################
### code chunk number 1: setup-private
###################################################
options(width=70)


###################################################
### code chunk number 2: setup
###################################################
library(Rsamtools)
library(leeBamViews)
library(org.Sc.sgd.db)
library(TxDb.Scerevisiae.UCSC.sacCer2.ensGene)


###################################################
### code chunk number 3: bam
###################################################
fls <- dir(system.file("bam", package="leeBamViews"), 
           "bam$", full=TRUE)
names(fls) <- sub("_13e.bam$", "", basename(fls))
bam <- BamFileList(fls)

regex <- "([[:alpha:]]+)([[:digit:]])"
values(bam) <- 
    DataFrame(Genotype=factor(sub(regex, "\\1", names(bam))),
              Lane=sub(regex, "\\2", names(bam)))
open(bam)


###################################################
### code chunk number 4: read
###################################################
aln <- readBamGappedAlignments(bam[[1]])
aln <- keepSeqlevels(aln, "Scchr13")

seqlevels(bam[[1]])
start <- org.Sc.sgdCHRLOC[["YMR297W"]]
end <- org.Sc.sgdCHRLOCEND[["YMR297W"]]
which <- GRanges("Scchr13", IRanges(start, end))
aln1 <- readBamGappedAlignments(bam[[1]], which=which)


###################################################
### code chunk number 5: countOverlaps
###################################################
txdb <- Scerevisiae_UCSC_sacCer2_ensGene_TxDb
ex <- exonsBy(txdb, "gene")
ex1 <- ex[["YMR297W"]]

aln2 <- renameSeqlevels(aln, list(Scchr13="chrXIII"))
cnt <- countOverlaps(aln2, ex1, ignore.strand=TRUE)
table(cnt)


###################################################
### code chunk number 6: coverage
###################################################
ex2 <- renameSeqlevels(keepSeqlevels(ex1, "chrXIII"),
                       list(chrXIII="Scchr13"))
aln1 <- keepSeqlevels(aln1, "Scchr13")
coverage(aln1, shift=-start(ex2) + 1L, width(ex2))

cvg <- function(bam, which) {
    aln <- readBamGappedAlignments(bam, which)
    aln <- keepSeqlevels(aln, seqlevels(which))
    cvg <- coverage(aln, shift=-start(which) + 1L, width(which))
    as.integer(cvg)
}

Coverage <- sapply(open(bam), cvg, ex2)

matplot(Coverage, type="l", col=values(bam)$Genotype, lty=1)


###################################################
### code chunk number 7: genes (eval = FALSE)
###################################################
## ## genes on chr 8 or 9
## library(Rsamtools)
## library(TxDb.Hsapiens.UCSC.hg18.knownGene)
## txdb <- Hsapiens_UCSC_hg18_knownGene_TxDb
## ex <- exonsBy(txdb, "gene")
## ex89 <- range(keepSeqlevels(ex, c("chr8", "chr9")))
## gn <- unlist(ex89[elementLengths(ex89) == 1], use.names=FALSE)
## names(gn) <- names(ex89[elementLengths(ex89) == 1])


###################################################
### code chunk number 8: seq (eval = FALSE)
###################################################
## ## seq and GC content
## ## preparation
## indexFa("chr8-9.fa")
## seq <- scanFa("chr8-9.fa", gn)


###################################################
### code chunk number 9: gc (eval = FALSE)
###################################################
## alph <- alphabetFrequency(seq, as.prob=TRUE)
## gc <- rowSums(alph[,c("G", "C")])


###################################################
### code chunk number 10: bam (eval = FALSE)
###################################################
## ## bam files
## dir0 <- "/mnt/cpl/data/Solexa/SOC/101101/Samples/"
## dir <- dir(dir0, "Benign|Normal|SOC", full=TRUE)
## fls <- list.files(dir, "*bam$", recursive=TRUE, full=TRUE)
## names(fls) <- sub(".*tophat_([^/]+).*", "\\1", fls)
## bam <- BamFileList(fls)
## re <- "^([[:alnum:]]+)_+([[:alnum:]]+).*"
## values(bam) <- DataFrame(Trt=sub(re, "\\1", names(bam)),
##                          Id=sub(re, "\\2", names(bam)))


###################################################
### code chunk number 11: cnt (eval = FALSE)
###################################################
## ## reads overlapping genes
## library(ShortRead)
## aln <- readGappedReads(path(bam[[1]]), which=gn)
## hits <- countOverlaps(aln, gn, ignore.strand=TRUE)
## cnt <- countOverlaps(gn, aln[hits==1], ignore.strand=TRUE)
## (bwplot(log(1+cnt)~cut(gc, 30), scales=list(x=list(rot=90))))


###################################################
### code chunk number 12: gcPerRead (eval = FALSE)
###################################################
## ## more detail -- gc content of aligned reads
## olap <- findOverlaps(aln, gn, ignore.strand=TRUE)
## hits <- tabulate(queryHits(olap), length(aln))
## keep <- queryHits(olap) %in% which(hits == 1)
## 
## reads <- qseq(aln)[ queryHits(olap)[keep] ]
## readGc <- rowSums(alphabetFrequency(reads, as.prob=TRUE)[,c("G", "C")])
## readGcPerGene <- tapply(readGc, subjectHits(olap)[keep], mean)
## idx <- as.integer(names(readGcPerGene))
## 
## xyplot(readGcPerGene ~ gc[idx],
##        panel=function(...) {
##            panel.xyplot(...)
##            panel.abline(0, 1, col="red", lwd=2)
##            panel.lmline(..., col="blue", lwd=2)
##        }, xlab="Reference", ylab="Query")


###################################################
### code chunk number 13: sessionInfo
###################################################
sessionInfo()