## ----knitr, echo=FALSE, results="hide"---------------------------------------- library("knitr") opts_chunk$set(tidy=FALSE,dev="png",fig.show="hide", fig.width=4,fig.height=4.5, message=FALSE) ## ----<style-knitr, eval=TRUE, echo=FALSE, results="asis"------------------- BiocStyle::latex() ## ----options,echo=FALSE------------------------------------------------------- options(digits=3, width=80, prompt=" ", continue=" ") ## ----Import_library_noEcho,echo=TRUE,eval=FALSE------------------------------- # library(RiboProfiling) # listInputBam <- c( # BamFile("http://genomique.info/data/public/RiboProfiling/ctrl.bam"), # BamFile("http://genomique.info/data/public/RiboProfiling/nutlin2h.bam") # ) # covData <- riboSeqFromBAM(listInputBam, genomeName="hg19") ## ----Import_library,echo=FALSE------------------------------------------------ suppressWarnings(suppressMessages(library(RiboProfiling))) ## ----riboSeqfromBam,echo=FALSE,eval=TRUE,dev=c("png"),fig.width=7------------- myFileCtrl <- system.file("extdata", "ctrl_sample.bam", package="RiboProfiling") myFileNutlin2h <- system.file("extdata", "nutlin2h_sample.bam", package="RiboProfiling") listeInputBam <- c(myFileCtrl, myFileNutlin2h) txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene::TxDb.Hsapiens.UCSC.hg19.knownGene covData <- suppressMessages( suppressWarnings( riboSeqFromBAM( listeInputBam, txdb=txdb, listShiftValue=c(-14, -14) ) ) ) ## ----GAlignments_from_BAM,echo=TRUE,eval=FALSE-------------------------------- # aln <- readGAlignments( # BamFile("http://genomique.info/data/public/RiboProfiling/ctrl.bam") # ) ## ----Hist_quick,echo=TRUE,eval=TRUE,dev=c('png')------------------------------ data(ctrlGAlignments) aln <- ctrlGAlignments matchLenDistr <- histMatchLength(aln, 0) matchLenDistr[[2]] ## ----CovAroundTSS_quick,eval=FALSE,echo=TRUE,split=TRUE----------------------- # #transform the GAlignments object into a GRanges object # #(faster processing of the object) # alnGRanges <- readsToStartOrEnd(aln, what="start") # #txdb object with annotations # txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene # oneBinRanges <- aroundPromoter(txdb, alnGRanges, percBestExpressed=0.001) # #the coverage in the TSS flanking region for the reads with match sizes 29:31 # listPromoterCov <- # readStartCov( # alnGRanges, # oneBinRanges, # matchSize=c(29:31), # fixedInterval=c(-20, 20), # renameChr="aroundTSS", # charPerc="perc" # ) # plotSummarizedCov(listPromoterCov) ## ----TSS_Cov,echo=FALSE,eval=TRUE,fig.height=10,dev=c('png')------------------ #transform the GAlignments object into a GRanges object #(faster processing of the object) alnGRanges <- readsToStartOrEnd(aln, what="start") #make a txdb object containing the annotations for the specified species. #In this case hg19. txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene::TxDb.Hsapiens.UCSC.hg19.knownGene #Please make sure that seqnames of txdb correspond to the seqnames #of the alignment files ("chr" particle) #if not rename the txdb seqlevels #renameSeqlevels(txdb, sub("chr", "", seqlevels(txdb))) #get the flanking region around the promoter of the 0.1% best expressed CDSs oneBinRanges <- aroundPromoter(txdb, alnGRanges, percBestExpressed=0.001) #the coverage in the TSS flanking region for the summarized read match sizes #the coverage in the TSS flanking region for the reads with match sizes 29:31 listPromoterCov <- suppressWarnings( readStartCov( alnGRanges, oneBinRanges, matchSize=c(29:32), fixedInterval=c(-20, 20), renameChr="aroundTSS", charPerc="perc" ) ) suppressMessages(plotSummarizedCov(listPromoterCov)) ## ----CountReads_echo,echo=TRUE,eval=FALSE------------------------------------- # #keep only the match read sizes 30-33 # alnGRanges <- alnGRanges[which(!is.na(match(alnGRanges$score,30:33)))] # #get all CDSs by transcript # cds <- cdsBy(txdb, by="tx", use.names=TRUE) # #get all exons by transcript # exonGRanges <- exonsBy(txdb, by="tx", use.names=TRUE) # #get the per transcript relative position of start and end codons # cdsPosTransc <- orfRelativePos(cds, exonGRanges) # #compute the counts on the different features # #after applying the specified shift value on the read start along the transcript # countsDataCtrl1 <- # countShiftReads( # exonGRanges=exonGRanges[names(cdsPosTransc)], # cdsPosTransc=cdsPosTransc, # alnGRanges=alnGRanges, # shiftValue=-14 # ) # head(countsDataCtrl1[[1]]) # listCountsPlots <- countsPlot( # list(countsDataCtrl1[[1]]), # grep("_counts$", colnames(countsDataCtrl1[[1]])), # 1 # ) # listCountsPlots ## ----CountReads,echo=FALSE,eval=TRUE,fig.width=7,dev=c('png')----------------- #get all CDSs by transcript cds <- GenomicFeatures::cdsBy(txdb, by="tx", use.names=TRUE) #get all exons by transcript exonGRanges <- GenomicFeatures::exonsBy(txdb, by="tx", use.names=TRUE) #get the per transcript relative position of start and end codons #cdsPosTransc <- orfRelativePos(cds, exonGRanges) data("cdsPosTransc") #compute the counts on the different features after applying #the specified shift value on the read start along the transcript countsDataCtrl1 <- countShiftReads( exonGRanges[names(cdsPosTransc)], cdsPosTransc, alnGRanges, -14 ) listCountsPlots <- countsPlot( list(countsDataCtrl1[[1]]), grep("_counts$", colnames(countsDataCtrl1[[1]])), 1 ) invisible(capture.output( suppressWarnings( suppressMessages( print(listCountsPlots)) ) ) ) head(countsDataCtrl1[[1]], n=3) ## ----codon_cov_position,echo=TRUE,eval=TRUE----------------------------------- data(codonIndexCovCtrl) head(codonIndexCovCtrl[[1]], n=3) ## ----CodonAnalysis_echo,echo=TRUE,eval=FALSE---------------------------------- # listReadsCodon <- countsDataCtrl1[[2]] # #get the names of the expressed ORFs grouped by transcript # cds <- cdsBy(txdb, use.names=TRUE) # orfCoord <- cds[names(cds) %in% names(listReadsCodon)] # #chromosome names should correspond between the BAM, # #the annotations, and the genome # genomeSeq <- BSgenome.Hsapiens.UCSC.hg19 # #codon frequency, coverage, and annotation # codonData <- codonInfo(listReadsCodon, genomeSeq, orfCoord) ## ----CodonAnalysis,echo=FALSE,eval=TRUE--------------------------------------- listReadsCodon <- countsDataCtrl1[[2]] #get the names of the ORFs for which we have coverage #grouped by transcript cds <- GenomicFeatures::cdsBy(txdb, use.names=TRUE) orfCoord <- cds[names(cds) %in% names(listReadsCodon)] genomeSeq <- BSgenome.Hsapiens.UCSC.hg19::BSgenome.Hsapiens.UCSC.hg19 #codon frequency, coverage, and annotation codonData <- codonInfo(listReadsCodon, genomeSeq, orfCoord) ## ----CountReads_res,results=TRUE,echo=TRUE,split=FALSE------------------------ data("codonDataCtrl") head(codonDataCtrl[[1]], n=3) ## ----CodonPCA,echo=TRUE,eval=TRUE,dev=c("png")-------------------------------- codonUsage <- codonData[[1]] codonCovMatrix <- codonData[[2]] #keep only genes with a minimum number of reads nbrReadsGene <- apply(codonCovMatrix, 1, sum) ixExpGenes <- which(nbrReadsGene >= 50) codonCovMatrix <- codonCovMatrix[ixExpGenes, ] #get the PCA on the codon coverage codonCovMatrixTransp <- t(codonCovMatrix) rownames(codonCovMatrixTransp) <- colnames(codonCovMatrix) colnames(codonCovMatrixTransp) <- rownames(codonCovMatrix) listPCACodonCoverage <- codonPCA(codonCovMatrixTransp, "codonCoverage") listPCACodonCoverage[[2]] ## ----sessInfo,echo=TRUE,eval=TRUE--------------------------------------------- sessionInfo()