## ----style, echo = FALSE, results = 'asis'------------------------------- knitr::opts_chunk$set( eval=as.logical(Sys.getenv("KNITR_EVAL", "TRUE")), cache=as.logical(Sys.getenv("KNITR_CACHE", "TRUE")) ) options(width = 75) ## ---- message = FALSE---------------------------------------------------- library(Biostrings) ## ------------------------------------------------------------------------ head( methods(class = "DNAStringSet") ) methods(generic = "reverseComplement") ## ---- message=FALSE------------------------------------------------------ library(GenomicRanges) ## ------------------------------------------------------------------------ gr <- GRanges("A", IRanges(c(10, 20, 22), width=5), "+") shift(gr, 1) # intra-range range(gr) # inter-range reduce(gr) # inter-range snps <- GRanges("A", IRanges(c(11, 17, 24), width=1)) findOverlaps(snps, gr) # between-range setdiff(range(gr), gr) # 'introns' ## ----SummarizedExperiment, message = FALSE------------------------------- library(SummarizedExperiment) library(airway) ## ------------------------------------------------------------------------ data(airway) airway colData(airway) head(assay(airway)) airway[, airway$dex %in% "trt"] chr14 <- as(seqinfo(rowRanges(airway)), "GRanges")["14"] airway[airway %over% chr14,] ## ------------------------------------------------------------------------ colSums(assay(airway)) ## ---- message=FALSE------------------------------------------------------ library(ggplot2) ## ------------------------------------------------------------------------ ridx <- rowSums(assay(airway)) > 0 se <- airway[ridx,] ave_log_expr <- rowMeans(log(1 + assay(se))) tbl <- tibble::enframe(ave_log_expr, "gene", "ave_log_expr") ggplot(tbl, aes(ave_log_expr)) + geom_density() ## ------------------------------------------------------------------------ il = IntegerList(sample(26, 5), sample(26, 7)) il > 7 any(il > 7) il[il > 7] relist(letters[unlist(il)], il) ## ------------------------------------------------------------------------ DataFrame( i = 1:2, il = IntegerList(sample(5), sample(7)), dna = DNAStringSet(c("ATCG", "AAACGGG")), gr = GRanges(c("chr1:1-20", "chr20:10-30")) ) ## ---- message = FALSE---------------------------------------------------- library(rtracklayer) ## ------------------------------------------------------------------------ data(cpgIslands, package="Gviz") cpgIslands$name <- letters[1:10] cpgIslands$score <- sample(10) cpgIslands bed_file <- tempfile(fileext = ".bed") basename(bed_file) export(cpgIslands, bed_file) cat(readLines(bed_file), sep = "\n") import(bed_file) ## ------------------------------------------------------------------------ gtf <- system.file( package = "airway", "extdata", "Homo_sapiens.GRCh37.75_subset.gtf" ) import(gtf) ## see also `GenonmicFeatures::makeTxDbFromGFF()` ## ------------------------------------------------------------------------ library(GenomicAlignments) ## ------------------------------------------------------------------------ fls <- dir( system.file(package = "airway", "extdata"), pattern = "bam", full.names = TRUE ) readGAlignments(fls[1]) ## ---- message = FALSE---------------------------------------------------- library(GenomicAlignments) ## ------------------------------------------------------------------------ fl <- dir( system.file(package = "airway", "extdata"), pattern = "bam", full.name = TRUE ) fl[1] readGAlignments(fl[1]) ## hack: index the bam file in-place invisible(indexBam(fl[1])) which <- GRanges(c("1:11053773-11072770", "1:11362290-11386194")) param <- ScanBamParam(which = which, what = "seq") readGAlignments(fl[1], param = param) ## ------------------------------------------------------------------------ bf <- BamFile(fl[1], yieldSize = 5000) open(bf) repeat { galn <- readGAlignments(bf) if (length(galn) == 0) break ## do work message(length(galn)) } close(bf) ## ------------------------------------------------------------------------ sessionInfo()