## ----style, eval=TRUE, echo=FALSE, results='asis'------------------------ options(max.print=1000) stopifnot(BiocInstaller::biocVersion() == "2.13") BiocStyle::latex() library(knitr) opts_chunk$set(cache=TRUE, tidy=FALSE) ## ----packages, eval=TRUE, echo=FALSE, warning=FALSE, message=FALSE------- suppressPackageStartupMessages({ library(org.Hs.eg.db) library(TxDb.Hsapiens.UCSC.hg19.knownGene) library(BSgenome.Hsapiens.UCSC.hg19) library(rtracklayer) library(biomaRt) }) ## ----select-setup-------------------------------------------------------- egids <- c("3183", "91828", "81537", "4776", "283624", "4053", "85446", "10484", "55701", "1112") ## ----load-org------------------------------------------------------------ library(org.Hs.eg.db) ## ----select-------------------------------------------------------------- keytypes(org.Hs.eg.db) columns(org.Hs.eg.db) cols <- c("SYMBOL", "GENENAME") select(org.Hs.eg.db, keys=egids, columns=cols, keytype="ENTREZID") ## ----merge-setup, echo=FALSE--------------------------------------------- fl <- system.file("extdata", "E-MTAB-1147-toptable.csv", package="SummerX") ## ----merge-user-setup, eval=FALSE---------------------------------------- ## fl <- file.choose() ## ----file-exists--------------------------------------------------------- file.exists(fl) ## ----merge--------------------------------------------------------------- csv <- read.csv(fl, row.names=1) class(csv) # data.frame dim(csv) # 528 genes x 6 columns head(csv) # log (base 2) fold change, # adjusted P-values ## ----anno---------------------------------------------------------------- cols <- c("CHR", "SYMBOL", "GENENAME") anno <- select(org.Hs.eg.db, rownames(csv), cols) class(anno) dim(anno) head(anno) all(anno$CHR %in% "14") # all on CHR 14? ## ----merge-annotated----------------------------------------------------- annotated <- merge(csv, anno, by.x=0, by.y="ENTREZID") class(annotated) dim(annotated) head(annotated) ## ----order-annotated----------------------------------------------------- o <- order(abs(annotated$log2FoldChange), decreasing=TRUE) ## ----head-annotated------------------------------------------------------ annotated[head(o),] ## ----biomaRt1, eval=FALSE, results="hide"-------------------------------- ## ## NEEDS INTERNET ACCESS !! ## library(biomaRt) ## head(listMarts(), 3) ## list the marts ## head(listDatasets(useMart("ensembl")), 3) ## mart datasets ## ensembl <- ## fully specified mart ## useMart("ensembl", dataset = "hsapiens_gene_ensembl") ## ## head(listFilters(ensembl), 3) ## filters ## myFilter <- "chromosome_name" ## head(filterOptions(myFilter, ensembl), 3) ## return values ## myValues <- c("21", "22") ## head(listAttributes(ensembl), 3) ## attributes ## myAttributes <- c("ensembl_gene_id","chromosome_name") ## ## ## assemble and query the mart ## res <- getBM(attributes = myAttributes, filters = myFilter, ## values = myValues, mart = ensembl) ## ----SYMBOL-to-ENTREZID-------------------------------------------------- library(org.Hs.eg.db) egid <- select(org.Hs.eg.db, "BRCA1", "ENTREZID", "SYMBOL")$ENTREZID library(TxDb.Hsapiens.UCSC.hg19.knownGene) txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene egToTx <- select(txdb, egid, "TXNAME", "GENEID") ## ----cdsBy--------------------------------------------------------------- cdsByTx <- cdsBy(txdb, "tx", use.names=TRUE)[egToTx$TXNAME] ## ----getsequence--------------------------------------------------------- library(BSgenome.Hsapiens.UCSC.hg19) txx <- extractTranscriptsFromGenome(Hsapiens, cdsByTx) ## ----translate----------------------------------------------------------- all(width(txx) %% 3 == 0) # sanity check translate(txx) # amino acid sequence ## ----rtracklayer-roi----------------------------------------------------- roi <- GRanges("chr10", IRanges(92106877, 112106876, names="ENSG00000099194")) ## ----rtracklayer-session------------------------------------------------- library(rtracklayer) session <- browserSession() ## ----rtracklayer-marks--------------------------------------------------- trackName <- "wgEncodeRegTfbsClusteredV2" tableName <- "wgEncodeRegTfbsClusteredV2" trFactor <- "ERalpha_a" ucscTable <- getTable(ucscTableQuery(session, track=trackName, range=roi, table=tableName, name=trFactor)) ## ----rtracklayer-plot, fig.height=3-------------------------------------- plot(score ~ chromStart, ucscTable, pch="+") abline(v=start(roi) + (end(roi) - start(roi) + 1) / 2, col="blue")