## ----setup, echo=FALSE--------------------------------------------------- library(UseBioconductor) stopifnot(BiocInstaller::biocVersion() == "3.1") ## ----style, echo = FALSE, results = 'asis'------------------------------- BiocStyle::markdown() ## ----pkgs, eval=TRUE, echo=FALSE, warning=FALSE, message=FALSE----------- options(max.print=1000) suppressPackageStartupMessages({ library(org.Hs.eg.db) library(TxDb.Hsapiens.UCSC.hg19.knownGene) library(BSgenome.Hsapiens.UCSC.hg19) library(GenomicRanges) library(biomaRt) library(rtracklayer) }) ## ----select-setup-------------------------------------------------------- ensids <- c("ENSG00000130720", "ENSG00000103257", "ENSG00000156414", "ENSG00000144644", "ENSG00000159307", "ENSG00000144485") ## ----select-------------------------------------------------------------- library(org.Hs.eg.db) keytypes(org.Hs.eg.db) columns(org.Hs.eg.db) cols <- c("SYMBOL", "GENENAME") select(org.Hs.eg.db, keys=ensids, columns=cols, keytype="ENSEMBL") ## ----biomaRt1, eval=FALSE, results="hide"-------------------------------- # ## NEEDS INTERNET ACCESS !! # library(biomaRt) # head(listMarts(), 3) ## list the marts # head(listDatasets(useMart("ensembl")), 3) ## mart datasets # ensembl <- ## fully specified mart # useMart("ensembl", dataset = "hsapiens_gene_ensembl") # # head(listFilters(ensembl), 3) ## filters # myFilter <- "chromosome_name" # substr(filterOptions(myFilter, ensembl), 1, 50) ## return values # myValues <- c("21", "22") # head(listAttributes(ensembl), 3) ## attributes # myAttributes <- c("ensembl_gene_id","chromosome_name") # # ## assemble and query the mart # res <- getBM(attributes = myAttributes, filters = myFilter, # values = myValues, mart = ensembl) ## ----symbol-to-entrez---------------------------------------------------- library(org.Hs.eg.db) eid <- select(org.Hs.eg.db, "BRCA1", "ENTREZID", "SYMBOL")[["ENTREZID"]] ## ----entrez-to-tx, messages=FALSE---------------------------------------- library(TxDb.Hsapiens.UCSC.hg19.knownGene) txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene txid <- select(txdb, eid, "TXNAME", "GENEID")[["TXNAME"]] ## ----tx-to-cds-coords---------------------------------------------------- cds <- cdsBy(txdb, by="tx", use.names=TRUE) brca1cds <- cds[names(cds) %in% txid] class(brca1cds) length(brca1cds) brca1cds[[1]] # exons in cds cdswidth <- width(brca1cds) # width of each exon all((sum(cdswidth) %% 3) == 0) # sum within cds, modulus 3 ## ----Gviz, message=FALSE------------------------------------------------- require(Gviz) anno <- AnnotationTrack(brca1cds) plotTracks(list(GenomeAxisTrack(), anno)) ## ----cds-to-seq---------------------------------------------------------- library(BSgenome.Hsapiens.UCSC.hg19) genome <- BSgenome.Hsapiens.UCSC.hg19 tx_seq <- extractTranscriptSeqs(genome, brca1cds) tx_seq ## ----introns------------------------------------------------------------- introns <- psetdiff(range(brca1cds), brca1cds) ## ----intron-seqs--------------------------------------------------------- seq <- getSeq(genome, introns) names(seq) seq[["uc010whl.2"]] # 21 introns ## ----rtracklayer-roi----------------------------------------------------- library(GenomicRanges) roi <- GRanges("chr10", IRanges(92106877, 112106876, names="ENSG00000099194")) ## ----rtracklayer-session------------------------------------------------- library(rtracklayer) session <- browserSession() ## ----rtracklayer-marks--------------------------------------------------- trackName <- "wgEncodeRegTfbsClusteredV2" tableName <- "wgEncodeRegTfbsClusteredV2" trFactor <- "ERalpha_a" ucscTable <- getTable(ucscTableQuery(session, track=trackName, range=roi, table=tableName, name=trFactor)) ## ----rtracklayer-plot, fig.height=3-------------------------------------- plot(score ~ chromStart, ucscTable, pch="+") abline(v=start(roi) + (end(roi) - start(roi) + 1) / 2, col="blue") ## ----grasp2db, eval=FALSE------------------------------------------------ # library(grasp2db) # Annotation package, # # 6 Gb AnnotationHub resource # d <- GRASP2() # dplyr instance # hispanic <- tbl(d, "count") %>% # filter(Population=="Hispanic") # semi_join(tbl(d, "variant"), hispanic) ## ----REST, eval=FALSE---------------------------------------------------- # library(httr) # # .SERVER <- "http://rest.ensembl.org" # # .get <- # function(endpoint, parameters=list(), server=.SERVER) # { # parameters <- # paste(names(parameters), unname(parameters), sep="=", collapse="&") # url <- sprintf("%s%s%s%s", server, endpoint, # ifelse(missing(parameters), "", "?"), # parameters) # response <- GET(url) # stop_for_status(response) # content(response) # } # # .jsonlist2data.frame <- # function(lst) # { # as.data.frame(t(simplify2array(lst))) # } ## ----ensembl-ping, eval=FALSE-------------------------------------------- # .get('/info/ping')$ping ## ----ensembl-mapping, eval=FALSE----------------------------------------- # species <- "human"; symbol <- "BRAF" # endpoint <- sprintf("/xrefs/symbol/%s/%s", species, symbol) # parameters <- c(object_type="gene") # .get(endpoint, parameters) ## ----ensembl-ranges, eval=FALSE------------------------------------------ # endpoint <- "/overlap/region/human/7:140424943-140624564" # parameters <- c(feature="cds", logic_name="havana") # olaps <- .get(endpoint, parameters) # .jsonlist2data.frame(olaps) ## ----ensembl-species, eval=FALSE----------------------------------------- # species <- .get("/info/species") # species1 <- lapply(species[[1]], "[", names(species[[1]][[1]])[c(1:7, 10)]) # .jsonlist2data.frame(species1)