## ----style, eval=TRUE, echo=FALSE, results='asis'-------------------------- BiocStyle::latex() ## ----include=FALSE--------------------------------------------------------- library(knitr) opts_chunk$set(tidy=FALSE) ## ----<available schemas, results='hide'------------------------------------ library(DBI) library(org.Hs.eg.db) library(AnnotationForge) available.dbschemas() ## ----setup0, results='hide', echo=FALSE------------------------------- options(continue=" ", prompt="R> ", width=72L) ## ----setup, results='hide'-------------------------------------------- library(hgu95av2.db) ## ----objects---------------------------------------------------------- ls("package:hgu95av2.db") ## ----Question #1, echo=FALSE, results='hide'-------------------------- library(hgu95av2.db) search() hgu95av2_dbschema() org.Hs.eg_dbschema() ## ----QAlisting-------------------------------------------------------- qcdata = capture.output(hgu95av2()) head(qcdata, 20) ## ----mapcounts, eval=FALSE-------------------------------------------- # hgu95av2MAPCOUNTS ## ----envApiDemo1------------------------------------------------------ all_probes <- ls(hgu95av2ENTREZID) length(all_probes) set.seed(0xa1beef) probes <- sample(all_probes, 5) probes ## ----envApiDemo2------------------------------------------------------ hgu95av2ENTREZID[[probes[1]]] hgu95av2ENTREZID$"31882_at" syms <- unlist(mget(probes, hgu95av2SYMBOL)) syms ## ----helpDemo, eval= FALSE, results='hide'---------------------------- # ?hgu95av2CHRLOC ## ----Question #2, echo=FALSE, results='hide'-------------------------- mget(probes, hgu95av2CHRLOC, ifnotfound=NA)[1:2] ## ----as.list, eval=FALSE---------------------------------------------- # system.time(as.list(hgu95av2SYMBOL)[1:10]) # # ## vs: # # system.time(as.list(hgu95av2SYMBOL[1:10])) ## ----show.revmap------------------------------------------------------ unlist(mget(syms, revmap(hgu95av2SYMBOL))) ## ----thisworks-------------------------------------------------------- as.list(revmap(hgu95av2PATH)["00300"]) ## ----revmap2---------------------------------------------------------- x <- hgu95av2PATH ## except for the name, this is exactly revmap(x) revx <- hgu95av2PATH2PROBE revx2 <- revmap(x, objName="PATH2PROBE") revx2 identical(revx, revx2) as.list(revx["00300"]) ## ----revmap2b--------------------------------------------------------- Term("GO:0000018") Definition("GO:0000018") ## ----Question #3, echo=FALSE, results='hide'-------------------------- rs = ls(revmap(org.Hs.egREFSEQ))[4:6] EGs = mget(rs, revmap(org.Hs.egREFSEQ), ifnotfound=NA) ##Then get the GO terms. GOs = mget(unlist(EGs), org.Hs.egGO, ifnotfound=NA) GOs ##Extract the GOIDs from this list: GOIDs = as.character(unique(sapply(GOs, names))) ##Then look up what these terms are: Term(GOIDs) ## ----toTable---------------------------------------------------------- head(toTable(hgu95av2GO[probes])) ## ----undirectedMethod------------------------------------------------- toTable(x)[1:6, ] toTable(revx)[1:6, ] ## ----directedMethods-------------------------------------------------- length(x) length(revx) allProbeSetIds <- keys(x) allKEGGIds <- keys(revx) ## ----moreUndirectedMethods-------------------------------------------- junk <- Lkeys(x) # same for all maps in hgu95av2.db (except pseudo-map # MAPCOUNTS) Llength(x) # nb of Lkeys junk <- Rkeys(x) # KEGG ids for PATH/PATH2PROBE maps, GO ids for # GO/GO2PROBE/GO2ALLPROBES maps, etc... Rlength(x) # nb of Rkeys ## ----moreKeysMethods-------------------------------------------------- x = hgu95av2ENTREZID[1:10] ## Directed methods mappedkeys(x) # mapped keys count.mappedkeys(x) # nb of mapped keys ## Undirected methods mappedLkeys(x) # mapped left keys count.mappedLkeys(x) # nb of mapped Lkeys ## ----isNA------------------------------------------------------------- y = hgu95av2ENTREZID[isNA(hgu95av2ENTREZID)] # usage like is.na() Lkeys(y)[1:4] ## ----Question #4, echo=FALSE, results='hide'-------------------------- count.mappedLkeys(hgu95av2GO) Llength(hgu95av2GO) - count.mappedLkeys(hgu95av2GO) mappedLkeys(hgu95av2GO)[1] toTable(hgu95av2GO["1000_at"]) ## ----revmapUseCases--------------------------------------------------- x <- hgu95av2CHR Rkeys(x) chroms <- Rkeys(x)[23:24] chroms Rkeys(x) <- chroms toTable(x) ## ----easy------------------------------------------------------------- z <- as.list(revmap(x)[chroms]) names(z) z[["Y"]] ## ----evilUnlist------------------------------------------------------- chrs = c("12","6") mget(chrs, revmap(hgu95av2CHR[1:30]), ifnotfound=NA) ## ----evilUnlist2------------------------------------------------------ unlist(mget(chrs, revmap(hgu95av2CHR[1:30]), ifnotfound=NA)) ## ----evilUnlist3------------------------------------------------------ unlist2(mget(chrs, revmap(hgu95av2CHR[1:30]), ifnotfound=NA)) ## ----cytogenetic2----------------------------------------------------- x <- hgu95av2MAP pbids <- c("38912_at", "41654_at", "907_at", "2053_at", "2054_g_at", "40781_at") x <- subset(x, Lkeys=pbids, Rkeys="18q11.2") toTable(x) ## ----coerce----------------------------------------------------------- pb2cyto <- as.character(x) pb2cyto[pbids] ## ----coercWarnings---------------------------------------------------- cyto2pb <- as.character(revmap(x)) ## ----multiProbes------------------------------------------------------ ## How many probes? dim(hgu95av2ENTREZID) ## Make a mapping with multiple probes exposed multi <- toggleProbes(hgu95av2ENTREZID, "all") ## How many probes? dim(multi) ## ----multiProbes2----------------------------------------------------- ## Make a mapping with ONLY multiple probes exposed multiOnly <- toggleProbes(multi, "multiple") ## How many probes? dim(multiOnly) ## Then make a mapping with ONLY single mapping probes singleOnly <- toggleProbes(multiOnly, "single") ## How many probes? dim(singleOnly) ## ----multiProbes3----------------------------------------------------- ## Test the multiOnly mapping hasMultiProbes(multiOnly) hasSingleProbes(multiOnly) ## Test the singleOnly mapping hasMultiProbes(singleOnly) hasSingleProbes(singleOnly) ## ----orgSchema, results='hide'---------------------------------------- org.Hs.eg_dbschema() ## ----connObj, results='hide'------------------------------------------ org.Hs.eg_dbconn() ## ----connObj2, results='hide'----------------------------------------- query <- "SELECT gene_id FROM genes LIMIT 10;" result = dbGetQuery(org.Hs.eg_dbconn(), query) result ## ----Question #5, echo=FALSE, results='hide'-------------------------- sql <- "SELECT gene_id, chromosome FROM genes AS g, chromosomes AS c WHERE g._id=c._id;" dbGetQuery(org.Hs.eg_dbconn(),sql)[1:10,] ##OR toTable(org.Hs.egCHR)[1:10,] ## ----complexEnv, eval=FALSE------------------------------------------- # ## Obtain SYMBOLS with at least one GO BP # ## annotation with evidence IMP, IGI, IPI, or IDA. # system.time({ # bpids <- eapply(hgu95av2GO, function(x) { # if (length(x) == 1 && is.na(x)) # NA # else { # sapply(x, function(z) { # if (z$Ontology == "BP") # z$GOID # else # NA # }) # } # }) # bpids <- unlist(bpids) # bpids <- unique(bpids[!is.na(bpids)]) # g2p <- mget(bpids, hgu95av2GO2PROBE) # wantedp <- lapply(g2p, function(x) { # x[names(x) %in% c("IMP", "IGI", "IPI", "IDA")] # }) # wantedp <- wantedp[sapply(wantedp, length) > 0] # wantedp <- unique(unlist(wantedp)) # ans <- unlist(mget(wantedp, hgu95av2SYMBOL)) # }) # length(ans) # ans[1:10] ## ----schema, results='hide'------------------------------------------- hgu95av2_dbschema() ## ----schema2, results='hide'------------------------------------------ hgu95av2ORGPKG ## ----schema3, results='hide'------------------------------------------ org.Hs.eg_dbschema() ## ----hgu95av2_org_join, tidy=FALSE------------------------------------ orgDBLoc = system.file("extdata", "org.Hs.eg.sqlite", package="org.Hs.eg.db") attachSQL = paste("ATTACH '", orgDBLoc, "' AS orgDB;", sep = "") dbGetQuery(hgu95av2_dbconn(), attachSQL) ## ----complexDb-------------------------------------------------------- system.time({ SQL <- "SELECT DISTINCT probe_id,symbol FROM probes, orgDB.gene_info AS gi, orgDB.genes AS g, orgDB.go_bp AS bp WHERE bp._id=g._id AND gi._id=g._id AND probes.gene_id=g.gene_id AND bp.evidence IN ('IPI', 'IDA', 'IMP', 'IGI')" zz <- dbGetQuery(hgu95av2_dbconn(), SQL) }) #its a good idea to always DETACH your database when you are finished... dbGetQuery(hgu95av2_dbconn(), "DETACH orgDB" ) ## ----Question #6, echo=FALSE, results='hide'-------------------------- sql <- "SELECT gene_id, start_location, end_location, cytogenetic_location FROM genes AS g, chromosome_locations AS c, cytogenetic_locations AS cy WHERE g._id=c._id AND g._id=cy._id" dbGetQuery(org.Hs.eg_dbconn(),sql)[1:10,] ## ----Question #7, echo=FALSE, results='hide'-------------------------- orgDBLoc = system.file("extdata", "org.Hs.eg.sqlite", package="org.Hs.eg.db") attachSQL = paste("ATTACH '", orgDBLoc, "' AS orgDB;", sep = "") dbGetQuery(hgu95av2_dbconn(), attachSQL) goDBLoc = system.file("extdata", "GO.sqlite", package="GO.db") attachSQL = paste("ATTACH '", goDBLoc, "' AS goDB;", sep = "") dbGetQuery(hgu95av2_dbconn(), attachSQL) SQL <- "SELECT DISTINCT p.probe_id, gi.symbol, gt.go_id, gt.definition FROM probes AS p, orgDB.gene_info AS gi, orgDB.genes AS g, orgDB.go_bp AS bp, goDB.go_term AS gt WHERE bp._id=g._id AND gi._id=g._id AND p.gene_id=g.gene_id AND bp.evidence IN ('IPI', 'IDA', 'IMP', 'IGI') AND gt.go_id=bp.go_id" zz <- dbGetQuery(hgu95av2_dbconn(), SQL) dbGetQuery(hgu95av2_dbconn(), "DETACH orgDB") dbGetQuery(hgu95av2_dbconn(), "DETACH goDB") ## ----SessionInfo, echo=FALSE------------------------------------------ sessionInfo()