## ----style, eval=TRUE, echo=FALSE, results='asis'------------------------ BiocStyle::latex() ## ----include=FALSE------------------------------------------------------- library(knitr) opts_chunk$set(tidy=FALSE) ## ----Homo.sapiens-------------------------------------------------------- library(RSQLite) library(Homo.sapiens) columns(Homo.sapiens) ## ----Homo.sapiens2------------------------------------------------------- keytypes(Homo.sapiens) ## ----Homo.sapiens3------------------------------------------------------- k <- head(keys(Homo.sapiens,keytype="ENTREZID")) k ## ----Homo.sapiens4------------------------------------------------------- result <- select(Homo.sapiens, keys=k, columns=c("TXNAME","TXSTART","TXSTRAND"), keytype="ENTREZID") head(result) ## ----URI Example--------------------------------------------------------- uri <- 'http://www.uniprot.org/uniprot/?query=' ids <- c('P13368', 'Q6GZX4') idStr <- paste(ids, collapse="+or+") format <- '&format=tab' fullUri <- paste0(uri,idStr,format) read.delim(fullUri) ## ----web service code---------------------------------------------------- getUniprotGoodies <- function(query, columns) { ## query and columns start as a character vectors qstring <- paste(query, collapse="+or+") cstring <- paste(columns, collapse=",") uri <- 'http://www.uniprot.org/uniprot/?query=' fullUri <- paste0(uri,qstring,'&format=tab&columns=',cstring) dat <- read.delim(fullUri, stringsAsFactors=FALSE) ## now remove things that were not in the specific original query... dat <- dat[dat[,1] %in% query,] dat } ## ----xml_tree------------------------------------------------------------ library(XML) uri <- "http://www.uniprot.org/uniprot/?query=P13368+or+Q6GZX4&format=xml" xml <- xmlTreeParse(uri, useInternalNodes=TRUE) ## ----xml_namespace------------------------------------------------------- defs <- xmlNamespaceDefinitions(xml, recurisve=TRUE) defs ## ----xml_namespace_struct------------------------------------------------ ns <- structure(sapply(defs, function(x) x$uri), names=names(defs)) ## ----xml_namespace2------------------------------------------------------ entry <- getNodeSet(xml, "//ns:entry", "ns") xmlSize(entry) ## ----xml_xmlAttrs-------------------------------------------------------- nms <- xpathSApply(xml, "//ns:entry/ns:name", xmlValue, namespaces="ns") attrs <- xpathApply(xml, "//ns:entry", xmlAttrs, namespaces="ns") names(attrs) <- nms attrs ## ----xml_xmlChildren----------------------------------------------------- fun1 <- function(elt) unique(names(xmlChildren(elt))) xpathApply(xml, "//ns:entry", fun1, namespaces="ns") ## ----xml_feature_type---------------------------------------------------- Q6GZX4 <- "//ns:entry[ns:accession='Q6GZX4']/ns:feature" xmlSize(getNodeSet(xml, Q6GZX4, namespaces="ns")) P13368 <- "//ns:entry[ns:accession='P13368']/ns:feature" xmlSize(getNodeSet(xml, P13368, namespaces="ns")) ## ----xml_feature_type2--------------------------------------------------- path <- "//ns:feature" unique(xpathSApply(xml, path, xmlGetAttr, "type", namespaces="ns")) ## ----xml_feature_type_P13368--------------------------------------------- path <- "//ns:entry[ns:accession='P13368']/ns:feature[@type='sequence conflict']" data.frame(t(xpathSApply(xml, path, xmlAttrs, namespaces="ns"))) ## ----xml_sequence-------------------------------------------------------- library(Biostrings) path <- "//ns:entry/ns:sequence" seqs <- xpathSApply(xml, path, xmlValue, namespaces="ns") aa <- AAStringSet(unlist(lapply(seqs, function(elt) gsub("\n", "", elt)), use.names=FALSE)) names(aa) <- nms aa ## ----WebServiceObject---------------------------------------------------- setClass("uniprot", representation(name="character"), prototype(name="uniprot")) ## ----makeInstanceWebServiceObj------------------------------------------- uniprot <- new("uniprot") ## ----onLoad2,eval=FALSE-------------------------------------------------- ## .onLoad <- function(libname, pkgname) ## { ## ns <- asNamespace(pkgname) ## uniprot <- new("uniprot") ## assign("uniprot", uniprot, envir=ns) ## namespaceExport(ns, "uniprot") ## } ## ----keytypeUniprot------------------------------------------------------ setMethod("keytypes", "uniprot",function(x){return("UNIPROT")}) uniprot <- new("uniprot") keytypes(uniprot) ## ----keytypeUniprot2----------------------------------------------------- setMethod("columns", "uniprot", function(x){return(c("ID", "SEQUENCE", "ORGANISM"))}) columns(uniprot) ## ----webServiceSelect---------------------------------------------------- .select <- function(x, keys, columns){ colsTranslate <- c(id='ID', sequence='SEQUENCE', organism='ORGANISM') columns <- names(colsTranslate)[colsTranslate %in% columns] getUniprotGoodies(query=keys, columns=columns) } setMethod("select", "uniprot", function(x, keys, columns, keytype) { .select(keys=keys, columns=columns) }) ## ----webServiceSelect2, eval=FALSE--------------------------------------- ## select(uniprot, keys=c("P13368","P20806"), columns=c("ID","ORGANISM")) ## ----classicConn,results='hide'------------------------------------------ drv <- SQLite() library("org.Hs.eg.db") con_hs <- dbConnect(drv, dbname=system.file("extdata", "org.Hs.eg.sqlite", package = "org.Hs.eg.db")) con_hs dbDisconnect(con_hs) ## ----ourConn------------------------------------------------------------- require(hom.Hs.inp.db) str(hom.Hs.inp.db) ## ----ourConn2------------------------------------------------------------ hom.Hs.inp.db$conn ## or better we can use a helper function to wrap this: AnnotationDbi::dbconn(hom.Hs.inp.db) ## or we can just call the provided convenience function ## from when this package loads: hom.Hs.inp_dbconn() ## ----dbListTables-------------------------------------------------------- con <- AnnotationDbi::dbconn(hom.Hs.inp.db) head(dbListTables(con)) dbListFields(con, "Mus_musculus") ## ----dbGetQuery---------------------------------------------------------- dbGetQuery(con, "SELECT * FROM metadata") ## ----dbListTables2------------------------------------------------------- head(dbListTables(con)) ## ----dbListFields2------------------------------------------------------- dbListFields(con, "Apis_mellifera") ## ----dbGetQuery2--------------------------------------------------------- head(dbGetQuery(con, "SELECT * FROM Apis_mellifera")) ## ----Anopheles,eval=FALSE------------------------------------------------ ## head(dbGetQuery(con, "SELECT * FROM Anopheles_gambiae")) ## ## Then only retrieve human records ## ## Query: SELECT * FROM Anopheles_gambiae WHERE species='HOMSA' ## head(dbGetQuery(con, "SELECT * FROM Anopheles_gambiae WHERE species='HOMSA'")) ## dbDisconnect(con) ## ----getMetadata, echo=FALSE--------------------------------------------- library(hom.Hs.inp.db) hom.Hs.inp_dbInfo() ## ----referenceClass,eval=FALSE------------------------------------------- ## .InparanoidDb <- ## setRefClass("InparanoidDb", contains="AnnotationDb") ## ----onLoad,eval=FALSE--------------------------------------------------- ## sPkgname <- sub(".db$","",pkgname) ## db <- loadDb(system.file("extdata", paste(sPkgname, ## ".sqlite",sep=""), package=pkgname, lib.loc=libname), ## packageName=pkgname) ## dbNewname <- AnnotationDbi:::dbObjectName(pkgname,"InparanoidDb") ## ns <- asNamespace(pkgname) ## assign(dbNewname, db, envir=ns) ## namespaceExport(ns, dbNewname) ## ----columns,eval=FALSE-------------------------------------------------- ## .cols <- function(x) ## { ## con <- AnnotationDbi::dbconn(x) ## list <- dbListTables(con) ## ## drop unwanted tables ## unwanted <- c("map_counts","map_metadata","metadata") ## list <- list[!list %in% unwanted] ## ## Then just to format things in the usual way ## list <- toupper(list) ## dbDisconnect(con) ## list ## } ## ## ## Then make this into a method ## setMethod("columns", "InparanoidDb", .cols(x)) ## ## Then we can call it ## columns(hom.Hs.inp.db) ## ----keytypes,eval=FALSE------------------------------------------------- ## setMethod("keytypes", "InparanoidDb", .cols(x)) ## ## Then we can call it ## keytypes(hom.Hs.inp.db) ## ## ## refactor of .cols ## .getLCcolnames <- function(x) ## { ## con <- AnnotationDbi::dbconn(x) ## list <- dbListTables(con) ## ## drop unwanted tables ## unwanted <- c("map_counts","map_metadata","metadata") ## list <- list[!list %in% unwanted] ## dbDisconnect(con) ## list ## } ## .cols <- function(x) ## { ## list <- .getLCcolnames(x) ## ## Then just to format things in the usual way ## toupper(list) ## } ## ## Test: ## columns(hom.Hs.inp.db) ## ## ## new helper function: ## .getTableNames <- function(x) ## { ## LC <- .getLCcolnames(x) ## UC <- .cols(x) ## names(UC) <- LC ## UC ## } ## .getTableNames(hom.Hs.inp.db) ## ----keys,eval=FALSE----------------------------------------------------- ## .keys <- function(x, keytype) ## { ## ## translate keytype back to table name ## tabNames <- .getTableNames(x) ## lckeytype <- names(tabNames[tabNames %in% keytype]) ## ## get a connection ## con <- AnnotationDbi::dbconn(x) ## sql <- paste("SELECT inp_id FROM",lckeytype, "WHERE species!='HOMSA'") ## res <- dbGetQuery(con, sql) ## res <- as.vector(t(res)) ## dbDisconnect(con) ## res ## } ## ## setMethod("keys", "InparanoidDb", .keys(x, keytype)) ## ## Then we can call it ## keys(hom.Hs.inp.db, "TRICHOPLAX_ADHAERENS") ## ----dbDisconnect-------------------------------------------------------- dbDisconnect(con) ## ----makeNewDb----------------------------------------------------------- drv <- dbDriver("SQLite") dbname <- file.path(tempdir(), "myNewDb.sqlite") con <- dbConnect(drv, dbname=dbname) ## ----exampleFrame-------------------------------------------------------- data = data.frame(id=c(1,2,9), string=c("Blue", "Red", "Green"), stringsAsFactors=FALSE) ## ----exercise2----------------------------------------------------------- dbGetQuery(con, "CREATE Table genePheno (id INTEGER, string TEXT)") ## ----LabelledPreparedQueries--------------------------------------------- names(data) <- c("id","string") sql <- "INSERT INTO genePheno VALUES ($id, $string)" dbBegin(con) dbGetPreparedQuery(con, sql, bind.data = data) dbCommit(con) ## ----ATTACH-------------------------------------------------------------- db <- system.file("extdata", "TxDb.Hsapiens.UCSC.hg19.knownGene.sqlite", package="TxDb.Hsapiens.UCSC.hg19.knownGene") dbGetQuery(con, sprintf("ATTACH '%s' AS db",db)) ## ----ATTACHJoin---------------------------------------------------------- sql <- "SELECT * FROM db.gene AS dbg, genePheno AS gp WHERE dbg.gene_id=gp.id" res <- dbGetQuery(con, sql) res ## ----SessionInfo, echo=FALSE--------------------------------------------- sessionInfo()