################################################### ### chunk number 1: setup ################################################### #line 18 "Integration-lab.Rnw" options(width = 40) ################################################### ### chunk number 2: LDsetup ################################################### #line 52 "Integration-lab.Rnw" library(AdvancedR2011) library(StudentGWAS) dataPath <- system.file("extdata", "snpData.nc", package="AdvancedR2011Data") metadataPath <- system.file("extdata", "metadata.sqlite", package="AdvancedR2011Data") ## The GWASdata class data <- GWASdata(dataPath, metadataPath) ## The cld method for GWASdata res <- cld(data, 1, 20) ################################################### ### chunk number 3: LDallsnps ################################################### #line 77 "Integration-lab.Rnw" ## Helper function to create index createidx <- function(first, last, chunk, width) { part <- ceiling((last-first)/(chunk-width)) first <- c(first, (chunk-width+1)*(seq_len(part-1))) last <- c(chunk, chunk + first[-c(1,part)], last) if (any(abs(first - last) < width)) stop("Adjust the chunk size or width to ensure the number of snps are greater than the width in all sections.") list(first = first, last = last) } ## Function to compute LD and filter snps filterLD <- function(first, last, data, width) { res <- cld(data, first, last, width) idx <- which(abs(res) > 0.1, arr.ind=TRUE) rownames(res)[-unique(idx[,1])] } ## mapply to process all data width <- 5 idx <- createidx(1, 100000, 10000, width) lst <- mapply(filterLD, first=idx$first, last=idx$last, MoreArgs=c(data, width), USE.NAMES=FALSE) snps <- unlist(lst) ################################################### ### chunk number 4: orgExample ################################################### #line 127 "Integration-lab.Rnw" ## load a lib library(org.Hs.eg.db) ## list the mappings: head(ls("package:org.Hs.eg.db")) ## etc. ## The mappings act like subsettable R objects so you can subset: org.Hs.egPATH[c("10","100")] ## or you can convert that subsetted mapping to a data.frame: toTable(org.Hs.egPATH[c("10","100")]) ## and you can merge that with other data using merge() etc. merge(toTable(org.Hs.egPATH[c("10","100")]), toTable(org.Hs.egSYMBOL[c("10","100")]), by.x ="gene_id", by.y="gene_id", all.x=TRUE, all.y=TRUE) ################################################### ### chunk number 5: annotSolutions ################################################### #line 172 "Integration-lab.Rnw" library(org.Hs.eg.db) library(StudentGWAS) ## both approaches require that we recover the entrez gene IDs dbPath <- system.file("extdata","metadata.sqlite",package="AdvancedR2011Data") snpAnn <- getSnps(dbPath) ## TEMP work with just the 1st 10 snps snps <- snpAnn[["snp_id"]][1:10] ## end TEMP ## subset the data snpAnn <- snpAnn[snpAnn[["snp_id"]] %in% snps,] ## 1st lets do the easy way: ## Before we start, snpGeneIds <- snpAnn[["gene_id"]] ## get the IDs snpGeneIds <- snpGeneIds[!is.na(snpGeneIds)] ## drop NAs snpGeneIds <- unique(snpGeneIds) ## make them unique symbAnn <- merge(snpAnn, toTable(org.Hs.egSYMBOL[snpGeneIds]), by.x ="gene_id", by.y="gene_id", all.x=TRUE) ## then lets do it the SQL way: drv <- SQLite() con <- dbConnect(drv, dbname=dbPath) orgPath <- system.file("extdata", "org.Hs.eg.sqlite", package="org.Hs.eg.db") ## Attach the foreign db from the dbPath dbGetQuery(con, sprintf("ATTACH '%s' AS db", orgPath)) ## now we might want a bit more information about that 2nd DB: con2 <- dbConnect(drv, dbname=orgPath) dbListTables(con2) dbListFields(con2, "gene_info") ## format the snpIDs snpIDs <- paste(snps, collapse = "','") ## a simple join will work here (restricted by just the IDs sql <- paste("SELECT * FROM db.gene_info AS gi, db.genes as g, snps as s WHERE gi._id=g._id AND g.gene_id=s.gene_id AND s.snp_id IN ('", snpIDs,"')",sep="") allAnn2 <- dbGetQuery(con, sql) ## And just to be tidy lets put away our DB connections. dbDisconnect(con) dbDisconnect(con2)