###################################################
### chunk number 1: setup
###################################################
#line 18 "Integration-lab.Rnw"
options(width = 40)


###################################################
### chunk number 2: LDsetup
###################################################
#line 52 "Integration-lab.Rnw"
library(AdvancedR2011)
library(StudentGWAS)
dataPath <- system.file("extdata", "snpData.nc",
                        package="AdvancedR2011Data")
metadataPath <- system.file("extdata", "metadata.sqlite",
                            package="AdvancedR2011Data")

## The GWASdata class
data <- GWASdata(dataPath, metadataPath)

## The cld method for GWASdata
res <- cld(data, 1, 20) 


###################################################
### chunk number 3: LDallsnps
###################################################
#line 77 "Integration-lab.Rnw"

## Helper function to create index
createidx <- function(first, last, chunk, width)
{
    part <- ceiling((last-first)/(chunk-width))
    first <- c(first, (chunk-width+1)*(seq_len(part-1)))
    last <- c(chunk, chunk + first[-c(1,part)], last)

    if (any(abs(first - last) < width))
        stop("Adjust the chunk size or width to ensure the
        number of snps are greater than the width in all sections.")
    list(first = first, last = last)
}

## Function to compute LD and filter snps
filterLD <- function(first, last, data, width)
{
    res <- cld(data, first, last, width) 
    idx <- which(abs(res) > 0.1, arr.ind=TRUE)
    rownames(res)[-unique(idx[,1])]
}

## mapply to process all data 
width <- 5
idx <- createidx(1, 100000, 10000, width)
lst <- mapply(filterLD, first=idx$first, last=idx$last, 
              MoreArgs=c(data, width), 
              USE.NAMES=FALSE)

snps <- unlist(lst)


###################################################
### chunk number 4: orgExample
###################################################
#line 127 "Integration-lab.Rnw"
## load a lib
library(org.Hs.eg.db)
## list the mappings:
head(ls("package:org.Hs.eg.db")) ## etc.
## The mappings act like subsettable R objects so you can subset:
org.Hs.egPATH[c("10","100")]
## or you can convert that subsetted mapping to a data.frame:
toTable(org.Hs.egPATH[c("10","100")])
## and you can merge that with other data using merge() etc.
merge(toTable(org.Hs.egPATH[c("10","100")]), 
      toTable(org.Hs.egSYMBOL[c("10","100")]),
      by.x ="gene_id", by.y="gene_id",
      all.x=TRUE, all.y=TRUE)


###################################################
### chunk number 5: annotSolutions
###################################################
#line 172 "Integration-lab.Rnw"
library(org.Hs.eg.db)
library(StudentGWAS)
## both approaches require that we recover the entrez gene IDs
dbPath <- system.file("extdata","metadata.sqlite",package="AdvancedR2011Data")
snpAnn <- getSnps(dbPath)


## TEMP work with just the 1st 10 snps
snps <- snpAnn[["snp_id"]][1:10]
## end TEMP


## subset the data
snpAnn <- snpAnn[snpAnn[["snp_id"]] %in% snps,]

## 1st lets do the easy way:
## Before we start,
snpGeneIds <- snpAnn[["gene_id"]] ## get the IDs
snpGeneIds <- snpGeneIds[!is.na(snpGeneIds)] ## drop NAs
snpGeneIds <- unique(snpGeneIds) ## make them unique

symbAnn <- merge(snpAnn, toTable(org.Hs.egSYMBOL[snpGeneIds]), 
                 by.x ="gene_id", by.y="gene_id", all.x=TRUE)


## then lets do it the SQL way:

drv <- SQLite()
con <- dbConnect(drv, dbname=dbPath)
orgPath <- system.file("extdata", "org.Hs.eg.sqlite",
                       package="org.Hs.eg.db")
## Attach the foreign db from the dbPath
dbGetQuery(con, sprintf("ATTACH '%s' AS db", orgPath))

## now we might want a bit more information about that 2nd DB:
con2 <- dbConnect(drv, dbname=orgPath)
dbListTables(con2)
dbListFields(con2, "gene_info") 

## format the snpIDs
snpIDs <- paste(snps, collapse = "','")

## a simple join will work here (restricted by just the IDs
sql <- paste("SELECT * FROM db.gene_info AS gi, db.genes as g, snps as s
                   WHERE gi._id=g._id AND g.gene_id=s.gene_id AND s.snp_id 
              IN ('", snpIDs,"')",sep="")
allAnn2 <- dbGetQuery(con, sql)


## And just to be tidy lets put away our DB connections.
dbDisconnect(con)
dbDisconnect(con2)