### R code from vignette source 'PADOG.Rnw' ################################################### ### code chunk number 1: fig1 ################################################### library(PADOG) set = "GSE9348" data(list = set, package = "KEGGdzPathwaysGEO") #write a function to extract required info into a list getdataaslist = function(x) { x = get(x) exp = experimentData(x) dataset = exp@name disease = notes(exp)$disease dat.m = exprs(x) ano = pData(x) design = notes(exp)$design annotation = paste(x@annotation, ".db", sep = "") targetGeneSets = notes(exp)$targetGeneSets list = list(dataset, disease, dat.m, ano, design, annotation, targetGeneSets) names(list) = c("dataset", "disease", "dat.m", "ano", "design", "annotation", "targetGeneSets") return(list) } dlist = getdataaslist(set) #run padog function on KEGG pathways #use NI=1000 for accurate results and run in parallel to speed up (see below) myr = padog( esetm = dlist$dat.m, group = dlist$ano$Group, paired = dlist$design == "Paired", block = dlist$ano$Block, targetgs = dlist$targetGeneSets, annotation = dlist$annotation, gslist = "KEGG.db", organism = "hsa", verbose = FALSE, Nmin = 3, NI = 50, plots = TRUE, dseed = 1 ) myr[1:15,-c(4,5)] ################################################### ### code chunk number 2: PADOG.Rnw:124-143 ################################################### #you can control the number of cores to use via argument 'ncr' myr2 = padog( esetm = dlist$dat.m, group = dlist$ano$Group, paired = dlist$design == "Paired", block = dlist$ano$Block, targetgs = dlist$targetGeneSets, annotation = dlist$annotation, gslist = "KEGG.db", organism = "hsa", verbose = FALSE, Nmin = 3, NI = 50, plots = TRUE, dseed = 1, parallel = TRUE ) # verify that the result is the same which is a built-in feature all.equal(myr, myr2) ################################################### ### code chunk number 3: PADOG.Rnw:203-261 ################################################### randFun = function(dseed, mname = "myRand") { #a helper function to pass additional variables to your method getdataaslist = getdataaslist return(function(set, mygslist, minsize) {#your method function set.seed(dseed) #this loads the dataset data(list = set, package = "KEGGdzPathwaysGEO") #extract the required info using the function defined earlier dlist = getdataaslist(set) #get rid of duplicates probesets per ENTREZ ID by keeping the probeset #with smallest p-value (computed using limma) aT1 = filteranot(esetm = dlist$dat.m, group = dlist$ano$Group, paired = dlist$design == "Paired", block = dlist$ano$Block, annotation = dlist$annotation) #create an output dataframe for this toy method with random gene set p-values mygslistSize = unlist(lapply(mygslist, function(x) { length(intersect(aT1$ENTREZID, x)) })) res = data.frame(ID = names(mygslist), P = runif(length(mygslist)), Size = mygslistSize, stringsAsFactors = FALSE) res$FDR = p.adjust(res$P,"fdr") #drop genesets with less than minsize genes in the current dataset res = res[res$Size >= minsize,] #compute ranks res$Rank = rank(res$P) / dim(res)[1]*100 #needed to compare ranks between methods; must be the same as given #in mymethods argument "list(myRand=" res$Method = mname #needed because comparisons of ranks between methods is paired at dataset level res$Dataset = dlist$dataset #output only result for the targetGeneSets #which are gene sets expected to be relevant in this dataset return(res[res$ID %in% dlist$targetGeneSets,]) } ) } randomF = randFun(1) #run the analysis on all 24 datasets and compare the new method "myRand" with #PADOG and GSA (if installed) (chosen as reference since is listed first in the #existingMethods) #if the package doParallel is installed datasets are analyzed in parallel. #out = compPADOG(datasets = NULL, existingMethods = c("GSA","PADOG"), # mymethods = list(myRand = randomF), gslist = "KEGG.db", # Nmin = 3, NI = 1000, plots = TRUE, verbose=FALSE, # parallel = TRUE, dseed = 1, pkgs = NULL) #compare myRand against PADOG on 3 datasets only #mysets = data(package = "KEGGdzPathwaysGEO")$results[,"Item"] mysets = c("GSE9348","GSE8671","GSE1297") out = compPADOG(datasets = mysets, existingMethods = c("PADOG"), mymethods = list(myRand = randomF), gslist = "KEGG.db", Nmin = 3, NI = 40, plots = TRUE, verbose=FALSE, parallel = TRUE, dseed = 1, pkgs = NULL) print(out)