Skip to content.

bioconductor.org

Bioconductor is an open source and open development software project
for the analysis and comprehension of genomic data.

Sections

LabAnnotate.R

################################################### ### chunk number 1: loadlibs ################################################### library(Biobase) library(ALL) library(hgu95av2) library(annotate) data(ALL)

################################################### ### chunk number 2: subset ###################################################

ALLs1 = ALL[, ALL$mol.biol =="ALL1/AF4" | ALL$mol.biol == "BCR/ABL"]

################################################### ### chunk number 3: filter ################################################### library(genefilter) f1 <- kOverA(8, 7) f2 <- function(x)(IQR(x)>0.8) ff <- filterfun(f1, f2) selected <- genefilter(ALLs1, ff) sum(selected) ALLs2 <- ALLs1[selected,]

################################################### ### chunk number 4: mt.maxT ################################################### library(multtest) cl <- as.numeric(ALLs2$mol== "BCR/ABL") ## permutation test, using the Welch statistic resT <- mt.maxT(exprs(ALLs2), classlabel=cl, B=1000) ord <- order(resT$index) #the original gene order rawp <- resT$rawp[ord] # raw permutation $p$-values #names(resT) names(rawp) <- geneNames(ALLs2) sum(resT$adjp < 0.05)

################################################### ### chunk number 5: ALLs3 ###################################################

ALLs3 = ALLs2[resT$index[resT$adjp < 0.05],]

if(interactive()) { heatmap(exprs(ALLs3), ColSide= ifelse(ALLs3$mol=="ALL1/AF4", "red", "blue"), col=topo.colors(15)) } myLLs = unlist(mget(geneNames(ALLs3), hgu95av2LOCUSID)) sum(duplicated(myLLs))

################################################### ### chunk number 6: duplicates ################################################### lls <- unlist(as.list(hgu95av2LOCUSID)) tab <- table(table(lls))

cat("Multiplicity ", sapply(names(tab), function(x) sprintf("%4s", x)), "\nNo. LocusLink IDs ", sapply(tab, function(x) sprintf("%4d", x)),"\n")

##we precomputed the mapping from LL to Affy if( require("Brixen") ) { data("hgu95av2LOCUSID2PROBE") spLL = as.list(hgu95av2LOCUSID2PROBE) } else { lls <- lls[!is.na(lls)] ## LocuslinkIDs spLL <- split(lls, lls) ## Unique Locuslink IDs } ## names(lls) are Affymetrix probe set names ## whSel are those probe sets that were selected whSel <- match(geneNames(ALLs2)[selected], names(lls))

## 24 were NA (among them 10 controls) whSel <- whSel[!is.na(whSel)]

llSel <- lls[whSel] ## selected Locuslink IDs spSel <- split(llSel, llSel)

slen1 <- sapply(spLL, length) ## multiplicities of all LLids slen2 <- sapply(spSel, length) ## multiplicities of selected LLids

################################################### ### chunk number 7: hyperGChr1 ###################################################

chrs = as.list(hgu95av2CHR) table(sapply(chrs, length)) ##what happened there... chr1 = sapply(chrs, function(x) x[1]) table(chr1)

onC1 = (chr1 =="1") onC1[is.na(onC1)] = FALSE sum(onC1) lls = unlist(as.list(hgu95av2LOCUSID)) badll = duplicated(lls) badllnames = names(badll)

onC1unique = onC1 & !badll

##this is not quite right myLLunique = !duplicated(unlist(mget(geneNames(ALLs3), hgu95av2LOCUSID)))

myCr = unlist(mget(geneNames(ALLs3), hgu95av2CHR))

myC1 = (myCr == "1") myC1[is.na(myC1)] = FALSE

myC1unique = myC1 & myLLunique

################################################### ### chunk number 8: loadGOstats ################################################### library(GOstats)

mfhyper = GOHyperG(myLLs[myLLunique])

################################################### ### chunk number 9: setuplotting ################################################### ##chose the nodes with small p-values whGO = resT$index[resT$adjp<0.05] gNsel <- geneNames(ALLs2)[whGO] gGO <- makeGOGraph(gNsel, "MF", "hgu95av2")

nL <- rep("", length(nodes(gGO))) names(nL) <- nodes(gGO) nA <- list()

gGhyp.pv <- mfhyper$pv[nodes(gGO)]

gCols <- ifelse(gGhyp.pv < 0.1, "tomato", "lightblue") names(gCols) = names(gGhyp.pv) #no names lbs = rep("", length(nodes(gGO))) names(lbs) = nodes(gGO) nA$label = lbs

nA$fillcolor = gCols

################################################### ### chunk number 10: graphplot ###################################################

if(require("Rgraphviz") ) plot(gGO, nodeAttrs=nA)

################################################### ### chunk number 11: getMF ################################################### affyGO = as.list(hgu95av2GO) #find the MF terms affyMF = lapply(affyGO, function(x) { onts = sapply(x, function(z) z$Ontology) if( is.null(onts) || is.na(onts) ) NA else unique(names(onts)[onts=="MF"]) })

################################################### ### chunk number 12: goGraph ###################################################

affyMF[5] ggs = lapply(affyMF[5], function(x) { if(is.null(x) ) return(NULL)

ans=NULL for(i in 1:length(x)) ans[[i]] = oneGOGraph(x[i], GOMFPARENTS) a1 = ans[[1]] if( length(x) == 1 ) return(a1) for(j in 2:length(x)) a1 = combGOGraph(a1, ans[[j]]) return(a1) }) ggs

################################################### ### chunk number 13: shortestPath ################################################### library(RBGL)

dd1 = dijkstra.sp(ugraph(ggs[[1]]), "GO:0003674") max(dd1$distance) if( require(Rgraphviz) && interactive() ) plot(ggs[[1]])

News
2009-10-26

BioC 2.5, consisting of 352 packages and designed to work with R 2.10.z, was released today.

2009-01-07

R, the open source platform used by Bioconductor, featured in a series of articles in the New York Times.