### R code from vignette source 'vignettes/SANTA/inst/doc/SANTA.Rnw'

###################################################
### code chunk number 1: Ropts
###################################################
options(width=70)


###################################################
### code chunk number 2: SANTA.Rnw:153-157
###################################################
# generate the simulated network
require(SANTA)
require(igraph)
g <- barabasi.game(n=500, power=1, m=1, directed=F) 


###################################################
### code chunk number 3: SANTA.Rnw:162-165
###################################################
# measure the distance between pairs of vertices in g
dist.method <- "shortest.paths"
D <- DistGraph(g, dist.method=dist.method, verbose=F) 


###################################################
### code chunk number 4: SANTA.Rnw:170-172
###################################################
# place the distances into discreet bins
B <- BinGraph(D, verbose=F)


###################################################
### code chunk number 5: SANTA.Rnw:177-199
###################################################
cluster.size <- 5
s.to.use <- c(10, 20, 50, 100, 500)
n.trials <- 10
pvalues <- array(0, dim=c(n.trials, length(s.to.use)), 
                 dimnames=list(NULL, as.character(s.to.use)))

# run the trials for each value of s
for (s in s.to.use) {        
    for (i in 1:n.trials) {
        # generate the hit set
        seed.vertex <- sample(vcount(g), 1) # identify a seed node
        sample.set <- order(D[seed.vertex, ])[1:s]
        hit.set <- sample(sample.set, cluster.size)
        
        # measure the stength of association
        g <- set.vertex.attribute(g, name="hits", 
                                  value=as.numeric(1:vcount(g) %in% hit.set))
        pvalues[i, as.character(s)] <- Knet(g, nperm=100, 
                                            dist.method=dist.method, vertex.attr="hits", 
                                            B=B, verbose=F)$pval
    }
}


###################################################
### code chunk number 6: simulation_plot
###################################################
boxplot(-log10(pvalues), xlab="cutoff", ylab="-log10(p-value)")


###################################################
### code chunk number 7: fig_simulation_plot
###################################################
boxplot(-log10(pvalues), xlab="cutoff", ylab="-log10(p-value)")


###################################################
### code chunk number 8: SANTA.Rnw:218-220
###################################################
# cleanup
rm(B, D, g)


###################################################
### code chunk number 9: SANTA.Rnw:235-247
###################################################
# create the network
n.nodes <- 12
edges <- c(1,2, 1,3, 1,4, 2,3, 2,4, 3,4, 1,5, 5,6, 
           2,7, 7,8, 4,9, 9,10, 3,11, 11,12)
weights1 <- weights2 <- rep(0, n.nodes)
weights1[c(1,2)] <- 1
weights2[c(5,6)] <- 1 

g <- graph.empty(n.nodes, directed=F)
g <- add.edges(g, edges)
g <- set.vertex.attribute(g, "weights1", value=weights1)
g <- set.vertex.attribute(g, "weights2", value=weights2)


###################################################
### code chunk number 10: case_2_weights
###################################################
par(mfrow=c(1,2))

colors <- rep("grey", n.nodes)
colors[which(weights1 == 1)] <- "red"
g <- set.vertex.attribute(g, "color", value=colors)
plot(g)

colors <- rep("grey", n.nodes)
colors[which(weights2 == 1)] <- "red"
g <- set.vertex.attribute(g, "color", value=colors)
plot(g)

par(mfrow=c(1,1))
g <- remove.vertex.attribute(g, "color")


###################################################
### code chunk number 11: fig_case_2_weights
###################################################
par(mfrow=c(1,2))

colors <- rep("grey", n.nodes)
colors[which(weights1 == 1)] <- "red"
g <- set.vertex.attribute(g, "color", value=colors)
plot(g)

colors <- rep("grey", n.nodes)
colors[which(weights2 == 1)] <- "red"
g <- set.vertex.attribute(g, "color", value=colors)
plot(g)

par(mfrow=c(1,1))
g <- remove.vertex.attribute(g, "color")


###################################################
### code chunk number 12: SANTA.Rnw:281-288
###################################################
# set 1
Knet(g, nperm=100, vertex.attr="weights1", verbose=F)$pval
Compactness(g, nperm=100, vertex.attr="weights1", verbose=F)$pval

# set 2
Knet(g, nperm=100, vertex.attr="weights2", verbose=F)$pval
Compactness(g, nperm=100, vertex.attr="weights2", verbose=F)$pval


###################################################
### code chunk number 13: SANTA.Rnw:308-314
###################################################
# load igraph objects
data(g.costanzo.raw)
data(g.costanzo.cor)
networks <- list(raw=g.costanzo.raw, cor=g.costanzo.cor)
network.names <- names(networks)
network.genes <- V(networks$raw)$name # genes are identical across networks


###################################################
### code chunk number 14: SANTA.Rnw:319-332
###################################################
# obtain the GO term associations from the org.Sc.sgd.db package
library(org.Sc.sgd.db)
xx <- as.list(org.Sc.sgdGO2ALLORFS)
go.terms <- c("GO:0000082", "GO:0003682", "GO:0007265", 
              "GO:0040008", "GO:0090329")
# apply the GO terms to the networks
for (name in network.names) {
    for (go.term in go.terms) {
        networks[[name]] <- set.vertex.attribute(
            networks[[name]], name=go.term, 
            value=as.numeric(network.genes %in% xx[[go.term]]))
    }
} 


###################################################
### code chunk number 15: SANTA.Rnw:337-343
###################################################
# results <- list()
# for (name in network.names) {
#     results[[name]] <- Knet(networks[[name]], nperm=1000, 
#         vertex.attr=go.terms, edge.attr="distance")
#     results[[name]] <- sapply(results[[name]], function(res) res$pval)
# }


###################################################
### code chunk number 16: SANTA.Rnw:348-353
###################################################
# p.values <- array(unlist(results), dim=c(length(go.terms), length(network.names)), dimnames=list(go.terms, network.names))
# p.values.ml10 <- -log10(p.values)
# axis.range <- c(0, max(p.values.ml10)) 
# plot(p.values.ml10[, "raw"], p.values.ml10[, "cor"], asp=1, xlim=axis.range, ylim=axis.range, bty="l", xlab="-log10 of the p-value in the raw GI network", ylab="-log10 of the p-value in the GI-correlation network", main="")
# abline(0, 1, col="red")


###################################################
### code chunk number 17: SANTA.Rnw:366-368
###################################################
# cleanup
rm(g.costanzo.cor, g.costanzo.raw, network.genes, networks, xx)


###################################################
### code chunk number 18: SANTA.Rnw:384-390
###################################################
# load igraph objects
data(g.bandyopadhyay.treated)
data(g.bandyopadhyay.untreated)
networks <- list(treated=g.bandyopadhyay.treated, 
                 untreated=g.bandyopadhyay.untreated)
network.names <- names(networks)


###################################################
### code chunk number 19: SANTA.Rnw:395-403
###################################################
# obtain GO term associations
library(org.Sc.sgd.db)
xx <- as.list(org.Sc.sgdGO2ALLORFS)
associated.genes <- xx[["GO:0006974"]] # change to use alternative GO terms
associations <- sapply(networks, function(g) as.numeric(V(g)$name %in%
    associated.genes), simplify=F)
networks <- sapply(network.names, function(name) set.vertex.attribute(
    networks[[name]], "rdds", value=associations[[name]]), simplify=F)


###################################################
### code chunk number 20: SANTA.Rnw:410-415
###################################################
# results <- sapply(networks, function(g) Knet(g, nperm=1000, 
#     dist.method="shortest.paths", vertex.attr="rdds", 
#     edge.attr="distance"), simplify=F)
# p.values <- sapply(results, function(res) res$pval)
# p.values


###################################################
### code chunk number 21: SANTA.Rnw:422-424
###################################################
# plot(results$treated)
# plot(results$untreated)


###################################################
### code chunk number 22: SANTA.Rnw:437-439
###################################################
# cleanup
rm(xx, networks, g.bandyopadhyay.treated, g.bandyopadhyay.untreated, associated.genes)


###################################################
### code chunk number 23: SANTA.Rnw:450-455
###################################################
# laod igraph object
data(g.srivas.high)
data(g.srivas.untreated)
networks <- list(high=g.srivas.high, untreated=g.srivas.untreated)
network.names <- names(networks)


###################################################
### code chunk number 24: SANTA.Rnw:460-468
###################################################
# obtain GO term associations
library(org.Sc.sgd.db)
xx <- as.list(org.Sc.sgdGO2ALLORFS)
associated.genes <- xx[["GO:0000725"]] 
associations <- sapply(networks, function(g) as.numeric(V(g)$name %in% 
    associated.genes), simplify=F)
networks <- sapply(network.names, function(name) set.vertex.attribute(
    networks[[name]], "dsbr", value=associations[[name]]), simplify=F)


###################################################
### code chunk number 25: SANTA.Rnw:473-477
###################################################
# p.values <- sapply(networks, function(g) Knet(g, nperm=1000, 
#     dist.method="shortest.paths", vertex.attr="dsbr", 
#     edge.attr="distance")$pval)
# p.values


###################################################
### code chunk number 26: SANTA.Rnw:482-484
###################################################
# cleanup
rm(xx, networks, g.srivas.high, g.srivas.untreated)


###################################################
### code chunk number 27: SANTA.Rnw:499-504
###################################################
# import and convert RNAi data
data(rnai.cheung)
rnai.cheung <- -log10(rnai.cheung)
rnai.cheung[!is.finite(rnai.cheung)] <- max(rnai.cheung[is.finite(rnai.cheung)])
rnai.cheung[rnai.cheung < 0] <- 0


###################################################
### code chunk number 28: SANTA.Rnw:509-520
###################################################
# import and create IntAct network
data(edgelist.intact)
g.intact <- graph.edgelist(as.matrix(edgelist.intact), directed=FALSE)

# import data and create HumanNet network
data(edgelist.humannet)
g.humannet <- graph.edgelist(as.matrix(edgelist.humannet)[, 1:2], 
    directed=FALSE)
g.humannet <- set.edge.attribute(g.humannet, "distance", 
    value=edgelist.humannet$distance)
networks <- list(intact=g.intact, humannet=g.humannet)


###################################################
### code chunk number 29: SANTA.Rnw:525-541
###################################################
network.names <- names(networks)
network.genes <- sapply(networks, get.vertex.attribute, name="name", simplify=F)
rnai.cheung.genes <- rownames(rnai.cheung)
cancers <- colnames(rnai.cheung)

for (cancer in cancers) {
    for (name in network.names) {
        vertex.weights <-rep(NA, vcount(networks[[name]]))
        names(vertex.weights) <- network.genes[[name]]
        common.genes <- rnai.cheung.genes[rnai.cheung.genes %in% 
            network.genes[[name]]] 
        vertex.weights[common.genes] <- rnai.cheung[common.genes, cancer]
        networks[[name]] <- set.vertex.attribute(networks[[name]], 
            cancer, value=vertex.weights)
    }
}


###################################################
### code chunk number 30: SANTA.Rnw:546-549
###################################################
#knet.res <- sapply(networks, Knet, nperm=100, dist.method="shortest.paths", 
#   vertex.attr=cancers, edge.attr="distance", simplify=F)
#p.values <- sapply(knet.res, function(i) sapply(i, function(j) j$pval))


###################################################
### code chunk number 31: SANTA.Rnw:554-556
###################################################
# cleanup
rm(rnai.cheung, rnai.cheung.genes, networks, network.genes, edgelist.humannet, edgelist.intact, common.genes, g.humannet, g.intact)


###################################################
### code chunk number 32: SANTA.Rnw:570-571
###################################################
#library(BioNet)


###################################################
### code chunk number 33: SANTA.Rnw:576-585
###################################################
# # required parameters
# n.nodes <- 1000
# n.hits <- 10
# clusters <- 3
# 
# # create network and spread hits across 3 clusters
# g <- barabasi.game(n=n.nodes, power=1, m=1, directed=FALSE) 
# g <- SpreadHits(g, h=n.hits, clusters=clusters, distance.cutoff=12, 
#     lambda=10, dist.method="shortest.paths", verbose=FALSE)


###################################################
### code chunk number 34: SANTA.Rnw:590-597
###################################################
# # simulate p-values
# library(msm)
# hits <- which(get.vertex.attribute(g, "hits") == 1)
# p.values <- runif(vcount(g))
# names(p.values) <- as.character(1:vcount(g))
# p.values[as.character(hits)] <- rtnorm(n.hits * clusters, mean=0, 
#     sd=10e-6, lower=0, upper=1)


###################################################
### code chunk number 35: SANTA.Rnw:602-606
###################################################
# # apply BioNet to the network and p-values
# bum <- fitBumModel(p.values, plot=F)
# scores <- scoreNodes(network=g, fb=bum, fdr=0.1)
# module <- runFastHeinz(g, scores)


###################################################
### code chunk number 36: SANTA.Rnw:611-615
###################################################
# # apply Knode to the network
# g <- set.vertex.attribute(g, name="pheno", value=-log10(p.values))  
# knode.results <- Knode(g, dist.method="diffusion", 
#     vertex.attr="pheno", verbose=FALSE)


###################################################
### code chunk number 37: SANTA.Rnw:620-625
###################################################
# # number of hits identified by BioNet 
# sum(hits %in% as.numeric(V(module)$name))
# 
# # number of hits ranked within the top 30 by Knode
# sum(hits %in% as.numeric(names(knode.results)[1:(n.hits * clusters)]))


###################################################
### code chunk number 38: bionet_simulation
###################################################
# # create subnetworks
# g.bionet <- g.knode <- induced.subgraph(g, hits)
# color.bionet <- color.knode <- rep("grey", vcount(g.bionet))
# color.bionet[hits %in% as.numeric(V(module)$name)] <- "blue"
# color.knode[hits %in% as.numeric(names(
#   knode.results)[1:(n.hits * clusters)])] <- "green"
# g.bionet <- set.vertex.attribute(g.bionet, "color", value=color.bionet)
# g.knode <- set.vertex.attribute(g.knode, "color", value=color.knode)
# 
# # plot subnetworks
# par(mfrow=c(1,2))
# plot(g.bionet)
# plot(g.knode)


###################################################
### code chunk number 39: SANTA.Rnw:654-656
###################################################
# # cleanup
# rm(vertex.weights, p.values, g)


###################################################
### code chunk number 40: SANTA.Rnw:673-680
###################################################
# # load required package
# library(SANTA)
# library(BioNet)
# library(DLBCL)
# data(exprLym) 
# data(dataLym) 
# data(interactome) 


###################################################
### code chunk number 41: SANTA.Rnw:685-692
###################################################
# # extract entrez IDs
# library(stringr)
# 
# # aggregate p-values
# pvals <- cbind(dataLym$t.pval, dataLym$s.pval)
# pval <- aggrPvals(pvals, order=2, plot=F)
# names(pval) <- dataLym$label


###################################################
### code chunk number 42: SANTA.Rnw:699-704
###################################################
# # derive Lymphochip-specific network
# network <- subNetwork(featureNames(exprLym), interactome)
# network <- largestComp(network) # use only the largest component
# network <- igraph.from.graphNEL(network, name=T, weight=T) 
# network <- simplify(network)


###################################################
### code chunk number 43: SANTA.Rnw:709-715
###################################################
# # run BioNet on the Lymphochip-specific network and aggregate p-values
# fb <- fitBumModel(pval, plot=F)
# scores <- scoreNodes(network, fb, fdr=0.001)
# module <- runFastHeinz(network, scores)
# extract.entrez <- function(x) str_extract(str_extract(x, "[(][0-9]+"), "[0-9]+")
# bionet.genes <- extract.entrez(V(module)$name)


###################################################
### code chunk number 44: SANTA.Rnw:720-728
###################################################
# # convert p-values to vertex weights
# vertex.weights <- -log10(pval)[get.vertex.attribute(network, "name")]
# network <- set.vertex.attribute(network, name="pheno", value=vertex.weights)
# 
# # run Knode on the Lymphochip-specific network and converted aggregate p-values
# knode.results <- Knode(network, dist.method="diffusion", 
#     vertex.attr="pheno", verbose=F)
# knode.genes <- extract.entrez(names(knode.results)[1:vcount(module)])


###################################################
### code chunk number 45: SANTA.Rnw:733-736
###################################################
# data(go.entrez)
# sum(go.entrez %in% bionet.genes)
# sum(go.entrez %in% knode.genes)


###################################################
### code chunk number 46: SANTA.Rnw:739-741
###################################################
# # cleanup
# rm(dataLym, exprLym, interactome, network, pval, pvals)


###################################################
### code chunk number 47: SANTA.Rnw:750-751
###################################################
toLatex(sessionInfo())