## ----style, eval=TRUE, echo=FALSE, results="asis"-------------------------- BiocStyle::latex() ## ----include=FALSE--------------------------------------------------------- knitr::opts_chunk$set(concordance=TRUE, resize.width="0.45\\textwidth", fig.align='center', tidy = FALSE, message=FALSE) ## ----echo=FALSE, cache=FALSE, message=FALSE-------------------------------- knitr::set_parent(file.path('..', 'PGPC.Rnw')) ## ----setup, results='hide'------------------------------------------------- library("PGPC") if(!file.exists(file.path("result","data"))) dir.create(file.path("result","data"),recursive=TRUE) if(!file.exists(file.path("result","Figures"))) dir.create(file.path("result","Figures"),recursive=TRUE) ## ----load, eval=FALSE------------------------------------------------------ # data("interactions", package="PGPC") ## ----load2, eval=FALSE----------------------------------------------------- # ?interactions ## ----echo=FALSE, cache=FALSE, message=FALSE-------------------------------- knitr::set_parent(file.path('..', 'PGPC.Rnw')) library(PGPC) if(!file.exists(file.path("result","data"))) dir.create(file.path("result","data"),recursive=TRUE) ## ----imageProcessing, eval=FALSE------------------------------------------- # localPath = tempdir() # serverURL = system.file("extdata", package = "PGPC") # # imageConfFile = file.path("conf", "imageconf.txt") # # ## circumvent memory problem on 32bit windows by segementing only spot 1. # if(.Platform$OS.type == "windows" & R.Version()$arch == "i386") # imageConfFile = file.path("conf", "imageconf_windows32.txt") # # x = parseImageConf(imageConfFile, localPath=localPath, serverURL=serverURL) # # unames = getUnames(x) ## select all wells for processing # unames = c("045-01-C23") ## select single well for demonstration # # segmentWells(x, unames, file.path("conf", "segmentationpar.txt")) # # PGPC:::extractFeaturesWithParameter(x, # unames, # file.path("conf", "featurepar.txt")) # # summarizeWellsExtended(x, unames, file.path("conf", "featurepar.txt")) # # ## PGPC:::mergeProfiles(x) only needed if wells were processed in parallel # # ftrs = readHTS(x, type="file", # filename=file.path("data", "profiles.tab"), # format="tab") ## ----convertData1,eval=FALSE----------------------------------------------- # data("ftrs", package="PGPC") # dim(ftrs) # ftrnames = colnames(ftrs[,-1]) ## ----convertData2, eval=FALSE---------------------------------------------- # makeArray <- function(df, nLines=12, nWells=384, nPlates=4, nRep=2){ # ## df = list of features ordered by unames # if(! nLines*nWells*nPlates*nRep == nrow(df)) # stop("Wrong dimensions!") # # ## order by replicate # replicate <- rep(c(rep(1, nWells), rep(2, nWells)), # nrow(df)/(nWells*nRep)) # df = df[order(replicate),] # # ## create matrix # D = as.matrix(df[,-1]) # # ## convert Matrix: x=Gene/wells, y=Celllines, z=replicates # dim(D) = c(nPlates*nWells,nLines,nRep,ncol(D)) # invisible(D) # } # # D = makeArray(ftrs) # dim(D) # # ## free space and save full data matrix # rm("ftrs") # datamatrixfull = list(D = D, anno=list(ftr=ftrnames)) # save(datamatrixfull, file=file.path("result","data","datamatrixfull.rda"), # compress="xz") # rm("datamatrixfull") ## ----annotation1, eval=FALSE----------------------------------------------- # drug = read.delim( # system.file("extdata", # "conf", # "Changed_LOPAC3_384_convert96er_GeneID_updated_Selectivity.txt", # package = "PGPC"), # stringsAsFactors=FALSE, # encoding="latin1") # # drug$Plate = NULL # drug = unique(drug) # drug$Content[drug$GeneID == "no treatment"] = "empty" # drug$Content[grep("ctrl", drug$GeneID)] = drug$GeneID[grep("ctrl", drug$GeneID)] # drug$Content[grep("neg", drug$GeneID)] = drug$GeneID[grep("neg", drug$GeneID)] ## ----annotation2, eval=FALSE----------------------------------------------- # line <- data.frame(name = c('02-006', '02-031', # '02-004', '104-009', 'PAR007', # '104-001', '104-004', '104-007', # '104-008', '02-008', '02-030', 'PAR001'), # mutationDetailed = c('AKT1-/-&AKT2-/-', 'MEK2-/-', # 'AKT1-/-', 'CTNNB1 mt-/wt+', 'PARENTAL007', # 'P53-/-', 'PTEN-/-', 'PI3KCA mt-/wt+', 'KRAS mt-/wt+', # 'BAX-/-', 'MEK1-/-', 'PARENTAL001'), # mutation = c('AKT1/2', 'MEK2', # 'AKT1', 'CTNNB1 wt', 'HCT116 P2', # 'P53', 'PTEN', 'PI3KCA wt', 'KRAS wt', # 'BAX', 'MEK1', 'HCT116 P1'), # startPlate=seq(1, (4*12), by = 4), # stringsAsFactors=FALSE) # # anno=list(drug=drug, line=line, repl=1:2, ftr=ftrnames) # save(anno, file=file.path("result","data","anno.rda"), compress="bzip2") ## ----removeEmptyWells, eval=FALSE------------------------------------------ # emptyWell = anno$drug$Content == "empty" # # screenlog <- read.delim(system.file("extdata", # "conf", # "2013-01-10_LOPACScreenlog_plate.txt", # package = "PGPC"), # stringsAsFactors=FALSE) # allPlates = screenlog$Well[screenlog$Plate=="*"] # flaggedWell = anno$drug$Well %in% allPlates # # singlePlate = screenlog[screenlog$Plate!="*",] # singlePlate$Plate = paste("P", singlePlate$Plate, sep="") # for (i in 1:nrow(singlePlate)){ # flaggedWell[singlePlate$Plate[i] == anno$drug$Plate & # singlePlate$Well[i] == anno$drug$Well] = TRUE # } # # D <- D[!(emptyWell | flaggedWell),,,] # # anno$drug <- anno$drug[!(emptyWell | flaggedWell),] # rownames(anno$drug) <- 1:nrow(anno$drug) ## ----removeFeature1,eval=FALSE--------------------------------------------- # levels = apply(D, 4, function(x) length(unique(c(x)))) # # D = D[,,,!(levels < 2000)] # anno$ftr = anno$ftr[-which(levels < 2000)] # # datamatrixWithAnnotation = list(D=D, anno=anno) # save(datamatrixWithAnnotation, # file=file.path("result","data","datamatrixWithAnnotation.rda"), # compress="xz") ## ----glog------------------------------------------------------------------ glog <- function(x, c) { log2((x+sqrt(x^2+c^2))/2) } ## ----transformation1, eval=FALSE------------------------------------------- # D = datamatrixWithAnnotation$D # quantile <- apply(D, 4, quantile, probs=0.05) # # zeroQt <- quantile == 0 # quantile[zeroQt] <- 0.05*apply(D[,,,zeroQt], 4, max) # # for (i in seq_len(dim(D)[4])) { # D[,,,i] = glog(c(D[,,,i]), c=quantile[i]) # } # # datamatrixTransformed = list(D=D, anno=datamatrixWithAnnotation$anno) # save(datamatrixTransformed, # file=file.path("result","data","datamatrixTransformed.rda"), # compress="xz") ## ----transformation2------------------------------------------------------- if(exists("datamatrixWithAnnotation")) rm(datamatrixWithAnnotation) if(!exists("datamatrixTransformed")){ data("datamatrixTransformed", package="PGPC") } else { PGPC:::checkConsistency("datamatrixTransformed") } ## ----echo=FALSE, cache=FALSE, message=FALSE-------------------------------- knitr::set_parent(file.path('..', 'PGPC.Rnw')) library(PGPC) if(!file.exists(file.path("result","data"))) dir.create(file.path("result","data"),recursive=TRUE) ## ----loadDatamatrixTransformed, echo=FALSE--------------------------------- if(!exists("datamatrixTransformed")){ data("datamatrixTransformed", package="PGPC") } ## ----qualityControlFeatures1, cache=TRUE, dependson = c(-1)---------------- getCorrelation <- function(d){ df <- do.call(rbind, lapply(seq_along(d$anno$ftr), function(i) { data.frame(name=d$anno$ftr[i], cor=cor(c(d$D[,,1,i]), c(d$D[,,2,i]), use="pairwise.complete.obs"), stringsAsFactors=FALSE) } ) ) df } correlation = getCorrelation(datamatrixTransformed) correlation = correlation[order(correlation$cor, decreasing=TRUE),] ## ----correlation1, fig.cap=c("Correlation of features. This figure is the basis for Figure 1B in the paper."), cache=TRUE, dependson = c(-1), dev.args=list(pointsize=16)---- selection = c("n", "nseg.dna.m.eccentricity.qt.0.01", "nseg.0.m.cx.mean") plot(1:nrow(correlation), correlation$cor, ylab="correlation coefficient", xlab="feature", pch=20) abline(h=0.7) points(which(correlation$name %in% selection), correlation$cor[correlation$name %in% selection], pch=19, col=c(2,3,4)) legend("bottomleft", legend=selection, col=c(2,3,4), pch=19, cex=0.6) ## ----singleFeatureCorrelation, fig.cap=c("Correlation of feature n, nseg.dna.m.eccentricity.qt.0.01 and nseg.0.m.cx.mean"), cache=TRUE, dependson = c(-1), fig.show='hold', resize.width="0.3\\textwidth"---- for (i in seq_along(selection)){ plot(c(datamatrixTransformed$D[,,1,match(selection[i], datamatrixTransformed$anno$ftr)]), c(datamatrixTransformed$D[,,2,match(selection[i], datamatrixTransformed$anno$ftr)]), xlab="replicate 1", ylab="replicate 2", main = selection[i], pch=20, col="#00000033", asp=1) } ## ----correlation3, eval=TRUE, cache=TRUE, dependson = c(-1)---------------- ## remove columns with low correlation or few levels remove = correlation$name[correlation$cor < 0.7] D = datamatrixTransformed$D[,,, -match(remove, datamatrixTransformed$anno$ftr)] anno = datamatrixTransformed$anno anno$ftr = anno$ftr[-match(remove, anno$ftr)] datamatrixFiltered = list(D=D, anno=anno) save(datamatrixFiltered, file=file.path("result","data","datamatrixFiltered.rda"), compress="xz") rm(datamatrixTransformed) ## ----stabilitySelection1, eval=FALSE, cache=TRUE--------------------------- # D = apply(datamatrixFiltered$D, # 2:4, # function(x) { (x - median(x,na.rm=TRUE)) / mad(x, na.rm=TRUE) } ) # D[!is.finite(D)] = 0 # dim(D) = c(prod(dim(D)[1:2]),2,dim(D)[4]) # # forStabilitySelection = list(D=D, # Sample = 1:prod(dim(D)[1:2]), # phenotype = datamatrixFiltered$anno$ftr) # # preselect=c("n") # # selected <- PGPC:::selectByStability(forStabilitySelection, # preselect, # Rdim=40) # save(selected, file=file.path("result","data","selected.rda"), compress="xz") ## ----featureSelection1_, fig.cap=c("Correlation of selected feature after subtraction of the information containted in the previously selected features."), cache=TRUE, fig.width=6.3, fig.height=4.2, dev.args=list(pointsize=8), dependson = c(-2)---- if(!exists("selected")){ data("selected", package="PGPC") } else { PGPC:::checkConsistency("selected") } selectedFtrs = selected$selected correlation = selected$correlation ratioPositive = selected$ratioPositive par(mfrow=c(2,1)) barplot(correlation, names.arg=PGPC:::hrNames(selectedFtrs), las=2, col = ifelse(ratioPositive > 0.5, brewer.pal(3,"Pastel1")[2], brewer.pal(3,"Pastel1")[1]), ylab = "information gain") ## ----featureSelection2_, fig.cap=c("Fraction of positive correlations. This figure is the basis for Figure 1C and Appendix Figure S2A in the paper."), cache=TRUE, fig.width=6.3, fig.height=4.2, dev.args=list(pointsize=8), dependson = c(-1)---- par(mfrow=c(2,1)) barplot(ratioPositive-0.5, names.arg=PGPC:::hrNames(selectedFtrs), las=2, col = ifelse(ratioPositive > 0.5, brewer.pal(3,"Pastel1")[2], brewer.pal(3,"Pastel1")[1]), offset=0.5, ylab = "fraction positive correlated") #abline(a=0.5,0) selectedFtrs=selectedFtrs[ratioPositive > 0.5] D = datamatrixFiltered$D[,,,match(selectedFtrs, datamatrixFiltered$anno$ftr)] anno = datamatrixFiltered$anno anno$ftr = anno$ftr[match(selectedFtrs, anno$ftr)] datamatrixSelected = list(D=D, anno=anno) save(datamatrixSelected, file=file.path("result","data","datamatrixSelected.rda"), compress="xz") ## ----calculateInteractions1_, eval=TRUE, cache=TRUE, results='hide', echo=TRUE, dependson = c(-1), dev.args=list(pointsize=16), fig.show='hold'---- neg = grepl("neg", datamatrixSelected$anno$drug$GeneID) pos = grepl("Paclitaxel", datamatrixSelected$anno$drug$GeneID) negMedians = apply(datamatrixSelected$D[neg,,,1], c(2,3), median) posMedians = apply(datamatrixSelected$D[pos,,,1], c(2,3), median) Nnorm = (datamatrixSelected$D[,,,1] - rep(posMedians, each=dim(datamatrixSelected$D)[1])) / (rep(negMedians, each=dim(datamatrixSelected$D)[1]) - rep(posMedians, each=dim(datamatrixSelected$D)[1])) datamatrixSelected$D[,,,1] <- Nnorm plot(c(datamatrixSelected$D[!(neg | pos),,1,1]), c(datamatrixSelected$D[!(neg | pos),,2,1]), xlab="replicate 1", ylab="replicate 2", main = "n", pch=20, col="grey", asp=1) points(c(datamatrixSelected$D[pos,,1,1]), c(datamatrixSelected$D[pos,,2,1]), col=2, pch=20) points(c(datamatrixSelected$D[neg,,1,1]), c(datamatrixSelected$D[neg,,2,1]), col=4, pch=20) ## ----calculateInteractions2_, cache=TRUE, results='hide', echo=TRUE, dependson = c(-1)---- interactions = getInteractions(datamatrixSelected, datamatrixSelected$anno$ftr, eps = 1e-4, maxiter = 100) save(interactions, file=file.path("result","data", "interactions.rda"), compress="xz") ## check consistency PGPC:::checkConsistency("interactions") ## ----echo=FALSE, cache=FALSE, message=FALSE-------------------------------- knitr::set_parent(file.path('..', 'PGPC.Rnw')) library(PGPC) if(!file.exists(file.path("result","data"))) dir.create(file.path("result","data"),recursive=TRUE) ## ----plotPhenotypes1, cache=TRUE, fig.height=8, fig.width=8, dev.args=list(pointsize=8), fig.show='hold',fig.cap=c("Phenoprints of the two parental cell lines and the CTNNB1 WT background. This figure is the basis for Figure 1E-K, Figure 2A and Appendix Figure S2B in the paper.")---- if(!exists("interactions")) data(interactions, package="PGPC") D = interactions$D D2 = D dim(D2) = c(prod(dim(D2)[1:2]),dim(D2)[3],dim(D2)[4]) SD = apply(D2, 3, function(m) apply(m, 2, mad, na.rm=TRUE)) MSD = apply(SD, 2, function(x) { median(x,na.rm=TRUE) } ) pAdjusted = interactions$pVal[,,,2] bin <- function(x) (x-min(x)) / (max(x)-min(x)) ## normalize by mean SD D = apply(D, c(1, 2, 4), mean) for (i in 1:dim(D)[3]) { D[,,i] = bin(D[,,i] / MSD[i]) } dimnames(D) = list(template = paste(interactions$anno$drug$GeneID), query = interactions$anno$line$mutation, phenotype = PGPC:::hrNames(interactions$anno$ftr)) dimnames(pAdjusted) = dimnames(D) drugPheno <- data.frame(name = c("DMSO", "PD98", "Vinblastin", "Vincristine", "Ouabain", "Rottlerin", "Etoposide", "Amsacrine", "Colchicine", "BIX"), GeneID = c("neg ctr DMSO", "79902", "80101", "80082", "79817", "79926", "79294", "79028", "79184", "80002"), stringsAsFactors=FALSE) drugPheno$annotationName = interactions$anno$drug$Name[match(drugPheno$GeneID, interactions$anno$drug$GeneID)] drugPositions <- match(drugPheno$GeneID, dimnames(D)$template) # define order of ftrs in plot: #feature clustering #4 nseg.0.m.majoraxis.mean --> nuclear shape #12 nseg.dna.m.eccentricity.sd --> nuclear shape #7 nseg.dna.h.var.s2.mean --> nuclear texture #13 nseg.dna.h.idm.s1.sd --> nuclear texture #17 nseg.dna.h.cor.s2.sd --> nuclear texture #1 n --> cell number #6 cseg.act.h.f12.s2.sd --> cellular texture #15 cseg.act.h.asm.s2.mean --> cellular texture #18 cseg.dnaact.b.mad.mean --> cellular texture #16 cseg.dnaact.h.den.s2.sd --> cellular texture #10 cseg.dnaact.b.mean.qt.0.05 --> cellular texture #3 cseg.act.h.cor.s1.mean --> cellular texture #19 cseg.act.h.idm.s2.sd --> cellular texture #14 cseg.dnaact.h.f13.s1.mean --> cellular texture #9 cseg.0.s.radius.min.qt.0.05 --> cellular shape #5 cseg.dnaact.m.eccentricity.sd --> cellular shape #11 cseg.act.m.eccentricity.mean --> cellular shape #2 cseg.act.m.majoraxis.mean --> cellular shape #20 nseg.0.s.radius.max.qt.0.05 --> nuclear shape #8 nseg.0.m.eccentricity.mean --> nuclear shape orderFtr <- c(4,12,7,13,17,1,6,15,18,16,10,3,14,19,9,5,11,2,20,8) ## select cell lines par001 <- match("HCT116 P1", dimnames(D)$query) Dselect <- D[drugPositions,par001,] Dselect <- Dselect[, orderFtr] stars(Dselect, len = 0.8, key.loc = c(8, 2), labels=drugPheno$name, col.stars=rainbow(dim(Dselect)[1]), main = "Phenotypes of HCT116 P1", draw.segments = FALSE, scale=FALSE) par007 <- match("HCT116 P2", dimnames(D)$query) Dselect <- D[drugPositions,par007,] Dselect <- Dselect[, orderFtr] stars(Dselect, len = 0.8, key.loc = c(8, 2), labels=drugPheno$name, col.stars=rainbow(dim(Dselect)[1]), main = "Phenotypes of HCT116 P2", draw.segments = FALSE, scale=FALSE) mtCtnb1KO <- match("CTNNB1 wt", dimnames(D)$query) Dselect <- D[drugPositions,mtCtnb1KO,] Dselect <- Dselect[, orderFtr] stars(Dselect, len = 0.8, key.loc = c(8, 2), labels=drugPheno$name, col.stars= rainbow(dim(Dselect)[1]), main = "Phenotypes of CTNNB1 wt", draw.segments = FALSE, scale=FALSE) ## ----plotPhenotypes2, cache=TRUE, fig.height=8, fig.width=8, dev.args=list(pointsize=8), fig.show='hold', fig.cap=c("Phenoprints of the all cell lines for a DMSO control. This figure is the basis for Expanded View Figure EV2 in the paper.")---- ## neg controls for all cell lines drugPositions <- match("neg ctr DMSO", dimnames(D)$template) Dselect <- D[drugPositions,,orderFtr] stars(Dselect, len = 0.8, key.loc = NULL, labels=dimnames(D)$query, col.stars= rainbow(dim(Dselect)[1]), main = "Phenotypes of DMSO control for all cell lines", draw.segments = FALSE, scale=FALSE) ## ----echo=FALSE, cache=FALSE, message=FALSE-------------------------------- knitr::set_parent(file.path('..', 'PGPC.Rnw')) library(PGPC) if(!file.exists(file.path("result"))) dir.create(file.path("result"),recursive=TRUE) ## ----figureLabels, cache=TRUE, echo=FALSE---------------------------------- pi1_fig.cap = c("Interaction profiles") pi1_fig.subcap = c("Interaction profile of Bendamustine. This figure is the basis for Figure 4A in the paper.", "Interaction profile of Disulfiram. This figure is the basis for Figure 4C in the paper.", "Interaction profile of Colchicine in the parental cell line and the CTNNB1 WT background. This figure is the basis for Figure 2B in the paper.", "Interaction profile of BIX01294 in the parental cell line and the CTNNB1 WT background. This figure is the basis for Figure 2B in the paper.") pi2_fig.cap=c("Interaction profiles") pi2_fig.subcap=c("Interaction profile of Bendamustine. This figure is the basis for Appendix Figure S7A in the paper.", "Interaction profile of Disulfiram. This figure is the basis for Appendix Figure S7B in the paper.", "Interaction profile of Colchicine in the parental cell line and the CTNNB1 WT background. This figure is the basis for Appendix Figure S3 in the paper.", "Interaction profile of BIX01294 in the parental cell line and the CTNNB1 WT background. This figure is the basis for Appendix Figure S3 in the paper.") ow = '.45\\textwidth' ## ----plotInteractions1_, cache=TRUE, dependson=c(-1), fig.show='hold', fig.cap=pi1_fig.cap, fig.subcap=pi1_fig.subcap, out.width=ow---- #### plot interactions for 1 drug as radar plot of all celllines #### including ftrs as segments... if(!exists("interactions")) data(interactions, package="PGPC") int = interactions$res dim(int) = c(prod(dim(int)[1:2]),dim(int)[3],dim(int)[4]) dim(int) SD = apply(int, 3, function(m) apply(m, 2, mad, na.rm=TRUE)) MSD = apply(SD, 2, function(x) { median(x,na.rm=TRUE) } ) pAdjusted = interactions$pVal[,,,2] bin <- function(x) (x-min(x)) / (max(x)-min(x)) int = apply(interactions$res, c(1, 2, 4), mean) for (i in 1:dim(int)[3]) { int[,,i] = int[,,i] / MSD[i] int[,,i] = bin(int[,,i]) } dimnames(int) = list(template = paste(interactions$anno$drug$GeneID), query = interactions$anno$line$mutation, phenotype = interactions$anno$ftr) dimnames(pAdjusted) = dimnames(int) drugPheno <- data.frame(name = c("Bendamustine", "Disulfiram", "Colchicine", "BIX01294"), GeneID = c("79497", "80038", "79184", "80002"), stringsAsFactors=FALSE) drugPheno$annotationName = interactions$anno$drug$Name[match(drugPheno$GeneID, interactions$anno$drug$GeneID)] ##order of featurs for plot ftrLevels=c("n", "nseg.dna.h.cor.s2.sd", "nseg.dna.h.idm.s1.sd", "nseg.dna.h.var.s2.mean", "nseg.dna.m.eccentricity.sd", "nseg.0.m.majoraxis.mean", "nseg.0.m.eccentricity.mean", "nseg.0.s.radius.max.qt.0.05", "cseg.act.m.majoraxis.mean" , "cseg.act.m.eccentricity.mean", "cseg.dnaact.m.eccentricity.sd", "cseg.0.s.radius.min.qt.0.05", "cseg.act.h.idm.s2.sd", "cseg.dnaact.h.f13.s1.mean", "cseg.act.h.cor.s1.mean", "cseg.dnaact.b.mean.qt.0.05", "cseg.dnaact.h.den.s2.sd", "cseg.dnaact.b.mad.mean", "cseg.act.h.asm.s2.mean", "cseg.act.h.f12.s2.sd" ) ## define background colors for phenogroups backgroundColors = c("black", rep("grey60", 3), rep("grey40", 4), rep("grey20", 4), rep("grey80", 8)) ## order of mutations for plot mutationOrder = c("HCT116 P1", "HCT116 P2", "PI3KCA wt", "AKT1", "AKT1/2", "PTEN", "KRAS wt", "MEK1", "MEK2", "CTNNB1 wt", "P53", "BAX") ### plot radar for each drug showing all cell lines for(i in seq_len(nrow(drugPheno))){ drugPosition <- match(drugPheno$GeneID[i], dimnames(int)$template) Dselect <- int[drugPosition,,] pAdjustedSelect <- pAdjusted[drugPosition,,] featureDf <- data.frame(Dselect) featureDf$line <- rownames(featureDf) featureDf = melt(featureDf, id.vars="line", variable.name="feature") pAdjustedDf <- data.frame(pAdjustedSelect) pAdjustedDf$line <- rownames(pAdjustedDf) pAdjustedDf <- melt(pAdjustedDf, id.vars="line", variable.name="feature", value.name="pAdjusted") featureDf <- merge(featureDf, pAdjustedDf, sort=FALSE) pAdjustedThresh = 0.01 ## order features and cell lines featureDf$feature = factor(featureDf$feature, levels=ftrLevels, ordered=TRUE) featureDf$line <- factor(featureDf$line, levels=mutationOrder) theme_new = theme_bw() #theme_new$text$size = 3 #theme_new$axis.text$size = rel(0.2) theme_new$axis.text.x = element_blank() maxInt = max(featureDf$value) colors = colorRampPalette(c("black", "grey90"))(length(unique(featureDf$line))) if(i==1) colors[grep("AKT1/2", featureDf$line)[1]] = "red" if(i==2) colors[grep("MEK1", featureDf$line)[1]] = "red" if(i > 2) { colors = c("red", "black") featureDf <- subset(featureDf, line %in% c("HCT116 P1", "CTNNB1 wt")) featureDf$line = gsub("HCT116 P1", "CTNNB1 mut", featureDf$line) } ## remove grid theme_new$panel.grid.major$colour="white" if(i < 3){ barColor = "lightblue" } if(i==3){ barColor = "darkslateblue" } if(i==4){ barColor = "mediumvioletred" } starplot <- ggplot(featureDf) + facet_wrap(~line, ncol=3) + geom_bar(aes(feature, maxInt*1.2), fill=rep(backgroundColors,nrow(featureDf)/length(backgroundColors)), stat="identity", width = 1) + geom_bar(aes(feature, maxInt), fill="white", stat="identity", width = 1) + geom_bar(aes(feature, value), fill=barColor, stat="identity") + coord_polar(start=-pi/nlevels(featureDf$feature)) + ylim(c(0,maxInt*1.2)) + geom_bar(aes(feature, value), data = featureDf[featureDf$pAdjusted < pAdjustedThresh,], fill="red", stat="identity") + geom_point(aes(feature, maxInt*1.1), data = featureDf[featureDf$pAdjusted < pAdjustedThresh,], pch=8, col=2) + theme_new + labs(title = paste0("Interactions of ", drugPheno$name[i])) print(starplot) } ## ----plotInteractionsAllScaled_, cache=TRUE, fig.show='hold', eval=FALSE, dependson=c(-1)---- # #### plot interactions for 1 drug as radar plot of all celllines # #### including ftrs as segments... # # featureDf <- do.call(rbind, # lapply(seq_len(dim(int)[1]), # function(i){ # tmp=data.frame(int[i,,]) # tmp$GeneID = dimnames(int)[[1]][i] # tmp$line <- rownames(tmp) # tmp # })) # featureDf = melt(featureDf, id.vars=c("GeneID", "line"), # variable.name="feature") # # pAdjustedDf <- do.call(rbind, # lapply(seq_len(dim(pAdjusted)[1]), # function(i){ # tmp=data.frame(pAdjusted[i,,]) # tmp$GeneID = dimnames(pAdjusted)[[1]][i] # tmp$line <- rownames(tmp) # tmp # })) # pAdjustedDf <- melt(pAdjustedDf, # id.vars=c("GeneID", "line"), # variable.name="feature", # value.name="pAdjusted") # # featureDf <- merge(featureDf, pAdjustedDf, sort=FALSE) # # ## remove controls # featureDf <- subset(featureDf, !grepl("ctr", GeneID)) # featureDf$Name <- with(interactions$anno$drug, # Name[match(featureDf$GeneID, GeneID)]) # # # pAdjustedThresh = 0.01 # ## just keep drugs with interactions # featureDf <- subset(featureDf, # GeneID %in% unique(GeneID[pAdjusted < pAdjustedThresh])) # # ## order features and cell lines # featureDf$feature = factor(featureDf$feature, # levels=ftrLevels, # ordered=TRUE) # featureDf$line <- factor(featureDf$line, levels=mutationOrder) # # maxInt = max(featureDf$value) # # colors = colorRampPalette(c("black", "grey90"))(length(unique(featureDf$line))) # # # theme_new = theme_bw(base_size=5) + # theme(panel.grid.major = element_blank(), # panel.grid.minor = element_blank(), # axis.text.y = element_text(size=3)) # #theme_new$text$size = 3 # #theme_new$axis.text$size = rel(0.2) # theme_new$axis.text.x = element_blank() # # # barColor = "lightblue" # # allplots <- lapply(unique(featureDf$GeneID[order(featureDf$Name)]), # function(id){ # subset=subset(featureDf, GeneID %in% id) # starplot <- ggplot(subset) + # facet_grid(GeneID ~ line) + # geom_bar(aes(feature, maxInt*1.2), # fill=rep(backgroundColors,nrow(featureDf)/length(backgroundColors)), # stat="identity", # width = 1) + # geom_bar(aes(feature, maxInt), # fill="white", # stat="identity", # width = 1) + # geom_bar(aes(feature, value), # fill=barColor, # stat="identity") + # coord_polar(start=-pi/nlevels(featureDf$feature)) + # ylim(c(0,maxInt*1.2)) + # geom_bar(aes(feature, value), # data = subset[subset$pAdjusted < pAdjustedThresh,], # fill="red", # stat="identity") + # geom_point(aes(feature, maxInt*1.1), # data = subset[subset$pAdjusted < pAdjustedThresh,], # pch=8, # col=2, # size=0.3) + # labs(y="Interaction score", x="", title=unique(subset$Name)) + # theme_new # invisible(starplot) # }) # # library(gridExtra) # ggsave(file.path("result", "all_plots.pdf"), # do.call(marrangeGrob, # c(allplots, list(nrow=8, ncol=1, top=NULL))), # width=8.27, height=11.7) ## ----plotInteractions2_, cache=TRUE, fig.show='hold', dependson=c(-1), fig.cap=pi2_fig.cap, fig.subcap=pi2_fig.subcap, out.width=ow---- #### plot interactions for 1 drug as radar plot of all celllines #### including ftrs as segments... int = apply(interactions$res, c(1, 2, 4), mean) for (i in 1:dim(int)[3]) { int[,,i] = int[,,i] / MSD[i] } ## use abs value and replace values larger than 10 by 10 direction <- sign(int) int = abs(int) dimnames(int) = list(template = paste(interactions$anno$drug$GeneID), query = interactions$anno$line$mutation, phenotype = interactions$anno$ftr) dimnames(direction) = dimnames(int) ### plot radar for each drug showing all cell lines for(i in seq_len(nrow(drugPheno))){ drugPosition <- match(drugPheno$GeneID[i], dimnames(int)$template) Dselect <- int[drugPosition,,] pAdjustedSelect <- pAdjusted[drugPosition,,] directionSelect <- direction[drugPosition,,] featureDf <- data.frame(Dselect) featureDf$line <- rownames(featureDf) featureDf = melt(featureDf, id.vars="line", variable.name="feature") pAdjustedDf <- data.frame(pAdjustedSelect) pAdjustedDf$line <- rownames(pAdjustedDf) pAdjustedDf <- melt(pAdjustedDf, id.vars="line", variable.name="feature", value.name="pAdjusted") directionDf <- data.frame(directionSelect) directionDf$line <- rownames(directionDf) directionDf <- melt(directionDf, id.vars="line", variable.name="feature", value.name="direction") directionDf$direction = c("negative", "positive")[ifelse(directionDf$direction < 0, 1, 2)] featureDf <- merge(featureDf, pAdjustedDf, sort=FALSE) featureDf <- merge(featureDf, directionDf, sort=FALSE) pAdjustedThresh = 0.01 ## order features and cell lines featureDf$feature = factor(featureDf$feature, levels=ftrLevels, ordered=TRUE) featureDf$line <- factor(featureDf$line, levels=mutationOrder) theme_new = theme_bw() #theme_new$text$size = 3 #theme_new$axis.text$size = rel(0.2) theme_new$axis.text.x = element_blank() maxInt = max(featureDf$value) colors = colorRampPalette(c("black", "grey90"))(length(unique(featureDf$line))) if(i==1) colors[grep("AKT1/2", featureDf$line)[1]] = "red" if(i==2) colors[grep("MEK1", featureDf$line)[1]] = "red" if(i > 2) { colors = c("red", "black") featureDf <- subset(featureDf, line %in% c("HCT116 P1", "CTNNB1 wt")) featureDf$line = gsub("HCT116 P1", "CTNNB1 mut", featureDf$line) } ## remove grid theme_new$panel.grid.major$colour="white" starplot <- ggplot(featureDf, aes(fill=direction)) + facet_wrap(~line, ncol=3) + geom_bar(aes(feature, maxInt*1.2), fill=rep(backgroundColors,nrow(featureDf)/length(backgroundColors)), stat="identity", width = 1) + geom_bar(aes(feature, maxInt), fill="white", stat="identity", width = 1) + geom_bar(aes(feature, value), stat="identity") + coord_polar(start=-pi/nlevels(featureDf$feature)) + ylim(c(0,maxInt*1.2)) + geom_point(aes(feature, maxInt*1.1), data = featureDf[featureDf$pAdjusted < pAdjustedThresh,], pch=8, col=2) + theme_new + labs(title = paste0("Interactions of ", drugPheno$name[i])) print(starplot) } ## ----plotInteractionsAll_, cache=TRUE, fig.show='hold', eval=FALSE, dependson=c(-1)---- # #### plot interactions for 1 drug as radar plot of all celllines # #### including ftrs as segments... # # featureDf <- do.call(rbind, # lapply(seq_len(dim(int)[1]), # function(i){ # tmp=data.frame(int[i,,]) # tmp$GeneID = dimnames(int)[[1]][i] # tmp$line <- rownames(tmp) # tmp # })) # featureDf = melt(featureDf, id.vars=c("GeneID", "line"), # variable.name="feature") # # pAdjustedDf <- do.call(rbind, # lapply(seq_len(dim(pAdjusted)[1]), # function(i){ # tmp=data.frame(pAdjusted[i,,]) # tmp$GeneID = dimnames(pAdjusted)[[1]][i] # tmp$line <- rownames(tmp) # tmp # })) # pAdjustedDf <- melt(pAdjustedDf, # id.vars=c("GeneID", "line"), # variable.name="feature", # value.name="pAdjusted") # # directionDf <- do.call(rbind, # lapply(seq_len(dim(pAdjusted)[1]), # function(i){ # tmp=data.frame(direction[i,,]) # tmp$GeneID = dimnames(direction)[[1]][i] # tmp$line <- rownames(tmp) # tmp # })) # directionDf <- melt(directionDf, id.vars=c("GeneID", "line"), variable.name="feature", value.name="direction") # directionDf$direction = c("negative", "positive")[ifelse(directionDf$direction < 0, 1, 2)] # # featureDf <- merge(featureDf, pAdjustedDf, sort=FALSE) # featureDf <- merge(featureDf, directionDf, sort=FALSE) # # ## remove controls # featureDf <- subset(featureDf, !grepl("ctr", GeneID)) # featureDf$Name <- with(interactions$anno$drug, # Name[match(featureDf$GeneID, GeneID)]) # # # pAdjustedThresh = 0.01 # ## just keep drugs with interactions # featureDf <- subset(featureDf, # GeneID %in% unique(GeneID[pAdjusted < pAdjustedThresh])) # # ## order features and cell lines # featureDf$feature = factor(featureDf$feature, # levels=ftrLevels, # ordered=TRUE) # featureDf$line <- factor(featureDf$line, levels=mutationOrder) # # maxValue=10 # # colors = colorRampPalette(c("black", "grey90"))(length(unique(featureDf$line))) # # # theme_new = theme_bw(base_size=5) + # theme(panel.grid.major = element_blank(), # panel.grid.minor = element_blank(), # legend.key.size = unit(.3, "cm"), # axis.text.y = element_text(size=3)) # #theme_new$text$size = 3 # #theme_new$axis.text$size = rel(0.2) # theme_new$axis.text.x = element_blank() # # # barColor = "lightblue" # # allplots <- lapply(unique(featureDf$GeneID[order(featureDf$Name)]), # function(id){ # subset=subset(featureDf, GeneID %in% id) # subset$maxInt = ifelse(max(subset$value) < maxValue, # maxValue, # max(subset$value)) # # starplot <- ggplot(subset, aes(fill=direction)) + # facet_grid(GeneID ~ line) + # geom_bar(aes(feature, maxInt*1.2), # fill=rep(backgroundColors,nrow(featureDf)/length(backgroundColors)), # stat="identity", # width = 1) + # geom_bar(aes(feature, maxInt), # fill="white", # stat="identity", # width = 1) + # geom_bar(aes(feature, value), # stat="identity") + # coord_polar(start=-pi/nlevels(featureDf$feature)) + # #ylim(c(0,maxInt*1.2)) + # scale_y_continuous() + # geom_point(aes(feature, maxInt*1.1), # data = subset[subset$pAdjusted < pAdjustedThresh,], # pch=8, # col=2, # size=0.3, # show_guide=FALSE) + # labs(y="Interaction score", x="", title=unique(subset$Name)) + # theme_new # invisible(starplot) # }) # # library(gridExtra) # ggsave(file.path("result", "all_plots_unscaled.pdf"), # do.call(marrangeGrob, # c(allplots, list(nrow=8, ncol=1, top=NULL))), # width=8.27, height=11.7) # ## ----echo=FALSE, cache=FALSE, message=FALSE-------------------------------- knitr::set_parent(file.path('..', 'PGPC.Rnw')) library(gplots) if(!file.exists(file.path("result"))) dir.create(file.path("result"),recursive=TRUE) ## ----figureLabelsCellLineClustering, cache=TRUE, echo=FALSE---------------- clRaw_fig.cap = paste("Clustering of cell lines based on the raw values of the", "selected features. This figure is the basis for", "Appendix Figure S5B in the paper.") clInt_fig.cap = paste("Clustering of cell lines based on the interaction", "profiles of the selected features. This figure is the", "basis for Appendix Figure S5C in the paper.") ## ----clusterRawFtrCelllinesSelectedControls, cache=TRUE, fig.width=8, fig.height=8, fig.show='hold', fig.cap=clRaw_fig.cap---- if(!exists("interactions")) data("interactions", package="PGPC") drugAnno = interactions$anno$drug filterFDR = function(d, pAdjusted, pAdjustedThresh = 0.1){ select = pAdjusted <= pAdjustedThresh select[is.na(select)] = FALSE selectedRows = apply(select, 1, any) d[selectedRows,,] } D = interactions$D D2 = D dim(D2) = c(prod(dim(D2)[1:2]),dim(D2)[3],dim(D2)[4]) SD = apply(D2, 3, function(m) apply(m, 2, mad, na.rm=TRUE)) MSD = apply(SD, 2, function(x) { median(x,na.rm=TRUE) } ) D = apply(D, c(1, 2, 4), mean) for (i in 1:dim(D)[3]) { D[,,i] = (D[,,i] - median(D[,,i])) / MSD[i] } dimnames(D) = list(template = paste(interactions$anno$drug$GeneID), query = interactions$anno$line$mutation, phenotype = interactions$anno$ftr) ## filter FDR pAdjustedThresh = 0.01 pAdjusted = interactions$pVal[,,,2] Dfilter = filterFDR(D, pAdjusted, pAdjustedThresh) ## combine controls Dfilter = apply(Dfilter, c(2,3), function(x) tapply(x, dimnames(Dfilter)$template, mean)) ctrlToKeep = c("ctrl Paclitaxel", "ctrl U0126", "ctrl Vinblastin") Dfilter = Dfilter[!grepl("ctr", dimnames(Dfilter)[[1]]) | dimnames(Dfilter)[[1]] %in% ctrlToKeep,,] celllineCorrelation = PGPC:::getCorr(aperm(Dfilter, c(2, 1, 3)), drugAnno) celllineDist = PGPC:::trsf(celllineCorrelation) hc <- hclust(as.dist(celllineDist)) heatmap.2(celllineDist, Colv = as.dendrogram(hc), Rowv = as.dendrogram(hc), trace="none", col=colorRampPalette(c("darkblue", "white"))(64), breaks = c(seq(0,0.5999,length.out=64),0.6), margin=c(9,9)) tmp = par(mar=c(5, 4, 4, 10) + 0.1) plot(as.dendrogram(hc), horiz=TRUE) par(tmp) save(celllineDist, file=file.path("result", "celllineDist.rda")) ## ----clusterCelllinesSelectedControls, cache=TRUE, fig.width=8, fig.height=8, fig.show='hold', dependson = c(-1), fig.cap=clInt_fig.cap---- PI = interactions$res PI2 = PI ##aperm(PI, c(1,3,2,4,5)) dim(PI2) = c(prod(dim(PI2)[1:2]),dim(PI2)[3],dim(PI2)[4]) SD = apply(PI2, 3, function(m) apply(m, 2, mad, na.rm=TRUE)) MSD = apply(SD, 2, function(x) { median(x,na.rm=TRUE) } ) ## normalize by mean SD PI = apply(interactions$res, c(1, 2, 4), mean) for (i in 1:dim(PI)[3]) { PI[,,i] = PI[,,i] / MSD[i] } dimnames(PI) = list(template = interactions$anno$drug$GeneID, query = interactions$anno$line$mutation, phenotype = interactions$anno$ftr) PIfilter = filterFDR(PI, pAdjusted, pAdjustedThresh) ## combine controls PIfilter = apply(PIfilter, c(2,3), function(x) tapply(x, dimnames(PIfilter)$template, mean)) PIfilter = PIfilter[!grepl("ctr", dimnames(PIfilter)[[1]]) | dimnames(PIfilter)[[1]] %in% ctrlToKeep,,] celllineCorrelation = PGPC:::getCorr(aperm(PIfilter, c(2, 1, 3)), drugAnno) celllineDist = PGPC:::trsf(celllineCorrelation) hccelllineDist <- as.dendrogram(hclust(as.dist(celllineDist))) ## reorder HCT116 P1 to top lines = rownames(celllineDist) wts = rep(0, length(lines)) wts[match("HCT116 P1", lines)] = 10 hccelllineDist= reorder(hccelllineDist, wts) heatmap.2(celllineDist, Colv = hccelllineDist, Rowv = hccelllineDist, trace="none", col=colorRampPalette(c("darkblue", "white"))(64), breaks = c(seq(0,0.5999,length.out=64),0.6), margin=c(9,9)) tmp = par(mar=c(5, 4, 4, 10) + 0.1) plot(hccelllineDist, horiz=TRUE) par(tmp) ## ----echo=FALSE, cache=FALSE, message=FALSE-------------------------------- knitr::set_parent(file.path('..', 'PGPC.Rnw')) library(PGPC) if(!file.exists(file.path("result"))) dir.create(file.path("result"),recursive=TRUE) ## ----figureLabelsInteractionNetwork, cache=TRUE, echo=FALSE---------------- totalInt_fig.cap = c("Total number of interactions per cell line. This figure is the basis for Figure 2E in the paper.") drugInt_fig.cap = c("Number of drugs with at least one specific interaction per cell line.") pleDeg_fig.cap = c("Pleiotropic degree per cell line. This figure is the basis for Figure 2D in the paper.") intFtr_fig.cap = c("Distributions and overlap of interactions per feature") intFtr_fig.subcap = c("Total interactions per feature.", "Overlap of interactions in the 5 phenotypic classes. This figure is the basis for Figure 2C in the paper.", "Overlap of interactions in the 5 phenotypic classes shown as bar plots.") ow = '.45\\textwidth' ## ----drugCellLineInteractionData_, eval=TRUE, cache=TRUE------------------- library(PGPC) data(interactions, package="PGPC") drugAnno = interactions$anno$drug d = interactions pAdjusted = interactions$pVal[,,,2] dimnames(pAdjusted) = list(template = paste(interactions$anno$drug$GeneID), query = interactions$anno$line$mutation, phenotype = interactions$anno$ftr) pAdjustedThresh = 0.01 result = NULL for (ftr in seq_along(interactions$anno$ftr)){ pAdjusted = d$pVal[,,ftr,2] top = pAdjusted <= pAdjustedThresh r1 = d$res[,,1,ftr][top] r2 = d$res[,,2,ftr][top] rMean = apply(d$res[,,,ftr], c(1,2), mean)[top] unames <- data.frame(well = rep(d$anno$drug$Well, length(d$anno$line$startPlate)), plate= rep(as.numeric(gsub("P", "", d$anno$drug$PlateName)), length(d$anno$line$startPlate)), startPlate = rep(d$anno$line$startPlate, each=nrow(d$anno$drug))) unames$uname = paste(sprintf("%03d", unames$plate + unames$startPlate - 1), unames$well, sep="-") unames = as.matrix(unames$uname) dim(unames) = dim(top) uname = unames[which(top)] ## add annotation selTreatment = which(top) %% dim(top)[1] selTreatment[selTreatment == 0] = dim(top)[1] drug = d$anno$drug[selTreatment,] line = d$anno$line[which(top) %/% dim(top)[1]+1,] topHits = data.frame(ftr = d$anno$ftr[ftr], uname = gsub("-", "-01-", uname), GeneID = drug$GeneID, r1, r2, rMean, pAdjusted = pAdjusted[which(top)], stringsAsFactors=FALSE) topHits = cbind(topHits, line) topHits = cbind(topHits, drugAnno[match(topHits$GeneID, drugAnno$GeneID), -match("GeneID",names(drugAnno))]) topHits = topHits[order(topHits$pAdjusted),] rownames(topHits) = 1:nrow(topHits) result=rbind(result, topHits) } ## add controls to names result$Name[grep("ctr", result$GeneID)] <- result$GeneID[grep("ctr", result$GeneID)] cat("No. of all interactions (including controls):", nrow(result)) ## ----drugCellLineInteractionData3_, eval=TRUE, cache=TRUE, dependson = c(-1), fig.cap=totalInt_fig.cap---- ######################### ## filter interactions ######################### ## 1. remove all controls result <- subset(result, !grepl("ctr", GeneID)) cat("No. of interactions (controls removed):", nrow(result)) cat("No. of drug-cell line interactions (ftrs combined):", nrow(unique(result[,c("GeneID", "mutation")]))) ## genereate summary plots ## number of interactions per line vs. no of drugs / no of interactions per drugs mutationOrder = c("HCT116 P1", "HCT116 P2", "PI3KCA wt", "AKT1", "AKT1/2", "PTEN", "KRAS wt", "MEK1", "MEK2", "CTNNB1 wt", "P53", "BAX") ## number of total interactions per cell line noIntPerLine = table(result$mutation) noIntPerLine = noIntPerLine[mutationOrder] tmp = par(mar=c(6, 4, 4, 2) + 0.1) mp <- barplot(noIntPerLine, ylab="total interactions per cell line", names.arg="") text(mp[,1], par("usr")[3], labels = names(noIntPerLine), srt = 45, adj = c(1.1,1.1), xpd = TRUE, cex=.9) ## ----drugCellLineInteractionData4_, eval=TRUE, cache=TRUE, dependson = c(-1), fig.cap=drugInt_fig.cap---- ## number of interaction drugs per line noDrugsPerLine = sapply(names(noIntPerLine), function(line){ length(unique(result$GeneID[result$mutation==line])) }) noDrugsPerLine mp <- barplot(noDrugsPerLine, ylab="interacting drugs per cell line", names.arg="") text(mp[,1], par("usr")[3], labels = names(noDrugsPerLine), srt = 45, adj = c(1.1,1.1), xpd = TRUE, cex=.9) par(tmp) ## percentage of all possible interactions (removing controls) nrow(result) / ((dim(interactions$res)[1] - sum(grepl("ctr", interactions$anno$drug$GeneID))) * prod(dim(interactions$res)[c(2,4)])) ## percentage of drugs showing an interactions (removing controls) length(unique(result$GeneID)) / (dim(interactions$res)[1] - sum(grepl("ctr", interactions$anno$drug$GeneID))) ## ----drugCellLineInteractionData5_, eval=TRUE, cache=TRUE, dependson = c(-1), fig.cap=pleDeg_fig.cap---- ## pleiotropy degree pleiotropicDegree = sapply(unique(result$GeneID), function(drug) length(unique(subset(result, GeneID==drug)$name))) mp <- barplot(table(pleiotropicDegree), ylab="number of drugs", names.arg="", xlab="pleiotropy degree") text(mp[,1], par("usr")[3], labels = 1:12, adj = c(1.1,1.1), xpd = TRUE, cex=.9) table(pleiotropicDegree) ## ----drugCellLineInteractionData6_, eval=TRUE, cache=TRUE, dependson = c(-1), fig.show='hold'---- ## pleiotropic degree vs. drug effect drugEffect = apply(interactions$effect$drug, c(1,3), mean) dimnames(drugEffect) = list(interactions$anno$drug$GeneID, interactions$anno$ftr) for(ftr in colnames(drugEffect)){ plot(pleiotropicDegree, drugEffect[match(names(pleiotropicDegree), rownames(drugEffect)), ftr], main=ftr, ylab ="drug effect") } ## ----drugCellLineInteractionData7_, eval=TRUE, cache=TRUE, dependson = c(-1), fig.show='hold', fig.cap=intFtr_fig.cap, fig.subcap=intFtr_fig.subcap, out.width=ow---- ## no of interactions per feature noIntPerFtr = table(result$ftr)[interactions$anno$ftr] tmp = par(mar=c(9, 4, 4, 2) + 0.1) mp <- barplot(noIntPerFtr, ylab="total interactions per feature", names.arg="") text(mp[,1], par("usr")[3], labels = names(noIntPerFtr), srt = 45, adj = c(1.1,1.1), xpd = TRUE, cex=.9) par(tmp) result$ftrClass = PGPC:::hrClass(result$ftr) significant = lapply(unique(result$ftrClass), function(selectedFtr) unique(subset(result, ftrClass==selectedFtr)$uname)) names(significant) = unique(result$ftrClass) ## merging the interactions for each category plot(venn(significant)) overlap = sapply(names(significant), function(class1){ sapply(names(significant), function(class2){ length(intersect(significant[[class1]], significant[[class2]])) }) }) barplot(overlap, beside=TRUE, legend=names(significant), args.legend=list(x="topleft")) ## ----drugCellLineInteractionData8_, eval=TRUE, cache=TRUE, dependson = c(-1)---- ## remove low confidence interactions result <- do.call(rbind, lapply(unique(result$name), function(line) { tmp = subset(result, line == name) noFtrPerDrug = table(tmp$GeneID) selectedDrug = names(noFtrPerDrug[noFtrPerDrug > 1]) subset(tmp, GeneID %in% selectedDrug) })) ## remove ambiguous interactions noLinesPerDrug = sapply(unique(result$GeneID), function(drug) length(unique(subset(result, GeneID==drug)$name))) unambigDrugs = names(noLinesPerDrug[noLinesPerDrug <= 3]) result = subset(result, GeneID %in% unambigDrugs) ## ----drugCellLineInteractionData10_, eval=FALSE, cache=TRUE, dependson = c(-1)---- # # ## number of interactions per line vs. no of drugs / no of interactions per drugs # noIntPerLine = table(result$name) # barplot(noIntPerLine) # # noDrugsPerLine = sapply(names(noIntPerLine), # function(line) # length(unique(result$GeneID[result$name==line]))) # # plot(as.vector(noIntPerLine), noDrugsPerLine) # text(as.vector(noIntPerLine), noDrugsPerLine, names(noIntPerLine)) # # noIntPerDrug = table(result$GeneID) # barplot(noIntPerDrug) # # for(line in unique(result$name)){ # barplot(table(result$GeneID[result$name==line]), main=line) # } # # ## features showing interactions per drug # noFtrsPerDrug = sapply(names(noIntPerDrug), # function(drug) # length(unique(result$ftr[result$GeneID==drug]))) # # plot(as.vector(noIntPerDrug), noFtrsPerDrug) # text(as.vector(noIntPerDrug), noFtrsPerDrug, names(noIntPerDrug)) # # hist(noFtrsPerDrug, breaks=20) # # for(i in 1:5) print(sum(noFtrsPerDrug>i)) ## ----drugCellLineInteractionData11_, eval=TRUE, cache=TRUE, dependson = c(-1)---- write.table(result, file=file.path("result", "cytoscapeExportFiltered.txt"), sep="\t", row.names=FALSE) ############################ ## transform to pheno groups ############################ result$ftr = PGPC:::hrClass(result$ftr) columnsToRemove = c("r1", "r2", "rMean", "pAdjusted") result = unique(result[,-match(columnsToRemove, names(result))]) write.table(result, file=file.path("result", "cytoscapeExportFilteredPhenoGroups.txt"), sep="\t", row.names=FALSE) ######################### ## combine features ######################### result = do.call(rbind, lapply(unique(result$uname), function(u){ tmp = subset(result, uname==u) ftr = tmp$ftr tmp = unique(tmp[,-match(c("ftr", "ftrClass"), names(tmp))]) tmp$cellnumber = ifelse("cell number" %in% ftr, 1, 0) tmp$cellshape = ifelse("cell shape" %in% ftr, 1, 0) tmp$celltexture = ifelse("cellular texture" %in% ftr, 1, 0) tmp$nuclearshape = ifelse("nuclear shape" %in% ftr, 1, 0) tmp$nucleartexture = ifelse("nuclear texture" %in% ftr, 1, 0) tmp$noFtrClasses = length(ftr) tmp })) write.table(result, file=file.path("result", "cytoscapeExportFilteredFtrsCombined.txt"), sep="\t", row.names=FALSE) ## ----echo=FALSE, cache=FALSE, message=FALSE-------------------------------- knitr::set_parent(file.path('..', 'PGPC.Rnw')) library(PGPC) if(!file.exists(file.path("result","Figures"))) dir.create(file.path("result","Figures"),recursive=TRUE) ## ----heatmapAll, cache=TRUE, fig.height=9, fig.width=14, resize.width="0.9\\textwidth", dev.args=list(pointsize=6), fig.cap="Heatmap of interaction profiles for all drugs"---- if(!exists("interactions")){ data("interactions", package="PGPC") } PI = interactions$res PI2 = PI ##aperm(PI, c(1,3,2,4,5)) dim(PI2) = c(prod(dim(PI2)[1:2]),dim(PI2)[3],dim(PI2)[4]) SD = apply(PI2, 3, function(m) apply(m, 2, mad, na.rm=TRUE)) MSD = apply(SD, 2, function(x) { median(x,na.rm=TRUE) } ) ## normalize by mean SD PI = apply(interactions$res, c(1, 2, 4), mean) for (i in 1:dim(PI)[3]) { PI[,,i] = PI[,,i] / MSD[i] } dimnames(PI) = list(template = interactions$anno$drug$GeneID, query = interactions$anno$line$mutation, phenotype = interactions$anno$ftr) myColors = c(`Blue`="cornflowerblue", `Black`="#000000", `Yellow`="yellow") colBY = colorRampPalette(myColors)(513) cuts = c(-Inf, seq(-6, -2, length.out=(length(colBY)-3)/2), 0.0, seq(2, 6, length.out=(length(colBY)-3)/2), +Inf) ppiw = .25 ppih = 1.4 fac = 2.2 d = dim(PI) ordTempl = PGPC:::orderDim(PI, 1) ordQuery = PGPC:::orderDim(PI, 2) ordFeat = PGPC:::orderDim(PI, 3) PGPC:::myHeatmap(PI[ordTempl, ordQuery, ordFeat], cuts=cuts, fontsize=10, col=colBY) ## ----heatmapFiltered, cache=TRUE, fig.height=9, fig.width=14, resize.width="0.9\\textwidth", dev.args=list(pointsize=6), dependson = c(-1), fig.cap="Heatmap of interaction profiles for drugs that show at least one specific interaction"---- filterFDR = function(d, pAdjusted, pAdjustedThresh = 0.1){ select = pAdjusted <= pAdjustedThresh select[is.na(select)] = FALSE selectedRows = apply(select, 1, any) d[selectedRows,,] } pAdjustedThreshold = 0.01 pAdjusted = interactions$pVal[,,,2] PIfilter = filterFDR(PI, pAdjusted, pAdjustedThresh = pAdjustedThreshold) PIfilter = apply(PIfilter, c(2,3), function(x) tapply(x, dimnames(PIfilter)$template, mean)) ctrlToKeep = c("ctrl Paclitaxel", "ctrl U0126", "ctrl Vinblastin") ### some other contrls: # ctrlToKeep = c("ctrl Paclitaxel", "ctrl U0126", "ctrl Vinblastin", "ctrl IWP", "ctrl DAPT") PIfilter = PIfilter[!grepl("ctr", dimnames(PIfilter)[[1]]) | dimnames(PIfilter)[[1]] %in% ctrlToKeep,,] ordTempl = PGPC:::orderDim(PIfilter, 1) ordQuery = PGPC:::orderDim(PIfilter, 2) ordFeat = PGPC:::orderDim(PIfilter, 3) PGPC:::myHeatmap(PIfilter[ordTempl,ordQuery,ordFeat], cuts=cuts, fontsize=10, col=colBY) drugAnno = interactions$anno$drug subset = drugAnno[drugAnno$compoundID %in% dimnames(PIfilter)[[1]] & !grepl("ctr", drugAnno$GeneID),] write.table(subset[, c("Name", "GeneID", "Selectivity", "Selectivity_updated")], file=file.path("result", "annotation_selected_compounds.txt"), sep="\t", quote=FALSE, row.names=FALSE) ## ----clusterHeatmap, cache=TRUE, fig.width=8, fig.height=8, resize.width="0.8\\textwidth", dev.args=list(pointsize=2.5), dependson = c(-1), fig.cap="Clustering of interaction profiles for drugs that show at least one interaction."---- PIdist = PGPC:::getDist(PIfilter, drugAnno=drugAnno) hcInt <- as.dendrogram(hclust(as.dist(PIdist))) heatmap.2(PIdist, Colv = hcInt, Rowv = hcInt, trace="none", col=colorRampPalette(c("darkblue", "white"))(64), breaks = c(seq(0,0.5999,length.out=64),0.6), cexRow=.15, cexCol=.15) ## ----clusterHeatmapReordered, cache=TRUE, fig.width=8, fig.height=8, resize.width="0.8\\textwidth", dev.args=list(pointsize=2.5), dependson = c(-1), fig.cap="Clustering of interaction profiles for drugs that show at least one interaction. Clusters of size larger than 2 are colored. The dendrogram and coloring of clusters in this figure are the basis for Figure 5A and Appendix Figure S8 in the paper."---- ## reorder dendrogram wts = rep(0, dim(PIdist)[1]) ## reorder bio cluster inbetween = c(146, 187, 66, 170, 73, 121, 180) wts[inbetween] = 1000 drugIds = sapply(strsplit(rownames(PIdist), " "), "[", 1) ## reorder Etoposide cluster wts[match("79462", drugIds)] = 10 ## reorder calcimycin cluster wts[match("79471", drugIds)] = 5 wts[match("79982", drugIds)] = 10 hcInt = reorder(hcInt, wts) cluster = cutree(as.hclust(hcInt), h=0.6) ## make color table inCl <- table(cluster) cl2col <- data.frame(cl=names(inCl), col="white", stringsAsFactors=FALSE) largerCluster <- names(inCl[inCl>2]) cl2col$col[cl2col$cl %in% largerCluster] <- rainbow(length(largerCluster)) col = cl2col$col[match(cluster, cl2col$cl)] heatmap.2(PIdist, Colv = hcInt, Rowv = hcInt, trace="none", col=colorRampPalette(c("darkblue", "white"))(64), breaks = c(seq(0,0.5999,length.out=64),0.6), cexRow=.15, cexCol=.15, RowSideColors=col) ## ----chemicalSimilarity, results='hide', cache=TRUE, fig.width=8, fig.height=8, resize.width="0.8\\textwidth", dev.args=list(pointsize=2.5), dependson = c(-1), fig.cap="Clustering of drug similarity defined by Tanimoto distance."---- ## read structure file sdfset <- read.SDFset(system.file("extdata", "conf", "LOPAC-regexpr-formated.sdf", package = "PGPC")) ## map GeneIDs and library names to the compound names in the sdf file drugs <- data.frame(GeneID = dimnames(PIfilter)[[1]], stringsAsFactors=FALSE) drugs$Name = interactions$anno$drug$Name[match(drugs$GeneID, interactions$anno$drug$GeneID)] ## add SDF compound name to controls controls <- data.frame(GeneID = c("ctrl DAPT", "ctrl IWP", "ctrl LY", "ctrl Paclitaxel", "ctrl PD", "ctrl RAPA", "ctrl U0126", "ctrl Vinblastin", "ctrl Wortmannin", "ctrl Y27", "neg ctr DMSO"), Name = c("DAPT", "IWP-2", "LY-294,002 hydrochloride", "Taxol", "PD 98,059", "Sirolimus", "U0126", "Vinblastine sulfate salt", "Wortmannin from Penicillium funiculosum", "Y-27632 dihydrochloride", "neg ctr DMSO"), stringsAsFactors=FALSE) findNames <- match(controls$GeneID, drugs$GeneID) drugs$Name[findNames[!is.na(findNames)]] <- controls$Name[!is.na(findNames)] ## get drug names in sdf file and adjust format drugsInSDF <- sapply(SDFset2SDF(sdfset), datablocktag, 5) drugsInSDF[grep(" $", drugsInSDF)] <- gsub(" *$","", drugsInSDF[grep(" $", drugsInSDF)]) ## keep only selected drugs and add selected controls twice to sdf file ctrlToKeep = c("ctrl Paclitaxel", "ctrl U0126", "ctrl Vinblastin") selectedControls <- match(controls$Name[match(ctrlToKeep, controls$GeneID)], sapply(SDFset2SDF(sdfset), datablocktag, 5)) sdfsetControls <- sdfset[selectedControls] cid(sdfsetControls) <- ctrlToKeep sdfsetMerged <- c(sdfset[unique(match(drugs$Name, drugsInSDF))] , sdfsetControls) selectedDrugsInSDF <- drugsInSDF[unique(match(drugs$Name, drugsInSDF))] ## Structure similarity searching and clustering using atom pairs ## use unique(inSDFfile) because controls could appear twice ## Generate atom pair descriptor database for searching apset <- sdf2ap(sdfsetMerged) dummy <- cmp.cluster(db=apset, cutoff=0, save.distances=file.path("result", "distmat.rda")) load(file.path("result", "distmat.rda")) ## annotate distance matrix with GeneIDs and drugnames dimnames(distmat) <- list(c(drugs$GeneID[match(selectedDrugsInSDF, drugs$Name)], ctrlToKeep), c(selectedDrugsInSDF, ctrlToKeep)) hc <- as.dendrogram(hclust(as.dist(distmat), method="single")) heatmap.2(1-distmat, Rowv=hc, Colv=hc, col=colorRampPalette(c( "white","antiquewhite", "darkorange2"))(64), density.info="none", trace="none", main="Structural similarity") ## ----chemicalSimilarityReorderd, results='hide', cache=TRUE, fig.width=8, fig.height=8, resize.width="0.8\\textwidth", dev.args=list(pointsize=2.5), dependson = c(-1), fig.cap="Heatmap of drug similarity defined by Tanimoto distance ordered according to interaction profile similarity."---- ## check correct ordering drugIds = sapply(strsplit(rownames(PIdist), " "), "[", 1) drugIds[grep("ctrl", drugIds)] = gsub(" $", "", rownames(PIdist)[grep("ctrl", drugIds)]) stopifnot(!any(diff(match(drugIds, rownames(distmat))) != 1)) heatmap.2(1-distmat, Rowv=hcInt, Colv=hcInt, col=colorRampPalette(c( "white","antiquewhite", "darkorange2"))(64), density.info="none", trace="none", main="Structural similarity orderd by interaction similarity") ## ----chemicalSimilarityCombined, results='hide', cache=TRUE, fig.width=8, fig.height=8, resize.width="0.8\\textwidth", dev.args=list(pointsize=2.5), dependson = c(-1), fig.cap="Clustering of drug interaction profiles combined with chemical similarity defined by Tanimoto distance. This figure is the basis of Figure 5A in the paper."---- ## plot combined cluster heatmap chemDistOrdered = distmat[order.dendrogram(rev(hcInt)), order.dendrogram(hcInt)] ordered = PIdist[order.dendrogram(rev(hcInt)), order.dendrogram(hcInt)] ordered[nrow(ordered) - row(ordered) + 2 < col(ordered) + 1] = -1-chemDistOrdered[nrow(ordered) - row(ordered) + 2 < col(ordered) + 1] heatmap.2(ordered, Rowv=FALSE, Colv=FALSE, dendrogram="none", col=c(colorRampPalette(c( "white","antiquewhite", "darkorange2"))(64), colorRampPalette(c("darkblue", "white"))(64)), breaks = c(seq(-2,-1,length.out=64), -0.5, seq(0,0.6,length.out=64)), trace="none", main="Structural similarity orderd by interaction similarity") ## ----chemicalSimilarityCombinedCluster, results='hide', cache=TRUE, fig.width=8, fig.height=8, resize.width="0.8\\textwidth", dev.args=list(pointsize=2.5), dependson = c(-1)---- cluster = list(C1 = c(79802, 79184, 80101, 80082, "ctrl Paclitaxel", 80075, "ctrl Vinblastin", 79615, 79607), C2 = c("ctrl U0126", 80091, 79902), C3 = c(79225, 79014), C4=c(79275, 79047), C5=c(79087, 79033), C6=c(78978, 78919), C7=c(79411, 79410), C8=c(79294, 79028, 79812, 79190, 79016), C9=c(79653, 79215, 80136, 79191), C10=c(79104, 79074), C11=c(79497, 79444, 80044, 79819, 79111), C12=c(79926, 79740), C13=c(79164, 80032, 79143), C13B=c(80038, 79164, 80032, 79143, 79064), C14=c(79817, 79229, 79038), C15=c(79165, 79334, 79503), C15B=c(79474, 79165, 79334, 79503), C16=c(79892, 79057, 79922), C17=c(80104, 79837), C18=c(79192, 79122, 79647), C18B=c(79192, 79122, 79647, 79116, 79020), C19=c(78909, 78908, 78894), C19B=c(79859, 78909, 78908, 78894)) ## get reordered drug ids drugIds = sapply(strsplit(rownames(ordered), " "), "[", 1) drugIds[grep("ctrl", drugIds)] = gsub(" $", "", rownames(ordered)[grep("ctrl", drugIds)]) for(clName in names(cluster)){ cl = cluster[[clName]] pdf(file=file.path("result", "Figures", paste0(clName, ".pdf")), width=8, height=8) heatmap.2(ordered[match(cl, drugIds), rev(match(cl, rev(drugIds)))], Rowv=FALSE, Colv=FALSE, dendrogram="none", col=c(colorRampPalette(c("white", "antiquewhite", "darkorange2"))(64), colorRampPalette(c("darkblue", "white"))(64)), breaks = c(seq(-2,-1,length.out=64), -0.5, seq(0,0.6,length.out=64)), trace="none", key=FALSE, main=clName) dev.off() } ## ----clusterHeatmapSingleCellLine, cache=TRUE, fig.width=8, fig.height=8, resize.width="0.8\\textwidth", dev.args=list(pointsize=2.5), dependson = c(-1), fig.cap="Clustering of drug interaction profiles using data from just one parental cell line. This figure is the basis of Appendix Figure S10 in the paper."---- ## remove controls singleCellLine = PIfilter[, match("HCT116 P2", dimnames(PIfilter)[[2]]),] singleDist = PGPC:::getDist(singleCellLine, drugAnno=drugAnno) hcsingleDist <- as.dendrogram(hclust(as.dist(singleDist))) heatmap.2(singleDist, Colv = hcsingleDist, Rowv = hcsingleDist, trace="none", col=colorRampPalette(c("darkblue", "white"))(64), breaks = c(seq(0,0.5999,length.out=64),0.6)) ## ----clusterHeatmapSingleCellLineOrdered, cache=TRUE, fig.width=8, fig.height=8, resize.width="0.8\\textwidth", dev.args=list(pointsize=2.5), dependson = c(-1), fig.cap="Clustering of drug interaction profiles using data from just one parental cell line ordered by the clustering of the whole data set."---- ## combine controls heatmap.2(singleDist, Colv = hcInt, Rowv = hcInt, trace="none", col=colorRampPalette(c("darkblue", "white"))(64), breaks = c(seq(0,0.5999,length.out=64),0.6)) ## ----clusterHeatmapJustCellNumber, cache=TRUE, fig.width=8, fig.height=8, resize.width="0.8\\textwidth", dev.args=list(pointsize=2.5), dependson = c(-1), fig.cap="Clustering of drug interaction profiles using just the cell number feature of all cell lines. This figure is the basis of Appendix Figure 13 in the paper."---- ## combine controls justN = PIfilter[,,match("n", dimnames(PIfilter)[[3]])] justNDist = PGPC:::getDist(justN, drugAnno=drugAnno) hcjustNDist <- as.dendrogram(hclust(as.dist(justNDist))) heatmap.2(justNDist, Colv = hcjustNDist, Rowv = hcjustNDist, trace="none", col=colorRampPalette(c("darkblue", "white"))(64), breaks = c(seq(0,0.5999,length.out=64),0.6)) ## ----clusterHeatmapJustCellNumberOrdered, cache=TRUE, fig.width=8, fig.height=8, resize.width="0.8\\textwidth", dev.args=list(pointsize=2.5), dependson = c(-1), fig.cap="Clustering of drug interaction profiles using just the cell number feature of all cell lines ordered by the clustering of the whole data set."---- ## combine controls heatmap.2(justNDist, Colv = hcInt, Rowv = hcInt, trace="none", col=colorRampPalette(c("darkblue", "white"))(64), breaks = c(seq(0,0.5999,length.out=64),0.6)) ## ----chemicalSimilarity2, results='hide', cache=TRUE, fig.keep='none', dependson = c(-1)---- ###### check tanimoto for specifc compounds.. grepSDFset("BAY 11-7082", sdfset, field="datablock", mode="subset") grepSDFset("BAY 11-7085", sdfset, field="datablock", mode="subset") grepSDFset("Stattic", sdfset, field="datablock", mode="subset") grepSDFset("Ouabain", sdfset, field="datablock", mode="subset") grepSDFset("Dihydroouabain", sdfset, field="datablock", mode="subset") grepSDFset("Brefeldin A", sdfset, field="datablock", mode="subset") # bay7082 is CMP1018 # bay7085 is CMP183 # stattic is 1048 cmp.similarity(apset["CMP1018"], apset["CMP183"]) cmp.similarity(apset["CMP1018"], apset["CMP1048"]) cmp.similarity(apset["CMP183"], apset["CMP1048"]) #plot(sdfset[1018], print=FALSE) #plot(sdfset[183], print=FALSE) #plot(sdfset[1048], print=FALSE) #plot(sdfset[c("1018","183", "1048", "943")], print=FALSE) # ouabain is CMP943? #79229 #79817 # Dihydroouabain is CMP355 # brefeldin is 164 cmp.similarity(apset["CMP355"], apset["CMP943"]) cmp.similarity(apset["CMP355"], apset["CMP164"]) cmp.similarity(apset["CMP943"], apset["CMP164"]) ## ----sharedTargets, cache=TRUE, dev.args=list(pointsize=20), dependson = c(-1)---- ## get mean distance for drugs with shared targets drugAnno = interactions$anno$drug Selectivity <- drugAnno$Selectivity_updated[match(dimnames(PIfilter)[[1]], drugAnno$GeneID)] annoForDensity = drugAnno[match(dimnames(PIfilter)[[1]], drugAnno$GeneID),] plotDensities <- function(d, anno, ...){ M = PGPC:::getCorr(d, drugAnno=anno) sharedClass <- array(FALSE, dim=dim(M)) for(target in unique(Selectivity)){ if(is.na(target)) next targetdrugs = anno$GeneID[anno$Selectivity_updated %in% target] if(length(targetdrugs)>1){ sel = dimnames(M)[[1]] %in% targetdrugs sharedClass[sel, sel] = TRUE } } diag(sharedClass) = TRUE notSharedDist = M[!sharedClass] diag(sharedClass) = FALSE sharedDist = M[sharedClass] classDist = c(sharedDist, notSharedDist) class = c(rep("shared selectivity", length(sharedDist)), rep("no shared selectivity", length(notSharedDist))) multidensity(classDist ~ class, xlab="Correlation between drug profiles", ylim=c(0,2.5), col=c("blue", "red"), ...) tmp <- multiecdf(classDist ~ class, xlab="Correlation between drug profiles", subsample=FALSE, col=c("blue", "red"), ...) integrals <- sapply(lapply(tmp, integrate, lower=min(classDist), upper=max(classDist), subdivisions=length(table(classDist))), function(x) x$value) ## add class numbers integrals = c(integrals, N=table(class)) corOrder = order(classDist, decreasing=TRUE) classDist = classDist[corOrder] class = class[corOrder] ratio = (cumsum(class=="shared selectivity"))/ (cumsum(class=="shared selectivity") + cumsum(class=="no shared selectivity")) plot(classDist, ratio, xlab="correlation coefficient", ylab="shared target fraction", ylim=c(0,1), type="l", ...) abline(h = c(.1, .2, .3), col="grey", lty=2) text(0, 0.3, sum(ratio >= 0.3)) text(0, 0.2, sum(ratio >= 0.2)) text(0, 0.1, sum(ratio >= 0.1)) integrals } ## ----figureLabelsDrugClustering, cache=TRUE, echo=FALSE-------------------- all_fig.cap = c("Distribution of correlation coefficients of the interaction profiles for the whole data set. Pannel (b) is the basis of Figure 5B in the paper.") justN_fig.cap = c("Distribution of correlation coefficients of the interaction profiles using only cell number for all cell lines. Pannel (b) is the basis of Figure 5B in the paper.") parOnly_fig.cap = c("Distribution of correlation coefficients of the interaction profiles using all features of only one parental cell line. Pannel (b) is the basis of Figure 5B in the paper.") fig.subcap = c("Density distribution of correlation coefficients per class", "Cumulative distribution of correlation coefficients per class.", "Fraction of drugs with shared targets plotted against the correlation coefficients.") allStruct_fig.cap = c("Distribution of correlation coefficients of the interaction profiles for the whole data set. Pannel (b) is the basis of Appendix Figure S11A in the paper.") justNStruct_fig.cap = c("Distribution of correlation coefficients of the interaction profiles using only cell number for all cell lines. Pannel (b) is the basis of Appendix Figure S11A in the paper.") parOnlyStruct_fig.cap = c("Distribution of correlation coefficients of the interaction profiles using all features of only one parental cell line. Pannel (b) is the basis of Appendix Figure S11A in the paper.") integral_fig.cap = "Comparison of the area under the curve (AUC) of the cummulative distribution function from the different data sets and classes defined by the target selectivity." integral_fig.subcap = c("Direct comparison of the AUC for each class and data set.", "Comparison of the differece in AUC between classes for the different data sets. This figure ist the basis for Figure 5C in the paper.") integralStruct_fig.cap = "Comparison of the area under the curve (AUC) of the cummulative distribution function from the different data sets and classes defined by the structural similarity." integralStruct_fig.subcap = c("Direct comparison of the AUC for each class and data set.", "Comparison of the differece in AUC between classes for the different data sets. This figure ist the basis for Appendix Figure S11B in the paper.", "Comparison of the differece in AUC between classes for the different data sets and classes defined by target selectivity or structural similarity.") ow = '.45\\textwidth' ## ----correlationDistributionAll, cache=TRUE, fig.width=8, fig.height=8, fig.show='hold', dev.args=list(pointsize=20), dependson = c(-1), fig.cap=all_fig.cap, fig.subcap=fig.subcap, out.width=ow---- intAll <- plotDensities(PIfilter, annoForDensity, lwd=5, xlim=c(-1,1), main=paste("ctrl combined all data,", "all isogenics and all features")) ## ----correlationDistributionJustN, cache=TRUE, fig.width=8, fig.height=8, fig.show='hold', dev.args=list(pointsize=20), dependson = c(-1), fig.cap=justN_fig.cap, fig.subcap=fig.subcap, out.width=ow---- intN <- plotDensities(justN, annoForDensity, lwd=5, xlim=c(-1,1), main="all isogenics, only cell number") ## ----correlationDistributionSingleLine, cache=TRUE, fig.width=8, fig.height=8, fig.show='hold', dev.args=list(pointsize=20), dependson = c(-1), fig.cap=parOnly_fig.cap, fig.subcap=fig.subcap, out.width=ow---- intSingle <- plotDensities(singleCellLine, annoForDensity, lwd=5, xlim=c(-1,1), main="all features, only parental cell line") ## ----correlationDistributionIntegral, cache=TRUE, fig.width=8, fig.height=8, fig.show='hold', dev.args=list(pointsize=20), dependson = c(-1), fig.cap=integral_fig.cap, fig.subcap=integral_fig.subcap, out.width=ow---- integrals <- as.data.frame(rbind(intAll, intN, intSingle)) integrals$type = c("all data", "only cell number,\nall cell lines", "only parental cell line,\nall feature") theme_new = theme_bw() theme_new$text$size = 20 theme_new$axis.text$size = rel(1.2) theme_new = theme_new + theme(axis.text.x = element_text(angle = 45, hjust = 1)) p = ggplot(data=melt(integrals, measure.vars=c("no shared selectivity", "shared selectivity")), aes(x=type, y= value, fill=variable)) dodge <- position_dodge(.5) p = p + geom_bar(stat="identity", position=dodge, width=.5) + theme_new + scale_fill_manual(values = c("blue", "red")) print(p) ## difference integrals$difference = integrals$"no shared selectivity" - integrals$"shared selectivity" integrals$similarity = "Target selectivity" p = ggplot(data=integrals, aes(x=type, y=difference)) p = p + geom_bar(stat="identity", width=.5) + theme_new + labs(title = "Target selectivity") + scale_y_continuous(limits = c(0,.3)) print(p) ## ----TargetsDistancesVsChemicalSimilarity, cache=TRUE, fig.keep='all', fig.width=8, fig.height=8, fig.show='hold', dev.args=list(pointsize=20), dependson = c(-1)---- ## get mean distance for drugs with shared targets plotDensityTanimotoThresh <- function(profile, distmat, tanimotoThresh = 0.6, ...){ similarStructure = distmat < tanimotoThresh M = PGPC:::getCorr(profile, drugAnno=drugAnno) diag(similarStructure) = TRUE notSharedDist = M[!similarStructure] diag(similarStructure) = FALSE sharedDist = M[similarStructure] classDist = c(sharedDist, notSharedDist) class = c(rep("similar structure", length(sharedDist)), rep("different structure", length(notSharedDist))) multidensity(classDist ~ class, xlab="Correlation between drug profiles", ylim=c(0,2.5), col=c("blue", "red"), ...) tmp <- multiecdf(classDist ~ class, xlab="Correlation between drug profiles", subsample=FALSE, col=c("blue", "red"), ...) integrals <- sapply(lapply(tmp, integrate, lower=min(classDist), upper=max(classDist), subdivisions=length(table(classDist))), function(x) x$value) ## add class numbers integrals = c(integrals, N=table(class)) corOrder = order(classDist, decreasing=TRUE) classDist = classDist[corOrder] class = class[corOrder] ratio = (cumsum(class=="similar structure"))/ (cumsum(class=="similar structure") + cumsum(class=="different structure")) plot(classDist, ratio, xlab="correlation coefficient", ylab="shared target fraction", ylim=c(0,1), type="l", ...) abline(h = c(.1, .2, .3), col="grey", lty=2) text(0, 0.3, sum(ratio >= 0.3)) text(0, 0.2, sum(ratio >= 0.2)) text(0, 0.1, sum(ratio >= 0.1)) integrals } ## reorder according to drug order in ctrlCombined stopifnot(!any(diff(match(dimnames(PIfilter)[[1]], rownames(distmat))) != 1)) tanimotoThresh = 0.6 ## ----correlationDistributionStructureAll, cache=TRUE, fig.width=8, fig.height=8, fig.show='hold', dev.args=list(pointsize=20), dependson = c(-1), fig.cap=allStruct_fig.cap, fig.subcap=fig.subcap, out.width=ow---- intAllTanimoto <- plotDensityTanimotoThresh(PIfilter, distmat, tanimotoThres=tanimotoThresh, lwd=5, xlim=c(-1,1), main=paste0("all data,", " Tanimoto dist < ", tanimotoThresh)) ## ----correlationDistributionStructureJustN, cache=TRUE, fig.width=8, fig.height=8, fig.show='hold', dev.args=list(pointsize=20), dependson = c(-1), fig.cap=justNStruct_fig.cap, fig.subcap=fig.subcap, out.width=ow---- intNTanimoto <- plotDensityTanimotoThresh(justN, distmat, tanimotoThres=tanimotoThresh,lwd=5, xlim=c(-1,1), main=paste("all isogenics,", "cellnumber only,", "Tanimoto dist < ", tanimotoThresh)) ## ----correlationDistributionStructureSingleLine, cache=TRUE, fig.width=8, fig.height=8, fig.show='hold', dev.args=list(pointsize=20), dependson = c(-1), fig.cap=parOnlyStruct_fig.cap, fig.subcap=fig.subcap, out.width=ow---- intSingleTanimoto <- plotDensityTanimotoThresh(singleCellLine, distmat, tanimotoThres=tanimotoThresh, lwd=5, xlim=c(-1,1), main=paste("single cell line,", "Tanimoto dist < ", tanimotoThresh)) ## ----correlationDistributionStructureIntegral, cache=TRUE, fig.width=8, fig.height=8, fig.show='hold', dev.args=list(pointsize=20), dependson = c(-1), fig.cap=integralStruct_fig.cap, fig.subcap=integralStruct_fig.subcap, out.width=ow---- integralsTanimoto <- as.data.frame(rbind(intAllTanimoto, intNTanimoto, intSingleTanimoto)) integralsTanimoto$type = c("all data", "only cell number,\nall cell lines", "only parental cell line,\nall feature") theme_new = theme_bw() theme_new$text$size = 20 theme_new$axis.text$size = rel(1.2) theme_new = theme_new + theme(axis.text.x = element_text(angle = 45, hjust = 1)) p = ggplot(data=melt(integralsTanimoto, measure.vars=c("different structure", "similar structure")), aes(x=type, y= value, fill=variable)) dodge <- position_dodge(.5) p = p + geom_bar(stat="identity", position=dodge, width=.5) + theme_new + scale_fill_manual(values = c("blue", "red")) print(p) ## difference integralsTanimoto$difference = integralsTanimoto$"different structure" - integralsTanimoto$"similar structure" integralsTanimoto$similarity = "Chemical similarity" p = ggplot(data=integralsTanimoto, aes(x=type, y=difference)) p = p + geom_bar(stat="identity", width=.5) + theme_new + labs(title = "Chemical similarity") + scale_y_continuous(limits = c(0,.3)) print(p) cols = c("type", "difference", "similarity") p = ggplot(data=rbind(integrals[,cols], integralsTanimoto[,cols]), aes(x=type, y= difference, fill=similarity)) dodge <- position_dodge(.5) p = p + geom_bar(stat="identity", position=dodge, width=.5) + theme_new + scale_fill_manual(values = c("grey", "black")) print(p) ## ----echo=FALSE, cache=FALSE, message=FALSE-------------------------------- knitr::set_parent(file.path('..', 'PGPC.Rnw')) library(PGPC) if(!file.exists(file.path("result"))) dir.create(file.path("result"),recursive=TRUE) ## ----generateCellHTS2reportsDrugCombi, echo=TRUE, results='hide', cache=TRUE---- savePath="result" dataPath=system.file("extdata", "drug_combinations", package="PGPC") Name="drug_combis" LogTransform=TRUE Annotation="2013-10-22drugcombiAnno_2plates.txt" Plateconf="2013-10-22plateconfig_all.txt" Description="Description.txt" NormalizationMethod="NPI" NormalizationScaling="multiplicative" VarianceAdjust="none" SummaryMethod="mean" Score="zscore" PlateList=c("2013-10-22platelist_HCT_72_2layout_8plates_mod.txt", "2013-10-22platelist_DLD_72_2layout_8plates.txt") ## include intensities mySettings = getSettings() mySettings$plateList$intensities$include = TRUE setSettings(mySettings) ## ----generateCellHTS2reportsDrugCombi2, echo=TRUE, results='hide', cache=TRUE, dependson = c(-1)---- for(plate in PlateList){ Outdir = file.path(savePath, paste(gsub("_2layout_8plates.txt|_2layout_8plates_mod.txt", "", plate), NormalizationMethod, VarianceAdjust, sep = "_")) x = readPlateList(plate,path=dataPath, name=Name) x = configure(x, descripFile=Description, confFile=Plateconf,, path=dataPath) xp = normalizePlates(x, log=LogTransform, scale=NormalizationScaling, method=NormalizationMethod, varianceAdjust=VarianceAdjust) xp@state["normalized"] = TRUE xsc = scoreReplicates(xp, sign = "-", method = Score) xsc = summarizeReplicates(xsc, summary = SummaryMethod) xsc = cellHTS2::annotate(xsc, geneIDFile = file.path(dataPath, Annotation)) out = writeReport(raw = x, normalized = xp, scored = xsc, outdir=Outdir, force=TRUE) } ## ----qualityControl, cache=TRUE, eval=TRUE, dependson = c(-1), fig.show='hold'---- experiment = "2013-10-22platelist_HCT_72_NPI_none" ## get the dataframe and check the data in an explorative manner df = read.delim(file.path("result", experiment, "in", "topTable.txt"), stringsAsFactors=FALSE) df$identifier = paste(df$drug1, df$drug2, sep="_") # head(df) # plot correlations p = ggplot(data=df, aes(x=raw_r1_ch1, y= raw_r2_ch1)) p = p + geom_point() + labs(title = gsub("_", " ", gsub("2013-10-22platelist_", "", experiment))) print(p) p = ggplot(data=df, aes(x=raw_r2_ch1, y= raw_r3_ch1)) p = p + geom_point() + labs(title = gsub("_", " ", gsub("2013-10-22platelist_", "", experiment))) print(p) p = ggplot(data=df, aes(x=raw_r1_ch1, y= raw_r3_ch1)) p = p + geom_point() + labs(title = gsub("_", " ", gsub("2013-10-22platelist_", "", experiment))) print(p) p = ggplot(data=df, aes(x=raw_r4_ch1, y= raw_r5_ch1)) p = p + geom_point() + labs(title = gsub("_", " ", gsub("2013-10-22platelist_", "", experiment))) print(p) p = ggplot(data=df, aes(x=raw_r1_ch1, y= raw_r5_ch1)) p = p + geom_point(na.rm=TRUE) + labs(title = gsub("_", " ", gsub("2013-10-22platelist_", "", experiment))) print(p) # plot all p = ggplot(data=melt(df, measure.vars=names(df)[grep("raw_r", names(df))]), aes(x=concdrug1.micro_M., y=value, color=variable)) p = p + facet_wrap(~identifier) + scale_x_log10() + geom_point(na.rm=TRUE) + theme_bw() + labs(title = gsub("_", " ", gsub("2013-10-22platelist_", "", experiment))) print(p) p = ggplot(data=melt(df, measure.vars=names(df)[grep("normalized_r", names(df))]), aes(x=concdrug1.micro_M., y=value, color=variable)) p = p + facet_wrap(~identifier) + scale_x_log10() + geom_point(na.rm=TRUE) + theme_bw() + labs(title = gsub("_", " ", gsub("2013-10-22platelist_", "", experiment))) print(p) ## ----getExpectedValue, dependson = c(-1)----------------------------------- getExpectedValue <- function(df, position1, position2, drugNames, type=c("nonInteraction", "HSA")){ normCols = c("normalized_r1_ch1", "normalized_r2_ch1" , "normalized_r3_ch1", "normalized_r4_ch1" , "normalized_r5_ch1") normCols <- normCols[normCols %in% names(df)] if(type=="nonInteraction"){ tmp <- df[position1,] tmp[,normCols] <- tmp[,normCols] + df[position2,normCols] - 1 tmp$identifier = "expected" tmp$GeneID = gsub(drugNames[1], "expected", tmp$GeneID) tmp$drug1 = gsub(drugNames[1], "expected", tmp$GeneID) tmp$color = "white" } else if(type=="HSA"){ ## test which single compound has the stronges mean effect and use this as expected mean1 <- tapply(unlist(df[position1,normCols]), rep(df$concdrug1.micro_M.[position1], length(normCols)), mean, na.rm=TRUE) mean2 <- tapply(unlist(df[position2,normCols]), rep(df$concdrug1.micro_M.[position2], length(normCols)), mean, na.rm=TRUE) ## take values of drug1 and exchange if drug2 shows a stronger effect for a conc tmp <- df[position1,] concToChange <- names(mean1[mean1 > mean2]) for(conc in concToChange){ concPos <- df$concdrug1.micro_M.[position2] == as.numeric(conc) tmp[concPos,normCols] <- df[position2[concPos],normCols] } tmp$identifier = "expected" tmp$GeneID = gsub(drugNames[1], "expected", tmp$GeneID) tmp$drug1 = gsub(drugNames[1], "expected", tmp$GeneID) tmp$color = "white" } else { stop("type must be 'nonInteraction' or 'HSA'.") } columnsToKeep <- c("plate", normCols, "drug1", "concdrug1.micro_M.", "drug2", "concdrug2.micro_M.", "GeneID", "identifier", "color") invisible(melt(rbind(df, tmp)[,columnsToKeep], measure.vars=normCols)) } ## ----testSynergy, dependson = c(-1)---------------------------------------- testSynergy <- function(df, drugNames, summarizeWells=TRUE){ if(summarizeWells){ ## summarize wells on the same plate meanValue <- tapply(df$value, list(df$GeneID, df$variable, df$plate), mean, na.rm=TRUE) for(i in dimnames(meanValue)[[1]]){ for(j in dimnames(meanValue)[[2]]){ for(k in dimnames(meanValue)[[3]]){ selection <- df$GeneID == i & df$variable == j & df$plate == k if(sum(selection) > 0) df$meanValue[selection] = meanValue[i, j, k] } } } df$value = NULL df = unique(df) colnames(df) <- gsub("meanValue", "value", colnames(df)) } ## test synergy for each drug concentration for(conc in unique(df$concdrug1.micro_M.)){ if(conc==0) next dfConc = df[df$concdrug1.micro_M. == conc, ] test = t.test(dfConc$value[dfConc$identifier == paste(drugNames[1], drugNames[2], sep="_")], dfConc$value[dfConc$identifier == "expected"], alternative="less") df$pvalue[df$concdrug1.micro_M. == conc] = test$p.value df$combi_mean[df$concdrug1.micro_M. == conc] = test$estimate[1] df$expected_mean[df$concdrug1.micro_M. == conc] = test$estimate[2] } ## set pvalue to 1 for 0 drug concentrations df$pvalue[is.na(df$pvalue)] = 1 invisible(df) } ## ----getSummary, dependson = c(-1)----------------------------------------- getSummary <- function(df){ ## calculate mean values and sem ids = paste(df$concdrug1.micro_M., df$identifier) sem <- function(x, ...) sd(x, ...)/sqrt(sum(!is.na((x)))) summary = data.frame(concdrug1.micro_M. = as.vector(tapply(df$concdrug1.micro_M., ids, unique)), identifier = as.vector(tapply(df$identifier, ids, unique)), color = as.vector(tapply(df$color, ids, unique)), pvalue = as.vector(tapply(df$pvalue, ids, unique)), mean = as.vector(tapply(df$value, ids, mean, na.rm=TRUE)), sem = as.vector(tapply(df$value, ids, sem, na.rm=TRUE)), n = as.vector(tapply(!is.na(df$value), ids, sum)), stringsAsFactors=FALSE) summary$identifier = as.factor(summary$identifier) levels(summary$identifier) = levels(df$identifier) summary$color = as.factor(summary$color) levels(summary$color) = levels(df$color) invisible(summary) } ## ----analyseDrugCombi, dependson = c(-1)----------------------------------- analyseDrugCombi <- function(df, combi, plotSingle=TRUE, type="nonInteraction"){ drugNames = unlist(strsplit(combi, "_")) ## select all wells beloning to the combination df_sub = subset(df, grepl(paste(c(paste(drugNames, collapse="_"), paste0(drugNames, "_DMSO")), collapse="|"), df$identifier)) ## head(df_sub) df_sub <- df_sub[order(df_sub$GeneID),] position1 <- which(df_sub$identifier == paste0(drugNames[1], "_DMSO") & !grepl("DMSO_0$|^DMSO_0", df_sub$GeneID)) position2 <- which(df_sub$identifier == paste0(drugNames[2], "_DMSO") & !grepl("DMSO_0$|^DMSO_0", df_sub$GeneID)) if(!identical(gsub(drugNames[1], "", df_sub$GeneID[position1]), gsub(drugNames[2], "", df_sub$GeneID[position2]))) stop("single drug values do not match") ## add color information for plots df_sub$color = "#5CBA48" df_sub$color[position1] = "gray60" df_sub$color[position2] = "gray30" tmp <- getExpectedValue(df_sub, position1, position2, drugNames, type=type) ## manually set colors and order for plots labelOrder = c(paste0(drugNames, "_DMSO"), "expected", paste(drugNames, collapse="_")) tmp$identifier = factor(tmp$identifier, levels = labelOrder) colorForPlot = c("gray60", "gray30", "white", "#5CBA48") tmp$color = factor(tmp$color, levels= colorForPlot) tmp <- testSynergy(tmp, drugNames) if(plotSingle) PGPC:::plotSingleValues(tmp, pthresh=0.05) invisible(getSummary(tmp)) } ## ----drugInteractions, dependson = c(-1)----------------------------------- experiment = "2013-10-22platelist_HCT_72_NPI_none" ## get the dataframe and check the data in an explorative manner df = read.delim(file.path("result", experiment, "in", "topTable.txt"), stringsAsFactors=FALSE) df$identifier = paste(df$drug1, df$drug2, sep="_") df$concdrug1.micro_M. = round(df$concdrug1.micro_M., 4) ## remove problematic replicate 4 df <- df[, -grep("r4", names(df))] theme_new = theme_bw() theme_new$text$size = 20 theme_new$axis.text$size = rel(1.2) ## ----figureLegends, echo=FALSE--------------------------------------------- Bendamustin_fig.cap = "Drug synergies for Bendamustine and AKT inhibitors calculated using an non-interacting model in the HCT116 parental cell line. Error bars, means +- s.e.m" Bendamustin_fig.subcap = c("Normalized viability values for all experiments.", "Line plot of sumarized viability values for selected concentrations.", "Barplot of sumarized viability values for selected concentrations.", "Barplot of drug inhibition for selected concentrations. This figure is the basis of Figure 4B in the paper.") BendamustinHSA_fig.cap = "Drug synergies for Bendamustine and AKT inhibitors calculated using the HSA model in the HCT116 parental cell line. Error bars, means +- s.e.m" BendamustinHSA_fig.subcap = c("Normalized viability values for all experiments.", "Line plot of sumarized viability values for selected concentrations.", "Barplot of sumarized viability values for selected concentrations.", "Barplot of drug inhibition for selected concentrations.") Disulfiram_fig.cap = "Drug synergies for Disulfiram and MEK inhibitors calculated using an non-interacting model in the HCT116 parental cell line. Error bars, means +- s.e.m" Disulfiram_fig.subcap = c("Normalized viability values for all experiments.", "Line plot of sumarized viability values for selected concentrations.", "Barplot of sumarized viability values for selected concentrations.", "Barplot of drug inhibition for selected concentrations. This figure is the basis of Figure 4D in the paper.") DisulfiramHSA_fig.cap = "Drug synergies for Disulfiram and MEK inhibitors calculated using an non-interacting model in the HCT116 parental cell line. Error bars, means +- s.e.m" DisulfiramHSA_fig.subcap = c("Normalized viability values for all experiments.", "Line plot of sumarized viability values for selected concentrations.", "Barplot of sumarized viability values for selected concentrations.", "Barplot of drug inhibition for selected concentrations.") ow = '.45\\textwidth' ## ----drugInteractionsBendatmustin, dependson = c(-2), fig.show='hold', fig.cap=Bendamustin_fig.cap, fig.subcap=Bendamustin_fig.subcap, out.width=ow---- drugCombis = c("Bendamustine_AKTi VIII") ##drugCombis = c("Bendamustine_AKTi VIII", "Bendamustine_PD", "Bendamustine_U0126", ## "Bendamustine_MK2206") ## for better overview we plot only a selection of concentrations concentrations = c("1.25", "2.5", "5", "10") breaks = c(1,3,10) limits = c(1, 11) for (combi in drugCombis){ summary <- analyseDrugCombi(df, combi, plotSingle=TRUE) PGPC:::plotSummary(summary, concentration=concentrations, pthresh=0.05, breaks=breaks, limits=limits) PGPC:::plotSummaryBarplot(summary, concentration=concentrations, pthresh=0.05) PGPC:::plotSummaryBarplotInhibition(summary, concentration=concentrations, pthresh=0.05) } ## ----drugInteractionsBendatmustinHSA, dependson = c(-1), fig.show='hold', fig.cap=BendamustinHSA_fig.cap, fig.subcap=BendamustinHSA_fig.subcap, out.width=ow---- for (combi in drugCombis){ summary <- analyseDrugCombi(df, combi, plotSingle=TRUE, type="HSA") PGPC:::plotSummary(summary, concentration=concentrations, pthresh=0.05, breaks=breaks, limits=limits) PGPC:::plotSummaryBarplot(summary, concentration=concentrations, pthresh=0.05) PGPC:::plotSummaryBarplotInhibition(summary, concentration=concentrations, pthresh=0.05) } ## ----drugInteractionsDisulfiram, dependson = c(-1), fig.show='hold', fig.cap=Disulfiram_fig.cap, fig.subcap=Disulfiram_fig.subcap, out.width=ow---- drugCombis = c("Disulfiram_PD") #drugCombis = c("Disulfiram_PD", "Disulfiram_U0126") ## for better overview plot only a selection of concentrations concentrations = c("0.0195", "0.0391","0.0781","0.1562") breaks = c(0.01, 0.03, 0.1, 0.3) limits=c (0.01, 0.5) for (combi in drugCombis){ summary <- analyseDrugCombi(df, combi, plotSingle=TRUE) PGPC:::plotSummary(summary, concentration=concentrations, pthresh=0.05, breaks=breaks, limits=limits) PGPC:::plotSummaryBarplot(summary, concentration=concentrations, pthresh=0.05) PGPC:::plotSummaryBarplotInhibition(summary, concentration=concentrations, pthresh=0.05) } ## ----drugInteractionsDisulfiramHSA, dependson = c(-1), fig.show='hold', fig.cap=DisulfiramHSA_fig.cap, fig.subcap=DisulfiramHSA_fig.subcap, out.width=ow---- for (combi in drugCombis){ summary <- analyseDrugCombi(df, combi, plotSingle=TRUE, type="HSA") PGPC:::plotSummary(summary, concentration=concentrations, pthresh=0.05, breaks=breaks, limits=limits) PGPC:::plotSummaryBarplot(summary, concentration=concentrations, pthresh=0.05) PGPC:::plotSummaryBarplotInhibition(summary, concentration=concentrations, pthresh=0.05) } ## ----figureLegends2, echo=FALSE-------------------------------------------- BendamustinDLD_fig.cap = "Drug synergies for Bendamustine and AKT inhibitors calculated using an non-interacting model in the DLD-1 cell line. Error bars, means +- s.e.m" BendamustinDLD_fig.subcap = c("Normalized viability values for all experiments.", "Line plot of sumarized viability values for selected concentrations.", "Barplot of sumarized viability values for selected concentrations.", "Barplot of drug inhibition for selected concentrations. This figure is the basis of Expanded view Figure EV3C in the paper.") BendamustinDLDHSA_fig.cap = "Drug synergies for Bendamustine and AKT inhibitors calculated using the HSA model in the DLD-1 cell line. Error bars, means +- s.e.m" BendamustinDLDHSA_fig.subcap = c("Normalized viability values for all experiments.", "Line plot of sumarized viability values for selected concentrations.", "Barplot of sumarized viability values for selected concentrations.", "Barplot of drug inhibition for selected concentrations.") DisulfiramDLD_fig.cap = "Drug synergies for Disulfiram and MEK inhibitors calculated using an non-interacting model in the DLD-1 cell line. Error bars, means +- s.e.m" DisulfiramDLD_fig.subcap = c("Normalized viability values for all experiments.", "Line plot of sumarized viability values for selected concentrations.", "Barplot of sumarized viability values for selected concentrations.", "Barplot of drug inhibition for selected concentrations. This figure is the basis of Expanded View Figure EV3H in the paper.") DisulfiramDLDHSA_fig.cap = "Drug synergies for Disulfiram and MEK inhibitors calculated using an non-interacting model in the DLD-1 cell line. Error bars, means +- s.e.m" DisulfiramDLDHSA_fig.subcap = c("Normalized viability values for all experiments.", "Line plot of sumarized viability values for selected concentrations.", "Barplot of sumarized viability values for selected concentrations.", "Barplot of drug inhibition for selected concentrations.") ## ----drugInteractionsDLD, dependson = c(-2)-------------------------------- experiment = "2013-10-22platelist_DLD_72_NPI_none" ## get the dataframe and check the data in an explorative manner df = read.delim(file.path("result", experiment, "in", "topTable.txt"), stringsAsFactors=FALSE) df$identifier = paste(df$drug1, df$drug2, sep="_") df$concdrug1.micro_M. = round(df$concdrug1.micro_M., 4) theme_new = theme_bw() theme_new$text$size = 20 theme_new$axis.text$size = rel(1.2) ## ----drugInteractionsDLDBendatmustin, dependson = c(-1), fig.show='hold', fig.cap=BendamustinDLD_fig.cap, fig.subcap=BendamustinDLD_fig.subcap, out.width=ow---- drugCombis = c("Bendamustine_AKTi VIII") ## drugCombis = c("Bendamustine_AKTi VIII", "Bendamustine_PD", "Bendamustine_U0126", ## "Bendamustine_MK2206") ## for better overview plot only a selection of concentrations concentrations = c("1.25", "2.5", "5", "10") breaks = c(1,3,10) limits = c(1, 11) for (combi in drugCombis){ summary <- analyseDrugCombi(df, combi, plotSingle=TRUE) PGPC:::plotSummary(summary, concentration=concentrations, pthresh=0.05, breaks=breaks, limits=limits) PGPC:::plotSummaryBarplot(summary, concentration=concentrations, pthresh=0.05) PGPC:::plotSummaryBarplotInhibition(summary, concentration=concentrations, pthresh=0.05, ylimits=c(-0.05,.6)) } ## ----drugInteractionsDLDBendatmustinHSA, dependson = c(-1), fig.show='hold', fig.cap=BendamustinDLDHSA_fig.cap, fig.subcap=BendamustinDLDHSA_fig.subcap, out.width=ow---- for (combi in drugCombis){ summary <- analyseDrugCombi(df, combi, plotSingle=TRUE, type="HSA") PGPC:::plotSummary(summary, concentration=concentrations, pthresh=0.05, breaks=breaks, limits=limits) PGPC:::plotSummaryBarplot(summary, concentration=concentrations, pthresh=0.05) PGPC:::plotSummaryBarplotInhibition(summary, concentration=concentrations, pthresh=0.05, ylimits=c(-0.05,.6)) } ## ----drugInteractionsDLDDisulfiram, dependson = c(-1), fig.show='hold', fig.cap=DisulfiramDLD_fig.cap, fig.subcap=DisulfiramDLD_fig.subcap, out.width=ow---- drugCombis = c("Disulfiram_PD") ##drugCombis = c("Disulfiram_PD", "Disulfiram_U0126") ## for better overview plot only a selection of concentrations concentrations = c("0.0195", "0.0391","0.0781","0.1562") breaks = c(0.01, 0.03, 0.1, 0.3) limits=c (0.01, 0.5) for (combi in drugCombis){ summary <- analyseDrugCombi(df, combi, plotSingle=TRUE) PGPC:::plotSummary(summary, concentration=concentrations, pthresh=0.05, breaks=breaks, limits=limits) PGPC:::plotSummaryBarplot(summary, concentration=concentrations, pthresh=0.05) PGPC:::plotSummaryBarplotInhibition(summary, concentration=concentrations, pthresh=0.05) } ## ----drugInteractionsDLDDisulfiramHSA, dependson = c(-1), fig.show='hold', fig.cap=DisulfiramDLDHSA_fig.cap, fig.subcap=DisulfiramDLDHSA_fig.subcap, out.width=ow---- for (combi in drugCombis){ summary <- analyseDrugCombi(df, combi, plotSingle=TRUE, type="HSA") PGPC:::plotSummary(summary, concentration=concentrations, pthresh=0.05, breaks=breaks, limits=limits) PGPC:::plotSummaryBarplot(summary, concentration=concentrations, pthresh=0.05) PGPC:::plotSummaryBarplotInhibition(summary, concentration=concentrations, pthresh=0.05) } ## ----echo=FALSE, cache=FALSE, message=FALSE-------------------------------- knitr::set_parent(file.path('..', 'PGPC.Rnw')) library(PGPC) raw_fig.cap = "Raw plate reader values for the different drugs and assays. CellTiterGlo (CTG), chymotrypsin-like (CT-L), trypsin-like (T-L), and caspase-like (C-L) activity assay." raw_fig.subcap = c("Untransformed values.", "log10 transformed values.") viab_fig.cap = "Viability normalized values for the different drugs and assays. CellTiterGlo (CTG), chymotrypsin-like (CT-L), trypsin-like (T-L), and caspase-like (C-L) activity assay." viab_fig.subcap = c("Untransformed values.", "log10 transformed values.") prot_fig.cap = "Viability and proteasome activity normalized values for the different drugs and assays. CellTiterGlo (CTG), chymotrypsin-like (CT-L), trypsin-like (T-L), and caspase-like (C-L) activity assay." prot_fig.subcap = c("Untransformed values.", "log10 transformed values.") sum_fig.cap = "Summary of viability and proteasome activity normalized values for the different drugs and assays. CellTiterGlo (CTG), chymotrypsin-like (CT-L), trypsin-like (T-L), and caspase-like (C-L) activity assay. Error bars, means +- s.e.m." sum_fig.subcap = c("Untransformed values. This figure is the basis of Figure 6B in the paper. ", "log10 transformed values.") fw = 10 fh = 6 ## ----readData, dependson = c(-1), fig.show="hold", fig.cap=raw_fig.cap, fig.subcap=raw_fig.subcap, fig.width=fw, fig.height=fh---- readData <- function(path, files, log10Transform=TRUE){ do.call(rbind, lapply(files, function(file){ tmp <- read.delim(file.path(path, file)) tmp$plate = match(file, files) tmp$plateName = file if(log10Transform) tmp$log10Value = log10(tmp$value) tmp })) } path=system.file("extdata", "Proteasome_assays_follow-up", package="PGPC") files = list.files(path) data = readData(path,files) head(data) ## remove BTO-1 drug data <- subset(data, drug != "BTO-1") data$assay = factor(data$assay, levels=c("CTG", "Chym","Tryp", "CASP"), ordered=TRUE) levels(data$assay) = c("CTG", "CT-L","T-L", "C-L") data$drug = factor(data$drug, levels=c("DMSO", "Disulfiram", "ZPCK", "AG555", "CAPE", "MG132","Bortezomib 5", "AG1478", "DAPH"), ordered=TRUE) cbpalette = c("black", "#ee1e39", "#e51d3a", "#bb1f3b", "#b21e42", "#d1d2d2", "#9b9b9b", "#3c53a4", "#4a52a3") ggplot(data, aes(x=drug, y=value, color=drug, shape=plateName)) + geom_jitter(position = position_jitter(width = .3), size=2.5) + theme_bw() + scale_color_manual(values=cbpalette) + facet_grid(assay ~ ., scales="free_y") ggplot(data, aes(x=drug, y=log10Value, color=drug, shape=plateName)) + geom_jitter(position = position_jitter(width = .3), size=2.5) + theme_bw() + scale_color_manual(values=cbpalette) + facet_grid(assay ~ ., scales="free_y") ## ----plateMeanValues------------------------------------------------------- mergeWells <- function(data){ for(plate in unique(data$plate)){ pp = data$plate == plate for(drug in unique(data$drug)){ ii = data$drug == drug for (assay in unique(data$assay)){ jj = data$assay == assay data$mean[pp & ii & jj] = mean(data$value[pp & ii & jj]) } } } data$log10Value = log10(data$mean) data$value=NULL names(data) <- gsub("mean", "value", names(data)) unique(data) } data = mergeWells(data) ## ----normalizeViability, dependson = c(-1), fig.show="hold", fig.cap=viab_fig.cap, fig.subcap=viab_fig.subcap, fig.width=fw, fig.height=fh---- normalizeViability <- function(data){ for(plate in unique(data$plate)){ pp = data$plate == plate for(drug in unique(data$drug)){ ii = data$drug == drug jj = data$assay == "CTG" data$normalized[ii & pp] = data$value[ii & pp] / data$value[ii & jj & pp] data$logNormalized[ii & pp] = data$log10Value[ii & pp] - data$log10Value[ii & jj & pp] } } data } data = normalizeViability(data) head(data) # Error bars represent standard error of the mean ggplot(data, aes(x=drug, y=normalized, color=drug, shape=plateName)) + geom_point(position=position_jitter(height=0), size=2.5) + theme_bw() + scale_color_manual(values=cbpalette) + facet_grid(assay ~ ., scales="free_y") ggplot(data, aes(x=drug, y=logNormalized, color=drug, shape=plateName)) + geom_point(position=position_jitter(height=0), size=2.5) + theme_bw() + scale_color_manual(values=cbpalette) + facet_grid(assay ~ ., scales="free_y") ## ----normalizeAssay, dependson = c(-1), fig.show="hold", fig.cap=prot_fig.cap, fig.subcap=prot_fig.subcap, fig.width=fw, fig.height=fh---- normalizeAssay <- function(data){ for(plate in unique(data$plate)){ pp = data$plate == plate for(assay in unique(data$assay)){ ii = data$assay == assay jj = data$drug == "DMSO" data$normalizedActivity[ii & pp] = data$normalized[ii & pp] / data$normalized[ii & jj & pp] data$logNormalizedActivity[ii & pp] = data$logNormalized[ii & pp] - data$logNormalized[ii & jj & pp] } } data } data = normalizeAssay(data) head(data) # Error bars represent standard error of the mean ggplot(data, aes(x=drug, y=normalizedActivity, color=drug, shape=plateName)) + geom_point(position=position_jitter(height=0), size=2.5) + theme_bw() + scale_color_manual(values=cbpalette) + facet_grid(assay ~ ., scales="free_y") ggplot(data, aes(x=drug, y=logNormalizedActivity, color=drug, shape=plateName)) + geom_point(position=position_jitter(height=0), size=2.5) + theme_bw() + scale_color_manual(values=cbpalette) + facet_grid(assay ~ ., scales="free_y") ## ----summary, dependson = c(-1), fig.show="hold", fig.cap=sum_fig.cap, fig.subcap=sum_fig.subcap, fig.width=fw, fig.height=fh---- summarySE <- function(data=NULL, measurevar, groupvars=NULL, na.rm=FALSE, conf.interval=.95, .drop=TRUE) { # handle NA's: if na.rm==TRUE, don't count them length2 <- function (x, na.rm=FALSE) { if (na.rm) sum(!is.na(x)) else length(x) } # summary. For each group's data frame, return a vector with # N, mean, and sd datac <- ddply(data, groupvars, .drop=.drop, .fun = function(xx, col) { tTest = try(t.test(xx[[col]] - ifelse(grepl("log", measurevar), 0, 1)), silent=TRUE) c(N = length2(xx[[col]], na.rm=na.rm), mean = mean (xx[[col]], na.rm=na.rm), sd = sd (xx[[col]], na.rm=na.rm), p.value = ifelse(class(tTest) != "try-error", tTest$p.value, 1) ) }, measurevar ) # Rename the "mean" column datac <- rename(datac, c("mean" = measurevar)) datac$se <- datac$sd / sqrt(datac$N) # Calculate standard error of the mean # Confidence interval multiplier for standard error # Calculate t-statistic for confidence interval: # e.g., if conf.interval is .95, use .975 (above/below), and use df=N-1 ciMult <- qt(conf.interval/2 + .5, datac$N-1) datac$ci <- datac$se * ciMult return(datac) } summary = summarySE(data, measurevar="normalizedActivity", groupvars=c("drug","assay")) ## remove ctg assay summary <- subset(summary, assay!="CTG") ## remove DMSO control summary <- subset(summary, drug!="DMSO") pthresh = 0.05 p <- ggplot(summary, aes(x=assay, y=100*(1-normalizedActivity), ymin=100*(1-normalizedActivity-se), ymax=100*(1-normalizedActivity+se), fill=drug)) + geom_bar(position=position_dodge(), stat="identity", color="black") + geom_errorbar(width=.2, position=position_dodge(.9)) + theme_bw() + theme(panel.grid = element_blank()) + scale_fill_manual(values=cbpalette[-1]) + ylab("normalized proteasome inhibition (%)") if(any(summary$p.value < pthresh)) p = p + geom_point(aes(y=ifelse(p.value < pthresh, 100*(1-normalizedActivity + se + 0.05), NA)), position=position_dodge(.9), pch=8, col=1, show_guide = FALSE, na.rm=TRUE) print(p) logSummary = summarySE(data, measurevar="logNormalizedActivity", groupvars=c("drug","assay")) ## remove ctg assay logSummary <- subset(logSummary, assay!="CTG") ## remove DMSO control logSummary <- subset(logSummary, drug!="DMSO") p <- ggplot(logSummary, aes(x=assay, y=-logNormalizedActivity, ymin=-logNormalizedActivity-se, ymax=-logNormalizedActivity+se, fill=drug)) + geom_bar(position=position_dodge(), stat="identity", color="black") + geom_errorbar(width=.2, position=position_dodge(.9)) + theme_bw() + theme(panel.grid = element_blank()) + scale_fill_manual(values=cbpalette[-1]) if(any(summary$p.value < pthresh)) p = p + geom_point(aes(y=ifelse(p.value < pthresh, -logNormalizedActivity+se+0.05, NA)), position=position_dodge(width=.9), pch=8, col=1, show_guide = FALSE, na.rm=TRUE) print(p) ## ----sessionInfo, echo=FALSE----------------------------------------------- sessionInfo()