## ----style, eval=TRUE, echo=FALSE, results="asis"------------------------ BiocStyle::latex() ## ----knitr, include=FALSE, cache=FALSE----------------------------------- library(knitr) opts_chunk$set(fig.align = 'center', fig.show = 'hold', stop_on_error = 1L, par = TRUE, prompt = TRUE, comment = NA) options(replace.assign = TRUE, width = 70) ## ----setup, echo=FALSE--------------------------------------------------- suppressPackageStartupMessages(library(qvalue)) library(xtable) suppressPackageStartupMessages(library(synapter)) library(synapterdata) ## preparing some data data(ups25b) ## from synapterdata res <- ups25b ## for synergise and plotRt ## ----install, echo=TRUE, eval=FALSE-------------------------------------- ## source("http://bioconductor.org/biocLite.R") ## ## or, if you have already used the above before ## library("BiocInstaller") ## ## and to install the package ## biocLite("synapter") ## ----library, echo=TRUE, eval=FALSE-------------------------------------- ## library(synapter) ## ----plotRt, dev='pdf', fig.width=5.5, fig.height=5---------------------- plotRt(ups25b, what = "model", nsd = 1) ## ----prepare-synergise, echo=TRUE, eval=TRUE, tidy = FALSE--------------- library(synapterdata) hdmsefile <- getHDMSeFinalPeptide()[2] basename(hdmsefile) msefile <- getMSeFinalPeptide()[2] basename(msefile) msepep3dfile <- getMSePep3D()[2] basename(msepep3dfile) fas <- getFasta() basename(fas) ## the synergise input is a (named) list of filenames input <- list(identpeptide = hdmsefile, quantpeptide = msefile, quantpep3d = msepep3dfile, fasta = fas) ## a report and result files will be stored ## in the 'output' directory output <- tempdir() output ## ----synergise, echo=TRUE, eval=FALSE------------------------------------ ## res <- synergise(filenames = input, outputdir = output) ## ----performance, echo=TRUE---------------------------------------------- performance(res) ## ----inputfiles, echo=FALSE, eval=TRUE----------------------------------- inputfiles <- getHDMSeFinalPeptide() ## ----masterFdr, echo=TRUE, eval=TRUE, cache=TRUE, tidy=FALSE------------- ## using the full set of 6 HDMSe files and a ## fasta database from the synapterdata package inputfiles <- getHDMSeFinalPeptide() fasta <- getFasta() cmb <- estimateMasterFdr(inputfiles, fasta, masterFdr = 0.02, verbose = FALSE) cmb ## ----estimateFdrPlot, dev='pdf', fig.width=5, fig.height=5--------------- plot(cmb) ## ----bestComb, echo=TRUE, eval=TRUE-------------------------------------- bestComb(cmb) ## ----master, echo=TRUE, eval=TRUE, cache=TRUE---------------------------- master <- makeMaster(inputfiles[bestComb(cmb)], verbose = FALSE) master ## ----loadups, eval=TRUE, echo=TRUE--------------------------------------- data(ups25a, ups25b, ups25c, ups50a, ups50b, ups50c) ## ----setas,echo=TRUE,eval=TRUE------------------------------------------- ms25a <- as(ups25a, "MSnSet") class(ups25a)[1] class(ms25a)[1] ms25a ## ----updatesamplename---------------------------------------------------- sampleNames(ms25a) sampleNames(ms25a) <- "ups25a" sampleNames(ms25a) ## ----accessor------------------------------------------------------------ tail(exprs(ms25a)) tail(fData(ms25a)[, c(2,9)]) ## all fetaure metadata fvarLabels(ms25a) ## ----dotop3, echo=TRUE, cache=TRUE--------------------------------------- ms25a <- topN(ms25a, groupBy = fData(ms25a)$protein.Accession, n = 3) nPeps <- nQuants(ms25a, fcol = "protein.Accession") ms25a <- combineFeatures(ms25a, fData(ms25a)$protein.Accession, "sum", na.rm = TRUE, verbose = FALSE) head(exprs(ms25a)) head(nPeps) ## scale intensities exprs(ms25a) <- exprs(ms25a) * (3/nPeps) ## NaN result from the division by 0, when ## no peptide was found for that protein head(exprs(ms25a)) ## ----batch, echo=TRUE, tidy = FALSE-------------------------------------- nms <- c(paste0("ups", 25, c("b", "c")), paste0("ups", 50, c("a", "b", "c"))) tmp <- sapply(nms, function(.ups) { cat("Processing", .ups, "... ") ## get the object from workspace and convert to MSnSet x <- get(.ups, envir = .GlobalEnv) x <- as(x, "MSnSet") sampleNames(x) <- .ups ## extract top 3 peptides x <- topN(x, groupBy = fData(x)$protein.Accession, n = 3) ## calculate the number of peptides that are available nPeps <- nQuants(x, fcol = "protein.Accession") ## sum top3 peptides into protein quantitation x <- combineFeatures(x, fData(x)$protein.Accession, "sum", na.rm = TRUE, verbose = FALSE) ## adjust protein intensity based on actual number of top peptides exprs(x) <- exprs(x) * (3/nPeps) ## adjust feature variable names for combine x <- updateFvarLabels(x, .ups) ## save the new MSnExp instance in the workspace varnm <- sub("ups", "ms", .ups) assign(varnm, x, envir = .GlobalEnv) cat("done\n") }) ## ----combine25, echo=TRUE------------------------------------------------ ms25 <- combine(ms25a, ms25b) ms25 <- combine(ms25, ms25c) dim(ms25) ms25 <- filterNA(ms25, pNA = 1/3) dim(ms25) ## ----combine50, echo=TRUE------------------------------------------------ ms50 <- combine(ms50a, ms50b) ms50 <- combine(ms50, ms50c) dim(ms50) ms50 <- filterNA(ms50, pNA = 1/3) dim(ms50) ## ----msUps--------------------------------------------------------------- msUps <- combine(ms25, ms50) msUps <- filterNA(msUps, pNA = 2/6) head(exprs(msUps)) table(apply(exprs(msUps), 1, function(.x) sum(!is.na(.x)))) ## ----scale--------------------------------------------------------------- ecoli <- -grep("ups$", featureNames(msUps)) meds <- apply(exprs(msUps)[ecoli, ], 2, median, na.rm=TRUE) exprs(msUps) <- t(apply(exprs(msUps), 1, "/", meds)) ## ----stats, tidy = FALSE------------------------------------------------- ## use log2 data for t-test exprs(msUps) <- log2(exprs(msUps)) ## apply a t-test and extract the p-value pv <- apply(exprs(msUps), 1 , function(x) t.test(x[1:3], x[4:6])$p.value) ## calculate q-values library(qvalue) qv <- qvalue(pv)$qvalues ## calculate log2 fold-changes lfc <- apply(exprs(msUps), 1 , function(x) mean(x[1:3], na.rm=TRUE) - mean(x[4:6], na.rm=TRUE)) ## create a summary table res <- data.frame(cbind(exprs(msUps), pv, qv, lfc)) ## reorder based on q-values res <- res[order(res$qv), ] head(round(res, 3)) ## ----writecsv, eval=FALSE------------------------------------------------ ## write.csv(res, file = "upsResults.csv") ## ----volcano, dev='pdf', fig.width=5, fig.height=5, tidy=FALSE----------- plot(res$lfc, -log10(res$qv), col = ifelse(grepl("ups$", rownames(res)), "#4582B3AA", "#A1A1A180"), pch = 19, xlab = expression(log[2]~fold-change), ylab = expression(-log[10]~q-value)) grid() abline(v = -1, lty = "dotted") abline(h = -log10(0.1), lty = "dotted") legend("topright", c("UPS", "E. coli"), col = c("#4582B3AA", "#A1A1A1AA"), pch = 19, bty = "n") ## ----hmap, dev='pdf', fig.width=4, fig.height=4, fig.keep='last'--------- heatmap(exprs(msUps)[grep("ups",featureNames(msUps)), ]) ## ----falsepos, echo=FALSE------------------------------------------------ sel1 <- (res$qv < 0.1) signms <- rownames(res)[sel1] i <- grep("ups", signms) n1 <- length(i) n2 <- length(signms) - n1 ## ----upstab, echo=FALSE, results='asis'---------------------------------- k <- grep("ups", rownames(res)) print(xtable(round(res[k, ], 3), caption = "UPS1 proteins.", table.placement = "thb", label = "tab:ups"), size = "scriptsize") ## ----echo=FALSE---------------------------------------------------------- dev.off() ## ----sessioninfo, results='asis', echo=FALSE, cache=FALSE---------------- toLatex(sessionInfo())