## ---- echo=FALSE, results="hide", warning=FALSE------------------------------- suppressPackageStartupMessages({ library('proteomics') }) ## ----env0, message=FALSE, echo=FALSE, warning=FALSE--------------------------- library("knitr") opts_knit$set(error = FALSE) library("BiocManager") library("BiocStyle") library("RforProteomics") library("nloptr") ## ---- env, message=FALSE, echo=TRUE, warning=FALSE---------------------------- library("mzR") library("mzID") library("MSnID") library("MSnbase") library("rpx") library("MLInterfaces") library("pRoloc") library("pRolocdata") library("rols") library("hpar") ## ----r4pinstall, eval=FALSE--------------------------------------------------- # library("BiocManager") # BiocManager::install("RforProteomics", dependencies = TRUE) ## ---- pk, echo=FALSE, warning=FALSE, cache=TRUE------------------------------- biocv <- as.character(BiocManager::version()) pp <- proteomicsPackages(biocv) msp <- massSpectrometryPackages(biocv) msdp <- massSpectrometryDataPackages(biocv) ## ---- pp, eval=FALSE---------------------------------------------------------- # library("RforProteomics") # pp <- proteomicsPackages() # display(pp) ## ----datatab, results='asis', echo=FALSE-------------------------------------- datatab <- data.frame(Type = c("raw", "identification", "quantitation", "peak lists", "other"), Format = c("mzML, mzXML, netCDF, mzData", "mzIdentML", "mzQuantML", "mgf", "mzTab"), Package = c( "[`mzR`](http://bioconductor.org/packages/release/bioc/html/mzR.html) (read)", paste("[`mzR`](http://bioconductor.org/packages/release/bioc/html/mzR.html) (read) and", "[`mzID`](http://bioconductor.org/packages/release/bioc/html/mzID.html) (read)"), "", "[`MSnbase`](http://bioconductor.org/packages/release/bioc/html/MSnbase.html) (read/write)", "[`MSnbase`](http://bioconductor.org/packages/release/bioc/html/MSnbase.html) (read)")) library("knitr") kable(datatab) ## ----rpx---------------------------------------------------------------------- library("rpx") pxannounced() ## ----pxd, cache=TRUE---------------------------------------------------------- px <- PXDataset("PXD000001") px pxfiles(px) ## ----pxvar-------------------------------------------------------------------- pxtax(px) pxurl(px) pxref(px) ## ----pxget, cache=TRUE-------------------------------------------------------- fn <- "TMT_Erwinia_1uLSike_Top10HCD_isol2_45stepped_60min_01-20141210.mzML" mzf <- pxget(px, fn) mzf ## ----rawms-------------------------------------------------------------------- library("mzR") ms <- openMSfile(mzf) ms ## ----hd----------------------------------------------------------------------- hd <- header(ms) dim(hd) names(hd) ## ----hdpeaks------------------------------------------------------------------ hd[1000, ] head(peaks(ms, 1000)) plot(peaks(ms, 1000), type = "h") ## ----msmap-------------------------------------------------------------------- ## a set of spectra of interest: MS1 spectra eluted ## between 30 and 35 minutes retention time ms1 <- which(hd$msLevel == 1) rtsel <- hd$retentionTime[ms1] / 60 > 30 & hd$retentionTime[ms1] / 60 < 35 ## the map M <- MSmap(ms, ms1[rtsel], 521, 523, .005, hd) plot(M, aspect = 1, allTicks = FALSE) plot3D(M) ## With some MS2 spectra i <- ms1[which(rtsel)][1] j <- ms1[which(rtsel)][2] M2 <- MSmap(ms, i:j, 100, 1000, 1, hd) plot3D(M2) ## ----id, cache=TRUE----------------------------------------------------------- library("mzID") f <- dir(system.file("extdata", package = "RforProteomics"), pattern = "mzid", full.names=TRUE) basename(f) id <- mzID(f) id ## ----mzrvsid, eval = TRUE----------------------------------------------------- library("mzR") f <- dir(system.file("extdata", package = "RforProteomics"), pattern = "mzid", full.names=TRUE) id1 <- openIDfile(f) fid1 <- mzR::psms(id1) head(fid1) ## ----idf---------------------------------------------------------------------- mzids <- system.file("extdata", "c_elegans.mzid.gz", package="MSnID") basename(mzids) ## ----msnid1------------------------------------------------------------------- library("MSnID") msnid <- MSnID(".") msnid <- read_mzIDs(msnid, mzids) show(msnid) ## ----msnidcols, echo=FALSE---------------------------------------------------- sort(MSnID:::.mustBeColumns) ## ----msnidnames--------------------------------------------------------------- names(msnid) ## ----msnidtermini------------------------------------------------------------- msnid <- assess_termini(msnid, validCleavagePattern="[KR]\\.[^P]") msnid <- assess_missed_cleavages(msnid, missedCleavagePattern="[KR](?=[^P$])") ## ----msnidtrim---------------------------------------------------------------- msnid <- apply_filter(msnid, "numIrregCleavages == 0") msnid <- apply_filter(msnid, "numMissCleavages <= 2") show(msnid) ## ----msnidppm1---------------------------------------------------------------- summary(mass_measurement_error(msnid)) ## ----msnidppm2---------------------------------------------------------------- msnid <- apply_filter(msnid, "abs(mass_measurement_error(msnid)) < 20") summary(mass_measurement_error(msnid)) ## ----filt1-------------------------------------------------------------------- msnid$msmsScore <- -log10(msnid$`MS-GF:SpecEValue`) ## ----filt2-------------------------------------------------------------------- msnid$absParentMassErrorPPM <- abs(mass_measurement_error(msnid)) ## ----filt3-------------------------------------------------------------------- filtObj <- MSnIDFilter(msnid) filtObj$absParentMassErrorPPM <- list(comparison="<", threshold=10.0) filtObj$msmsScore <- list(comparison=">", threshold=10.0) show(filtObj) ## ----filt4-------------------------------------------------------------------- evaluate_filter(msnid, filtObj) ## ----optim1------------------------------------------------------------------- filtObj.grid <- optimize_filter(filtObj, msnid, fdr.max=0.01, method="Grid", level="peptide", n.iter=500) show(filtObj.grid) ## ----optim2------------------------------------------------------------------- evaluate_filter(msnid, filtObj.grid) ## ----optim3------------------------------------------------------------------- msnid <- apply_filter(msnid, filtObj.grid) show(msnid) ## ----optim4------------------------------------------------------------------- msnid <- apply_filter(msnid, "isDecoy == FALSE") msnid <- apply_filter(msnid, "!grepl('Contaminant',accession)") show(msnid) ## ----msnbase------------------------------------------------------------------ library("MSnbase") rawFile <- dir(system.file(package = "MSnbase", dir = "extdata"), full.name = TRUE, pattern = "mzXML$") basename(rawFile) msexp <- readMSData(rawFile, verbose = FALSE, centroided = FALSE) msexp ## ----msnb2-------------------------------------------------------------------- length(msexp) msexp[1:2] msexp[[2]] ## ----addid-------------------------------------------------------------------- fData(msexp) ## find path to a mzIdentML file identFile <- dir(system.file(package = "MSnbase", dir = "extdata"), full.name = TRUE, pattern = "dummyiTRAQ.mzid") basename(identFile) msexp <- addIdentificationData(msexp, identFile) fData(msexp) ## ----specplot----------------------------------------------------------------- msexp[[1]] plot(msexp[[1]], full=TRUE) ## ----specplot2---------------------------------------------------------------- msexp[1:3] plot(msexp[1:3], full=TRUE) ## ----quanttab, echo=FALSE, results='asis'------------------------------------- qtb <- matrix(c("XIC", "Counting", "SILAC, 15N", "iTRAQ, TMT"), nrow = 2, ncol = 2) dimnames(qtb) <- list( 'MS level' = c("MS1", "MS2"), 'Quantitation' = c("Label-free", "Labelled")) kable(qtb) ## ----itraq4plot--------------------------------------------------------------- plot(msexp[[1]], full=TRUE, reporters = iTRAQ4) ## ----quantitraq--------------------------------------------------------------- msset <- quantify(msexp, method = "trap", reporters = iTRAQ4, verbose=FALSE) exprs(msset) processingData(msset) ## ----lfms2-------------------------------------------------------------------- exprs(si <- quantify(msexp, method = "SIn")) exprs(saf <- quantify(msexp, method = "NSAF")) ## ----mztab, cache=TRUE-------------------------------------------------------- mztf <- pxget(px, "F063721.dat-mztab.txt") (mzt <- readMzTabData(mztf, what = "PEP", version = "0.9")) ## ----readmsnset2-------------------------------------------------------------- csv <- dir(system.file ("extdata" , package = "pRolocdata"), full.names = TRUE, pattern = "pr800866n_si_004-rep1.csv") getEcols(csv, split = ",") ecols <- 7:10 res <- readMSnSet2(csv, ecols) head(exprs(res)) head(fData(res)) ## ----pure--------------------------------------------------------------------- data(itraqdata) qnt <- quantify(itraqdata, method = "trap", reporters = iTRAQ4, verbose = FALSE) impurities <- matrix(c(0.929,0.059,0.002,0.000, 0.020,0.923,0.056,0.001, 0.000,0.030,0.924,0.045, 0.000,0.001,0.040,0.923), nrow=4, byrow = TRUE) ## or, using makeImpuritiesMatrix() ## impurities <- makeImpuritiesMatrix(4) qnt.crct <- purityCorrect(qnt, impurities) processingData(qnt.crct) ## ----pureplot, echo=FALSE----------------------------------------------------- plot0 <- function(x, y, main = "") { old.par <- par(no.readonly = TRUE) on.exit(par(old.par)) par(mar = c(4, 4, 1, 1)) par(mfrow = c(2, 2)) sx <- sampleNames(x) sy <- sampleNames(y) for (i in seq_len(ncol(x))) { plot(exprs(x)[, i], exprs(y)[, i], log = "xy", xlab = sx[i], ylab = sy[i]) abline(0, 1) grid() } } ## plot0(qnt, qnt.crct, main = "Before/after correction") ## ----norm--------------------------------------------------------------------- qnt.crct.nrm <- normalise(qnt.crct, "quantiles") ## ----plotnorm, echo=FALSE----------------------------------------------------- ## plot0(qnt, qnt.crct.nrm) ## ----comb--------------------------------------------------------------------- ## arbitraty grouping g <- factor(c(rep(1, 25), rep(2, 15), rep(3, 15))) g prt <- combineFeatures(qnt.crct.nrm, groupBy = g, fun = "sum") processingData(prt) ## ----impute0------------------------------------------------------------------ set.seed(1) qnt0 <- qnt exprs(qnt0)[sample(prod(dim(qnt0)), 10)] <- NA table(is.na(qnt0)) image(qnt0) ## ----filterNA----------------------------------------------------------------- ## remove features with missing values qnt00 <- filterNA(qnt0) dim(qnt00) any(is.na(qnt00)) ## ----impute------------------------------------------------------------------- ## impute missing values using knn imputation qnt.imp <- impute(qnt0, method = "knn") dim(qnt.imp) any(is.na(qnt.imp)) ## ----ml----------------------------------------------------------------------- library("MLInterfaces") library("pRoloc") library("pRolocdata") data(dunkley2006) traininds <- which(fData(dunkley2006)$markers != "unknown") ans <- MLearn(markers ~ ., data = t(dunkley2006), knnI(k = 5), traininds) ans ## ----clust-------------------------------------------------------------------- kcl <- MLearn( ~ ., data = dunkley2006, kmeansI, centers = 12) kcl plot(kcl, exprs(dunkley2006)) ## ----nont, echo=FALSE, cache=TRUE--------------------------------------------- library("rols") onts <- Ontologies() nont <- length(onts) ## ----rols--------------------------------------------------------------------- library("rols") res <- OlsSearch(q = "ESI", ontology = "MS", exact = TRUE) res ## ----rols2-------------------------------------------------------------------- res <- olsSearch(res) as(res, "Terms") as(res, "data.frame") ## ----si, echo=FALSE----------------------------------------------------------- print(sessionInfo(), local = FALSE)