## ----style, echo = FALSE, results = 'asis'------------------------------------ BiocStyle::markdown() knitr::opts_chunk$set(fig.wide = TRUE, fig.retina = 3) ## ----arch, echo=FALSE, out.width="80%", eval=TRUE, fig.cap="Integration of rawDiag and rawrr into the Spectra ecosystem (by courtesy of Johannes Rainer)."---- knitr::include_graphics("arch.jpg") ## ----require------------------------------------------------------------------ suppressMessages( stopifnot(require(Spectra), require(MsBackendRawFileReader), require(tartare), require(BiocParallel)) ) ## ----installAssemblies, echo=TRUE--------------------------------------------- if (isFALSE(rawrr::.checkDllInMonoPath())){ rawrr::installRawFileReaderDLLs() } if (isFALSE(file.exists(rawrr:::.rawrrAssembly()))){ rawrr::installRawrrExe(sourceUrl = "http://fgcz-ms.uzh.ch/~cpanse/rawrr/rawrr.1.1.12.exe") } ## ----tartareEH4547, warning=FALSE, message=FALSE, eval=TRUE------------------- # fetch via ExperimentHub library(ExperimentHub) eh <- ExperimentHub::ExperimentHub() ## ----tartare------------------------------------------------------------------ query(eh, c('tartare')) ## ----EH3220, message=FALSE, warning=FALSE------------------------------------- EH3220 <- normalizePath(eh[["EH3220"]]) (rawfileEH3220 <- paste0(EH3220, ".raw")) if (!file.exists(rawfileEH3220)){ file.link(EH3220, rawfileEH3220) } EH3222 <- normalizePath(eh[["EH3222"]]) (rawfileEH3222 <- paste0(EH3222, ".raw")) if (!file.exists(rawfileEH3222)){ file.link(EH3222, rawfileEH3222) } EH4547 <- normalizePath(eh[["EH4547"]]) (rawfileEH4547 <- paste0(EH4547 , ".raw")) if (!file.exists(rawfileEH4547 )){ file.link(EH4547 , rawfileEH4547 ) } ## ----backendInitializeMsBackendRawFileReader, message=FALSE------------------- beRaw <- Spectra::backendInitialize( MsBackendRawFileReader::MsBackendRawFileReader(), files = c(rawfileEH3220, rawfileEH3222, rawfileEH4547)) ## ----show--------------------------------------------------------------------- beRaw ## ----rawrrFigure2, fig.cap = "Peptide spectrum match. The vertical grey lines indicate the *in-silico* computed y-ions of the peptide precusor LGGNEQVTR++ as calculated by the [protViz]( https://CRAN.R-project.org/package=protViz) package."---- (S <- (beRaw |> filterScan("FTMS + c NSI Full ms2 487.2567@hcd27.00 [100.0000-1015.0000]") )[437]) |> plotSpectra() # supposed to be scanIndex 9594 S # add yIonSeries to the plot (yIonSeries <- protViz::fragmentIon("LGGNEQVTR")[[1]]$y[1:8]) names(yIonSeries) <- paste0("y", seq(1, length(yIonSeries))) abline(v = yIonSeries, col='#DDDDDD88', lwd=5) axis(3, yIonSeries, names(yIonSeries)) ## ----defineFilterIon---------------------------------------------------------- setGeneric("filterIons", function(object, ...) standardGeneric("filterIons")) setMethod("filterIons", "MsBackend", function(object, mZ=numeric(), tol=numeric(), ...) { keep <- lapply(peaksData(object, BPPARAM = bpparam()), FUN=function(x){ NN <- protViz::findNN(mZ, x[, 1]) hit <- (error <- mZ - x[NN, 1]) < tol & x[NN, 2] >= quantile(x[, 2], .9) if (sum(hit) == length(mZ)) TRUE else FALSE }) object[unlist(keep)] }) ## ----applyFilterIons---------------------------------------------------------- start_time <- Sys.time() X <- beRaw |> MsBackendRawFileReader::filterScan("FTMS + c NSI Full ms2 487.2567@hcd27.00 [100.0000-1015.0000]") |> filterIons(yIonSeries, tol = 0.005) |> Spectra::Spectra() |> Spectra::peaksData() end_time <- Sys.time() ## ----runTime------------------------------------------------------------------ end_time - start_time ## ----definePlotLGGNEQVTR------------------------------------------------------ .plot.LGGNEQVTR <- function(x){ yIonSeries <- protViz::fragmentIon("LGGNEQVTR")[[1]]$y[1:8] names(yIonSeries) <- paste0("y", seq(1, length(yIonSeries))) plot(x, type ='h', xlim=range(yIonSeries)) abline(v = yIonSeries, col='#DDDDDD88', lwd=5) axis(3, yIonSeries, names(yIonSeries)) # find nearest mZ value idx <- protViz::findNN(yIonSeries, x[,1]) data.frame( ion = names(yIonSeries), mZ.yIon = yIonSeries, mZ = x[idx, 1], intensity = x[idx, 2] ) } ## ----filterIons2, fig.height=8, fig.cap = "Visualizing of the LGGNEQVTR spectrum matches.", echo=FALSE---- op <- par(mfrow=c(4, 1), mar=c(4,4,4,1)) rv <- X |> lapply(FUN = .plot.LGGNEQVTR) |> Reduce(f=rbind) ## ----sanityCheck-------------------------------------------------------------- stats::aggregate(formula = mZ ~ ion, FUN = base::mean, data=rv) stats::aggregate(formula = intensity ~ ion, FUN = base::max, data=rv) ## ----combinePeaks, fig.cap = "Combined LGGNEQVTR peptide spectrum match plot."---- X |> Spectra::combinePeaks(ppm=10, intensityFun=base::max) |> .plot.LGGNEQVTR() ## ----readBenchmarkData, fig.cap="I/O Benchmark. The XY plot graphs how many spectra the backend can read in one second versus the chunk size of the rawrr::readSpectrum method for different compute architectures."---- ioBm <- file.path(system.file(package = 'MsBackendRawFileReader'), 'extdata', 'specs.csv') |> read.csv2(header=TRUE) # perform and include a local IO benchmark ioBmLocal <- ioBenchmark(1000, c(32, 64, 128, 256), rawfile = rawfileEH4547) lattice::xyplot((1 / as.numeric(time)) * workers ~ size | factor(n) , group = host, data = rbind(ioBm, ioBmLocal), horizontal = FALSE, scales=list(y = list(log = 10)), auto.key = TRUE, layout = c(3, 1), ylab = 'spectra read in one second', xlab = 'number of spectra / file') ## ----mzXML-------------------------------------------------------------------- mzXMLEH3219 <- normalizePath(eh[["EH3219"]]) mzXMLEH3221 <- normalizePath(eh[["EH3221"]]) ## ----backendInitialize, message=FALSE, fig.cap='Aggregated intensities mzXML versus raw of the 1st 100 spectra.', message=FALSE, warning=FALSE---- if (require(mzR)){ beMzXML <- Spectra::backendInitialize( Spectra::MsBackendMzR(), files = c(mzXMLEH3219)) beRaw <- Spectra::backendInitialize( MsBackendRawFileReader::MsBackendRawFileReader(), files = c(rawfileEH3220)) intensity.xml <- sapply(intensity(beMzXML[1:100]), sum) intensity.raw <- sapply(intensity(beRaw[1:100]), sum) plot(intensity.xml ~ intensity.raw, log = 'xy', asp = 1, pch = 16, col = rgb(0.5, 0.5, 0.5, alpha=0.5), cex=2) abline(lm(intensity.xml ~ intensity.raw), col='red') } ## ----------------------------------------------------------------------------- if (require(mzR)){ table(scanIndex(beRaw) %in% scanIndex(beMzXML)) } ## ----sessionInfo-------------------------------------------------------------- sessionInfo()