Laurent Gatto
Laurent Gatto Computational Proteomics Unit
https://lgatto.github.io University of Cambridge
lg390@cam.ac.uk @lgatt0
These slides are available under a creative common CC-BY license. You are free to share (copy and redistribute the material in any medium or format) and adapt (remix, transform, and build upon the material) for any purpose, even commercially.
library("msdata")
library("mzR")
fls <- proteomics(full = TRUE)
basename(fl <- fls[2])
## [1] "MS3TMT10_01022016_32917-33481.mzML.gz"
rw <- openMSfile(fl)
rw
## Mass Spectrometry file handle.
## Filename: MS3TMT10_01022016_32917-33481.mzML.gz
## Number of scans: 565
softwareInfo(rw)
## [1] "Xcalibur 2.0.1258.15"
str(spectra(rw, 10:11))
## List of 2
## $ : num [1:725, 1:2] 172 173 174 175 176 ...
## $ : num [1:779, 1:2] 195 197 209 213 216 ...
str(header(rw))
## 'data.frame': 565 obs. of 22 variables:
## $ seqNum : int 1 2 3 4 5 6 7 8 9 10 ...
## $ acquisitionNum : int 32918 32919 32920 32921 32922 32923 32924 32925 32926 32927 ...
## $ msLevel : int 1 2 2 2 2 3 2 2 3 2 ...
## $ polarity : int 1 1 1 1 1 1 1 1 1 1 ...
## $ peaksCount : int 48304 755 803 765 499 2540 685 858 1422 725 ...
## $ totIonCurrent : num 3.01e+09 1.03e+06 6.97e+05 1.22e+06 4.95e+05 ...
## $ retentionTime : num 4423 4423 4423 4423 4423 ...
## $ basePeakMZ : num 697 646 642 550 719 ...
## $ basePeakIntensity : num 1.73e+08 1.21e+05 6.04e+04 1.03e+05 1.15e+05 ...
## $ collisionEnergy : num 0 35 35 35 35 65 35 35 65 35 ...
## $ ionisationEnergy : num 0 0 0 0 0 0 0 0 0 0 ...
## $ lowMZ : num 376 187 188 200 166 ...
## $ highMZ : num 1515 1646 1701 1596 1375 ...
## $ precursorScanNum : int 0 32918 32918 32918 32918 0 32918 32918 0 32918 ...
## $ precursorMZ : num 0 652 647 673 575 ...
## $ precursorCharge : int 0 3 3 3 3 0 2 3 1 2 ...
## $ precursorIntensity : num 0 9841531 3921567 7623700 2357085 ...
## $ mergedScan : int 0 0 0 0 0 0 0 0 0 0 ...
## $ mergedResultScanNum : int 0 0 0 0 0 0 0 0 0 0 ...
## $ mergedResultStartScanNum: int 0 0 0 0 0 0 0 0 0 0 ...
## $ mergedResultEndScanNum : int 0 0 0 0 0 0 0 0 0 0 ...
## $ injectionTime : num 0.00081 0.01084 0.01869 0.01233 0.00818 ...
MSnExp
Using MSnExp
objects from the MSnbase package to conveniently and efficiently manage raw MS experiments.
library("MSnbase")
(x <- readMSData2(fl))
## MSn experiment data ("OnDiskMSnExp")
## Object size in memory: 0.14 Mb
## - - - Spectra data - - -
## MS level(s): 1 2 3
## Number of spectra: 565
## MSn retention times: 73:43 - 74:54 minutes
## - - - Processing information - - -
## Data loaded [Thu Jun 15 06:02:59 2017]
## MSnbase version: 2.3.3
## - - - Meta data - - -
## phenoData
## rowNames: MS3TMT10_01022016_32917-33481.mzML.gz
## varLabels: sampleNames
## varMetadata: labelDescription
## Loaded from:
## MS3TMT10_01022016_32917-33481.mzML.gz
## protocolData: none
## featureData
## featureNames: X001.1 X002.1 ... X565.1 (565 total)
## fvarLabels: fileIdx spIdx ... spectrum (27 total)
## fvarMetadata: labelDescription
## experimentData: use 'experimentData(object)'
table(msLevel(x))
##
## 1 2 3
## 25 270 270
head(centroided(x))
## X001.1 X002.1 X003.1 X004.1 X005.1 X006.1
## NA NA NA NA NA NA
table(iscent <- isCentroided(x), msLevel(x))
##
## 1 2 3
## FALSE 25 0 270
## TRUE 0 270 0
centroided(x) <- iscent
head(centroided(x))
## X001.1 X002.1 X003.1 X004.1 X005.1 X006.1
## FALSE TRUE TRUE TRUE TRUE FALSE
library("magrittr")
x2 <- x %>%
filterMsLevel(2L) %>%
filterMz(c(126, 132))
x2
## MSn experiment data ("OnDiskMSnExp")
## Object size in memory: 0.08 Mb
## - - - Spectra data - - -
## MS level(s): 2
## Number of spectra: 270
## MSn retention times: 73:43 - 74:54 minutes
## - - - Processing information - - -
## Data loaded [Wed Jun 14 11:13:24 2017]
## Filter: select MS level(s) 2 [Thu Jun 15 06:03:00 2017]
## Filter: trim MZ [126..132] on MS level(s) 2.
## MSnbase version: 2.3.3
## - - - Lazy processing queue content - - -
## o filterMz
## - - - Meta data - - -
## phenoData
## rowNames: MS3TMT10_01022016_32917-33481.mzML.gz
## varLabels: sampleNames
## varMetadata: labelDescription
## Loaded from:
## MS3TMT10_01022016_32917-33481.mzML.gz
## protocolData: none
## featureData
## featureNames: X002.1 X003.1 ... X563.1 (270 total)
## fvarLabels: fileIdx spIdx ... spectrum (27 total)
## fvarMetadata: labelDescription
## experimentData: use 'experimentData(object)'
data(itraqdata)
itraqdata[[22]]
## Object of class "Spectrum2"
## Precursor: 974.4916
## Retention time: 31:44
## Charge: 2
## MSn level: 2
## Peaks count: 3124
## Total ion count: 151732400
plot(itraqdata[[22]], full = TRUE, reporters = iTRAQ4)
itraqdata2 <- pickPeaks(itraqdata)
plot(itraqdata2[[22]], itraqdata2[[26]])
Credit abrg.org
library("MSnbase")
cid <- calculateFragments("AEGKLRFK",
type=c("b", "y"), z=2)
ht(cid, n = 3)
## mz ion type pos z seq
## 1 36.52583 b1 b 1 2 A
## 2 101.04713 b2 b 2 2 AE
## 3 129.55786 b3 b 3 2 AEG
## ...
## mz ion type pos z seq
## 31 357.7185 y6* y* 6 2 GKLRFK
## 32 422.2398 y7* y* 7 2 EGKLRFK
## 33 457.7583 y8* y* 8 2 AEGKLRFK
idf <- system.file("mzid", "Tandem.mzid.gz", package = "msdata")
id <- openIDfile(idf)
softwareInfo(id)
## [1] "xtandem x! tandem CYCLONE (2010.06.01.5) "
## [2] "ProteoWizard MzIdentML 3.0.501 ProteoWizard"
enzymes(id)
## name nTermGain cTermGain minDistance missedCleavages
## 1 Trypsin H OH 0 1
str(psms(id))
## 'data.frame': 171 obs. of 18 variables:
## $ spectrumID : Factor w/ 169 levels "index=0","index=1",..: 13 117 159 73 70 23 152 92 94 30 ...
## $ chargeState : int 3 3 3 3 3 2 3 2 2 3 ...
## $ rank : int 1 1 1 1 1 1 1 1 1 1 ...
## $ passThreshold : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ experimentalMassToCharge: num 904 792 793 850 527 ...
## $ calculatedMassToCharge : num 903 792 792 850 527 ...
## $ sequence : Factor w/ 153 levels "AAFSAGSDPGASLR",..: 83 80 80 138 70 72 4 125 74 120 ...
## $ modNum : int 2 1 1 1 1 1 0 1 2 0 ...
## $ isDecoy : logi FALSE FALSE FALSE FALSE FALSE TRUE ...
## $ post : Factor w/ 18 levels "A","C","D","E",..: 14 9 9 16 13 3 14 15 13 13 ...
## $ pre : Factor w/ 6 levels "-","A","G","K",..: 4 6 6 4 6 4 6 6 6 6 ...
## $ start : int 217 292 292 842 297 392 50 1000 1006 407 ...
## $ end : int 239 313 313 865 311 404 63 1012 1017 422 ...
## $ DatabaseAccess : Factor w/ 147 levels "psu|NC_LIV_000750",..: 3 3 3 25 37 138 80 105 93 84 ...
## $ DBseqLength : int 376 376 376 1028 316 2167 646 1895 1103 718 ...
## $ DatabaseSeq : Factor w/ 147 levels "AAAPVFSAGPYSAVLPSTFNHPDRAQDVIEADLDRSPTKGSRLRRRCEAMAKFSYHELDESLAPPRSLYAFLLVGLIKGDAQERVFAVSSEPQDQMGLKRALKDDKSIKPT"| __truncated__,..: 69 69 69 111 67 12 47 23 36 77 ...
## $ DatabaseDescription : Factor w/ 1 level "": 1 1 1 1 1 1 1 1 1 1 ...
## $ acquisitionNum : num 12 285 83 21 198 13 77 26 261 137 ...
Next figure from Qeli and Ahrens (2010). See also Nesvizhskii and Aebersold (2005).
quantFile <- dir(system.file(package = "MSnbase", dir = "extdata"),
full.name = TRUE, pattern = "mzXML$")
identFile <- dir(system.file(package = "MSnbase", dir = "extdata"),
full.name = TRUE, pattern = "dummyiTRAQ.mzid")
x <- readMSData(quantFile)
x <- addIdentificationData(x, identFile)
fData(x)$pepseq
## [1] "VESITARHGEVLQLRPK" "IDGQWVTHQWLKK" NA
## [4] NA "LVILLFR"
File/format | package |
---|---|
Raw (mz*ML ) |
mzR , MSnbase |
mzTab | MSnbase |
mgf |
MSnbase |
mzIdentML |
mzR , mzID |
mzQuantML |
NA |
MS1 | MS2 | |
---|---|---|
Labelled | SILAC, N15 | iTRAQ, TMT |
Label-free | XIC | Spectral counting |
xq <- quantify(x, reporters = iTRAQ4, method = "max")
exprs(xq)
## iTRAQ4.114 iTRAQ4.115 iTRAQ4.116 iTRAQ4.117
## X1.1 706555.7 685055.1 929016.1 668245.2
## X2.1 260663.7 212745.0 163782.8 239142.7
## X3.1 2213566.0 2069209.6 2204032.2 2331846.8
## X4.1 616043.4 705976.6 671828.8 666845.6
## X5.1 1736128.2 1787622.5 1795311.8 1825523.0
head(fData(xq))
## spectrum scan number(s) passthreshold rank calculatedmasstocharge
## X1.1 1 1 TRUE 1 645.0375
## X2.1 2 2 TRUE 1 546.9633
## experimentalmasstocharge chargestate ms-gf:denovoscore ms-gf:evalue
## X1.1 645.3741 3 77 79.36958
## X2.1 546.9586 3 39 13.46615
## ms-gf:rawscore ms-gf:specevalue assumeddissociationmethod
## X1.1 -39 5.527468e-05 CID
## X2.1 -30 9.399048e-06 CID
## isotopeerror isdecoy post pre end start accession length
## X1.1 1 FALSE A R 186 170 ECA0984;ECA3829 231
## X2.1 0 FALSE A K 62 50 ECA1028 275
## description
## X1.1 DNA mismatch repair protein;acetolactate synthase isozyme III large subunit
## X2.1 2,3,4,5-tetrahydropyridine-2,6-dicarboxylate N-succinyltransferase
## pepseq modified modification idFile
## X1.1 VESITARHGEVLQLRPK FALSE NA dummyiTRAQ.mzid
## X2.1 IDGQWVTHQWLKK FALSE NA dummyiTRAQ.mzid
## databaseFile nprot npep.prot npsm.prot npsm.pep file
## X1.1 erwinia_carotovora.fasta 2 1 1 1 1
## X2.1 erwinia_carotovora.fasta 1 1 1 1 1
## retention.time precursor.mz precursor.intensity charge peaks.count
## X1.1 1501.35 645.3741 47659400 3 2921
## X2.1 1501.59 546.9586 26356100 3 1012
## tic ionCount ms.level acquisition.number collision.energy
## X1.1 182542000 668170086 2 1 40
## X2.1 16488100 56758067 2 2 40
## [ reached getOption("max.print") -- omitted 3 rows ]
pData(xq)
## mz reporters
## iTRAQ4.114 114.1112 iTRAQ4
## iTRAQ4.115 115.1083 iTRAQ4
## iTRAQ4.116 116.1116 iTRAQ4
## iTRAQ4.117 117.1150 iTRAQ4
qt <- quantify(rx, reporters = TMT6)
## qt <- readMSnSet("quantdata.csv", ecols = 5:11)
nqt <- normalise(qt, method = "vsn")
boxplot(exprs(nqt))
MAplot(nqt[, 1:2])
RforProteomics
packagelibrary("RforProteomics")
RforProteomics()
RProtVis()
citation(package = "RforProteomics")
Thank you for your attention