## ----style, echo = FALSE------------------------------------------------- BiocStyle::markdown() ## ----, env, message=FALSE, echo=FALSE, cache=FALSE, warning=FALSE-------- library("Gviz") library("Pbase") library("Biostrings") library("biomaRt") library("BSgenome.Hsapiens.UCSC.hg19") library("ggplot2") ## ----schema, echo=FALSE, fig.cap='Mapping proteins to a genome reference.', fig.align='center'---- par(mar = rep(0, 4), oma = rep(0, 4)) xlim <- c(0, 10) ylim <-c(5.05, 10.25) plot(0, type = "n", axes = FALSE, xlab = "", ylab = "", xlim = xlim, ylim = ylim) y <- 10 g <- c(0, 10) exons <- list(c(0, 2), c(4.5, 5), c(6, 7.1), c(9, 10)) trans <- list(c(1, 2, 3, 4), c(1, 3, 4), c(1, 4)) prot <- list(exons[[1]] + c(0.5, 0), ## 5'UTR exons[[3]], exons[[4]] + c(0, -0.5)) ## 3'UTR peps <- list(c(1, 1.29), c(6.1, 6.3), c(6.85, 7), c(9.1, 9.25)) rect(g[1], y-0.25, g[2], y+0.25, col = "black") abline(h = y, lwd = 2) text(g[1], y+0.25, expression(g[s]), pos = 3) text(g[2], y+0.25, expression(g[e]), pos = 3) text(0-0.26, y, "G", pos = 3) abline(v = c(unlist(exons), unlist(prot), unlist(peps)), lty = "dotted", col = "lightgrey") y <- y - 1 for (i in 1:length(trans)) { tr <- trans[[i]] abline(h = y, lty = "dotted") ## text(0-0.26, y, expression(T[x]), pos = 3) text(0-0.26, y, substitute(paste("T", list(x)), list(x = i)), pos = 3) for (j in 1:length(tr)) { e1 <- exons[[tr[j]]][1] e2 <- exons[[tr[j]]][2] rect(e1, y-0.25, e2, y+0.25, col = "grey") if (i == 1) { text(e1, y+0.25, expression(e[s]^i), pos = 3) text(e2, y+0.25, expression(e[e]^i), pos = 3) } text(mean(c(e1, e2)), y, paste0("i = ", tr[j]), cex = .7) } y <- y - .6 } y <- y - .4 abline(h = y, lty = "dotted") text(0-0.26, y, expression(P), pos = 3) for (i in 1:length(prot)) { pr <- prot[[i]] rect(pr[1], y - 0.25, pr[2], y + 0.25, col = "steelblue") text(pr[1], y + 0.25, expression(p[s]^j), pos = 3) text(pr[2], y + 0.25, expression(p[e]^j), pos = 3) text(mean(c(pr[1], pr[2])), y, paste0("j = ", i), cex = .7) } y <- y - .75 ## concat protein ## center x <- mean(xlim) protlen <- sum(sapply(prot, diff)) X0 <- x0 <- x - protlen/2 ## left start abline(h = y, lty = "dotted") text(0-0.26, y, expression(P), pos = 3) for (i in 1:length(prot)) { pr <- prot[[i]] .pr1 <- x0 .pr2 <- x0 + (pr[2] - pr[1]) rect(.pr1, y - 0.25, .pr2, y + 0.25, col = "steelblue") segments(.pr1, y + 0.25, pr[1], y + .75 - .25, lty = "dotted") segments(.pr2, y + 0.25, pr[2], y + .75 - .25, lty = "dotted") text(mean(c(.pr1,.pr2)), y, paste0("j = ", i), cex = .7) x0 <- .pr2 } text(X0, y, expression(1), pos = 2) text(X0 + protlen, y, expression(L[P]), pos = 4) ## pos and length relpeps <- list(c(0.5, 0.29), ## 1.% is length of exon 1 c(1.5 + 0.1, 0.2), c(1.5 + 0.85, 0.15), ## 1.1 is length of exon 2 c(1.5 + 1.1 + 0.1, 0.15)) for (i in 1:length(relpeps)) { rp <- relpeps[[i]] pep <- peps[[i]] rect(X0 + rp[1], y - 0.25, X0 + rp[1] + rp[2], y + 0.25, col = "#FFA50450", lwd = 0) segments(X0 + rp[1], y - 0.25, pep[1], y - .75 + .25, lty = "dotted") segments(X0 + rp[1] + rp[2], y - 0.25, pep[2], y - .75 + .25, lty = "dotted") } y <- y - .75 abline(h = y, lty = "dotted") text(0-0.26, y, expression(Pi), pos = 3) for (i in 1:length(peps)) { pep <- peps[[i]] rect(pep[1], y - 0.25, pep[2], y + 0.25, col = "#FFA504FF") text(pep[1], y + 0.25, expression(pi[s]^k), pos = 3, cex = .7) text(pep[2], y + 0.25, expression(pi[e]^k), pos = 3, cex = .7) text(mean(c(pep[1], pep[2])), y-0.35, paste0("k = ", i), cex = .8) } ## ----, echo=FALSE-------------------------------------------------------- data(p) sp <- seqnames(p)[5] ## ----, p, message=FALSE-------------------------------------------------- library("Pbase") data(p) p seqnames(p) dat <- data.frame(i = 1:length(p), npeps = elementLengths(pfeatures(p)), protln = width(aa(p))) dat sp <- seqnames(p)[5] ## ----bm, cache=TRUE, message=FALSE--------------------------------------- library("biomaRt") ens <- useMart("ensembl", "hsapiens_gene_ensembl") bm <- select(ens, keys = sp, keytype = "uniprot_swissprot_accession", columns = c( "ensembl_gene_id", "ensembl_transcript_id", "ensembl_peptide_id", "ensembl_exon_id", "description", "chromosome_name", "strand", "exon_chrom_start", "exon_chrom_end", "5_utr_start", "5_utr_end", "3_utr_start", "3_utr_end", "start_position", "end_position")) bm ## ------------------------------------------------------------------------ bm <- bm[bm$chromosome_name == "X", ] tr <- unique(bm$ensembl_transcript_id) pr <- unique(bm$ensembl_peptide_id) ## ----gviz, cache=TRUE, message=FALSE------------------------------------- library("Gviz") options(ucscChromosomeNames=FALSE) bmTrack <- BiomartGeneRegionTrack(start = min(bm$start_position), end = max(bm$end_position), chromosome = "chrX", genome = "hg19") ## see also the filters param and getBM to ## filter on biotype == "protein_coding" bmTracks <- split(bmTrack, transcript(bmTrack)) grTrack <- bmTracks[[which(names(bmTracks) == tr)]] names(grTrack) <- tr ## ----gvizdfr------------------------------------------------------------- as(grTrack, "data.frame") ## ----gvizfig, fig.align='center', cache=TRUE----------------------------- ideoTrack <- IdeogramTrack(genome = "hg19", chromosome = "chrX") axisTrack <- GenomeAxisTrack() seqlevels(ranges(grTrack)) <- chromosome(grTrack) <- chromosome(ideoTrack) plotTracks(list(ideoTrack, axisTrack, grTrack), add53 = TRUE, add35 = TRUE) ## ----, txdb, cache=TRUE, message=FALSE----------------------------------- library("TxDb.Hsapiens.UCSC.hg19.knownGene") txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene txTr <- GeneRegionTrack(txdb, chromosome = "chrX", start = bm$start_position[1], end = bm$end_position[1]) head(as(txTr, "data.frame")) ## ----, pp0, echo=FALSE--------------------------------------------------- pp <- p[sp] n <- elementLengths(pranges(pp)) pepstring <- paste(unique(pcols(pp)[[1]]$pepseq), collapse = ", ") ## ----, pepsfig, fig.align='center'--------------------------------------- pp <- p[sp] pranges(pp) plot(pp) ## ----, protranges-------------------------------------------------------- (prng <- ranges(grTrack[feature(grTrack) == "protein_coding",])) ## ----, extractat, eval=FALSE--------------------------------------------- # library("Biostrings") # chrx <- readDNAStringSet("./Homo_sapiens.GRCh37.75.dna.chromosome.X.fa.gz") # seq <- extractAt(chrx[[1]], ranges(prng)) # pptr <- translate(unlist(seq)) ## ----, protfromgenome---------------------------------------------------- ucsc <- select(ens, key = pr, keytype = "ensembl_peptide_id", columns = "ucsc") ucsc library("BSgenome.Hsapiens.UCSC.hg19") prng2 <- ranges(txTr[transcript(txTr) %in% ucsc]) prng2 <- prng2[prng2$feature == "CDS"] seq <- getSeq(BSgenome.Hsapiens.UCSC.hg19, prng2) pptr <- translate(unlist(seq)) ## remove stop codon pptr <- pptr[1:417] ## ----, align------------------------------------------------------------- writePairwiseAlignments(pairwiseAlignment(pp[[1]], pptr)) ## ----, mapping----------------------------------------------------------- ## exons ranges along the protein j <- cumsum(width(seq)) i <- cumsum(c(1, width(seq)))[1:length(j)] prex <- IRanges(start = i, end = j) peprng <- pranges(pp)[[1]] ## peptide positions on cdna peprng2 <- IRanges(start = 1 + (start(peprng)-1) * 3, width = width(peprng) * 3) ## find exon and position in prex start_ex <- subjectHits(findOverlaps(start(peprng2), prex)) end_ex <- subjectHits(findOverlaps(end(peprng2), prex)) getPos <- function(p, i, prtex = prex, nclex = prng2) { ## position in cdna ## exon index start(prng2[i]) + (p - start(prtex[i])) } peptides_on_genome <- IRanges(start = getPos(start(peprng2), start_ex), end = getPos(end(peprng2), end_ex)) ## ----, pepcoords, fig.align='center'------------------------------------- pepTr <- AnnotationTrack(start = start(peptides_on_genome), end = end(peptides_on_genome), chr = "chrX", genome = "hg19", strand = "*", id = pcols(pp)[[1]]$pepseq, name = "pfeatures", col = "steelblue") plotTracks(list(ideoTrack, axisTrack, grTrack, pepTr), groupAnnotation = "id", just.group = "below", fontsize.group = 9, add53 = TRUE, add35 = TRUE) ## ----, msmsspectra, fig.align='center'----------------------------------- data(pms) library("ggplot2") details <- function(identifier, ...) { p <- plot(pms[[as.numeric(identifier)]], full=TRUE, plot=FALSE) + ggtitle("") p <- p + theme_bw() + theme(axis.text.y = element_blank(), axis.text.x = element_blank()) + labs(x = NULL, y = NULL) print(p, newpage=FALSE) } deTrack <- AnnotationTrack(start = start(peptides_on_genome), end = end(peptides_on_genome), genome = "hg19", chromosom = "chrX", id = pcols(pp)[[1]]$acquisitionnum, name = "MS2 spectra", stacking = "squish", fun = details) plotTracks(list(ideoTrack, axisTrack, deTrack, grTrack), add53 = TRUE, add35 = TRUE) ## ----, si---------------------------------------------------------------- sessionInfo()