## ----setup, include=FALSE, cache=FALSE----------------------------------- library(knitr) ## set global chunk options opts_chunk$set(fig.path='figure/manual-', cache.path='cache/manual-', fig.align='center', fig.show='hold', par=TRUE) ## I use = but I can replace it with <-; set code/output width to be 68 options(replace.assign=TRUE, width=68, digits=4) ## tune details of base graphics (http://yihui.name/knitr/hooks) knit_hooks$set(par=function(before, options, envir){ if (before && options$fig.show!='none') par(mar=c(4,4,.1,.1),cex.lab=.95,cex.axis=.9,mgp=c(2,.7,0),tcl=-.3) }) ## ----style-knitr, eval=TRUE, echo=FALSE, results="asis"------------------ BiocStyle::latex() ## ----loadTFBSTools, echo=FALSE, warning=FALSE---------------------------- library("TFBSTools") ## ----PFMatrix, eval=TRUE, cache=TRUE, results = "markup"----------------- ## PFMatrix construction; Not all of the slots need to be initialised. pfm = PFMatrix(ID="MA0004.1", name="Arnt", matrixClass="Zipper-Type", strand="+", bg=c(A=0.25, C=0.25, G=0.25, T=0.25), tags=list(family="Helix-Loop-Helix", species="10090", tax_group="vertebrates",medline="7592839", type="SELEX", ACC="P53762", pazar_tf_id="TF0000003", TFBSshape_ID="11", TFencyclopedia_ID="580"), profileMatrix=matrix(c(4L, 19L, 0L, 0L, 0L, 0L, 16L, 0L, 20L, 0L, 0L, 0L, 0L, 1L, 0L, 20L, 0L, 20L, 0L, 0L, 0L, 0L, 20L, 0L), byrow=TRUE, nrow=4, dimnames=list(c("A", "C", "G", "T"))) ) pfm ## coerced to matrix as.matrix(pfm) ## access the slots of pfm ID(pfm) name(pfm) Matrix(pfm) ncol(pfm) length(pfm) ## convert a FPM to PWM, ICM pwm = toPWM(pfm, type="log2probratio", pseudocounts=0.8, bg=c(A=0.25, C=0.25, G=0.25, T=0.25)) icm = toICM(pfm, pseudocounts=sqrt(rowSums(pfm)[1]), schneider=FALSE, bg=c(A=0.25, C=0.25, G=0.25, T=0.25)) ## get the reverse complment matrix with all the same information except the strand. pwmRevComp <- reverseComplement(pwm) ## ----PFMatrixList, eval=TRUE, results ="markup", cache=TRUE-------------- pfm2 = pfm pfmList = PFMatrixList(pfm1=pfm, pfm2=pfm2, use.names=TRUE) pfmList names(pfmList) ## ----searchDB, eval=TRUE, results = "markup", cache=TRUE----------------- library(JASPAR2014) opts = list() opts[["species"]] = 9606 opts[["name"]] = "RUNX1" #opts[["class"]] = "Ig-fold" opts[["type"]] = "SELEX" opts[["all_versions"]] = TRUE PFMatrixList = getMatrixSet(JASPAR2014, opts) PFMatrixList opts2 = list() opts2[["type"]] = "SELEX" PFMatrixList2 = getMatrixSet(JASPAR2014, opts2) PFMatrixList2 ## ----operateDb, eval=FALSE, results="hide"------------------------------- ## db = "myMatrixDb.sqlite" ## initializeJASPARDB(db) ## storeMatrix(db, pfm) ## deleteMatrixHavingID(db, "MA0003") ## ----PWMmatrixMethods, eval=TRUE, results = "markup", cache=TRUE--------- pwm = toPWM(pfm, pseudocounts=0.8) pwm ## ----ICMmatrixMethods, eval=TRUE, results = "markup", cache=TRUE--------- icm = toICM(pfm, pseudocounts=0.8, schneider=TRUE) icm ## ----seqLogo1, fig.show="hold", fig.width=6, fig.height=4, dpi=300------- seqLogo(icm) ## ----rtl-PFMSimi, eval=TRUE, results = "markup", cache=TRUE-------------- ## one to one comparison data(MA0003.2) data(MA0004.1) pfmSubject = MA0003.2 pfmQuery = MA0004.1 PFMSimilarity(pfmSubject, pfmQuery) ## one to several comparsion PFMSimilarity(pfmList, pfmQuery) ## align IUPAC string IUPACString = "ACGTMRWSYKVHDBN" PFMSimilarity(pfmList, IUPACString) ## ----rtl-PWMSimilarity, eval=TRUE, results = "markup", cache=TRUE-------- data(MA0003.2) data(MA0004.1) pwm1 = toPWM(MA0003.2, type="prob") pwm2 = toPWM(MA0004.1, type="prob") PWMSimilarity(pwm1, pwm2, method="Euclidean") PWMSimilarity(pwm1, pwm2, method="Pearson") PWMSimilarity(pwm1, pwm2, method="KL") ## ----permuteMatrix, eval=TRUE, results = "markup", cache=TRUE------------ ## Matrice permutation permuteMatrix(pfmQuery) permuteMatrix(pfmList, type="intra") permuteMatrix(pfmList, type="inter") ## ----samplingMatrix, eval=FALSE, results = "markup", cache=TRUE---------- ## ## Dirichlet model training ## data(MA0003.2) ## data(MA0004.1) ## pfmList <- PFMatrixList(pfm1=MA0003.2, pfm2=MA0004.1, use.names=TRUE) ## dmmParameters <- dmmEM(pfmList, K=6, alg="C") ## ## Matrice sampling from trained Dirichlet model ## pwmSampled <- rPWMDmm(MA0003.2, dmmParameters$alpha0, dmmParameters$pmix, ## N=1, W=6) ## ----searchSeq, eval=TRUE, results = "markup", cache=TRUE---------------- library(Biostrings) data(MA0003.2) data(MA0004.1) pwmList = PWMatrixList(MA0003.2=toPWM(MA0003.2), MA0004.1=toPWM(MA0004.1), use.names=TRUE) subject = DNAString("GAATTCTCTCTTGTTGTAGTCTCTTGACAAAATG") siteset = searchSeq(pwm, subject, seqname="seq1", min.score="60%", strand="*") sitesetList = searchSeq(pwmList, subject, seqname="seq1", min.score="60%", strand="*") ## generate gff2 or gff3 style output head(writeGFF3(siteset)) head(writeGFF3(sitesetList)) head(writeGFF2(siteset)) ## get the relative scores relScore(siteset) relScore(sitesetList) ## calculate the empirical p-values of the scores pvalues(siteset, type="TFMPvalue") pvalues(siteset, type="sampling") ## ----searchAln, eval=TRUE, results = "markup", cache=TRUE---------------- library(Biostrings) data(MA0003.2) pwm <- toPWM(MA0003.2) aln1 <- DNAString("ACTTCACCAGCTCCCTGGCGGTAAGTTGATC---AAAGG---AAACGCAAAGTTTTCAAG") aln2 <- DNAString("GTTTCACTACTTCCTTTCGGGTAAGTAAATATATAAATATATAAAAATATAATTTTCATC") sitePairSet <- searchAln(pwm, aln1, aln2, seqname1="seq1", seqname2="seq2", min.score="50%", cutoff=0.5, strand="*", type="any") ## generate gff style output head(writeGFF3(sitePairSet)) head(writeGFF2(sitePairSet)) ## search the Axt alignment library(CNEr) axtFilesHg19DanRer7 <- file.path(system.file("extdata", package="CNEr"), "hg19.danRer7.net.axt") axtHg19DanRer7 <- readAxt(axtFilesHg19DanRer7) sitePairSet <- searchAln(pwm, axtHg19DanRer7, min.score="80%", windowSize=51L, cutoff=0.7, strand="*", type="any", conservation=NULL, mc.cores=2) GRangesTFBS <- toGRangesList(sitePairSet, axtHg19DanRer7) GRangesTFBS$targetTFBS GRangesTFBS$queryTFBS ## ----searchBSgenome, eval=FALSE, results = "hide"------------------------ ## library(rtracklayer) ## library(JASPAR2014) ## library(BSgenome.Hsapiens.UCSC.hg19) ## library(BSgenome.Mmusculus.UCSC.mm10) ## pfm <- getMatrixByID(JASPAR2014, ID="MA0004.1") ## pwm <- toPWM(pfm) ## chain <- import.chain("Downloads/hg19ToMm10.over.chain") ## sitePairSet <- searchPairBSgenome(pwm, BSgenome.Hsapiens.UCSC.hg19, ## BSgenome.Mmusculus.UCSC.mm10, ## chr1="chr1", chr2="chr1", ## min.score="90%", strand="+", chain=chain) ## ----MEME-wrapper, eval=FALSE, results = "hide"-------------------------- ## motifSet <- runMEME(file.path(system.file("extdata", ## package="TFBSTools"), "crp0.s"), ## binary="meme", ## arguments=list("-nmotifs"=3) ## ) ## ## Get the sites sequences and surrounding sequences ## sitesSeq(motifSet, type="all") ## ## Get the sites sequences only ## sitesSeq(motifSet, type="none") ## consensusMatrix(motifSet) ## ----session-info-------------------------------------------------------- sessionInfo()