% \VignetteIndexEntry{Reporting on RNA-seq differential expression}
% \VignetteDepends{}
% \VignetteKeywords{reprise}
% \VignettePackage{ReportingTools}
\documentclass[10pt]{article}

\usepackage{times}
\usepackage{hyperref}
\usepackage{Sweave}

\textwidth=6.5in
\textheight=8.5in
\oddsidemargin=-.1in
\evensidemargin=-.1in
\headheight=-.3in

\title{Using ReportingTools in an Analysis of RNA-seq Data}
\author{Jessica L. Larson and Christina Chaivorapol}
\date{\today}

\begin{document}

\maketitle
\tableofcontents

\newpage

\section{Introduction}
The \tt ReportingTools \rm package can be used with differential gene expression results from RNA-sequencing analysis.  In this vignette we show how to \tt publish \rm output from an \tt edgeR\rm, Gene Ontology (GO) and/or Protein family (PFAM) analysis.  In the final section we \tt publish \rm all our pages onto one, creating a comprehensive output page.


\section{Differential expression analysis with \tt edgeR \rm}
In this section we demonstrate how to use the \tt ReportingTools \rm package to generate a table of differentially expressed genes as determined by the \tt edgeR \rm software.
We begin by loading our library and data set.  The \tt mockRnaSeqData \rm contains random RNA-seq output for random mouse genes.

<<load data, eval=FALSE>>=
library(ReportingTools)
data(mockRnaSeqData)
@

Next, we run \tt edgeR \rm to find differentially expressed genes.
<<run_edgeR, eval=FALSE>>=
library(edgeR)
conditions <- c(rep("case",3), rep("control", 3))
d <- DGEList(counts = mockRnaSeqData, group = conditions)
d <- calcNormFactors(d)
d <- estimateCommonDisp(d)
## Get an edgeR object
edgeR.de <- exactTest(d)
@

Now the results can be written to a report using the \tt DGEExact \rm object. 

<<edgeR_report, eval=FALSE>>=
library(lattice)
rep.theme <- reporting.theme()
## Change symbol colors in plots
rep.theme$superpose.symbol$col <- c("blue", "red")
rep.theme$superpose.symbol$fill <- c("blue", "red")
lattice.options(default.theme = rep.theme)
## Publish a report of the top 10 genes with p-values < 0.05 and log-fold change > 2
## In this case, the plots contain the counts from mockRnaSeqData, which are not normalized.
## The publish function does not normalize counts for the countTable argument to allow for
## flexibility in plotting various units (e.g. RPKM instead of counts).

deReport <- HTMLReport(shortName = 'RNAseq_analysis_with_edgeR',
    title = 'RNA-seq analysis of differential expression using edgeR',
    reportDirectory = "./reports")
publish(edgeR.de, deReport, countTable=mockRnaSeqData,
	conditions=conditions, annotation.db = 'org.Mm.eg', 
	pvalueCutoff = .05, lfc = 2, n = 10, name="edgeR")
finish(deReport)

## If you would like to plot normalized counts, run the following commands instead:
## mockRnaSeqData.norm <- d$pseudo.counts
## publish(edgeR.de, deReport, mockRnaSeqData.norm, 
##        conditions, annotation.db = 'org.Mm.eg', 
## 	  pvalueCutoff = .05, lfc = 2, n = 10)
## finish(deReport)
@ 

\begin{figure}
\centering
\includegraphics[scale = .5]{rnaseq1.png}
\caption{Resulting page created by \tt publish \rm for \tt edgeR.de\rm.}
\end{figure}



We can also ouput results of the LRT test from edgeR.

<<edgeR_report, eval=FALSE>>=
d <- DGEList(counts = mockRnaSeqData, group = conditions)
d <- calcNormFactors(d)
design <- model.matrix(~conditions)
d <- estimateGLMCommonDisp(d, design)
d <- estimateGLMTrendedDisp(d, design)
d <- estimateGLMTagwiseDisp(d, design)
fit <- glmFit(d,design)
edgeR.lrt <- glmLRT(fit, coef=2)

deReport2 <- HTMLReport(shortName = 'RNAseq_analysis_with_edgeR_2',
    title = 'RNA-seq analysis of differential expression using edgeR (LRT)',
    reportDirectory = "./reports")
publish(edgeR.lrt, deReport2, countTable=mockRnaSeqData,
	conditions=conditions, annotation.db = 'org.Mm.eg', 
	pvalueCutoff = .05, lfc = 2, n = 10, name="edgeRlrt")
finish(deReport2)
@ 


\section{Differential expression analysis with \tt DESeq2 \rm}
In this section we demonstrate how to use the \tt ReportingTools \rm package to generate a table of differentially expressed genes as determined by the \tt DESeq2 \rm packages.


We start by running \tt DESeq2 \rm to find differentially expressed genes.
<<run_DESeq2, eval=FALSE>>=
library(DESeq2)
conditions <- c(rep("case",3), rep("control", 3))
mockRna.dse <- DESeqDataSetFromMatrix(countData = mockRnaSeqData,
                        colData = as.data.frame(conditions), design = ~ conditions)
colData(mockRna.dse)$conditions <- factor(colData(mockRna.dse)$conditions, levels=c("control", "case"))
## Get a DESeqDataSet object
mockRna.dse <- DESeq(mockRna.dse)
@

Now the results can be written to a report using the \tt DESeqDataSet \rm object.  

<<DESeq2_report, eval=FALSE>>=
des2Report <- HTMLReport(shortName = 'RNAseq_analysis_with_DESeq2',
    title = 'RNA-seq analysis of differential expression using DESeq2',
    reportDirectory = "./reports")
publish(mockRna.dse,des2Report, pvalueCutoff=0.05,
    annotation.db="org.Mm.eg.db", factor = colData(mockRna.dse)$conditions,
    reportDir="./reports")
finish(des2Report)
@ 

\begin{figure}
\centering
\includegraphics[scale = .5]{rnaseq3.png}
\caption{Resulting page created with \tt DESeqDataSet \rm object from \tt DESeq2 \rm analysis}
\end{figure}

\section{GO analysis using GOstats}
This section will demonstrate how to use \tt ReportingTools \rm to write a table of GO analysis results to an html file.  First we select our genes of interest, and then run the \tt hyperGTest\rm.

<<Do GO analysis, eval=FALSE>>=
library(GOstats)
library(org.Mm.eg.db)
tt <- topTags(edgeR.de, n = 1000, adjust.method = 'BH', sort.by = 'p.value')
selectedIDs <- rownames(tt$table)
universeIDs <- rownames(mockRnaSeqData)
goParams <- new("GOHyperGParams", 
    geneIds = selectedIDs, 
    universeGeneIds = universeIDs, 
    annotation ="org.Mm.eg" , 
    ontology = "MF", 
    pvalueCutoff = 0.01,
    conditional = TRUE, 
    testDirection = "over")
goResults <- hyperGTest(goParams)
@

With these results, we can then make the GO report.  

<<make the GO report, eval=FALSE>>=
goReport <- HTMLReport(shortName = 'go_analysis_rnaseq',
	title = "GO analysis of mockRnaSeqData",
	reportDirectory = "./reports")
publish(goResults, goReport, selectedIDs=selectedIDs, annotation.db="org.Mm.eg", 
	pvalueCutoff= 0.05)
finish(goReport)
@


\section{PFAM analysis}
In this section, we show how to use \tt ReportingTools \rm to write a table of PFAM analysis results to an html file.  First we run the \tt hyperGTest \rm using our genes of interest from the previous section.


<<Do PFAM analysis, eval=FALSE>>=
library(Category)
params <- new("PFAMHyperGParams", 
	geneIds= selectedIDs, 
	universeGeneIds=universeIDs, 
	annotation="org.Mm.eg",
	pvalueCutoff= 0.01,
	testDirection="over")
PFAMResults <- hyperGTest(params)
@



Then we make the PFAM report.
<<make the PFAM report, eval=FALSE>>=
PFAMReport <- HTMLReport(shortName = 'pfam_analysis_rnaseq',
	title = "PFAM analysis of mockRnaSeqData",
	reportDirectory = "./reports")
publish(PFAMResults, PFAMReport, selectedIDs=selectedIDs, annotation.db="org.Mm.eg",categorySize=5)
finish(PFAMReport)
@

\begin{figure}
\centering
\includegraphics[scale = .5]{rnaseq4.png}
\caption{Resulting page created by \tt publish \rm for \tt PFAMResults\rm}
\end{figure}


\section{Putting it all together}
Here, we make an index page that puts all three analyses together for easy navigation.
<<make the index page, eval=FALSE>>=
indexPage <- HTMLReport(shortName = "indexRNASeq",
    title = "Analysis of mockRnaSeqData",
    reportDirectory = "./reports")
publish(Link(list(deReport,des2Report, goReport, PFAMReport), report = indexPage),
    indexPage)
finish(indexPage)
@

\begin{figure}
\centering
\includegraphics[scale = .5]{rnaseq5.png}
\caption{Resulting page created by calling \tt publish \rm on all our analysis pages}
\end{figure}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{References}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Huntley, M.A., Larson, J.L., Chaivorapol, C., Becker, G., Lawrence, M., Hackney, J.A., 
and J.S. Kaminker. (2013).  ReportingTools: an automated results processing and 
presentation toolkit for high throughput genomic analyses. {\it Bioinformatics}. 
{\bf 29}(24): 3220-3221.

\end{document}