## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----setup, include = FALSE---------------------------------------------------
library(broadSeq)

## ----fig.cap = "Input and output data structures of different methods to idetify differentially expressed genes.", echo = FALSE----
knitr::include_graphics("img/io_p.png")

## ----echo = FALSE,fig.cap="Flowchart"-----------------------------------------
knitr::include_graphics("img/broadSeq.png")

## ----eval = FALSE-------------------------------------------------------------
#  library(broadSeq)
#  library(ggplot2)

## ----message=FALSE------------------------------------------------------------
se <- readRDS(system.file("extdata","rat_vole_mouseSE_salmon.rds", package = "broadSeq"))
SummarizedExperiment::assayNames(se)

## -----------------------------------------------------------------------------
as.data.frame(colData(se)) %>% dplyr::count(stage,species) %>% tidyr::spread(stage,n)

se$stage <- factor(se$stage,levels = c("Bud","Cap","Late Cap","Bell"))

## ----lowExpr,warning=FALSE,fig.width= 6---------------------------------------
assays(se)[["counts"]][,5] %>% ggpubr::ggdensity(y = "count")+
    ggplot2::geom_vline(xintercept = 10)+ggplot2::scale_x_log10()

keep <- (assays(se)[["counts"]] >= 3) %>% rowSums() >= 5 
# smallest Group Size is 5
table(keep)

## ----warning=FALSE,message=FALSE----------------------------------------------
se <- broadSeq::normalizeEdgerCPM(se ,method = "none",cpm.log = TRUE )
## The normalized values are added with the assay name "logCPM"
SummarizedExperiment::assayNames(se)

## ----warning=FALSE,message=FALSE----------------------------------------------
se <- broadSeq::normalizeEdgerCPM(se , method = "TMM", cpm.log = FALSE )
## The normalized values are added with the assay name "TMM"
SummarizedExperiment::assayNames(se)

## -----------------------------------------------------------------------------
assays(se)[["counts"]][1:5,1:5]
assays(se)[["TMM"]][1:5,1:5]
assays(se)[["logCPM"]][1:5,1:5]

## ----warning=FALSE------------------------------------------------------------
se <- broadSeq::transformDESeq2(se,method = "vst"  )

## -----------------------------------------------------------------------------
se <- broadSeq::transformDESeq2(se, method = "normTransform"  )

## ----eval=FALSE---------------------------------------------------------------
#  se <- broadSeq::transformDESeq2(se, method = "rlog")

## -----------------------------------------------------------------------------
SummarizedExperiment::assayNames(se)

## ----warning=FALSE,fig.height=6,fig.width=8-----------------------------------
p <- broadSeq::sampleAssay_plot(se[, se$species=="Mouse" ], 
                           assayName = "counts", fill = "stage", 
                           yscale = "log2")+ rremove("x.text")

p1 <- broadSeq::sampleAssay_plot(se[, se$species=="Mouse"], 
                           assayName = "vst", fill = "stage")+ rremove("x.text")

p2 <- broadSeq::sampleAssay_plot(se[, se$species=="Mouse"], 
                           assayName = "TMM", fill = "stage", 
                           yscale = "log10")+ rremove("x.text")

p3 <- broadSeq::sampleAssay_plot(se[, se$species=="Mouse"], 
                           assayName = "logCPM", fill = "stage")+ rremove("x.text")


ggarrange(p,p1,p2,p3, common.legend = TRUE, labels = c("A","B","C"))

## ----eval=FALSE---------------------------------------------------------------
#  if (requireNamespace("vsn", quietly = TRUE)) {
#      library("vsn")
#      x <- meanSdPlot(
#          log2(assays(se[, se$species == "Rat"])[["counts"]]+1),
#          plot = FALSE)
#      print(x$gg +ggtitle(label = "log2(n+1) "))
#  
#      x <- meanSdPlot(
#          assays(se[, se$species == "Rat"])[["vst"]],
#          plot = FALSE)
#  
#      print(x$gg +ggtitle(label = "Vst"))
#  
#      x <- meanSdPlot(
#          assays(se[, se$species == "Rat"])[["logCPM"]],
#          plot = FALSE)
#      print(x$gg + ggtitle(label = "logCPM"))
#  }
#  

## ----message=FALSE,warning=FALSE----------------------------------------------
## Multiple assay of a single gene
broadSeq::assay_plot(se, feature = c("Shh"), 
           assayNames = c("counts","logCPM","vst","TMM"),
           x = "stage", fill="species", add="dotplot", palette = "npg")

## Expression of multiple genes from a single assay
broadSeq::genes_plot(se, 
                     features = c("Shh","Edar"), 
                     facet.by = "symbol",
                     x = "stage", assayName = "vst", fill="species", palette = "jco")

## ----warning=FALSE,message=FALSE,fig.height=6,fig.width=8---------------------
jco <- broadSeq::genes_plot(se[,se$species == "Mouse"], 
                     features = c("Shh"), facet.by = "symbol", assayName =  "logCPM",
                     x = "stage",  fill="stage", add="dotplot", xlab = "", 
                     title = "Journal of Clinical Oncology", palette = "jco") 

npg <- broadSeq::genes_plot(se[,se$species == "Mouse"], 
                     features = c("Shh"), facet.by = "symbol",assayName =  "logCPM",
                     x = "stage", fill="stage", add="dotplot", xlab = "",
                     title = "Nature Publishing Group", palette = "npg") 

aaas <- broadSeq::genes_plot(se[,se$species == "Mouse"], 
                     features = c("Shh"), facet.by = "symbol", assayName = "logCPM",
                     x = "stage", fill="stage", add="dotplot", xlab = "",
                     title = "Science", palette = "aaas")

nejm <- broadSeq::genes_plot(se[,se$species == "Mouse"], 
                     features = c("Shh"), facet.by = "symbol", assayName = "logCPM",
                     x = "stage", fill="stage", add="dotplot",  xlab = "",
                     title = "New England Journal of Medicine",palette = "nejm")

# ggarrange(jco,npg,aaas,nejm, 
#           common.legend = TRUE,legend = "none",
#           labels = c("A","B","C","D"))

ggarrange(jco+ggpubr::rotate_x_text(), npg+ggpubr::rotate_x_text(),
          aaas+ggpubr::rotate_x_text(),nejm+ggpubr::rotate_x_text(),
          nrow = 1, common.legend = TRUE,legend = "none",
          labels = c("A","B","C","D")) %>% 
    annotate_figure( top = text_grob("Color palette")) 

## ----fig.height=6,fig.width=8-------------------------------------------------
broadSeq::plot_MDS(se, scaledAssay = "vst", ntop=500, 
                   color = "species", shape = "stage", 
                   ellipse=TRUE, legend = "bottom")

## -----------------------------------------------------------------------------
head(rowData(se))

## ----fig.height=6,fig.width=8-------------------------------------------------
p_vst <- broadSeq::plotHeatmapCluster(
    se,
    scaledAssay = "vst",
    annotation_col = c("species", "stage"),
    annotation_row = c("Class","gene_biotype"),
    ntop = 30, show_geneAs = "symbol",
    cluster_cols = TRUE, cluster_rows = FALSE,
    show_rownames = TRUE, show_colnames = FALSE,
    main = "Top 30 variable gene vst"
)

## ----warning=FALSE------------------------------------------------------------
computedPCA_logCPM <- broadSeq::prcompTidy(se, scaledAssay = "logCPM", ntop = 500)
## PCA based on vst values
computedPCA_vst <- broadSeq::prcompTidy(se, scaledAssay = "vst", ntop = 500)

## ----fig.width=8,fig.height=6-------------------------------------------------
plotAnyPC(computedPCA = computedPCA_logCPM,
          x = 1, y = 2, color = "species", shape = "stage",
          legend = "bottom")

## ----fig.width=8,fig.height=6-------------------------------------------------
pca_vst <- plotAnyPC(computedPCA = computedPCA_vst,
            x = 2, y = 3,  color = "species", shape = "stage", 
            legend = "bottom") 

## ----fig.width=8,fig.height=6-------------------------------------------------
computedPCA_vst$eigen_values %>%
        dplyr::filter(var_exp >= 2) %>%
    ggbarplot(x="PC",y="var_exp", label = TRUE, label.pos = "out")

## ----fig.width=8,fig.height=6-------------------------------------------------
pca_vst_2_3 <-plotAnyPC(computedPCA = computedPCA_vst,
                x = 2, y = 3,  
                color = "species", shape = "stage", legend = "bottom")
# pca_vst_2_3

## ----fig.width=7,fig.height=6-------------------------------------------------
computedPCA_vst %>% broadSeq::getFeatureLoadRanking(keep = c("symbol","Class")) %>% head()

#  Top 5 genes of PC2

computedPCA_vst$loadings %>% top_n(5,abs(PC2)  ) %>% dplyr::select(gene,PC2)

pca_vst_loading <- computedPCA_vst %>% 
    broadSeq::getFeatureLoadRanking(keep = c("symbol","Class"), topN = 50, pcs=1:10) %>% 
    dplyr::count(Class, PC) %>%
    ggbarplot(
        x = "PC", y = "n", fill = "Class",
        legend = "bottom", palette = c("red","blue","orange","purple","white","grey")
    ) 
# pca_vst_loading

## ----fig.width=8,fig.height=6-------------------------------------------------
# By default it plots top 2 genes from each PC axis
pca_vst_bi <- broadSeq::biplotAnyPC(computedPCA = computedPCA_vst, 
            x = 1, y = 2, genesLabel = "symbol", 
            color = "species", shape = "stage", 
            legend = "bottom")
# pca_vst_bi

## ----fig.width=8,fig.height=8-------------------------------------------------
ggarrange(
    ggarrange(pca_vst_bi+ggtitle(label =  ""),
          pca_vst_2_3+ggtitle(label =  ""), common.legend = TRUE),
    pca_vst_loading, nrow = 2)


## ----fig.width=8,fig.height=6-------------------------------------------------
# Top 5 genes of PC3
biplotAnyPC(computedPCA = computedPCA_vst,x = 2, y = 3, 
            color = "species", shape = "stage",
            genes= computedPCA_vst$loadings %>% 
                top_n(5,abs(PC3)) %>% pull(gene),
            genesLabel = "symbol")

## Plot progression gene "Shh" 
biplotAnyPC(computedPCA = computedPCA_vst,x = 2, y = 3, 
            color = "species", shape = "stage",
            genes=c("Shh"),
            genesLabel = "symbol")

## ----warning=FALSE,eval=TRUE--------------------------------------------------
se <- readRDS(system.file("extdata","rat_vole_mouseSE_salmon.rds", package = "broadSeq"))

# To reduce the run time, subset of the data used here
se <- se[,colData(se)$species == "Mouse"]

## ----warning=FALSE,eval=FALSE,echo=FALSE--------------------------------------
#  count_matrix <- as.matrix(read.table(file = system.file("extdata",
#                                                "tooth_RNASeq_counts.txt",
#                                                package = "DELocal")))
#  colData <- data.frame(condition=gsub("\\..*",x=colnames(count_matrix),replacement = ""))
#  
#  gene_location <- read.table(file = system.file("extdata",
#                                                "gene_location.txt",
#                                                package = "DELocal"))
#  
#  se <- SummarizedExperiment::SummarizedExperiment(assays=list(counts=count_matrix),
#                                                        rowData = gene_location,
#                                                        colData=colData)
#  # contrast= c("condition","ME13","ME14")

## ----warning=FALSE------------------------------------------------------------
head(rownames(se))
head(rowData(se))

## ----warning=FALSE,eval=TRUE--------------------------------------------------
head(colData(se))
table(colData(se)$stage)

## ----warning=FALSE,fig.width=6,fig.height=6,message=FALSE---------------------
result_Noiseq <- 
    use_NOIseq(se = se, 
           colData_id = "stage", control = "Bud", treatment = "Cap",
           rank = TRUE, 
           r = 10) # r is an argument of NOISeq::noiseqbio

head(result_Noiseq)

## ----warning=FALSE,fig.width=8,fig.height=6,message=FALSE---------------------
pg <- broadSeq::genes_plot(se, x = "stage", assayName =  "counts",  
           features = result_Noiseq %>% dplyr::filter(rank <5) %>% rownames(),
           fill="stage", facet.by = "symbol",
           palette="jco", add = "dotplot")+rotate_x_text()

pg_sc <- ggscatter(result_Noiseq, x="Bud_mean", y="Cap_mean",color = "prob")+ 
    scale_x_log10()+scale_y_log10()

pg+pg_sc

## ----warning=FALSE,results="hide", fig.keep = "none"--------------------------
# limma 
?use_limma_trend(se, colData_id, control, treatment, rank = FALSE, ...)
?use_limma_voom(se, colData_id, control, treatment, rank = FALSE, ...)

# edgeR 
?use_edgeR_exact(se, colData_id, control, treatment, rank = FALSE, ...)
?use_edgeR_GLM(se, colData_id, control, treatment, rank = FALSE, ...)

# deseq2
?use_deseq2(se, colData_id, control, treatment, rank = FALSE, ...)

# DELocal
?use_DELocal(se, colData_id, control, treatment, rank = FALSE, ...)

# noiseq
?use_NOIseq(se, colData_id, control, treatment, rank = FALSE, ...) 

# EBSeq 
?use_EBSeq(se, colData_id, control, treatment, rank = FALSE, ...)

# samseq
?use_SAMseq(se, colData_id, control, treatment, rank = FALSE, ...)

## ----warning=FALSE,results="hide", fig.keep = "none"--------------------------
# First define a named list of functions
funs <- list(limma_trend = use_limma_trend, limma_voom = use_limma_voom,
             edgeR_exact = use_edgeR_exact, edgeR_glm = use_edgeR_GLM,
             deseq2 = use_deseq2, 
             DELocal = use_DELocal, noiseq = use_NOIseq, 
             EBSeq = use_EBSeq) 


multi_result <- broadSeq::use_multDE(
    se = se, 
    deFun_list = funs, return.df = TRUE,  
    colData_id = "stage", control = "Bud", treatment = "Cap", 
    rank = TRUE)

## ----warning=FALSE------------------------------------------------------------
head(multi_result)
# nrow(multi_result) == nrow(se)
colnames(multi_result)

## ----warning=FALSE,fig.width=6,fig.height=6,eval=TRUE-------------------------
clusters <- multi_result %>% dplyr::select(ends_with("rank")) %>% t() %>% dist() %>% hclust()
plot(clusters,main =  "distance: Euclidean")

## ----warning=FALSE,fig.width=6,fig.height=6,eval=TRUE-------------------------
multi_result %>% broadSeq::volcanoPlot(
    pValName = "deseq2_padj",
    lFCName = "deseq2_log2FoldChange",
    labelName = "symbol",
    palette = "lancet" ,
    selectedLabel =
        multi_result %>% dplyr::arrange(deseq2_padj) %>% pull(symbol) %>% head()
)

multi_result %>% broadSeq::volcanoPlot(
    pValName = "deseq2_padj",
    lFCName = "deseq2_log2FoldChange",
    labelName = "symbol",
    palette = c("purple","orange","grey"),
    selectedLabel = list(criteria = "(`x` > 5 | `x` < -2) & (`y` > 10)")
) +xlim(-7.5,7.5)

## ----warning=FALSE,error=FALSE------------------------------------------------
sessionInfo()