## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(
    collapse = TRUE,
    comment = "#>"
)

## ----seqArchRplus-install, echo=TRUE, eval=FALSE------------------------------
#  ## When available on Bioconductor
#  if (!require("BiocManager", quietly = TRUE)) {
#      install.packages("BiocManager")
#  }
#  BiocManager::install("seqArchRplus")

## ----setup-two, echo=TRUE,include=TRUE,results="hide",message=FALSE,warning=FALSE----
# Load seqArchRplus
library(seqArchRplus)
library(seqArchR)
library(Biostrings)
library(TxDb.Dmelanogaster.UCSC.dm6.ensGene)
library(org.Dm.eg.db)
library(ChIPseeker)

# Set seed for reproducibility
set.seed(1234)

## ----prepare-fetch-data, eval=TRUE--------------------------------------------
## 1. Raw DNA sequences
raw_seqs <- Biostrings::readDNAStringSet(
    filepath = system.file("extdata",
        "example_promoters45.fa.gz",
        package = "seqArchRplus",
        mustWork = TRUE
    )
)

## 2. Clustering information (arbitrary order/unordered)
unord_clusts <- readRDS(system.file("extdata", "example_clust_info.rds",
                            package = "seqArchRplus", mustWork = TRUE
                        ))

## 3. GRanges object
tc_gr <- readRDS(system.file("extdata", "example_tc_gr.rds",
                    package = "seqArchRplus", mustWork = TRUE
                ))

## 4. BED file
bed_info_fname <- system.file("extdata", "example_info_df.bed.gz",
                                package = "seqArchRplus", mustWork = TRUE
                            )
info_df <- read.delim(file = bed_info_fname, sep = "\t", header = TRUE)

## 5. seqArchR result
seqArchR_result <- readRDS(system.file("extdata", "seqArchR_result.rds",
      package = "seqArchRplus", mustWork = TRUE))

## **NOTE** Only for the example seqArchR result object provided with this
## package:
## 
## Any seqArchR result object has the raw DNA sequences as a part of it.
## See details here: https://snikumbh.github.io/seqArchR/reference/seqArchR.html
## 
## But, in order to reduce the size of the example seqArchR result object 
## provided with this (seqArchRplus) package, the `rawSeqs` element of the 
## result is missing. This is reassigned below for the purposes of this 
## vignette.
## 
## The seqArchR result was obtained by processing the DNA sequences in 
## `example_promoters45.fa.gz`. Thus we reassign them to 
## `seqArchR_result$rawSeqs` here
##  
if(!("rawSeqs" %in% names(seqArchR_result)))
    seqArchR_result$rawSeqs <- raw_seqs


## ----curation-call-1, echo=TRUE, eval=TRUE, warning=FALSE, fig.cap="Figure showing a combined panel of dendrogram + sequence logos at the first step of curation", fig.wide=TRUE, fig.height=3.5----

sn <- "RAL28_10_to_12h"
use_aggl <- "complete"
use_dist <- "euclid"

## get seqArchR clusters custom curated
seqArchR_curated_clusts <- curate_clusters(sname = sn,
                                use_aggl = use_aggl, use_dist = use_dist,
                                seqArchR_result = seqArchR_result, iter = 5,
                                pos_lab = NULL, regularize = TRUE, topn = 50,
                                use_cutk = 2, final = FALSE, dir_path = NULL
                            )
seqArchR_curated_clusts$curation_plot

## ----curation-set-k, echo=TRUE, eval=TRUE, warning=FALSE, fig.cap="Figure showing a combined panel of dendrogram + sequence logos at the second step of curation", fig.wide=TRUE, fig.height=3.5----
## Let us set K=5 for this example, and combine clusters 1 and 2 into one.
set_cutk <- 5

## Form the lists `need_change` and `change_to` for re-assignments
need_change <- list(c(2))
change_to <- list(c(1))

seqArchR_curated_clusts <- curate_clusters(sname = sn,
                                use_aggl = use_aggl, use_dist = use_dist,
                                seqArchR_result = seqArchR_result, iter = 5,
                                regularize = TRUE, topn = 50,
                                use_cutk = set_cutk, #***
                                need_change = need_change,
                                change_to = change_to,
                                final = FALSE, #***
                                dir_path = NULL
                            )
seqArchR_curated_clusts$curation_plot

## ----curation-final-call, echo=TRUE, eval=TRUE, warning=FALSE, fig.cap="Figure showing three panels  combined: dendrogram + original sequence logos + collated cluster sequence logos at the final step of curation", fig.wide=TRUE, fig.height=4, fig.width=15----
## Satisfied with the re-assignments? now set final = TRUE
seqArchR_curated_clusts <- curate_clusters(sname = sn,
                                use_aggl = use_aggl, use_dist = use_dist,
                                seqArchR_result = seqArchR_result, iter = 5,
                                pos_lab = NULL, regularize = FALSE, topn = 50,
                                use_cutk = set_cutk, #***
                                need_change = need_change,
                                change_to = change_to,
                                final = TRUE, #***
                                dir_path = NULL
                            )
seqArchR_curated_clusts$curation_plot

## ----show-clusts--------------------------------------------------------------
str(seqArchR_curated_clusts$clusters_list)


## ----seqlogos-one-plot, eval=TRUE, attr.source='.numberLines', warning=FALSE----
##
seqlogos_oneplot_pl <- per_cluster_seqlogos(
                            sname = "RAL28_10_to_12h",
                            seqs = raw_seqs,
                            clusts = unord_clusts,
                            pos_lab = -45:45, bits_yax = "auto",
                            strand_sep = FALSE, one_plot = TRUE,
                            txt_size = 12, dir_path = NULL
                        )
seqlogos_oneplot_pl

## ----seqlogos-list, eval=FALSE, attr.source='.numberLines', warning=FALSE-----
#  ## Obtain the sequence logos as a list for combining later by setting the
#  ## 'one_plot' argument to FALSE
#  seqlogos_list_pl <- per_cluster_seqlogos(
#                          sname = "RAL28_10_to_12h",
#                          seqs = raw_seqs,
#                          clusts = unord_clusts,
#                          pos_lab = -45:45, bits_yax = "auto",
#                          strand_sep = FALSE, one_plot = FALSE,
#                          dir_path = NULL,
#                          txt_size = 12
#                      )

## ----iqw-tpm-plot, eval=TRUE, attr.source='.numberLines'----------------------
##
iqw_tpm_pl <- iqw_tpm_plots(sname = "RAL28_10_to_12h",
                            dir_path = NULL,
                            info_df = info_df,
                            iqw = TRUE, tpm = TRUE, cons = FALSE,
                            clusts = unord_clusts,
                            txt_size = 15
                            )
iqw_tpm_pl

## ----order-clusters, eval=TRUE, attr.source='.numberLines'--------------------
## get clusters ordered by median IQW values
seqArchR_clusts_ord <- order_clusters_iqw(sname = "RAL28_10_to_12h", 
                                                        clusts = unord_clusts,
                                                        info_df = info_df, 
                                                        order_by_median = TRUE
                                    )
str(unord_clusts)
str(seqArchR_clusts_ord)

## ----acgt-image, eval=TRUE, attr.source='.numberLines'------------------------
##
seqs_acgt_image(sname = "RAL28_10_to_12h",
                        seqs = raw_seqs,
                        seqs_ord = unlist(seqArchR_clusts_ord),
                        pos_lab = -45:45, dir_path = NULL
                    )

## ----motif-heatmaps, eval=TRUE, attr.source='.numberLines'--------------------
## Get larger flank FASTA sequences (larger than those used for seqArchR)
use_seqs <- Biostrings::readDNAStringSet(filepath = system.file("extdata",
                                            "example_promoters200.fa.gz",
                                            package = "seqArchRplus",
                                            mustWork = TRUE
                                        )
                                        )

plot_motif_heatmaps(sname = "RAL28_10_to_12h", seqs = use_seqs,
                                    flanks = c(200),
                                    clusts = seqArchR_clusts_ord,
                                    motifs = c("WW", "SS"),
                                    dir_path = NULL,
                                    fheight = 80, fwidth = 160
                                )


## ----annotation-one-plot, eval=TRUE, attr.source='.numberLines'---------------
##
annotations_oneplot_pl <-
    per_cluster_annotations(
        sname = "RAL28_10_to_12h",
        clusts = seqArchR_clusts_ord,
        tc_gr = tc_gr,
        cager_obj = NULL,
        qLow = 0.1, qUp = 0.9,
        txdb_obj = TxDb.Dmelanogaster.UCSC.dm6.ensGene,
        tss_region = c(-500, 100),
        orgdb_obj = NULL, dir_path = NULL,
        one_plot = TRUE,
        txt_size = 12
    )
annotations_oneplot_pl
## Obtain the per cluster annotations as a list for combining later by setting 
## the 'one_plot' argument to FALSE

# annotations_list_pl <- per_cluster_annotations(
#                             sname = "RAL28_10_to_12h",
#                             clusts = unord_clusts,
#                             tc_gr = tc_gr,
#                             cager_obj = NULL,
#                             qLow = 0.1, qUp = 0.9,
#                             txdb_obj = TxDb.Dmelanogaster.UCSC.dm6.ensGene,
#                             tss_region = c(-500, 100),
#                             orgdb_obj = NULL, dir_path = NULL,
#                             one_plot = FALSE,
#                             txt_size = 12
#                         )

## ----stranded-seqlogos, eval=TRUE, attr.source='.numberLines', fig.height=10, warning=FALSE----
## Obtain strand-separated sequence logos corresponding to each cluster
stranded_seqlogos_pl <- per_cluster_seqlogos(
                            sname = "RAL28_10_to_12h",
                            seqs = raw_seqs,
                            clusts = seqArchR_clusts_ord,
                            pos_lab = -45:45, bits_yax = "auto",
                            info_df = info_df,
                            strand_sep = TRUE, #**
                            one_plot = FALSE, #**
                            dir_path = NULL,
                            txt_size = 12
                        )
one_plot <- cowplot::plot_grid(plotlist = stranded_seqlogos_pl, ncol = 1)
one_plot

## ----stranded-chrom-prop, eval=TRUE, attr.source='.numberLines', fig.height=7----

pair_colrs <- RColorBrewer::brewer.pal(n = 5, name = "Set3")
## Obtain strand-separated sequence logos corresponding to each cluster
stranded_dist_pl <- per_cluster_strand_dist(
                            sname = "RAL28_10_to_12h",
                            clusts = seqArchR_clusts_ord,
                            info_df = info_df,
                            dir_path = NULL,
                            txt_size = 12
                        )
one_plot <- cowplot::plot_grid(plotlist = stranded_dist_pl, ncol = 1)
one_plot

## ----go-enrichment, eval=TRUE, attr.source='.numberLines', fig.width=15, fig.height=15----

go_enrich_pl <- per_cluster_go_term_enrichments(
                            sname = "RAL28_10_to_12h",
                            clusts = seqArchR_clusts_ord,
                            tc_gr = tc_gr, 
                            txdb_obj = TxDb.Dmelanogaster.UCSC.dm6.ensGene,
                            dir_path = NULL,
                            one_file = FALSE, #***
                            tss_region = c(-500,100),
                            orgdb_obj = "org.Dm.eg.db"
                        )

one_plot <- cowplot::plot_grid(plotlist = go_enrich_pl, ncol = 2)
one_plot

## ----large-panels, eval=TRUE, attr.source='.numberLines', fig.wide=TRUE, fig.height=8, fig.width=20, fig.cap="Multiple plots combined as panels to generate one large figure."----


form_combined_panel(iqw_tpm_pl = iqw_tpm_pl, seqlogos_pl = seqlogos_oneplot_pl, 
                    annot_pl = annotations_oneplot_pl)


## ----html-report, eval=TRUE, attr.source='.numberLines', fig.wide=TRUE, fig.height=8, fig.width=20, fig.cap="Multiple plots combined as panels to geenrate one large figure.", eval=FALSE----
#  
#  
#  generate_html_report(snames = c("RAL28_10_to_12h", "RAL28_10_to_12h"),
#                          file_type = "PDF", dir_path = tempdir())
#  

## ----track-bed, eval=TRUE, attr.source='.numberLines'-------------------------
##
write_seqArchR_cluster_track_bed(
                    sname = "RAL28_10_to_12h",
                    clusts = seqArchR_clusts_ord,
                    info_df = info_df,
                    use_q_bound = FALSE,
                    one_zip_all = TRUE,
                    org_name = "Dmelanogaster",
                    dir_path = tempdir(),
                    include_in_report = FALSE,
                    strand_sep = FALSE
                )

## ----generate-all-plots, eval=FALSE-------------------------------------------
#  
#  generate_all_plots(
#      sname = "RAL28_10_to_12h",
#      bed_info_fname = bed_info_fname,
#      seqArchR_clusts = unord_clusts,
#      raw_seqs = raw_seqs,
#      tc_gr = tc_gr,
#      use_q_bound = FALSE,
#      order_by_iqw = TRUE,
#      use_median_iqw = TRUE,
#      iqw = TRUE, tpm = TRUE, cons = FALSE,
#      pos_lab = -45:45,
#      txdb_obj = TxDb.Dmelanogaster.UCSC.dm6.ensGene,
#      org_name = "Dmelanogaster",
#      qLow = 0.1, qUp = 0.9,
#      tss_region = c(-500, 100),
#      raw_seqs_mh = use_seqs,
#      motifs = c("WW", "SS", "TATAA", "CG"),
#      motif_heatmaps_flanks = c(50, 100, 200),
#      dir_path = tempdir(),
#      txt_size = 25
#  )

## ----session_info, include=TRUE, echo=TRUE, results='markup'------------------
sessionInfo()