## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----seqArchRplus-install, echo=TRUE, eval=FALSE------------------------------ # ## When available on Bioconductor # if (!require("BiocManager", quietly = TRUE)) { # install.packages("BiocManager") # } # BiocManager::install("seqArchRplus") ## ----setup-two, echo=TRUE,include=TRUE,results="hide",message=FALSE,warning=FALSE---- # Load seqArchRplus library(seqArchRplus) library(seqArchR) library(Biostrings) library(TxDb.Dmelanogaster.UCSC.dm6.ensGene) library(org.Dm.eg.db) library(ChIPseeker) # Set seed for reproducibility set.seed(1234) ## ----prepare-fetch-data, eval=TRUE-------------------------------------------- ## 1. Raw DNA sequences raw_seqs <- Biostrings::readDNAStringSet( filepath = system.file("extdata", "example_promoters45.fa.gz", package = "seqArchRplus", mustWork = TRUE ) ) ## 2. Clustering information (arbitrary order/unordered) unord_clusts <- readRDS(system.file("extdata", "example_clust_info.rds", package = "seqArchRplus", mustWork = TRUE )) ## 3. GRanges object tc_gr <- readRDS(system.file("extdata", "example_tc_gr.rds", package = "seqArchRplus", mustWork = TRUE )) ## 4. BED file bed_info_fname <- system.file("extdata", "example_info_df.bed.gz", package = "seqArchRplus", mustWork = TRUE ) info_df <- read.delim(file = bed_info_fname, sep = "\t", header = TRUE) ## 5. seqArchR result seqArchR_result <- readRDS(system.file("extdata", "seqArchR_result.rds", package = "seqArchRplus", mustWork = TRUE)) ## **NOTE** Only for the example seqArchR result object provided with this ## package: ## ## Any seqArchR result object has the raw DNA sequences as a part of it. ## See details here: https://snikumbh.github.io/seqArchR/reference/seqArchR.html ## ## But, in order to reduce the size of the example seqArchR result object ## provided with this (seqArchRplus) package, the `rawSeqs` element of the ## result is missing. This is reassigned below for the purposes of this ## vignette. ## ## The seqArchR result was obtained by processing the DNA sequences in ## `example_promoters45.fa.gz`. Thus we reassign them to ## `seqArchR_result$rawSeqs` here ## if(!("rawSeqs" %in% names(seqArchR_result))) seqArchR_result$rawSeqs <- raw_seqs ## ----curation-call-1, echo=TRUE, eval=TRUE, warning=FALSE, fig.cap="Figure showing a combined panel of dendrogram + sequence logos at the first step of curation", fig.wide=TRUE, fig.height=3.5---- sn <- "RAL28_10_to_12h" use_aggl <- "complete" use_dist <- "euclid" ## get seqArchR clusters custom curated seqArchR_curated_clusts <- curate_clusters(sname = sn, use_aggl = use_aggl, use_dist = use_dist, seqArchR_result = seqArchR_result, iter = 5, pos_lab = NULL, regularize = TRUE, topn = 50, use_cutk = 2, final = FALSE, dir_path = NULL ) seqArchR_curated_clusts$curation_plot ## ----curation-set-k, echo=TRUE, eval=TRUE, warning=FALSE, fig.cap="Figure showing a combined panel of dendrogram + sequence logos at the second step of curation", fig.wide=TRUE, fig.height=3.5---- ## Let us set K=5 for this example, and combine clusters 1 and 2 into one. set_cutk <- 5 ## Form the lists `need_change` and `change_to` for re-assignments need_change <- list(c(2)) change_to <- list(c(1)) seqArchR_curated_clusts <- curate_clusters(sname = sn, use_aggl = use_aggl, use_dist = use_dist, seqArchR_result = seqArchR_result, iter = 5, regularize = TRUE, topn = 50, use_cutk = set_cutk, #*** need_change = need_change, change_to = change_to, final = FALSE, #*** dir_path = NULL ) seqArchR_curated_clusts$curation_plot ## ----curation-final-call, echo=TRUE, eval=TRUE, warning=FALSE, fig.cap="Figure showing three panels combined: dendrogram + original sequence logos + collated cluster sequence logos at the final step of curation", fig.wide=TRUE, fig.height=4, fig.width=15---- ## Satisfied with the re-assignments? now set final = TRUE seqArchR_curated_clusts <- curate_clusters(sname = sn, use_aggl = use_aggl, use_dist = use_dist, seqArchR_result = seqArchR_result, iter = 5, pos_lab = NULL, regularize = FALSE, topn = 50, use_cutk = set_cutk, #*** need_change = need_change, change_to = change_to, final = TRUE, #*** dir_path = NULL ) seqArchR_curated_clusts$curation_plot ## ----show-clusts-------------------------------------------------------------- str(seqArchR_curated_clusts$clusters_list) ## ----seqlogos-one-plot, eval=TRUE, attr.source='.numberLines', warning=FALSE---- ## seqlogos_oneplot_pl <- per_cluster_seqlogos( sname = "RAL28_10_to_12h", seqs = raw_seqs, clusts = unord_clusts, pos_lab = -45:45, bits_yax = "auto", strand_sep = FALSE, one_plot = TRUE, txt_size = 12, dir_path = NULL ) seqlogos_oneplot_pl ## ----seqlogos-list, eval=FALSE, attr.source='.numberLines', warning=FALSE----- # ## Obtain the sequence logos as a list for combining later by setting the # ## 'one_plot' argument to FALSE # seqlogos_list_pl <- per_cluster_seqlogos( # sname = "RAL28_10_to_12h", # seqs = raw_seqs, # clusts = unord_clusts, # pos_lab = -45:45, bits_yax = "auto", # strand_sep = FALSE, one_plot = FALSE, # dir_path = NULL, # txt_size = 12 # ) ## ----iqw-tpm-plot, eval=TRUE, attr.source='.numberLines'---------------------- ## iqw_tpm_pl <- iqw_tpm_plots(sname = "RAL28_10_to_12h", dir_path = NULL, info_df = info_df, iqw = TRUE, tpm = TRUE, cons = FALSE, clusts = unord_clusts, txt_size = 15 ) iqw_tpm_pl ## ----order-clusters, eval=TRUE, attr.source='.numberLines'-------------------- ## get clusters ordered by median IQW values seqArchR_clusts_ord <- order_clusters_iqw(sname = "RAL28_10_to_12h", clusts = unord_clusts, info_df = info_df, order_by_median = TRUE ) str(unord_clusts) str(seqArchR_clusts_ord) ## ----acgt-image, eval=TRUE, attr.source='.numberLines'------------------------ ## seqs_acgt_image(sname = "RAL28_10_to_12h", seqs = raw_seqs, seqs_ord = unlist(seqArchR_clusts_ord), pos_lab = -45:45, dir_path = NULL ) ## ----motif-heatmaps, eval=TRUE, attr.source='.numberLines'-------------------- ## Get larger flank FASTA sequences (larger than those used for seqArchR) use_seqs <- Biostrings::readDNAStringSet(filepath = system.file("extdata", "example_promoters200.fa.gz", package = "seqArchRplus", mustWork = TRUE ) ) plot_motif_heatmaps(sname = "RAL28_10_to_12h", seqs = use_seqs, flanks = c(200), clusts = seqArchR_clusts_ord, motifs = c("WW", "SS"), dir_path = NULL, fheight = 80, fwidth = 160 ) ## ----annotation-one-plot, eval=TRUE, attr.source='.numberLines'--------------- ## annotations_oneplot_pl <- per_cluster_annotations( sname = "RAL28_10_to_12h", clusts = seqArchR_clusts_ord, tc_gr = tc_gr, cager_obj = NULL, qLow = 0.1, qUp = 0.9, txdb_obj = TxDb.Dmelanogaster.UCSC.dm6.ensGene, tss_region = c(-500, 100), orgdb_obj = NULL, dir_path = NULL, one_plot = TRUE, txt_size = 12 ) annotations_oneplot_pl ## Obtain the per cluster annotations as a list for combining later by setting ## the 'one_plot' argument to FALSE # annotations_list_pl <- per_cluster_annotations( # sname = "RAL28_10_to_12h", # clusts = unord_clusts, # tc_gr = tc_gr, # cager_obj = NULL, # qLow = 0.1, qUp = 0.9, # txdb_obj = TxDb.Dmelanogaster.UCSC.dm6.ensGene, # tss_region = c(-500, 100), # orgdb_obj = NULL, dir_path = NULL, # one_plot = FALSE, # txt_size = 12 # ) ## ----stranded-seqlogos, eval=TRUE, attr.source='.numberLines', fig.height=10, warning=FALSE---- ## Obtain strand-separated sequence logos corresponding to each cluster stranded_seqlogos_pl <- per_cluster_seqlogos( sname = "RAL28_10_to_12h", seqs = raw_seqs, clusts = seqArchR_clusts_ord, pos_lab = -45:45, bits_yax = "auto", info_df = info_df, strand_sep = TRUE, #** one_plot = FALSE, #** dir_path = NULL, txt_size = 12 ) one_plot <- cowplot::plot_grid(plotlist = stranded_seqlogos_pl, ncol = 1) one_plot ## ----stranded-chrom-prop, eval=TRUE, attr.source='.numberLines', fig.height=7---- pair_colrs <- RColorBrewer::brewer.pal(n = 5, name = "Set3") ## Obtain strand-separated sequence logos corresponding to each cluster stranded_dist_pl <- per_cluster_strand_dist( sname = "RAL28_10_to_12h", clusts = seqArchR_clusts_ord, info_df = info_df, dir_path = NULL, txt_size = 12 ) one_plot <- cowplot::plot_grid(plotlist = stranded_dist_pl, ncol = 1) one_plot ## ----go-enrichment, eval=TRUE, attr.source='.numberLines', fig.width=15, fig.height=15---- go_enrich_pl <- per_cluster_go_term_enrichments( sname = "RAL28_10_to_12h", clusts = seqArchR_clusts_ord, tc_gr = tc_gr, txdb_obj = TxDb.Dmelanogaster.UCSC.dm6.ensGene, dir_path = NULL, one_file = FALSE, #*** tss_region = c(-500,100), orgdb_obj = "org.Dm.eg.db" ) one_plot <- cowplot::plot_grid(plotlist = go_enrich_pl, ncol = 2) one_plot ## ----large-panels, eval=TRUE, attr.source='.numberLines', fig.wide=TRUE, fig.height=8, fig.width=20, fig.cap="Multiple plots combined as panels to generate one large figure."---- form_combined_panel(iqw_tpm_pl = iqw_tpm_pl, seqlogos_pl = seqlogos_oneplot_pl, annot_pl = annotations_oneplot_pl) ## ----html-report, eval=TRUE, attr.source='.numberLines', fig.wide=TRUE, fig.height=8, fig.width=20, fig.cap="Multiple plots combined as panels to geenrate one large figure.", eval=FALSE---- # # # generate_html_report(snames = c("RAL28_10_to_12h", "RAL28_10_to_12h"), # file_type = "PDF", dir_path = tempdir()) # ## ----track-bed, eval=TRUE, attr.source='.numberLines'------------------------- ## write_seqArchR_cluster_track_bed( sname = "RAL28_10_to_12h", clusts = seqArchR_clusts_ord, info_df = info_df, use_q_bound = FALSE, one_zip_all = TRUE, org_name = "Dmelanogaster", dir_path = tempdir(), include_in_report = FALSE, strand_sep = FALSE ) ## ----generate-all-plots, eval=FALSE------------------------------------------- # # generate_all_plots( # sname = "RAL28_10_to_12h", # bed_info_fname = bed_info_fname, # seqArchR_clusts = unord_clusts, # raw_seqs = raw_seqs, # tc_gr = tc_gr, # use_q_bound = FALSE, # order_by_iqw = TRUE, # use_median_iqw = TRUE, # iqw = TRUE, tpm = TRUE, cons = FALSE, # pos_lab = -45:45, # txdb_obj = TxDb.Dmelanogaster.UCSC.dm6.ensGene, # org_name = "Dmelanogaster", # qLow = 0.1, qUp = 0.9, # tss_region = c(-500, 100), # raw_seqs_mh = use_seqs, # motifs = c("WW", "SS", "TATAA", "CG"), # motif_heatmaps_flanks = c(50, 100, 200), # dir_path = tempdir(), # txt_size = 25 # ) ## ----session_info, include=TRUE, echo=TRUE, results='markup'------------------ sessionInfo()