## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  fig.align='center',
  external=TRUE,
  echo=TRUE,
  warning=FALSE,
  comment = "#>"
)


## ----echo=FALSE---------------------------------------------------------------
cap01<-"Comprehensive diagram of complete mobileRNA workflows. "

## ----fig.align="centre", echo=FALSE, fig.cap=cap01----------------------------
knitr::include_graphics("../man/figures/mobileRNA_graphic_2.png")

## ----eval=FALSE, message=FALSE------------------------------------------------
#  if (!require("BiocManager", quietly = TRUE))
#      install.packages("BiocManager")
#  
#  BiocManager::install("mobileRNA")

## ----eval=FALSE, message=FALSE------------------------------------------------
#  if (!require("devtools")) install.packages("devtools")
#  devtools::install_github("KJeynesCupper/mobileRNA", ref = "main")

## ----message=FALSE------------------------------------------------------------
library("mobileRNA")

## ----Load, message=FALSE------------------------------------------------------
# Load small RNA data
data("sRNA_data")

# Load messenger RNA data
data("mRNA_data")

## ----message=FALSE------------------------------------------------------------
fasta_1 <- system.file("extdata","reduced_chr12_Eggplant.fa.gz", 
                       package="mobileRNA")

fasta_2 <-system.file("extdata","reduced_chr2_Tomato.fa.gz",
                      package="mobileRNA")
# define temporary output directory - replace with your directory
output_assembly_file <- file.path(tempfile("merged_assembly", 
                                           fileext = ".fa"))

# merge
merged_reference <- RNAmergeGenomes(genomeA = fasta_1,
                                    genomeB = fasta_2,
                                    output_file = output_assembly_file)

## ----message=FALSE------------------------------------------------------------
anno1 <- system.file("extdata","reduced_chr12_Eggplant.gff.gz",
                     package="mobileRNA")

anno2 <- system.file("extdata","reduced_chr2_Tomato.gff.gz", 
                     package="mobileRNA")

# define temporary output directory 
output_annotation_file <- file.path(tempfile("merged_annotation", 
                                             fileext = ".gff3"))

# merge annotation files into a single file
merged_annotation <- RNAmergeAnnotations(annotationA = anno1,
                                         annotationB = anno2,
                                         output_file = output_annotation_file)

## ----eval=FALSE---------------------------------------------------------------
#  # directory containing only sRNAseq samples
#  samples <- system.file("extdata/sRNAseq",package="mobileRNA")
#  
#  # output location
#  output_location <- tempdir()
#  
#  mapRNA(input = "sRNA",
#         input_files_dir = samples,
#         output_dir = output_location,
#         genomefile = output_assembly_file,
#         condaenv = "/Users/user-name/miniconda3/envs/ShortStack4",
#         mmap = "n")

## ----eval=FALSE---------------------------------------------------------------
#  # Directory containing results
#  results_dir <-  file.path(output_location,"2_sRNA_results")
#  
#  # Sample names and total number of reads, in the same order.
#  sample_names <- c("selfgraft_demo_1", "selfgraft_demo_2",
#                    "selfgraft_demo_3", "heterograft_demo_1",
#                    "heterograft_demo_2", "heterograft_demo_3")
#  
#  
#  sRNA_data_demo <- RNAimport(input = "sRNA",
#                              directory = results_dir,
#                              samples = sample_names)

## -----------------------------------------------------------------------------
data("sRNA_data")

## ----message=FALSE------------------------------------------------------------
# define control samples
controls <- c("selfgraft_1", "selfgraft_2", "selfgraft_3")

mobile_sRNA <- RNAmobile(input = "sRNA",
                         data = sRNA_data, 
                         controls = controls,
                         genome.ID = "B",
                         task = "keep")

## ----eval=FALSE---------------------------------------------------------------
#  # save output as txt file
#  write.table(mobile_sRNA, "./sRNA_mobile_output.txt")

## ----echo=FALSE---------------------------------------------------------------
cap1 <- "An example facet line graph to show the distribution of sRNA classes within each sample."
cap2 <- "An example facet bar graph to show the distribution of sRNA classes within each sample."

## -----------------------------------------------------------------------------
# plot each replicate as a line, separately, and facet 
sample_distribution_line <- RNAdistribution(sRNA_data,
                                            style = "line",
                                            overlap =  FALSE, 
                                            facet = TRUE,
                                            colour = "darkgreen")

                                            
 # plot each replicate as a bar, separately, and facet 
sample_distribution_bar <- RNAdistribution(sRNA_data,
                                           style = "bar",
                                           facet = TRUE,
                                           colour ="lightblue")

## ----fig.cap=cap1, fig.show="hold", fig.height=10, fig.width=15---------------
# View plot (only)
sample_distribution_line$plot

## ----fig.cap=cap2, fig.show="hold", fig.height=10, fig.width=15---------------
# View plot (only)
sample_distribution_bar$plot

## ----echo=FALSE---------------------------------------------------------------
cap4 <-"An example of a PCA, illustrating  the sRNA data set sample similarity"

## ----message=FALSE, fig.cap=cap4, fig.show="hold", fig.height=8, fig.width=15----
groups <- c("Heterograft", "Heterograft", "Heterograft",
            "Selfgraft", "Selfgraft", "Selfgraft")

 plotSamplePCA(data = sRNA_data, group = groups, size.ratio = 2)

## ----echo=FALSE---------------------------------------------------------------
cap5 <-"An example of a heatmap, illustrating the sRNA data set sample similarity"


## ----message=FALSE, fig.cap=cap5, fig.show="hold"-----------------------------
plotSampleDistance(sRNA_data)

## ----message=FALSE------------------------------------------------------------
# define consensus, store as a data summary file.
sRNA_data_dicercall <- RNAdicercall(data = sRNA_data, 
                                     chimeric = TRUE, 
                                     genome.ID = "B_", 
                                     controls = c("selfgraft_1", 
                                                  "selfgraft_2", 
                                                  "selfgraft_3"))

## ----message=FALSE------------------------------------------------------------
# Subset data for analysis: 24-nt sRNAs
sRNA_24 <- RNAsubset(sRNA_data_dicercall, class = 24)

# Subset data for analysis: 21/22-nt sRNAs
sRNA_2122 <- RNAsubset(sRNA_data_dicercall, class = c(21, 22))

## ----message=FALSE------------------------------------------------------------
consensus_plot <- RNAdistribution(data = sRNA_data_dicercall,
                                  style = "bar",
                                  data.type = "consensus")

## ----echo=FALSE---------------------------------------------------------------
cap6<-"An example of the distribution of small RNA consensus dicer classifications. "

## ----fig.cap=cap6, fig.height=10, fig.width=15--------------------------------
# view 
consensus_plot$plot

## ----message=FALSE------------------------------------------------------------
#reorder df
controls <- c("selfgraft_1", "selfgraft_2", "selfgraft_3")
reorder_df <- RNAreorder(sRNA_data_dicercall, controls)

# sample conditions in order within dataframe
groups <- c("Selfgraft", "Selfgraft", "Selfgraft", 
            "Heterograft", "Heterograft", "Heterograft")

## Differential analysis of whole dataset: DESeq2 method 
sRNA_DESeq2 <- RNAdifferentialAnalysis(data = reorder_df,
                              group = groups,
                              method = "DESeq2")

## ----results='asis'-----------------------------------------------------------
RNAsummary(sRNA_DESeq2)
 

## ----results='asis'-----------------------------------------------------------
RNAsummary(sRNA_DESeq2, alpha=0.05)


## ----eval = FALSE-------------------------------------------------------------
#  write.table(sRNA_DESeq2, "./sRNA_core_dataset.txt")

## ----fig.show="hold"----------------------------------------------------------
# summary of statistical sRNA clusters
RNAsummary(sRNA_DESeq2, alpha=0.05)

# select significant 
significant_sRNAs <-  sRNA_DESeq2[sRNA_DESeq2$padjusted < 0.05, ]

## ----echo = FALSE-------------------------------------------------------------
capp1 <- "Heatmap of statistically significant sRNA clusters"

## ----fig.show="hold", message=FALSE-------------------------------------------
p1 <- plotHeatmap(significant_sRNAs, value = "RPM", row.names = FALSE,
                  title = "Heatmap of log-transformed FPKM")

## ----fig.show="hold", message=FALSE, fig.cap=capp1----------------------------
p1$plot

## ----message=FALSE------------------------------------------------------------
# select sRNA clusters only found in treatment & not in the control samples
gained_sRNA <- RNApopulation(data = sRNA_DESeq2, 
                                  conditions = c("heterograft_1", 
                                                 "heterograft_2" , 
                                                 "heterograft_3"),
                                  chimeric = TRUE, 
                                  genome.ID = "B_", 
                                  controls = c("selfgraft_1",
                                               "selfgraft_2", 
                                               "selfgraft_3"))

# look at number of sRNA cluster only found in treatment 
nrow(gained_sRNA)

## ----message=FALSE------------------------------------------------------------
# select sRNA clusters only found in control & not in the treatment samples
lost_sRNA <- RNApopulation(data = sRNA_DESeq2, 
                                conditions = c("selfgraft_1", 
                                               "selfgraft_2" , 
                                               "selfgraft_3"), 
                                  chimeric = TRUE, 
                                  genome.ID = "B_", 
                                  controls = c("selfgraft_1",
                                               "selfgraft_2", 
                                               "selfgraft_3"))
# look at number of sRNA cluster only found in control  
nrow(lost_sRNA)

## ----message=FALSE------------------------------------------------------------
gained_sRNA_sequences <- RNAsequences(gained_sRNA, method = "consensus" )

## ----message=FALSE------------------------------------------------------------
gained_sRNA_attributes <- RNAattributes(data = gained_sRNA, 
                                        match ="genes",
                            annotation = output_annotation_file)

## ----message=FALSE------------------------------------------------------------
# vector of control names
control_names <- c("selfgraft_1", "selfgraft_2", "selfgraft_3")


## Identify potential tomato mobile molecules
mobile_sRNA <- RNAmobile(input = "sRNA",
                        data = sRNA_DESeq2, 
                         controls = control_names,
                         genome.ID = "B_",
                         task = "keep", 
                         statistical = FALSE)

## ----echo=FALSE---------------------------------------------------------------
cap7 <- "An example heatmap of candidate mobile small RNAs. Where the columns represent the sample replicates and the rows represent the small RNA cluster."


## ----fig.cap=cap7, fig.show="hold" , message=FALSE----------------------------
p2 <- plotHeatmap(mobile_sRNA, row.names = FALSE)

p2$plot

## ----eval = FALSE-------------------------------------------------------------
#  write.table(mobile_sRNA, "./candidate_mobile_sRNAs.txt")

## -----------------------------------------------------------------------------
annotation_file <- system.file("extdata", 
                                    "prefix_reduced_chr2_Tomato.gff.gz",
                                    package="mobileRNA")

mobile_attributes <- RNAattributes(data = mobile_sRNA, 
                                      match ="genes",
                                      annotation = annotation_file)

## -----------------------------------------------------------------------------
mobile_features <- RNAfeatures(data = mobile_sRNA, 
                               annotation = annotation_file)
print(mobile_features)

# plot as stacked bar chart
features_plot <- plotRNAfeatures(data = sRNA_data,  
                                 annotation = annotation_file)

plot(features_plot)

## ----message=FALSE------------------------------------------------------------
mobile_sequences <- RNAsequences(mobile_sRNA, method = "consensus") 

## ----eval = FALSE-------------------------------------------------------------
#  write.table(mobile_sequences, "./candidate_mobile_sRNA_sequences.txt")

## ----message=FALSE------------------------------------------------------------
# load `dplyr` package
library(dplyr)

# select the cluster and sequence columns 
sequences <- mobile_sequences %>% select(Cluster, Sequence)

# add prefix, remove row with NA
prefix <- ">"
sequences$Cluster <- paste0(prefix, sequences$Cluster)
sequences <- na.omit(sequences)

# convert to required format:
res <- do.call(rbind, lapply(seq(nrow(sequences)), 
                             function(i) t(sequences[i, ])))

## ----eval = FALSE, message=FALSE----------------------------------------------
#  # save output
#  write.table(res, file ="./mobile_sRNA_sequences.txt",
#              row.names = FALSE, col.names = FALSE, quote = FALSE)

## ----eval = FALSE-------------------------------------------------------------
#  # load `bioseq` package
#  library(bioseq)
#  
#  # build a RNA vector from a character vector:
#  mobileRNA_seq <- bioseq::rna(mobile_sequences$Sequence)
#  
#  # Find a consensus sequence for a set of sequences:
#  bioseq::seq_consensus(mobileRNA_seq)

## ----eval=FALSE---------------------------------------------------------------
#  # names of samples, represented by folder names produced in previous step
#  samples <- c("[sample names]")
#  
#  # location of output folders from previous step
#  dir <- "[output_dir]"
#  
#  # merge information of the detected de novo sRNA clusters
#  gff_alignment <- GenomicRanges::GRangesList()
#  for (i in samples) {
#    file_path <- file.path(dir, i, "Results.gff.gz3")
#    if (file.exists(file_path)) {
#      gff_alignment[[i]] <- rtracklayer::import.gff.gz(file_path)
#    } else{
#      Stop("File does not exist:", file_path, "\n")
#    }
#  }
#  
#  gff_merged <- GenomicRanges::reduce(unlist(gff_alignment),
#                                      ignore.strand = TRUE)
#  
#  gff_merged <- as.data.frame(gff_merged)
#  colnames(gff_merged)[1] <- "chr"
#  if('*' %in% gff_merged$strand){
#      gff_merged <- gff_merged[, -match("strand", colnames(gff_merged))]
#    }
#  
#  locifile_txt <- data.frame(Locus = paste0(gff_merged$chr, ":",
#                                            gff_merged$start,"-",
#                                            gff_merged$end),
#                             Cluster = paste0("cluster_",
#                                              seq_len(nrow(gff_merged))))
#  
#  # save output to location
#  dir_locifile <- "[output directory]"
#  loci_out <- file.path(dir_locifile,"locifile.txt")
#  utils::write.table(locifile_txt, file = loci_out, quote = FALSE,
#                     sep = "\t", row.names = FALSE, col.names = TRUE)
#  

## ----eval=FALSE---------------------------------------------------------------
#  # names of samples, represented by folder names produced in previous step
#  samples <- c("[sample names]")
#  
#  # location of output folders from previous step
#  dir <- "[output_dir]"
#  
#  # merge information of the detected de novo sRNA clusters
#  gff_alignment <- GenomicRanges::GRangesList()
#  for (i in samples) {
#    file_path <- file.path(dir, i, "Results.gff.gz3")
#    if (file.exists(file_path)) {
#      gff_alignment[[i]] <- rtracklayer::import.gff.gz(file_path)
#    } else{
#      Stop("File does not exist:", file_path, "\n")
#    }
#  }
#  
#  gff_merged <- GenomicRanges::reduce(unlist(gff_alignment),
#                                      ignore.strand = TRUE)
#  
#  gff_merged <- as.data.frame(gff_merged)
#  colnames(gff_merged)[1] <- "chr"
#  if('*' %in% gff_merged$strand){
#      gff_merged <- gff_merged[, -match("strand", colnames(gff_merged))]
#    }
#  
#  locifile_txt <- data.frame(Locus = paste0(gff_merged$chr, ":",
#                                            gff_merged$start,"-",
#                                            gff_merged$end),
#                             Cluster = paste0("cluster_",
#                                              seq_len(nrow(gff_merged))))
#  
#  # save output to location
#  dir_locifile <- "[output directory]"
#  loci_out <- file.path(dir_locifile,"locifile.txt")
#  utils::write.table(locifile_txt, file = loci_out, quote = FALSE,
#                     sep = "\t", row.names = FALSE, col.names = TRUE)

## -----------------------------------------------------------------------------
sessionInfo()