## ----code, echo=TRUE, eval=FALSE----------------------------------------------
#  if (!require("BiocManager", quietly = TRUE))
#      install.packages("BiocManager")
#  
#  BiocManager::install("MOSim")

## ----code1, echo=TRUE, eval=FALSE---------------------------------------------
#  library(devtools)
#  
#  devtools::install_github("ConesaLab/MOSim")

## ----load, echo=FALSE, eval=TRUE, include=TRUE--------------------------------
library(MOSim)

## ----code2, echo=TRUE, eval=TRUE----------------------------------------------
library(MOSim)

omic_list <- c("RNA-seq")

rnaseq_simulation <- mosim(omics = omic_list)

## ----code3, echo=TRUE, eval=TRUE, include=TRUE--------------------------------
rnaseq_simulation <- mosim(omics = c("RNA-seq"),
                           times = 0,
                           numberGroups = 2,
                           numberReps = 4)

## ----code4, echo=TRUE, eval=FALSE, include=TRUE, error=TRUE-------------------
#  rnaseq_simulation <- mosim(omics = c("RNA-seq"),
#                             times = 0,
#                             numberGroups = 1,
#                             numberReps = 4)

## ----code5, echo=TRUE, eval=FALSE, include=TRUE-------------------------------
#  multi_simulation <- mosim(omics = c("RNA-seq", "DNase-seq"),
#                            times = c(0, 5, 10),
#                            numberGroups = 2,
#                            numberReps = 4,
#                            diffGenes = .3)
#  
#  dnase_simulation <- mosim(omics = c("DNase-seq"),
#                            times = c(0, 5, 10),
#                            numberGroups = 2,
#                            numberReps = 4,
#                            diffGenes = .3)

## ----code6, echo=TRUE, eval=TRUE,cache=TRUE, tidy=FALSE, results='hide', message=FALSE----
# Take a subset of the included dataset for illustration
# purposes. We could also load it from a csv file or RData,
# as long as we transform it to have 1 column named "Counts"
# and the identifiers as row names.
data("sampleData")

custom_rnaseq <- head(sampleData$SimRNAseq$data, 100)

# In this case, 'custom_rnaseq' is a data frame with
# the structure:
head(custom_rnaseq)
##                    Counts
## ENSMUSG00000000001   6572
## ENSMUSG00000000003      0
## ENSMUSG00000000028   4644
## ENSMUSG00000000031      8
## ENSMUSG00000000037      0
## ENSMUSG00000000049      0


# The helper 'omicData' returns an object with our custom data.
rnaseq_customdata <- omicData("RNA-seq", data = custom_rnaseq)

# We use the associative list of 'omics' parameter to pass
# the RNA-seq object.
rnaseq_simulation <- mosim(omics = list("RNA-seq" = rnaseq_customdata))

## ----code7, echo=TRUE, eval=TRUE, tidy=FALSE, include=TRUE, results='hide', message=FALSE----
# Select a subset of the available data as a custom dataset
data("sampleData")

custom_dnaseseq <- head(sampleData$SimDNaseseq$data, 100)

# Retrieve a subset of the default association list.
dnase_genes <- sampleData$SimDNaseseq$idToGene
dnase_genes <- dnase_genes[dnase_genes$ID %in%
                               rownames(custom_dnaseseq), ]

# In this case, 'custom_dnaseseq' is a data frame with
# the structure:
head(custom_dnaseseq)
##                       Counts
## 1_63176480_63177113      513
## 1_125435495_125436168   1058
## 1_128319376_128319506     37
## 1_139067124_139067654    235
## 1_152305595_152305752    105
## 1_172490322_172490824    290

# The association list 'dnase_genes' is another data frame
# with the structure:
head(dnase_genes)
##                      ID               Gene
## 29195 1_3670777_3670902 ENSMUSG00000051951
## 29196 1_3873195_3873351 ENSMUSG00000089420
## 29197 1_4332428_4332928 ENSMUSG00000025900
## 29198 1_4346315_4346445 ENSMUSG00000025900
## 29199 1_4416827_4416973 ENSMUSG00000025900
## 29200 1_4516660_4516798 ENSMUSG00000096126

dnaseseq_customdata <- omicData("DNase-seq",
                                data = custom_dnaseseq,
                                associationList = dnase_genes)

multi_simulation <- mosim(omics = list(
    "RNA-seq" = rnaseq_customdata,
    "DNase-seq" = dnaseseq_customdata)
    )

## ----code8, echo=TRUE, eval=TRUE, tidy=FALSE, include = TRUE, results='hide', message=FALSE----
# Select a subset of the available data as a custom dataset
data("sampleData")

custom_tf <- head(sampleData$SimRNAseq$TFtoGene, 100)
#      TF             TFgene         LinkedGene
# 1  Aebp2 ENSMUSG00000030232 ENSMUSG00000000711
# 2  Aebp2 ENSMUSG00000030232 ENSMUSG00000001157
# 3  Aebp2 ENSMUSG00000030232 ENSMUSG00000001211
# 4  Aebp2 ENSMUSG00000030232 ENSMUSG00000001227
# 5  Aebp2 ENSMUSG00000030232 ENSMUSG00000001305
# 6  Aebp2 ENSMUSG00000030232 ENSMUSG00000001794

multi_simulation <- mosim(omics = list(
    "RNA-seq" = rnaseq_customdata,
    "DNase-seq" = dnaseseq_customdata),
    # The option is passed directly to mosim function instead of
    # being an element inside "omics" parameter.
    TFtoGene = custom_tf
    )

## ----code9, echo=TRUE, eval=TRUE, tidy=FALSE, results='hide', message=FALSE----
# Select a subset of the available data as a custom dataset
data("sampleData")

custom_cpgs <- head(sampleData$SimMethylseq$idToGene, 100)

# The ID column will be splitted using the "_" char
# assuming <chr>_<start>_<end>.
#
# These positions will be considered as CpG sites
# and used to generate CpG islands and other elements.
#
# Please refer to MOSim paper for more information.
#
#                    ID               Gene
# 1  11_3101154_3101154 ENSMUSG00000082286
# 2  11_3101170_3101170 ENSMUSG00000082286
# 3  11_3101229_3101229 ENSMUSG00000082286
# 4  11_3101287_3101287 ENSMUSG00000082286
# 5  11_3101329_3101329 ENSMUSG00000082286
# 6  11_3101404_3101404 ENSMUSG00000082286

## ----code10, echo=TRUE, eval=FALSE, tidy = FALSE, results='hide', message=FALSE----
#  omic_list <- c("RNA-seq")
#  
#  rnaseq_options <- omicSim("RNA-seq", totalFeatures = 2500)
#  
#  # The return value is an associative list compatible with
#  # 'omicsOptions'
#  rnaseq_simulation <- mosim(omics = omic_list,
#                             omicsOptions = rnaseq_options)

## ----code11, echo=TRUE, eval=TRUE, tidy = FALSE, results='hide', message=FALSE----
omics_list <- c("RNA-seq", "DNase-seq")

# In R concatenaning two lists creates another one merging
# its elements, we use that for 'omicsOptions' parameter.
omics_options <- c(omicSim("RNA-seq", totalFeatures = 2500, depth = 74),
                   omicSim("DNase-seq",
                           # Limit the number of features to simulate
                           totalFeatures = 1500,
                           # Modify the percentage of regulators with effects.
                           regulatorEffect = list(
                                'activator' = 0.68,
                                'repressor' = 0.3,
                                'NE' = 0.02
                            )))

set.seed(12345)

multi_simulation <- mosim(omics = omics_list,
                          omicsOptions = omics_options)

## ----code12, echo=TRUE, eval=TRUE, tidy = FALSE, results='hide', message=FALSE----
rnaseq_customdata <- omicData("RNA-seq", data = custom_rnaseq)
rnaseq_options <- omicSim("RNA-seq", totalFeatures = 100)

rnaseq_simulation <- mosim(omics = list("RNA-seq" = rnaseq_customdata),
                           omicsOptions = rnaseq_options)

## ----code13, echo=TRUE, eval=TRUE, tidy = FALSE, results='hide', message=FALSE----
# This will be a data frame with RNA-seq settings (DE flag, profiles)
rnaseq_settings <- omicSettings(multi_simulation, "RNA-seq")

# This will be a list containing all the simulated omics (RNA-seq
# and DNase-seq in this case)
all_settings <- omicSettings(multi_simulation)

## ----code14, echo=TRUE, eval=TRUE, tidy=FALSE, results='hide', message=FALSE----
# This will be a list with 3 keys: settings, association and regulators
dnase_settings <- omicSettings(multi_simulation, "DNase-seq",
                               association = TRUE)

## ----code15, echo=TRUE, eval=TRUE, tidy = FALSE, results='hide', message=FALSE----
# multi_simulation is an object returned by mosim function.

# This will be a data frame with RNA-seq counts
rnaseq_simulated <- omicResults(multi_simulation, "RNA-seq")

#                    Group1.Time0.Rep1 Group1.Time0.Rep2 Group1.Time0.Rep3 ...
# ENSMUSG00000073155              4539              5374              5808 ...
# ENSMUSG00000026251                 0                 0                 0 ...
# ENSMUSG00000040472              2742              2714              2912 ...
# ENSMUSG00000021598              5256              4640              5130 ...
# ENSMUSG00000032348               421               348               492 ...
# ENSMUSG00000097226                16                14                 9 ...
# ENSMUSG00000027857                 0                 0                 0 ...
# ENSMUSG00000032081                 1                 0                 0 ...
# ENSMUSG00000097164               794               822               965 ...
# ENSMUSG00000097871                 0                 0                 0 ...

# This will be a list containing RNA-seq and DNase-seq counts
all_simulated <- omicResults(multi_simulation)

## ----code16, echo=TRUE, eval=TRUE, tidy = FALSE, results = "hide", message=FALSE, warning=FALSE----
# Generate a simulation object
omic_list <- c("RNA-seq")
rnaseq_simulation <- mosim(omics = omic_list)

# This will be a data frame with RNA-seq counts
design_matrix <- experimentalDesign(rnaseq_simulation)
design_matrix

## ----code17, echo=TRUE, eval=FALSE, tidy = FALSE, results = "hide", message=FALSE, warning=FALSE----
#  
#  # The methods returns a ggplot plot, if called directly
#  # it will be automatically plotted.
#  plotProfile(multi_simulation,
#              omics = c("RNA-seq", "DNase-seq"),
#              featureIDS = list(
#                  "RNA-seq" = "ENSMUSG00000024691",
#                  "DNase-seq" = "19_12574278_12574408"
#              ))

## ----code18, echo=TRUE, eval=FALSE, tidy = FALSE, results = "hide", message=FALSE, warning=FALSE----
#  library(ggplot2)
#  
#  # Store the plot in a variable
#  profile_plot <- plotProfile(multi_simulation,
#              omics = c("RNA-seq", "DNase-seq"),
#              featureIDS = list(
#                  "RNA-seq" = "ENSMUSG00000024691",
#                  "DNase-seq" = "19_12574278_12574408"
#              ))
#  
#  # Modify the title and print
#  profile_plot +
#      ggtitle("My custom title") +
#      theme_bw() +
#      theme(legend.position="top")

## ----code19, echo=TRUE, eval=TRUE, tidy = FALSE, results = "hide", message=FALSE, warning=FALSE----
rnaseq_simulation <- mosim(omics = c("RNA-seq", "ChIP-seq"))
rnaseq_simulated <- omicResults(rnaseq_simulation, c("RNA-seq", "ChIP-seq"))
discrete_ChIP <- discretize(rnaseq_simulated, "ChIP-seq")