## ----settings, include = FALSE--------------------------------------------------------------------
options(width = 100)
knitr::opts_chunk$set(collapse = TRUE, comment = "#>",class.source = "whiteCode")
library(dplyr)

## ----sesame, include = FALSE, eval = TRUE---------------------------------------------------------
library(sesameData)

## ----eval = FALSE---------------------------------------------------------------------------------
# if (!"BiocManager" %in% rownames(installed.packages()))
#      install.packages("BiocManager")
# BiocManager::install("MethReg", dependencies = TRUE)

## ----setup, eval = TRUE---------------------------------------------------------------------------
library(MethReg)

## ----workflow, fig.cap = "Figure 2 Workflow of MethReg. Data - MethReg required inputs are: (1) array-based DNA methylation data (HM450/EPIC) with beta-values, (2) RNA-seq data with normalized counts, (3) estimated TF activities from the RNA-seq data using GSVA or VIPER  software. Creating triplets – there are multiple ways to create a CpG-TF-target gene triplet: (1) CpGs can be mapped to human TFs by using TF motifs from databases such as JASPAR or ReMap and scanning the CpGs region to identify if there is a binding site (2) CpG can be mapped to target genes using a distance-based approach, CpGs will be linked to a gene if it is on promoter region (+-1K bp away from the TSS), for a distal CpG it can be linked to either all genes within a fixed width (i.e. 500k bp), or a fixed number of genes upstream and downstream of the CpG location (3) TF-target genes can be retrieved from external databases (i.e. Cistrome Cancer; Dorothea). Using two different pairs (i.e. CpG-TF, TF-target gene), triplets can then be created. Analysis – each triplet will be evaluated using a robust linear model in which TF activity is estimated from GSVA/Viper software and DNAm.group is a binary variable used to model if the sample CpG belongs to the top 25% methylation levels or the lowest 25% methylation levels. Results – MethReg will output the prioritized triplets and classify both the TF role on the gene expression (repressor or activator) and the DNAm effect on the TF activity (Enhancing or attenuating).", echo = FALSE, fig.width = 13----
png::readPNG("workflow_methReg.png") %>% grid::grid.raster()

## ----warning=FALSE--------------------------------------------------------------------------------
data("dna.met.chr21")

## -------------------------------------------------------------------------------------------------
dna.met.chr21[1:5,1:5]

## -------------------------------------------------------------------------------------------------
dna.met.chr21.se <- make_dnam_se(
  dnam = dna.met.chr21,
  genome = "hg38",
  arrayType = "450k",
  betaToM = FALSE, # transform beta to m-values 
  verbose = FALSE # hide informative messages
)

## -------------------------------------------------------------------------------------------------
dna.met.chr21.se
SummarizedExperiment::rowRanges(dna.met.chr21.se)[1:4,1:4]

## -------------------------------------------------------------------------------------------------
data("gene.exp.chr21.log2")
gene.exp.chr21.log2[1:5,1:5]

## -------------------------------------------------------------------------------------------------
gene.exp.chr21.se <- make_exp_se(
  exp = gene.exp.chr21.log2,
  genome = "hg38",
  verbose = FALSE
)
gene.exp.chr21.se
SummarizedExperiment::rowRanges(gene.exp.chr21.se)[1:5,]

## ----plot, fig.cap = "Different target linking strategies", echo = FALSE--------------------------
png::readPNG("mapping_target_strategies.png") %>% grid::grid.raster()

## ----message = FALSE, results = "hide"------------------------------------------------------------
triplet.promoter <- create_triplet_distance_based(
  region = dna.met.chr21.se,
  target.method = "genes.promoter.overlap",
  genome = "hg38",
  target.promoter.upstream.dist.tss = 2000,
  target.promoter.downstream.dist.tss = 2000,
  motif.search.window.size = 400,
  motif.search.p.cutoff  = 1e-08,
  cores = 1  
)

## ----message = FALSE, results = "hide"------------------------------------------------------------
# Map probes to genes within 500kb window
triplet.distal.window <- create_triplet_distance_based(
  region = dna.met.chr21.se,
    genome = "hg38", 
    target.method = "window",
    target.window.size = 500 * 10^3,
    target.rm.promoter.regions.from.distal.linking = TRUE,
    motif.search.window.size = 500,
    motif.search.p.cutoff  = 1e-08,
    cores = 1
)

## ----message = FALSE, results = "hide"------------------------------------------------------------
# Map probes to 5 genes upstream and 5 downstream
triplet.distal.nearby.genes <- create_triplet_distance_based(
  region = dna.met.chr21.se,
    genome = "hg38", 
    target.method = "nearby.genes",
    target.num.flanking.genes = 5,
    target.window.size = 500 * 10^3,
    target.rm.promoter.regions.from.distal.linking = TRUE,
    motif.search.window.size = 400,
    motif.search.p.cutoff  = 1e-08,
    cores = 1  
)

## ----eval = FALSE---------------------------------------------------------------------------------
# if (!"BiocManager" %in% rownames(installed.packages()))
#      install.packages("BiocManager")
# BiocManager::install("remap-cisreg/ReMapEnrich", dependencies = TRUE)

## ----eval = FALSE---------------------------------------------------------------------------------
# library(ReMapEnrich)
# remapCatalog2018hg38 <- downloadRemapCatalog("/tmp/", assembly = "hg38")
# remapCatalog <- bedToGranges(remapCatalog2018hg38)

## ----eval = FALSE---------------------------------------------------------------------------------
# #-------------------------------------------------------------------------------
# # Triplets promoter using remap
# #-------------------------------------------------------------------------------
# triplet.promoter.remap <- create_triplet_distance_based(
#   region = dna.met.chr21.se,
#   genome = "hg19",
#   target.method =  "genes.promoter.overlap",
#   TF.peaks.gr = remapCatalog,
#   motif.search.window.size = 400,
#   max.distance.region.target = 10^6,
# )

## ----eval = FALSE---------------------------------------------------------------------------------
# if (!"BiocManager" %in% rownames(installed.packages()))
#      install.packages("BiocManager")
# BiocManager::install("dorothea", dependencies = TRUE)

## -------------------------------------------------------------------------------------------------
regulons.dorothea <- dorothea::dorothea_hs
regulons.dorothea %>% head

## -------------------------------------------------------------------------------------------------
rnaseq.tf.es <- get_tf_ES(
  exp = gene.exp.chr21.se %>% SummarizedExperiment::assay(),
  regulons = regulons.dorothea
)

## ----message = FALSE, results = "hide"------------------------------------------------------------
  triplet.regulon <- create_triplet_regulon_based(
    region = dna.met.chr21.se,
    genome = "hg38",  
    motif.search.window.size = 400,
    tf.target = regulons.dorothea,
    max.distance.region.target = 10^6 # 1Mbp
  ) 

## -------------------------------------------------------------------------------------------------
triplet.regulon %>% head

## -------------------------------------------------------------------------------------------------
str(triplet.promoter)
triplet.promoter$distance_region_target_tss %>% range
triplet.promoter %>% head

## ----interaction_model, message = FALSE, results = "hide", eval = TRUE----------------------------
results.interaction.model <- interaction_model(
    triplet = triplet.promoter, 
    dnam = dna.met.chr21.se,
    exp = gene.exp.chr21.se,
    dnam.group.threshold = 0.1,
    sig.threshold = 0.05,
    fdr = T,
    stage.wise.analysis = FALSE,
    filter.correlated.tf.exp.dnam = F,
    filter.triplet.by.sig.term = T
)

## -------------------------------------------------------------------------------------------------
# Results for quartile model
results.interaction.model %>% dplyr::select(
  c(1,4,5,grep("RLM",colnames(results.interaction.model)))
  ) %>% head

## ----stratified_model, message = FALSE, warning = FALSE, results = "hide", eval = TRUE------------
results.stratified.model <- stratified_model(
    triplet = results.interaction.model,
    dnam = dna.met.chr21.se,
    exp = gene.exp.chr21.se,
    dnam.group.threshold = 0.25
)

## -------------------------------------------------------------------------------------------------
results.stratified.model %>% head

## ----plot_interaction_model, eval = TRUE, message = FALSE, results = "hide", warning = FALSE------
plots <- plot_interaction_model(
    triplet.results = results.interaction.model[1,], 
    dnam = dna.met.chr21.se, 
    exp = gene.exp.chr21.se,
    dnam.group.threshold = 0.25
)

## ----fig.height = 8, fig.width = 13, eval = TRUE, fig.cap = "An example output from MethReg."-----
plots

## ----scenarios, fig.cap =  "Scenarios modeled by MethReg. A and B: DNA methylation decreases TF activity. In A TF is a repressor of the target gene, while in B TF is an activator. C and D: DNA methylation increases TF activity. In C TF is a repressor of the target gene, while in D TF is an activator. E and F: DNA methylation inverts the TF role. In E when DNA methylation levels are low the TF works as a repressor, when DNA methylation levels are high the TF acts as an activator. In F when the DNA methylation levels are low the TF works as an activator, when methylation levels are high the TF acts a repressor.", echo = FALSE, fig.width=13----
png::readPNG("scenarios.png")  %>% grid::grid.raster()

## ----residuals, results = "hide", eval = FALSE----------------------------------------------------
# data("gene.exp.chr21.log2")
# data("clinical")
# metadata <- clinical[,c("sample_type","gender")]
# 
# gene.exp.chr21.residuals <- get_residuals(gene.exp.chr21, metadata) %>% as.matrix()

## ----eval = FALSE---------------------------------------------------------------------------------
# gene.exp.chr21.residuals[1:5,1:5]

## ----results = "hide", eval = FALSE---------------------------------------------------------------
# data("dna.met.chr21")
# dna.met.chr21 <- make_se_from_dnam_probes(
#   dnam = dna.met.chr21,
#   genome = "hg38",
#   arrayType = "450k",
#   betaToM = TRUE
# )
# dna.met.chr21.residuals <- get_residuals(dna.met.chr21, metadata) %>% as.matrix()

## ----eval = FALSE---------------------------------------------------------------------------------
# dna.met.chr21.residuals[1:5,1:5]

## ----message = FALSE, results = "hide", eval = FALSE----------------------------------------------
# results <- interaction_model(
#     triplet = triplet,
#     dnam = dna.met.chr21.residuals,
#     exp = gene.exp.chr21.residuals
# )

## -------------------------------------------------------------------------------------------------
regulons.dorothea <- dorothea::dorothea_hs
regulons.dorothea %>% head

## ----message = FALSE, results = "hide"------------------------------------------------------------
rnaseq.tf.es <- get_tf_ES(
  exp = gene.exp.chr21.se %>% SummarizedExperiment::assay(),
  regulons = regulons.dorothea
)

## -------------------------------------------------------------------------------------------------
rnaseq.tf.es[1:4,1:4]

## ----message = FALSE, results = "hide"------------------------------------------------------------
regulons.dorothea <- dorothea::dorothea_hs
regulons.dorothea$tf <- MethReg:::map_symbol_to_ensg(
  gene.symbol = regulons.dorothea$tf,
  genome = "hg38"
)
regulons.dorothea$target <- MethReg:::map_symbol_to_ensg(
  gene.symbol = regulons.dorothea$target,
  genome = "hg38"
)
split_tibble <- function(tibble, col = 'col') tibble %>% split(., .[, col])
regulons.dorothea.list <- regulons.dorothea %>% na.omit() %>% 
  split_tibble('tf') %>% 
  lapply(function(x){x[[3]]})

## ----message = FALSE, results = "hide", eval = FALSE----------------------------------------------
# library(GSVA)
# rnaseq.tf.es.gsva <- gsva(
#   expr = gene.exp.chr21.se %>% SummarizedExperiment::assay(),
#   gset.idx.list = regulons.dorothea.list,
#   method = "gsva",
#   kcdf = "Gaussian",
#   abs.ranking = TRUE,
#   min.sz = 5,
#   max.sz = Inf,
#   parallel.sz = 1L,
#   mx.diff = TRUE,
#   ssgsea.norm = TRUE,
#   verbose = TRUE
# )

## ----size = 'tiny'--------------------------------------------------------------------------------
sessionInfo()