## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 12,
  fig.height = 6.75,
  fig.show='hide'
)

## ----setup, message = F-------------------------------------------------------
library(methodical)
library(TumourMethData)
library(BSgenome.Hsapiens.UCSC.hg19)

## ----eval=TRUE----------------------------------------------------------------
# Download RangedSummarizedExperiment with methylation data for prostate metastases from TumourMethData
mcrpc_wgbs_hg38_chr11 = TumourMethData::download_meth_dataset(dataset = "mcrpc_wgbs_hg38_chr11")

## ----eval=TRUE----------------------------------------------------------------
# Create a GRanges with the hg38 genomic coordinates for the GSTP1, including 
# 2 KB upstream of its designated start in Ensembl
gstp1_start_site_region <- GRanges("chr11:67581742-67586656:+")

# Extract methylation values for CpG sites overlapping GSTP1 gene body
gstp1_cpg_methylation <- extractGRangesMethSiteValues(
  meth_rse = mcrpc_wgbs_hg38_chr11, genomic_regions = gstp1_start_site_region)

# View the first few rows and columns of the result. 
# extractGRangesMethSiteValues returns a row for each methylation site and a 
# separate column for each sample where row names give the coordinates of the 
# methylation sites in character format. 
gstp1_cpg_methylation[1:6, 1:6]

## ----eval=TRUE----------------------------------------------------------------
# Load CpG islands annotation for hg38 
cpg_island_annotation <- annotatr::build_annotations(genome = "hg38", annotation = "hg38_cpgs")
names(cpg_island_annotation) <- cpg_island_annotation$id

# Filter for annotation for chr11
cpg_island_annotation = cpg_island_annotation[seqnames(cpg_island_annotation) == "chr11"]

# Convert into a GRangesList with separate GRanges for islands, shores, shelves and inter island regions
cpg_island_annotation <- GRangesList(split(cpg_island_annotation, cpg_island_annotation$type))

# Create a BPPARAM class
BPPARAM = BiocParallel::bpparam()

# Summarize methylation levels for CpG islands
cpg_island_methylation <- summarizeRegionMethylation(
  meth_rse = mcrpc_wgbs_hg38_chr11, genomic_regions = cpg_island_annotation$hg38_cpg_islands, 
  BPPARAM = BPPARAM, summary_function = colMeans)

# Print a few rows for the first few samples of the result
cpg_island_methylation[1000:1006, 1:6]

## ----eval=TRUE----------------------------------------------------------------
# Plot the methylation values along the GSTP1 gene body for one prostate metastasis sample.
gstp1_methylation_plot = plotMethylationValues(gstp1_cpg_methylation, sample_name = "DTB_003")
print(gstp1_methylation_plot)

## ----eval=TRUE----------------------------------------------------------------
# Annotate gstp1_methylation_plot with cpg_island_annotation
annotatePlot(meth_site_plot = gstp1_methylation_plot, 
  annotation_grl = cpg_island_annotation, annotation_plot_proportion = 0.3, 
  grl_colours = c("#DEEBF7", "#9ECAE1", "#4292C6", "#08519C"))

# Create same plot, except showing the distance to the GSTP1 start site on the x-axis
annotatePlot(meth_site_plot = gstp1_methylation_plot, 
  annotation_grl = cpg_island_annotation, 
  reference_tss = GRanges("chr11:67583742"),annotation_plot_proportion = 0.3, 
  grl_colours = c("#DEEBF7", "#9ECAE1", "#4292C6", "#08519C"))

# Return the annotation plot by itself
annotatePlot(meth_site_plot = gstp1_methylation_plot, 
  annotation_grl = cpg_island_annotation, annotation_plot_proportion = 0.3, 
  grl_colours = c("#DEEBF7", "#9ECAE1", "#4292C6", "#08519C"), annotation_plot_only = TRUE)

## ----eval=TRUE----------------------------------------------------------------
# Download repetitive sequences from AnnotationHub and filter for LINE elements
repeat_annotation_hg38 <- AnnotationHub::AnnotationHub()[["AH99003"]]
line_elements_hg38 <- repeat_annotation_hg38[repeat_annotation_hg38$repClass == "SINE"]

# Mask LINE elements in mcrpc_wgbs_hg38_chr11
mcrpc_wgbs_hg38_chr11_lines_masked <- maskRangesInRSE(rse = mcrpc_wgbs_hg38_chr11, 
  mask_ranges = line_elements_hg38)

# Extract the methylation values for one of the LINE elements in the 
# unmasked and masked RSE
extractGRangesMethSiteValues(meth_rse = mcrpc_wgbs_hg38_chr11, 
  genomic_regions = line_elements_hg38[1000])[, 1:6]
extractGRangesMethSiteValues(meth_rse = mcrpc_wgbs_hg38_chr11_lines_masked, 
  genomic_regions = line_elements_hg38[1000])[, 1:6]


## ----eval=TRUE----------------------------------------------------------------
# Create a DNAStringSet for chromosome11
chr11_dss = setNames(DNAStringSet(BSgenome.Hsapiens.UCSC.hg19[["chr11"]]), "chr11")

# Get CpG sites for hg19 for chromsome 11
hg19_cpgs <- methodical::extractMethSitesFromGenome(genome = chr11_dss)

# Download hg38 to hg19 liftover chain from AnnotationHub
hg38tohg19Chain <- AnnotationHub::AnnotationHub()[["AH14108"]]

# Liftover mcrpc_wgbs_hg38_chr11 to mcrpc_wgbs_hg19_chr11
mcrpc_wgbs_hg19_chr11 <- liftoverMethRSE(meth_rse = mcrpc_wgbs_hg38_chr11, chain = hg38tohg19Chain, 
  remove_one_to_many_mapping = TRUE, permitted_target_regions = hg19_cpgs)

# Compare the dimensions of mcrpc_wgbs_hg38_chr11 and mcrpc_wgbs_hg19_chr11. 
# 1,423,050 methylation sites could not be lifted over from hg38 to hg19. 
dim(mcrpc_wgbs_hg38_chr11)
dim(mcrpc_wgbs_hg19_chr11)

# chr1:921635 should be lifted over to chr1:857015 so confirm that they have 
# the same methylation values in hg38 and hg19
rtracklayer::liftOver(GRanges("chr11:67581759"), hg38tohg19Chain)
extractGRangesMethSiteValues(mcrpc_wgbs_hg38_chr11, GRanges("chr11:67581759"))[, 1:8]
extractGRangesMethSiteValues(mcrpc_wgbs_hg19_chr11, GRanges("chr11:67349230"))[, 1:8]

## -----------------------------------------------------------------------------
sessionInfo()