This section contains the complete ELMER code for the following analysis:
Below is the complete code that was explained in the other sections.
library(MultiAssayExperiment)
library(ELMER.data)
library(ELMER)
# get distal probes that are 2kb away from TSS on chromosome 1
get.feature.probe(
distal.probes <-genome = "hg19",
met.platform = "450K",
rm.chr = paste0("chr",c(2:22,"X","Y"))
)data(LUSC_RNA_refined,package = "ELMER.data") # GeneExp
data(LUSC_meth_refined,package = "ELMER.data") # Meth
createMAE(
mae <-exp = GeneExp,
met = Meth,
save = TRUE,
linearize.exp = TRUE,
save.filename = "mae.rda",
filter.probes = distal.probes,
met.platform = "450K",
genome = "hg19",
TCGA = TRUE
)
"definition"
group.col <- "Primary solid Tumor"
group1 <- "Solid Tissue Normal"
group2 <- "result"
dir.out <- "hypo" # Search for hypomethylated probes in group 1
diff.dir <-
get.diff.meth(
sig.diff <-data = mae,
group.col = group.col,
group1 = group1,
group2 = group2,
minSubgroupFrac = 0.2,
sig.dif = 0.3,
diff.dir = diff.dir,
cores = 1,
dir.out = dir.out,
pvalue = 0.01
)
GetNearGenes(
nearGenes <-data = mae,
probes = sig.diff$probe,
numFlankingGenes = 20
# 10 upstream and 10 dowstream genes
)
get.pair(
pair <-data = mae,
group.col = group.col,
group1 = group1,
mode = "unsupervised",
group2 = group2,
nearGenes = nearGenes,
diff.dir = diff.dir,
minSubgroupFrac = 0.4, # % of samples to use in to create groups U/M
permu.dir = file.path(dir.out,"permu"),
permu.size = 100, # Please set to 100000 to get significant results
raw.pvalue = 0.05,
Pe = 0.01, # Please set to 0.001 to get significant results
filter.probes = TRUE, # See preAssociationProbeFiltering function
filter.percentage = 0.05,
filter.portion = 0.3,
dir.out = dir.out,
cores = 1,
label = diff.dir
)
# Identify enriched motif for significantly hypomethylated probes which
# have putative target genes.
get.enriched.motif(
enriched.motif <-data = mae,
probes = pair$Probe,
dir.out = dir.out,
label = diff.dir,
min.incidence = 10,
lower.OR = 1.1
)
get.TFs(
TF <-data = mae,
mode = "unsupervised",
group.col = group.col,
group1 = group1,
group2 = group2,
enriched.motif = enriched.motif,
dir.out = dir.out,
cores = 1,
label = diff.dir
)
library(stringr)
library(TCGAbiolinks)
library(dplyr)
library(ELMER)
library(MultiAssayExperiment)
library(parallel)
library(readr)
dir.create("~/paper_elmer/",showWarnings = FALSE)
setwd("~/paper_elmer/")
"mae_BRCA_hg38_450K_no_ffpe.rda"
file <-if(file.exists(file)) {
get(load(file))
mae <-else {
} getTCGA(
disease = "BRCA", # TCGA disease abbreviation (BRCA,BLCA,GBM, LGG, etc)
basedir = "DATA", # Where data will be downloaded
genome = "hg38"
# Genome of refenrece "hg38" or "hg19"
)
get.feature.probe(
distal.probes <-feature = NULL,
genome = "hg38",
met.platform = "450K"
)
createMAE(
mae <-exp = "~/paper_elmer/Data/BRCA/BRCA_RNA_hg38.rda",
met = "~/paper_elmer/Data/BRCA/BRCA_meth_hg38.rda",
met.platform = "450K",
genome = "hg38",
linearize.exp = TRUE,
filter.probes = distal.probes,
met.na.cut = 0.2,
save = FALSE,
TCGA = TRUE
) # Remove FFPE samples from the analysis
mae[,!mae$is_ffpe]
mae <-
# Get molecular subytpe information from cell paper and more metadata (purity etc...)
# https://doi.org/10.1016/j.cell.2015.09.033
"http://ars.els-cdn.com/content/image/1-s2.0-S0092867415011952-mmc2.xlsx"
file <-::download(file, basename(file))
downloader readxl::read_excel(basename(file), skip = 2)
subtypes <-
$sample <- substr(subtypes$Methylation,1,16)
subtypes merge(colData(mae),subtypes,by = "sample",all.x = T)
meta.data <- meta.data[match(colData(mae)$sample,meta.data$sample),]
meta.data <- S4Vectors::DataFrame(meta.data)
meta.data <-rownames(meta.data) <- meta.data$sample
stopifnot(all(meta.data$patient == colData(mae)$patient))
colData(mae) <- meta.data
save(mae, file = "mae_BRCA_hg38_450K_no_ffpe.rda")
} "BRCA_unsupervised_hg38/hypo"
dir.out <- 10
cores <- get.diff.meth(
diff.probes <-data = mae,
group.col = "definition",
group1 = "Primary solid Tumor",
group2 = "Solid Tissue Normal",
diff.dir = "hypo", # Get probes hypometh. in group 1
cores = cores,
minSubgroupFrac = 0.2, # % group samples used.
pvalue = 0.01,
sig.dif = 0.3,
dir.out = dir.out,
save = TRUE
)
# For each differently methylated probes we will get the
# 20 nearby genes (10 downstream and 10 upstream)
GetNearGenes(
nearGenes <-data = mae,
probes = diff.probes$probe,
numFlankingGenes = 20
)
# This step is the most time consuming. Depending on the size of the groups
# and the number of probes found previously it migh take hours
get.pair(
Hypo.pair <-data = mae,
nearGenes = nearGenes,
group.col = "definition",
group1 = "Primary solid Tumor",
group2 = "Solid Tissue Normal",
permu.dir = paste0(dir.out,"/permu"),
permu.size = 10000,
mode = "unsupervised",
minSubgroupFrac = 0.4, # 40% of samples to create U and M
raw.pvalue = 0.001,
Pe = 0.001,
filter.probes = TRUE,
filter.percentage = 0.05,
filter.portion = 0.3,
dir.out = dir.out,
cores = cores,
label = "hypo"
)# Number of pairs: 2950
get.enriched.motif(
enriched.motif <-data = mae,
min.motif.quality = "DS",
probes = unique(Hypo.pair$Probe),
dir.out = dir.out,
label = "hypo",
min.incidence = 10,
lower.OR = 1.1
) get.TFs(
TF <-data = mae,
group.col = "definition",
group1 = "Primary solid Tumor",
group2 = "Solid Tissue Normal",
minSubgroupFrac = 0.4, # Set to 1 if supervised mode
enriched.motif = enriched.motif,
dir.out = dir.out,
cores = cores,
label = "hypo"
)
library(stringr)
library(TCGAbiolinks)
library(dplyr)
library(ELMER)
library(MultiAssayExperiment)
library(parallel)
library(readr)
#-----------------------------------
# 1 - Samples
# ----------------------------------
dir.create("~/paper_elmer/",showWarnings = FALSE)
setwd("~/paper_elmer/")
"mae_BRCA_hg38_450K_no_ffpe.rda"
file <-if(file.exists(file)) {
get(load(file))
mae <-else {
} getTCGA(
disease = "BRCA", # TCGA disease abbreviation (BRCA,BLCA,GBM, LGG, etc)
basedir = "DATA", # Where data will be downloaded
genome = "hg38"
# Genome of refenrece "hg38" or "hg19"
)
get.feature.probe(
distal.probes <-feature = NULL,
genome = "hg38",
met.platform = "450K"
)
createMAE(
mae <-exp = "DATA/BRCA/BRCA_RNA_hg38.rda",
met = "DATA/BRCA/BRCA_meth_hg38.rda",
met.platform = "450K",
genome = "hg38",
linearize.exp = TRUE,
filter.probes = distal.probes,
met.na.cut = 0.2,
save = FALSE,
TCGA = TRUE
) # Remove FFPE samples from the analysis
mae[,!mae$is_ffpe]
mae <-
# Get molecular subytpe information from cell paper and more metadata (purity etc...)
# https://doi.org/10.1016/j.cell.2015.09.033
"http://ars.els-cdn.com/content/image/1-s2.0-S0092867415011952-mmc2.xlsx"
file <-::download(file, basename(file))
downloader readxl::read_excel(basename(file), skip = 2)
subtypes <-
$sample <- substr(subtypes$Methylation,1,16)
subtypes merge(colData(mae),subtypes,by = "sample",all.x = T)
meta.data <- meta.data[match(colData(mae)$sample,meta.data$sample),]
meta.data <- S4Vectors::DataFrame(meta.data)
meta.data <-rownames(meta.data) <- meta.data$sample
stopifnot(all(meta.data$patient == colData(mae)$patient))
colData(mae) <- meta.data
save(mae, file = "mae_BRCA_hg38_450K_no_ffpe.rda")
}
6
cores <- c( "hypo","hyper")
direction <- "hg38"
genome <- "PAM50"
group.col <- t(combn(na.omit(unique(colData(mae)[,group.col])),2))
groups <-for(g in 1:nrow(groups)) {
groups[g,1]
group1 <- groups[g,2]
group2 <-for (j in direction){
tryCatch({
message("Analysing probes ",j, "methylated in ", group1, " vs ", group2)
paste0("BRCA_supervised_",genome,"/",group1,"_",group2,"/",j)
dir.out <-dir.create(dir.out, recursive = TRUE)
#--------------------------------------
# STEP 3: Analysis |
#--------------------------------------
# Step 3.1: Get diff methylated probes |
#--------------------------------------
get.diff.meth(
Sig.probes <-data = mae,
group.col = group.col,
group1 = group1,
group2 = group2,
sig.dif = 0.3,
minSubgroupFrac = 1,
cores = cores,
dir.out = dir.out,
diff.dir = j,
pvalue = 0.01
)if(nrow(Sig.probes) == 0) next
#-------------------------------------------------------------
# Step 3.2: Identify significant probe-gene pairs |
#-------------------------------------------------------------
# Collect nearby 20 genes for Sig.probes
GetNearGenes(
nearGenes <-data = mae,
probe = Sig.probes$probe
)
get.pair(
pair <-data = mae,
nearGenes = nearGenes,
group.col = group.col,
group1 = group1,
group2 = group2,
permu.dir = paste0(dir.out,"/permu"),
dir.out = dir.out,
mode = "supervised",
diff.dir = j,
cores = cores,
label = j,
permu.size = 10000,
raw.pvalue = 0.001
)
readr::read_csv(
Sig.probes.paired <-paste0(dir.out,
"/getPair.",j,
".pairs.significant.csv")
1, drop = TRUE]
)[,
#-------------------------------------------------------------
# Step 3.3: Motif enrichment analysis on the selected probes |
#-------------------------------------------------------------
if(length(Sig.probes.paired) > 0 ){
#-------------------------------------------------------------
# Step 3.3: Motif enrichment analysis on the selected probes |
#-------------------------------------------------------------
get.enriched.motif(
enriched.motif <-probes = Sig.probes.paired,
dir.out = dir.out,
data = mae,
label = j,
plot.title = paste0("BRCA: OR for paired probes ",
"methylated in ",
j, " vs ",group2)
group1,
) readr::read_csv(
motif.enrichment <-paste0(dir.out,
"/getMotif.",j,
".motif.enrichment.csv")
)if(length(enriched.motif) > 0){
#-------------------------------------------------------------
# Step 3.4: Identifying regulatory TFs |
#-------------------------------------------------------------
print("get.TFs")
get.TFs(
TF <-data = mae,
enriched.motif = enriched.motif,
dir.out = dir.out,
mode = "supervised",
group.col = group.col,
group1 = group1,
diff.dir = j,
group2 = group2,
cores = cores,
label = j
) get(
TF.meth.cor <-load(paste0(dir.out, "/getTF.",j, ".TFs.with.motif.pvalue.rda"))
)save(
mae, TF, enriched.motif, Sig.probes.paired,
pair, nearGenes, Sig.probes, motif.enrichment,
TF.meth.cor,file = paste0(dir.out,"/ELMER_results_",j,".rda")
)
}
}error = function(e){
}, message(e)
})
} }