## ----fig.show='hold'----------------------------------------------------------
library(TCGAWorkflowData)
data("elmerExample")
data("TCGA_LGG_Transcriptome_20_samples")
data("TCGA_GBM_Transcriptome_20_samples")
data("histoneMarks")
data("biogrid")
data("genes_GR")
data("maf_lgg_gbm")

## ----eval = FALSE, message=FALSE,warning=FALSE, include=TRUE------------------
# library(GenomicRanges)
# library(TCGAbiolinks)
# ##############################
# ## Recurrent CNV annotation ##
# ##############################
# # Get gene information from GENCODE using biomart
# genes <- TCGAbiolinks:::get.GRCh.bioMart(genome = "hg19")
# genes <- genes[genes$external_gene_id != "" & genes$chromosome_name %in% c(1:22,"X","Y"),]
# genes[genes$chromosome_name == "X", "chromosome_name"] <- 23
# genes[genes$chromosome_name == "Y", "chromosome_name"] <- 24
# genes$chromosome_name <- sapply(genes$chromosome_name,as.integer)
# genes <- genes[order(genes$start_position),]
# genes <- genes[order(genes$chromosome_name),]
# genes <- genes[,c("external_gene_id", "chromosome_name", "start_position","end_position")]
# colnames(genes) <- c("GeneSymbol","Chr","Start","End")
# genes_GR <- makeGRangesFromDataFrame(genes,keep.extra.columns = TRUE)
# save(genes_GR,genes,file = "genes_GR.rda", compress = "xz")

## ----eval=FALSE, message=FALSE, warning=FALSE, include=TRUE-------------------
# library(RTCGAToolbox)
# # Download GISTIC results
# lastAnalyseDate <- getFirehoseAnalyzeDates(1)
# gistic <- getFirehoseData(
#   dataset = "GBM",
#   gistic2_Date = lastAnalyseDate,
#   GISTIC = TRUE
# )
# 
# # get GISTIC results
# gistic_allbygene <- getData(
#   gistic,
#   type = "GISTIC",
#   platform = "AllByGene"
# )
# 
# gistic_thresholedbygene <- getData(
#   gistic,
#   type = "GISTIC",
#   platform = "ThresholdedByGene"
# )
# 
# save(
#   gistic_allbygene,
#   gistic_thresholedbygene,
#   file = "GBMGistic.rda", compress = "xz"
# )

## ----eval=FALSE, include=TRUE, results='asis'---------------------------------
# query_exp_lgg <- GDCquery(
#   project = "TCGA-LGG",
#   data.category = "Transcriptome Profiling",
#   data.type = "Gene Expression Quantification",
#   workflow.type = "STAR - Counts"
# )
# # Get only first 20 samples to make example faster
# query_exp_lgg$results[[1]] <- query_exp_lgg$results[[1]][1:20,]
# GDCdownload(query_exp_lgg)
# exp_lgg <- GDCprepare(
#   query = query_exp_lgg
# )
# 
# query_exp_gbm <- GDCquery(
#   project = "TCGA-GBM",
#   data.category = "Transcriptome Profiling",
#   data.type = "Gene Expression Quantification",
#   workflow.type = "STAR - Counts"
# )
# # Get only first 20 samples to make example faster
# query_exp_gbm$results[[1]] <- query_exp_gbm$results[[1]][1:20,]
# GDCdownload(query_exp_gbm)
# exp_gbm <- GDCprepare(
#   query = query_exp_gbm
# )

## ----eval=FALSE, include=TRUE, results='asis'---------------------------------
# #----------- 8.3 Identification of Regulatory Enhancers   -------
# library(TCGAbiolinks)
# # Samples: primary solid tumor w/ DNA methylation and gene expression
# matched_met_exp <- function(project, n = NULL){
#   # get primary solid tumor samples: DNA methylation
#   message("Download DNA methylation information")
#   met450k <- GDCquery(
#     project = project,
#     data.category = "DNA Methylation",
#     platform = "Illumina Human Methylation 450",
#     data.type = "Methylation Beta Value",
#     sample.type = c("Primary Tumor")
#   )
#   met450k.tp <-  met450k$results[[1]]$cases
# 
#   # get primary solid tumor samples: RNAseq
#   message("Download gene expression information")
#   exp <- GDCquery(
#     project = project,
#     data.category = "Transcriptome Profiling",
#     data.type = "Gene Expression Quantification",
#     workflow.type = "STAR - Counts"
#     sample.type = c("Primary Tumor")
#   )
# 
#   exp.tp <- exp$results[[1]]$cases
#   # Get patients with samples in both platforms
#   patients <- unique(substr(exp.tp,1,15)[substr(exp.tp,1,12) %in% substr(met450k.tp,1,12)] )
#   if(!is.null(n)) patients <- patients[1:n] # get only n samples
#   return(patients)
# }
# lgg.samples <- matched_met_exp("TCGA-LGG", n = 10)
# gbm.samples <- matched_met_exp("TCGA-GBM", n = 10)
# samples <- c(lgg.samples,gbm.samples)
# 
# #-----------------------------------
# # 1 - Methylation
# # ----------------------------------
# query.met <- GDCquery(
#   project = c("TCGA-LGG","TCGA-GBM"),
#   data.category = "DNA Methylation",
#   platform = "Illumina Human Methylation 450",
#   data.type = "Methylation Beta Value",
#   barcode = samples
# )
# GDCdownload(query.met)
# met <- GDCprepare(query.met, save = FALSE)
# met <- subset(met,subset = as.character(GenomicRanges::seqnames(met)) %in% c("chr9"))
# 
# #-----------------------------------
# # 2 - Expression
# # ----------------------------------
# query.exp <- GDCquery(
#   project = c("TCGA-LGG","TCGA-GBM"),
#   data.category = "Transcriptome Profiling",
#   data.type = "Gene Expression Quantification",
#   workflow.type = "STAR - Counts"
#   sample.type = c("Primary Tumor")
#   barcode =  samples
# )
# GDCdownload(query.exp)
# exp <- GDCprepare(query.exp, save = FALSE)
# save(exp, met, gbm.samples, lgg.samples, file = "elmerExample.rda", compress = "xz")

## ----eval=FALSE, include=TRUE, results='asis'---------------------------------
# library(TCGAbiolinks)
# query <- GDCquery(
#   project = c("TCGA-LGG","TCGA-GBM"),
#   data.category = "Simple Nucleotide Variation",
#   access = "open",
#   data.type = "Masked Somatic Mutation",
#   workflow.type = "Aliquot Ensemble Somatic Variant Merging and Masking"
# )
# GDCdownload(query)
# maf <- GDCprepare(query)
# save(maf,file = "maf_lgg_gbm.rda",compress = "xz")

## ----eval=FALSE, include=TRUE, results='asis'---------------------------------
# ### read biogrid info
# ### Check last version in https://thebiogrid.org/download.php
# file <- "https://downloads.thebiogrid.org/Download/BioGRID/Latest-Release/BIOGRID-ALL-LATEST.tab2.zip"
# if(!file.exists(gsub("zip","txt",basename(file)))){
#   downloader::download(file,basename(file))
#   unzip(basename(file),junkpaths =TRUE)
# }
# 
# tmp.biogrid <- vroom::vroom(
#   dir(pattern = "BIOGRID-ALL.*\\.txt")
# )
# save(tmp.biogrid, file = "biogrid.rda", compress = "xz")

## ----results='hide', eval=FALSE, echo=FALSE, message=FALSE,warning=FALSE------
# library(ChIPseeker)
# library(AnnotationHub)
# library(pbapply)
# library(ggplot2)
# #------------------ Working with ChipSeq data ---------------
# # Step 1: download histone marks for a brain and non-brain samples.
# #------------------------------------------------------------
# # loading annotation hub database
# ah = AnnotationHub()
# 
# # Searching for brain consolidated epigenomes in the roadmap database
# bpChipEpi_brain <- query(ah , c("EpigenomeRoadMap", "narrowPeak", "chip", "consolidated","brain","E068"))
# 
# # Get chip-seq data
# histone.marks <- pblapply(names(bpChipEpi_brain), function(x) {ah[[x]]})
# names(histone.marks) <- names(bpChipEpi_brain)
# save(histone.marks, file = "histoneMarks.rda", compress = "xz")

## ----sessionInfo, results='asis', echo=FALSE----------------------------------
pander::pander(sessionInfo(), compact = FALSE)