## ---- eval=FALSE--------------------------------------------------------------
# if (!require("BiocManager", quietly = TRUE))
#     install.packages("BiocManager")
# BiocManager::install("TCGAutils")

## ----include=TRUE,results="hide",message=FALSE,warning=FALSE------------------
# Packages used throughout this script.
library(TCGAutils)
library(curatedTCGAData)
library(MultiAssayExperiment)
library(RTCGAToolbox)
library(BiocFileCache)
library(rtracklayer)
library(R.utils)

## ----include=TRUE,results="hide",message=FALSE,warning=FALSE------------------
# Request the listed COAD assays; the result is reused as `coad` below.
coad <- curatedTCGAData::curatedTCGAData(
  diseaseCode = "COAD",
  assays = c(
    "CNASeq", "Mutation", "miRNA*",
    "RNASeq2*", "mRNAArray", "Methyl*"
  ),
  dry.run = FALSE
)

## -----------------------------------------------------------------------------
# Same function with a wildcard assay pattern and the default `dry.run`.
curatedTCGAData("COAD", "*")

## -----------------------------------------------------------------------------
sampleTables(coad)

## -----------------------------------------------------------------------------
data("sampleTypes")
head(sampleTypes)

## -----------------------------------------------------------------------------
# Split by sample codes "01" and "11"; the outer parentheses print the result.
(tnmae <- splitAssays(coad, c("01", "11")))

## -----------------------------------------------------------------------------
# Keep assays 4, 6 and 7 of the split object and coerce to the matched class.
(matchmae <- as(tnmae[, , c(4, 6, 7)], "MatchedAssayExperiment"))

## -----------------------------------------------------------------------------
getSubtypeMap(coad)

## -----------------------------------------------------------------------------
getClinicalNames("COAD")

## -----------------------------------------------------------------------------
# Compare the two `vital_status` columns present in the colData.
class(colData(coad)[["vital_status.x"]])
class(colData(coad)[["vital_status.y"]])

table(colData(coad)[["vital_status.x"]])
table(colData(coad)[["vital_status.y"]])

## -----------------------------------------------------------------------------
methcoad <- CpGtoRanges(coad)

## -----------------------------------------------------------------------------
mircoad <- mirToRanges(coad)
## -----------------------------------------------------------------------------
# Name of the mutation assay whose genome annotation is updated below.
rag <- "COAD_Mutation-20160128"
genome(coad[[rag]]) <- translateBuild(genome(coad[[rag]]))
seqlevelsStyle(coad[[rag]]) <- "UCSC"
genome(coad[[rag]])

## -----------------------------------------------------------------------------
lifturl <-
  "http://hgdownload.cse.ucsc.edu/goldenpath/hg18/liftOver/hg18ToHg19.over.chain.gz"
bfc <- BiocFileCache()
qfile <- bfcquery(bfc, "18to19chain", exact = TRUE)[["rpath"]]
# Reuse the path already returned by the query instead of querying again, and
# look only at the first hit so `&&` always receives length-one operands.
cfile <- if (length(qfile) > 0L && file.exists(qfile[[1L]])) {
  qfile[[1L]]
} else {
  bfcadd(bfc, "18to19chain", lifturl)
}

# Strip only the trailing ".gz" to obtain the uncompressed file name.
chainfile <- file.path(tempdir(), sub("\\.gz$", "", basename(cfile)))
# overwrite = TRUE lets this chunk be re-run in the same session, when the
# uncompressed chain file already exists in tempdir().
R.utils::gunzip(cfile, destname = chainfile, remove = FALSE, overwrite = TRUE)

chain <- suppressMessages(
  rtracklayer::import.chain(chainfile)
)

ranges19 <- rtracklayer::liftOver(rowRanges(coad[[rag]]), chain)

## -----------------------------------------------------------------------------
liftchain <-
  "http://hgdownload.cse.ucsc.edu/goldenpath/hg19/liftOver/hg19ToHg38.over.chain.gz"
bfc <- BiocFileCache()
q38file <- bfcquery(bfc, "19to38chain", exact = TRUE)[["rpath"]]
# Same cache-or-download logic as for the hg18 -> hg19 chain above.
c38file <- if (length(q38file) > 0L && file.exists(q38file[[1L]])) {
  q38file[[1L]]
} else {
  bfcadd(bfc, "19to38chain", liftchain)
}

cloc38 <- file.path(tempdir(), sub("\\.gz$", "", basename(c38file)))
R.utils::gunzip(c38file, destname = cloc38, remove = FALSE, overwrite = TRUE)

chain38 <- suppressMessages(
  rtracklayer::import.chain(cloc38)
)

## then use the liftOver function using the 'chain38' object
## as above
ranges38 <- rtracklayer::liftOver(unlist(ranges19), chain38)

## -----------------------------------------------------------------------------
# Keep only the rows for which liftOver returned at least one range.
re19 <- coad[[rag]][as.logical(lengths(ranges19))]

ranges19 <- unlist(ranges19)
genome(ranges19) <- "hg19"

rowRanges(re19) <- ranges19
# replacement
coad[[rag]] <- re19

rowRanges(re19)
## -----------------------------------------------------------------------------
# The reduced object replaces `coad` for the rest of the script.
coad <- qreduceTCGA(coad, keep.assay = TRUE)

## -----------------------------------------------------------------------------
symbolsToRanges(coad)

## -----------------------------------------------------------------------------
library(GenomicDataCommons)

# File query restricted to TCGA-COAD exon quantification records.
queso <- files(legacy = TRUE) %>%
  filter(
    ~ cases.project.project_id == "TCGA-COAD" &
      data_category == "Gene expression" &
      data_type == "Exon quantification"
  )

gdc_set_cache(directory = tempdir())

## ---- eval=FALSE--------------------------------------------------------------
# ## FALSE until gdcdata works
# qu <- manifest(queso)
# qq <- gdcdata(qu$id[1:4])
#
# makeGRangesListFromExonFiles(qq, nrows = 4)

## -----------------------------------------------------------------------------
## Load example file found in package
pkgDir <- system.file("extdata", package = "TCGAutils", mustWork = TRUE)
exonFile <- list.files(pkgDir, pattern = "cation\\.txt$", full.names = TRUE)
exonFile

## We add the original file prefix to query for the UUID and get the
## TCGAbarcode
filePrefix <- "unc.edu.32741f9a-9fec-441f-96b4-e504e62c5362.1755371."
## Add actual file name manually
makeGRangesListFromExonFiles(
  exonFile,
  fileNames = paste0(filePrefix, basename(exonFile))
)

## -----------------------------------------------------------------------------
grlFile <- system.file("extdata", "blca_cnaseq.txt", package = "TCGAutils")
grl <- read.table(grlFile)
head(grl)

# Same conversion twice: without, then with, the extra columns retained.
makeGRangesListFromCopyNumber(grl, split.field = "Sample")

makeGRangesListFromCopyNumber(
  grl,
  split.field = "Sample",
  keep.extra.columns = TRUE
)

## -----------------------------------------------------------------------------
tempDIR <- tempdir()
co <- getFirehoseData(
  "COAD",
  clinical = FALSE,
  GISTIC = TRUE,
  destdir = tempDIR
)

selectType(co, "GISTIC")
class(selectType(co, "GISTIC"))

makeSummarizedExperimentFromGISTIC(co, "Peaks")

## -----------------------------------------------------------------------------
# One-column DataFrame keyed by the colData row names, then merged into `coad`.
race_df <- DataFrame(
  race_f = factor(colData(coad)[["race"]]),
  row.names = rownames(colData(coad))
)
mergeColData(coad, race_df)

## -----------------------------------------------------------------------------
# First four column names of the simplified CNASeq assay, reused below.
(xbarcode <- head(colnames(coad)[["COAD_CNASeq-20160128_simplified"]], 4L))
barcodeToUUID(xbarcode)

## -----------------------------------------------------------------------------
UUIDtoBarcode("ae55b2d3-62a1-419e-9f9a-5ddfac356db4", from_type = "case_id")

## -----------------------------------------------------------------------------
UUIDtoBarcode("0001801b-54b0-4551-8d7a-d66fb59429bf", from_type = "file_id")

## -----------------------------------------------------------------------------
UUIDtoBarcode("d85d8a17-8aea-49d3-8a03-8f13141c163b", from_type = "aliquot_ids")

## -----------------------------------------------------------------------------
head(UUIDtoUUID("ae55b2d3-62a1-419e-9f9a-5ddfac356db4", to_type = "file_id"))

## -----------------------------------------------------------------------------
## Return participant barcodes
TCGAbarcode(xbarcode, participant = TRUE)

## Just return samples
TCGAbarcode(xbarcode, participant = FALSE, sample = TRUE)

## Include sample data as well
TCGAbarcode(xbarcode, participant = TRUE, sample = TRUE)

## Include portion and analyte data
TCGAbarcode(xbarcode, participant = TRUE, sample = TRUE, portion = TRUE)

## -----------------------------------------------------------------------------
## Select primary solid tumors
TCGAsampleSelect(xbarcode, "01")

## Select blood derived normals
TCGAsampleSelect(xbarcode, "10")

## -----------------------------------------------------------------------------
TCGAbiospec(xbarcode)

## -----------------------------------------------------------------------------
oncoPrintTCGA(coad, matchassay = rag)

## -----------------------------------------------------------------------------
## Obtained previously
sampleCodes <- TCGAbarcode(xbarcode, participant = FALSE, sample = TRUE)

## Lookup table
head(sampleTypes)

## Match codes found in the barcode to the lookup table
sampleTypes[
  match(unique(substr(sampleCodes, 1L, 2L)), sampleTypes[["Code"]]),
]

## -----------------------------------------------------------------------------
data("clinicalNames")

clinicalNames

lengths(clinicalNames)

## -----------------------------------------------------------------------------
sessionInfo()