Workflow
library(GenomicDataCommons)
library(tibble)
Genes and gene details
# List the fields of the "genes" endpoint whose name matches "symbol".
grep_fields("genes", "symbol")
## [1] "symbol"
# Peek at the first few values available for that field.
head(available_values("genes", "symbol"))
## [1] "y_rna" "c12orf79" "aff2" "c2orf15" "c6orf165" "cbs"
# Query the genes endpoint for records whose symbol is TP53 and keep the
# results (up to 10000 records requested) as a tibble for later steps.
tp53 <- as_tibble(
  results(
    GenomicDataCommons::filter(genes(), symbol == "TP53"),
    size = 10000
  )
)
Simple somatic mutations (ssms)
# Count ssms selected by position: same chromosome as the first row of
# `tp53` (with "chr" prepended to its gene_chromosome value) and lying
# strictly between that row's gene_start and gene_end.
GenomicDataCommons::count(
  GenomicDataCommons::filter(
    ssms(),
    chromosome == paste0("chr", tp53$gene_chromosome[1]) &
      start_position > tp53$gene_start[1] &
      end_position < tp53$gene_end[1]
  )
)
## [1] 1429
# Count ssms selected by annotation instead: the gene symbol recorded on
# the consequence transcript must be TP53.
# NOTE(review): the recorded counts are from one run; presumably they change
# between GDC data releases.
ssms() %>%
  GenomicDataCommons::filter(
    consequence.transcript.gene.symbol %in% c("TP53")
  ) %>%
  GenomicDataCommons::count()
## [1] 1426
Convert to VRanges
library(VariantAnnotation)
# Fetch every ssm whose consequence transcript is annotated to TP53 and
# collect the full result set as a tibble.
vars <- as_tibble(
  GenomicDataCommons::results_all(
    GenomicDataCommons::filter(
      ssms(),
      consequence.transcript.gene.symbol %in% c("TP53")
    )
  )
)
# Build a VRanges object from the retrieved columns.
# NOTE(review): every range starts at start_position with width 1, so the
# length of multi-base variants is not represented -- confirm this is
# intended.
vr <- VRanges(
  seqnames = vars$chromosome,
  ranges = IRanges(start = vars$start_position, width = 1),
  ref = vars$reference_allele,
  alt = vars$tumor_allele
)
# Count ssm occurrences whose ssm is annotated to TP53.
GenomicDataCommons::count(
  GenomicDataCommons::filter(
    ssm_occurrences(),
    ssm.consequence.transcript.gene.symbol %in% c("TP53")
  )
)
## [1] 5368
# Retrieve those occurrences in full, expanding the nested case, ssm and
# case.project records, and keep them as a tibble.
var_samples <- ssm_occurrences() %>%
  GenomicDataCommons::filter(
    ssm.consequence.transcript.gene.symbol %in% c("TP53")
  ) %>%
  GenomicDataCommons::expand(c("case", "ssm", "case.project")) %>%
  GenomicDataCommons::results_all() %>%
  as_tibble()
# Tabulate the occurrences by the disease type recorded on each case.
table(var_samples$case$disease_type)
##
## Acinar Cell Neoplasms
## 8
## Adenomas and Adenocarcinomas
## 1549
## Adnexal and Skin Appendage Neoplasms
## 1
## Complex Epithelial Neoplasms
## 16
## Complex Mixed and Stromal Neoplasms
## 72
## Cystic, Mucinous and Serous Neoplasms
## 687
## Ductal and Lobular Neoplasms
## 664
## Epithelial Neoplasms, NOS
## 9
## Fibromatous Neoplasms
## 15
## Germ Cell Neoplasms
## 1
## Gliomas
## 515
## Lipomatous Neoplasms
## 6
## Lymphoid Leukemias
## 2
## Mature B-Cell Lymphomas
## 5
## Mesothelial Neoplasms
## 14
## Miscellaneous Bone Tumors
## 1
## Myeloid Leukemias
## 18
## Myomatous Neoplasms
## 60
## Nerve Sheath Tumors
## 1
## Nevi and Melanomas
## 96
## Not Reported
## 9
## Paragangliomas and Glomus Tumors
## 1
## Plasma Cell Tumors
## 57
## Soft Tissue Tumors and Sarcomas, NOS
## 34
## Squamous Cell Neoplasms
## 1277
## Thymic Epithelial Neoplasms
## 4
## Transitional Cell Papillomas and Carcinomas
## 246
OncoPrint
# Select the TCGA-SKCM masked somatic mutation MAF file(s) produced by the
# MuTect2 aggregation/masking workflow, take their ids, and download them
# with gdcdata(); the result is stored in `fnames`.
fnames <- files() %>%
  GenomicDataCommons::filter(
    cases.project.project_id == "TCGA-SKCM" &
      data_format == "MAF" &
      data_type == "Masked Somatic Mutation" &
      analysis.workflow_type == "MuTect2 Variant Aggregation and Masking"
  ) %>%
  ids() %>%
  gdcdata()
## Rows: 1 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (4): id, filename, md5, state
## dbl (1): size
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(maftools)
# Read the first downloaded file into a maftools MAF object.
melanoma <- read.maf(fnames[1])
## -Reading
## -Validating
## -Silent variants: 166224
## -Summarizing
## --Possible FLAGS among top ten genes:
## TTN
## MUC16
## -Processing clinical data
## --Missing clinical data
## -Finished in 34.4s elapsed (36.5s cpu)
# Draw the oncoplot for the melanoma MAF object using maftools' defaults.
maftools::oncoplot(maf = melanoma)