## ----setup, include = FALSE---------------------------------------------------
# Chunk defaults for the rendered document: prefix printed output with "#>",
# collapse source and output into one block, and hide messages and warnings.
knitr::opts_chunk$set(
    collapse = TRUE,
    comment = "#>",
    message = FALSE,
    warning = FALSE
)

## -----------------------------------------------------------------------------
suppressPackageStartupMessages({
    library(OmicsMLRepoR)
    library(dplyr)
    library(curatedMetagenomicData)
    library(cBioPortalData)
})

## -----------------------------------------------------------------------------
# Fetch the metadata table registered under the key "cMD", keep it as `cmd`,
# and evaluate it at top level so it is displayed when the chunk is rendered.
# NOTE(review): presumably the harmonized curatedMetagenomicData sample
# metadata -- confirm against the OmicsMLRepoR documentation.
cmd <- getMetadata("cMD")
cmd

## -----------------------------------------------------------------------------
# Same call with the key "cBioPortal"; the result is kept as `cbio`.
cbio <- getMetadata("cBioPortal")
cbio

## ----echo=FALSE---------------------------------------------------------------
# Names of the `cmd` columns that contain "_ontology_term_id", with that
# fragment removed so only the attribute name is left.
gsub(
    "_ontology_term_id",
    "",
    grep("_ontology_term_id", colnames(cmd), value = TRUE)
)

## -----------------------------------------------------------------------------
## The same label has to be looked up in two separate columns
sampleMetadata |> filter(study_condition == "CRC") |> nrow()
sampleMetadata |> filter(disease == "CRC") |> nrow()

## Exact string comparison is case sensitive
sampleMetadata |> filter(study_condition == "CRC") |> nrow()
sampleMetadata |> filter(study_condition == "crc") |> nrow()

## Synonymous terms are not matched by exact comparison
sampleMetadata |> filter(study_condition == "Colorectal Carcinoma") |> nrow()
sampleMetadata |> filter(study_condition == "Colorectal Cancer") |> nrow()

## -----------------------------------------------------------------------------
# Row counts for the same `disease` query written with different letter case.
cmd |> tree_filter(disease, "Colorectal Carcinoma") |> nrow()
cmd |> tree_filter(disease, "colorectal carcinoma") |> nrow()

## -----------------------------------------------------------------------------
# Query the `disease` column with an abbreviation and two alternative names.
syn_res1 <- tree_filter(cmd, disease, "CRC")
syn_res2 <- tree_filter(cmd, disease, "Colorectal Cancer")
syn_res3 <- tree_filter(cmd, disease, "Colorectal Carcinoma")

# Number of rows returned by each query.
nrow(syn_res1)
nrow(syn_res2)
nrow(syn_res3)

## -----------------------------------------------------------------------------
# Distinct `disease` values present in each result.
unique(syn_res1[["disease"]])
unique(syn_res2[["disease"]])
unique(syn_res3[["disease"]])

## -----------------------------------------------------------------------------
# Query `disease` with the term "Intestinal Disorder" and list the distinct
# `disease` values found in the rows that come back.
onto_res <- tree_filter(cmd, disease, "Intestinal Disorder")
unique(onto_res[["disease"]])

## -----------------------------------------------------------------------------
# Two query terms combined with the "OR" option.
res_or <- tree_filter(cmd, disease, c("migraine", "diabetes"), "OR")

## -----------------------------------------------------------------------------
# The same two terms combined with the "AND" and "NOT" options.
res_and <- tree_filter(cmd, disease, c("migraine", "diabetes"), "AND")
res_not <- tree_filter(cmd, disease, c("migraine", "diabetes"), "NOT")

## -----------------------------------------------------------------------------
# Restrict to rows with age_years < 30 first, then run the two-term query.
# No logic option is passed here, so tree_filter()'s default applies.
res_or_below30 <- cmd |>
    filter(age_years < 30) |>
    tree_filter(disease, c("migraine", "diabetes"))

## -----------------------------------------------------------------------------
# Keep the rows with a non-missing `biomarker` value and only the two columns
# passed on to getWideMetaTb().
cmd_biomarker <- cmd |>
    filter(!is.na(biomarker)) |>
    select(curation_id, biomarker)

# Build the wide table from the `biomarker` column and preview its first rows.
wtb <- cmd_biomarker |> getWideMetaTb("biomarker")
head(wtb)

## -----------------------------------------------------------------------------
# Build the long table for `target_condition`, then compare the dimensions of
# the input table and the result.
ltb <- cmd |> getLongMetaTb(targetCol = "target_condition")
dim(cmd)
dim(ltb)

## ----debug_needed, echo=FALSE-------------------------------------------------
# Query the `target_condition` column for "Alzheimer's disease".
cmd_sub <- cmd |> tree_filter(target_condition, "Alzheimer's disease")

## -----------------------------------------------------------------------------
# Rows matching the `disease` query, limited to sex "Female" and age_group
# "Elderly", then handed to returnSamples() for the "relative_abundance" data.
cmd_dat <- cmd |>
    tree_filter(col = "disease", "Type 2 Diabetes Mellitus") |>
    filter(sex == "Female", age_group == "Elderly") |>
    returnSamples("relative_abundance", rownames = "short")

## -----------------------------------------------------------------------------
# Expand `treatment_name` with getLongMetaTb() (second argument "<;>"), keep
# the rows meeting all three conditions, then convert back with
# getShortMetaTb() keyed on `curation_id`.
cbio_sub <- cbio |>
    getLongMetaTb("treatment_name", "<;>") |>
    filter(
        treatment_name == "Fluorouracil",
        age_at_diagnosis > 50,
        sex == "Female"
    ) |>
    getShortMetaTb(idCols = "curation_id", targetCol = "treatment_name")

# Size of the subset and the distinct study identifiers it contains.
dim(cbio_sub)
studies <- unique(cbio_sub[["studyId"]])
studies

## ----echo=FALSE, eval=FALSE---------------------------------------------------
# cbio_api <- cBioPortal()
# resAll <- as.list(vector(length = length(studies)))
# 
# for (i in seq_along(studies)) {
#     study <- studies[i]
#     samples <- cbio_sub %>%
#         filter(studyId == study) %>%
#         pull(sampleId)
# 
#     res <- cBioPortalData(
#         api = cbio_api,
#         by = "hugoGeneSymbol",
#         studyId = study,
#         sampleIds = samples,
#         genePanelId = "IMPACT341"
#     )
# 
#     resAll[[i]] <- res
# }

## -----------------------------------------------------------------------------
# Report the R version, platform, and attached/loaded packages of this session.
sessionInfo()