## ----eval=FALSE--------------------------------------------------------------- # # Install via BioConductor # if (!require("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # # BiocManager::install("katdetectr") # # # or the latest version # BiocManager::install("katdetectr", version = "devel") # # # or from github # devtools::install_git("https://github.com/ErasmusMC-CCBC/katdetectr") ## ----------------------------------------------------------------------------- library(katdetectr) ## ----label = 'Importing genomic variants', eval = TRUE------------------------ # Genomic variants stored within the VCF format. pathToVCF <- system.file(package = "katdetectr", "extdata/CPTAC_Breast.vcf") # Genomic variants stored within the MAF format. pathToMAF <- system.file(package = "katdetectr", "extdata/APL_primary.maf") # In addition, we can generate synthetic genomic variants including kataegis foci using generateSyntheticData(). # This functions returns a VRanges object. syntheticData <- generateSyntheticData(nBackgroundVariants = 2500, nKataegisFoci = 1) ## ----label = 'Detection of kataegis foci', eval = TRUE------------------------ # Detect kataegis foci within the given VCF file. kdVCF <- detectKataegis(genomicVariants = pathToVCF) # # Detect kataegis foci within the given MAF file. # As this file contains multiple samples, we set aggregateRecords = TRUE. kdMAF <- detectKataegis(genomicVariants = pathToMAF, aggregateRecords = TRUE) # Detect kataegis foci within the synthetic data. kdSynthetic <- detectKataegis(genomicVariants = syntheticData) ## ----label = 'Overview of KatDetect objects', eval = TRUE--------------------- summary(kdVCF) print(kdVCF) show(kdVCF) # Or simply: kdVCF ## ----label = 'Retrieving information from KatDetect objects', eval = TRUE----- getGenomicVariants(kdVCF) getSegments(kdVCF) getKataegisFoci(kdVCF) getInfo(kdVCF) ## ----label = 'Visualisation of KatDetect objects', eval = TRUE---------------- rainfallPlot(kdVCF) # With showSegmentation, the detected segments (changepoints) as visualized with their mean IMD. rainfallPlot(kdMAF, showSegmentation = TRUE) # With showSequence, we can display specific chromosomes or all chromosomes in which a putative kataegis foci has been detected. rainfallPlot(kdSynthetic, showKataegis = TRUE, showSegmentation = TRUE, showSequence = "Kataegis") ## ----------------------------------------------------------------------------- # detect putative Omikli, size 3 and mean IMD = 500 kdSyntheticOmikli500 <- detectKataegis(genomicVariants = syntheticData, minSizeKataegis = 3, IMDcutoff = 500) # detect putative Omikli, size 4 and mean IMD = 1000 kdSyntheticOmikli500 <- detectKataegis(genomicVariants = syntheticData, minSizeKataegis = 4, IMDcutoff = 1000) ## ----------------------------------------------------------------------------- # function for modeling sample mutation rate modelSampleRate <- function(IMDs){ lambda <- log(2) / median(IMDs) return(lambda) } # function for calculating the nth root of x nthroot = function(x, n){ y <- x^(1 / n) return(y) } # Function that defines the IMD cutoff specific for each segment # Within this function you can use all variables available in the slots: genomicVariants and segments IMDcutoffFun <- function(genomicVariants, segments){ IMDs <- genomicVariants$IMD totalVariants <- segments$totalVariants width <- segments |> dplyr::as_tibble() |> dplyr::pull(width) sampleRate <- modelSampleRate(IMDs) IMDcutoff <- -log(1 - nthroot(0.01 / width, ifelse(totalVariants != 0, totalVariants - 1, 1))) / sampleRate IMDcutoff <- replace(IMDcutoff, IMDcutoff > 1000, 1000) return(IMDcutoff) } kdCustom <- detectKataegis(syntheticData, IMDcutoff = IMDcutoffFun) ## ----------------------------------------------------------------------------- # generate data that contains non standard sequences syndata1 <- generateSyntheticData(seqnames = c("chr1_gl000191_random", "chr4_ctg9_hap1")) syndata2 <- generateSyntheticData(seqnames = "chr1") syndata <- suppressWarnings(c(syndata1, syndata2)) # construct a dataframe that contains the length of the sequences # each column name (name of the sequence) sequenceLength = data.frame( chr1 = 249250621, chr1_gl000191_random = 106433, chr4_ctg9_hap1 = 590426 ) # provide the dataframe with the sequence lengths using the refSeq argument kdNonStandard <- detectKataegis(genomicVariants = syndata, refSeq = sequenceLength) ## ----label = 'Session Information', eval = TRUE------------------------------- utils::sessionInfo()