## ----Options, echo = FALSE------------------------------------------------- knitr::opts_chunk$set(fig.align = "center", message = FALSE) ## ----Installation, eval = FALSE-------------------------------------------- # source("https://bioconductor.org/biocLite.R") # biocLite("seqCAT") ## ----Create SNV profile with R--------------------------------------------- # Load the package library("seqCAT") # List the example VCF file vcf <- system.file("extdata", "example.vcf.gz", package = "seqCAT") # Create two SNV profiles create_profile(vcf, "HCT116", "hct116_profile.txt") create_profile(vcf, "RKO", "rko_profile.txt", filter_depth = 15) ## ----Create SNV profile with Python, eval = FALSE-------------------------- # create_profile(vcf, "RKO", "RKO_profile.txt", python = TRUE) ## ----List COSMIC----------------------------------------------------------- file <- system.file("extdata", "subset_CosmicCLP_MutantExport.tsv.gz", package = "seqCAT") cell_lines <- list_cosmic(file) head(cell_lines) ## ----Search COSMIC--------------------------------------------------------- any(grepl("HCT116", cell_lines)) ## ----Read COSMIC----------------------------------------------------------- cosmic <- read_cosmic(file, "HCT116") head(cosmic) ## ----Count COSMIC---------------------------------------------------------- length(cosmic) ## ----Read SNV profiles----------------------------------------------------- hct116 <- read_profile("hct116_profile.txt", "HCT116") rko <- read_profile("rko_profile.txt", "RKO") head(hct116) ## ----Compare profiles------------------------------------------------------ hct116_rko <- compare_profiles(hct116, rko) head(hct116_rko) ## ----Compare profiles (union)---------------------------------------------- hct116_rko_union <- compare_profiles(hct116, rko, mode = "union") head(hct116_rko_union) ## ----Compare with COSMIC--------------------------------------------------- hct116_cosmic <- compare_profiles(hct116, cosmic) head(hct116_cosmic) ## ----Calculate similarities------------------------------------------------ similarity <- calculate_similarity(hct116_rko) similarity ## ----Calculate similarities iteratively------------------------------------ # Create and read HKE3 profile create_profile(vcf, "HKE3", "hke3_profile.txt") hke3 <- read_profile("hke3_profile.txt", "HKE3") # Compare HCT116 and HKE3 hct116_hke3 <- compare_profiles(hct116, hke3) # Add HCT116/HKE3 similarities to HCT116/RKO similarities similarities <- calculate_similarity(hct116_hke3, similarity, a = 1, b = 10) similarities ## ----Impact distributions-------------------------------------------------- impacts <- plot_impacts(hct116_rko) impacts ## ----Subset impacts-------------------------------------------------------- hct116_rko_hm <- hct116_rko[hct116_rko$impact == "HIGH" | hct116_rko$impact == "MODERATE", ] nrow(hct116_rko_hm) ## ----Subset chromosome or region------------------------------------------- hct116_rko_region <- hct116_rko[hct116_rko$chr == 12 & hct116_rko$pos >= 25000000 & hct116_rko$pos <= 30000000, ] head(hct116_rko_region) ## ----Subset gene or transcript--------------------------------------------- hct116_rko_eps8_t <- hct116_rko[hct116_rko$ENSTID == "ENST00000281172", ] hct116_rko_vamp1 <- hct116_rko[hct116_rko$ENSGID == "ENSG00000139190", ] hct116_rko_ldhb <- hct116_rko[hct116_rko$gene == "LDHB", ] head(hct116_rko_ldhb) ## ----Subset KRAS----------------------------------------------------------- hct116_rko_kras <- hct116_rko[hct116_rko$rsID == "rs112445441", ] hct116_rko_kras <- hct116_rko[hct116_rko$chr == 12 & hct116_rko$pos == 25398281, ] nrow(hct116_rko_kras) ## ----Create known variants list-------------------------------------------- known_variants <- data.frame(chr = c(12, 12, 12, 12), pos = c(25358650, 21788465, 21797029, 25398281), gene = c("LYRM5", "LDHB", "LDHB", "KRAS"), stringsAsFactors = FALSE) known_variants ## ----List variants--------------------------------------------------------- variant_list <- list_variants(list(hct116, rko), known_variants) variant_list ## ---- Plot variant list---------------------------------------------------- # Set row names to "chr: pos (gene)" row.names(variant_list) <- paste0(variant_list$chr, ":", variant_list$pos, " (", variant_list$gene, ")") # Remove "chr", "pos" and "gene" columns to_remove <- c("chr", "pos", "gene") variant_list <- variant_list[, !names(variant_list) %in% to_remove] # Plot the genotypes in a grid genotype_grid <- plot_variant_list(variant_list) genotype_grid ## ----Many-to-many comparisons---------------------------------------------- # Create list of SNV profiles profiles <- list(hct116, hke3, rko) # Perform many-to-many comparisons many <- compare_many(profiles) many[[1]] ## ----HKE3 self-comparisons------------------------------------------------- hke3_hke3 <- many[[2]][[4]] head(hke3_hke3) ## ----COSMIC-to-many comparisons-------------------------------------------- many_cosmic <- compare_many(profiles, one = cosmic) many_cosmic[[1]] ## ----Plot heatmap, out.width = "60 %"-------------------------------------- heatmap <- plot_heatmap(many[[1]]) heatmap ## ----Remove temporary files, echo = FALSE, results = "hide"---------------- file.remove("hct116_profile.txt") file.remove("rko_profile.txt") file.remove("hke3_profile.txt") ## ----Session info, echo = FALSE-------------------------------------------- sessionInfo()