## ----style, echo = FALSE, results = 'asis'---------------- BiocStyle::markdown() options(width = 60, max.print = 1000) knitr::opts_chunk$set( eval = as.logical(Sys.getenv("KNITR_EVAL", "TRUE")), cache = as.logical(Sys.getenv("KNITR_CACHE", "TRUE")), tidy.opts = list(width.cutoff = 60), tidy = TRUE ) ## ----setup, echo=FALSE, message=FALSE, warning=FALSE, eval=TRUE---- suppressPackageStartupMessages({ library(systemPipeR) }) ## ----generate_workenvir, eval=FALSE----------------------- # library(systemPipeRdata) # genWorkenvir(workflow = "varseq", mydirname = "varseq") # setwd("varseq") ## ----download_commands, eval=FALSE------------------------ # targets <- read.delim(system.file("extdata", "workflows", "varseq", "targetsPE_varseq.txt", package = "systemPipeRdata"), comment.char = "#") # source("VARseq_helper.R") # defines helper functions # options(timeout = 3600) # increase time limit for downloads # commands <- varseq_example_fastq(targets) # for (cmd in commands) eval(parse(text = cmd)) # download_ref(ref="hg38.fa.gz") # download_tool(tool="snpEff_latest") ## ----load_targets_file, eval=TRUE------------------------- targetspath <- system.file("extdata", "workflows", "varseq", "targetsPE_varseq.txt", package = "systemPipeRdata") targets <- read.delim(targetspath, comment.char = "#") targets[1:4, -(5:8)] ## ----project_varseq, eval=FALSE--------------------------- # library(systemPipeR) # sal <- SPRproject() # # sal <- SPRproject(resume=TRUE, load.envir=TRUE) # to resume workflow if needed # sal <- importWF(sal, file_path = "systemPipeVARseq.Rmd", verbose = FALSE) # sal ## ----run_varseq, eval=FALSE------------------------------- # sal <- runWF(sal) ## ----plot_varseq, eval=FALSE------------------------------ # plotWF(sal) ## ----varseq-toplogy, eval=TRUE, warning= FALSE, echo=FALSE, out.width="100%", fig.align = "center", fig.cap= "Topology graph of VAR-Seq workflow.", warning=FALSE---- knitr::include_graphics("results/plotwf_varseq.png") ## ----report_varseq, eval=FALSE---------------------------- # # Scientific report # sal <- renderReport(sal) # rmarkdown::render("systemPipeVARseq.Rmd", clean = TRUE, output_format = "BiocStyle::html_document") # # # Technical (log) report # sal <- renderLogs(sal) ## ----status_varseq, eval=FALSE---------------------------- # statusWF(sal) ## ----save_sal, eval=FALSE--------------------------------- # # sal <- write_SYSargsList(sal) ## ----load_SPR, message=FALSE, eval=FALSE, spr=TRUE-------- # cat(crayon::blue$bold("To use this workflow, following R packages are expected:\n")) # cat(c("'ggplot2', 'dplyr'\n"), sep = "', '") # ###pre-end # appendStep(sal) <- LineWise( # code = { # library(systemPipeR) # }, # step_name = "load_SPR" # ) ## ----fastqc, eval=FALSE, spr=TRUE------------------------- # appendStep(sal) <- SYSargsList( # step_name = "fastqc", # targets = "targetsPE_varseq.txt", # wf_file = "fastqc/workflow_fastqc.cwl", # input_file = "fastqc/fastqc.yml", # dir_path = "param/cwl", # inputvars = c( # FileName1 = "_FASTQ_PATH1_", # FileName2 = "_FASTQ_PATH2_" # ), # dependency = "load_SPR" # ) ## ----fastq_report, eval=FALSE, message=FALSE, spr=TRUE---- # appendStep(sal) <- LineWise(code = { # fastq1 <- getColumn(sal, step = "fastqc", "targetsWF", column = 1) # fastq2 <- getColumn(sal, step = "fastqc", "targetsWF", column = 2) # fastq <- setNames(c(rbind(fastq1, fastq2)), c(rbind(names(fastq1), names(fastq2)))) # fqlist <- seeFastq(fastq = fastq, batchsize = 1000, klength = 8) # png("./results/fastqReport_varseq.png", height = 650, width = 288 * length(fqlist)) # seeFastqPlot(fqlist) # dev.off() # }, step_name = "fastq_report", # dependency = "fastqc") ## ----trimmomatic, eval=FALSE, spr=TRUE-------------------- # appendStep(sal) <- SYSargsList( # step_name = "trimmomatic", # targets = "targetsPE_varseq.txt", # wf_file = "trimmomatic/trimmomatic-pe.cwl", # input_file = "trimmomatic/trimmomatic-pe.yml", # dir_path = "param/cwl", # inputvars = c( # FileName1 = "_FASTQ_PATH1_", # FileName2 = "_FASTQ_PATH2_", # SampleName = "_SampleName_" # ), # dependency = c("load_SPR"), # run_step = "optional" # ) ## ----preprocessing, message=FALSE, eval=FALSE, spr=TRUE---- # appendStep(sal) <- SYSargsList( # step_name = "preprocessing", # targets = "targetsPE_varseq.txt", dir = TRUE, # wf_file = "preprocessReads/preprocessReads-pe.cwl", # input_file = "preprocessReads/preprocessReads-pe.yml", # dir_path = "param/cwl", # inputvars = c( # FileName1 = "_FASTQ_PATH1_", # FileName2 = "_FASTQ_PATH2_", # SampleName = "_SampleName_" # ), # dependency = c("load_SPR"), # run_step = "optional" # ) ## ----bwa_index, eval=FALSE, spr=TRUE---------------------- # appendStep(sal) <- SYSargsList( # step_name = "bwa_index", # dir = FALSE, targets = NULL, # wf_file = "gatk/workflow_bwa-index.cwl", # input_file = "gatk/gatk.yaml", # dir_path = "param/cwl", # dependency = "load_SPR" # ) ## ----fasta_index, eval=FALSE, spr=TRUE-------------------- # appendStep(sal) <- SYSargsList( # step_name = "fasta_index", # dir = FALSE, targets = NULL, # wf_file = "gatk/workflow_fasta_dict.cwl", # input_file = "gatk/gatk.yaml", # dir_path = "param/cwl", # dependency = "bwa_index" # ) ## ----faidx_index, eval=FALSE, spr=TRUE-------------------- # appendStep(sal) <- SYSargsList( # step_name = "faidx_index", # dir = FALSE, targets = NULL, # wf_file = "gatk/workflow_fasta_faidx.cwl", # input_file = "gatk/gatk.yaml", # dir_path = "param/cwl", # dependency = "fasta_index" # ) ## ----bwa_alignment, eval=FALSE, spr=TRUE------------------ # appendStep(sal) <- SYSargsList( # step_name = "bwa_alignment", # targets = "targetsPE_varseq.txt", # wf_file = "gatk/workflow_bwa-pe.cwl", # input_file = "gatk/gatk.yaml", # dir_path = "param/cwl", # inputvars = c( # FileName1 = "_FASTQ_PATH1_", # FileName2 = "_FASTQ_PATH2_", # SampleName = "_SampleName_" # ), # dependency = c("faidx_index") # ) ## ----align_stats, eval=FALSE, spr=TRUE-------------------- # appendStep(sal) <- LineWise( # code = { # bampaths <- getColumn(sal, step = "bwa_alignment", "outfiles", column = "samtools_sort_bam") # fqpaths <- getColumn(sal, step = "bwa_alignment", "targetsWF", column = "FileName1") # read_statsDF <- alignStats(args = bampaths, fqpaths = fqpaths, pairEnd = TRUE) # write.table(read_statsDF, "results/alignStats_varseq.xls", row.names = FALSE, quote = FALSE, sep = "\t") # }, # step_name = "align_stats", # dependency = "bwa_alignment", # run_step = "optional" # ) ## ----align_stats_view, eval=TRUE-------------------------- read.table("results/alignStats_varseq.xls", header = TRUE)[1:4,] ## ----bam_urls, eval=FALSE, spr=TRUE----------------------- # appendStep(sal) <- LineWise( # code = { # bampaths <- getColumn(sal, step = "bwa_alignment", "outfiles", column = "samtools_sort_bam") # symLink2bam( # sysargs = bampaths, htmldir = c("~/.html/", "/"), # urlbase = "/~/", # urlfile = "./results/IGVurl.txt" # ) # }, # step_name = "bam_urls", # dependency = "bwa_alignment", # run_step = "optional" # ) ## ----fastq2ubam, eval=FALSE, spr=TRUE--------------------- # appendStep(sal) <- SYSargsList( # step_name = "fastq2ubam", # targets = "targetsPE_varseq.txt", # wf_file = "gatk/workflow_gatk_fastq2ubam.cwl", # input_file = "gatk/gatk.yaml", # dir_path = "param/cwl", # inputvars = c( # FileName1 = "_FASTQ_PATH1_", # FileName2 = "_FASTQ_PATH2_", # SampleName = "_SampleName_" # ), # dependency = c("faidx_index") # ) ## ----merge_bam, eval=FALSE, spr=TRUE---------------------- # appendStep(sal) <- SYSargsList( # step_name = "merge_bam", # targets = c("bwa_alignment", "fastq2ubam"), # wf_file = "gatk/workflow_gatk_mergebams.cwl", # input_file = "gatk/gatk.yaml", # dir_path = "param/cwl", # inputvars = c( # bwa_men_sam = "_bwasam_", # ubam = "_ubam_", # SampleName = "_SampleName_" # ), # rm_targets_col = c("preprocessReads_1", "preprocessReads_2"), # dependency = c("bwa_alignment", "fastq2ubam") # ) ## ----sort, eval=FALSE, spr=TRUE--------------------------- # appendStep(sal) <- SYSargsList( # step_name = "sort", # targets = "merge_bam", # wf_file = "gatk/workflow_gatk_sort.cwl", # input_file = "gatk/gatk.yaml", # dir_path = "param/cwl", # inputvars = c(merge_bam = "_mergebam_", SampleName = "_SampleName_"), # rm_targets_col = c( # "bwa_men_sam", "ubam", "SampleName_fastq2ubam", # "Factor_fastq2ubam", "SampleLong_fastq2ubam", # "Experiment_fastq2ubam", "Date_fastq2ubam" # ), # dependency = c("merge_bam") # ) ## ----mark_dup, eval=FALSE, spr=TRUE----------------------- # appendStep(sal) <- SYSargsList( # step_name = "mark_dup", # targets = "sort", # wf_file = "gatk/workflow_gatk_markduplicates.cwl", # input_file = "gatk/gatk.yaml", # dir_path = "param/cwl", # inputvars = c(sort_bam = "_sort_", SampleName = "_SampleName_"), # rm_targets_col = c("merge_bam"), # dependency = c("sort") # ) ## ----fix_tag, eval=FALSE, spr=TRUE------------------------ # appendStep(sal) <- SYSargsList( # step_name = "fix_tag", # targets = "mark_dup", # wf_file = "gatk/workflow_gatk_fixtag.cwl", # input_file = "gatk/gatk.yaml", # dir_path = "param/cwl", # inputvars = c(mark_bam = "_mark_", SampleName = "_SampleName_"), # rm_targets_col = c("sort_bam"), # dependency = c("mark_dup") # ) ## ----hap_caller, eval=FALSE, spr=TRUE--------------------- # appendStep(sal) <- SYSargsList( # step_name = "hap_caller", # targets = "fix_tag", # wf_file = "gatk/workflow_gatk_haplotypecaller.cwl", # input_file = "gatk/gatk.yaml", # dir_path = "param/cwl", # inputvars = c(fixtag_bam = "_fixed_", SampleName = "_SampleName_"), # rm_targets_col = c("mark_bam"), # dependency = c("fix_tag") # ) ## ----import, eval=FALSE, spr=TRUE------------------------- # appendStep(sal) <- SYSargsList( # step_name = "import", # targets = NULL, dir = FALSE, # wf_file = "gatk/workflow_gatk_genomicsDBImport.cwl", # input_file = "gatk/gatk.yaml", # dir_path = "param/cwl", # dependency = c("hap_caller") # ) ## ----call_variants, eval=FALSE, spr=TRUE------------------ # appendStep(sal) <- SYSargsList( # step_name = "call_variants", # targets = NULL, dir = FALSE, # wf_file = "gatk/workflow_gatk_genotypeGVCFs.cwl", # input_file = "gatk/gatk.yaml", # dir_path = "param/cwl", # dependency = c("import") # ) ## ----filter, eval=FALSE, spr=TRUE------------------------- # appendStep(sal) <- SYSargsList( # step_name = "filter", # targets = NULL, dir = FALSE, # wf_file = "gatk/workflow_gatk_variantFiltration.cwl", # input_file = "gatk/gatk.yaml", # dir_path = "param/cwl", # dependency = c("call_variants") # ) ## ----create_vcf, eval=FALSE, spr=TRUE--------------------- # appendStep(sal) <- SYSargsList( # step_name = "create_vcf", # targets = "hap_caller", # wf_file = "gatk/workflow_gatk_select_variant.cwl", # input_file = "gatk/gatk.yaml", # dir_path = "param/cwl", # inputvars = c(SampleName = "_SampleName_"), # dependency = c("hap_caller", "filter") # ) ## ----inspect_vcf, eval=FALSE------------------------------ # library(VariantAnnotation) # vcf_raw <- getColumn(sal, "create_vcf") # vcf <- readVcf(vcf_raw[1], "Homo sapiens") # vcf # vr <- as(vcf, "VRanges") # vr ## ----filter_vcf, eval=FALSE, spr=TRUE--------------------- # appendStep(sal) <- LineWise( # code = { # source("VARseq_helper.R") # vcf_raw <- getColumn(sal, "create_vcf") # library(VariantAnnotation) # filter <- "totalDepth(vr) >= 20 & (altDepth(vr) / totalDepth(vr) >= 0.8)" # vcf_filter <- suppressWarnings(filter_vars(vcf_raw, filter, organism = "Homo sapiens", out_dir = "results/vcf_filter")) # }, # step_name = "filter_vcf", # dependency = "create_vcf", # run_step = "optional" # ) ## ----check_filter, eval=FALSE----------------------------- # copyEnvir(sal, "vcf_raw", globalenv()) # copyEnvir(sal, "vcf_filter", globalenv()) # length(as(readVcf(vcf_raw[1], genome = "Homo sapiens"), "VRanges")[, 1]) # length(as(readVcf(vcf_filter[1], genome = "Homo sapiens"), "VRanges")[, 1]) ## ----summary_filter, eval=FALSE, spr=TRUE----------------- # appendStep(sal) <- LineWise( # code = { # # # read in the cohort VCF file # vcf_all <- suppressWarnings(VariantAnnotation::readVcf("./results/samples_filter.vcf.gz", "Homo sapiens")) # # filter_values <- VariantAnnotation::filt(vcf_all) # filter_values[is.na(filter_values)] <- "" # ensure character comparisons work # overall_counts <- table(filter_values) |> # dplyr::as_tibble() |> # dplyr::arrange(dplyr::desc(n)) # # vcf_all_ft <- VariantAnnotation::geno(vcf_all)$FT # passes_per_sample <- apply(vcf_all_ft, 2, function(x) sum(x == "PASS", na.rm = TRUE)) # fails_per_sample <- apply(vcf_all_ft, 2, function(x) sum(x != "PASS", na.rm = TRUE)) # sample_filter_summary <- dplyr::tibble( # sample = names(passes_per_sample), # passed = passes_per_sample, # filtered = fails_per_sample # ) # # write.table(overall_counts, file = "results/summary_filter_overall.tsv", sep = "\t", quote = FALSE, row.names = FALSE) # write.table(sample_filter_summary, file = "results/summary_filter_per_sample.tsv", sep = "\t", quote = FALSE, row.names = FALSE) # # library(ggplot2) # source("VARseq_helper.R") # png("results/summary_filter_plot.png", width = 800, height = 600) # p_filter <- plot_summary_filter_plot(sample_filter_summary) # print(p_filter) # dev.off() # # # clean up RAM # try(rm(vcf_all, vcf_all_ft, filter_values, passes_per_sample, fails_per_sample), silent = TRUE) # invisible(gc()) # # }, # step_name = "summary_filter", # dependency = "filter" # ) ## ----annotate_vcf, eval=FALSE, spr=TRUE------------------- # appendStep(sal) <- SYSargsList( # step_name = "annotate_vcf", # targets = "create_vcf", dir = TRUE, # wf_file = "gatk/snpeff.cwl", # input_file = "gatk/gatk.yaml", # dir_path = "param/cwl", # inputvars = c(SampleName = "_SampleName_", vcf_raw = "_vcf_raw_"), # dependency = c("create_vcf") # ) ## ----combine_var, eval=FALSE, spr=TRUE-------------------- # appendStep(sal) <- LineWise( # code = { # vcf_anno <- getColumn(sal, "annotate_vcf", position = "outfiles", column = "ann_vcf") # # clean_vcf_file <- function(path) { # lines <- readLines(path, warn = FALSE) # if (!length(lines)) return(path) # header_start <- which(grepl("^##fileformat=", lines, perl = TRUE))[1] # if (is.na(header_start) || header_start <= 1) return(path) # writeLines(lines[header_start:length(lines)], path) # path # } # # vcf_anno <- vapply(vcf_anno, clean_vcf_file, FUN.VALUE = character(1)) # vr_from_vcf <- function(path) { # message("Importing annotated VCF: ", path) # vcf <- suppressWarnings(VariantAnnotation::readVcf(path, "Homo sapiens")) # # ft <- VariantAnnotation::geno(vcf)$FT # if (!is.null(ft) && !isTRUE(is.character(ft))) { # ft_vec <- as.character(ft) # ft_clean <- matrix(ft_vec, ncol = 1) # if (!is.null(dim(ft))) { # ft_clean <- matrix(ft_vec, nrow = nrow(ft), dimnames = dimnames(ft)) # } # VariantAnnotation::geno(vcf)$FT <- ft_clean # } # suppressWarnings(as(vcf, "VRanges")) # } # # vcf_vranges <- lapply(vcf_anno, vr_from_vcf) # }, # step_name = "combine_var", # dependency = "annotate_vcf" # ) ## ----summary_var, eval=FALSE, spr=TRUE-------------------- # appendStep(sal) <- LineWise( # code = { # source("VARseq_helper.R") # summary_var <- extract_var_table(vcf_vranges) # utils::write.table(summary_var, file = "results/variant_summary_long.tsv", sep = "\t", quote = FALSE, row.names = FALSE) # }, # step_name = "summary_var", # dependency = "combine_var" # ) ## ----plot_var_consequence, eval=FALSE, spr=TRUE----------- # appendStep(sal) <- LineWise( # code = { # library(ggplot2) # effect_counts <- summary_var |> # dplyr::count(sample, effect, name = "n") # # png("./results/var_consequence_log.png", width = 1000, height = 600) # p_var_consequence_log <- ggplot(effect_counts, aes(sample, n, fill = effect)) + # geom_col(position = "dodge", alpha = 0.8) + # geom_text(aes(label = scales::comma(n)), position = position_dodge(width = 0.9), vjust = -0.2, size = 3) + # scale_y_continuous(trans = "log10", labels = scales::comma_format()) + # labs(y = "Variant count (log10 scale)", title = "Variant consequences (log scale)") + # theme_minimal() + # theme(axis.text.x = element_text(angle = 45, hjust = 1)) # print(p_var_consequence_log) # dev.off() # }, # step_name = "plot_var_consequence", # dependency = "summary_var" # ) ## ----plot_var_stats, eval=FALSE, spr=TRUE----------------- # appendStep(sal) <- LineWise( # code = { # library(ggplot2) # plot_summary_data <- summary_var |> # dplyr::filter( # consequence %in% c( # "nonsynonymous_variant", "stop_gained", "frameshift_variant", # "splice_acceptor_variant", "splice_donor_variant", # "start_lost", "stop_lost" # ), # effect == "HIGH" # ) |> # dplyr::mutate( # sample = factor(sample, levels = getColumn(sal, step = "annotate_vcf", position = "targetsWF", column = "SampleName")), # sex = getColumn(sal, step = "annotate_vcf", position = "targetsWF", column = "Factor")[sample] # ) # # png("./results/var_summary.png") # p_var_summary <- ggplot(plot_summary_data) + # geom_bar(aes(x = sample, fill = sex), alpha = 0.75) + # scale_fill_brewer(palette = "Set2") + # theme_minimal() # print(p_var_summary) # dev.off() # }, # step_name = "plot_var_stats", # dependency = "summary_var" # ) ## ----plot_var_boxplot, eval=FALSE, spr=TRUE--------------- # appendStep(sal) <- LineWise( # code = { # source("VARseq_helper.R") # # change the sample and group columns as needed # p_summary_boxplot <- plot_summary_boxplot(plot_summary_data, sample_col = "sample", group_col = "sex") # library(ggplot2) # png("./results/var_summary_boxplot.png") # print(p_summary_boxplot) # dev.off() # }, # step_name = "plot_var_boxplot", # dependency = "plot_var_stats" # ) ## ----venn_diagram, eval=FALSE, spr=TRUE------------------- # appendStep(sal) <- LineWise( # code = { # unique_samples <- summary_var |> dplyr::distinct(sample) |> dplyr::pull(sample) # if (!length(unique_samples)) { # stop("No samples available in `summary_var`; cannot draw Venn diagram.") # } # # top_n <- min(3, length(unique_samples)) # selected_samples <- unique_samples[seq_len(top_n)] # # variant_df <- summary_var |> # dplyr::filter(sample %in% selected_samples) |> # dplyr::distinct(sample, seqnames, start, ref, alt) |> # dplyr::mutate(variant_id = paste0(seqnames, ":", start, "_", ref, "/", alt)) # # variant_sets <- split(variant_df$variant_id, variant_df$sample) # vennset <- overLapper(variant_sets, type = "vennsets") # png("./results/vennplot_var.png") # vennPlot(vennset, mymain = "Venn Plot of First 3 Samples", mysub = "", colmode = 2, ccol = c("red", "blue")) # dev.off() # }, # step_name = "venn_diagram", # dependency = "summary_var" # ) ## ----plot_variant, eval=FALSE, spr=TRUE------------------- # appendStep(sal) <- LineWise( # code = { # source("VARseq_helper.R") # first_high <- summary_var |> # dplyr::filter(effect == "HIGH") |> # head(n = 1) # library(ggbio) # library(VariantAnnotation) # mychr <- as.character(first_high$seqnames) # mystart <- as.numeric(first_high$start) - 500 # myend <- as.numeric(first_high$end) + 500 # bam_path <- getColumn(sal, "fix_tag")[first_high$sample] # vcf_path <- getColumn(sal, step = "create_vcf")[first_high$sample] # # vcf <- suppressWarnings(readVcf(vcf_path, "Homo sapiens")) # ga <- readGAlignments(bam_path, use.names = TRUE, param = ScanBamParam(which = GRanges(mychr, IRanges(mystart, myend)))) # vcf_chr <- normalize_ft(simplify_info(vcf[seqnames(vcf) == mychr])) # vr <- suppressWarnings(as(vcf_chr, "VRanges")) # vr_region <- vr[start(vr) >= mystart & end(vr) <= myend] # if (!length(vr_region)) { # vr_region <- vr # } # p1 <- autoplot(ga, geom = "rect") # p2 <- autoplot(ga, geom = "line", stat = "coverage") # p3 <- autoplot(vr_region, type = "fixed") + # xlim(mystart, myend) + # theme(legend.position = "none", axis.text.y = element_blank(), axis.ticks.y = element_blank()) # p1_3 <- tracks( # place_holder = ggplot2::ggplot(), # Reads = p1, # Coverage = p2, # Variant = p3, # heights = c(0, 0.3, 0.2, 0.1) # ) + ylab("") # ggbio::ggsave(p1_3, filename = "./results/plot_variant.png", units = "in") # }, # step_name = "plot_variant", # dependency = "summary_var" # ) ## ----non_syn_vars, eval=FALSE, spr=TRUE------------------- # appendStep(sal) <- LineWise( # code = { # vardf <- read.delim("results/variant_summary_long.tsv") # source("VARseq_helper.R") # common_nonsyn_entrez <- filterNonSyn(df=vardf) # writeLines(common_nonsyn_entrez, "results/common_nonsyn_entrez") # }, # step_name = "non_syn_vars", # dependency = "plot_var_consequence" # ) ## ----pathenrich, eval=FALSE, spr=TRUE--------------------- # appendStep(sal) <- LineWise( # code = { # common_nonsyn_entrez <- readLines("results/common_nonsyn_entrez") # source("VARseq_helper.R") # reacdb <- load_reacList(org="R-HSA") # # library(fgsea); library(data.table); library(ggplot2) # foraRes <- fora(genes=common_nonsyn_entrez, universe=unique(unlist(reacdb)), pathways=reacdb) # if (!dir.exists("results/fea")) dir.create("results/fea", recursive = TRUE) # foraRes$overlapGenes <- vapply(foraRes$overlapGenes, toString, FUN.VALUE = character(1)) # write.table(foraRes, file = "results/fea/foraRes.xls", row.names = FALSE, sep = "\t", quote = FALSE) # foraRes$pathway <- gsub("\\(.*\\) ", "", foraRes$pathway) # foraRes$pathway <- factor(foraRes$pathway, levels = rev(foraRes$pathway)) # png("./results/fea/pathenrich.png", width = 680) # ggplot(head(foraRes, 15), aes(pathway, overlap, fill = padj)) + # geom_bar(position="dodge", stat="identity") + coord_flip() + # scale_fill_distiller(palette = "RdBu", direction=-1, limits = range(head(foraRes$padj, 15))) + # theme(axis.text=element_text(angle=0, hjust=1, size=12), axis.title = element_text(size = 14)) # dev.off() # }, # step_name = "pathenrich", # dependency = "non_syn_vars" # ) ## ----drug_target_analysis, eval=FALSE, spr=TRUE----------- # appendStep(sal) <- LineWise( # code = { # ## Configure paths for drugTargetInteractions. Under chembldb provide path to chembl_xx.db on your system # # chembldb <- system.file("extdata", "chembl_sample.db", package="drugTargetInteractions") # resultsPath <- "results/drug_target/" # config <- drugTargetInteractions::genConfig(chemblDbPath=chembldb, resultsPath=resultsPath) # downloadUniChem(config=config) # cmpIdMapping(config=config) # foraRes <- read.delim("results/fea/foraRes.xls") # entrez_ids <- unlist(strsplit(foraRes[13,7], ", ")) # select here pathway of interest # source("VARseq_helper.R") # drugMap <- runGeneTargetDrug(entrez=entrez_ids)[[1]] # drugMap <- drugMap[!grepl("Query_", drugMap$GeneName), c("GeneName", "UniProt_ID", "Target_Desc", "Drug_Name", "CHEMBL_CMP_ID", "MOA", "Mesh_Indication")] # drugMap <- drugMap[!is.na(drugMap$CHEMBL_CMP_ID),] # if (!dir.exists("results/drug_target")) dir.create("results/drug_target", recursive = TRUE) # write.table(drugMap, file="results/drug_target/drug_target.xls", row.names=FALSE, sep="\t", quote=FALSE) # }, # step_name = "drug_target", # dependency = "pathenrich" # ) ## ----read_drug_table, eval=TRUE--------------------------- drugMap <- read.delim("results/drug_target.xls") DT::datatable(drugMap) ## ----sessionInfo, eval=FALSE, spr=TRUE-------------------- # appendStep(sal) <- LineWise( # code = { # sessionInfo() # }, # step_name = "sessionInfo", # dependency = "drug_target") ## ----runWF, eval=FALSE------------------------------------ # sal <- runWF(sal) ## ----runWF_cluster, eval=FALSE---------------------------- # # wall time in mins, memory in MB # resources <- list(conffile=".batchtools.conf.R", # template="batchtools.slurm.tmpl", # Njobs=8, # walltime=120, # ntasks=1, # ncpus=4, # memory=1024, # partition = "short" # ) # sal <- addResources(sal, c("bwa_alignment"), resources = resources) # sal <- runWF(sal) ## ----plotWF, eval=FALSE----------------------------------- # plotWF(sal, rstudio = TRUE) ## ----statusWF, eval=FALSE--------------------------------- # sal # statusWF(sal) ## ----logsWF, eval=FALSE----------------------------------- # sal <- renderLogs(sal) ## ----list_tools, eval=TRUE-------------------------------- if(file.exists(file.path(".SPRproject", "SYSargsList.yml"))) { local({ sal <- systemPipeR::SPRproject(resume = TRUE) systemPipeR::listCmdTools(sal) systemPipeR::listCmdModules(sal) }) } else { cat(crayon::blue$bold("Tools and modules required by this workflow are:\n")) cat(c("trimmomatic/0.39", "samtools/1.14", "gatk/4.2.0.0", "bcftools/1.15", "bwa/0.7.17", "snpEff/5.3"), sep = "\n") } ## ----report_session_info, eval=TRUE----------------------- sessionInfo()