## ----style, echo = FALSE, results = 'asis'---------------- BiocStyle::markdown() options(width=60, max.print=1000) knitr::opts_chunk$set( eval=as.logical(Sys.getenv("KNITR_EVAL", "TRUE")), cache=as.logical(Sys.getenv("KNITR_CACHE", "TRUE")), tidy.opts=list(width.cutoff=60), tidy=TRUE) ## ----setup, echo=FALSE, message=FALSE, warning=FALSE, eval=FALSE---- # suppressPackageStartupMessages({ # library(systemPipeR) # }) ## ----create_workflow, message=FALSE, eval=FALSE----------- # library(systemPipeR) # sal <- SPRproject() # sal ## ----load_packages, eval=FALSE, spr=TRUE------------------ # cat(crayon::blue$bold("To use this workflow, following R packages are expected:\n")) # cat(c("'ChemmineR", "ggplot2", "tibble", "readr", "ggpubr", "gplots'\n"), sep = "', '") # ###pre-end # appendStep(sal) <- LineWise( # code = { # library(systemPipeR) # library(ChemmineR) # }, # step_name = "load_packages" # ) ## ----load_data, eval=FALSE, spr=TRUE---------------------- # # Here, the dataset is downloaded. If you already have the data locally, change URL to local path. # appendStep(sal) <- LineWise( # code = { # sdfset <- read.SDFset("http://faculty.ucr.edu/~tgirke/Documents/R_BioCond/Samples/sdfsample.sdf") # # rename molecule IDs by IDs in the header. If your molecules' header does not have ID or # # not unique, remove following code and use the default IDs # cid(sdfset) <- makeUnique(sdfid(sdfset)) # }, # step_name = "load_data", # dependency = "load_packages" # ) ## ----vis_mol, eval=FALSE, spr=TRUE------------------------ # appendStep(sal) <- LineWise( # code = { # png("results/mols_plot.png", 700, 600) # # Here only first 4 are plotted. Please choose the ones you want to plot. # ChemmineR::plot(sdfset[1:4]) # dev.off() # }, # step_name = "vis_mol", # dependency = "load_data", # run_step = "optional" # ) ## ----basic_mol_info, eval=FALSE, spr=TRUE----------------- # appendStep(sal) <- LineWise( # code = { # propma <- data.frame(MF=MF(sdfset), MW=MW(sdfset), atomcountMA(sdfset)) # readr::write_csv(propma, "results/basic_mol_info.csv") # }, # step_name = "basic_mol_info", # dependency = "load_data", # run_step = "optional" # ) ## ----mol_info_plot, eval=FALSE, spr=TRUE------------------ # appendStep(sal) <- LineWise( # code = { # png("results/atom_req.png", 700, 700) # boxplot(propma[, 3:ncol(propma)], col="#6cabfa", main="Atom Frequency") # dev.off() # }, # step_name = "mol_info_plot", # dependency = "basic_mol_info", # run_step = "optional" # ) ## ----apfp_convert, eval=FALSE, spr=TRUE------------------- # appendStep(sal) <- LineWise( # code = { # apset <- sdf2ap(sdfset) # fpset <- desc2fp(apset, descnames=1024, type="FPset") # # save them to a file so they can be loaded directly next time # readr::write_rds(apset, "results/apset.rds") # readr::write_rds(fpset, "results/fpset.rds") # }, # step_name = "apfp_convert", # dependency = "load_data" # ) ## ----fp_dedup, eval=FALSE, spr=TRUE----------------------- # appendStep(sal) <- LineWise( # code = { # fpset <- fpset[which(!cmp.duplicated(apset))] # }, # step_name = "fp_dedup", # dependency = "apfp_convert" # ) ## ----fp_similarity, eval=FALSE, spr=TRUE------------------ # appendStep(sal) <- LineWise( # code = { # # If your set is very large, compute a small portion is a better idea, e.g. # # sapply(cid(fpset)[1:10], ... # simMAfp <- sapply(cid(fpset), function(x) fpSim(x=fpset[x], fpset, sorted=FALSE)) # }, # step_name = "fp_similarity", # dependency = "fp_dedup" # ) ## ----hclust, eval=FALSE, spr=TRUE------------------------- # appendStep(sal) <- LineWise( # code = { # hc <- hclust(as.dist(1-simMAfp)) # png("results/hclust.png", 1000, 700) # plot(as.dendrogram(hc), edgePar=list(col=4, lwd=2), horiz=TRUE) # dev.off() # # to see the tree groupings, one need to cut the tree, for example, by height of 0.9 # tree_cut <- cutree(hc, h = 0.9) # grouping <- tibble::tibble( # cid = names(tree_cut), # group_id = tree_cut # ) # readr::write_csv(grouping, "results/hclust_grouping.csv") # }, # step_name = "hclust", # dependency = "fp_similarity", # run_step = "optional" # ) ## ----PCA, eval=FALSE, spr=TRUE---------------------------- # appendStep(sal) <- LineWise( # code = { # library(magrittr) # library(ggplot2) # # simMAfp is already 0-1 value, no need to normalize again # mol_pca <- princomp(simMAfp) # # find the variance # mol_pca_var <- mol_pca$sdev[1:2]^2 / sum(mol_pca$sdev^2) # # plot # png("results/mol_pca.png", 650, 700) # tibble::tibble( # PC1 = mol_pca$loadings[, 1], # PC2 = mol_pca$loadings[, 2], # group_id = as.factor(grouping$group_id) # ) %>% # # here only group labels are used to color, if you have experimental conditions # # or drug types, the coloring or shaping will be more meaningful. # ggplot(aes(x = PC1, y = PC2, color = group_id)) + # geom_point(size = 2) + # stat_ellipse() + # labs( # x = paste0("PC1 ", round(mol_pca_var[1], 3)*100, "%"), # y = paste0("PC1 ", round(mol_pca_var[2], 3)*100, "%") # ) + # ggpubr::theme_pubr(base_size = 16) + # scale_color_brewer(palette = "Set2") # dev.off() # }, # step_name = "PCA", # dependency = "hclust", # run_step = "optional" # ) ## ----heatmap, eval=FALSE, spr=TRUE------------------------ # appendStep(sal) <- LineWise( # code = { # library(gplots) # png("results/mol_heatmap.png", 700, 700) # heatmap.2(simMAfp, Rowv=as.dendrogram(hc), Colv=as.dendrogram(hc), # col=colorpanel(40, "darkblue", "yellow", "white"), # density.info="none", trace="none") # dev.off() # }, # step_name = "heatmap", # dependency = "fp_similarity", # run_step = "optional" # ) ## ----wf_session, eval=FALSE, spr=TRUE--------------------- # appendStep(sal) <- LineWise( # code = { # sessionInfo() # }, # step_name = "wf_session", # dependency = "heatmap") ## ----runWF, eval=FALSE------------------------------------ # sal <- runWF(sal) ## ----list_tools------------------------------------------- if(file.exists(file.path(".SPRproject", "SYSargsList.yml"))) { local({ sal <- systemPipeR::SPRproject(resume = TRUE) systemPipeR::listCmdTools(sal) systemPipeR::listCmdModules(sal) }) } else { cat(crayon::blue$bold("Tools and modules required by this workflow are:\n")) cat(c("No other tool is required"), sep = "\n") } ## ----report_session_info, eval=TRUE----------------------- sessionInfo()