## NOTE(review): this script appears to be knitr::purl() output of an
## R Markdown vignette. Each '## ----label, options----' line marks the start
## of one vignette chunk and the statements below it are the chunk body.
## The line structure has been restored so that comments no longer swallow
## the code that follows them.

## ----style, echo = FALSE, results = 'hide', warning=FALSE, message=FALSE------
BiocStyle::markdown()

suppressPackageStartupMessages({
    library(knitr)
    library(enrichViewNet)
    library(gprofiler2)
    library(ggplot2)
    library(igraph)
    library(ggtangle)
    library(ggrepel)
})

## Set it globally
options(ggrepel.max.overlaps = 100)

set.seed(1214)


## ----graphDemo01, echo = FALSE, fig.align="center", fig.cap="A network where significant GO terms and genes are presented as nodes while edges connect each gene to its associated term(s).", out.width = '90%'----
knitr::include_graphics("demo01.jpeg")


## ----graphDemo02, echo=FALSE, fig.align="center", fig.cap="An enrichment map using significant Kegg terms where edges are connecting terms with overlapping genes.", out.width = '95%'----
knitr::include_graphics("demo_KEGG_emap_v03.jpg")


## ----installDemo01, eval=FALSE, warning=FALSE, message=FALSE------------------
# if (!requireNamespace("BiocManager", quietly = TRUE))
#     install.packages("BiocManager")
#
# BiocManager::install("enrichViewNet")


## ----graphWorkflow, echo=FALSE, fig.align="center", fig.cap="The enrichViewNet general workflow", out.width = '100%'----
knitr::include_graphics("Figure_enrichViewNet_workflow_v05.jpg")


## ----graphListToGraph01, echo = FALSE, fig.align="left", fig.cap="From an enrichment list (A) to a Cytoscape network (B).", out.width = '100%'----
knitr::include_graphics("FromListToGraph_v03.jpg")


## ----gprofiler, echo=TRUE, warning=FALSE, message=FALSE, collapse=FALSE, eval=TRUE----

## Required library
library(gprofiler2)

## The dataset of differentially expressed genes done between
## napabucasin treated and DMSO control parental (Froeling et al 2019)
## All genes tested are present
data("parentalNapaVsDMSODEG")

## Retain significant results
## (absolute fold change superior to 1 and adjusted p-value inferior to 0.05)
## which() drops the rows where the condition evaluates to NA
retained <- which(abs(parentalNapaVsDMSODEG$log2FoldChange) > 1 &
                        parentalNapaVsDMSODEG$padj < 0.05)
signRes <- parentalNapaVsDMSODEG[retained, ]

## Run one functional enrichment analysis using all significant genes
## The species is homo sapiens ("hsapiens")
## The g:SCS multiple testing correction method (Raudvere U et al 2019)
## The WikiPathways database is used
## Only the significant results are retained (significant=TRUE)
## The evidence codes are included in the results (evcodes=TRUE)
## A custom background included the tested genes is used
gostres <- gprofiler2::gost(
    query=list(parental_napa_vs_DMSO=unique(signRes$EnsemblID)),
    organism="hsapiens",
    correction_method="g_SCS",
    sources=c("WP"),
    significant=TRUE,
    evcodes=TRUE,
    custom_bg=unique(parentalNapaVsDMSODEG$EnsemblID))


## ----gostResult, echo=TRUE, eval=TRUE-----------------------------------------

## The 'gostres' object is a list of 2 entries
## The 'result' entry contains the enrichment results
## The 'meta' entry contains the metadata information

## Some columns of interest in the results
gostres$result[1:4, c("query", "p_value", "term_size",
                        "query_size", "intersection_size", "term_id")]

## The term names can be longer than the one shown
gostres$result[19:22, c("term_id", "source", "term_name")]


## ----cytoscapeLogo01, echo = FALSE, fig.align="center", fig.cap="Cytoscape software logo.", out.width = '55%'----
knitr::include_graphics("cy3sticker.png")


## ----runCreateNetwork, echo=TRUE, eval=TRUE, message=FALSE--------------------

## Load saved enrichment results between parental Napa vs DMSO
data("parentalNapaVsDMSOEnrichment")

## Create network for REACTOME significant terms
## The 'removeRoot=TRUE' parameter removes the root term from the network
## The network will either be created in Cytoscape (if the application is open)
## or a CX file will be created in the temporary directory
createNetwork(gostObject=parentalNapaVsDMSOEnrichment, source="REAC",
    removeRoot=TRUE, title="REACTOME_All",
    collection="parental_napa_vs_DMSO",
    fileName=file.path(tempdir(), "parentalNapaVsDMSOEnrichment.cx"))


## ----networkInCytoscape, echo=FALSE, fig.align="center", fig.cap="All reactome terms in a gene-term network loaded in Cytoscape.", out.width = '110%'----
knitr::include_graphics("cytoscape_reactome_all_parental_napa_vs_DMSO.png")


## ----runCreateNetworkSelected, echo=TRUE, eval=TRUE, message=FALSE------------

## Load saved enrichment results between parental Napa vs DMSO
data("parentalNapaVsDMSOEnrichment")

## List of terms of interest
reactomeSelected <- c("REAC:R-HSA-9031628", "REAC:R-HSA-198725",
    "REAC:R-HSA-9614085", "REAC:R-HSA-9617828",
    "REAC:R-HSA-9614657", "REAC:R-HSA-73857",
    "REAC:R-HSA-74160", "REAC:R-HSA-381340")

## All enrichment results
results <- parentalNapaVsDMSOEnrichment$result

## Retain selected results
selectedRes <- results[which(results$term_id %in% reactomeSelected), ]

## Print the names of the selected terms
selectedRes[, c("term_name")]


## ----runCreateNetworkSelected2, echo=TRUE, eval=TRUE, message=FALSE, fig.align="center", fig.cap="Enrichment map."----

## Create network for REACTOME selected terms
## The 'source="TERM_ID"' parameter enables specifying a personalized
## list of terms of interest
## The network will either be created in Cytoscape (if the application is open)
## or a CX file will be created in the temporary directory
createNetwork(gostObject=parentalNapaVsDMSOEnrichment, source="TERM_ID",
    termIDs=selectedRes$term_id, title="REACTOME_Selected",
    collection="parental_napa_vs_DMSO",
    fileName=file.path(tempdir(), "parentalNapaVsDMSO_REACTOME.cx"))


## ----networkInCytoscapeSelected, echo=FALSE, fig.align="center", fig.cap="Selected Reactome terms in a gene-term network loaded in Cytoscape.", out.width = '110%'----
knitr::include_graphics("cytoscape_with_selected_REACTOME_v01.png")


## ----networkFinalReactome, echo=FALSE, fig.align="center", fig.cap="Final Reactome network after customization inside Cytoscape.", out.width = '100%'----
knitr::include_graphics("REACTOME_Selected.jpeg")


## ----gprofiler2, echo=TRUE, warning=FALSE, message=FALSE, collapse=FALSE, eval=TRUE----

## Required library
library(gprofiler2)

## The dataset of differentially expressed genes done between
## napabucasin treated and DMSO control parental (Froeling et al 2019)
## All genes tested are present
data("parentalNapaVsDMSODEG")

## Retain significant results
## (absolute fold change superior to 1 and adjusted p-value inferior to 0.05)
## which() drops the rows where the condition evaluates to NA
retained <- which(abs(parentalNapaVsDMSODEG$log2FoldChange) > 1 &
                        parentalNapaVsDMSODEG$padj < 0.05)
signRes <- parentalNapaVsDMSODEG[retained, ]

## Run one functional enrichment analysis using all significant genes
## The species is homo sapiens ("hsapiens")
## The g:SCS multiple testing correction method (Raudvere U et al 2019)
## The WikiPathways database is used
## Only the significant results are retained (significant=TRUE)
## The evidence codes are included in the results (evcodes=TRUE)
## A custom background included the tested genes is used
gostres <- gprofiler2::gost(
    query=list(parental_napa_vs_DMSO=unique(signRes$EnsemblID)),
    organism="hsapiens",
    correction_method="g_SCS",
    sources=c("WP"),
    significant=TRUE,
    evcodes=TRUE,
    custom_bg=unique(parentalNapaVsDMSODEG$EnsemblID))


## ----gostResult2, echo=TRUE, eval=TRUE----------------------------------------

## The 'gostres' object is a list of 2 entries
## The 'result' entry contains the enrichment results
## The 'meta' entry contains the metadata information

## Some columns of interest in the results
gostres$result[1:4, c("query", "p_value", "term_size",
                        "query_size", "intersection_size", "term_id")]

## The term names can be longer than the one shown
gostres$result[19:22, c("term_id", "source", "term_name")]


## ----runCreateEmap01, echo=TRUE, eval=TRUE, fig.cap="A Kegg enrichment map where terms with overlapping genes cluster together.", fig.align="center"----

## Load saved enrichment results between parental Napa vs DMSO
data(parentalNapaVsDMSOEnrichment)

## Create network for all Kegg terms
## All terms will be shown even if there is overlapping
createEnrichMap(gostObject=parentalNapaVsDMSOEnrichment,
    query="parental_napa_vs_DMSO", source="KEGG")


## ----runCreateEmapTerms, echo=TRUE, eval=TRUE, fig.cap="An enrichment map showing only the user selected terms.", fig.align="center"----

## Load saved enrichment results between parental Napa vs DMSO
data(parentalNapaVsDMSOEnrichment)

## The term IDs must correspond to the IDs present in the "term_id" column
head(parentalNapaVsDMSOEnrichment$result[, c("query", "term_id", "term_name")],
    n=3)

## List of selected terms from different sources
termID <- c("KEGG:04115", "WP:WP4963", "KEGG:04010",
    "REAC:R-HSA-5675221", "REAC:R-HSA-112409", "WP:WP382")

## Create network for all selected terms
createEnrichMap(gostObject=parentalNapaVsDMSOEnrichment,
    query="parental_napa_vs_DMSO", source="TERM_ID", termIDs=termID)


## ----runCreateEmap03, echo=TRUE, eval=TRUE, message=FALSE, warning=FALSE, fig.cap="An enrichment map with personalized colors.", fig.align="center"----

## The ggplot2 library is required
library(ggplot2)

## Create network for all Kegg terms
graphKegg <- createEnrichMap(gostObject=parentalNapaVsDMSOEnrichment,
    query="parental_napa_vs_DMSO", source="KEGG")

## Nodes with lowest p-values will be in orange and highest p-values in black
## The title of the legend is also modified
graphKegg +
    scale_color_continuous(name="P-value adjusted", low="orange",
        high="black")


## ----runCreateEmapIgraph01, echo=TRUE, eval=TRUE, fig.cap="A Reactome (12 top terms) enrichment map where terms with overlapping genes cluster together.", fig.align="center"----

## Load saved enrichment results between parental Napa vs DMSO
data(parentalNapaVsDMSOEnrichment)

## Create network for the Reactome terms
## All retained terms will be shown even if there is overlapping
## The similarity cut off is set to 0.3
## The top 12 terms with best p-values are shown
emapGraph <- createEnrichMapAsIgraph(gostObject=parentalNapaVsDMSOEnrichment,
    query="parental_napa_vs_DMSO", similarityCutOff=0.3, source="REAC",
    showCategory=12)

## Set seed to ensure reproducible results
set.seed(121)

## The igraph library is required
library(igraph)

## Use library igraph to create the visual representation
plot(emapGraph, layout=layout_with_fr, vertex.label.cex=0.5,
    vertex.label.color="black", vertex.color="lightblue2")


## ----runCreateEmapIgraph02, echo=TRUE, eval=TRUE, message=FALSE, warning=FALSE, fig.cap="An enrichment map with a different seed.", fig.align="center"----

## Set seed to ensure reproducible results
set.seed(12)

## The igraph library is required
library(igraph)

## Use library igraph to create the visual representation
plot(emapGraph, layout=layout_with_fr, vertex.label.cex=0.5,
    vertex.label.color="black", vertex.color="lightblue2")


## ----runCreateEmapIgraph03, echo=TRUE, eval=TRUE, message=FALSE, warning=FALSE, fig.cap="An enrichment map with personalized visualization options.", fig.align="center"----

## The required libraries
library(igraph)
library(ggplot2)
library(ggtangle)
library(ggrepel)

## Create network for the Reactome terms
## Minimum similarity to have an edge is set to 0.4
## Only the top 10 terms with the best p-values are shown
graphReac <- createEnrichMapAsIgraph(gostObject=parentalNapaVsDMSOEnrichment,
    query="parental_napa_vs_DMSO", similarityCutOff=0.4, source="REAC",
    showCategory=10)

## Set seed to ensure reproducible results
set.seed(92)

## Using ggplot2 to generate a ggplot graph
graphEmap <- ggplot(graphReac, layout=layout_with_fr)

## Move right the node related to Signal Transduction
graphEmap$data$x[graphEmap$data$label == "Signal Transduction"] <- 2.3

## Using ggtangle and ggrepel libraries to personalize output
## Set the color of the text, the nodes and the edges
## Set the node size using size value present in the igraph object
## The edge width is associated to the similarity value
## the coord_fixed() function is used to fix 1:1 ratio
graphEmap +
    geom_edge(aes(linewidth=weight), color="blue1") +
    geom_point(aes(size=size), colour="gray40") +
    geom_text_repel(aes(x=x, y=y, label=label), nudge_x=0.5, nudge_y=0.2,
        col="darkviolet", min.segment.length=0.1, max.overlaps=10) +
    scale_size_continuous(range=c(4, 12)) +
    scale_linewidth_binned(range=c(1, 3)) +
    coord_fixed()


## ----emapMulti01, echo=TRUE, warning=FALSE, message=FALSE, collapse=FALSE, eval=TRUE, fig.cap="An enrichment map containing Kegg enrichment results for 2 different experiments.", fig.align="center"----

## Set seed to ensure reproducible results
set.seed(2121)

## The dataset of functional enriched terms for two experiments:
## napabucasin treated and DMSO control parental and
## napabucasin treated and DMSO control expressing Rosa26 control vector
## (Froeling et al 2019)
data("parentalNapaVsDMSOEnrichment")
data("rosaNapaVsDMSOEnrichment")

## The gostObjectList is a list containing all
## the functional enrichment objects
gostObjectList <- list(parentalNapaVsDMSOEnrichment,
    rosaNapaVsDMSOEnrichment)

## The queryList is a list of query names retained for each of the enrichment
## object (same order). Beware that an enrichment object can contain more than
## one query.
query_01 <- unique(parentalNapaVsDMSOEnrichment$result$query)[1]
query_02 <- unique(rosaNapaVsDMSOEnrichment$result$query)[1]
queryList <- list(query_01, query_02)

## Enrichment map where the groups are the KEGG results for the 2 different
## experiments
createEnrichMapMultiBasic(gostObjectList=gostObjectList,
    queryList=queryList, source="KEGG", removeRoot=TRUE)


## ----emapMultiCustom, echo=TRUE, warning=FALSE, message=FALSE, collapse=FALSE, eval=TRUE, fig.cap="An enrichment map using KEGG terms from two enrichment analyses with personalized colors and legend."----

## Required library
library(ggplot2)

## Enrichment map where the groups are the KEGG results for the 2 different
## experiments
## The breaks are passed as a character vector (unlist() on the query list),
## which is the type documented for the 'breaks' argument of the manual scales
createEnrichMapMultiBasic(gostObjectList=gostObjectList,
    queryList=queryList, source="KEGG", removeRoot=TRUE) +
    scale_fill_manual(name="Groups", breaks=unlist(queryList),
        values=c("cyan4", "bisque3"), labels=c("parental", "rosa")) +
    theme(legend.title=element_text(face="bold"))


## ----emapMultiAsIgraph01, echo=TRUE, warning=FALSE, message=FALSE, collapse=FALSE, eval=TRUE, fig.cap="An enrichment map containing GO Molecular Function enrichment results (top 5) for 2 different experiments.", fig.align="center"----

## Set seed to ensure reproducible results
set.seed(2121)

## The dataset of functional enriched terms for two experiments:
## napabucasin treated and DMSO control parental and
## napabucasin treated and DMSO control expressing Rosa26 control vector
## (Froeling et al 2019)
data("parentalNapaVsDMSOEnrichment")
data("rosaNapaVsDMSOEnrichment")

## The gostObjectList is a list containing all
## the functional enrichment objects
gostObjectList <- list(parentalNapaVsDMSOEnrichment,
    rosaNapaVsDMSOEnrichment)

## The queryList is a list of query names retained for each of the enrichment
## object (same order). Beware that an enrichment object can contain more than
## one query.
query_01 <- unique(parentalNapaVsDMSOEnrichment$result$query)[1]
query_02 <- unique(rosaNapaVsDMSOEnrichment$result$query)[1]
queryList <- list(query_01, query_02)

## Enrichment map where the groups are the GO Molecular Function results
## for the 2 different experiments
## Only the top 5 terms for each experiment (based on p-value) are shown
## Minimum Jaccard coefficient between 2 nodes is set to 0.6
emapGraph <- createEnrichMapMultiBasicAsIgraph(gostObjectList=gostObjectList,
    queryList=queryList, source="GO:MF", removeRoot=TRUE,
    showCategory=5, similarityCutOff=0.6)

library(igraph)

plot(emapGraph)


## ----emapMultiAsIgraph02, echo=TRUE, warning=FALSE, message=FALSE, collapse=FALSE, eval=TRUE, fig.cap="An enrichment map using GO Molecular Function terms from two enrichment analyses with personalized colors and legend."----

## The required libraries
library(igraph)
library(ggplot2)
library(ggtangle)
library(ggrepel)
library(scatterpie)

set.seed(11)

graphEmap <- ggplot(emapGraph, layout=layout_with_fr)

## Extract information from igraph object about group associated to each node
pieInfo <- as.data.frame(do.call(rbind, V(emapGraph)$pie))
colnames(pieInfo) <- V(emapGraph)$pieName[[1]]

## Add information into the ggplot object to be able to color nodes
for (i in seq_len(ncol(pieInfo))) {
    graphEmap$data[colnames(pieInfo)[i]] <- pieInfo[, i]
}

## Colors selected for the groups
groupColor <- c("darkorange", "darkviolet")

## Using ggtangle, scatterpie and ggrepel libraries to personalize output
## geom_scatterpie() allows to have scatter pie plot
## geom_text_repel() allows to have minimum overlying terms
## scale_fill_manual() allows to personalize the color of the nodes
## coord_fixed() forces the plot to have a 1:1 aspect ratio
## The pie radius is size/100, so the legend labeller multiplies by 100 to
## show the original size values
graphEmap +
    geom_edge(aes(linewidth=weight), color="black") +
    geom_scatterpie(aes(x=x, y=y, r=size/100),
        cols=c(colnames(pieInfo)), legend_name="Group", color=NA) +
    geom_scatterpie_legend(radius=graphEmap$data$size/100, n=4,
        x=max(graphEmap$data$x)+1, y=min(graphEmap$data$y)-0.5,
        labeller=function(x) {round(x*100)}, label_position="right") +
    scale_fill_manual(values=groupColor) +
    scale_size_continuous(range=c(2, 8)) +
    scale_linewidth_binned(range=c(1, 3)) +
    geom_text_repel(aes(x=x, y=y, label=label), color="blue2",
        max.overlaps=10) +
    coord_fixed()


## ----emapMultiComplex01, echo=TRUE, warning=FALSE, message=FALSE, collapse=FALSE, eval=TRUE, fig.cap="An enrichment map containing Kegg and Reactome results from the rosa Napa vs DMSO analysis.", fig.align="center"----

## Set seed to ensure reproducible results
set.seed(3221)

## The dataset of functional enriched terms for one experiment:
## napabucasin treated and DMSO control expressing Rosa26 control vector
## (Froeling et al 2019)
data("rosaNapaVsDMSOEnrichment")

## The gostObjectList is a list containing all
## the functional enrichment objects
## In this case, the same enrichment object is used twice
gostObjectList <- list(rosaNapaVsDMSOEnrichment,
    rosaNapaVsDMSOEnrichment)

## Extract the query name from the enrichment object
query_01 <- unique(rosaNapaVsDMSOEnrichment$result$query)[1]

## The query information is a data frame containing the information required
## to extract the specific terms for each enrichment object.
## The number of rows must correspond to the number of enrichment objects.
## The query name must be present in the enrichment object.
## The source can be: "GO:BP" for Gene Ontology Biological Process,
## "GO:CC" for Gene Ontology Cellular Component, "GO:MF" for Gene Ontology
## Molecular Function, "KEGG" for Kegg, "REAC" for Reactome,
## "TF" for TRANSFAC, "MIRNA" for miRTarBase, "CORUM" for CORUM database,
## "HP" for Human phenotype ontology and "WP" for WikiPathways or
## "TERM_ID" when a list of terms is specified.
## The termIDs is an empty string except when the source is set to "TERM_ID".
## The group names are going to be used in the legend and should be unique to
## each group.
queryInfo <- data.frame(queryName=c(query_01, query_01),
    source=c("KEGG", "REAC"),
    removeRoot=c(TRUE, TRUE),
    termIDs=c("", ""),
    groupName=c("Kegg", "Reactome"),
    stringsAsFactors=FALSE)

## Enrichment map where the groups are the KEGG and Reactome results for the
## same experiment
createEnrichMapMultiComplex(gostObjectList=gostObjectList,
    queryInfo=queryInfo)


## ----emapMultiCustom2, echo=TRUE, warning=FALSE, message=FALSE, collapse=FALSE, eval=TRUE, fig.cap="An enrichment map using selected terms related to MAP kinases and interleukin in two different experiments."----

## Set seed to ensure reproducible results
set.seed(28)

## The datasets of functional enriched terms for the two experiments:
## napabucasin treated and DMSO control expressing Rosa26 control vector and
## napabucasin treated and DMSO control parental MiaPaCa2 cells
## (Froeling et al 2019)
data("rosaNapaVsDMSOEnrichment")
data("parentalNapaVsDMSOEnrichment")

## The gostObjectList is a list containing all
## the functional enrichment objects
## In this case, each enrichment object is used twice
## The order of the objects must respect the order on the queryInfo data frame
## In this case:
##    1. rosa dataset (for MAP kinases)
##    2. parental dataset (for MAP kinases)
##    3. rosa dataset (for interleukin)
##    4. parental dataset (for interleukin)
gostObjectList <- list(rosaNapaVsDMSOEnrichment,
    parentalNapaVsDMSOEnrichment,
    rosaNapaVsDMSOEnrichment,
    parentalNapaVsDMSOEnrichment)

## Extract the query name from the enrichment object
query_rosa <- unique(rosaNapaVsDMSOEnrichment$result$query)[1]
query_parental <- unique(parentalNapaVsDMSOEnrichment$result$query)[1]

## List of selected terms that will be shown in each group
## Each group is one comma-separated string of term IDs
rosa_mapk <- "GO:0017017,GO:0033549,KEGG:04010,WP:WP382"
rosa_il <- "KEGG:04657,WP:WP4754"
parental_mapk <- paste0("GO:0017017,GO:0033549,KEGG:04010,",
    "REAC:R-HSA-5675221,REAC:R-HSA-112409,WP:WP382")
parental_il <- "WP:WP4754,WP:WP395"

## The query information is a data frame containing the information required
## to extract the specific terms for each enrichment object
## The number of rows must correspond to the number of enrichment objects
## The query name must be present in the enrichment object
## The source is set to "TERM_ID" so that the terms present in termIDs column
## will be used
## The group name will be used for the legend, the same name cannot be
## used twice
queryInfo <- data.frame(
    queryName=c(query_rosa, query_parental, query_rosa, query_parental),
    source=c("TERM_ID", "TERM_ID", "TERM_ID", "TERM_ID"),
    removeRoot=c(FALSE, FALSE, FALSE, FALSE),
    termIDs=c(rosa_mapk, parental_mapk, rosa_il, parental_il),
    groupName=c("rosa - MAP kinases", "parental - MAP kinases",
        "rosa - Interleukin", "parental - Interleukin"),
    stringsAsFactors=FALSE)

## Enrichment map where the groups are the selected MAP kinases and
## interleukin terms for each of the two experiments
createEnrichMapMultiComplex(gostObjectList=gostObjectList,
    queryInfo=queryInfo)


## ----emapMultiComplexIgraph01, echo=TRUE, warning=FALSE, message=FALSE, collapse=FALSE, eval=TRUE, fig.cap="An enrichment map containing Kegg and Reactome results from the rosa Napa vs DMSO analysis.", fig.align="center"----

## The dataset of functional enriched terms for one experiment:
## napabucasin treated and DMSO control expressing Rosa26 control vector
## (Froeling et al 2019)
data("rosaNapaVsDMSOEnrichment")

## The gostObjectList is a list containing all
## the functional enrichment objects
## In this case, the same enrichment object is used twice
gostObjectList <- list(rosaNapaVsDMSOEnrichment,
    rosaNapaVsDMSOEnrichment)

## Extract the query name from the enrichment object
query_01 <- unique(rosaNapaVsDMSOEnrichment$result$query)[1]

## The query information is a data frame containing the information required
## to extract the specific terms for each enrichment object.
## The number of rows must correspond to the number of enrichment objects.
## The query name must be present in the enrichment object.
## The source can be: "GO:BP" for Gene Ontology Biological Process,
## "GO:CC" for Gene Ontology Cellular Component, "GO:MF" for Gene Ontology
## Molecular Function, "KEGG" for Kegg, "REAC" for Reactome,
## "TF" for TRANSFAC, "MIRNA" for miRTarBase, "CORUM" for CORUM database,
## "HP" for Human phenotype ontology and "WP" for WikiPathways or
## "TERM_ID" when a list of terms is specified.
## The termIDs is an empty string except when the source is set to "TERM_ID".
## The group names are going to be used in the legend and should be unique to
## each group.
queryInfo <- data.frame(queryName=c(query_01, query_01),
    source=c("KEGG", "REAC"),
    removeRoot=c(TRUE, TRUE),
    termIDs=c("", ""),
    groupName=c("Kegg", "Reactome"),
    stringsAsFactors=FALSE)

## Enrichment map where the groups are the KEGG and Reactome results for the
## same experiment
## Minimum similarity coefficient to have 2 terms linked is set to 0.5
emapGraph <- createEnrichMapMultiComplexAsIgraph(gostObjectList=gostObjectList,
    queryInfo=queryInfo, similarityCutOff=0.5)

## The igraph library is required
library(igraph)

## Set seed to ensure reproducible results
set.seed(3221)

## Use library igraph to create the visual representation
plot(emapGraph, layout=layout_with_fr, vertex.label.cex=0.5,
    vertex.label.color="black")


## ----changeSeedIgraph02, echo=TRUE, warning=FALSE, message=FALSE, collapse=FALSE, eval=TRUE, fig.cap="An enrichment map containing Kegg and Reactome results from the rosa Napa vs DMSO analysis with a different seed.", fig.align="center"----

## The igraph library is required
library(igraph)

## Set seed to ensure reproducible results
set.seed(911)

## Use library igraph to create the visual representation
plot(emapGraph, layout=layout_with_fr, vertex.label.cex=0.5,
    vertex.label.color="black")


## ----personalizedIgraph01, echo=TRUE, warning=FALSE, message=FALSE, collapse=FALSE, eval=TRUE, fig.cap="An enrichment map containing Kegg and Reactome results from the rosa Napa vs DMSO analysis as a ggplot graph.", fig.align="center"----

## Visualization libraries are required
library(igraph)
library(ggplot2)
library(ggtangle)
library(ggrepel)
library(scatterpie)

## Set seed to ensure reproducible results
set.seed(21)

## Using ggplot2 to generate a ggplot graph
## The 'emapGraph' object is the igraph object created in a previous chunk
graphEmap <- ggplot(emapGraph, layout=layout_with_fr)

## Extract information about group associated to each node
pieInfo <- as.data.frame(do.call(rbind, V(emapGraph)$pie))
colnames(pieInfo) <- V(emapGraph)$pieName[[1]]

## Add information into the ggplot object to be able to color nodes
for (i in seq_len(ncol(pieInfo))) {
    graphEmap$data[colnames(pieInfo)[i]] <- pieInfo[, i]
}

## Colors selected for the groups
groupColor <- c("blue", "darkviolet")

## Using scatterpie, ggtangle and ggrepel libraries to personalize output
## geom_scatterpie() allows to have scatter pie plot
## geom_text_repel() allows to have minimum overlying terms
## scale_fill_manual() allows to personalize the color of the nodes
## coord_fixed() forces the plot to have a 1:1 aspect ratio
## The pie radius is size/50, so the legend labeller multiplies by 50 to
## show the original size values
graphEmap +
    geom_edge(aes(linewidth=weight), color="lightblue3") +
    geom_scatterpie(aes(x=x, y=y, r=size/50),
        cols=c(colnames(pieInfo)), legend_name="Group", color=NA) +
    geom_scatterpie_legend(radius=graphEmap$data$size/50, n=3,
        x=max(graphEmap$data$x)+0.5, y=min(graphEmap$data$y)-0.5,
        labeller=function(x) {round(x*50)}, label_position="right") +
    scale_fill_manual(values=groupColor) +
    scale_size_continuous(range=c(2, 8)) +
    scale_linewidth_binned(range=c(1, 3)) +
    geom_text_repel(aes(x=x, y=y, label=label), color="black",
        max.overlaps=10) +
    coord_fixed()


## ----createEnrichMapMultiComplexAsIgraph01, echo=TRUE, warning=FALSE, message=FALSE, collapse=FALSE, eval=TRUE, fig.cap="An enrichment map containing GO Biological Process and GO Cellular Component results from the rosa Napa vs DMSO and parental Napa vs DMSO analyses.", fig.align="center"----

## The dataset of functional enriched terms for two experiments:
## napabucasin treated and DMSO control expressing Rosa26 control vector and
## napabucasin treated and DMSO control parental
## (Froeling et al 2019)
data("rosaNapaVsDMSOEnrichment")
data("parentalNapaVsDMSOEnrichment")

## The gostObjectList is a list containing all
## the functional enrichment objects
## In this case, each enrichment object is used twice
gostObjectList <- list(rosaNapaVsDMSOEnrichment,
    rosaNapaVsDMSOEnrichment,
    parentalNapaVsDMSOEnrichment,
    parentalNapaVsDMSOEnrichment)

## Extract the query name from the enrichment object
query_01 <- unique(rosaNapaVsDMSOEnrichment$result$query)[1]
query_02 <- unique(parentalNapaVsDMSOEnrichment$result$query)[1]

## The query information is a data frame containing the information required
## to extract the specific terms for each enrichment object.
## The number of rows must correspond to the number of enrichment objects.
## The query name must be present in the enrichment object.
## The source can be: "GO:BP" for Gene Ontology Biological Process,
## "GO:CC" for Gene Ontology Cellular Component, "GO:MF" for Gene Ontology
## Molecular Function, "KEGG" for Kegg, "REAC" for Reactome,
## "TF" for TRANSFAC, "MIRNA" for miRTarBase, "CORUM" for CORUM database,
## "HP" for Human phenotype ontology and "WP" for WikiPathways or
## "TERM_ID" when a list of terms is specified.
## The termIDs is an empty string except when the source is set to "TERM_ID".
## The group names are going to be used in the legend and should be unique to
## each group.
queryInfo <- data.frame(
    queryName=c(query_01, query_01, query_02, query_02),
    source=c("GO:BP", "GO:CC", "GO:BP", "GO:CC"),
    removeRoot=c(TRUE, TRUE, TRUE, TRUE),
    termIDs=c("", "", "", ""),
    groupName=c("GO:BP - Napa", "GO:CC - Napa",
        "GO:BP - Parental", "GO:CC - Parental"),
    stringsAsFactors=FALSE)

## Enrichment map where there are 2 groups generated from each
## experiment
## Minimum similarity coefficient to have 2 terms linked is set to 0.5
## The 10 terms with the best p-value are selected for each group
emapGraph <- createEnrichMapMultiComplexAsIgraph(gostObjectList=gostObjectList,
    queryInfo=queryInfo, similarityCutOff=0.5, showCategory=10)

## The igraph library is required
library(igraph)

## Set seed to ensure reproducible results
set.seed(3221)

## Use library igraph to create the visual representation
## Unfortunately, the output does not generate a scatter pie plot
## See next topic to generate a scatter pie plot
plot(emapGraph, layout=layout_with_fr, vertex.label.cex=0.5,
    vertex.label.color="black")


## ----personalized01, echo=TRUE, warning=FALSE, message=FALSE, collapse=FALSE, eval=TRUE, fig.cap="An enrichment map containing Kegg and Reactome results from the rosa Napa vs DMSO and parental vs DMSO analyses as a ggplot graph.", fig.align="center"----

## Visualization libraries are required
library(igraph)
library(ggplot2)
library(ggtangle)
library(ggrepel)
library(scatterpie)

## The dataset of functional enriched terms for two experiments:
## napabucasin treated and DMSO control expressing Rosa26 control vector and
## napabucasin treated and DMSO control parental
## (Froeling et al 2019)
data("rosaNapaVsDMSOEnrichment")
data("parentalNapaVsDMSOEnrichment")

## The gostObjectList is a list containing all
## the functional enrichment objects
## In this case, each enrichment object is used twice
gostObjectList <- list(rosaNapaVsDMSOEnrichment,
    rosaNapaVsDMSOEnrichment,
    parentalNapaVsDMSOEnrichment,
    parentalNapaVsDMSOEnrichment)

## Extract the query name from the enrichment object
query_01 <- unique(rosaNapaVsDMSOEnrichment$result$query)[1]
query_02 <- unique(parentalNapaVsDMSOEnrichment$result$query)[1]

## The query information is a data frame containing the information required
## to extract the specific terms for each enrichment object.
## The number of rows must correspond to the number of enrichment objects.
## The query name must be present in the enrichment object.
## The source can be: "GO:BP" for Gene Ontology Biological Process,
## "GO:CC" for Gene Ontology Cellular Component, "GO:MF" for Gene Ontology
## Molecular Function, "KEGG" for Kegg, "REAC" for Reactome,
## "TF" for TRANSFAC, "MIRNA" for miRTarBase, "CORUM" for CORUM database,
## "HP" for Human phenotype ontology and "WP" for WikiPathways or
## "TERM_ID" when a list of terms is specified.
## The termIDs is an empty string except when the source is set to "TERM_ID".
## The group names are going to be used in the legend and should be unique to
## each group.
queryInfo <- data.frame(
    queryName=c(query_01, query_01, query_02, query_02),
    source=c("KEGG", "REAC", "KEGG", "REAC"),
    removeRoot=c(TRUE, TRUE, TRUE, TRUE),
    termIDs=c("", "", "", ""),
    groupName=c("Kegg - Rosa", "Reactome - Rosa",
        "Kegg - Parental", "Reactome - Parental"),
    stringsAsFactors=FALSE)

## Enrichment map where the groups are the KEGG and Reactome results for
## two enrichment analyses
## Minimum similarity coefficient to have 2 terms linked is set to 0.3
## Only the 5 terms with best p-value are going to be shown in each group
emapIgraph <- createEnrichMapMultiComplexAsIgraph(gostObjectList=gostObjectList,
    queryInfo=queryInfo, showCategory=5, similarityCutOff=0.3)

## Set seed to ensure reproducible results
set.seed(21)

## Using ggplot2 to generate a ggplot graph
graphEmap <- ggplot(emapIgraph, layout=layout_with_fr)

## Extract information about group associated to each node
pieInfo <- as.data.frame(do.call(rbind, V(emapIgraph)$pie))
colnames(pieInfo) <- V(emapIgraph)$pieName[[1]]

## Add information into the ggplot object to be able to color nodes
for (i in seq_len(ncol(pieInfo))) {
    graphEmap$data[colnames(pieInfo)[i]] <- pieInfo[, i]
}

## Colors selected for the groups
groupColor <- c("darkorange", "violet", "darkorange4", "darkviolet")

## Using scatterpie, ggtangle and ggrepel libraries to personalize output
## geom_scatterpie() allows to have scatter pie plot
## geom_text_repel() allows to have minimum overlying terms
## scale_fill_manual() allows to personalize the color of the nodes
## coord_fixed() forces the plot to have a 1:1 aspect ratio
## The pie radius is size/50, so the legend labeller multiplies by 50 to
## show the original size values
graphEmap +
    geom_edge(aes(linewidth=weight), color="blue3") +
    geom_scatterpie(aes(x=x, y=y, r=size/50),
        cols=c(colnames(pieInfo)), legend_name="Group", color=NA) +
    geom_scatterpie_legend(radius=graphEmap$data$size/50, n=3,
        x=max(graphEmap$data$x)+0.5, y=min(graphEmap$data$y)-0.5,
        labeller=function(x) {round(x*50)}, label_position="right") +
    scale_fill_manual(values=groupColor) +
    scale_size_continuous(range=c(2, 8)) +
    scale_linewidth_binned(range=c(1, 2)) +
    geom_text_repel(aes(x=x, y=y, label=label), color="black",
        max.overlaps=10) +
    coord_fixed()


## ----sessionInfo, echo=FALSE--------------------------------------------------
sessionInfo()