epiregulon.extra 0.99.10
This tutorial is a continuation of the walk-through of the reprogram-seq data set begun in the epiregulon vignette. Here we present the data visualization and network analysis functions used to explore in detail the results output by the epiregulon workflow. We will use the pruned version of the regulon object, in which only the relevant columns are kept.
# if (!require("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
#
# BiocManager::install("epiregulon.extra")
library(epiregulon)
library(epiregulon.extra)
In addition to the regulon data frame, we need a gene expression matrix; together they allow us to visualize the insights provided by the epiregulon workflow.
# load the MAE object
library(scMultiome)
## Loading required package: AnnotationHub
## Loading required package: BiocFileCache
## Loading required package: dbplyr
##
## Attaching package: 'AnnotationHub'
## The following object is masked from 'package:Biobase':
##
## cache
## Loading required package: ExperimentHub
## Loading required package: MultiAssayExperiment
mae <- scMultiome::reprogramSeq()
## see ?scMultiome and browseVignettes('scMultiome') for documentation
## loading from cache
# Pull the gene expression experiment out of the MAE object and use the gene
# names stored in its rowData as row names.
GeneExpressionMatrix <- mae[["GeneExpressionMatrix"]]
rownames(GeneExpressionMatrix) <- rowData(GeneExpressionMatrix)$name
# Copy the combined UMAP embedding stored with the TileMatrix500 experiment.
reducedDim(GeneExpressionMatrix, "UMAP_Combined") <- reducedDim(
  mae[["TileMatrix500"]],
  "UMAP_Combined"
)
# Re-level hash_assignment so the groups appear in this fixed order.
GeneExpressionMatrix$hash_assignment <- factor(
  as.character(GeneExpressionMatrix$hash_assignment),
  levels = c(
    "HTO10_GATA6_UTR", "HTO2_GATA6_v2", "HTO8_NKX2.1_UTR", "HTO3_NKX2.1_v2",
    "HTO1_FOXA2_v2", "HTO4_mFOXA1_v2", "HTO6_hFOXA1_UTR", "HTO5_NeonG_v2"
  )
)
Using epiregulon, we can calculate the activity of the transcription factors included in the regulon object.
# Load the example regulon, then compute TF activity with the weightedMean
# method on the normalizedCounts assay, using the "weight" column as weights.
data(regulon)
score.combine <- calculateActivity(
  expMatrix = GeneExpressionMatrix,
  regulon = regulon,
  mode = "weight",
  method = "weightedMean",
  exp_assay = "normalizedCounts",
  normalize = FALSE
)
## calculating TF activity from regulon using weightedmean
## Warning in calculateActivity(expMatrix = GeneExpressionMatrix, regulon =
## regulon, : The weight column contains multiple subcolumns but no cluster
## information was provided. Using first column to compute activity...
## aggregating regulons...
## creating weight matrix...
## calculating activity scores...
## normalize by the number of targets...
# Test the activity matrix for differences between hash_assignment groups
# (t-test, direction "up", p-value type "some").
markers <- findDifferentialActivity(
  activity_matrix = score.combine,
  clusters = GeneExpressionMatrix$hash_assignment,
  pval.type = "some",
  direction = "up",
  test.type = "t"
)
Take the top TFs
# Select the top-ranked TFs from the differential activity results
# (topgenes = 5; presumably applied per class -- confirm in ?getSigGenes).
markers.sig <- getSigGenes(markers, topgenes = 5)
## Using a logFC cutoff of 0 for class HTO10_GATA6_UTR for direction equal to any
## Using a logFC cutoff of 0 for class HTO2_GATA6_v2 for direction equal to any
## Using a logFC cutoff of 0 for class HTO8_NKX2.1_UTR for direction equal to any
## Using a logFC cutoff of 0 for class HTO3_NKX2.1_v2 for direction equal to any
## Using a logFC cutoff of 0 for class HTO1_FOXA2_v2 for direction equal to any
## Using a logFC cutoff of 0 for class HTO4_mFOXA1_v2 for direction equal to any
## Using a logFC cutoff of 0 for class HTO6_hFOXA1_UTR for direction equal to any
## Using a logFC cutoff of 0 for class HTO5_NeonG_v2 for direction equal to any
First visualize the known differential TFs by bubble plot
# Bubble plot of the four TFs known to differ between the groups.
plotBubble(
  activity_matrix = score.combine,
  tf = c("NKX2-1", "GATA6", "FOXA1", "FOXA2"),
  clusters = GeneExpressionMatrix$hash_assignment
)
Then visualize the most differential TFs by clusters
# Bubble plot of the TFs selected by getSigGenes.
plotBubble(
  activity_matrix = score.combine,
  tf = markers.sig$tf,
  clusters = GeneExpressionMatrix$hash_assignment
)
Visualize the known differential TFs by violin plot. Note that no activity is calculated for SOX2 because the expression of SOX2 is 0 in all cells; it is therefore not included in the plot.
# Violin plots of TF activity, grouped by hash_assignment.
plotActivityViolin(
  activity_matrix = score.combine,
  tf = c("NKX2-1", "GATA6", "FOXA1", "FOXA2", "AR"),
  clusters = GeneExpressionMatrix$hash_assignment
)
Visualize the known differential TFs by UMAP
# Plot the TF activities on the combined UMAP embedding.
ActivityPlot <- plotActivityDim(
  sce = GeneExpressionMatrix,
  activity_matrix = score.combine,
  tf = c("NKX2-1", "GATA6", "FOXA1", "FOXA2", "AR"),
  dimtype = "UMAP_Combined",
  label = "Clusters",
  point_size = 1,
  ncol = 3
)
# Rasterise every element of the plot at 50 dpi before displaying it.
for (i in seq_along(ActivityPlot)) {
  ActivityPlot[[i]] <- ggrastr::rasterise(ActivityPlot[[i]], dpi = 50)
}
ActivityPlot
In contrast, the gene expression of the TFs is very sparse
# Same layout as the activity plot, but coloured by the raw counts of each TF
# gene (grey-to-blue scale limited to 0-2, legend labelled "GEX").
ActivityPlot <- plotActivityDim(
  sce = GeneExpressionMatrix,
  activity_matrix = counts(GeneExpressionMatrix),
  tf = c("NKX2-1", "GATA6", "FOXA1", "FOXA2", "AR"),
  dimtype = "UMAP_Combined",
  label = "Clusters",
  point_size = 1,
  ncol = 3,
  limit = c(0, 2),
  colors = c("grey", "blue"),
  legend.label = "GEX"
)
# Rasterise every element of the plot at 50 dpi before displaying it.
for (i in seq_along(ActivityPlot)) {
  ActivityPlot[[i]] <- ggrastr::rasterise(ActivityPlot[[i]], dpi = 50)
}
ActivityPlot
Visualize the gene expression of the regulons by heatmap
# Heatmap of the expression (normalizedCounts) of the GATA6 and NKX2-1
# regulons, with cells downsampled to 1000 and columns annotated and split by
# hash_assignment.
plotHeatmapRegulon(
  sce = GeneExpressionMatrix,
  tfs = c("GATA6", "NKX2-1"),
  regulon = regulon,
  regulon_cutoff = 0,
  downsample = 1000,
  cell_attributes = "hash_assignment",
  col_gap = "hash_assignment",
  exprs_values = "normalizedCounts",
  name = "regulon heatmap",
  column_title_rot = 45,
  raster_quality = 4
)
# Heatmap of the activity scores of four TFs, with cells downsampled to 5000
# and columns annotated and split by hash_assignment.
plotHeatmapActivity(
  activity = score.combine,
  sce = GeneExpressionMatrix,
  tfs = c("GATA6", "NKX2-1", "FOXA1", "FOXA2"),
  downsample = 5000,
  cell_attributes = "hash_assignment",
  col_gap = "hash_assignment",
  name = "Activity",
  column_title_rot = 45,
  raster_quality = 4
)
# Geneset enrichment
Sometimes we are interested in knowing which pathways are enriched in the regulon of a particular TF. We can perform geneset enrichment using the `enricher` function from clusterProfiler.
# Retrieve three human ("hsa") MSigDB collections -- H, C2 and C6 -- keyed by
# gene symbol.
H <- EnrichmentBrowser::getGenesets(
  org = "hsa", db = "msigdb", cat = "H", gene.id.type = "SYMBOL"
)
C2 <- EnrichmentBrowser::getGenesets(
  org = "hsa", db = "msigdb", cat = "C2", gene.id.type = "SYMBOL"
)
C6 <- EnrichmentBrowser::getGenesets(
  org = "hsa", db = "msigdb", cat = "C6", gene.id.type = "SYMBOL"
)
# combine genesets and convert genesets to be compatible with enricher
gs <- c(H, C2, C6)
# Build the long-format table (one row per geneset/gene pair) in a single
# vectorized step: each geneset name is repeated once per member gene.
# This replaces rbind-ing one small data frame per geneset, which is slow for
# large collections, and it pairs names with members by position, so it does
# not depend on geneset names being unique.
gs.list <- data.frame(
  gs = rep(names(gs), lengths(gs)),
  genes = unlist(gs, use.names = FALSE)
)
# Test the GATA6 and NKX2-1 regulons for enrichment of the combined genesets,
# using the "weight" column with a cutoff of 0.
enrichresults <- regulonEnrich(
  TF = c("GATA6", "NKX2-1"),
  regulon = regulon,
  weight = "weight",
  weight_cutoff = 0,
  genesets = gs.list
)
## GATA6
##
## NKX2-1
# plot results
# Draws the geneset enrichment results computed by regulonEnrich.
enrichPlot(results = enrichresults)
# Network analysis
We can visualize the genesets as a network
# Network view of the enrichment results for every TF in enrichresults,
# with an adjusted p-value cutoff of 0.1 and ntop_pathways set to 10.
plotGseaNetwork(
  tf = names(enrichresults),
  enrichresults = enrichresults,
  p.adj_cutoff = 0.1,
  ntop_pathways = 10
)
We are interested in understanding the differential networks between two conditions and determining which transcription factors account for the differences in the topology of the networks. The pruned regulons with cluster-specific test statistics computed by `pruneRegulon` can be used to generate cluster-specific networks based on user-defined cutoffs and to visualize differential networks for transcription factors of interest. In this dataset, the GATA6 gene was only expressed in cluster 1 (C1) and NKX2-1 was only expressed in cluster 3 (C3). If we visualize the target genes of GATA6, we can see that C1 has many more target genes of GATA6 than C5, a cluster that does not express GATA6. Similarly, NKX2-1 target genes are confined to C3, which is the only cluster that exogenously expresses NKX2-1.
# Differential network of GATA6 targets between clusters C1 and C5.
plotDiffNetwork(
  regulon,
  cutoff = 0,
  tf = c("GATA6"),
  weight = "weight",
  clusters = c("C1", "C5"),
  layout = "stress"
)
## Building graph using weight as edge weights
# Differential network of NKX2-1 targets between clusters C3 and C5.
plotDiffNetwork(
  regulon,
  cutoff = 0,
  tf = c("NKX2-1"),
  weight = "weight",
  clusters = c("C3", "C5"),
  layout = "stress"
)
## Building graph using weight as edge weights
We can also visualize how transcription factors relate to other transcription factors in each cluster.
# Keep the GATA6/FOXA1/AR rows whose C1 weight is positive, then build the
# C1 graph from that subset.
selected <- which(
  regulon$tf %in% c("GATA6", "FOXA1", "AR") &
    regulon$weight[, "C1"] > 0
)
C1_network <- buildGraph(
  regulon[selected, ],
  weights = "weight",
  cluster = "C1"
)
## Building graph using weight as edge weights
# Keep the GATA6/FOXA1/AR rows whose C5 weight is positive, then build the
# C5 graph from that subset.
selected <- which(
  regulon$tf %in% c("GATA6", "FOXA1", "AR") &
    regulon$weight[, "C5"] > 0
)
C5_network <- buildGraph(
  regulon[selected, ],
  weights = "weight",
  cluster = "C5"
)
## Building graph using weight as edge weights
# Draw the C1 network with a sugiyama layout, highlighting the three TFs.
plotEpiregulonNetwork(
  C1_network,
  layout = "sugiyama",
  tfs_to_highlight = c("GATA6", "FOXA1", "AR")
) +
  ggplot2::ggtitle("C1")
# Draw the C5 network with a sugiyama layout, highlighting the three TFs.
plotEpiregulonNetwork(
  C5_network,
  layout = "sugiyama",
  tfs_to_highlight = c("GATA6", "FOXA1", "AR")
) +
  ggplot2::ggtitle("C5")
To systematically examine the differential network topology between two clusters, we perform an edge subtraction between two graphs, using weights computed by `pruneRegulon`. We then calculate the degree centrality of the weighted differential graphs and, if desired, normalize the differential centrality against the total number of edges. The default normalization function is `sqrt`, as it preserves both the difference in the number of edges (but scaled by sqrt) and the differences in the weights. If the user only wants to examine the differences in the averaged weights, the `FUN` argument can be changed to `identity`. Finally, we rank the transcription factors by (normalized) differential centrality.
# rank by differential centrality
# Rebuild the C1 and C5 graphs from the full regulon (not the three-TF subset
# used for the network plots), so that all TFs in the regulon can be ranked.
C1_network <- buildGraph(regulon, weights = "weight", cluster = "C1")
## Building graph using weight as edge weights
C5_network <- buildGraph(regulon, weights = "weight", cluster = "C5")
## Building graph using weight as edge weights
# Build the signed (abs_diff = FALSE) differential graph of C1 versus C5, add
# centrality to it and normalize that centrality, then rank the TFs.
diff_graph <- normalizeCentrality(
  addCentrality(
    buildDiffGraph(C1_network, C5_network, abs_diff = FALSE)
  )
)
rank_table <- rankTfs(diff_graph)
library(ggplot2)
# Scatter plot of centrality against rank; only the first five and last five
# rows of rank_table are labelled with their TF names.
ggplot(rank_table, aes(x = rank, y = centrality)) +
  geom_point() +
  ggrepel::geom_text_repel(
    data = rbind(head(rank_table, 5), tail(rank_table, 5)),
    aes(label = tf)
  ) +
  theme_classic()
sessionInfo()
## R version 4.4.0 beta (2024-04-15 r86425)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 22.04.4 LTS
##
## Matrix products: default
## BLAS: /home/biocbuild/bbs-3.19-bioc/R/lib/libRblas.so
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_GB LC_COLLATE=C
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: America/New_York
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] ggplot2_3.5.0 org.Hs.eg.db_3.19.0
## [3] AnnotationDbi_1.65.2 msigdbr_7.5.1
## [5] scMultiome_1.3.3 MultiAssayExperiment_1.29.1
## [7] ExperimentHub_2.11.3 AnnotationHub_3.11.4
## [9] BiocFileCache_2.11.2 dbplyr_2.5.0
## [11] epiregulon.extra_0.99.10 epiregulon_0.99.16
## [13] SingleCellExperiment_1.25.1 SummarizedExperiment_1.33.3
## [15] Biobase_2.63.1 GenomicRanges_1.55.4
## [17] GenomeInfoDb_1.39.14 IRanges_2.37.1
## [19] S4Vectors_0.41.6 BiocGenerics_0.49.1
## [21] MatrixGenerics_1.15.0 matrixStats_1.3.0
## [23] BiocStyle_2.31.0
##
## loaded via a namespace (and not attached):
## [1] fs_1.6.3 bitops_1.0-7
## [3] enrichplot_1.23.2 HDO.db_0.99.1
## [5] httr_1.4.7 RColorBrewer_1.1-3
## [7] doParallel_1.0.17 Rgraphviz_2.47.0
## [9] tools_4.4.0 backports_1.4.1
## [11] utf8_1.2.4 R6_2.5.1
## [13] HDF5Array_1.31.6 lazyeval_0.2.2
## [15] rhdf5filters_1.15.5 GetoptLong_1.0.5
## [17] withr_3.0.0 gridExtra_2.3
## [19] cli_3.6.2 Cairo_1.6-2
## [21] scatterpie_0.2.2 labeling_0.4.3
## [23] sass_0.4.9 KEGGgraph_1.63.0
## [25] yulab.utils_0.1.4 gson_0.1.0
## [27] DOSE_3.29.2 scater_1.31.2
## [29] limma_3.59.8 RSQLite_2.3.6
## [31] generics_0.1.3 gridGraphics_0.5-1
## [33] shape_1.4.6.1 dplyr_1.1.4
## [35] GO.db_3.19.0 Matrix_1.7-0
## [37] ggbeeswarm_0.7.2 fansi_1.0.6
## [39] abind_1.4-5 lifecycle_1.0.4
## [41] yaml_2.3.8 edgeR_4.1.23
## [43] rhdf5_2.47.7 qvalue_2.35.0
## [45] SparseArray_1.3.5 grid_4.4.0
## [47] blob_1.2.4 dqrng_0.3.2
## [49] crayon_1.5.2 lattice_0.22-6
## [51] beachmat_2.19.4 cowplot_1.1.3
## [53] annotate_1.81.2 KEGGREST_1.43.0
## [55] magick_2.8.3 pillar_1.9.0
## [57] knitr_1.46 ComplexHeatmap_2.19.0
## [59] metapod_1.11.1 fgsea_1.29.0
## [61] rjson_0.2.21 codetools_0.2-20
## [63] fastmatch_1.1-4 glue_1.7.0
## [65] ggfun_0.1.4 data.table_1.15.4
## [67] vctrs_0.6.5 png_0.1-8
## [69] treeio_1.27.1 gtable_0.3.4
## [71] cachem_1.0.8 xfun_0.43
## [73] S4Arrays_1.3.7 mime_0.12
## [75] tidygraph_1.3.1 iterators_1.0.14
## [77] tinytex_0.50 statmod_1.5.0
## [79] bluster_1.13.0 nlme_3.1-164
## [81] ggtree_3.11.2 bit64_4.0.5
## [83] filelock_1.0.3 bslib_0.7.0
## [85] irlba_2.3.5.1 vipor_0.4.7
## [87] colorspace_2.1-0 DBI_1.2.2
## [89] ggrastr_1.0.2 tidyselect_1.2.1
## [91] bit_4.0.5 compiler_4.4.0
## [93] curl_5.2.1 graph_1.81.0
## [95] BiocNeighbors_1.21.2 DelayedArray_0.29.9
## [97] shadowtext_0.1.3 bookdown_0.39
## [99] checkmate_2.3.1 scales_1.3.0
## [101] rappdirs_0.3.3 stringr_1.5.1
## [103] digest_0.6.35 rmarkdown_2.26
## [105] XVector_0.43.1 htmltools_0.5.8.1
## [107] pkgconfig_2.0.3 sparseMatrixStats_1.15.1
## [109] highr_0.10 fastmap_1.1.1
## [111] rlang_1.1.3 GlobalOptions_0.1.2
## [113] UCSC.utils_0.99.7 DelayedMatrixStats_1.25.2
## [115] farver_2.1.1 jquerylib_0.1.4
## [117] jsonlite_1.8.8 BiocParallel_1.37.1
## [119] GOSemSim_2.29.2 BiocSingular_1.19.0
## [121] RCurl_1.98-1.14 magrittr_2.0.3
## [123] scuttle_1.13.1 GenomeInfoDbData_1.2.12
## [125] ggplotify_0.1.2 patchwork_1.2.0
## [127] Rhdf5lib_1.25.3 munsell_0.5.1
## [129] Rcpp_1.0.12 ape_5.8
## [131] babelgene_22.9 viridis_0.6.5
## [133] EnrichmentBrowser_2.33.1 stringi_1.8.3
## [135] ggraph_2.2.1 zlibbioc_1.49.3
## [137] MASS_7.3-60.2 plyr_1.8.9
## [139] parallel_4.4.0 ggrepel_0.9.5
## [141] Biostrings_2.71.5 graphlayouts_1.1.1
## [143] splines_4.4.0 circlize_0.4.16
## [145] locfit_1.5-9.9 igraph_2.0.3
## [147] reshape2_1.4.4 ScaledMatrix_1.11.1
## [149] BiocVersion_3.19.1 XML_3.99-0.16.1
## [151] evaluate_0.23 scran_1.31.3
## [153] BiocManager_1.30.22 foreach_1.5.2
## [155] tweenr_2.0.3 tidyr_1.3.1
## [157] purrr_1.0.2 polyclip_1.10-6
## [159] clue_0.3-65 ggforce_0.4.2
## [161] rsvd_1.0.5 xtable_1.8-4
## [163] tidytree_0.4.6 viridisLite_0.4.2
## [165] tibble_3.2.1 clusterProfiler_4.11.1
## [167] aplot_0.2.2 memoise_2.0.1
## [169] beeswarm_0.4.0 cluster_2.1.6
## [171] GSEABase_1.65.1