## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----eval=FALSE---------------------------------------------------------------
# if (!requireNamespace("BiocManager", quietly = TRUE))
#     install.packages("BiocManager")
# 
# if (!require(ReactomeGSA))
#   BiocManager::install("ReactomeGSA")
# 
# # install the ReactomeGSA.data package for the example data
# if (!require(ReactomeGSA.data))
#   BiocManager::install("ReactomeGSA.data")

## -----------------------------------------------------------------------------
library(ReactomeGSA.data)
data(jerby_b_cells)

jerby_b_cells

## -----------------------------------------------------------------------------
# store the Idents as a meta.data field - this was
# removed while compressing the object as an example
jerby_b_cells$clusters <- Idents(jerby_b_cells)

table(jerby_b_cells$clusters)

## -----------------------------------------------------------------------------
library(ReactomeGSA)

# This creates a pseudo-bulk object by splitting each cluster in 4
# random bulks of data. This approach can be changed through the
# split_by and k_variable parameter.
pseudo_bulk_data <- generate_pseudo_bulk_data(jerby_b_cells, group_by = "clusters")

# we can immediately create the metadata data.frame for this data
pseudo_metadata <- generate_metadata(pseudo_bulk_data)

## -----------------------------------------------------------------------------
# Create a new request object using 'Camera' for the gene set analysis
my_request <- ReactomeAnalysisRequest(method = "Camera")

# set the maximum number of allowed missing values to 50%
my_request <- set_parameters(request = my_request, max_missing_values = 0.5)

# add the pseudo-bulk data as a dataset
my_request <- add_dataset(request = my_request,
                          expression_values = pseudo_bulk_data,
                          sample_data = pseudo_metadata,
                          name = "Pseudo-bulk",
                          type = "rnaseq_counts",
                          comparison_factor = "Group",
                          comparison_group_1 = "Cluster 1",
                          comparison_group_2 = "Cluster 2")

my_request

## -----------------------------------------------------------------------------
quant_result <- perform_reactome_analysis(my_request, compress = FALSE)
quant_result

## -----------------------------------------------------------------------------
# get the pathway-level results
quant_pathways <- pathways(quant_result)
head(quant_pathways)

## -----------------------------------------------------------------------------
# get the top pathways to label them
library(tidyr)
library(dplyr)

top_pathways <- quant_pathways %>% 
  tibble::rownames_to_column(var = "Id") %>%
  filter(`FDR.Pseudo-bulk` < 0.001) %>%
  arrange(desc(`av_foldchange.Pseudo-bulk`))

# limit to a few pathway for aesthetic reasons
top_pathways <- top_pathways[c(1,5,6), ]

# create a simple volcano plot of the pathway results
library(ggplot2)
ggplot(quant_pathways,
       aes(x = `av_foldchange.Pseudo-bulk`,
           y = -log10(`FDR.Pseudo-bulk`))) +
  geom_vline(xintercept = c(log2(0.5), log2(2)), colour="grey", linetype = "longdash") +
  geom_hline(yintercept = -log10(0.05), colour="grey", linetype = "longdash") +
  geom_point() +
  geom_label(data = top_pathways, aes(label = Name), nudge_y = 1, check_overlap = TRUE)

## -----------------------------------------------------------------------------
gsva_result <- analyse_sc_clusters(jerby_b_cells, verbose = TRUE)

## -----------------------------------------------------------------------------
gsva_result

## -----------------------------------------------------------------------------
pathway_expression <- pathways(gsva_result)

# simplify the column names by removing the default dataset identifier
colnames(pathway_expression) <- gsub("\\.Seurat", "", colnames(pathway_expression))

pathway_expression[1:3,]

## -----------------------------------------------------------------------------
# find the maximum differently expressed pathway
max_difference <- do.call(rbind, apply(pathway_expression, 1, function(row) {
    values <- as.numeric(row[2:length(row)])
    return(data.frame(name = row[1], min = min(values), max = max(values)))
}))

max_difference$diff <- max_difference$max - max_difference$min

# sort based on the difference
max_difference <- max_difference[order(max_difference$diff, decreasing = T), ]

head(max_difference)

## ----fig.width=7, fig.height=4------------------------------------------------
plot_gsva_pathway(gsva_result, pathway_id = rownames(max_difference)[1])

## ----fig.width=7, fig.height=8------------------------------------------------
# Additional parameters are directly passed to gplots heatmap.2 function
plot_gsva_heatmap(gsva_result, max_pathways = 15, margins = c(6,20))

## ----fig.width=7, fig.height=4------------------------------------------------
# limit to selected B cell related pathways
relevant_pathways <- c("R-HSA-983170", "R-HSA-388841", "R-HSA-2132295", "R-HSA-983705", "R-HSA-5690714")
plot_gsva_heatmap(gsva_result, 
                  pathway_ids = relevant_pathways, # limit to these pathways
                  margins = c(6,30), # adapt the figure margins in heatmap.2
                  dendrogram = "col", # only plot column dendrogram
                  scale = "row", # scale for each pathway
                  key = FALSE, # don't display the color key
                  lwid=c(0.1,4)) # remove the white space on the left

## ----fig.width=6, fig.height=4------------------------------------------------
plot_gsva_pca(gsva_result)

## -----------------------------------------------------------------------------
sessionInfo()