## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
    collapse = TRUE,
    comment = "#>"
)

options(
  rmarkdown.html_vignette.check_title = FALSE
)

## ----setup, message = FALSE---------------------------------------------------
library(tidytof)
library(dplyr)
library(stringr)
library(ggplot2)
library(tidyr)
library(forcats)

## ----eval = FALSE-------------------------------------------------------------
#  if (!requireNamespace("BiocManager", quietly = TRUE)) {
#      install.packages("BiocManager")
#  }
#  
#  BiocManager::install("HDCytoData")

## ----message = FALSE, warning = FALSE-----------------------------------------
citrus_raw <- HDCytoData::Bodenmiller_BCR_XL_flowSet()

citrus_data <-
    citrus_raw |>
    as_tof_tbl(sep = "_")

## -----------------------------------------------------------------------------
citrus_metadata <-
    tibble(
        file_name = as.character(flowCore::pData(citrus_raw)[[1]]),
        sample_id = 1:length(file_name),
        patient = stringr::str_extract(file_name, "patient[:digit:]"),
        stimulation = stringr::str_extract(file_name, "(BCR-XL)|Reference")
    ) |>
    mutate(
        stimulation = if_else(stimulation == "Reference", "Basal", stimulation)
    )

citrus_metadata |>
    head()

## -----------------------------------------------------------------------------
citrus_data <-
    citrus_data |>
    left_join(citrus_metadata, by = "sample_id")

## -----------------------------------------------------------------------------
daa_result <-
    citrus_data |>
    tof_analyze_abundance(
        cluster_col = population_id,
        effect_col = stimulation,
        group_cols = patient,
        test_type = "paired",
        method = "ttest"
    )

daa_result

## ----message = FALSE----------------------------------------------------------
plot_data <-
    citrus_data |>
    mutate(population_id = as.character(population_id)) |>
    left_join(
        select(daa_result, population_id, significant, mean_fc),
        by = "population_id"
    ) |>
    dplyr::count(patient, stimulation, population_id, significant, mean_fc, name = "n") |>
    group_by(patient, stimulation) |>
    mutate(prop = n / sum(n)) |>
    ungroup() |>
    pivot_wider(
        names_from = stimulation,
        values_from = c(prop, n),
    ) |>
    mutate(
        diff = `prop_BCR-XL` - prop_Basal,
        fc = `prop_BCR-XL` / prop_Basal,
        population_id = fct_reorder(population_id, diff),
        direction =
            case_when(
                mean_fc > 1 & significant == "*" ~ "increase",
                mean_fc < 1 & significant == "*" ~ "decrease",
                TRUE ~ NA_character_
            )
    )

significance_data <-
    plot_data |>
    group_by(population_id, significant, direction) |>
    summarize(diff = max(diff), fc = max(fc)) |>
    ungroup()

plot_data |>
    ggplot(aes(x = population_id, y = fc, fill = direction)) +
    geom_violin(trim = FALSE) +
    geom_hline(yintercept = 1, color = "red", linetype = "dotted", size = 0.5) +
    geom_point() +
    geom_text(
        aes(x = population_id, y = fc, label = significant),
        data = significance_data,
        size = 8,
        nudge_x = 0.2,
        nudge_y = 0.06
    ) +
    scale_x_discrete(labels = function(x) str_c("cluster ", x)) +
    scale_fill_manual(
        values = c("decrease" = "#cd5241", "increase" = "#207394"),
        na.translate = FALSE
    ) +
    labs(
        x = NULL,
        y = "Abundance fold-change (stimulated / basal)",
        fill = "Effect",
        caption = "Asterisks indicate significance at an adjusted p-value of 0.05"
    )

## -----------------------------------------------------------------------------
signaling_markers <-
    c(
        "pNFkB_Nd142", "pStat5_Nd150", "pAkt_Sm152", "pStat1_Eu153", "pStat3_Gd158",
        "pSlp76_Dy164", "pBtk_Er166", "pErk_Er168", "pS6_Yb172", "pZap70_Gd156"
    )

dea_result <-
    citrus_data |>
    tof_preprocess(channel_cols = any_of(signaling_markers)) |>
    tof_analyze_expression(
        method = "ttest",
        cluster_col = population_id,
        marker_cols = any_of(signaling_markers),
        effect_col = stimulation,
        group_cols = patient,
        test_type = "paired"
    )

dea_result |>
    head()

## -----------------------------------------------------------------------------
volcano_data <-
    dea_result |>
    mutate(
        log2_fc = log(mean_fc, base = 2),
        log_p = -log(p_adj),
        significance =
            case_when(
                p_adj < 0.05 & mean_fc > 1 ~ "increased",
                p_adj < 0.05 & mean_fc < 1 ~ "decreased",
                TRUE ~ NA_character_
            ),
        marker =
            str_extract(marker, ".+_") |>
                str_remove("_"),
        pair = str_c(marker, str_c("cluster ", population_id), sep = "@")
    )

volcano_data |>
    ggplot(aes(x = log2_fc, y = log_p, fill = significance)) +
    geom_vline(xintercept = 0, linetype = "dashed", color = "gray50") +
    geom_hline(yintercept = -log(0.05), linetype = "dashed", color = "red") +
    geom_point(shape = 21, size = 2) +
    ggrepel::geom_text_repel(
        aes(label = pair),
        data = slice_head(volcano_data, n = 10L),
        size = 2
    ) +
    scale_fill_manual(
        values = c("decreased" = "#cd5241", "increased" = "#207394"),
        na.value = "#cdcdcd"
    ) +
    labs(
        x = "log2(Fold-change)",
        y = "-log10(p-value)",
        fill = NULL,
        caption = "Labels indicate the 10 most significant marker-cluster pairs"
    )

## -----------------------------------------------------------------------------
sessionInfo()