## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(message = FALSE, warning = FALSE, comment = NA, 
                      fig.width = 6.25, fig.height = 5)

library(tidyverse)
library(microbiome)
library(magrittr)
library(qwraps2)
library(ANCOMBC)
library(DT)
options(DT.options = list(
  initComplete = JS("function(settings, json) {",
  "$(this.api().table().header()).css({'background-color': 
  '#000', 'color': '#fff'});","}")))

## ----getPackage, eval=FALSE---------------------------------------------------
#  if (!requireNamespace("BiocManager", quietly = TRUE))
#      install.packages("BiocManager")
#  BiocManager::install("ANCOMBC")

## ----load, eval=FALSE---------------------------------------------------------
#  library(ANCOMBC)

## ----importData---------------------------------------------------------------
data(atlas1006) 
# Subset to baseline
pseq = subset_samples(atlas1006, time == 0)
# Re-code the bmi group
sample_data(pseq)$bmi_group = recode(sample_data(pseq)$bmi_group,
                                     underweight = "lean",
                                     lean = "lean",
                                     overweight = "overweight",
                                     obese = "obese",
                                     severeobese = "obese",
                                     morbidobese = "obese")
# Re-code the nationality group
sample_data(pseq)$nation = recode(sample_data(pseq)$nationality,
                                  Scandinavia = "NE",
                                  UKIE = "NE",
                                  SouthEurope = "SE",
                                  CentralEurope = "CE",
                                  EasternEurope = "EE")

# Aggregate to phylum level
phylum_data = aggregate_taxa(pseq, "Phylum")

## ----dataSummary, results = "asis"--------------------------------------------
options(qwraps2_markup = "markdown")
summary_template =
  list("Age" =
       list("min" = ~ min(age, na.rm = TRUE),
            "max" = ~ max(age, na.rm = TRUE),
            "mean (sd)" = ~ mean_sd(age, na_rm = TRUE, 
                                    show_n = "never")),
       "Gender" =
       list("F" = ~ n_perc0(sex == "female", na_rm = TRUE),
            "M" = ~ n_perc0(sex == "male", na_rm = TRUE),
            "NA" = ~ n_perc0(is.na(sex))),
       "Nationality" =
       list("Central Europe" = ~ n_perc0(nation == "CE", na_rm = TRUE),
            "Eastern Europe" = ~ n_perc0(nation == "EE", na_rm = TRUE),
            "Northern Europe" = ~ n_perc0(nation == "NE", na_rm = TRUE),
            "Southern Europe" = ~ n_perc0(nation == "SE", na_rm = TRUE),
            "US" = ~ n_perc0(nation == "US", na_rm = TRUE),
            "NA" = ~ n_perc0(is.na(nation))),
       "BMI" =
       list("Lean" = ~ n_perc0(bmi_group == "lean", na_rm = TRUE),
            "Overweight" = ~ n_perc0(bmi_group == "overweight", 
                                     na_rm = TRUE),
            "Obese" = ~ n_perc0(bmi_group == "obese", na_rm = TRUE),
            "NA" = ~ n_perc0(is.na(bmi_group)))
       )
data_summary = summary_table(meta(pseq), summary_template)
data_summary

## ----ancombc------------------------------------------------------------------
out = ancombc(phyloseq = phylum_data, formula = "age + nation + bmi_group", 
              p_adj_method = "holm", zero_cut = 0.90, lib_cut = 1000, 
              group = "nation", struc_zero = TRUE, neg_lb = TRUE, tol = 1e-5, 
              max_iter = 100, conserve = TRUE, alpha = 0.05, global = TRUE)

res = out$res
res_global = out$res_global

## -----------------------------------------------------------------------------
tab_coef = res$beta
col_name = c("Age", "EE - CE", "NE - CE", "SE - CE", "US - CE", 
             "Oerweight - Lean", "Obese - Lean")
colnames(tab_coef) = col_name
tab_coef %>% datatable(caption = "Coefficients from the Primary Result") %>%
      formatRound(col_name, digits = 2)

## -----------------------------------------------------------------------------
tab_se = res$se
colnames(tab_se) = col_name
tab_se %>% datatable(caption = "SEs from the Primary Result") %>%
      formatRound(col_name, digits = 2)

## -----------------------------------------------------------------------------
tab_w = res$W
colnames(tab_w) = col_name
tab_w %>% datatable(caption = "Test Statistics from the Primary Result") %>%
      formatRound(col_name, digits = 2)

## -----------------------------------------------------------------------------
tab_p = res$p_val
colnames(tab_p) = col_name
tab_p %>% datatable(caption = "P-values from the Primary Result") %>%
      formatRound(col_name, digits = 2)

## -----------------------------------------------------------------------------
tab_q = res$q
colnames(tab_q) = col_name
tab_q %>% datatable(caption = "Adjusted p-values from the Primary Result") %>%
      formatRound(col_name, digits = 2)

## -----------------------------------------------------------------------------
tab_diff = res$diff_abn
colnames(tab_diff) = col_name
tab_diff %>% 
  datatable(caption = "Differentially Abundant Taxa 
            from the Primary Result")

## -----------------------------------------------------------------------------
samp_frac = out$samp_frac
# Replace NA with 0
samp_frac[is.na(samp_frac)] = 0 

# Add pesudo-count (1) to avoid taking the log of 0
log_obs_abn = log(abundances(phylum_data) + 1) 
# Adjust the log observed abundances
log_obs_abn_adj = t(t(log_obs_abn) - samp_frac)
# Show the first 6 samples
round(log_obs_abn_adj[, 1:6], 2) %>% 
  datatable(caption = "Bias-adjusted log observed abundances")

## -----------------------------------------------------------------------------
df_fig1 = data.frame(res$beta * res$diff_abn, check.names = FALSE) %>% 
  rownames_to_column("taxon_id")
df_fig2 = data.frame(res$se * res$diff_abn, check.names = FALSE) %>% 
  rownames_to_column("taxon_id")
colnames(df_fig2)[-1] = paste0(colnames(df_fig2)[-1], "SD")
df_fig = df_fig1 %>% left_join(df_fig2, by = "taxon_id") %>%
  transmute(taxon_id, age, ageSD) %>%
  filter(age != 0) %>% arrange(desc(age)) %>%
  mutate(group = ifelse(age > 0, "g1", "g2"))
df_fig$taxon_id = factor(df_fig$taxon_id, levels = df_fig$taxon_id)
  
p = ggplot(data = df_fig, 
           aes(x = taxon_id, y = age, fill = group, color = group)) + 
  geom_bar(stat = "identity", width = 0.7, 
           position = position_dodge(width = 0.4)) +
  geom_errorbar(aes(ymin = age - ageSD, ymax = age + ageSD), width = 0.2,
                position = position_dodge(0.05), color = "black") + 
  labs(x = NULL, y = "Log fold change", 
       title = "Waterfall Plot for the Age Effect") + 
  theme_bw() + 
  theme(legend.position = "none",
        plot.title = element_text(hjust = 0.5),
        panel.grid.minor.y = element_blank(),
        axis.text.x = element_text(angle = 60, hjust = 1))
p

## -----------------------------------------------------------------------------
tab_w = res_global[, "W", drop = FALSE]
colnames(tab_w) = "Nationality"
tab_w %>% datatable(caption = "Test Statistics 
                    from the Global Test Result") %>%
      formatRound(c("Nationality"), digits = 2)

## -----------------------------------------------------------------------------
tab_p = res_global[, "p_val", drop = FALSE]
colnames(tab_p) = "Nationality"
tab_p %>% datatable(caption = "P-values 
                    from the Global Test Result") %>%
      formatRound(c("Nationality"), digits = 2)

## -----------------------------------------------------------------------------
tab_q = res_global[, "q_val", drop = FALSE]
colnames(tab_q) = "Nationality"
tab_q %>% datatable(caption = "Adjusted p-values 
                    from the Global Test Result") %>%
      formatRound(c("Nationality"), digits = 2)

## -----------------------------------------------------------------------------
tab_diff = res_global[, "diff_abn", drop = FALSE]
colnames(tab_diff) = "Nationality"
tab_diff %>% datatable(caption = "Differentially Abundant Taxa 
                       from the Global Test Result")

## ----sessionInfo, message = FALSE, warning = FALSE, comment = NA--------------
sessionInfo()