## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set(message = FALSE, warning = FALSE, comment = NA, fig.width = 6.25, fig.height = 5) library(tidyverse) library(microbiome) library(magrittr) library(qwraps2) library(ANCOMBC) library(DT) options(DT.options = list( initComplete = JS("function(settings, json) {", "$(this.api().table().header()).css({'background-color': '#000', 'color': '#fff'});","}"))) ## ----getPackage, eval=FALSE--------------------------------------------------- # if (!requireNamespace("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # BiocManager::install("ANCOMBC") ## ----load, eval=FALSE--------------------------------------------------------- # library(ANCOMBC) ## ----importData--------------------------------------------------------------- data(atlas1006) # Subset to baseline pseq = subset_samples(atlas1006, time == 0) # Re-code the bmi group sample_data(pseq)$bmi_group = recode(sample_data(pseq)$bmi_group, underweight = "lean", lean = "lean", overweight = "overweight", obese = "obese", severeobese = "obese", morbidobese = "obese") # Re-code the nationality group sample_data(pseq)$nation = recode(sample_data(pseq)$nationality, Scandinavia = "NE", UKIE = "NE", SouthEurope = "SE", CentralEurope = "CE", EasternEurope = "EE") # Aggregate to phylum level phylum_data = aggregate_taxa(pseq, "Phylum") ## ----dataSummary, results = "asis"-------------------------------------------- options(qwraps2_markup = "markdown") summary_template = list("Age" = list("min" = ~ min(age, na.rm = TRUE), "max" = ~ max(age, na.rm = TRUE), "mean (sd)" = ~ mean_sd(age, na_rm = TRUE, show_n = "never")), "Gender" = list("F" = ~ n_perc0(sex == "female", na_rm = TRUE), "M" = ~ n_perc0(sex == "male", na_rm = TRUE), "NA" = ~ n_perc0(is.na(sex))), "Nationality" = list("Central Europe" = ~ n_perc0(nation == "CE", na_rm = TRUE), "Eastern Europe" = ~ n_perc0(nation == "EE", na_rm = TRUE), "Northern Europe" = ~ n_perc0(nation == "NE", na_rm = TRUE), "Southern Europe" = ~ n_perc0(nation == "SE", na_rm = TRUE), "US" = ~ n_perc0(nation == "US", na_rm = TRUE), "NA" = ~ n_perc0(is.na(nation))), "BMI" = list("Lean" = ~ n_perc0(bmi_group == "lean", na_rm = TRUE), "Overweight" = ~ n_perc0(bmi_group == "overweight", na_rm = TRUE), "Obese" = ~ n_perc0(bmi_group == "obese", na_rm = TRUE), "NA" = ~ n_perc0(is.na(bmi_group))) ) data_summary = summary_table(meta(pseq), summary_template) data_summary ## ----ancombc------------------------------------------------------------------ out = ancombc(phyloseq = phylum_data, formula = "age + nation + bmi_group", p_adj_method = "holm", zero_cut = 0.90, lib_cut = 1000, group = "nation", struc_zero = TRUE, neg_lb = TRUE, tol = 1e-5, max_iter = 100, conserve = TRUE, alpha = 0.05, global = TRUE) res = out$res res_global = out$res_global ## ----------------------------------------------------------------------------- tab_coef = res$beta col_name = c("Age", "EE - CE", "NE - CE", "SE - CE", "US - CE", "Oerweight - Lean", "Obese - Lean") colnames(tab_coef) = col_name tab_coef %>% datatable(caption = "Coefficients from the Primary Result") %>% formatRound(col_name, digits = 2) ## ----------------------------------------------------------------------------- tab_se = res$se colnames(tab_se) = col_name tab_se %>% datatable(caption = "SEs from the Primary Result") %>% formatRound(col_name, digits = 2) ## ----------------------------------------------------------------------------- tab_w = res$W colnames(tab_w) = col_name tab_w %>% datatable(caption = "Test Statistics from the Primary Result") %>% formatRound(col_name, digits = 2) ## ----------------------------------------------------------------------------- tab_p = res$p_val colnames(tab_p) = col_name tab_p %>% datatable(caption = "P-values from the Primary Result") %>% formatRound(col_name, digits = 2) ## ----------------------------------------------------------------------------- tab_q = res$q colnames(tab_q) = col_name tab_q %>% datatable(caption = "Adjusted p-values from the Primary Result") %>% formatRound(col_name, digits = 2) ## ----------------------------------------------------------------------------- tab_diff = res$diff_abn colnames(tab_diff) = col_name tab_diff %>% datatable(caption = "Differentially Abundant Taxa from the Primary Result") ## ----------------------------------------------------------------------------- samp_frac = out$samp_frac # Replace NA with 0 samp_frac[is.na(samp_frac)] = 0 # Add pesudo-count (1) to avoid taking the log of 0 log_obs_abn = log(abundances(phylum_data) + 1) # Adjust the log observed abundances log_obs_abn_adj = t(t(log_obs_abn) - samp_frac) # Show the first 6 samples round(log_obs_abn_adj[, 1:6], 2) %>% datatable(caption = "Bias-adjusted log observed abundances") ## ----------------------------------------------------------------------------- df_fig1 = data.frame(res$beta * res$diff_abn, check.names = FALSE) %>% rownames_to_column("taxon_id") df_fig2 = data.frame(res$se * res$diff_abn, check.names = FALSE) %>% rownames_to_column("taxon_id") colnames(df_fig2)[-1] = paste0(colnames(df_fig2)[-1], "SD") df_fig = df_fig1 %>% left_join(df_fig2, by = "taxon_id") %>% transmute(taxon_id, age, ageSD) %>% filter(age != 0) %>% arrange(desc(age)) %>% mutate(group = ifelse(age > 0, "g1", "g2")) df_fig$taxon_id = factor(df_fig$taxon_id, levels = df_fig$taxon_id) p = ggplot(data = df_fig, aes(x = taxon_id, y = age, fill = group, color = group)) + geom_bar(stat = "identity", width = 0.7, position = position_dodge(width = 0.4)) + geom_errorbar(aes(ymin = age - ageSD, ymax = age + ageSD), width = 0.2, position = position_dodge(0.05), color = "black") + labs(x = NULL, y = "Log fold change", title = "Waterfall Plot for the Age Effect") + theme_bw() + theme(legend.position = "none", plot.title = element_text(hjust = 0.5), panel.grid.minor.y = element_blank(), axis.text.x = element_text(angle = 60, hjust = 1)) p ## ----------------------------------------------------------------------------- tab_w = res_global[, "W", drop = FALSE] colnames(tab_w) = "Nationality" tab_w %>% datatable(caption = "Test Statistics from the Global Test Result") %>% formatRound(c("Nationality"), digits = 2) ## ----------------------------------------------------------------------------- tab_p = res_global[, "p_val", drop = FALSE] colnames(tab_p) = "Nationality" tab_p %>% datatable(caption = "P-values from the Global Test Result") %>% formatRound(c("Nationality"), digits = 2) ## ----------------------------------------------------------------------------- tab_q = res_global[, "q_val", drop = FALSE] colnames(tab_q) = "Nationality" tab_q %>% datatable(caption = "Adjusted p-values from the Global Test Result") %>% formatRound(c("Nationality"), digits = 2) ## ----------------------------------------------------------------------------- tab_diff = res_global[, "diff_abn", drop = FALSE] colnames(tab_diff) = "Nationality" tab_diff %>% datatable(caption = "Differentially Abundant Taxa from the Global Test Result") ## ----sessionInfo, message = FALSE, warning = FALSE, comment = NA-------------- sessionInfo()