## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----setup, message = FALSE--------------------------------------------------- library(AlphaMissenseR) library(ExperimentHub) library(dplyr) library(tidyr) library(ggdist) library(gghalves) library(ggplot2) ## ----access_dms--------------------------------------------------------------- eh <- ExperimentHub::ExperimentHub() dms_data <- eh[['EH9555']] head(names(dms_data)) ## ----am_scores---------------------------------------------------------------- am_scores <- eh[['EH9554']] am_scores |> head() ## ----dms_am_NUD15------------------------------------------------------------- NUD15_dms <- dms_data[["NUD15_HUMAN_Suiter_2020"]] NUD15_am <- am_scores |> filter(DMS_id == "NUD15_HUMAN_Suiter_2020") ## ----dms_am_wrangle----------------------------------------------------------- NUD15_am <- NUD15_am |> mutate(Uniprot_ID = "Q9NV35") merged_table <- left_join( NUD15_am, NUD15_dms, by = c("Uniprot_ID" = "UniProt_id", "variant_id" = "mutant"), relationship = "many-to-many" ) |> select(Uniprot_ID, variant_id, AlphaMissense, DMS_score) |> na.omit() ## ----dms_am_plot, fig.width=5, fig.height=3.5--------------------------------- correlation_plot <- merged_table |> ggplot( aes(y = .data$AlphaMissense, x = .data$DMS_score) ) + geom_bin2d(bins = 60) + scale_fill_continuous(type = "viridis") + xlab("DMS score") + ylab("AlphaMissense score") + theme_classic() + theme( axis.text.x = element_text(size = 16), axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 16, vjust = 2), axis.title.x = element_text(size = 16, vjust = 0), legend.title = element_text(size = 16), legend.text = element_text(size = 16) ) correlation_plot ## ----corrtest----------------------------------------------------------------- cor.test( merged_table$AlphaMissense, merged_table$DMS_score, method="spearman", exact = FALSE ) ## ----load_metrics------------------------------------------------------------- metrics_scores <- eh[['EH9593']] ## ----prep_data---------------------------------------------------------------- chosen_assays <- c( "A0A1I9GEU1_NEIME_Kennouche_2019", "A0A192B1T2_9HIV1_Haddox_2018", "ADRB2_HUMAN_Jones_2020", "BRCA1_HUMAN_Findlay_2018", "CALM1_HUMAN_Weile_2017", "GAL4_YEAST_Kitzman_2015", "Q59976_STRSQ_Romero_2015", "UBC9_HUMAN_Weile_2017", "TPK1_HUMAN_Weile_2017", "YAP1_HUMAN_Araya_2012") am_subset <- am_scores |> filter(DMS_id %in% chosen_assays) dms_subset <- dms_data[names(dms_data) %in% chosen_assays] dms_subset <- bind_rows(dms_subset) |> as.data.frame() metric_subset <- metrics_scores[["Spearman"]] |> filter(DMS_ID %in% chosen_assays) |> select(-c(Number_of_Mutants, Selection_Type, UniProt_ID, MSA_Neff_L_category, Taxon)) merge_am_dms <- left_join( am_subset, dms_subset, by = c("DMS_id" = "DMS_id", "variant_id" = "mutant"), relationship = "many-to-many" ) |> na.omit() ## ----calc_sp------------------------------------------------------------------ spearman_res <- merge_am_dms |> group_by(DMS_id) |> summarize( AlphaMissense = cor(AlphaMissense, DMS_score, method = "spearman") ) |> mutate( AlphaMissense = abs(AlphaMissense) ) DMS_IDs <- metric_subset |> pull(DMS_ID) metric_subset <- metric_subset |> select(-DMS_ID) |> abs() |> mutate(DMS_id = DMS_IDs) all_sp <- left_join( metric_subset, spearman_res, by = "DMS_id" ) ## ----pivot_long--------------------------------------------------------------- res_long <- all_sp |> select(-DMS_id) |> tidyr::pivot_longer( cols = everything(), names_to = "model", values_to = "score" ) |> group_by(model) |> mutate( model_mean = mean(score) ) |> mutate( model = as.factor(model) ) |> ungroup() unique_ordered_models <- unique(res_long$model[order(res_long$model_mean, decreasing = TRUE)]) res_long$model <- factor(res_long$model, levels = unique_ordered_models) topmodels <- levels(res_long$model)[1:5] top_spearmans <- res_long |> filter(model %in% topmodels) |> droplevels() ## ----raincloud, fig.width=7, fig.height=4.5----------------------------------- top_spearmans |> ggplot( aes(x = model, y = score, fill = model, group = model) ) + ggdist::stat_halfeye( adjust = .5, width = .6, .width = 0, justification = -.2, point_colour = NA ) + geom_boxplot( width = .15, outlier.shape = NA ) + gghalves::geom_half_point( side = "l", range_scale = .4, alpha = .2 ) + coord_cartesian(clip = "off") + scale_fill_discrete(name = "Models") + theme_classic() + ylab("Spearman") + theme( axis.text.x = element_text(size = 11, angle = -12), axis.text.y = element_text(size = 16), axis.title.y = element_text(size = 16), axis.title.x = element_blank(), legend.title = element_text(size = 16), legend.text = element_text(size = 11) ) ## ----------------------------------------------------------------------------- sessionInfo()