## ---- include = FALSE--------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, # fig.align = "center", comment = ">" ) ## ---- eval = FALSE------------------------------------------------------------ # # install.packages("BiocManager") # BiocManager::install("POMA") ## ---- warning = FALSE, message = FALSE, comment = FALSE----------------------- library(POMA) ## ---- eval = FALSE------------------------------------------------------------ # data("st000336") # PomaEDA(st000336) ## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------ # This file is part of POMA. # POMA is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # POMA is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # You should have received a copy of the GNU General Public License # along with POMA. If not, see . library(knitr) library(dplyr) library(tibble) library(ggplot2) library(SummarizedExperiment) library(POMA) data <- st000336 imputation <- "knn" normalization <- "log_pareto" clean_outliers <- TRUE coeff_outliers <- 1.5 ## e <- t(SummarizedExperiment::assay(data)) target <- SummarizedExperiment::colData(data) %>% as.data.frame() %>% rownames_to_column("ID") %>% dplyr::rename(Group = 2) %>% dplyr::select(ID, Group) ## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------ if(clean_outliers){ imputed <- PomaImpute(data, method = imputation) pre_processed <- PomaNorm(imputed, method = normalization) %>% PomaOutliers(coef = coeff_outliers) } else { imputed <- PomaImpute(data, method = imputation) pre_processed <- PomaNorm(imputed, method = normalization) } ## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------ # zeros zeros <- data.frame(number = colSums(e == 0, na.rm = TRUE)) %>% rownames_to_column("names") %>% filter(number != 0) all_zero <- zeros %>% filter(number == nrow(e)) # missing values nas <- data.frame(number = colSums(is.na(e))) %>% rownames_to_column("names") %>% filter(number != 0) # zero variance var_zero <- e %>% as.data.frame() %>% summarise_all(~ var(., na.rm = TRUE)) %>% t() %>% as.data.frame() %>% rownames_to_column("names") %>% filter(V1 == 0) ## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------ summary_table1 <- data.frame(Samples = nrow(e), Features = ncol(e), Covariates = ncol(SummarizedExperiment::colData(data)) - 1) summary_table2 <- data.frame(Counts_Zero = sum(zeros$number), Percent_Zero = paste(round((sum(zeros$number)/(nrow(e)*ncol(e)))*100, 2), "%")) summary_table3 <- data.frame(Counts_NA = sum(is.na(e)), Percent_NA = paste(round((sum(is.na(e))/(nrow(e)*ncol(e)))*100, 2), "%")) knitr::kable(summary_table1) knitr::kable(summary_table2) knitr::kable(summary_table3) ## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------ if (nrow(nas) >= 1){ ggplot(nas, aes(reorder(names, number), number, fill = number)) + geom_col() + ylab("Missing values") + xlab("") + ggtitle("Missing Value Plot") + theme_bw() + theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none") } ## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------ if (nrow(zeros) >= 1){ ggplot(zeros, aes(reorder(names, number), number, fill = number)) + geom_col() + ylab("Zeros") + xlab("") + ggtitle("Zeros Plot") + theme_bw() + theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none") + scale_fill_viridis_d(begin = 0, end = 0.8) } ## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------ counts <- data.frame(table(target$Group)) colnames(counts) <- c("Group", "Counts") ggplot(counts, aes(reorder(Group, Counts), Counts, fill = Group)) + geom_col() + ylab("Counts") + xlab("") + theme_bw() + theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none") + scale_fill_viridis_d(begin = 0, end = 0.8) ## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------ indNum <- nrow(SummarizedExperiment::colData(pre_processed)) jttr <- ifelse(indNum <= 10, TRUE, FALSE) p1 <- PomaBoxplots(imputed, jitter = jttr, label_size = 8) + xlab("Samples") + ylab("Value") + ggtitle("Not Normalized") + theme(legend.position = "bottom") p2 <- PomaBoxplots(pre_processed, jitter = jttr, label_size = 8) + xlab("Samples") + ylab("Value") + ggtitle(paste0("Normalized (", normalization, ")")) + theme(legend.position = "bottom") p1 p2 ## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------ p3 <- PomaDensity(imputed) + ggtitle("Not Normalized") p4 <- PomaDensity(pre_processed) + ggtitle(paste0("Normalized (", normalization, ")")) p3 p4 ## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------ outliers <- data %>% PomaImpute(method = imputation) %>% PomaNorm(method = normalization) %>% PomaOutliers(do = "analyze", coef = coeff_outliers) outliers$polygon_plot ## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------ if(nrow(outliers$outliers) >= 1){ knitr::kable(outliers$outliers) } ## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------ correlations <- PomaCorr(pre_processed, label_size = 8) high_correlations <- correlations$correlations %>% filter(abs(R) > 0.97) ## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE, fig.align = 'center'---- correlations$corrplot ## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------ PomaHeatmap(pre_processed) ## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------ PomaMultivariate(pre_processed, method = "pca", ellipse = FALSE)$scoresplot