## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  # fig.align = "center",
  comment = ">"
)

## ---- eval = FALSE------------------------------------------------------------
#  # install.packages("BiocManager")
#  BiocManager::install("POMA")

## ---- warning = FALSE, message = FALSE, comment = FALSE-----------------------
library(POMA)

## ---- eval = FALSE------------------------------------------------------------
#  data("st000336")
#  PomaEDA(st000336)

## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------
# This file is part of POMA.

# POMA is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# POMA is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with POMA. If not, see <https://www.gnu.org/licenses/>.

library(knitr)
library(dplyr)
library(tibble)
library(ggplot2)
library(SummarizedExperiment)
library(POMA)

data <- st000336
imputation <- "knn"
normalization <- "log_pareto"
clean_outliers <- TRUE
coeff_outliers <- 1.5

##

e <- t(SummarizedExperiment::assay(data))
target <- SummarizedExperiment::colData(data) %>% 
  as.data.frame() %>% 
  rownames_to_column("ID") %>% 
  dplyr::rename(Group = 2) %>% 
  dplyr::select(ID, Group)

## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------
if(clean_outliers){

  imputed <- PomaImpute(data, method = imputation)
  pre_processed <- PomaNorm(imputed, method = normalization) %>%
    PomaOutliers(coef = coeff_outliers)
  
} else {
  
    imputed <- PomaImpute(data, method = imputation)
    pre_processed <- PomaNorm(imputed, method = normalization)
  
}

## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------
# zeros
zeros <- data.frame(number = colSums(e == 0, na.rm = TRUE)) %>%
  rownames_to_column("names") %>%
  filter(number != 0)

all_zero <- zeros %>% 
  filter(number == nrow(e))

# missing values
nas <- data.frame(number = colSums(is.na(e))) %>%
  rownames_to_column("names") %>%
  filter(number != 0)

# zero variance
var_zero <- e %>%
  as.data.frame() %>%
  summarise_all(~ var(., na.rm = TRUE)) %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column("names") %>%
  filter(V1 == 0)

## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------
summary_table1 <- data.frame(Samples = nrow(e),
                             Features = ncol(e),
                             Covariates = ncol(SummarizedExperiment::colData(data)) - 1)
summary_table2 <- data.frame(Counts_Zero = sum(zeros$number),
                             Percent_Zero = paste(round((sum(zeros$number)/(nrow(e)*ncol(e)))*100, 2), "%"))
summary_table3 <- data.frame(Counts_NA = sum(is.na(e)),
                             Percent_NA = paste(round((sum(is.na(e))/(nrow(e)*ncol(e)))*100, 2), "%")) 
knitr::kable(summary_table1)
knitr::kable(summary_table2)
knitr::kable(summary_table3)

## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------
if (nrow(nas) >= 1){
  ggplot(nas, aes(reorder(names, number), number, fill = number)) +
    geom_col() +
    ylab("Missing values") +
    xlab("") +
    ggtitle("Missing Value Plot") +
    theme_bw() + 
    theme(axis.text.x = element_text(angle = 45, hjust = 1),
          legend.position = "none")
}

## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------
if (nrow(zeros) >= 1){
  ggplot(zeros, aes(reorder(names, number), number, fill = number)) +
    geom_col() +
    ylab("Zeros") +
    xlab("") +
    ggtitle("Zeros Plot") +
    theme_bw() + 
    theme(axis.text.x = element_text(angle = 45, hjust = 1),
          legend.position = "none") +
    scale_fill_viridis_d(begin = 0, end = 0.8)
}

## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------
counts <- data.frame(table(target$Group))
colnames(counts) <- c("Group", "Counts")

ggplot(counts, aes(reorder(Group, Counts), Counts, fill = Group)) +
  geom_col() +
  ylab("Counts") +
  xlab("") +
  theme_bw() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        legend.position = "none") +
    scale_fill_viridis_d(begin = 0, end = 0.8)

## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------
indNum <- nrow(SummarizedExperiment::colData(pre_processed))
jttr <- ifelse(indNum <= 10, TRUE, FALSE)

p1 <- PomaBoxplots(imputed, jitter = jttr, label_size = 8) +
  xlab("Samples") +
  ylab("Value") +
  ggtitle("Not Normalized") +
  theme(legend.position = "bottom")

p2 <- PomaBoxplots(pre_processed, jitter = jttr, label_size = 8) +
  xlab("Samples") +
  ylab("Value") +
  ggtitle(paste0("Normalized (", normalization, ")")) +
  theme(legend.position = "bottom")

p1
p2

## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------
p3 <- PomaDensity(imputed) +
  ggtitle("Not Normalized")

p4 <- PomaDensity(pre_processed) +
  ggtitle(paste0("Normalized (", normalization, ")"))

p3
p4

## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------
outliers <- data %>% 
  PomaImpute(method = imputation) %>%
  PomaNorm(method = normalization) %>%
  PomaOutliers(do = "analyze", coef = coeff_outliers)
outliers$polygon_plot

## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------
if(nrow(outliers$outliers) >= 1){
  knitr::kable(outliers$outliers)
  }

## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------
correlations <- PomaCorr(pre_processed, 
                         label_size = 8)
high_correlations <- correlations$correlations %>% filter(abs(R) > 0.97)

## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE, fig.align = 'center'----
correlations$corrplot

## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------
PomaHeatmap(pre_processed)

## ---- echo = FALSE, warning = FALSE, comment = NA, message = FALSE------------
PomaMultivariate(pre_processed, method = "pca", ellipse = FALSE)$scoresplot