## ---- message = FALSE, warning = FALSE, eval = FALSE--------------------------
#  if(!requireNamespace("BiocManager", quietly = TRUE))
#      install.packages("BiocManager")
#  BiocManager::install("planet")

## ---- message = FALSE, warning = FALSE----------------------------------------
# cell deconvolution packages
library(minfi)
library(EpiDISH)

# data wrangling and plotting
library(dplyr)
library(ggplot2)
library(tidyr)
library(planet)

# load example data
data("plBetas")
data("plCellCpGsThird")
head(plCellCpGsThird)

## -----------------------------------------------------------------------------
houseman_estimates <- minfi:::projectCellType(
    plBetas[rownames(plCellCpGsThird), ],
    plCellCpGsThird,
    lessThanOne = FALSE
)

head(houseman_estimates)

## -----------------------------------------------------------------------------
# robust partial correlations
epidish_RPC <- epidish(
    beta.m = plBetas[rownames(plCellCpGsThird), ],
    ref.m = plCellCpGsThird,
    method = "RPC"
)

# CIBERSORT
epidish_CBS <- epidish(
    beta.m = plBetas[rownames(plCellCpGsThird), ],
    ref.m = plCellCpGsThird,
    method = "CBS"
)

# constrained projection (houseman 2012)
epidish_CP <- epidish(
    beta.m = plBetas[rownames(plCellCpGsThird), ],
    ref.m = plCellCpGsThird,
    method = "CP"
)

## ---- fig.width = 7, fig.height = 7-------------------------------------------
data("plColors")

# bind estimate data frames and reshape for plotting
bind_rows(
    houseman_estimates %>% as.data.frame() %>% mutate(algorithm = "CP (Houseman)"),
    epidish_RPC$estF %>% as.data.frame() %>% mutate(algorithm = "RPC"),
    epidish_CBS$estF %>% as.data.frame() %>% mutate(algorithm = "CBS"),
    epidish_CP$estF %>% as.data.frame() %>% mutate(algorithm = "CP (EpiDISH)")
) %>%
    mutate(sample = rep(rownames(houseman_estimates), 4)) %>%
    as_tibble() %>%
    pivot_longer(
        cols = -c(algorithm, sample),
        names_to = "component",
        values_to = "estimate"
    ) %>%

    # relevel for plot
    mutate(component = factor(component,
        levels = c(
            "nRBC", "Endothelial", "Hofbauer",
            "Stromal", "Trophoblasts",
            "Syncytiotrophoblast"
        )
    )) %>%

    # plot
    ggplot(aes(x = sample, y = estimate, fill = component)) +
    geom_bar(stat = "identity") +
    facet_wrap(~algorithm, ncol = 1) +
    scale_fill_manual(values = plColors) +
    scale_y_continuous(
        limits = c(-0.1, 1.1), breaks = c(0, 0.5, 1),
        labels = scales::percent
    ) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) +
    coord_cartesian(ylim = c(0, 1)) +
    labs(x = "", fill = "")

## -----------------------------------------------------------------------------
# load example data
data(plBetas)
data(plPhenoData)

dim(plBetas)
#> [1] 13918    24
head(plPhenoData)
#> # A tibble: 6 x 7
#>   sample_id  sex   disease   gestation_wk ga_RPC ga_CPC ga_RRPC
#>   <fct>      <chr> <chr>            <dbl>  <dbl>  <dbl>   <dbl>
#> 1 GSM1944936 Male  preeclam~           36   38.5   38.7    38.7
#> 2 GSM1944939 Male  preeclam~           32   33.1   34.2    32.6
#> 3 GSM1944942 Fema~ preeclam~           32   34.3   35.1    33.3
#> 4 GSM1944944 Male  preeclam~           35   35.5   36.7    35.5
#> 5 GSM1944946 Fema~ preeclam~           38   37.6   37.6    36.6
#> 6 GSM1944948 Fema~ preeclam~           36   36.8   38.4    36.7

## -----------------------------------------------------------------------------
plPhenoData <- plPhenoData %>%
    mutate(
        ga_RPC = predictAge(plBetas, type = "RPC"),
        ga_CPC = predictAge(plBetas, type = "CPC"),
        ga_RRPC = predictAge(plBetas, type = "RRPC")
    )

## ---- fig.width = 7, fig.height = 5-------------------------------------------
plPhenoData %>%
  
    # reshape, to plot
    pivot_longer(
        cols = contains("ga"),
        names_to = "clock_type",
        names_prefix = "ga_",
        values_to = "ga"
    ) %>%

    # plot code
    ggplot(aes(x = gestation_wk, y = ga, col = disease)) +
    geom_point() +
    geom_smooth(method = "lm", se = FALSE) +
    facet_wrap(~clock_type) +
    theme(legend.position = "top") +
    labs(x = "Reported GA (weeks)", y = "Inferred GA (weeks)", col = "")

## -----------------------------------------------------------------------------
data(ethnicityCpGs)
all(ethnicityCpGs %in% rownames(plBetas))

results <- predictEthnicity(plBetas)
results %>%
    tail(8)

## ---- fig.width = 7-----------------------------------------------------------
results %>%
    ggplot(aes(
        x = Prob_Caucasian, y = Prob_African,
        col = Predicted_ethnicity
    )) +
    geom_point(alpha = 0.7) +
    coord_cartesian(xlim = c(0, 1), ylim = c(0, 1)) +
    scale_x_continuous(labels = scales::percent) +
    scale_y_continuous(labels = scales::percent) +
    labs(x = "P(Caucasian)", y = "P(African)")

results %>%
    ggplot(aes(
        x = Prob_Caucasian, y = Prob_Asian,
        col = Predicted_ethnicity
    )) +
    geom_point(alpha = 0.7) +
    coord_cartesian(xlim = c(0, 1), ylim = c(0, 1)) +
    scale_x_continuous(labels = scales::percent) +
    scale_y_continuous(labels = scales::percent) +
    labs(x = "P(Caucasian)", y = "P(Asian)")

## -----------------------------------------------------------------------------
table(results$Predicted_ethnicity)

## -----------------------------------------------------------------------------
sessionInfo()