## ----style, echo = FALSE, results = 'asis'------------------------------------
BiocStyle::markdown()

## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(dpi=40,fig.width=7)

## ----eval=FALSE---------------------------------------------------------------
#  if (!requireNamespace("BiocManager", quietly=TRUE))
#      install.packages("BiocManager")
#  BiocManager::install("bandle")

## ----ldpkg, message=FALSE-----------------------------------------------------
library("bandle")

## ----ldpkg2, message=FALSE----------------------------------------------------
library("pheatmap")
library("viridis")
library("dplyr")
library("ggplot2")
library("gridExtra")

## ----loadpkgdat, message=FALSE, warning=FALSE, fig.width=7, fig.height=6------
library("pRolocdata")
data("tan2009r1")

## Let's set the stock colours of the classes to plot to be transparent
setStockcol(NULL)
setStockcol(paste0(getStockcol(), "90")) 

## Plot the data using plot2D from pRoloc
plot2D(tan2009r1,
       main = "An example spatial proteomics datasets", 
       grid = FALSE)
addLegend(tan2009r1, where = "topleft", cex = 0.7, ncol = 2)

## ----simdata, fig.width=8, fig.height=10--------------------------------------
set.seed(1)
tansim <- sim_dynamic(object = tan2009r1,
                      numRep = 6L,
                      numDyn = 100L)

## ----first_rep----------------------------------------------------------------
# To access the first replicate
tansim$lopitrep[[1]]

## ----plotsims-----------------------------------------------------------------
plot_title <- c(paste0("Replicate ", seq(3), " condition", " A"), 
               paste0("Replicate ", seq(3), " condition", " B"))

par(mfrow = c(2, 3))
out <- lapply(seq(tansim$lopitrep), function(z) 
    plot2D(tansim$lopitrep[[z]], grid = FALSE, main = plot_title[z]))

## ----fitgps, fig.height=10, fig.width=8---------------------------------------
par(mfrow = c(4, 3))
gpParams <- lapply(tansim$lopitrep, function(x) 
  fitGPmaternPC(x, hyppar = matrix(c(10, 60, 250), nrow = 1)))

## ----plotgps, fig.height=10, fig.width=8--------------------------------------
par(mfrow = c(4, 3))
plotGPmatern(tansim$lopitrep[[1]], params = gpParams[[1]])

## ----sethyppar----------------------------------------------------------------
K <- length(getMarkerClasses(tansim$lopitrep[[1]], fcol = "markers"))
pc_prior <- matrix(NA, ncol = 3, K)
pc_prior[seq.int(1:K), ] <- matrix(rep(c(10, 60, 250),
                                       each = K), ncol = 3)

## ----runhyppar----------------------------------------------------------------
gpParams <- lapply(tansim$lopitrep,
                   function(x) fitGPmaternPC(x, hyppar = pc_prior))

## ----setweightprior-----------------------------------------------------------
set.seed(1)
dirPrior = diag(rep(1, K)) + matrix(0.001, nrow = K, ncol = K)
predDirPrior <- prior_pred_dir(object = tansim$lopitrep[[1]],
                               dirPrior = dirPrior,
                               q = 15)

## -----------------------------------------------------------------------------
predDirPrior$meannotAlloc

## -----------------------------------------------------------------------------
predDirPrior$tailnotAlloc

## -----------------------------------------------------------------------------
hist(predDirPrior$priornotAlloc, col = getStockcol()[1])

## ----runbandle, message=FALSE, warning=FALSE, error=FALSE, echo = TRUE, results = 'hide'----
## Split the datasets into two separate lists, one for control and one for treatment
control <- tansim$lopitrep[1:3] 
treatment <- tansim$lopitrep[4:6]

## Run bandle
bandleres <- bandle(objectCond1 = control,
                    objectCond2 = treatment,
                    numIter = 100,   # usually 10,000
                    burnin = 5L,    # usually 5,000
                    thin = 1L,      # usually 20
                    gpParams = gpParams,
                    pcPrior = pc_prior,
                    numChains = 3,  # usually >=4
                    dirPrior = dirPrior, 
                    seed = 1)       # set a random seed for reproducibility)

## -----------------------------------------------------------------------------
bandleres

## ----calcGelman---------------------------------------------------------------
calculateGelman(bandleres)

## ----plotoutliers, fig.width=8, fig.height=7----------------------------------
plotOutliers(bandleres)

## -----------------------------------------------------------------------------
bandleres_opt <- bandleres[-2]

## -----------------------------------------------------------------------------
bandleres_opt

## ----processbandle1-----------------------------------------------------------
summaries(bandleres_opt)

## ----processbandle2-----------------------------------------------------------
bandleres_opt <- bandleProcess(bandleres_opt)

## ----processbandle3, results = 'hide'-----------------------------------------
summaries(bandleres_opt)

## ----bandpred-----------------------------------------------------------------
xx <- bandlePredict(control, 
                    treatment, 
                    params = bandleres_opt, 
                    fcol = "markers")
res_control <- xx[[1]]
res_treatment <- xx[[2]]

## ----showlength---------------------------------------------------------------
length(res_control)
length(res_treatment)

## ----viewdata-----------------------------------------------------------------
fvarLabels(res_control[[1]])

## ----fdata, eval=FALSE--------------------------------------------------------
#  fData(res_control[[1]])$bandle.probability
#  fData(res_control[[1]])$bandle.allocation

## ----setthreshold-------------------------------------------------------------
res_control[[1]] <- getPredictions(res_control[[1]], 
                                   fcol = "bandle.allocation",                   
                                   scol = "bandle.probability",                   
                                   mcol = "markers",                   
                                   t = .99)

res_treatment[[1]] <- getPredictions(res_treatment[[1]], 
                                   fcol = "bandle.allocation",                   
                                   scol = "bandle.probability",                   
                                   mcol = "markers",                   
                                   t = .99)

## -----------------------------------------------------------------------------
## Remove the markers from the MSnSet
res_control_unknowns <- unknownMSnSet(res_control[[1]], fcol = "markers")
res_treated_unknowns <- unknownMSnSet(res_treatment[[1]], fcol = "markers")

## -----------------------------------------------------------------------------
getMarkers(res_control_unknowns, "bandle.allocation.pred")
getMarkers(res_treated_unknowns, "bandle.allocation.pred")

## -----------------------------------------------------------------------------
res1 <- fData(res_control_unknowns)$bandle.allocation.pred
res2 <- fData(res_treated_unknowns)$bandle.allocation.pred

res1_tbl <- table(res1)
res2_tbl <- table(res2)

## ----fig.height=4, fig.width=8------------------------------------------------
par(mfrow = c(1, 2))
barplot(res1_tbl, las = 2, main = "Predicted location: control",
        ylab = "Number of proteins")
barplot(res2_tbl, las = 2, main = "Predicted location: treatment",
        ylab = "Number of proteins")

## ----allocspost, fig.height=4, fig.width=8------------------------------------
pe1 <- fData(res_control_unknowns)$bandle.probability
pe2 <- fData(res_treated_unknowns)$bandle.probability

par(mfrow = c(1, 2))
boxplot(pe1 ~ res1, las = 2, main = "Posterior: control",
        ylab = "Probability")
boxplot(pe2 ~ res2, las = 2, main = "Posterior: treatment",
        ylab = "Probability")

## -----------------------------------------------------------------------------
res1_unlabelled <- unknownMSnSet(res_control_unknowns, 
                                 fcol = "bandle.allocation.pred")
res2_unlabelled <- unknownMSnSet(res_treated_unknowns, 
                                 fcol = "bandle.allocation.pred")

## -----------------------------------------------------------------------------
nrow(res1_unlabelled)
nrow(res2_unlabelled)

## -----------------------------------------------------------------------------
fn1 <- featureNames(res1_unlabelled)
fn2 <- featureNames(res2_unlabelled)

## ----fig.height=10, fig.width=10----------------------------------------------
g <- vector("list", 9)
for (i in 1:9) g[[i]] <- mcmc_plot_probs(bandleres_opt, fn1[i], cond = 1)
do.call(grid.arrange, g)

## ----fig.height=10, fig.width=10----------------------------------------------
g <- vector("list", 9)
for (i in 1:9) g[[i]] <- mcmc_plot_probs(bandleres_opt, fn1[i], cond = 2)
do.call(grid.arrange, g)

## -----------------------------------------------------------------------------
grid.arrange(
  mcmc_plot_probs(bandleres_opt, fname = "Q24253", cond = 1) +
  ggtitle("Distribution of Q24253 in control"),
  mcmc_plot_probs(bandleres_opt, fname = "Q24253", cond = 2) +
    ggtitle("Distribution of Q24253 in treated")
)

## -----------------------------------------------------------------------------
fData(res_control_unknowns)$bandle.joint["Q24253", ]

## ----heatmap_control----------------------------------------------------------
bjoint_control <- fData(res_control_unknowns)$bandle.joint
pheatmap(bjoint_control, cluster_cols = FALSE, color = viridis(n = 25), 
         show_rownames = FALSE, main = "Joint posteriors in the control")

## ----heatmap_treatment--------------------------------------------------------
bjoint_treated <- fData(res_treated_unknowns)$bandle.joint
pheatmap(bjoint_treated, cluster_cols = FALSE, color = viridis(n = 25),
         show_rownames = FALSE, main = "Joint posteriors in the treated")

## -----------------------------------------------------------------------------
dl <- diffLocalisationProb(bandleres_opt)
head(dl)

## ----numtransloc--------------------------------------------------------------
dl <- fData(res_control_unknowns)$bandle.differential.localisation
names(dl) <- featureNames(res_control_unknowns)
head(dl)

## ----extractdiffloc-----------------------------------------------------------
plot(dl[order(dl, decreasing = TRUE)],
     col = getStockcol()[3], pch = 19, ylab = "Probability",
     xlab = "Rank", main = "Differential localisation rank plot")

## -----------------------------------------------------------------------------
## Subset MSnSets for DL proteins > 0.99
ind <- which(dl > 0.99)
res_control_dl0.99 <- res_control_unknowns[ind, ]
res_treated_dl0.99 <- res_treated_unknowns[ind, ]

## Get DL results
dl0.99 <- fData(res_control_dl0.99)$bandle.differential.localisation
(names(dl0.99) <- featureNames(res_control_dl0.99))

## ----alluvial, warning=FALSE, message=FALSE, fig.height=6, fig.width=7--------
## Create an list of the two MSnSets
dl_msnsets <- list(res_control_dl0.99, res_treated_dl0.99)

## Set colours for organelles and unknown
mrkCl <- getMarkerClasses(res_control[[1]], fcol = "markers")
dl_cols <- c(getStockcol()[seq(mrkCl)], "grey")
names(dl_cols) <- c(mrkCl, "unknown")

## Now plot
plotTranslocations(dl_msnsets, 
                   fcol = "bandle.allocation.pred", 
                   col = dl_cols)

## ----chord, warning=FALSE, message=FALSE, fig.height=7, fig.width=7-----------
plotTranslocations(dl_msnsets, 
                   fcol = "bandle.allocation.pred", 
                   col = dl_cols,
                   type = "chord")

## -----------------------------------------------------------------------------
plotTable(dl_msnsets, fcol = "bandle.allocation.pred")

## ----diffloc_boot-------------------------------------------------------------
set.seed(1)
boot_t <- bootstrapdiffLocprob(params = bandleres, top = 100,
                               Bootsample = 5000, decreasing = TRUE)

boxplot(t(boot_t), col = getStockcol()[5],
        las = 2, ylab = "Probability", ylim = c(0, 1),
        main = "Differential localisation \nprobability plot (top 100 proteins)")

## ----diffloc_binom------------------------------------------------------------
bin_t <- binomialDiffLocProb(params = bandleres, top = 100,
                             nsample = 5000, decreasing = TRUE)

boxplot(t(bin_t), col = getStockcol()[5],
        las = 2, ylab = "Probability", ylim = c(0, 1),
        main = "Differential localisation \nprobability plot (top 100 proteins)")

## ----get_pe-------------------------------------------------------------------
# more robust estimate of probabilities
dprobs <- rowMeans(bin_t)

# compute cumulative error, there is not really a false discovery rate in
# Bayesian statistics but you can look at the cumulative error rate
ce <- cumsum(1  - dprobs)

# you could threshold on the interval and this will reduce the number of
# differential localisations
qt <- apply(bin_t, 1, function(x) quantile(x, .025))

## -----------------------------------------------------------------------------
EFDR(dl, threshold = 0.95)

## ----sessionInfo--------------------------------------------------------------
sessionInfo()