## ----style, echo = FALSE, results = 'asis'---------------------------------
BiocStyle::markdown()

## ----global_options, include=FALSE----------------------------------------------------------------
knitr::opts_chunk$set(message=FALSE, error=FALSE, warning=FALSE, fig.width=8, 
fig.height=8)
options(width=100) 

## ----eval=FALSE-----------------------------------------------------------------------------------
#  if (!requireNamespace("BiocManager", quietly = TRUE))
#      install.packages("BiocManager")
#  BiocManager::install("transbioZI/BioMM")

## ----loadPkg, eval=TRUE, results="hide"-----------------------------------------------------------
library(BioMM)  
library(BiocParallel)  
library(ranger)
library(rms) 
library(glmnet) 
library(e1071) 
library(variancePartition)  

## ----studyData, eval=TRUE-------------------------------------------------------------------------
## Get DNA methylation data 
studyData <- readRDS(system.file("extdata", "/methylData.rds", 
                     package="BioMM"))
head(studyData[,1:5])
dim(studyData)

## ----annotationFile, eval=TRUE--------------------------------------------------------------------
## Load annotation data
featureAnno <- readRDS(system.file("extdata", "cpgAnno.rds", package="BioMM")) 
pathlistDB <- readRDS(system.file("extdata", "goDB.rds", package="BioMM")) 
head(featureAnno)
str(pathlistDB[1:3])

## ----chrlist, eval=TRUE---------------------------------------------------------------------------
## Map to chromosomes
chrlist <- omics2chrlist(data=studyData, probeAnno=featureAnno) 

## ----pathlist, eval=TRUE--------------------------------------------------------------------------
## Map to pathways (input 100 pathways only)
pathlistDBsub <- pathlistDB[1:100]
pathlist <- omics2pathlist(data=studyData, pathlistDBsub, featureAnno, 
                           restrictUp=100, restrictDown=20, minPathSize=10) 

## ----genelist, eval=FALSE-------------------------------------------------------------------------
#  ## Map to genes
#  studyDataSub <- studyData[,1:2000]
#  genelist <- omics2genelist(data=studyDataSub, featureAnno,
#                             restrictUp=200, restrictDown=2)

## ----BioMMrandForest, eval=TRUE-------------------------------------------------------------------
## Parameters
supervisedStage1=TRUE
classifier1=classifier2 <- "randForest"
predMode1=predMode2 <- "classification"
paramlist1=paramlist2 <- list(ntree=300, nthreads=10)   
param1 <- MulticoreParam(workers = 1)
param2 <- MulticoreParam(workers = 10)

studyDataSub <- studyData[,1:2000] ## less computation
result <- BioMM(trainData=studyDataSub, testData=NULL,
                stratify="chromosome", pathlistDB, featureAnno, 
                restrictUp=10, restrictDown=200, minPathSize=10, 
                supervisedStage1, typePCA="regular", 
                resample1="BS", resample2="CV", dataMode="allTrain", 
                repeatA1=50, repeatA2=1, repeatB1=20, repeatB2=1, 
                nfolds=10, FSmethod1=NULL, FSmethod2=NULL, 
                cutP1=0.1, cutP2=0.1, fdr1=NULL, fdr2=NULL, FScore=param1, 
                classifier1, classifier2, predMode1, predMode2, 
                paramlist1, paramlist2, innerCore=param2,  
                outFileA2=NULL, outFileB2=NULL)
print(result)

## ----BioMMpara, eval=TRUE-------------------------------------------------------------------------
## SVM 
supervisedStage1=TRUE
classifier1=classifier2 <- "SVM"
predMode1=predMode2 <- "classification"
paramlist1=paramlist2 <- list(tuneP=FALSE, kernel="radial", 
                              gamma=10^(-3:-1), cost=10^(-3:1))

## GLM with elastic-net
supervisedStage1=TRUE
classifier1=classifier2 <- "glmnet"
predMode1=predMode2 <- "classification" 
paramlist1=paramlist2 <- list(family="binomial", alpha=0.5, 
                              typeMeasure="mse", typePred="class")

## PCA + random forest
supervisedStage1=FALSE
classifier2 <- "randForest"
predMode2 <- "classification"
paramlist2 <- list(ntree=300, nthreads=10)  

## ----BioMMpathway, eval=TRUE----------------------------------------------------------------------
## Parameters
supervisedStage1=FALSE
classifier <- "randForest"
predMode <- "classification"
paramlist <- list(ntree=300, nthreads=10)   
param1 <- MulticoreParam(workers = 1)
param2 <- MulticoreParam(workers = 10)

result <- BioMM(trainData=studyData, testData=NULL,
                stratify="pathway", pathlistDBsub, featureAnno, 
                restrictUp=100, restrictDown=10, minPathSize=10, 
                supervisedStage1, typePCA="regular", 
                resample1="BS", resample2="CV", dataMode="allTrain", 
                repeatA1=40, repeatA2=1, repeatB1=40, repeatB2=1, 
                nfolds=10, FSmethod1=NULL, FSmethod2=NULL, 
                cutP1=0.1, cutP2=0.1, fdr1=NULL, fdr2=NULL, FScore=param1, 
                classifier1, classifier2, predMode1, predMode2, 
                paramlist1, paramlist2, innerCore=param2,  
                outFileA2=NULL, outFileB2=NULL)
print(result)

## ----stage2data, eval=TRUE------------------------------------------------------------------------
## Pathway level data or stage-2 data prepared by BioMMreconData()
stage2dataA <- readRDS(system.file("extdata", "/stage2dataA.rds", 
                       package="BioMM"))

head(stage2dataA[,1:5])
dim(stage2dataA)

## ----stage2dataAprepare, eval=FALSE---------------------------------------------------------------
#  #### Alternatively, 'stage2dataA' can be created by the following code:
#  ## Parameters
#  classifier <- "randForest"
#  predMode <- "probability"
#  paramlist <- list(ntree=300, nthreads=40)
#  param1 <- MulticoreParam(workers = 1)
#  param2 <- MulticoreParam(workers = 10)
#  set.seed(123)
#  ## This will take a bit longer to run
#  stage2dataA <- BioMMreconData(trainDataList=pathlist, testDataList=NULL,
#                              resample="BS", dataMode="allTrain",
#                              repeatA=25, repeatB=1, nfolds=10,
#                              FSmethod=NULL, cutP=0.1, fdr=NULL, FScore=param1,
#                              classifier, predMode, paramlist,
#                              innerCore=param2, outFileA=NULL, outFileB=NULL)
#  

## ----stage2dataViz, eval=TRUE---------------------------------------------------------------------
param <- MulticoreParam(workers = 1) 
plotVarExplained(data=stage2dataA, posF=TRUE, 
                 stratify="pathway", core=param, fileName=NULL)

## ----topFstage2data, eval=TRUE--------------------------------------------------------------------
param <- MulticoreParam(workers = 1) 
plotRankedFeature(data=stage2dataA, 
                  posF=TRUE, topF=10, 
                  blocklist=pathlist, 
                  stratify="pathway",
                  rankMetric="R2", 
                  colorMetric="size", 
                  core=param, fileName=NULL)

## ----sessioninfo, eval=TRUE-----------------------------------------------------------------------
sessionInfo()