### R code from vignette source 'ENmix.Rnw'

###################################################
### code chunk number 1: options
###################################################
options(width=70)


###################################################
### code chunk number 2: example0 (eval = FALSE)
###################################################
## rgSet <- readidat(datapath)
## beta=mpreprocess(rgSet)


###################################################
### code chunk number 3: example1 (eval = FALSE)
###################################################
## library(ENmix)
## #read in data
## require(minfiData)
## #read in IDAT files
## path <- file.path(find.package("minfiData"),"extdata")
## rgSet <- readidat(path = path,recursive = TRUE)
## #data pre-processing
## beta=mpreprocess(rgSet,nCores=6)
## #quality control, data pre-processing and imputation
## beta=mpreprocess(rgSet,nCores=6,qc=TRUE,fqcfilter=TRUE,
##      rmcr=TRUE,impute=TRUE)


###################################################
### code chunk number 4: example2 (eval = FALSE)
###################################################
## library(ENmix)
## #read in data
## path <- file.path(find.package("minfiData"),"extdata")
## rgSet <- readidat(path = path,recursive = TRUE)
## #QC info
## qc<-QCinfo(rgSet)
## #background correction and dye bias correction
## #if qc info object is provided, the low quality or outlier samples
## # and probes will be excluded before background correction
## mdat<-preprocessENmix(rgSet, bgParaEst="oob", dyeCorr="RELIC", 
##                      QCinfo=qc, nCores=6)
## #between-array normalization
## mdat<-norm.quantile(mdat, method="quantile1")
## #probe-type bias adjustment
## beta<-rcp(mdat,qcscore=qc)
## #assign missing to low quality and outlier data points, remove samples
## # and probes with too many missing data, and do imputation
## beta <- qcfilter(beta,qcscore=qc,rmcr=TRUE,impute=TRUE)


###################################################
### code chunk number 5: example3 (eval = FALSE)
###################################################
## library(ENmix)
## #read in data
## path <- file.path(find.package("minfiData"),"extdata")
## rgSet <- readidat(path = path,recursive = TRUE)
## 
## #attach some phenotype info for later use
## sheet <- read.metharray.sheet(file.path(find.package("minfiData"),
##          "extdata"),pattern = "csv$")
## rownames(sheet)=paste(sheet$Slide,sheet$Array,sep="_")
## colData(rgSet)=as(sheet[colnames(rgSet),],"DataFrame")
## 
## #generate internal control plots to inspect quality of experiments
## plotCtrl(rgSet)
## 
## #generate quality control (QC) information and plots, 
## #identify outlier samples in data distribution
## #if set detPtype="negative", recommand to set depPthre to <= 10E-6
## #if set detPtype="oob", depPthre of 0.01 or 0.05 may be sufficient
## #see Heiss et al. PMID: 30678737 for details
## qc<-QCinfo(rgSet,detPtype="negative",detPthre=0.000001)
## 
## ###data preprocessing
## #background correction and dye bias correction
## mdat<-preprocessENmix(rgSet, bgParaEst="oob", dyeCorr="RELIC",
##                       QCinfo=qc, nCores=6)
## #between-array normalization
## mdat<-norm.quantile(mdat, method="quantile1")
## #attach phenotype data for later use
## colData(mdat)=as(sheet[colnames(mdat),],"DataFrame")
## #probe-type bias adjustment
## beta<-rcp(mdat,qcscore=qc)
## 
## #Search for multimodal CpGs, so called gap probes
## #beta matrix before qcfilter() step should be used for this purpose
## nmode<-nmode(beta, minN = 3, modedist=0.2, nCores = 6)
## 
## #filter out poor quality and outlier data points for each probe;
## #rows and columns with too many missing values can be removed
## #Imputation will be performed to fill missing data if specify.
## beta <- qcfilter(beta,qcscore=qc,rmcr=TRUE, rthre=0.05,
##                    cthre=0.05,impute=TRUE)
## 
## #If for epigenetic mutation analysis, use 
## #beta <- qcfilter(beta,qcscore=qc,rmoutlier=FALSE,rmcr=TRUE, rthre=0.05,
## #                   cthre=0.05,impute=FALSE)
## 
## ##Principal component regression analysis plot
## #phenotypes to be studied in the plot
## cov<-data.frame(group=colData(mdat)$Sample_Group,
##     slide=factor(colData(mdat)$Slide))
## #missing data is not allowed in the analysis!
## pcrplot(beta, cov, npc=6)
## 
## #Non-negative control surrogate variables, which can be used in
## # downstream association analysis to control for batch effects. 
## csva<-ctrlsva(rgSet)
## 
## #estimate cell type proportions
## #rgDataset or methDataSet can also be used for the estimation
## celltype=estimateCellProp(userdata=beta,
##          refdata="FlowSorted.Blood.450k",
##          nonnegative = TRUE,normalize=TRUE)
## 
## #predic sex based on rgDataSet or methDataset
## sex=predSex(rgSet)
## sex=predSex(mdat)
## 
## #Methylation age calculation
## mage=methyAge(beta)
## 
## #ICC analysis can be performed to study measurement reliability if
## # have duplicates, see manual
## #dupicc()
## 
## #After association analysis, the P values can be used for DMR analysis
## #simulate a small example file in BED format
## dat=simubed()
## #using ipdmr method
## ipdmr(data=dat,seed=0.1)
## #using comb-p method
## combp(data=dat,seed=0.1)


###################################################
### code chunk number 6: readdata (eval = FALSE)
###################################################
## library(ENmix)
## rgSet <- readidat(path = "directory path for idat files", 
##                  recursive = TRUE)
## 
## #Download manifestfile for HumanMethylation450 beadchip
## system("wget ftp://webdata2:webdata2@ussd-ftp.illumina.com/downloads/ProductFiles
## /HumanMethylation450/HumanMethylation450_15017482_v1-2.csv")
## mf="HumanMethylation450_15017482_v1-2.csv"
## rgSet <- readidat(path = "path to idat files",manifestfile=mf,recursive = TRUE)


###################################################
### code chunk number 7: readdata2 (eval = FALSE)
###################################################
## M<-matrix_for_methylated_intensity
## U<-matrix_for_unmethylated_intensity
## pheno<-as.data.frame(cbind(colnames(M), colnames(M)))
## names(pheno)<-c("Basename","filenames")
## rownames(pheno)<-pheno$Basename
## pheno<-AnnotatedDataFrame(data=pheno)
## anno<-c("IlluminaHumanMethylation450k", "ilmn12.hg19")
## names(anno)<-c("array", "annotation")
## mdat<-MethylSet(Meth = M, Unmeth = U, annotation=anno, 
## phenoData=pheno)


###################################################
### code chunk number 8: ctrlplot (eval = FALSE)
###################################################
## plotCtrl(rgSet)


###################################################
### code chunk number 9: ctrlplot2 (eval = FALSE)
###################################################
## 
## path <- file.path(find.package("minfiData"),"extdata")
## rgSet <- readidat(path = path,recursive = TRUE)
## sheet <- read.metharray.sheet(file.path(find.package("minfiData"),
##          "extdata"), pattern = "csv$") 
## rownames(sheet)=paste(sheet$Slide,sheet$Array,sep="_")
## colData(rgSet)=as(sheet[colnames(rgSet),],"DataFrame")
## #control plots
## IDorder=rownames(colData(rgSet))[order(colData(rgSet)$Slide,
##         colData(rgSet)$Array)]
## plotCtrl(rgSet,IDorder)


###################################################
### code chunk number 10: distplot (eval = FALSE)
###################################################
## 
## mraw <- getmeth(rgSet)
## #total intensity plot is userful for data quality inspection
## #and identification of outlier samples
## multifreqpoly(assays(mraw)$Meth+assays(mraw)$Unmeth,
##              xlab="Total intensity")
## #Compare frequency polygon plot and density plot
## beta<-getB(mraw)
## anno=rowData(mraw)
## beta1=beta[anno$Infinium_Design_Type=="I",]
## beta2=beta[anno$Infinium_Design_Type=="II",]
## library(geneplotter)
## jpeg("dist.jpg",height=900,width=600)
## par(mfrow=c(3,2))
## multidensity(beta,main="Multidensity")
## multifreqpoly(beta,main="Multifreqpoly",xlab="Beta value")
## multidensity(beta1,main="Multidensity: Infinium I")
## multifreqpoly(beta1,main="Multifreqpoly: Infinium I",
## xlab="Beta value")
## multidensity(beta2,main="Multidensity: Infinium II")
## multifreqpoly(beta2,main="Multifreqpoly: Infinium II",
## xlab="Beta value")
## dev.off()


###################################################
### code chunk number 11: filter (eval = FALSE)
###################################################
## qc<-QCinfo(rgSet)
## #exclude badsample and bad CpG before backgroud correction
## mdat<-preprocessENmix(rgSet, QCinfo=qc, nCores=6)


###################################################
### code chunk number 12: rmoutlier (eval = FALSE)
###################################################
## #filter out outliers only
## b1=qcfilter(beta)
## #filter out low quality and outlier values
## b2=qcfilter(beta,qcscore=qc)
## #filter out low quality and outlier values, remove rows 
## #and columns with too many missing values
## b3=qcfilter(beta,qcscore=qc,rmcr=TRUE)
## #filter out low quality and outlier values, remove rows and
## #columns with too many (rthre=0.05,cthre=0.05, 5% in default) missing values,
## # and then do imputation
## b3=qcfilter(beta,qcscore=qc,rmcr=TRUE,rthre=0.05,
##               cthre=0.05,impute=TRUE)


###################################################
### code chunk number 13: preprocessENmix (eval = FALSE)
###################################################
## qc=QCinfo(rgSet)
## mdat<-preprocessENmix(rgSet, bgParaEst="oob", dyeCorr="RELIC", 
## QCinfo=qc, exCpG=NULL, nCores=6)


###################################################
### code chunk number 14: normalize.quantile.450k (eval = FALSE)
###################################################
## mdat<-norm.quantile(mdat, method="quantile1")


###################################################
### code chunk number 15: rcp (eval = FALSE)
###################################################
## beta<-rcp(mdat)


###################################################
### code chunk number 16: ctrlsva (eval = FALSE)
###################################################
## sva<-ctrlsva(rgSet)


###################################################
### code chunk number 17: pcrplot (eval = FALSE)
###################################################
## require(minfiData)
## mdat <- preprocessRaw(RGsetEx)
## beta=getBeta(mdat, "Illumina")
## group=pData(mdat)$Sample_Group
## slide=factor(pData(mdat)$Slide)
## cov=data.frame(group,slide)
## pcrplot(beta,cov,npc=6)


###################################################
### code chunk number 18: nmode (eval = FALSE)
###################################################
## nmode<- nmode(beta, minN = 3, modedist=0.2, nCores = 5)


###################################################
### code chunk number 19: celltype (eval = FALSE)
###################################################
## require(minfidata)
## path <- file.path(find.package("minfiData"),"extdata")
## #based on rgDataset
## rgSet <- readidat(path = path,recursive = TRUE)
##        celltype=estimateCellProp(userdata=rgSet,
##          refdata="FlowSorted.Blood.450k",
##          nonnegative = TRUE,normalize=TRUE)


###################################################
### code chunk number 20: mage (eval = FALSE)
###################################################
## require(minfidata)
## path <- file.path(find.package("minfiData"),"extdata")
## #based on rgDataset
## rgSet <- readidat(path = path,recursive = TRUE)
## meth=getmeth(rgSet)
## beta=getB(meth)
## mage=methyAge(beta)


###################################################
### code chunk number 21: dmr (eval = FALSE)
###################################################
## dat=simubed()
## names(dat)
## ipdmr(data=dat,seed=0.1)
## combp(data=dat,seed=0.1)


###################################################
### code chunk number 22: icc (eval = FALSE)
###################################################
## require(minfiData)
## path <- file.path(find.package("minfiData"),"extdata")
## rgSet <- readidat(path = path,recursive = TRUE)
## mdat=getmeth(rgSet)
## beta=getB(mdat,"Illumina")
## #assuming list in id1 are corresponding duplicates of id2
## dupidx=data.frame(id1=c("5723646052_R02C02","5723646052_R04C01",
##                        "5723646052_R05C02"),
##                   id2=c("5723646053_R04C02","5723646053_R05C02",
##                        "5723646053_R06C02"))
## iccresu<-dupicc(dat=beta,dupid=dupidx)


###################################################
### code chunk number 23: oxbs (eval = FALSE)
###################################################
## load(system.file("oxBS.MLE.RData",package="ENmix"))
## resu<-oxBS.MLE(beta.BS,beta.oxBS,N.BS,N.oxBS)
## dim(resu[["5mC"]])
## dim(resu[["5hmC"]])


###################################################
### code chunk number 24: ENmixAndminfi (eval = FALSE)
###################################################
## library(ENmix)
## #minfi functions to read in data
## sheet <- read.metharray.sheet(file.path(find.package("minfiData"),
##  "extdata"), pattern = "csv$")
## rgSet <- read.metharray.exp(targets = sheet, extended = TRUE)
## #ENmix function for control plot
## plotCtrl(rgSet)
## #minfi functions to extract methylation and annotation data
## mraw <- preprocessRaw(rgSet)
## beta<-getB(mraw, "Illumina")
## anno=getAnnotation(rgSet)
## #ENmix function for fast and accurate distribution plot
## multifreqpoly(beta,main="Data distribution")
## multifreqpoly(beta[anno$Type=="I",],main="Data distribution, type I")
## multifreqpoly(beta[anno$Type=="II",],main="Data distribution, type II")
## #ENmix background correction
## mset<-preprocessENmix(rgSet, bgParaEst="oob", dyeCorr="RELIC", nCores=6)
## #minfi functions for further preprocessing and analysis
## gmSet <- preprocessQuantile(mset)
## bumps <- bumphunter(gmSet, design = model.matrix(~ gmSet$status), B = 0,
## type = "Beta", cutoff = 0.25)


###################################################
### code chunk number 25: sessionInfo
###################################################
toLatex(sessionInfo())