### R code from vignette source 'ENmix.Rnw' ################################################### ### code chunk number 1: options ################################################### options(width=70) ################################################### ### code chunk number 2: example0 (eval = FALSE) ################################################### ## rgSet <- readidat(datapath) ## beta=mpreprocess(rgSet) ################################################### ### code chunk number 3: example1 (eval = FALSE) ################################################### ## library(ENmix) ## #read in data ## require(minfiData) ## #read in IDAT files ## path <- file.path(find.package("minfiData"),"extdata") ## rgSet <- readidat(path = path,recursive = TRUE) ## #data pre-processing ## beta=mpreprocess(rgSet,nCores=6) ## #quality control, data pre-processing and imputation ## beta=mpreprocess(rgSet,nCores=6,qc=TRUE,fqcfilter=TRUE, ## rmcr=TRUE,impute=TRUE) ################################################### ### code chunk number 4: example2 (eval = FALSE) ################################################### ## library(ENmix) ## #read in data ## path <- file.path(find.package("minfiData"),"extdata") ## rgSet <- readidat(path = path,recursive = TRUE) ## #QC info ## qc<-QCinfo(rgSet) ## #background correction and dye bias correction ## #if qc info object is provided, the low quality or outlier samples ## # and probes will be excluded before background correction ## mdat<-preprocessENmix(rgSet, bgParaEst="oob", dyeCorr="RELIC", ## QCinfo=qc, nCores=6) ## #between-array normalization ## mdat<-norm.quantile(mdat, method="quantile1") ## #probe-type bias adjustment ## beta<-rcp(mdat,qcscore=qc) ## #assign missing to low quality and outlier data points, remove samples ## # and probes with too many missing data, and do imputation ## beta <- qcfilter(beta,qcscore=qc,rmcr=TRUE,impute=TRUE) ################################################### ### code chunk number 5: example3 (eval = FALSE) ################################################### ## library(ENmix) ## #read in data ## path <- file.path(find.package("minfiData"),"extdata") ## rgSet <- readidat(path = path,recursive = TRUE) ## ## #attach some phenotype info for later use ## sheet <- read.metharray.sheet(file.path(find.package("minfiData"), ## "extdata"),pattern = "csv$") ## rownames(sheet)=paste(sheet$Slide,sheet$Array,sep="_") ## colData(rgSet)=as(sheet[colnames(rgSet),],"DataFrame") ## ## #generate internal control plots to inspect quality of experiments ## plotCtrl(rgSet) ## ## #generate quality control (QC) information and plots, ## #identify outlier samples in data distribution ## #if set detPtype="negative", recommand to set depPthre to <= 10E-6 ## #if set detPtype="oob", depPthre of 0.01 or 0.05 may be sufficient ## #see Heiss et al. PMID: 30678737 for details ## qc<-QCinfo(rgSet,detPtype="negative",detPthre=0.000001) ## ## ###data preprocessing ## #background correction and dye bias correction ## mdat<-preprocessENmix(rgSet, bgParaEst="oob", dyeCorr="RELIC", ## QCinfo=qc, nCores=6) ## #between-array normalization ## mdat<-norm.quantile(mdat, method="quantile1") ## #attach phenotype data for later use ## colData(mdat)=as(sheet[colnames(mdat),],"DataFrame") ## #probe-type bias adjustment ## beta<-rcp(mdat,qcscore=qc) ## ## #Search for multimodal CpGs, so called gap probes ## #beta matrix before qcfilter() step should be used for this purpose ## nmode<-nmode(beta, minN = 3, modedist=0.2, nCores = 6) ## ## #filter out poor quality and outlier data points for each probe; ## #rows and columns with too many missing values can be removed ## #Imputation will be performed to fill missing data if specify. ## beta <- qcfilter(beta,qcscore=qc,rmcr=TRUE, rthre=0.05, ## cthre=0.05,impute=TRUE) ## ## #If for epigenetic mutation analysis, use ## #beta <- qcfilter(beta,qcscore=qc,rmoutlier=FALSE,rmcr=TRUE, rthre=0.05, ## # cthre=0.05,impute=FALSE) ## ## ##Principal component regression analysis plot ## #phenotypes to be studied in the plot ## cov<-data.frame(group=colData(mdat)$Sample_Group, ## slide=factor(colData(mdat)$Slide)) ## #missing data is not allowed in the analysis! ## pcrplot(beta, cov, npc=6) ## ## #Non-negative control surrogate variables, which can be used in ## # downstream association analysis to control for batch effects. ## csva<-ctrlsva(rgSet) ## ## #estimate cell type proportions ## #rgDataset or methDataSet can also be used for the estimation ## celltype=estimateCellProp(userdata=beta, ## refdata="FlowSorted.Blood.450k", ## nonnegative = TRUE,normalize=TRUE) ## ## #predic sex based on rgDataSet or methDataset ## sex=predSex(rgSet) ## sex=predSex(mdat) ## ## #Methylation age calculation ## mage=methyAge(beta) ## ## #ICC analysis can be performed to study measurement reliability if ## # have duplicates, see manual ## #dupicc() ## ## #After association analysis, the P values can be used for DMR analysis ## #simulate a small example file in BED format ## dat=simubed() ## #using ipdmr method ## ipdmr(data=dat,seed=0.1) ## #using comb-p method ## combp(data=dat,seed=0.1) ################################################### ### code chunk number 6: readdata (eval = FALSE) ################################################### ## library(ENmix) ## rgSet <- readidat(path = "directory path for idat files", ## recursive = TRUE) ## ## #Download manifestfile for HumanMethylation450 beadchip ## system("wget ftp://webdata2:webdata2@ussd-ftp.illumina.com/downloads/ProductFiles ## /HumanMethylation450/HumanMethylation450_15017482_v1-2.csv") ## mf="HumanMethylation450_15017482_v1-2.csv" ## rgSet <- readidat(path = "path to idat files",manifestfile=mf,recursive = TRUE) ################################################### ### code chunk number 7: readdata2 (eval = FALSE) ################################################### ## M<-matrix_for_methylated_intensity ## U<-matrix_for_unmethylated_intensity ## pheno<-as.data.frame(cbind(colnames(M), colnames(M))) ## names(pheno)<-c("Basename","filenames") ## rownames(pheno)<-pheno$Basename ## pheno<-AnnotatedDataFrame(data=pheno) ## anno<-c("IlluminaHumanMethylation450k", "ilmn12.hg19") ## names(anno)<-c("array", "annotation") ## mdat<-MethylSet(Meth = M, Unmeth = U, annotation=anno, ## phenoData=pheno) ################################################### ### code chunk number 8: ctrlplot (eval = FALSE) ################################################### ## plotCtrl(rgSet) ################################################### ### code chunk number 9: ctrlplot2 (eval = FALSE) ################################################### ## ## path <- file.path(find.package("minfiData"),"extdata") ## rgSet <- readidat(path = path,recursive = TRUE) ## sheet <- read.metharray.sheet(file.path(find.package("minfiData"), ## "extdata"), pattern = "csv$") ## rownames(sheet)=paste(sheet$Slide,sheet$Array,sep="_") ## colData(rgSet)=as(sheet[colnames(rgSet),],"DataFrame") ## #control plots ## IDorder=rownames(colData(rgSet))[order(colData(rgSet)$Slide, ## colData(rgSet)$Array)] ## plotCtrl(rgSet,IDorder) ################################################### ### code chunk number 10: distplot (eval = FALSE) ################################################### ## ## mraw <- getmeth(rgSet) ## #total intensity plot is userful for data quality inspection ## #and identification of outlier samples ## multifreqpoly(assays(mraw)$Meth+assays(mraw)$Unmeth, ## xlab="Total intensity") ## #Compare frequency polygon plot and density plot ## beta<-getB(mraw) ## anno=rowData(mraw) ## beta1=beta[anno$Infinium_Design_Type=="I",] ## beta2=beta[anno$Infinium_Design_Type=="II",] ## library(geneplotter) ## jpeg("dist.jpg",height=900,width=600) ## par(mfrow=c(3,2)) ## multidensity(beta,main="Multidensity") ## multifreqpoly(beta,main="Multifreqpoly",xlab="Beta value") ## multidensity(beta1,main="Multidensity: Infinium I") ## multifreqpoly(beta1,main="Multifreqpoly: Infinium I", ## xlab="Beta value") ## multidensity(beta2,main="Multidensity: Infinium II") ## multifreqpoly(beta2,main="Multifreqpoly: Infinium II", ## xlab="Beta value") ## dev.off() ################################################### ### code chunk number 11: filter (eval = FALSE) ################################################### ## qc<-QCinfo(rgSet) ## #exclude badsample and bad CpG before backgroud correction ## mdat<-preprocessENmix(rgSet, QCinfo=qc, nCores=6) ################################################### ### code chunk number 12: rmoutlier (eval = FALSE) ################################################### ## #filter out outliers only ## b1=qcfilter(beta) ## #filter out low quality and outlier values ## b2=qcfilter(beta,qcscore=qc) ## #filter out low quality and outlier values, remove rows ## #and columns with too many missing values ## b3=qcfilter(beta,qcscore=qc,rmcr=TRUE) ## #filter out low quality and outlier values, remove rows and ## #columns with too many (rthre=0.05,cthre=0.05, 5% in default) missing values, ## # and then do imputation ## b3=qcfilter(beta,qcscore=qc,rmcr=TRUE,rthre=0.05, ## cthre=0.05,impute=TRUE) ################################################### ### code chunk number 13: preprocessENmix (eval = FALSE) ################################################### ## qc=QCinfo(rgSet) ## mdat<-preprocessENmix(rgSet, bgParaEst="oob", dyeCorr="RELIC", ## QCinfo=qc, exCpG=NULL, nCores=6) ################################################### ### code chunk number 14: normalize.quantile.450k (eval = FALSE) ################################################### ## mdat<-norm.quantile(mdat, method="quantile1") ################################################### ### code chunk number 15: rcp (eval = FALSE) ################################################### ## beta<-rcp(mdat) ################################################### ### code chunk number 16: ctrlsva (eval = FALSE) ################################################### ## sva<-ctrlsva(rgSet) ################################################### ### code chunk number 17: pcrplot (eval = FALSE) ################################################### ## require(minfiData) ## mdat <- preprocessRaw(RGsetEx) ## beta=getBeta(mdat, "Illumina") ## group=pData(mdat)$Sample_Group ## slide=factor(pData(mdat)$Slide) ## cov=data.frame(group,slide) ## pcrplot(beta,cov,npc=6) ################################################### ### code chunk number 18: nmode (eval = FALSE) ################################################### ## nmode<- nmode(beta, minN = 3, modedist=0.2, nCores = 5) ################################################### ### code chunk number 19: celltype (eval = FALSE) ################################################### ## require(minfidata) ## path <- file.path(find.package("minfiData"),"extdata") ## #based on rgDataset ## rgSet <- readidat(path = path,recursive = TRUE) ## celltype=estimateCellProp(userdata=rgSet, ## refdata="FlowSorted.Blood.450k", ## nonnegative = TRUE,normalize=TRUE) ################################################### ### code chunk number 20: mage (eval = FALSE) ################################################### ## require(minfidata) ## path <- file.path(find.package("minfiData"),"extdata") ## #based on rgDataset ## rgSet <- readidat(path = path,recursive = TRUE) ## meth=getmeth(rgSet) ## beta=getB(meth) ## mage=methyAge(beta) ################################################### ### code chunk number 21: dmr (eval = FALSE) ################################################### ## dat=simubed() ## names(dat) ## ipdmr(data=dat,seed=0.1) ## combp(data=dat,seed=0.1) ################################################### ### code chunk number 22: icc (eval = FALSE) ################################################### ## require(minfiData) ## path <- file.path(find.package("minfiData"),"extdata") ## rgSet <- readidat(path = path,recursive = TRUE) ## mdat=getmeth(rgSet) ## beta=getB(mdat,"Illumina") ## #assuming list in id1 are corresponding duplicates of id2 ## dupidx=data.frame(id1=c("5723646052_R02C02","5723646052_R04C01", ## "5723646052_R05C02"), ## id2=c("5723646053_R04C02","5723646053_R05C02", ## "5723646053_R06C02")) ## iccresu<-dupicc(dat=beta,dupid=dupidx) ################################################### ### code chunk number 23: oxbs (eval = FALSE) ################################################### ## load(system.file("oxBS.MLE.RData",package="ENmix")) ## resu<-oxBS.MLE(beta.BS,beta.oxBS,N.BS,N.oxBS) ## dim(resu[["5mC"]]) ## dim(resu[["5hmC"]]) ################################################### ### code chunk number 24: ENmixAndminfi (eval = FALSE) ################################################### ## library(ENmix) ## #minfi functions to read in data ## sheet <- read.metharray.sheet(file.path(find.package("minfiData"), ## "extdata"), pattern = "csv$") ## rgSet <- read.metharray.exp(targets = sheet, extended = TRUE) ## #ENmix function for control plot ## plotCtrl(rgSet) ## #minfi functions to extract methylation and annotation data ## mraw <- preprocessRaw(rgSet) ## beta<-getB(mraw, "Illumina") ## anno=getAnnotation(rgSet) ## #ENmix function for fast and accurate distribution plot ## multifreqpoly(beta,main="Data distribution") ## multifreqpoly(beta[anno$Type=="I",],main="Data distribution, type I") ## multifreqpoly(beta[anno$Type=="II",],main="Data distribution, type II") ## #ENmix background correction ## mset<-preprocessENmix(rgSet, bgParaEst="oob", dyeCorr="RELIC", nCores=6) ## #minfi functions for further preprocessing and analysis ## gmSet <- preprocessQuantile(mset) ## bumps <- bumphunter(gmSet, design = model.matrix(~ gmSet$status), B = 0, ## type = "Beta", cutoff = 0.25) ################################################### ### code chunk number 25: sessionInfo ################################################### toLatex(sessionInfo())