### R code from vignette source 'Clonality.Rnw'

###################################################
### code chunk number 1: Frequency estimation
###################################################
  library(Clonality)
data(lcis)
data(freqdata)

mut.matrix<-create.mutation.matrix(lcis )

freq<-get.mutation.frequencies(rownames(mut.matrix),"BRCA") 


###################################################
### code chunk number 2: link (eval = FALSE)
###################################################
## 
## load(url("https://github.com/IOstrovnaya/MutFreq/blob/master/freqdata.RData?raw=true"))


###################################################
### code chunk number 3: code (eval = FALSE)
###################################################
## 
## #  The file mc3.v0.2.8.PUBLIC.maf can be downloaded from https://gdc.cancer.gov/about-data/publications/mc3-2017
##  pancan<-read.table(  "mc3.v0.2.8.PUBLIC.maf",sep="\t",header=T,quote="") 
##  
## #  The file clinical.tsv.csv can be downloaded from https://portal.gdc.cancer.gov/projects - > check TCGA and clinical on the left panel -> 
## # click on 11,315 Cases across 33 Projects on the upper right corner -> click on Clinical->TSV button below
##  pancaninfo<-read.table("clinical.tsv",sep="\t",header=T)
##  
##  
##  pancaninfo$project_id<-substr(pancaninfo$project_id,6,11)
##  pancan$Tumor_Sample_Barcode<-substr(pancan$Tumor_Sample_Barcode,1,12)
##  pancan$mut<-paste(pancan$Chromosome,pancan$Start_Position,pancan$Reference_Allele,pancan$Tumor_Seq_Allele2)
##  pancan$type<-pancaninfo$project_id[match(pancan$Tumor_Sample_Barcode,pancaninfo$submitter_id)]
##  pancan<-pancan[!duplicated(paste(pancan$Tumor_Sample_Barcode,pancan$mut)),]
##  pancan<-pancan[!is.na(pancan$type),]
##  
##  pancan$mut<-as.factor(pancan$mut)
##  types<-unique(pancan$type)
##  ntypes<-NULL
## freqdata<-NULL
## for (i in types)
## {w<-pancan$type==i
## s<-length(unique(pancan$Tumor_Sample_Barcode[w]))
##  ntypes<-c(ntypes,s)
##  freqdata<-cbind(freqdata,table(pancan$mut[w]))
##  
##  }
##  
##  rownames(freqdata)<-names(table(pancan$mut[w]))
##  colnames(freqdata)<-types
##  freqdata<-rbind(ntypes,freqdata)
##  freqdata<-freqdata[,c(c("COAD","LUAD","BRCA"))]
##  freqdata<-freqdata[apply(freqdata,1,sum)>0,]


###################################################
### code chunk number 4: LR estimation
###################################################
  
  table(mut.matrix$TK53IDC2,mut.matrix$TK53LCIS2 )
freq[mut.matrix$TK53IDC2+mut.matrix$TK53LCIS2==2]


###################################################
### code chunk number 5: LR test2
###################################################
  set.seed(1) 
SNVtest(mut.matrix$TK53IDC2,mut.matrix$TK53LCIS2  ,freq)


###################################################
### code chunk number 6: Lung and colon frequencies
###################################################
#restricting set of loci to 10,000 so that the data are more similar to targeted sequencing
mut.list<-row.names(freqdata[freqdata[,"COAD"]>=1 | freqdata[,"LUAD"]>=1,][-1,])[1:10000] 

freqCOAD<-get.mutation.frequencies(mut.list,"COAD")/freqdata[1,"COAD"] 

#arbitrarily specified background mutation rate
freqCOAD[freqCOAD==min(freqCOAD)]<-1/(freqdata[1,"COAD"] +freqdata[1,"LUAD"]  ) 
freqLUAD<-get.mutation.frequencies(mut.list,"LUAD")/freqdata[1,"LUAD"]  

#arbitrarily specified background mutation rate
freqLUAD[freqLUAD==min(freqLUAD)]<-1/(freqdata[1,"COAD"] +freqdata[1,"LUAD"]  )  


###################################################
### code chunk number 7: LR test2
###################################################
  set.seed(1) 
x1<-as.numeric(runif(length(mut.list))<=freqCOAD)
x2<-as.numeric(runif(length(mut.list))<=freqLUAD)


###################################################
### code chunk number 8: LR test2
###################################################
SNVtest2(x1,x2  ,cbind(freqCOAD,freqLUAD))


###################################################
### code chunk number 9: REM estimation
###################################################
  data(lcis)
mut.matrix<-create.mutation.matrix(lcis ,rem=TRUE)
freq<-get.mutation.frequencies(rownames(mut.matrix),"BRCA") 
mod <- mutation.rem(cbind(freq, mut.matrix))


###################################################
### code chunk number 10: REM estimation2
###################################################
  print(mod)


###################################################
### code chunk number 11: REM proba
###################################################
  
  mod <- mutation.rem(cbind(freq,mut.matrix), proba=TRUE)


###################################################
### code chunk number 12: REM proba2
###################################################
  print(mod)


###################################################
### code chunk number 13: REM proba new case
###################################################
pi <- runif(30,0.001,0.13)
newpair <- cbind(pi,rbinom(30,1,1-pi*pi)+1)

new.likmat <- grid.lik(xigrid=c(0, seq(0.0005, 0.9995, by=0.001)), 
                       as.matrix(newpair[,c(-1)]), newpair[,1])

proba <- mutation.proba(c(mod$mu, mod$sigma, mod$pi), t(as.matrix(new.likmat)) )


###################################################
### code chunk number 14: REM proba new case2
###################################################
  print(proba)


###################################################
### code chunk number 15: read data
###################################################
 library(Clonality)
set.seed(100)
chrom<-paste("chr",rep(c(1:22),each=100),"p",sep="")
chrom[nchar(chrom)==5]<-paste("chr0",substr(chrom[nchar(chrom)==5] ,4,5),sep="")
maploc<- rep(c(1:100),22)
data<-NULL
for (pt in 1:9) 
{
tumor1<-tumor2<- NULL
mean1<- rnorm(22) 
mean2<- rnorm(22)
for (chr in 1:22)
{ 
  r<-runif(2) 
if (r[1]<=0.5) tumor1<-c(tumor1,rep(0,100))   
  else if   (r[1]>0.7)  tumor1<-c(tumor1,rep(mean1[chr],100))
  else  { i<-sort(sample(1:100,2))
        tumor1<-c(tumor1,mean1[chr]*c(rep(0,  i[1]),rep(1, i[2]-i[1]), rep(0,  100-i[2])))
        }
if (r[2]<=0.5) tumor2<-c(tumor2,rep(0,100))
  else if   (r[2]>0.7)  tumor2<-c(tumor2,rep(mean2[chr],100))
  else   {i<-sort(sample(1:100,2))
       tumor2<-c(tumor2,mean2[chr]*c(rep(0,  i[1]),rep(1, i[2]-i[1]), rep(0,  100-i[2])))
         }
}
data<-cbind(data,tumor1,tumor2)
}


tumor1<- NULL
mean1<- rnorm(22) 
for (chr in 1:22)
{ 
  r<-runif(1) 
if (r<=0.4) tumor1<-c(tumor1,rep(0,100))   
  else if   (r>0.6)  tumor1<-c(tumor1,rep(mean1[chr],100))
  else  { i<-sort(sample(1:100,2))
        tumor1<-c(tumor1,mean1[chr]*c(rep(0,  i[1]),rep(1, i[2]-i[1]), rep(0,  100-i[2])))
        }

}
data<-cbind(data,tumor1,tumor1)

data<-data+matrix(rnorm( 44000,mean=0,sd=0.4) ,nrow=2200,ncol=20)


samnms<-paste("pt",rep(1:10,each=2),rep(1:2,10),sep=".")


###################################################
### code chunk number 16: Clonality.Rnw:294-295
###################################################
dim(data)


###################################################
### code chunk number 17: CNA
###################################################

dataCNA<-CNA(data,chrom=chrom,maploc=maploc,sampleid=samnms)
as.matrix(dataCNA)[1:5,1:10]


###################################################
### code chunk number 18: averaging
###################################################
dataAve<- ave.adj.probes(dataCNA,2)


###################################################
### code chunk number 19: Clonality.Rnw:320-321
###################################################
ptlist<- paste("pt",rep(1:10,each=2),sep=".")


###################################################
### code chunk number 20: main clonality analysis
###################################################
results<-clonality.analysis(dataCNA, ptlist,  pfreq = NULL, refdata = NULL, nmad = 1,  reference = TRUE, allpairs = TRUE)


###################################################
### code chunk number 21: Clonality.Rnw:330-331
###################################################
results$LR


###################################################
### code chunk number 22: genomewidePlots
###################################################
genomewidePlots(results$OneStepSeg, results$ChromClass,ptlist , c("pt.10.1", "pt.10.2"),results$LR,  plot.as.in.analysis = TRUE) 


###################################################
### code chunk number 23: Clonality.Rnw:344-345
###################################################
chromosomePlots(results$OneStepSeg, ptlist,ptname="pt.10",nmad=1)


###################################################
### code chunk number 24: Clonality.Rnw:350-351
###################################################
histogramPlot(results$LR[,4], results$refLR[,4])


###################################################
### code chunk number 25: LOH data generation
###################################################
set.seed(25)
LOHtable<-cbind(1:20,matrix(sample(c(0,1,2),20*20,replace=TRUE),20))
LOHtable[,3]<-LOHtable[,2]
LOHtable[1,3]<-0


###################################################
### code chunk number 26: LOH data analysis
###################################################
LOHtable[,1:5]
LOHclonality(LOHtable,rep(1:10,each=2),pfreq=NULL,noloh=0,loh1=1,loh2=2)


###################################################
### code chunk number 27: sessionInfo
###################################################
sessionInfo()