## ----echo = FALSE-------------------------------------------------------------
library(knitr)

## ----eval=FALSE---------------------------------------------------------------
#  if(!requireNamespace("BiocManager", quietly = TRUE))
#      install.packages("BiocManager")
#  BiocManager::install("saseR")

## ----eval=FALSE---------------------------------------------------------------
#  library(devtools)
#  devtools::install_github("statOmics/saseR")

## ----message=FALSE, warning=FALSE---------------------------------------------
library(saseR)
suppressWarnings(library(ASpli))
library(edgeR)
library(DESeq2)
library(limma)
library(GenomicFeatures)
library(dplyr)
library(pracma)
library(PRROC)
library(BiocParallel)
library(data.table)
library(igraph)

## -----------------------------------------------------------------------------
gtfFileName <- ASpli::aspliExampleGTF()
BAMFiles <- ASpli::aspliExampleBamList()

gtfFileName
head(BAMFiles)

## -----------------------------------------------------------------------------
targets <- data.frame( 
  row.names = paste0('Sample',c(1:12)),
  bam = BAMFiles,
  f1 = rep("A",12),
  stringsAsFactors = FALSE)

head(targets)

## -----------------------------------------------------------------------------
genomeTxDb <- txdbmaker::makeTxDbFromGFF(gtfFileName)
features <- binGenome(genomeTxDb, logTo = NULL)

## -----------------------------------------------------------------------------
ASpliSE <- BamtoAspliCounts(
                    features = features,
                    targets = targets,
                    minReadLength = 100,
                    libType = "SE",
                    BPPARAM = MulticoreParam(1L)
                    )

## -----------------------------------------------------------------------------
SEgenes <- convertASpli(ASpliSE, type = "gene")
SEbins <- convertASpli(ASpliSE, type = "bin")
SEjunctions <- convertASpli(ASpliSE, type = "junction")

## -----------------------------------------------------------------------------
metadata(SEgenes)$design <- ~1

## -----------------------------------------------------------------------------
filtergenes <- filterByExpr(SEgenes)
SEgenes <- SEgenes[filtergenes,]

## -----------------------------------------------------------------------------
SEgenes <- calculateOffsets(SEgenes, method = "TMM")

## -----------------------------------------------------------------------------
SEgenes <- saseRfindEncodingDim(SEgenes, method = "GD")

## -----------------------------------------------------------------------------
SEgenes <- saseRfit(SEgenes,
                    analysis = "AE",
                    padjust = "BH",
                    fit = "fast")

## -----------------------------------------------------------------------------
order <- apply(assays(SEgenes)$pValue, 2, order, decreasing = FALSE)

topPvalsGenes <- sapply(1:ncol(SEgenes), 
                   function(x) {assays(SEgenes)$pValue[order[1:5,x],x]})
rownames(topPvalsGenes) <- NULL

topGenes <- sapply(1:ncol(SEgenes), 
                   function(x) {rownames(SEgenes)[order[1:5,x]]})

significantgenes <- sapply(1:ncol(SEgenes), 
              function(x) {
                  rownames(SEgenes)[assays(SEgenes)$pValueAdjust[,x] < 0.05]})


head(topPvalsGenes)
head(topGenes)
head(significantgenes)

## -----------------------------------------------------------------------------
metadata(SEbins)$design <- ~1
metadata(SEjunctions)$design <- ~1


## -----------------------------------------------------------------------------
SEbins <- calculateOffsets(SEbins, 
                            method = "AS", 
                            aggregation = "locus")

SEjunctions <- calculateOffsets(SEjunctions, 
                            method = "AS", 
                            aggregation = "symbol",
                            mergeGeneASpli = TRUE)


## -----------------------------------------------------------------------------
filterbins <- filterByExpr(SEbins)
SEbins <- SEbins[filterbins,]

filterjunctions <- filterByExpr(SEjunctions)
SEjunctions <- SEjunctions[filterjunctions,]

## -----------------------------------------------------------------------------
SEbins <- saseRfindEncodingDim(SEbins, method = "GD")
SEjunctions <- saseRfindEncodingDim(SEjunctions, method = "GD")


## -----------------------------------------------------------------------------
SEbins <- saseRfit(SEbins,
                    analysis = "AS",
                    padjust = "BH",
                    fit = "fast")

SEjunctions <- saseRfit(SEjunctions,
                    analysis = "AS",
                    padjust = "BH",
                    fit = "fast")

## -----------------------------------------------------------------------------
order <- apply(assays(SEbins)$pValue, 2, order, decreasing = FALSE)

topPvalsBins <- sapply(1:ncol(SEbins), 
                   function(x) {assays(SEbins)$pValue[order[1:5,x],x]})

rownames(topPvalsBins) <- NULL

topBins <- sapply(1:ncol(SEbins), 
                   function(x) {rownames(SEbins)[order[1:5,x]]})

significantBins <- sapply(1:ncol(SEbins), 
                   function(x) {
                    rownames(SEbins)[assays(SEgenes)$pValueAdjust[,x] < 0.05]})

head(topPvalsBins)
head(topBins)
head(significantBins)

## -----------------------------------------------------------------------------
order <- apply(metadata(SEbins)$pValuesLocus, 2, order, decreasing = FALSE)

topPvalsBinsAggregated <- sapply(1:ncol(SEbins), 
                   function(x) {metadata(SEbins)$pValuesLocus[order[1:5,x],x]})
rownames(topPvalsBinsAggregated) <- NULL

topBinsAggregated <- sapply(1:ncol(SEbins), 
                   function(x) {
                     rownames(metadata(SEbins)$pValuesLocus)[order[1:5,x]]})

significantBinsAggregated <- sapply(1:ncol(SEbins), 
              function(x) {
                  rownames(SEbins)[metadata(SEgenes)$pValuesLocus[,x] < 0.05]})

head(topPvalsBinsAggregated)
head(topBinsAggregated)
head(significantBinsAggregated)

## ----message=FALSE, warning=FALSE---------------------------------------------
library(ASpli)
library(DESeq2)
library(DEXSeq)
library(edgeR)
library(dplyr)
library(GenomicAlignments)
library(GenomicFeatures)

## -----------------------------------------------------------------------------
gtfFileName <- aspliExampleGTF()
BAMFiles <- aspliExampleBamList()

## -----------------------------------------------------------------------------
genomeTxDb <- makeTxDbFromGFF(gtfFileName)
flattenedAnnotation = exonicParts(genomeTxDb, 
                                  linked.to.single.gene.only=TRUE )

## -----------------------------------------------------------------------------
se = summarizeOverlaps(
    flattenedAnnotation,
    BAMFiles, 
    singleEnd=TRUE, 
    ignore.strand=TRUE )

## -----------------------------------------------------------------------------
colData(se)$condition <- factor(c(rep(c("control", "case"), each = 6)))


## -----------------------------------------------------------------------------
counts <- assays(se)$counts
offsetsGene <- aggregate(counts,
                     by = list("gene" = rowData(se)$gene_id), FUN = sum) 
offsets <- offsetsGene[match(rowData(se)$gene_id, offsetsGene$gene), ] %>% 
                     mutate("gene" = NULL) %>% 
                     as.matrix() 

## -----------------------------------------------------------------------------
index <- offsets == 0
counts[index] <- 1
offsets[index] <- 1


## -----------------------------------------------------------------------------
dds <- DESeqDataSetFromMatrix(countData = counts,
                              colData = colData(se),
                              rowData = rowData(se),
                              design= ~ condition)
normalizationFactors(dds) <- offsets

dds <- DESeq2::estimateDispersionsGeneEst(dds)
dispersions(dds) <- mcols(dds)$dispGeneEst
dds <- DESeq2::nbinomWaldTest(dds)
results_dds <- DESeq2::results(dds)


## -----------------------------------------------------------------------------
DGE <- DGEList(counts = counts)
DGE$offset <- log(offsets)
design <- model.matrix(~condition, data = colData(se))

DGE <- edgeR::estimateDisp(DGE, design = design)
fitDGE <- edgeR::glmFit(DGE, design= design)
results_DGE <- edgeR::glmLRT(fitDGE, coef = 2)


## -----------------------------------------------------------------------------
sessionInfo()