## ----install-packages, eval=FALSE---------------------------------------------
#  if (!requireNamespace("BiocManager", quietly=TRUE)) {
#      install.packages("BiocManager")
#  }
#  BiocManager::install(c("SummarizedExperiment", "ExperimentHub", "FieldEffectCrc"))

## ----load-hub-----------------------------------------------------------------
library(SummarizedExperiment)
library(ExperimentHub)

## ----find-data----------------------------------------------------------------
hub = ExperimentHub()
## simply list the resource names
ns <- listResources(hub, "FieldEffectCrc")
ns
## query the hub for full metadata
crcData <- query(hub, "FieldEffectCrc")
crcData
## extract metadata
df <- mcols(crcData)
df
## see where the cache is stored
hc <- hubCache(crcData)
hc

## ----explore-file-------------------------------------------------------------
md <- hub["EH3524"]
md

## ----download-data------------------------------------------------------------
## using resource ID
data1 <- hub[["EH3524"]]
data1
## using loadResources()
data2 <- loadResources(hub, "FieldEffectCrc", "cohort A")
data2

## ----phenotypes---------------------------------------------------------------
summary(colData(data1)$sampType)

## ----quick-start-1------------------------------------------------------------
se <- data1
counts1 <- assays(se)[["counts"]]

## ----quick-start-2------------------------------------------------------------
counts2 <- assay(se, 2)

## ----quick-start-3------------------------------------------------------------
all(counts1==counts2)

## ----make-txi-----------------------------------------------------------------
## manual construction
txi <- as.list(assays(se))
txi$countsFromAbundance <- "no"
## convenience function construction
txi <- make_txi(se)

## ----make-dds-----------------------------------------------------------------
if (!requireNamespace("DESeq2", quietly=TRUE)) {
    BiocManager::install("DESeq2")
}
library(DESeq2)
dds <- DESeqDataSetFromTximport(txi, colData(se), ~ sampType)

## ----typical-filter-----------------------------------------------------------
countLimit <- 10
sampleLimit <- (1/3) * ncol(dds)
keep <- rowSums( counts(dds) > countLimit ) >= sampleLimit
dds <- dds[keep, ]

## ----special-filter-----------------------------------------------------------
r <- sample(seq_len(nrow(dds)), 6000)
c <- sample(seq_len(ncol(dds)), 250)
dds <- dds[r, c]
r <- rowSums(counts(dds)) >= 10
dds <- dds[r, ]

## ----size-factors-------------------------------------------------------------
dds <- DESeq(dds)

## ----prep-input---------------------------------------------------------------
dat <- counts(dds, normalized=TRUE)  ## extract the normalized counts
mod <- model.matrix(~sampType, data=colData(dds))  ## set the full model
mod0 <- model.matrix(~1, data=colData(dds))  ## set the null model

## ----get-nsv------------------------------------------------------------------
if (!requireNamespace("sva", quietly=TRUE)) {
    BiocManager::install("sva")
}
library(sva)
nsv <- num.sv(dat, mod, method=c("be"), B=20, seed=1)  ## Buja Eyuboglu method

## ----get-svs------------------------------------------------------------------
svs <- svaseq(dat, mod, mod0, n.sv=nsv)

## ----add-svs------------------------------------------------------------------
for (i in seq_len(svs$n.sv)) {
    newvar <- paste0("sv", i)
    colData(dds)[ , newvar] <- svs$sv[, i]
}
nvidx <- (ncol(colData(dds)) - i + 1):ncol(colData(dds))
newvars <- colnames(colData(dds))[nvidx]
d <- formula(
    paste0("~", paste(paste(newvars, collapse="+"), "sampType", sep="+"))
)
design(dds) <- d

## ----test-de------------------------------------------------------------------
dds <- DESeq(dds)

## ----results------------------------------------------------------------------
cons <- list()
m <- combn(levels(colData(dds)$sampType), 2)
for (i in seq_len(ncol(m))) {
    cons[[i]] <- c("sampType", rev(m[, i]))
    names(cons) <- c(
        names(cons)[seq_len(length(cons) - 1)], paste(rev(m[, i]), collapse="v")
    )
}
res <- list()
for (i in seq_len(length(cons))) {
    res[[i]] <- results(dds, contrast=cons[[i]], alpha=0.05)  ## default alpha is 0.1
}
names(res) <- names(cons)

## ----display------------------------------------------------------------------
lapply(res, head)
lapply(res, summary)

## ----sessionInfo--------------------------------------------------------------
sessionInfo()