## ----style, echo = FALSE, results = 'asis', warning=FALSE--------------------- suppressPackageStartupMessages(library(SSPA)) suppressPackageStartupMessages(library(lattice)) BiocStyle::markdown() ## ----simulated-data-generation------------------------------------------------ library(SSPA) library(genefilter) set.seed(12345) m <- 5000 J <- 10 pi0 <- 0.8 m0 <- as.integer(m * pi0) mu <- rbitri(m - m0, a = log2(1.2), b = log2(4), m = log2(2)) data <- simdat( mu, m = m, pi0 = pi0, J = J, noise = 0.01 ) statistics <- rowttests(data, factor(rep(c(0, 1), each = J)))$statistic ## ----simulated-data-plotting, fig.cap="Exploratory plots of the pilot-data: Upper left panel shows a histogram of the test statistics, upper right panel a histogram of the p-values. Lower left panel shows a qq-plot for the test statistics using the user-defined null distributions. Lower right panel shows the sorted p-values against their ranks."---- pdD <- pilotData( statistics = statistics, samplesize = sqrt(1 / (1 / J + 1 / J)), distribution = "norm" ) pdD plot(pdD) ## ----simulated-data-deconvolution, fig.cap="Estimated density of effect sizes: True density of effect sizes, the bitriangular distribution, and estimated density of effect sizes in blue."---- ssD <- sampleSize(pdD, control = list(from = -6, to = 6)) ssD plot( ssD, panel = function(x, y, ...) { panel.xyplot(x, y) panel.curve(dbitri(x), lwd = 2, lty = 2, n = 500) }, ylim = c(0, 0.6) ) ## ----simulated-data-power, fig.cap="Predicted power curve: For sample sizes from 10 to 20 the power is predicted based on the information obtained from the pilot-data."---- Jpred <- seq(10, 20, by = 2) N <- sqrt(Jpred / 2) pwrD <- predictpower(ssD, samplesizes = N, alpha = 0.05) matplot( Jpred, pwrD, type = "b", pch = 16, ylim = c(0, 1), ylab = "predicted power", xlab = "sample size (per group)" ) grid() ## ----simulated-data-effectsize, fig.cap="Conjugate gradient method: Estimated density of effect sizes using the conjugate gradient method."---- pdC <- pilotData( statistics = statistics, samplesize = sqrt(2 * J), distribution = "t", df = 2 * J - 2 ) ssC <- sampleSize(pdC, method = "congrad", control = list( from = -6, to = 6, resolution = 250 )) plot( ssC, panel = function(x, y, ...) { panel.xyplot(x, y) panel.curve(2 * dbitri(2 * x), lwd = 2, lty = 2, n = 500) }, xlim = c(-2, 2), ylim = c(0, 1.2) ) ## ----tikohonov, fig.cap="Tikohonov regularization: Estimated density of effect sizes using the Tikohonov regularization."---- ssT <- sampleSize( pdC, method = "tikhonov", control = list( resolution = 250, scale = "pdfstat", lambda = 10 ^ seq(-10, 10, length = 50), verbose = FALSE, modelselection = "lcurve" ) ) plot( ssT, panel = function(x, y, ...) { panel.xyplot(x, y, type = "b") panel.curve(2 * dbitri(2 * x), lwd = 2, lty = 2, n = 500) }, xlim = c(-2, 2), ylim = c(0, 1.2) ) ## ----nutrigenomics-effectsize, fig.cap="Nutrigenomic example: Estimated density of effect sizes for the five treatment exposure conditions."---- library(lattice) data(Nutrigenomics) str(Nutrigenomics) pd <- apply(Nutrigenomics, 2, function(x) pilotData( statistics = x[-1], samplesize = sqrt(x[1]), distribution = "norm" )) ss <- lapply(pd, sampleSize, control = list( pi0Method = "Storey", a = 0, resolution = 2 ^ 10, verbose = FALSE )) ##ss <- lapply(pd, sampleSize, ## method = "congrad", ## control=list(verbose=FALSE, resolution=2^10, from=-10, to=10)) compounds <- c("Wy14,643", "fenofibrate", "trilinolenin (C18:3)", "Wy14,643", "fenofibrate") exposure <- c("5 Days", "6 Hours") effectsize <- data.frame( exposure = factor(rep(rep(exposure, c( 2, 3 )), each = 1024)), compound = factor(rep(compounds, each = 1024)), lambda = as.vector(sapply(ss, function(x) x@lambda)), theta = as.vector(sapply(ss, function(x) x@theta)) ) print( xyplot( lambda ~ theta | exposure, group = compound, data = effectsize, type = c("g", "l"), layout = c(1, 2), lwd = 2, xlab = "effect size", ylab = "", auto.key = list( columns = 3, lines = TRUE, points = FALSE, cex = 0.7 ) ) ) ## ----nutrigenomics-power, fig.cap="Nutrigenomic example: Predicted power curves for the five treatment exposure conditions."---- samplesize <- seq(2, 8) averagepower <- data.frame( power = as.vector(sapply(ss, function(x) as.numeric( predictpower(x, samplesize = sqrt(samplesize)) ))), exposure = factor(rep(rep(exposure, c( 2, 3 )), each = length(samplesize))), compound = factor(rep(compounds, each = length(samplesize))), samplesize = rep(2 * samplesize, 5) ) print( xyplot( power ~ samplesize | exposure, group = compound, data = averagepower, type = c("g", "b"), layout = c(1, 2), lwd = 2, pch = 16, xlab = "sample size (per group)", ylab = "", auto.key = list( columns = 3, lines = TRUE, points = FALSE, cex = 0.7 ) ) ) ## ----obtain-teststatistics, eval=FALSE---------------------------------------- # ##files contains the full path and file names of each sample # targets <- data.frame(files = files, # group = rep(c("DCLK", "WT"), 4), # description = rep( # c( # "transgenic (Dclk1) mouse hippocampus", # "wild-type mouse hippocampus" # ), # 4 # )) # d <- readDGE(targets) ##reading the data # ##filter out low read counts # cpm.d <- cpm(d) # d <- d[rowSums(cpm.d > 1) >= 4,] # # design <- model.matrix( ~ group, data = d$samples) # ##estimate dispersion # disp <- estimateGLMCommonDisp(d, design) # disp <- estimateGLMTagwiseDisp(disp, design) # ##fit model # glmfit.hoen <- # glmFit(d, design, dispersion = disp$tagwise.dispersion) # ##perform likelihood ratio test # lrt.hoen <- glmLRT(glmfit.hoen) # ##extract results # tbl <- topTags(lrt.hoen, n = nrow(d))[[1]] # statistics <- tbl$LR ## ----deepsage-effectsize, fig.cap="Deep SAGE example: Estimated density of effect size and predicted power curve."---- library(lattice) data(deepSAGE) str(deepSAGE) pd <- pilotData( statistics = deepSAGE, samplesize = 8, distribution = "chisq", df = 1 ) ss <- sampleSize(pd, method = "congrad", control = list( trim = c(0, 0.98), symmetric = FALSE, from = 0, to = 10 )) pwr <- predictpower(ss, samplesize = c(8, 16, 32)) op <- par(mfcol = c(2, 1), mar = c(5, 4, 1, 1)) plot(ss@theta, ss@lambda, xlab = "effect size", ylab = "", type = "l") plot( c(8, 16, 32), pwr, xlab = "sample size", ylab = "power", type = "b", ylim = c(0, 1) ) grid(col = 1) par(op) ## ----session info, echo=FALSE------------------------------------------------- sessionInfo()