Vince Carey
June 14, 2017
Road map:
Mystery functions?
ts_zpm = function(d) sqrt(length(d))*mean(d)/sd(d)
contam1 = function(x, slip=5) {x[1] = x[1]+slip; x}
set.seed(12345)
X = rnorm(50)
ts_zpm(X)
## [1] 1.157889
ts_zpm(contam1(X))
## [1] 1.479476
ts_zpm(contam1(X,100))
## [1] 1.082075
set.seed(12345)
X = rnorm(50)
tst = t.test(X)
tst
##
## One Sample t-test
##
## data: X
## t = 1.1579, df = 49, p-value = 0.2525
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.1320800 0.4912126
## sample estimates:
## mean of x
## 0.1795663
str(tst)
## List of 9
## $ statistic : Named num 1.16
## ..- attr(*, "names")= chr "t"
## $ parameter : Named num 49
## ..- attr(*, "names")= chr "df"
## $ p.value : num 0.253
## $ conf.int : atomic [1:2] -0.132 0.491
## ..- attr(*, "conf.level")= num 0.95
## $ estimate : Named num 0.18
## ..- attr(*, "names")= chr "mean of x"
## $ null.value : Named num 0
## ..- attr(*, "names")= chr "mean"
## $ alternative: chr "two.sided"
## $ method : chr "One Sample t-test"
## $ data.name : chr "X"
## - attr(*, "class")= chr "htest"
tst$statistic # from R
## t
## 1.157889
sqrt(50)*mean(X)/sd(X) # by hand
## [1] 1.157889
set.seed(12345)
simdist = replicate(10000, ts_zpm( rnorm(50) ))
head(simdist)
## [1] 1.15788926 1.92818028 -0.08173422 0.82143100 0.39880144 -1.21706025
hist(simdist, freq=FALSE)
lines(seq(-3,3,.01), dt(seq(-3,3,.01), 49))
simulated:
mean(simdist > 1.1579)
## [1] 0.123
theoretical (exact):
integrate(function(x) dt(x,49), 1.1579, Inf)$value
## [1] 0.1262589
contsim = replicate(10000,
ts_zpm( contam1( rnorm(50) ) ) )
critval_1sided = qt(.95, 49)
mean(simdist > critval_1sided) # uncontaminated
## [1] 0.0483
mean(contsim > critval_1sided) # contaminated
## [1] 0.0946
library(parody)
## Loading required package: tools
robust_t = function(x) {
outchk = calout.detect(x, method="GESD")
if (!is.na(outchk$ind[1])) x = x[-outchk$ind]
sqrt(length(x))*mean(x)/sd(x)
}
set.seed(12345)
contsim_r = replicate(10000,
robust_t( contam1( rnorm(50) ) ) )
mean(contsim_r > critval_1sided) # robust test on contaminated
## [1] 0.0531
power.t.test(n=50, type="one.sample",
alt="one.sided", delta=.4)
##
## One-sample t test power calculation
##
## n = 50
## delta = 0.4
## sd = 1
## sig.level = 0.05
## power = 0.8737242
## alternative = one.sided
# empirical
mean( replicate(10000,
ts_zpm(rnorm(50, .4))>qt(.95, 49)) )
## [1] 0.8764
mean( replicate(10000, ts_zpm(
contam1(rnorm(50, .4), slip=25))>qt(.95, 49)) )
## [1] 0.5834
Exercises:
We’ll use a series of outlier magnitudes (0:10) and summarize distributions of estimators
set.seed(12345)
mns <- sapply(0:10, function(o)
median(replicate(5000, mean(contam1(rnorm(50),o)))))
set.seed(12345)
meds <- sapply(0:10, function(o)
median(replicate(5000, median(contam1(rnorm(50),o)))))
plot(0:10, mns, xlab="outlier magnitude", ylab="median of stat",
pch=19)
points(0:10, meds, pch=19, col="blue")
legend(0, .1, pch=19, col=c("black", "blue"), legend=c("mean", "median"))
set.seed(12345)
sds <- sapply(0:10, function(o)
median(replicate(5000, sd(contam1(rnorm(50),o)))))
set.seed(12345)
mads <- sapply(0:10, function(o)
median(replicate(5000, mad(contam1(rnorm(50),o)))))
plot(0:10, sds, xlab="outlier magnitude", ylab="median of stat",
pch=19)
points(0:10, mads, pch=19, col="blue")
legend(0, 1.2, pch=19, col=c("black", "blue"), legend=c("SD", "MAD"))
contam1()
to help demonstrate the breakdown conceptqsignrank(.95, 50)
## [1] 808
set.seed(12345)
wilcox.test(rnorm(50, .4))$statistic
## V
## 977
Show that the (x,y) pairs have identical
- marginal means
- marginal SDs
- correlation coefficients
- linear regressions of y on x
Use MASS::rlm to get a model for y3, x3 that fits the majority of points exactly
suppressMessages({set.seed(12345)
library(DESeq2)
S1 = makeExampleDESeqDataSet(betaSD=.75)
D1 = DESeq(S1)
R1 = results(D1)})
summary(R1)
##
## out of 997 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up) : 99, 9.9%
## LFC < 0 (down) : 95, 9.5%
## outliers [1] : 2, 0.2%
## low counts [2] : 211, 21%
## (mean count < 6)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results