## ----style, echo = FALSE, results = 'asis'---------------------------------
BiocStyle::markdown()

## ----setup, echo=FALSE-----------------------------------------------------
knitr::opts_chunk$set(
    eval=as.logical(Sys.getenv("KNITR_EVAL", "TRUE")),
    cache=as.logical(Sys.getenv("KNITR_CACHE", "TRUE"))
)

## ----lapply-args-----------------------------------------------------------
args(lapply)

## ----lapply-eg-------------------------------------------------------------
lst <- list(a=1:2, b=2:4)
lapply(lst, log)      # 'base' argument default; natural log
lapply(lst, log, 10)  # '10' is second argument to 'log()', i.e., log base 10

## --------------------------------------------------------------------------
args(mapply)

## ----mapply-eg-------------------------------------------------------------
mapply(seq, 1:3, 4:6, SIMPLIFY=FALSE) # seq(1, 4); seq(2, 5); seq(3, 6)

## ----apply-----------------------------------------------------------------
args(apply)

## ---- eval=FALSE-----------------------------------------------------------
#  if (test) {
#      ## code if TEST == TRUE
#  } else {
#      ## code if TEST == FALSE
#  }

## ----myfun-----------------------------------------------------------------
fun <- function(x) {
    length(unique(x))
}
## list of length 5, each containsing a sample (with replacement) of letters
lets <- replicate(5, sample(letters, 50, TRUE), simplify=FALSE)
sapply(lets, fun)

## --------------------------------------------------------------------------
x <- rnorm(1000)                     # atomic vectors
y <- x + rnorm(1000, sd=.5)
df <- data.frame(x=x, y=y)           # object of class 'data.frame'
plot(y ~ x, df)                      # generic plot, method plot.formula
fit <- lm(y ~x, df)                  # object of class 'lm'
methods(class=class(fit))            # introspection
anova(fit)
plot(y ~ x, df)                      # methods(plot); ?plot.formula
abline(fit, col="red", lwd=3, lty=2) # a function, not generic.method

## ----lapply-setup, echo=FALSE----------------------------------------------
fl <- "symgo.csv"

## ----lapply-user-setup, eval=FALSE-----------------------------------------
#  ## example data
#  fl <- file.choose()      ## symgo.csv

## ----lapply----------------------------------------------------------------
symgo <- read.csv(fl, row.names=1, stringsAsFactors=FALSE)
head(symgo)
dim(symgo)
length(unique(symgo$SYMBOL))
## split-sapply
go2sym <- split(symgo$SYMBOL, symgo$GO)
len1 <- sapply(go2sym, length)          # compare with lapply, vapply
## built-in functions for common actions
len2 <- lengths(go2sym)
identical(len1, len2)
## smarter built-in functions, e.g., omiting NAs
len3 <- aggregate(SYMBOL ~ GO, symgo, length)
head(len3)
## more fun with aggregate()
head(aggregate(GO ~ SYMBOL, symgo, length))
head(aggregate(SYMBOL ~ GO, symgo, c))
## your own function -- unique, lower-case identifiers
uidfun  <- function(x) {
    unique(tolower(x))
}
head(aggregate(SYMBOL ~ GO , symgo, uidfun))
## as an 'anonymous' function
head(aggregate(SYMBOL ~ GO, symgo, function(x) {
    unique(tolower(x))
}))

## ----echo=TRUE, eval=FALSE-------------------------------------------------
#  fname <- file.choose()   ## "ALLphenoData.tsv"
#  stopifnot(file.exists(fname))
#  pdata <- read.delim(fname)

## ----echo=FALSE------------------------------------------------------------
fname <- "ALLphenoData.tsv"
stopifnot(file.exists(fname))
pdata <- read.delim(fname)

## ----ALL-properties--------------------------------------------------------
class(pdata)
colnames(pdata)
dim(pdata)
head(pdata)
summary(pdata$sex)
summary(pdata$cyto.normal)

## ----ALL-subset------------------------------------------------------------
pdata[1:5, 3:4]
pdata[1:5, ]
head(pdata[, 3:5])
tail(pdata[, 3:5], 3)
head(pdata$age)
head(pdata$sex)
head(pdata[pdata$age > 21,])

## ----ALL-subset-NA---------------------------------------------------------
idx <- pdata$sex == "F" & pdata$age > 40
table(idx)
dim(pdata[idx,])

## ----ALL-BCR/ABL-subset----------------------------------------------------
bcrabl <- pdata[pdata$mol.biol %in% c("BCR/ABL", "NEG"),]

## ----ALL-BCR/ABL-drop-unused-----------------------------------------------
bcrabl$mol.biol <- factor(bcrabl$mol.biol)

## ----ALL-BT----------------------------------------------------------------
levels(bcrabl$BT)

## ----ALL-BT-recode---------------------------------------------------------
table(bcrabl$BT)
levels(bcrabl$BT) <- substring(levels(bcrabl$BT), 1, 1)
table(bcrabl$BT)

## ----ALL-BCR/ABL-BT--------------------------------------------------------
xtabs(~ BT + mol.biol, bcrabl)

## ----ALL-aggregate---------------------------------------------------------
aggregate(age ~ mol.biol + sex, bcrabl, mean)

## ----ALL-age---------------------------------------------------------------
t.test(age ~ mol.biol, bcrabl)
boxplot(age ~ mol.biol, bcrabl)

## ----echo=TRUE, eval=FALSE-------------------------------------------------
#  fname <- file.choose()   ## BRFSS-subset.csv
#  stopifnot(file.exists(fname))
#  brfss <- read.csv(fname)

## ----echo=FALSE------------------------------------------------------------
fname <- "BRFSS-subset.csv"
stopifnot(file.exists(fname))
brfss <- read.csv(fname)

## ----brfss-simple-plot-----------------------------------------------------
plot(sqrt(Weight) ~ Height, brfss, main="All Years, Both Sexes")

## ----brfss-subset----------------------------------------------------------
brfss2010 <- brfss[brfss$Year == "2010", ]

## ----brfss-pair-plot-------------------------------------------------------
opar <- par(mfcol=c(1, 2))
plot(sqrt(Weight) ~ Height, brfss2010[brfss2010$Sex == "Female", ],
     main="2010, Female")
plot(sqrt(Weight) ~ Height, brfss2010[brfss2010$Sex == "Male", ],
     main="2010, Male")
par(opar)                           # reset 'par' to original value

## ----ggplot2-brfss-simple-plot---------------------------------------------
library(ggplot2)

## 'quick' plot
qplot(Height, sqrt(Weight), data=brfss)

## specify the data set and 'aesthetics', then how to plot
ggplot(brfss, aes(x=Height, y=sqrt(Weight))) +
    geom_point()

## ----ggplot2-na-in-dataset-------------------------------------------------
sum(is.na(brfss$Height))
sum(is.na(brfss$Weight))
drop <- is.na(brfss$Height) | is.na(brfss$Weight)
sum(drop)

## ----ggplot2-remove-na-----------------------------------------------------
brfss <- brfss[!drop,]

## ----ggplot2-annotate------------------------------------------------------
qplot(Height, sqrt(Weight), data=brfss) +
    ylab("Square root of Weight") + 
        ggtitle("All Years, Both Sexes")

## ----ggplot2-color---------------------------------------------------------
ggplot(brfss, aes(x=Height, y=sqrt(Weight), color=Sex)) + 
    geom_point()

## ----ggplot2-shape---------------------------------------------------------
ggplot(brfss, aes(x=Height, y = sqrt(Weight), color=Sex, shape=Sex)) + 
    geom_point()

## ----ggplot2-shape-facet---------------------------------------------------
ggplot(brfss, aes(x=Height, y = sqrt(Weight), color=Sex)) + 
    geom_point() +
        facet_grid(Sex ~ .)

## ----ggplot2-subset-facet--------------------------------------------------
brfss2010 <- brfss[brfss$Year == "2010", ]
ggplot(brfss2010, aes(x=sqrt(Weight), fill=Sex)) +
    geom_density(alpha=.25)

## ----ggplot2-transparent---------------------------------------------------
sp <- ggplot(brfss, aes(x=Height, y=sqrt(Weight)))
sp + geom_point(alpha=.4)

## ----ggplot2-regression----------------------------------------------------
sp + geom_point() + stat_smooth(method=lm)

## ----ggplot2-regression-2, eval=FALSE--------------------------------------
#  sp + geom_point() + stat_smooth(method=lm + level=0.95)
#  sp + geom_point() + stat_smooth(method=lm, se=FALSE)

## ----ggplot2-regression-bygroup--------------------------------------------
sps <- ggplot(brfss, aes(x=Height, y=sqrt(Weight), colour=Sex)) +
    geom_point() +
        scale_colour_brewer(palette="Set1")
sps + geom_smooth(method="lm")

## --------------------------------------------------------------------------
sessionInfo()