## ----style, echo = FALSE, results = 'asis'---------------------------------
BiocStyle::markdown()

## ----setup, echo=FALSE-----------------------------------------------------
knitr::opts_chunk$set(
    eval=as.logical(Sys.getenv("KNITR_EVAL", "TRUE")),
    cache=as.logical(Sys.getenv("KNITR_CACHE", "TRUE"))
)

## ---- message = FALSE------------------------------------------------------
library(readr)
library(tibble)
library(dplyr)
library(ggplot2)

## ----echo=TRUE, eval=FALSE-------------------------------------------------
#  fname <- file.choose()   ## BRFSS-subset.csv
#  stopifnot(file.exists(fname))
#  brfss <- read_csv(fname)

## ----echo=FALSE------------------------------------------------------------
fname <- "BRFSS-subset.csv"
stopifnot(file.exists(fname))
brfss <- read_csv(fname)

## --------------------------------------------------------------------------
brfss

## --------------------------------------------------------------------------
brfss %>% mutate( Sex = factor(Sex), Year = factor(Year) )

## --------------------------------------------------------------------------
brfss <- brfss %>% mutate( Sex = factor(Sex), Year = factor(Year) )

## --------------------------------------------------------------------------
brfss %>% filter(Sex == "Female", Year == "1990") %>% 
    select(Age, Weight, Height)

## --------------------------------------------------------------------------
brfss %>%
    group_by(Sex, Year) %>% 
    summarize(
        AveAge = mean(Age, na.rm=TRUE),
        AveWeight = mean(Weight, na.rm=TRUE)
    )

## --------------------------------------------------------------------------
pdata <- read_tsv("ALLphenoData.tsv")
t.test(age ~ sex, pdata)

## ---- eval = FALSE---------------------------------------------------------
#  pdata %>% t.test(age ~ sex, data = .)

## --------------------------------------------------------------------------
t_test <- function(data, formula, ...) {
    t.test(formula, data, ...)
}

## --------------------------------------------------------------------------
pdata %>% t_test(age ~ sex)

## ---- eval = FALSE---------------------------------------------------------
#  pdata_file <- file.choose()    # airway-sample-sheet.csv
#  count_file <- file.choose()    # airway-read-counts.csv

## ---- echo = FALSE---------------------------------------------------------
pdata_file <- "airway-sample-sheet.csv"

## --------------------------------------------------------------------------
pdata <- read_csv(pdata_file)
pdata <- 
    pdata %>% 
    select(Run, cell, dex)
pdata

## --------------------------------------------------------------------------
count_file <- "airway-read-counts.csv"
counts <- read_csv(count_file)
eg <- counts[, 1:6]    # make it easy to work with
eg

## --------------------------------------------------------------------------
data <- left_join(pdata, eg)
data

## ---- message=FALSE--------------------------------------------------------
library(tidyr)

## --------------------------------------------------------------------------
tbl <- gather(data, "Gene", "Count", -(1:3))
tbl

## --------------------------------------------------------------------------
tbl %>%
    group_by(Run) %>%
    summarize(lib_size = sum(Count))
tbl %>%
    group_by(Gene) %>%
    summarize(
        ave_count = mean(Count),
        ave_log_count = mean(log(1 + Count))
    )

## --------------------------------------------------------------------------
counts_tbl <- gather(counts, "Gene", "Count", -Run)

## --------------------------------------------------------------------------
data_tbl <- left_join(pdata, counts_tbl)
data_tbl

## --------------------------------------------------------------------------
data_tbl %>%
    group_by(Run) %>%
    summarize(lib_size = sum(Count))

## --------------------------------------------------------------------------
gene_summaries <-
    data_tbl %>%
    group_by(Gene) %>%
    summarize(
        ave_count = mean(Count),
        ave_log_count = mean(log(1 + Count))
    )
gene_summaries

## ---- message=FALSE--------------------------------------------------------
library(ggplot2)

## --------------------------------------------------------------------------
ggplot(gene_summaries, aes(ave_log_count)) +
    geom_density()

## --------------------------------------------------------------------------
sessionInfo()