## ----style, echo = FALSE, results = 'asis'--------------------------------- BiocStyle::markdown() ## ----setup, echo=FALSE----------------------------------------------------- knitr::opts_chunk$set( eval=as.logical(Sys.getenv("KNITR_EVAL", "TRUE")), cache=as.logical(Sys.getenv("KNITR_CACHE", "TRUE")) ) ## ---- message = FALSE------------------------------------------------------ library(readr) library(tibble) library(dplyr) library(ggplot2) ## ----echo=TRUE, eval=FALSE------------------------------------------------- # fname <- file.choose() ## BRFSS-subset.csv # stopifnot(file.exists(fname)) # brfss <- read_csv(fname) ## ----echo=FALSE------------------------------------------------------------ fname <- "BRFSS-subset.csv" stopifnot(file.exists(fname)) brfss <- read_csv(fname) ## -------------------------------------------------------------------------- brfss ## -------------------------------------------------------------------------- brfss %>% mutate( Sex = factor(Sex), Year = factor(Year) ) ## -------------------------------------------------------------------------- brfss <- brfss %>% mutate( Sex = factor(Sex), Year = factor(Year) ) ## -------------------------------------------------------------------------- brfss %>% filter(Sex == "Female", Year == "1990") %>% select(Age, Weight, Height) ## -------------------------------------------------------------------------- brfss %>% group_by(Sex, Year) %>% summarize( AveAge = mean(Age, na.rm=TRUE), AveWeight = mean(Weight, na.rm=TRUE) ) ## -------------------------------------------------------------------------- pdata <- read_tsv("ALLphenoData.tsv") t.test(age ~ sex, pdata) ## ---- eval = FALSE--------------------------------------------------------- # pdata %>% t.test(age ~ sex, data = .) ## -------------------------------------------------------------------------- t_test <- function(data, formula, ...) { t.test(formula, data, ...) } ## -------------------------------------------------------------------------- pdata %>% t_test(age ~ sex) ## ---- eval = FALSE--------------------------------------------------------- # pdata_file <- file.choose() # airway-sample-sheet.csv # count_file <- file.choose() # airway-read-counts.csv ## ---- echo = FALSE--------------------------------------------------------- pdata_file <- "airway-sample-sheet.csv" ## -------------------------------------------------------------------------- pdata <- read_csv(pdata_file) pdata <- pdata %>% select(Run, cell, dex) pdata ## -------------------------------------------------------------------------- count_file <- "airway-read-counts.csv" counts <- read_csv(count_file) eg <- counts[, 1:6] # make it easy to work with eg ## -------------------------------------------------------------------------- data <- left_join(pdata, eg) data ## ---- message=FALSE-------------------------------------------------------- library(tidyr) ## -------------------------------------------------------------------------- tbl <- gather(data, "Gene", "Count", -(1:3)) tbl ## -------------------------------------------------------------------------- tbl %>% group_by(Run) %>% summarize(lib_size = sum(Count)) tbl %>% group_by(Gene) %>% summarize( ave_count = mean(Count), ave_log_count = mean(log(1 + Count)) ) ## -------------------------------------------------------------------------- counts_tbl <- gather(counts, "Gene", "Count", -Run) ## -------------------------------------------------------------------------- data_tbl <- left_join(pdata, counts_tbl) data_tbl ## -------------------------------------------------------------------------- data_tbl %>% group_by(Run) %>% summarize(lib_size = sum(Count)) ## -------------------------------------------------------------------------- gene_summaries <- data_tbl %>% group_by(Gene) %>% summarize( ave_count = mean(Count), ave_log_count = mean(log(1 + Count)) ) gene_summaries ## ---- message=FALSE-------------------------------------------------------- library(ggplot2) ## -------------------------------------------------------------------------- ggplot(gene_summaries, aes(ave_log_count)) + geom_density() ## -------------------------------------------------------------------------- sessionInfo()