% \VignetteEngine{knitr::knitr} % \VignetteIndexEntry{02. Working with R -- slides} \documentclass[xcolor=dvipsnames]{beamer} \usepackage{BioconductorSlides} \hypersetup{colorlinks,linkcolor=,urlcolor=Blue} \title{Introduction to \R{}} \author{Martin T.\ Morgan\footnote{\url{mtmorgan@fhcrc.org}}} \date{27-28 February 2014} \begin{document} \maketitle \section*{R} \begin{frame}[fragile]{\R} \begin{columns} \column{.45\textwidth} \begin{itemize} \item \url{http://r-project.org} \item Open-source, statistical programming language; widely used in academia, finance, pharma, \ldots \item Core language and base packages \item Interactive sessions, scripts \item $>5000$ contributed packages \end{itemize} \column{.55\textwidth} <>= ## Two 'vectors' x <- rnorm(1000) y <- x + rnorm(1000, sd=.5) ## Integrated container df <- data.frame(X=x, Y=y) ## Visualize plot(Y ~ X, df) ## Regression; 'object' fit <- lm(Y ~ X, df) ## Methods on the object abline(fit) # regression line anv <- anova(fit) # ANOVA table @ \end{columns} \end{frame} \begin{frame}{Programming \R} \begin{enumerate} \item Packages: loading, installing \item Help \item Scripts \& reproducible research \item Functions \item Debugging and measuring performance \end{enumerate} \end{frame} \begin{frame}[fragile]{1. Packages} Already installed packages <>= library(parallel) @ %% New packages from \emph{repositories} such as \href{http://cran.r-project.org}{CRAN} and \href{http://bioconductor.org/}{Bioconductor} \begin{itemize} \item \Rcode{biocLite()} to install, including \emph{dependencies} \item Occasional problems when a package depends on third-party software installation \end{itemize} <>= source("http://bioconductor.org/biocLite.R") biocLite("IRanges") library("IRanges") @ %% Other repositories: \texttt{R-forge}, \texttt{github}, \ldots \end{frame} \begin{frame}[fragile]{Packages (cont.)} What packages are loaded? <>= head(search(), 3) @ What functions are provided by a package? 
<>= help(package="IRanges") @ How does \R{} find symbols, e.g., \Rcode{sin}? \begin{itemize} \item Look in \Rcode{.GlobalEnv}, then proceed down search path \item Specify package with \Rcode{base::sin} \end{itemize} \end{frame} \begin{frame}[fragile]{2. Help} \begin{knitrout} \definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor}\begin{kframe} \begin{alltt} \hlkwd{help.start}\hlstd{()} \hlkwd{? }\hlstd{data.frame} \hlkwd{? }\hlstd{anova} \hlkwd{? }\hlstd{anova.lm} \hlcom{# anova generic, method for class lm} \hlkwd{class ? }\hlstd{DNAStringSet} \hlkwd{method ? }\hlstr{"alphabetFrequency,DNAStringSet"} \hlkwd{vignette}\hlstd{(}\hlstr{"GenomicRangesIntroduction"}\hlstd{,} \hlstr{"GenomicRanges"}\hlstd{)} \hlkwd{help}\hlstd{(}\hlkwc{package} \hlstd{=} \hlstr{"Biostrings"}\hlstd{)} \hlkwd{RShowDoc}\hlstd{(}\hlstr{"R-intro"}\hlstd{)} \end{alltt} \end{kframe} \end{knitrout} \end{frame} \begin{frame}{3. Scripts, functions, and reproducible research} \begin{enumerate} \item Write simple scripts of \R{} code \texttt{my\_analysis.R} \item Implement common operations as functions. \item `Markdown' with \R{} code embedded in surrounding text \texttt{my\_analysis.Rmd} \item Packages! -- \Rcode{package.skeleton()} \item Version control! \end{enumerate} \end{frame} \begin{frame}{4. Favorite functions} \begin{columns}[t] \column{.5\textwidth} \begin{description}\itemsep0pt\parskip0pt \item[\Rfunction{dir}, \Rfunction{read.table}, \Rfunction{scan}] List files; input data. \item[\Rfunction{c}, \Rfunction{factor}, \Rfunction{data.frame}, \Rfunction{matrix}] Create vectors, etc. \item[\Rfunction{summary}, \Rfunction{table}, \Rfunction{xtabs}] Summarize or cross-tabulate data. \item[\Rfunction{t.test}, \Rfunction{lm}, \Rfunction{anova}] Compare two or several groups. \item[\Rfunction{dist}, \Rfunction{hclust}, \Rfunction{heatmap}] Cluster data. \item[\Rfunction{plot}] Plot data. \item[\Rfunction{ls}, \Rfunction{library}] List objects; attach packages. 
\end{description} \column{.5\textwidth} \begin{description} \itemsep0pt\parskip0pt \item[\Rfunction{lapply}, \Rfunction{sapply}, \Rfunction{mapply}] Apply function to elements of lists. \item[\Rfunction{match}, \Rfunction{\%in\%}] Find elements of one vector in another. \item[\Rfunction{split}, \Rfunction{cut}] Split or cut vectors. \item[\Rfunction{strsplit}, \Rfunction{grep}, \Rfunction{sub}] Operate on character vectors. \item[\Rfunction{biocLite}] Install a package from an on-line repository. \end{description} \end{columns} \end{frame} \begin{frame}{5. Debugging and measuring performance} Debugging \begin{itemize} \item \Rfunction{traceback()}: what went wrong? \item \Rfunction{debug()}: step through a function. \item \Rfunction{browser()}: insert a break-point in your own function / script. Help debug errors. \end{itemize} \end{frame} \begin{frame}{Debugging and measuring performance (cont.)} Performance \begin{itemize} \item \Rfunction{all.equal()}, \Rfunction{identical()} to compare values. \item \Rfunction{system.time()} to measure how long evaluation takes. \item \Rpkg{microbenchmark} to compare times for different functions. \item \Rfunction{Rprof()} to summarize time in each function call, \href{https://github.com/hadley/lineprof}{lineprof} to profile each line of code. \end{itemize} \end{frame} \end{document}