% automatic manuscript creation for frma % -*- mode: noweb; noweb-default-code-mode: R-mode; -*- %\VignetteIndexEntry{ternarynet: A Computational Bayesian Approach to Ternary Network Estimation} %\VignetteDepends{ternarynet} %\VignettePackage{ternarynet} \documentclass[12pt]{article} \usepackage{hyperref} \usepackage[authoryear, round]{natbib} \textwidth=6.2in \textheight=8.5in \parskip=.3cm \oddsidemargin=.1in \evensidemargin=.1in \headheight=-.3in \newcommand\Rpackage[1]{{\textsf{#1}\index{#1 (package)}}} \newcommand\dataset[1]{{\textit{#1}\index{#1 (data set)}}} \newcommand\Rclass[1]{{\textit{#1}\index{#1 (class)}}} \newcommand\Rfunction[1]{{{\small\texttt{#1}}\index{#1 (function)}}} \newcommand\Rfunarg[1]{{\small\texttt{#1}}} \newcommand\Robject[1]{{\small\texttt{#1}}} \newcommand\ttt[1]{\texttt{#1}} \author{Matthew N. McCall and Anthony Almudevar and Harry A. Stern} \begin{document} \title{A Computational Bayesian Approach to Ternary Network Estimation (ternarynet)} \maketitle \tableofcontents \newpage \section{Introduction} This document describes \Rpackage{ternarynet}, which implements a computational Bayesian algorithm to estimate a ternary network from perturbation data. We strong recommend reading the paper, \emph{Fitting Boolean Networks from Steady State Perturbation Data} (Almudevar \emph{et. al} 2011) before proceeding with this vignette. \section{Getting Started} First begin by downloading and installing the ternarynet package. <>= library(ternarynet) @ \section*{\ttt{parallelFit} function} The \ttt{ternarynet} package contains a parallel implementation of the replica exchange algorithm for fitting ternary network models. The \ttt{parallelFit} function takes the following arguments: \begin{description} \item[\ttt{experiment\_set}]data frame containing five columns: \begin{description} \item[\ttt{i\_exp}] an experiment index: an integer from 0 to $N_{\textrm{exp}}-1$, where $N_{\textrm{exp}}$ is the number of experiments. \item[\ttt{i\_node}] a node index: an integer from 0 to $N_{\textrm{node}}-1$, where $N_{\textrm{node}}$ is the number of nodes. \item[\ttt{outcome}] a value of -1, 0, or +1, denoting a particular outcome for that node in that experiment \item[\ttt{value}] a cost for obtaining that outcome. For instance, if the cost function is the Hamming distance, and the observed outcome is +1, the cost would be would be +2, +1, or 0 for an outcome of -1, 0, or +1, respectively. \item[\ttt{is\_perturbation}] a Boolean value (or a value of 0/1) denoting whether this outcome is due to an applied perturbation or not. \end{description} \item[\ttt{max\_parents}]maximum number of parents allowed for each node \item[\ttt{n\_cycles}]maximum number of Monte Carlo cycles \item[\ttt{n\_write}]number of times to write output during the run \item[\ttt{T\_lo}]T for lowest-temperature replica \item[\ttt{T\_h}]T for highest-temperature replica \item[\ttt{target\_score}] run will terminate if this is reached \item[\ttt{n\_proc}]number of replicas \item[\ttt{logfile}]filename for log file \item[\ttt{n\_thread}]number of openMP threads to run per process; default=1 \item[\ttt{init\_parents}]initial parents; randomize if null \item[\ttt{init\_outcomes}]inital outcomes; set to '.' if null \item[\ttt{exchange\_interval}]steps between replica exchanges; default=1000 \item[\ttt{adjust\_move\_size\_interval}]steps between move size adjustments, default=7001 \item[\ttt{max\_states}]maximum number of states to propagate to find a repetition; default=10 \item[\ttt{callback}]callback function, should take one integer argument (the replica number), used to call set.seed with different seed for each replica \end{description} The return value is a list with an element for each replica. Each element is itself a list of the best unnormalized score, normalized score (unnormalized score divided by product of number of nodes and number of experiments), list of parents for each node, and array describing the transition rule, giving the outcome of a node for each possible configuration of parent nodes. \section*{Examples} The following shows a subset of the simple model regulatory network given in Example 1 of Reference 1 (nodes 1-4 only). There are four nodes and eight experiments (the first four rows of Table 4). The cost function for each possible outcome is the Hamming distance with the observed steady-state outcome, given a persistent perturbation. The output corresponds with the parents and transitions described on page 13 of Almudevar et al. (2011). <>= library(ternarynet) i_exp <- as.integer(c(0,0,0, 0,0,0, 0,0,0, 0,0,0, 1,1,1, 1,1,1, 1,1,1, 1,1,1, 2,2,2, 2,2,2, 2,2,2, 2,2,2, 3,3,3, 3,3,3, 3,3,3, 3,3,3, 4,4,4, 4,4,4, 4,4,4, 4,4,4, 5,5,5, 5,5,5, 5,5,5, 5,5,5, 6,6,6, 6,6,6, 6,6,6, 6,6,6, 7,7,7, 7,7,7, 7,7,7, 7,7,7)) i_node <- as.integer(c(0,0,0, 1,1,1, 2,2,2, 3,3,3, 0,0,0, 1,1,1, 2,2,2, 3,3,3, 0,0,0, 1,1,1, 2,2,2, 3,3,3, 0,0,0, 1,1,1, 2,2,2, 3,3,3, 0,0,0, 1,1,1, 2,2,2, 3,3,3, 0,0,0, 1,1,1, 2,2,2, 3,3,3, 0,0,0, 1,1,1, 2,2,2, 3,3,3, 0,0,0, 1,1,1, 2,2,2, 3,3,3)) outcome <- as.integer(c(-1,0,1, -1,0,1, -1,0,1, -1,0,1, -1,0,1, -1,0,1, -1,0,1, -1,0,1, -1,0,1, -1,0,1, -1,0,1, -1,0,1, -1,0,1, -1,0,1, -1,0,1, -1,0,1, -1,0,1, -1,0,1, -1,0,1, -1,0,1, -1,0,1, -1,0,1, -1,0,1, -1,0,1, -1,0,1, -1,0,1, -1,0,1, -1,0,1, -1,0,1, -1,0,1, -1,0,1, -1,0,1)) value <- c(0,1,2, 0,1,2, 0,1,2, 0,1,2, 2,1,0, 0,1,2, 0,1,2, 0,1,2, 2,1,0, 2,1,0, 0,1,2, 0,1,2, 2,1,0, 2,1,0, 2,1,0, 0,1,2, 2,1,0, 2,1,0, 2,1,0, 2,1,0, 0,1,2, 2,1,0, 2,1,0, 2,1,0, 0,1,2, 0,1,2, 2,1,0, 2,1,0, 0,1,2, 0,1,2, 0,1,2, 2,1,0) is_perturbation <- c(TRUE,TRUE,TRUE, FALSE,FALSE,FALSE, FALSE,FALSE,FALSE, FALSE,FALSE,FALSE, FALSE,FALSE,FALSE, TRUE,TRUE,TRUE, FALSE,FALSE,FALSE, FALSE,FALSE,FALSE, FALSE,FALSE,FALSE, FALSE,FALSE,FALSE, TRUE,TRUE,TRUE, FALSE,FALSE,FALSE, FALSE,FALSE,FALSE, FALSE,FALSE,FALSE, FALSE,FALSE,FALSE, TRUE,TRUE,TRUE, TRUE,TRUE,TRUE, FALSE,FALSE,FALSE, FALSE,FALSE,FALSE, FALSE,FALSE,FALSE, FALSE,FALSE,FALSE, TRUE,TRUE,TRUE, FALSE,FALSE,FALSE, FALSE,FALSE,FALSE, FALSE,FALSE,FALSE, FALSE,FALSE,FALSE, TRUE,TRUE,TRUE, FALSE,FALSE,FALSE, FALSE,FALSE,FALSE, FALSE,FALSE,FALSE, FALSE,FALSE,FALSE, TRUE,TRUE,TRUE) indata <- data.frame(i_exp,i_node,outcome,value,is_perturbation) results <- parallelFit(indata, max_parents=1, n_cycles=100000, n_write=10, T_lo=0.001, T_hi=2.0, target_score=0, n_proc=1, logfile='try.log') lowest_temp_results <- results[[1]] print('Unnormalized score:') print(lowest_temp_results$unnormalized_score) print('Normalized score:') print(lowest_temp_results$normalized_score) print('Parents:') print(lowest_temp_results$parents) print('Outcomes:') print(lowest_temp_results$outcomes) @ Subsequent fits may be started using the network from a previous fit as the initial conditions, as in the following example (the initial network in the case already has a score of 0). <>= results <- parallelFit(indata, max_parents=1, n_cycles=10, n_write=10, T_lo=0.001, T_hi=2.0, target_score=0, n_proc=1, logfile='try.log', init_parents=lowest_temp_results$parents, init_outcomes=lowest_temp_results$outcomes) lowest_temp_results <- results[[1]] print('Unnormalized score:') print(lowest_temp_results$unnormalized_score) print('Normalized score:') print(lowest_temp_results$normalized_score) print('Parents:') print(lowest_temp_results$parents) print('Outcomes:') print(lowest_temp_results$outcomes) @ \section{Session Info} <>= sessionInfo() @ \end{document}