% automatic manuscript creation for frma
% -*- mode: noweb; noweb-default-code-mode: R-mode; -*-
%\VignetteIndexEntry{ternarynet: A Computational Bayesian Approach to Ternary Network Estimation}
%\VignetteDepends{ternarynet}
%\VignettePackage{ternarynet}
\documentclass[12pt]{article}
\usepackage{hyperref}
\usepackage[authoryear, round]{natbib}

\textwidth=6.2in
\textheight=8.5in
\parskip=.3cm
\oddsidemargin=.1in
\evensidemargin=.1in
\headheight=-.3in

\newcommand\Rpackage[1]{{\textsf{#1}\index{#1 (package)}}}
\newcommand\dataset[1]{{\textit{#1}\index{#1 (data set)}}}
\newcommand\Rclass[1]{{\textit{#1}\index{#1 (class)}}}
\newcommand\Rfunction[1]{{{\small\texttt{#1}}\index{#1 (function)}}}
\newcommand\Rfunarg[1]{{\small\texttt{#1}}}
\newcommand\Robject[1]{{\small\texttt{#1}}}
\newcommand\ttt[1]{\texttt{#1}}

\author{Matthew N. McCall and Anthony Almudevar and Harry A. Stern}

\begin{document}
\title{A Computational Bayesian Approach to Ternary Network Estimation (ternarynet)}
\maketitle
\tableofcontents

\newpage

\section{Introduction}

This document describes \Rpackage{ternarynet}, which implements a
computational Bayesian algorithm to estimate a ternary network from
perturbation data. We strong recommend reading the paper,
\emph{Fitting Boolean Networks from Steady State Perturbation Data}
(Almudevar \emph{et. al} 2011) before proceeding with this vignette.

\section{Getting Started}
First begin by downloading and installing the ternarynet package.
<<echo=T,results=hide>>=
library(ternarynet)
@ 

\section*{\ttt{parallelFit} function}

The \ttt{ternarynet} package contains a parallel 
implementation of the replica exchange algorithm 
for fitting ternary network models.  The 
\ttt{parallelFit} function
takes the following arguments:

\begin{description}
    \item[\ttt{experiment\_set}]data frame containing five columns: 
        \begin{description}
            \item[\ttt{i\_exp}] an experiment index:
                an integer from 0 to $N_{\textrm{exp}}-1$,
                where $N_{\textrm{exp}}$ is the number of experiments.
            \item[\ttt{i\_node}] a node index: an integer from
                0 to $N_{\textrm{node}}-1$,
                where $N_{\textrm{node}}$ is the number of nodes.
            \item[\ttt{outcome}] a value of -1, 0, or +1, denoting a
                particular outcome for
                that node in that experiment
            \item[\ttt{value}] a cost for obtaining that outcome.
                For instance, if the cost function
                is the Hamming distance, and the observed outcome is +1,
                the cost would be would be +2, +1, or 0 for an 
                outcome of -1, 0, or +1, respectively.
            \item[\ttt{is\_perturbation}] a Boolean value 
                (or a value of 0/1) denoting whether this 
                outcome is due to an applied perturbation or not.
        \end{description}
    \item[\ttt{max\_parents}]maximum number of parents 
                                allowed for each node
    \item[\ttt{n\_cycles}]maximum number of Monte Carlo cycles
    \item[\ttt{n\_write}]number of times to write output during the run
    \item[\ttt{T\_lo}]T for lowest-temperature replica
    \item[\ttt{T\_h}]T for highest-temperature replica
    \item[\ttt{target\_score}] run will terminate if this is reached
    \item[\ttt{n\_proc}]number of replicas
    \item[\ttt{logfile}]filename for log file
    \item[\ttt{n\_thread}]number of openMP threads to run per process; default=1
    \item[\ttt{init\_parents}]initial parents; randomize if null
    \item[\ttt{init\_outcomes}]inital outcomes; set to '.' if null
    \item[\ttt{exchange\_interval}]steps between replica exchanges; default=1000
    \item[\ttt{adjust\_move\_size\_interval}]steps between 
            move size adjustments, default=7001
    \item[\ttt{max\_states}]maximum number of states to propagate
            to find a repetition; default=10
    \item[\ttt{callback}]callback function, should take one integer argument
            (the replica number), used to call set.seed with different seed
            for each replica
\end{description}

The return value is a list with an element for each replica.  Each
element is itself a list of the best unnormalized score, normalized
score (unnormalized score divided by product of number of nodes and
number of experiments), list of parents for each node, and array
describing the transition rule, giving the outcome of a node for each
possible configuration of parent nodes.

\section*{Examples}

The following shows a subset of the simple model regulatory network
given in Example 1 of Reference 1 (nodes 1-4 only).  There are four
nodes and eight experiments (the first four rows of Table 4).  
The cost function for each possible outcome is the Hamming distance 
with the observed steady-state outcome, given a persistent perturbation.
The output corresponds with the parents and transitions described on page 13
of Almudevar et al. (2011).

<<echo=T>>=

library(ternarynet)

i_exp <- as.integer(c(0,0,0, 0,0,0, 0,0,0, 0,0,0,
                        1,1,1, 1,1,1, 1,1,1, 1,1,1,
                        2,2,2, 2,2,2, 2,2,2, 2,2,2,
                        3,3,3, 3,3,3, 3,3,3, 3,3,3,
                        4,4,4, 4,4,4, 4,4,4, 4,4,4,
                        5,5,5, 5,5,5, 5,5,5, 5,5,5,
                        6,6,6, 6,6,6, 6,6,6, 6,6,6,
                        7,7,7, 7,7,7, 7,7,7, 7,7,7))

i_node <- as.integer(c(0,0,0, 1,1,1, 2,2,2, 3,3,3,
                        0,0,0, 1,1,1, 2,2,2, 3,3,3,
                        0,0,0, 1,1,1, 2,2,2, 3,3,3,
                        0,0,0, 1,1,1, 2,2,2, 3,3,3,
                        0,0,0, 1,1,1, 2,2,2, 3,3,3,
                        0,0,0, 1,1,1, 2,2,2, 3,3,3,
                        0,0,0, 1,1,1, 2,2,2, 3,3,3,
                        0,0,0, 1,1,1, 2,2,2, 3,3,3))

outcome <- as.integer(c(-1,0,1, -1,0,1, -1,0,1, -1,0,1,
                        -1,0,1, -1,0,1, -1,0,1, -1,0,1,
                        -1,0,1, -1,0,1, -1,0,1, -1,0,1,
                        -1,0,1, -1,0,1, -1,0,1, -1,0,1,
                        -1,0,1, -1,0,1, -1,0,1, -1,0,1,
                        -1,0,1, -1,0,1, -1,0,1, -1,0,1,
                        -1,0,1, -1,0,1, -1,0,1, -1,0,1,
                        -1,0,1, -1,0,1, -1,0,1, -1,0,1))

value <- c(0,1,2, 0,1,2, 0,1,2, 0,1,2,
            2,1,0, 0,1,2, 0,1,2, 0,1,2,
            2,1,0, 2,1,0, 0,1,2, 0,1,2,
            2,1,0, 2,1,0, 2,1,0, 0,1,2,
            2,1,0, 2,1,0, 2,1,0, 2,1,0,
            0,1,2, 2,1,0, 2,1,0, 2,1,0,
            0,1,2, 0,1,2, 2,1,0, 2,1,0,
            0,1,2, 0,1,2, 0,1,2, 2,1,0)

is_perturbation <- 
c(TRUE,TRUE,TRUE,  FALSE,FALSE,FALSE, FALSE,FALSE,FALSE, FALSE,FALSE,FALSE,
    FALSE,FALSE,FALSE,  TRUE,TRUE,TRUE, FALSE,FALSE,FALSE, FALSE,FALSE,FALSE,
    FALSE,FALSE,FALSE,  FALSE,FALSE,FALSE, TRUE,TRUE,TRUE, FALSE,FALSE,FALSE,
    FALSE,FALSE,FALSE,  FALSE,FALSE,FALSE, FALSE,FALSE,FALSE, TRUE,TRUE,TRUE,
    TRUE,TRUE,TRUE,  FALSE,FALSE,FALSE, FALSE,FALSE,FALSE, FALSE,FALSE,FALSE,
    FALSE,FALSE,FALSE,  TRUE,TRUE,TRUE, FALSE,FALSE,FALSE, FALSE,FALSE,FALSE,
    FALSE,FALSE,FALSE,  FALSE,FALSE,FALSE, TRUE,TRUE,TRUE, FALSE,FALSE,FALSE,
    FALSE,FALSE,FALSE,  FALSE,FALSE,FALSE, FALSE,FALSE,FALSE, TRUE,TRUE,TRUE)

indata <- data.frame(i_exp,i_node,outcome,value,is_perturbation)

results <- parallelFit(indata,
                        max_parents=1,
                        n_cycles=100000,
                        n_write=10,
                        T_lo=0.001,
                        T_hi=2.0,
                        target_score=0,
                        n_proc=1,
                        logfile='try.log')

lowest_temp_results <- results[[1]]

print('Unnormalized score:')
print(lowest_temp_results$unnormalized_score)

print('Normalized score:')
print(lowest_temp_results$normalized_score)

print('Parents:')
print(lowest_temp_results$parents)

print('Outcomes:')
print(lowest_temp_results$outcomes)

@

Subsequent fits may be started using the network from 
a previous fit as the initial conditions,
as in the following example (the initial network in the case 
already has a score of 0).

<<echo=T>>=
results <- parallelFit(indata,
                        max_parents=1,
                        n_cycles=10,
                        n_write=10,
                        T_lo=0.001,
                        T_hi=2.0,
                        target_score=0,
                        n_proc=1,
                        logfile='try.log',
                        init_parents=lowest_temp_results$parents,
                        init_outcomes=lowest_temp_results$outcomes)

lowest_temp_results <- results[[1]]

print('Unnormalized score:')
print(lowest_temp_results$unnormalized_score)

print('Normalized score:')
print(lowest_temp_results$normalized_score)

print('Parents:')
print(lowest_temp_results$parents)

print('Outcomes:')
print(lowest_temp_results$outcomes)

@

\section{Session Info}
<<echo=T>>=
sessionInfo()
@ 

\end{document}