%\VignetteIndexEntry{An R package for prediction of nucleosome positioning}
%\VignetteKeywords{Nucleosome}

\documentclass[a4paper]{article}

\newcommand{\Rpackage}[1]{{\textit{#1}}}
\newcommand{\Robject}[1]{{\texttt{#1}}}
\newcommand{\Rfunarg}[1]{{\textit{#1}}}
\newcommand{\Rfunction}[1]{{\texttt{#1}}}
\usepackage{ccaption}
\usepackage{natbib}

\setlength{\textwidth}{6.2in}
\setlength{\textheight}{8.5in}
\setlength{\parskip}{1.5ex plus0.5ex minus 0.5ex}
\setlength{\oddsidemargin}{0.1cm}
\setlength{\evensidemargin}{0.1cm}
\setlength{\headheight}{0.3cm}
\setlength{\arraycolsep}{0.1cm}

\renewcommand{\baselinestretch}{1}

\begin{document}
\title{An introduction to the nuCpos package}
\author{Hiroaki Kato\thanks{hkato@med.shimane-u.ac.jp}, \  Takeshi Urano}

\maketitle

\section{About nuCpos}

\Rpackage{nuCpos}, a derivative of \Rpackage{NuPoP}, 
is an R package for predicting \textbf{\textit{nuc}}leosome 
\textbf{\textit{pos}}itions. 
\Rpackage{nuCpos} calculates local and whole nucleosomal HBA 
scores for a given 147-bp sequence. 
This package was designed to demonstrate the use of 
chemical maps in prediction. 
As the parental package \Rpackage{NuPoP} now provides 
chemical-map-based prediction, the function for 
dHMM-based prediction was removed from this package. 
\Rpackage{nuCpos} continues to provide functions for HBA 
calculation. The models are based on chemical maps of 
nucleosomes from 
budding yeast \textit{Saccharomyces cerevisiae} 
\citep{stat:Brogaard2012}, fission yeast 
\textit{Schizosaccharomyces pombe} \citep{stat:Moyle-Heyrman2013}, 
or embryonic stem cells of house mouse \textit{Mus musculus} 
\citep{stat:Voong2016}. 

The parental package \Rpackage{NuPoP}, licensed under GPL-2, 
was developed by Ji-Ping Wang and Liqun Xi. Please refer to 
\cite{stat:XiWang2010} and \cite{stat:WangWidom2008} for 
technical details of \Rpackage{NuPoP}. Their excellent codes 
were adapted in \Rpackage{nuCpos} to demonstrate the usefulness 
of chemical maps in prediction. 

Note that when 
\Rpackage{nuCpos} was released, \Rpackage{NuPoP} only used an 
MNase-seq-based map of budding yeast nucleosomes to train 
a duration hidden Markov model. However, as \Rpackage{NuPoP} now 
provides chemical map-based prediction, users are encouraged 
to use \Rpackage{NuPoP} functions to conduct dHMM-based prediction 
in their original way. 


\section{nuCpos functions}
\Rpackage{nuCpos} has two functions: 
\verb@HBA@ and \verb@localHBA@. 

The functions \verb@HBA@ and \verb@localHBA@ receive a 147-bp DNA 
sequence and calculate whole nucleosomal and local HBA scores, respectively. 
These functions invoke core Fortran codes for HBA calculation 
that were adapted from the excellent dHMM code of \Rpackage{NuPoP}.

\Rpackage{nuCpos} requires the \Rpackage{Biostrings} package, 
especially when DNA sequences are given as DNAString objects to 
the functions \verb@HBA@ and \verb@localHBA@. 
These functions can also receive DNA sequences as simple character 
string objects without loading the \Rpackage{Biostrings} package. 
Note: \Rpackage{nuCpos} requires the \Rpackage{NuPoP} package to perform 
some example runs.

Load the \Rpackage{nuCpos} package as follows:

<<>>=
library(nuCpos)
@


\section{Histone binding affinity score calculation with HBA}

The HBA score can be calculated for a given 147-bp sequence with the 
\verb@HBA@ function. In the examples below, a character string 
object \Robject{inseq} and a DNAString object \Robject{INSEQ} with 
the same 147-bp DNA sequences are given to \verb@HBA@. Note: the 
\Rpackage{Biostrings} package is required for the latter case.

<<>>=
load(system.file("extdata", "inseq.RData", package = "nuCpos"))
HBA(inseq = inseq, species = "sc")
for(i in 1:3) cat(substr(inseq, start = (i-1)*60+1, 
    stop = (i-1)*60+60), "\n")
load(system.file("extdata", "INSEQ_DNAString.RData", 
    package = "nuCpos"))
INSEQ
HBA(inseq = INSEQ, species = "sc")
@

The argument \Rfunarg{inseq} is the character string object to 
be given. Alternatively, a DNAString object can be used here. 
The length of DNA must be 147 bp. The argument \Rfunarg{species} 
can be specified as follows: mm = \textit{M. musculus}; 
sc = \textit{S. cerevisiae}; sp = \textit{S. pombe}. 


\section{Local histone binding affinity score calculation 
    with localHBA}

Local HBA scores are defined as HBA scores for 13 overlapping 
subnucleosomal segments named A to M. They can be calculated for 
a given 147-bp sequence with the \verb@localHBA@ function. 
Like \verb@HBA@, this function can receive either a character 
string object or a DNAString object. The segment G corresponds to 
the central 21 bp region, in which the dyad axis passes through 
the 11th base position. This means that the local HBA score for 
the G segment reflects the relationship between DNA and histone 
proteins around superhelical locations (SHLs) $-0.5$ and $+0.5$. 
The neighboring F segment, which is 20 bp in length, 
covers SHLs $-1.5$ and $-0.5$. The result of the example run shown below 
suggests that the subsequence of \Robject{inseq} around SHLs $-3.5$ and 
$-2.5$ is suitable for nucleosome formation. 


<<>>=
localHBA(inseq = inseq, species = "sc")
barplot(localHBA(inseq = inseq, species = "sc"), 
    names.arg = LETTERS[1:13], xlab = "Nucleosomal subsegments", 
    ylab = "local HBA", main = "Local HBA scores for inseq")
@


\section{Acknowledgements}
We would like to thank Drs.~Shimizu, Fuse and Ichikawa for 
sharing DNA sequences and \textit{in vivo} data, 
and for giving fruitful comments. 
We would like to thank Dr.~Ji-Ping Wang and his colleagues
for distributing NuPoP under the GPL-2 license. 
In this package, their excellent code for dHMM-based prediction 
was adapted for chemical map-based prediction to demonstrate 
the usefulness of chemical maps in prediction. 
As we noticed that canceling HBA smoothing helps to predict 
rotational settings, predNuCpos in the earlier version 
provided this option. 
However, we encourage users who want to predict nucleosome occupancy in 
the original way with chemical maps to 
use NuPoP functions, as NuPoP now provides 
chemical map-based predictions. 
In our functions HBA and localHBA, their excellent code was 
also adapted to calculate the scores of given 147-bp sequences 
independently of the genomic context. 
The function HBA now runs without invoking a Fortran subroutine.


\bibliographystyle{apalike}
%\bibliography{nuCposBib.bib}

\begin{thebibliography}{}

\bibitem[Wang et~al., 2008]{stat:WangWidom2008}
    Wang JP, Fondufe-Mittendorf Y, Xi L, Tsai GF, Segal E and 
    Widom J (2008).
\newblock Preferentially quantized linker {DNA} lengths in 
    \textit{Saccharomyces cerevisiae}.
\newblock {\em PLoS Computational Biology}, 4(9):e1000175.

\bibitem[Xi et~al., 2010]{stat:XiWang2010}
    Xi L, Fondufe-Mittendorf Y, Xia L, Flatow J, Widom J and 
    Wang JP (2010).
    \newblock Predicting nucleosome positioning using a 
        duration hidden Markov model.
    \newblock {\em BMC Bioinformatics}, 11:346.

\bibitem[Brogaard et~al., 2012]{stat:Brogaard2012}
    Brogaard K, Xi L, and Widom J (2012).
    \newblock A map of nucleosome positions in yeast at 
        base-pair resolution.
    \newblock {\em Nature}, 486(7404):496-501.

\bibitem[Moyle-Heyrman et~al., 2013]{stat:Moyle-Heyrman2013}
    Moyle-Heyrman G, Zaichuk T, Xi L, Zhang Q, Uhlenbeck OC, 
        Holmgren R, Widom J and Wang JP (2013).
    \newblock Chemical map of \textit{Schizosaccharomyces pombe} 
        reveals species-specific features in nucleosome positioning.
    \newblock {\em Proc. Natl. Acad. Sci. U. S. A.}, 
        110(50):20158-63.

\bibitem[Ichikawa et~al., 2014]{stat:Ichikawa2014}
    Ichikawa Y, Morohoshi K, Nishimura Y, Kurumizaka H and 
        Shimizu M (2014).
    \newblock Telomeric repeats act as nucleosome-disfavouring 
        sequences in vivo.
    \newblock {\em Nucleic Acids Res.}, 42(3):1541-1552.

\bibitem[Voong et~al., 2016]{stat:Voong2016}
    Voong LN, Xi L, Sebeson AC, Xiong B, Wang JP and Wang X (2016).
    \newblock Insights into Nucleosome Organization in 
        Mouse Embryonic Stem Cells through Chemical Mapping.
    \newblock {\em Cell}, 167(6):1555-1570.

\bibitem[Fuse et~al., 2017]{stat:Fuse2017}
    Fuse T, Katsumata K, Morohoshi K, Mukai Y, Ichikawa Y, 
        Kurumizaka H, Yanagida A, Urano T, Kato H, and Shimizu M (2017).
    \newblock Parallel mapping with site-directed hydroxyl radicals 
        and micrococcal nuclease reveals structural features of 
        positioned nucleosomes in vivo.
    \newblock {\em PLOS ONE}, 12(10):e0186974.


\end{thebibliography}


\end{document}