\name{deds.stat}
\alias{deds.stat}

\title{Differential Expression via Distance Summary of Multiple Statistics}
\description{
  \code{deds.stat} integrates different statistics of differential
  expression (DE) to rank and select a set of DE genes. 
}
\usage{
deds.stat(X, L, B = 1000, testfun = list(t = comp.t(L), fc = comp.FC(L),
sam = comp.SAM(L)), tail = c("abs", "lower", "higher"), distance =
c("weuclid", "euclid"), adj = c("fdr", "adjp"), nsig = nrow(X))
}

\arguments{
  \item{X}{A  matrix, with \eqn{m} rows corresponding to variables
    (hypotheses) and \eqn{n} columns corresponding to observations.
    In the case of gene expression data, rows correspond to genes and
    columns to mRNA samples. The data can be read using
    \code{\link{read.table}}.}
  \item{L}{A vector of integers corresponding to observation (column)
    class labels. For \eqn{k} classes, the labels must be integers
    between 0 and \eqn{k-1}.}
  \item{B}{The number of permutations. For a complete enumeration,
      \code{B} should be 0 (zero) or any number not less than the total
      number of permutations.}
  \item{testfun}{A list of functions specifying the statistics to be
    used to test the null hypothesis of no association between the
    variables and the class labels. The default uses t, fold change and
    SAM. The input can also be generated using the function
    \code{\link{deds.chooseTest}}.}
  \item{tail}{A character string specifying the type of rejection
    region.\cr
    If \code{tail="abs"}, two-tailed tests, the null hypothesis is
    rejected for large absolute values of the test statistic.\cr
    If \code{tail="higher"}, one-tailed tests, the null hypothesis
    is rejected for large values of the test statistic.\cr
    If \code{tail="lower"}, one-tailed tests, the null hypothesis is
    rejected for small values of the test statistic.
   }
  \item{distance}{A character string specifying the type of distance
    measure used for the calculation of the distance to the extreme
    point (E). \cr
    If \code{distance="weuclid"}, weighted euclidean distance, the
    weight for statistic \eqn{t} is \eqn{\frac{1}{MAD(t)}}{1/MAD(t)}; \cr
    If \code{distance="euclid"}, euclidean distance.\cr
   }
  \item{adj}{A character string specifying the type of multiple testing
    adjustment. \cr
    If \code{adj="fdr"}, the False Discovery Rate is controlled and \eqn{q} values
    are returned. \cr
    If \code{adj="adjp"}, adjusted \eqn{p} values that control the family-wise
      type I error rate are returned.}
  \item{nsig}{If \code{adj = "fdr"}, \code{nsig} specifies the number of top
    differentially expressed genes whose \eqn{q} values will be calculated; we recommend 
    setting \code{nsig < m}, as the computation of \eqn{q} values will be extensive. \eqn{q} values
    for the rest of the genes will be approximated to 1. If \code{adj = "adjp"}, the 
    calculation of the adjusted \eqn{p} values will be for the whole dataset.}
}
\details{
  \code{deds.stat} summarizes multiple statistical measures for the
  evidence of DE. The DEDS methodology treats each gene as
  a point corresponding to a gene's vector of DE measures. An "extreme
  origin" is defined as the maxima of all statistics. The distance
  from each point to this extreme is computed, and the ranking of a
  gene for DE is determined by the closeness of the gene to the
  extreme. To determine a cutoff for declaration of DE, null referent
  distributions are generated by permuting the data matrix.

  Statistical measures currently in the DEDS package include t statistics
  (\code{\link{comp.t}}), fold changes (\code{\link{comp.FC}}), F
  statistics (\code{\link{comp.F}}), SAM (\code{\link{comp.SAM}}), moderated
  t (\code{\link{comp.modt}}), moderated F statistics
  (\code{\link{comp.modF}}), and B statistics (\code{\link{comp.B}}). The
  user can also supply their own function for a statistic other than the
  above, provided the function is written in a similar format as the
  above ones.

  The function \code{deds.stat} could be slow if the size of the data
  matrix and the number of permutations are large. We therefore recommend
  using \code{\link{deds.stat.linkC}} as the default
  function. \code{\link{deds.stat.linkC}} interfaces to a C function,
  which handles a 10,000 by 10 matrix and 1000 permutations in minutes.

  DEDS can also summarize \eqn{p} values from different statistical
  models, see \code{\link{deds.pval}}.
}
\value{
  An object of class \code{\link{DEDS}}. See \code{\link{DEDS-class}}.
}

\references{
  Yang, Y. H., Xiao, Y. and Segal, M. R.: Selecting differentially expressed
  genes from microarray experiment by sets of
  statistics. \emph{Bioinformatics}, 2004, accepted.
  \url{http://www.biostat.ucsf.edu/jean/Papers/DEDS.pdf}.
}

\author{Yuanyuan Xiao, \email{yxiao@itsa.ucsf.edu}, \cr
    Jean Yee Hwa Yang, \email{jean@biostat.ucsf.edu}.}

\seealso{\code{\link{deds.pval}}, \code{\link{deds.stat.linkC}}}
\examples{
X <- matrix(rnorm(1000,0,0.5), nc=10)
L <- rep(0:1,c(5,5))

# genes 1-10 are differentially expressed
X[1:10,6:10]<-X[1:10,6:10]+1

# DEDS summarizing t, sam and fc
deds.X <- deds.stat(X, L, B=200)

# DEDS summarizing t, tmod and sam
\dontrun{deds.X <- deds.stat(X, L, testfun=list(t=comp.t(L),
tmod=comp.modt(L), sam=comp.SAM(L)))}

# one can also use:
\dontrun{deds.X <- deds.stat(X, L, testfun=deds.chooseTest(L,
tests=c("t","modt","fc")))
}

}
\keyword{htest}