\documentclass[11pt]{article}
\usepackage{graphicx, verbatim}
\usepackage{hyperref}
\usepackage[utf8]{inputenc}
\usepackage[numbers]{natbib}
\setlength{\textwidth}{6.5in}
\setlength{\textheight}{9in}
\setlength{\oddsidemargin}{0in}
\setlength{\evensidemargin}{0in}
\setlength{\topmargin}{-1.5cm}
%\VignetteIndexEntry{DrugVsDiseasedata}

\begin{document}

\begin{center}
\Large Drug versus Disease data package
\end{center}

\SweaveOpts{keep.source=TRUE}
\DefineVerbatimEnvironment{Sinput}{Verbatim}{xleftmargin=2em}
\DefineVerbatimEnvironment{Soutput}{Verbatim}{xleftmargin=2em}
\DefineVerbatimEnvironment{Scode}{Verbatim}{xleftmargin=2em}
\fvset{listparameters={\setlength{\topsep}{0pt}}}
\renewenvironment{Schunk}{\vspace{\topsep}}{\vspace{\topsep}}

\section{Introduction}
The Drug versus Disease data package provides reference data sets for the DrugVsDisease package, which is a pipeline for the comparison of drug and disease gene expression profiles. Negatively correlated (enriched) profiles can be used to generate hypotheses of drug repurposing, and positively correlated (enriched) profiles may be used to infer side-effects of drugs. The reference data includes disease and drug profiles, where the disease profiles were manually curated from experiments available from the Gene Expression Omnibus (GEO).

\subsection{Disease Signatures}
Datasets included in the DrugVsDiseasedata reference set contained disease versus control samples that were derived from disease-relevant primary tissues. In total, 85 disease-associated microarray experiments (disease versus control) were acquired to represent and characterise 45 distinct diseases with 3766 individual microarrays. These were obtained from the NCBI GEO microarray repository \href{http://www.ncbi.nlm.nih.gov/geo/}{http://www.ncbi.nlm.nih.gov/geo/}. The raw CEL files were normalised using rma and probes were mapped to genes using the average ranking method.
The pairwise similarity score matrix of these profiles was generated using the top and bottom 100 genes, and Affinity Propagation was used to derive a network of disease connections. The classification of each disease profile is given in the diseaseClusters data file. The 88 profiles resulted in a network of 12 clusters; the ranked profiles used to generate these networks are stored in the diseaseRL data object.
% NOTE(review): 85 experiments are quoted earlier in this subsection but 88 profiles here -- confirm which figure is intended.
<<>>=
#to load the disease ranked profiles
data(diseaseRL,package="DrugVsDiseasedata")
@

\subsection{Annotation}
The DrugVsDisease data package automatically downloads and annotates Affymetrix probe sets to HUGO gene symbols using biomaRt. The annotationlist in DrugVsDiseasedata gives the Affymetrix platform annotation and associated database reference in biomaRt to allow for automatic detection and calculation. The genes which are in the intersection of the three Affymetrix platforms supported for automatic annotation by DrugVsDisease are given in the genelist object.
<<>>=
data(annotationlist,package="DrugVsDiseasedata")
#to get the HUGO genes which are included in the reference data
data(genelist,package="DrugVsDiseasedata")
@

\subsection{GEO data}
The meta information is processed in DrugVsDisease through the GEOquery package. This provides the experimental design; DrugVsDisease uses this information to identify explanatory factors which may be used in a regression model. The list of factor values which are available on the GEO website \href{http://www.ncbi.nlm.nih.gov/geo/}{http://www.ncbi.nlm.nih.gov/geo/} is stored in the GEOfactorvalues object for use by DrugVsDisease.
<<>>=
data(GEOfactorvalues,package="DrugVsDiseasedata")
@

\subsection{Cytoscape Information}
An associated Cytoscape plug-in is available for DrugVsDisease which also uses the DrugVsDiseasedata package. The DrugVsDiseasedata package contains the cytodisease data object, which contains the edges in the network along with the distance and Running sum Peak Statistic (RPS).
The latter two are used as edge attributes by Cytoscape. The Running sum Peak Statistic takes values 1 or $-1$, where 1 indicates a positive correlation and $-1$ a negative correlation. The distance measure gives the strength of this correlation. This data frame is used by the DrugVsDisease package to generate Cytoscape sif and edge attribute files. For links out to the MeSH external web browsers, DrugVsDiseasedata also contains search-compatible terms for all nodes in the reference data sets.
<<>>=
data(cytodisease,package="DrugVsDiseasedata")
#to get the compound (node) names and corresponding search terms
data(diseaselabels,package="DrugVsDiseasedata")
@

\begin{thebibliography}{}
\bibitem[Hu \textit{et~al}., 2009]{Hu09} Hu G, Agarwal P (2009) Human Disease-Drug Network Based on Genomic Expression Profiles, \textit{PLoS ONE}, \textbf{4}(8): e6536.
\bibitem[Shigemizu \textit{et~al}., 2012]{Shi12} Shigemizu D, Hu Z, Hung J-H, Huang C-L, Wang Y, \textit{et~al}. (2012) Using Functional Signatures to Identify Repositioned Drugs for Breast, Myelogenous Leukemia and Prostate Cancer. \textit{PLoS Comput Biol} \textbf{8}(2): e1002347.
\bibitem[Sirota \textit{et~al}., 2011]{Sirota11} Sirota M \textit{et~al}. (2011) Discovery and Preclinical Validation of Drug Indications Using Compendia of Public Gene Expression Data. \textit{Sci Trans Med}, \textbf{3:96ra77}.
\bibitem[Subramanian \textit{et~al}., 2005]{Sub05} Subramanian A \textit{et~al}. (2005) Gene set enrichment analysis: A knowledge-based approach for interpreting genome-wide expression profiles. \textit{PNAS}, \textbf{102}(43), 15545--15550.
\bibitem[Gentleman \textit{et~al}., 2004]{Gentleman04} Gentleman R \textit{et~al}. (2004) Bioconductor: open software development for computational biology and bioinformatics. \textit{Genome Biology}, \textbf{5}(10), R80.
\bibitem[Parkinson \textit{et~al}., 2010]{Parkinson10} Parkinson \textit{et~al}. (2010) ArrayExpress update---an archive of microarray and high-throughput sequencing-based functional genomics experiments.
\textit{Nucl. Acids Res.}, doi: 10.1093/nar/gkq1040.
\bibitem[Edgar \textit{et~al}., 2002]{Edgar02} Edgar R, Domrachev M, Lash AE. (2002) Gene Expression Omnibus: NCBI gene expression and hybridization array data repository. \textit{Nucl. Acids Res.}, \textbf{30}(1), 207--210.
\bibitem[R 2008]{RDev08} R Development Core Team (2008). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. ISBN 3-900051-07-0.
\bibitem[Cline \textit{et~al}., 2007]{Cline07} Cline \textit{et~al}. (2007) Integration of biological networks and gene expression data using Cytoscape. \textit{Nature Protocols}, \textbf{2}, 2366--2382.
\end{thebibliography}
\end{document}