% Growth-curve modelling with Gaussian process regression (Genome Research, 2017).
@article{Tonner2017,
  title = {Detecting differential growth of microbial populations with {Gaussian} process regression},
  author = {Tonner, Peter D and Darnell, Cynthia L and Engelhardt, Barbara E and Schmid, Amy K},
  affiliation = {Program in Computational Biology and Bioinformatics, Duke University, Durham, North Carolina 27708, USA. Biology Department, Duke University, Durham, North Carolina 27708, USA. Biology Department, Duke University, Durham, North Carolina 27708, USA. Computer Science Department, Center for Statistics and Machine Learning, Princeton University, Princeton, New Jersey 08540, USA. Program in Computational Biology and Bioinformatics, Duke University, Durham, North Carolina 27708, USA. Biology Department, Duke University, Durham, North Carolina 27708, USA.},
  abstract = {Microbial growth curves are used to study differential effects of media, genetics, and stress on microbial population growth. Consequently, many modeling frameworks exist to capture microbial population growth measurements. However, current models are designed to quantify growth under conditions for which growth has a specific functional form. Extensions to these models are required to quantify the effects of perturbations, which often exhibit nonstandard growth curves. Rather than assume specific functional forms for experimental perturbations, we developed a general and robust model of microbial population growth curves using Gaussian process (GP) regression. GP regression modeling of high-resolution time-series growth data enables accurate quantification of population growth and allows explicit control of effects from other covariates such as genetic background. This framework substantially outperforms commonly used microbial population growth models, particularly when modeling growth data from environmentally stressed populations. We apply the GP growth model and develop statistical tests to quantify the differential effects of environmental perturbations on microbial growth across a large compendium of genotypes in archaea and yeast. This method accurately identifies known transcriptional regulators and implicates novel regulators of growth under standard and stress conditions in the model archaeal organism Halobacterium salinarum. For yeast, our method correctly identifies known phenotypes for a diversity of genetic backgrounds under cyclohexamide stress and also detects previously unidentified oxidative stress sensitivity across a subset of strains. Together, these results demonstrate that the GP models are interpretable, recapitulating biological knowledge of growth response while providing new insights into the relevant parameters affecting microbial population growth.},
  journal = {Genome Research},
  volume = {27},
  number = {2},
  pages = {320--333},
  month = feb,
  year = {2017}
}

% RNA-seq replicate-number benchmark (RNA, 2016).
% The abstract's non-ASCII "less than or approximately equal" sign is written
% as $\lesssim$, which needs the amssymb package if the abstract is typeset.
@article{Schurch2016How,
  title = {How many biological replicates are needed in an {RNA-seq} experiment and which differential expression tool should you use?},
  author = {Schurch, Nicholas J and Schofield, Piet{\'a} and Gierli{\'n}ski, Marek and Cole, Christian and Sherstnev, Alexander and Singh, Vijender and Wrobel, Nicola and Gharbi, Karim and Simpson, Gordon G and Owen-Hughes, Tom and Blaxter, Mark and Barton, Geoffrey J},
  affiliation = {Division of Computational Biology, College of Life Sciences, University of Dundee, Dundee DD1 5EH, United Kingdom. Division of Computational Biology, College of Life Sciences, University of Dundee, Dundee DD1 5EH, United Kingdom Division of Gene Regulation and Expression, College of Life Sciences, University of Dundee, Dundee DD1 5EH, United Kingdom. Division of Computational Biology, College of Life Sciences, University of Dundee, Dundee DD1 5EH, United Kingdom Division of Gene Regulation and Expression, College of Life Sciences, University of Dundee, Dundee DD1 5EH, United Kingdom. Division of Computational Biology, College of Life Sciences, University of Dundee, Dundee DD1 5EH, United Kingdom. Division of Computational Biology, College of Life Sciences, University of Dundee, Dundee DD1 5EH, United Kingdom. Division of Gene Regulation and Expression, College of Life Sciences, University of Dundee, Dundee DD1 5EH, United Kingdom. Edinburgh Genomics, University of Edinburgh, Edinburgh EH9 3JT, United Kingdom. Edinburgh Genomics, University of Edinburgh, Edinburgh EH9 3JT, United Kingdom. Division of Plant Sciences, College of Life Sciences, University of Dundee, Dundee DD1 5EH, United Kingdom. Division of Gene Regulation and Expression, College of Life Sciences, University of Dundee, Dundee DD1 5EH, United Kingdom. Edinburgh Genomics, University of Edinburgh, Edinburgh EH9 3JT, United Kingdom. Division of Computational Biology, College of Life Sciences, University of Dundee, Dundee DD1 5EH, United Kingdom Division of Gene Regulation and Expression, College of Life Sciences, University of Dundee, Dundee DD1 5EH, United Kingdom Division of Biological Chemistry and Drug Discovery, College of Life Sciences, University of Dundee, Dundee DD1 5EH, United Kingdom.},
  abstract = {RNA-seq is now the technology of choice for genome-wide differential gene expression experiments, but it is not clear how many biological replicates are needed to ensure valid biological interpretation of the results or which statistical tools are best for analyzing the data. An RNA-seq experiment with 48 biological replicates in each of two conditions was performed to answer these questions and provide guidelines for experimental design. With three biological replicates, nine of the 11 tools evaluated found only 20\%-40\% of the significantly differentially expressed (SDE) genes identified with the full set of 42 clean replicates. This rises to >85\% for the subset of SDE genes changing in expression by more than fourfold. To achieve >85\% for all SDE genes regardless of fold change requires more than 20 biological replicates. The same nine tools successfully control their false discovery rate at {$\lesssim$}5\% for all numbers of replicates, while the remaining two tools fail to control their FDR adequately, particularly for low numbers of replicates. For future RNA-seq experiments, these results suggest that at least six biological replicates should be used, rising to at least 12 when it is important to identify SDE genes for all fold changes. If fewer than 12 replicates are used, a superior combination of true positive and false positive performances makes edgeR and DESeq2 the leading tools. For higher replicate numbers, minimizing false positives is more important and DESeq marginally outperforms the other tools.},
  journal = {RNA},
  volume = {22},
  number = {6},
  pages = {839--851},
  month = jun,
  year = {2016},
  keywords = {RNA-seq; benchmarking; differential expression; experimental design; replication; statistical power; yeast}
}

% Benjamini-Hochberg false discovery rate paper; the journal field continues on the next line of the file.
@article{Benjamini1995Controlling,
  author = {Benjamini, Yoav and Hochberg, Yosef},
  citeulike-article-id = {13706046},
  citeulike-linkout-0 = {http://www.jstor.org/stable/2346101},
  journal = {Journal of the Royal Statistical Society.
Series B (Methodological)},
  keywords = {deseq2, workflow},
  number = {1},
  pages = {289--300},
  posted-at = {2015-08-19 14:51:55},
  priority = {2},
  title = {Controlling the False Discovery Rate: A Practical and Powerful Approach to Multiple Testing},
  url = {http://www.jstor.org/stable/2346101},
  volume = {57},
  year = {1995}
}

% biomaRt protocol paper. Only the proper nouns in the title are brace-protected
% so that bibliography styles can still apply their own casing to the rest.
@article{Durinck2009Mapping,
  abstract = {Genomic experiments produce multiple views of biological systems, among them are DNA sequence and copy number variation, and mRNA and protein abundance. Understanding these systems needs integrated bioinformatic analysis. Public databases such as Ensembl provide relationships and mappings between the relevant sets of probe and target molecules. However, the relationships can be biologically complex and the content of the databases is dynamic. We demonstrate how to use the computational environment R to integrate and jointly analyze experimental datasets, employing BioMart web services to provide the molecule mappings. We also discuss typical problems that are encountered in making gene-to-transcript-to-protein mappings. The approach provides a flexible, programmable and reproducible basis for state-of-the-art bioinformatic data integration.},
  author = {Durinck, Steffen and Spellman, Paul T. and Birney, Ewan and Huber, Wolfgang},
  citeulike-article-id = {5219096},
  citeulike-linkout-0 = {https://doi.org/10.1038/nprot.2009.97},
  citeulike-linkout-1 = {https://doi.org/10.1038/nprot.2009.97},
  citeulike-linkout-2 = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3159387/},
  citeulike-linkout-3 = {http://view.ncbi.nlm.nih.gov/pubmed/19617889},
  citeulike-linkout-4 = {http://www.hubmed.org/display.cgi?uids=19617889},
  day = {23},
  doi = {10.1038/nprot.2009.97},
  issn = {1750-2799},
  journal = {Nature Protocols},
  keywords = {workflow},
  month = jul,
  number = {8},
  pages = {1184--1191},
  pmcid = {PMC3159387},
  pmid = {19617889},
  posted-at = {2015-08-18 15:38:32},
  priority = {2},
  publisher = {Nature Publishing Group},
  title = {Mapping identifiers for the integration of genomic datasets with the {R/Bioconductor} package {biomaRt}},
  url = {https://doi.org/10.1038/nprot.2009.97},
  volume = {4},
  year = {2009}
}

% DESeq2 methods paper. Genome Biology uses article numbers, so pages holds the
% article number (550) without the exporter's "+" marker.
@article{Love2014Moderated,
  abstract = {In comparative high-throughput sequencing assays, a fundamental task is the analysis of count data, such as read counts per gene in RNA-seq, for evidence of systematic changes across experimental conditions. Small replicate numbers, discreteness, large dynamic range and the presence of outliers require a suitable statistical approach. We present DESeq2, a method for differential analysis of count data, using shrinkage estimation for dispersions and fold changes to improve stability and interpretability of estimates. This enables a more quantitative analysis focused on the strength rather than the mere presence of differential expression. The DESeq2 package is available at http://www.bioconductor.org/packages/release/bioc/html/DESeq2.html.},
  author = {Love, Michael I. and Huber, Wolfgang and Anders, Simon},
  citeulike-article-id = {13505832},
  citeulike-linkout-0 = {https://doi.org/10.1186/s13059-014-0550-8},
  citeulike-linkout-1 = {http://view.ncbi.nlm.nih.gov/pubmed/25516281},
  citeulike-linkout-2 = {http://www.hubmed.org/display.cgi?uids=25516281},
  day = {05},
  doi = {10.1186/s13059-014-0550-8},
  issn = {1465-6906},
  journal = {Genome Biology},
  keywords = {mine, workflow},
  month = dec,
  number = {12},
  pages = {550},
  pmid = {25516281},
  posted-at = {2015-08-18 15:29:41},
  priority = {2},
  publisher = {BioMed Central Ltd},
  title = {Moderated estimation of fold change and dispersion for {RNA-seq} data with {DESeq2}},
  url = {https://doi.org/10.1186/s13059-014-0550-8},
  volume = {15},
  year = {2014}
}

% Fission yeast non-coding RNA study; the author list continues on the next line of the file.
@article{Leong2014Global,
  abstract = {Non-coding RNAs (ncRNAs) are frequent and prevalent across the taxa. Although individual non-coding loci have been assigned a function, most are uncharacterized. Their global biological significance is unproven and remains controversial. Here we investigate the role played by ncRNAs in the stress response of Schizosaccharomyces pombe. We integrate global proteomics and RNA sequencing data to identify a systematic programme in which elevated antisense RNA arising both from ncRNAs and from 3'-overlapping convergent gene pairs is directly associated with substantial reductions in protein levels throughout the genome. We describe an extensive array of ncRNAs with trans associations that have the potential to influence multiple pathways. Deletion of one such locus reduces levels of atf1, a transcription factor downstream of the stress-activated mitogen-activated protein kinase (MAPK) pathway, and alters sensitivity to oxidative stress. These non-coding transcripts therefore regulate specific stress responses, adding unanticipated information-processing capacity to the MAPK signalling system.},
  author = {Leong, Hui S. and Dawson, Keren and Wirth, Chris and Li, Yaoyong and Connolly, Yvonne and Smith, Duncan L.
and Wilkinson, Caroline R. and Miller, Crispin J.},
  citeulike-article-id = {13705386},
  citeulike-linkout-0 = {https://doi.org/10.1038/ncomms4947},
  citeulike-linkout-1 = {http://view.ncbi.nlm.nih.gov/pubmed/24853205},
  citeulike-linkout-2 = {http://www.hubmed.org/display.cgi?uids=24853205},
  doi = {10.1038/ncomms4947},
  issn = {2041-1723},
  journal = {Nature Communications},
  keywords = {workflow},
  pages = {3947},
  pmid = {24853205},
  posted-at = {2015-08-18 15:16:55},
  priority = {2},
  title = {A global non-coding {RNA} system modulates fission yeast protein levels in response to stress},
  url = {https://doi.org/10.1038/ncomms4947},
  volume = {5},
  year = {2014}
}

% svaseq paper. The exporter's "000" page placeholder is replaced by the
% article's electronic locator (e161) -- TODO confirm against the publisher record.
@article{Leek2014Svaseq,
  abstract = {It is now known that unwanted noise and unmodeled artifacts such as batch effects can dramatically reduce the accuracy of statistical inference in genomic experiments. These sources of noise must be modeled and removed to accurately measure biological variability and to obtain correct statistical inference when performing high-throughput genomic analysis. We introduced surrogate variable analysis (sva) for estimating these artifacts by (i) identifying the part of the genomic data only affected by artifacts and (ii) estimating the artifacts with principal components or singular vectors of the subset of the data matrix. The resulting estimates of artifacts can be used in subsequent analyses as adjustment factors to correct analyses. Here I describe a version of the sva approach specifically created for count data or FPKMs from sequencing experiments based on appropriate data transformation. I also describe the addition of supervised sva (ssva) for using control probes to identify the part of the genomic data only affected by artifacts. I present a comparison between these versions of sva and other methods for batch effect estimation on simulated data, real count-based data and FPKM-based data. These updates are available through the sva Bioconductor package and I have made fully reproducible analysis using these methods available from: https://github.com/jtleek/svaseq. {\copyright} The Author(s) 2014. Published by Oxford University Press on behalf of Nucleic Acids Research.},
  author = {Leek, Jeffrey T.},
  citeulike-article-id = {13385083},
  citeulike-linkout-0 = {https://doi.org/10.1093/nar/gku864},
  citeulike-linkout-1 = {http://nar.oxfordjournals.org/content/early/2014/10/07/nar.gku864.abstract},
  citeulike-linkout-2 = {http://nar.oxfordjournals.org/content/early/2014/10/07/nar.gku864.full.pdf},
  citeulike-linkout-3 = {http://view.ncbi.nlm.nih.gov/pubmed/25294822},
  citeulike-linkout-4 = {http://www.hubmed.org/display.cgi?uids=25294822},
  day = {1},
  doi = {10.1093/nar/gku864},
  issn = {1362-4962},
  journal = {Nucleic Acids Research},
  keywords = {workflow},
  month = dec,
  number = {21},
  pages = {e161},
  pmid = {25294822},
  posted-at = {2015-08-18 15:16:02},
  priority = {2},
  publisher = {Oxford University Press},
  title = {{svaseq}: removing batch effects and other unwanted noise from sequencing data},
  url = {https://doi.org/10.1093/nar/gku864},
  volume = {42},
  year = {2014}
}

% ReportingTools Bioconductor package paper.
@article{Huntley2013ReportingTools,
  abstract = {Summary: It is common for computational analyses to generate large amounts of complex data that are difficult to process and share with collaborators. Standard methods are needed to transform such data into a more useful and intuitive format. We present ReportingTools, a Bioconductor package, that automatically recognizes and transforms the output of many common Bioconductor packages into rich, interactive, HTML-based reports. Reports are not generic, but have been individually designed to reflect content specific to the result type detected. Tabular output included in reports is sortable, filterable and searchable and contains context-relevant hyperlinks to external databases. Additionally, in-line graphics have been developed for specific analysis types and are embedded by default within table rows, providing a useful visual summary of underlying raw data. ReportingTools is highly flexible and reports can be easily customized for specific applications using the well-defined API.},
  author = {Huntley, Melanie A. and Larson, Jessica L. and Chaivorapol, Christina and Becker, Gabriel and Lawrence, Michael and Hackney, Jason A. and Kaminker, Joshua S.},
  citeulike-article-id = {12728071},
  citeulike-linkout-0 = {https://doi.org/10.1093/bioinformatics/btt551},
  citeulike-linkout-1 = {http://bioinformatics.oxfordjournals.org/content/29/24/3220.abstract},
  citeulike-linkout-2 = {http://bioinformatics.oxfordjournals.org/content/29/24/3220.full.pdf},
  citeulike-linkout-3 = {http://bioinformatics.oxfordjournals.org/cgi/content/abstract/29/24/3220},
  citeulike-linkout-4 = {http://view.ncbi.nlm.nih.gov/pubmed/24078713},
  citeulike-linkout-5 = {http://www.hubmed.org/display.cgi?uids=24078713},
  day = {15},
  doi = {10.1093/bioinformatics/btt551},
  issn = {1460-2059},
  journal = {Bioinformatics},
  keywords = {workflow},
  month = dec,
  number = {24},
  pages = {3220--3221},
  pmid = {24078713},
  posted-at = {2015-08-18 15:13:59},
  priority = {2},
  publisher = {Oxford University Press},
  title = {{ReportingTools}: an automated result processing and presentation toolkit for high-throughput genomic analyses},
  url = {https://doi.org/10.1007/978-0-387-98141-3},
  volume = {29},
  year = {2013}
}

% ggplot2 book (Springer). The package name is brace-protected to keep its lowercase spelling.
@book{Wickham2009Ggplot2,
  address = {New York, NY},
  author = {Wickham, Hadley},
  booktitle = {ggplot2},
  citeulike-article-id = {10715717},
  citeulike-linkout-0 = {https://doi.org/10.1007/978-0-387-98141-3},
  citeulike-linkout-1 = {http://www.springerlink.com/content/978-0-387-98140-6},
  doi = {10.1007/978-0-387-98141-3},
  isbn = {978-0-387-98140-6},
  keywords = {workflow},
  posted-at = {2015-08-18 15:12:19},
  priority = {2},
  publisher = {Springer New York},
  title = {{ggplot2}: Elegant Graphics for Data Analysis},
  url = {https://doi.org/10.1007/978-0-387-98141-3},
  year = {2009}
}

% SAM format and SAMtools paper; the year value continues on the next line of the file.
@article{Li2009Sequence,
  abstract = {The Sequence Alignment/Map (SAM) format is a generic alignment format for storing read alignments against reference sequences, supporting short and long reads (up to 128 Mbp) produced by different sequencing platforms. It is flexible in style, compact in size, efficient in random access and is the format in which alignments from the 1000 Genomes Project are released. SAMtools implements various utilities for post-processing alignments in the SAM format, such as indexing, variant caller and alignment viewer, and thus provides universal tools for processing read alignments. http://samtools.sourceforge.net.},
  author = {Li, Heng and Handsaker, Bob and Wysoker, Alec and Fennell, Tim and Ruan, Jue and Homer, Nils and Marth, Gabor and Abecasis, Goncalo and Durbin, Richard and {1000 Genome Project Data Processing Subgroup}},
  citeulike-article-id = {4778506},
  citeulike-linkout-0 = {https://doi.org/10.1093/bioinformatics/btp352},
  citeulike-linkout-1 = {http://bioinformatics.oxfordjournals.org/content/25/16/2078.abstract},
  citeulike-linkout-2 = {http://bioinformatics.oxfordjournals.org/content/25/16/2078.full.pdf},
  citeulike-linkout-3 = {http://bioinformatics.oxfordjournals.org/cgi/content/abstract/25/16/2078},
  citeulike-linkout-4 = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2723002/},
  citeulike-linkout-5 = {http://view.ncbi.nlm.nih.gov/pubmed/19505943},
  citeulike-linkout-6 = {http://www.hubmed.org/display.cgi?uids=19505943},
  day = {15},
  doi = {10.1093/bioinformatics/btp352},
  issn = {1367-4811},
  journal = {Bioinformatics},
  keywords = {workflow},
  month = aug,
  number = {16},
  pages = {2078--2079},
  pmcid = {PMC2723002},
  pmid = {19505943},
  posted-at = {2015-08-18 15:05:40},
  priority = {2},
  publisher = {Oxford University Press},
  title = {The {Sequence Alignment/Map} format and {SAMtools}},
  url = {https://doi.org/10.1093/bioinformatics/btp352},
  volume = {25},
  year =
{2009}
}

% STAR aligner paper.
@article{Dobin2013STAR,
  abstract = {Motivation: Accurate alignment of high-throughput RNA-seq data is a challenging and yet unsolved problem because of the non-contiguous transcript structure, relatively short read lengths and constantly increasing throughput of the sequencing technologies. Currently available RNA-seq aligners suffer from high mapping error rates, low mapping speed, read length limitation and mapping biases.},
  author = {Dobin, Alexander and Davis, Carrie A. and Schlesinger, Felix and Drenkow, Jorg and Zaleski, Chris and Jha, Sonali and Batut, Philippe and Chaisson, Mark and Gingeras, Thomas R.},
  citeulike-article-id = {11550352},
  citeulike-linkout-0 = {https://doi.org/10.1093/bioinformatics/bts635},
  citeulike-linkout-1 = {http://bioinformatics.oxfordjournals.org/content/early/2012/10/25/bioinformatics.bts635.abstract},
  citeulike-linkout-2 = {http://bioinformatics.oxfordjournals.org/content/early/2012/10/25/bioinformatics.bts635.full.pdf},
  citeulike-linkout-3 = {http://bioinformatics.oxfordjournals.org/cgi/content/abstract/29/1/15},
  citeulike-linkout-4 = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3530905/},
  citeulike-linkout-5 = {http://view.ncbi.nlm.nih.gov/pubmed/23104886},
  citeulike-linkout-6 = {http://www.hubmed.org/display.cgi?uids=23104886},
  day = {01},
  doi = {10.1093/bioinformatics/bts635},
  issn = {1460-2059},
  journal = {Bioinformatics},
  keywords = {rnaseq, workflow},
  month = jan,
  number = {1},
  pages = {15--21},
  pmcid = {PMC3530905},
  pmid = {23104886},
  posted-at = {2015-08-18 15:05:35},
  priority = {2},
  publisher = {Oxford University Press},
  title = {{STAR}: ultrafast universal {RNA-seq} aligner},
  url = {https://doi.org/10.1093/bioinformatics/bts635},
  volume = {29},
  year = {2013}
}

% Ensembl 2014 database paper. Accented letters use the {\'o} / {\"a} brace form
% so that BibTeX treats each as a single letter when sorting and building labels.
@article{Flicek2014Ensembl,
  abstract = {Ensembl (http://www.ensembl.org) creates tools and data resources to facilitate genomic analysis in chordate species with an emphasis on human, major vertebrate model organisms and farm animals. Over the past year we have increased the number of species that we support to 77 and expanded our genome browser with a new scrollable overview and improved variation and phenotype views. We also report updates to our core datasets and improvements to our gene homology relationships from the addition of new species. Our REST service has been extended with additional support for comparative genomics and ontology information. Finally, we provide updated information about our methods for data access and resources for user training.},
  author = {Flicek, Paul and Amode, M. Ridwan and Barrell, Daniel and Beal, Kathryn and Billis, Konstantinos and Brent, Simon and Carvalho-Silva, Denise and Clapham, Peter and Coates, Guy and Fitzgerald, Stephen and Gil, Laurent and Gir{\'o}n, Carlos G. and Gordon, Leo and Hourlier, Thibaut and Hunt, Sarah and Johnson, Nathan and Juettemann, Thomas and K{\"a}h{\"a}ri, Andreas K. and Keenan, Stephen and Kulesha, Eugene and Martin, Fergal J. and Maurel, Thomas and McLaren, William M. and Murphy, Daniel N. and Nag, Rishi and Overduin, Bert and Pignatelli, Miguel and Pritchard, Bethan and Pritchard, Emily and Riat, Harpreet S. and Ruffier, Magali and Sheppard, Daniel and Taylor, Kieron and Thormann, Anja and Trevanion, Stephen J. and Vullo, Alessandro and Wilder, Steven P. and Wilson, Mark and Zadissa, Amonida and Aken, Bronwen L. and Birney, Ewan and Cunningham, Fiona and Harrow, Jennifer and Herrero, Javier and Hubbard, Tim J. P. and Kinsella, Rhoda and Muffato, Matthieu and Parker, Anne and Spudich, Giulietta and Yates, Andy and Zerbino, Daniel R. and Searle, Stephen M. J.},
  citeulike-article-id = {12827863},
  citeulike-linkout-0 = {https://doi.org/10.1093/nar/gkt1196},
  citeulike-linkout-1 = {http://nar.oxfordjournals.org/content/42/D1/D749.abstract},
  citeulike-linkout-2 = {http://nar.oxfordjournals.org/content/42/D1/D749.full.pdf},
  citeulike-linkout-3 = {http://nar.oxfordjournals.org/cgi/content/abstract/42/D1/D749},
  citeulike-linkout-4 = {http://view.ncbi.nlm.nih.gov/pubmed/24316576},
  citeulike-linkout-5 = {http://www.hubmed.org/display.cgi?uids=24316576},
  day = {01},
  doi = {10.1093/nar/gkt1196},
  issn = {1362-4962},
  journal = {Nucleic Acids Research},
  keywords = {workflow},
  month = jan,
  number = {D1},
  pages = {D749--D755},
  pmid = {24316576},
  posted-at = {2015-08-18 15:04:12},
  priority = {2},
  publisher = {Oxford University Press},
  title = {{Ensembl} 2014},
  url = {https://doi.org/10.1093/nar/gkt1196},
  volume = {42},
  year = {2014}
}

% Airway smooth muscle RNA-Seq study. Greek letters in the abstract are written
% in math mode instead of raw non-ASCII characters. The author list continues on
% the next line of the file.
@article{Himes2014RNASeq,
  abstract = {Asthma is a chronic inflammatory respiratory disease that affects over 300 million people worldwide. Glucocorticoids are a mainstay therapy for asthma because they exert anti-inflammatory effects in multiple lung tissues, including the airway smooth muscle (ASM). However, the mechanism by which glucocorticoids suppress inflammation in ASM remains poorly understood. Using RNA-Seq, a high-throughput sequencing method, we characterized transcriptomic changes in four primary human ASM cell lines that were treated with dexamethasone--a potent synthetic glucocorticoid (1 $\mu$M for 18 hours). Based on a Benjamini-Hochberg corrected p-value <0.05, we identified 316 differentially expressed genes, including both well known (DUSP1, KLF15, PER1, TSC22D3) and less investigated (C7, CCDC69, CRISPLD2) glucocorticoid-responsive genes. CRISPLD2, which encodes a secreted protein previously implicated in lung development and endotoxin regulation, was found to have SNPs that were moderately associated with inhaled corticosteroid resistance and bronchodilator response among asthma patients in two previously conducted genome-wide association studies. Quantitative RT-PCR and Western blotting showed that dexamethasone treatment significantly increased CRISPLD2 mRNA and protein expression in ASM cells. CRISPLD2 expression was also induced by the inflammatory cytokine IL1$\beta$, and small interfering RNA-mediated knockdown of CRISPLD2 further increased IL1$\beta$-induced expression of IL6 and IL8. Our findings offer a comprehensive view of the effect of a glucocorticoid on the ASM transcriptome and identify CRISPLD2 as an asthma pharmacogenetics candidate gene that regulates anti-inflammatory effects of glucocorticoids in the ASM.},
  author = {Himes, Blanca E. and Jiang, Xiaofeng and Wagner, Peter and Hu, Ruoxi and Wang, Qiyu and Klanderman, Barbara and Whitaker, Reid M. and Duan, Qingling and Lasky-Su, Jessica and Nikolos, Christina and Jester, William and Johnson, Martin and Panettieri, Reynold A. and Tantisira, Kelan G. and Weiss, Scott T.
and Lu, Quan},
  citeulike-article-id = {13705379},
  citeulike-linkout-0 = {https://doi.org/10.1371/journal.pone.0099625},
  citeulike-linkout-1 = {http://view.ncbi.nlm.nih.gov/pubmed/24926665},
  citeulike-linkout-2 = {http://www.hubmed.org/display.cgi?uids=24926665},
  doi = {10.1371/journal.pone.0099625},
  issn = {1932-6203},
  journal = {PLoS ONE},
  keywords = {rnaseq, workflow},
  number = {6},
  pages = {e99625},
  pmid = {24926665},
  posted-at = {2015-08-18 15:02:37},
  priority = {2},
  title = {{RNA-Seq} transcriptome profiling identifies {CRISPLD2} as a glucocorticoid responsive gene that modulates cytokine function in airway smooth muscle cells},
  url = {https://doi.org/10.1371/journal.pone.0099625},
  volume = {9},
  year = {2014}
}

% Statistica Sinica is a journal, so this is an article (venue in journal, not
% booktitle). The abstract's lost "fi"/"ff" ligatures and run-together words from
% the CiteSeerX text extraction are restored. The first author's given name
% (Sandrine) and the volume are supplied from outside this file -- TODO confirm.
@article{Dudoit2002Statistical,
  abstract = {DNA microarrays are a new and promising biotechnology which allows the monitoring of expression levels in cells for thousands of genes simultaneously. The present paper describes statistical methods for the identification of differentially expressed genes in replicated cDNA microarray experiments. Although it is not the main focus of the paper, new methods for the important pre-processing steps of image analysis and normalization are proposed. Given suitably normalized data, the biological question of differential expression is restated as a problem in multiple hypothesis testing: the simultaneous test for each gene of the null hypothesis of no association between the expression levels and responses or covariates of interest. Differentially expressed genes are identified based on adjusted p-values for a multiple testing procedure which strongly controls the family-wise Type I error rate and takes into account the dependence structure between the gene expression levels. No specific parametric form is assumed for the distribution of the test statistics and a permutation procedure is used to estimate adjusted p-values. Several data displays are suggested for the visual identification of differentially expressed genes and of important features of these genes. The above methods are applied to microarray data from a study of gene expression in the livers of mice with very low HDL cholesterol levels. The genes identified using data from multiple slides are compared to those identified by recently published single-slide methods. Key words and phrases: Adjusted p-value, differential gene expression, DNA microarray, image analysis, multiple testing, normalization, permutation test.},
  author = {Dudoit, Sandrine and Yang, Yee H. and Callow, Matthew J. and Speed, Terence P.},
  citeulike-article-id = {10512110},
  citeulike-linkout-0 = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.117.9702},
  journal = {Statistica Sinica},
  keywords = {workflow},
  pages = {111--139},
  posted-at = {2015-08-18 15:00:11},
  priority = {2},
  title = {Statistical methods for identifying differentially expressed genes in replicated {cDNA} microarray experiments},
  url = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.117.9702},
  volume = {12},
  year = {2002}
}

% Survey of Excel-induced gene name errors; the abstract (still quote-delimited)
% continues on the next line of the file.
@article{Ziemann2016Excel,
  title = {Gene name errors are widespread in the scientific literature},
  author = {Ziemann, Mark and Eren, Yotam and El-Osta, Assam},
  abstract = "The spreadsheet software Microsoft Excel, when used with default settings, is known to convert gene names to dates and floating-point numbers.
A programmatic scan of leading genomics journals reveals that approximately one-fifth of papers with supplementary Excel gene lists contain erroneous gene name conversions.",
  journal = {Genome Biology},
  volume = {17},
  number = {1},
  pages = {1--3},
  year = {2016}
}

% apeglm shrinkage estimator paper. The issue is stored in the standard number
% field; "issue" is not a field that classic BibTeX styles read.
@article{Zhu2019-apeglm,
  title = {Heavy-tailed prior distributions for sequence count data: removing the noise and preserving large differences},
  author = {Zhu, Anqi and Ibrahim, Joseph G and Love, Michael I},
  journal = {Bioinformatics},
  year = {2019},
  volume = {35},
  number = {12},
  pages = {2084--2092}
}

% Original report of Excel gene-name mangling. The publication day is held in a
% separate day field, as in the other entries of this file, so month is a plain macro.
@article{Zeeberg2004Excel,
  title = {Mistaken identifiers: gene name errors can be introduced inadvertently when using {Excel} in bioinformatics},
  author = {Zeeberg, Barry R and Riss, Joseph and Kane, David W and Bussey, Kimberly J and Uchio, Edward and Linehan, W Marston and Barrett, J Carl and Weinstein, John N},
  abstract = {BACKGROUND: When processing microarray data sets, we recently noticed that some gene names were being changed inadvertently to non-gene names. RESULTS: A little detective work traced the problem to default date format conversions and floating-point format conversions in the very useful Excel program package. The date conversions affect at least 30 gene names; the floating-point conversions affect at least 2,000 if Riken identifiers are included. These conversions are irreversible; the original gene names cannot be recovered. CONCLUSIONS: Users of Excel for analyses involving gene names should be aware of this problem, which can cause genes, including medically important ones, to be lost from view and which has contaminated even carefully curated public databases. We provide work-arounds and scripts for circumventing the problem.},
  journal = {BMC Bioinformatics},
  volume = {5},
  pages = {80},
  day = {23},
  month = jun,
  year = {2004},
  keywords = {Animals; Computational Biology; Computational Biology: classification; Computational Biology: standards; Genes; Humans; Mice; Oligonucleotide Array Sequence Analysis; Oligonucleotide Array Sequence Analysis: classific; Research Design; Software; Software: classification; Software: standards}
}

% Bioconductor overview paper. Accents use the brace form BibTeX sorts correctly;
% the compound surname "Corrada Bravo" is entered in "Last, First" form
% (reconstructed from the exporter's garbled "Bravo, Hector Corrada C." -- TODO
% confirm). The author list continues on the next line of the file.
@article{Huber2015Orchestrating,
  abstract = {Bioconductor is an open-source, open-development software project for the analysis and comprehension of high-throughput data in genomics and molecular biology. The project aims to enable interdisciplinary research, collaboration and rapid development of scientific software. Based on the statistical programming language R, Bioconductor comprises 934 interoperable packages contributed by a large, diverse community of scientists. Packages cover a range of bioinformatic and statistical applications. They undergo formal initial review and continuous automated testing. We present an overview for prospective users and contributors.},
  author = {Huber, Wolfgang and Carey, Vincent J. and Gentleman, Robert and Anders, Simon and Carlson, Marc and Carvalho, Benilton S. and Corrada Bravo, H{\'e}ctor and Davis, Sean and Gatto, Laurent and Girke, Thomas and Gottardo, Raphael and Hahne, Florian and Hansen, Kasper D. and Irizarry, Rafael A. and Lawrence, Michael and Love, Michael I. and MacDonald, James and Obenchain, Valerie and Ole{\'s}, Andrzej K. and Pag{\`e}s, Herv{\'e} and Reyes, Alejandro and Shannon, Paul and Smyth, Gordon K.
and Tenenbaum, Dan and Waldron, Levi and Morgan, Martin}, citeulike-article-id = {13504287}, citeulike-linkout-0 = {https://doi.org/10.1038/nmeth.3252}, citeulike-linkout-1 = {https://doi.org/10.1038/nmeth.3252}, citeulike-linkout-2 = {http://view.ncbi.nlm.nih.gov/pubmed/25633503}, citeulike-linkout-3 = {http://www.hubmed.org/display.cgi?uids=25633503}, day = {29}, doi = {10.1038/nmeth.3252}, issn = {1548-7105}, journal = {Nature methods}, keywords = {mine, workflow}, month = feb, number = {2}, pages = {115--121}, pmid = {25633503}, posted-at = {2015-05-29 16:53:20}, priority = {2}, publisher = {Nature Publishing Group}, title = {{Orchestrating high-throughput genomic analysis with Bioconductor.}}, url = {https://doi.org/10.1038/nmeth.3252}, volume = {12}, year = {2015} } @article{Risso2014Normalization, author = {Risso, Davide and Ngai, John and Speed, Terence P. and Dudoit, Sandrine}, citeulike-article-id = {13336814}, citeulike-linkout-0 = {https://doi.org/10.1038/nbt.2931}, citeulike-linkout-1 = {https://doi.org/10.1038/nbt.2931}, day = {24}, doi = {10.1038/nbt.2931}, issn = {1087-0156}, journal = {Nature Biotechnology}, keywords = {rnaguide, workflow}, month = aug, number = {9}, pages = {896--902}, posted-at = {2014-09-11 20:51:49}, priority = {2}, publisher = {Nature Publishing Group}, title = {{Normalization of RNA-seq data using factor analysis of control genes or samples}}, url = {https://doi.org/10.1038/nbt.2931}, volume = {32}, year = {2014} } @article{Witten2011Classification, abstract = {{In recent years, advances in high throughput sequencing technology have led to a need for specialized methods for the analysis of digital gene expression data. While gene expression data measured on a microarray take on continuous values and can be modeled using the normal distribution, RNA sequencing data involve nonnegative counts and are more appropriately modeled using a discrete count distribution, such as the Poisson or the negative binomial. 
Consequently, analytic tools that assume a Gaussian distribution (such as classification methods based on linear discriminant analysis and clustering methods that use Euclidean distance) may not perform as well for sequencing data as methods that are based upon a more appropriate distribution. Here, we propose new approaches for performing classification and clustering of observations on the basis of sequencing data. Using a Poisson log linear model, we develop an analog of diagonal linear discriminant analysis that is appropriate for sequencing data. We also propose an approach for clustering sequencing data using a new dissimilarity measure that is based upon the Poisson model. We demonstrate the performances of these approaches in a simulation study, on three publicly available RNA sequencing data sets, and on a publicly available chromatin immunoprecipitation sequencing data set.}},
  author = {Witten, Daniela M.},
  citeulike-article-id = {13172798},
  citeulike-linkout-0 = {https://doi.org/10.1214/11-AOAS493},
  day = {28},
  doi = {10.1214/11-AOAS493},
  issn = {1932-6157},
  journal = {The Annals of Applied Statistics},
  keywords = {chipseq, ctsca, deseq2, rnaseq, workflow},
  month = dec,
  number = {4},
  pages = {2493--2518},
  posted-at = {2014-05-16 17:18:08},
  priority = {2},
  title = {Classification and clustering of sequencing data using a {Poisson} model},
  url = {https://doi.org/10.1214/11-AOAS493},
  volume = {5},
  year = {2011}
}

% Leng et al. (2013): EBSeq, empirical Bayes inference for RNA-seq genes and isoforms.
% Only names that must keep their case are braced in the title.
@article{Leng2013EBSeq,
  abstract = {{Motivation: Messenger RNA expression is important in normal development and differentiation, as well as in manifestation of disease. RNA-seq experiments allow for the identification of differentially expressed (DE) genes and their corresponding isoforms on a genome-wide scale. However, statistical methods are required to ensure that accurate identifications are made. A number of methods exist for identifying DE genes, but far fewer are available for identifying DE isoforms. When isoform DE is of interest, investigators often apply gene-level (count-based) methods directly to estimates of isoform counts. Doing so is not recommended. In short, estimating isoform expression is relatively straightforward for some groups of isoforms, but more challenging for others. This results in estimation uncertainty that varies across isoform groups. Count-based methods were not designed to accommodate this varying uncertainty, and consequently, application of them for isoform inference results in reduced power for some classes of isoforms and increased false discoveries for others.}},
  author = {Leng, N. and Dawson, J. A. and Thomson, J. A. and Ruotti, V. and Rissman, A. I. and Smits, B. M. G. and Haag, J. D. and Gould, M. N. and Stewart, R. M. and Kendziorski, C.},
  citeulike-article-id = {12074857},
  citeulike-linkout-0 = {https://doi.org/10.1093/bioinformatics/btt087},
  citeulike-linkout-1 = {http://bioinformatics.oxfordjournals.org/content/early/2013/02/21/bioinformatics.btt087.abstract},
  citeulike-linkout-2 = {http://bioinformatics.oxfordjournals.org/content/early/2013/02/21/bioinformatics.btt087.full.pdf},
  citeulike-linkout-3 = {http://bioinformatics.oxfordjournals.org/cgi/content/abstract/29/8/1035},
  citeulike-linkout-4 = {http://view.ncbi.nlm.nih.gov/pubmed/23428641},
  citeulike-linkout-5 = {http://www.hubmed.org/display.cgi?uids=23428641},
  day = {15},
  doi = {10.1093/bioinformatics/btt087},
  issn = {1460-2059},
  journal = {Bioinformatics},
  keywords = {deseq2, workflow},
  month = feb,
  number = {8},
  pages = {1035--1043},
  pmid = {23428641},
  posted-at = {2014-05-13 22:33:51},
  priority = {2},
  publisher = {Oxford University Press},
  title = {{EBSeq}: an empirical {Bayes} hierarchical model for inference in {RNA-seq} experiments},
  url = {https://doi.org/10.1093/bioinformatics/btt087},
  volume = {29},
  year = {2013}
}

% Anders, Pyl and Huber (2015): the HTSeq Python framework.
@article{Anders2015HTSeqa,
  abstract = {{Motivation: A large choice of tools exists for many standard tasks in the analysis of high-throughput
sequencing (HTS) data. However, once a project deviates from standard workflows, custom scripts are needed.}},
  author = {Anders, Simon and Pyl, Paul T. and Huber, Wolfgang},
  doi = {10.1093/bioinformatics/btu638},
  issn = {1460-2059},
  journal = {Bioinformatics},
  month = jan,
  number = {2},
  pages = {166--169},
  pmid = {25260700},
  publisher = {Oxford University Press},
  title = {{HTSeq} -- a {Python} framework to work with high-throughput sequencing data},
  url = {https://doi.org/10.1093/bioinformatics/btu638},
  volume = {31},
  year = {2015}
}

% Liao, Smyth and Shi (2014): featureCounts read summarization program.
% The lower-case program name is braced so sentence-casing styles keep it intact.
@article{Liao2014FeatureCounts,
  abstract = {{ Next-generation sequencing technologies generate millions of short sequence reads, which are usually aligned to a reference genome. In many applications, the key information required for downstream analysis is the number of reads mapping to each genomic feature, for example to each exon or each gene. The process of counting reads is called read summarization. Read summarization is required for a great variety of genomic analyses but has so far received relatively little attention in the literature.  We present featureCounts, a read summarization program suitable for counting reads generated from either RNA or genomic DNA sequencing experiments. featureCounts implements highly efficient chromosome hashing and feature blocking techniques. It is considerably faster than existing methods (by an order of magnitude for gene-level summarization) and requires far less computer memory. It works with either single or paired-end reads and provides a wide range of options appropriate for different sequencing applications.Availability and implementation: featureCounts is available under GNU General Public License as part of the Subread (http://subread.sourceforge.net) or Rsubread (http://www.bioconductor.org) software packages.  shi@wehi.edu.au.}},
  author = {Liao, Y. and Smyth, G. K. and Shi, W.},
  citeulike-article-id = {12796380},
  citeulike-linkout-0 = {https://doi.org/10.1093/bioinformatics/btt656},
  citeulike-linkout-1 = {http://bioinformatics.oxfordjournals.org/content/early/2013/11/13/bioinformatics.btt656.abstract},
  citeulike-linkout-2 = {http://bioinformatics.oxfordjournals.org/content/early/2013/11/13/bioinformatics.btt656.full.pdf},
  citeulike-linkout-3 = {http://bioinformatics.oxfordjournals.org/cgi/content/abstract/30/7/923},
  citeulike-linkout-4 = {http://view.ncbi.nlm.nih.gov/pubmed/24227677},
  citeulike-linkout-5 = {http://www.hubmed.org/display.cgi?uids=24227677},
  day = {13},
  doi = {10.1093/bioinformatics/btt656},
  issn = {1460-2059},
  journal = {Bioinformatics},
  keywords = {deseq2, workflow},
  month = apr,
  number = {7},
  pages = {923--930},
  pmid = {24227677},
  posted-at = {2014-02-18 20:28:26},
  priority = {2},
  publisher = {Oxford University Press},
  title = {{featureCounts}: an efficient general purpose program for assigning sequence reads to genomic features},
  url = {https://doi.org/10.1093/bioinformatics/btt656},
  volume = {30},
  year = {2014}
}

% Lawrence et al. (2013): Bioconductor infrastructure for genomic ranges.
@article{Lawrence2013Software,
  abstract = {{We describe Bioconductor infrastructure for representing and computing on annotated genomic ranges and integrating genomic data with the statistical computing features of R and its extensions. At the core of the infrastructure are three packages: IRanges, GenomicRanges, and GenomicFeatures. These packages provide scalable data structures for representing annotated ranges on the genome, with special support for transcript structures, read alignments and coverage vectors. Computational facilities include efficient algorithms for overlap and nearest neighbor detection, coverage calculation and other range operations.
This infrastructure directly supports more than 80 other Bioconductor packages, including those for sequence analysis, differential expression analysis and visualization.}},
  author = {Lawrence, Michael and Huber, Wolfgang and Pag{\`e}s, Herv{\'e} and Aboyoun, Patrick and Carlson, Marc and Gentleman, Robert and Morgan, Martin T. and Carey, Vincent J.},
  citeulike-article-id = {12548311},
  citeulike-linkout-0 = {https://doi.org/10.1371/journal.pcbi.1003118},
  citeulike-linkout-1 = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3738458/},
  citeulike-linkout-2 = {http://view.ncbi.nlm.nih.gov/pubmed/23950696},
  citeulike-linkout-3 = {http://www.hubmed.org/display.cgi?uids=23950696},
  day = {8},
  doi = {10.1371/journal.pcbi.1003118},
  editor = {Prlic, Andreas},
  issn = {1553-7358},
  journal = {PLoS Computational Biology},
  keywords = {deseq2, workflow},
  month = aug,
  number = {8},
  pages = {e1003118},
  pmcid = {PMC3738458},
  pmid = {23950696},
  posted-at = {2014-02-14 00:17:30},
  priority = {2},
  publisher = {Public Library of Science},
  title = {Software for Computing and Annotating Genomic Ranges},
  url = {https://doi.org/10.1371/journal.pcbi.1003118},
  volume = {9},
  year = {2013}
}

% Law et al. (2014): voom precision weights for RNA-seq read counts.
% `pages` holds the bare article identifier, without the exporter's trailing plus sign.
@article{Law2014Voom,
  abstract = {{Normal linear modeling methods are developed for analyzing read counts from RNA-seq experiments. The voom method estimates the mean-variance relationship of the log-counts, generates a precision weight for each observation, and then enters these into a limma empirical Bayes analysis pipeline. This opens access for RNA-seq analysts to a large body of methodology developed for microarrays. Simulation studies show that voom performs as well or better than count-based RNA-seq methods even when the data are generated according to the assumptions of the earlier methods. Two case studies illustrate the use of linear modeling and gene set testing methods.}},
  author = {Law, Charity W. and Chen, Yunshun and Shi, Wei and Smyth, Gordon K.},
  citeulike-article-id = {12965503},
  citeulike-linkout-0 = {https://doi.org/10.1186/gb-2014-15-2-r29},
  citeulike-linkout-1 = {http://view.ncbi.nlm.nih.gov/pubmed/24485249},
  citeulike-linkout-2 = {http://www.hubmed.org/display.cgi?uids=24485249},
  day = {03},
  doi = {10.1186/gb-2014-15-2-r29},
  issn = {1465-6906},
  journal = {Genome Biology},
  keywords = {deseq2, rnaguide, workflow},
  month = feb,
  number = {2},
  pages = {R29},
  pmid = {24485249},
  posted-at = {2014-02-13 20:56:00},
  priority = {2},
  publisher = {BioMed Central Ltd},
  title = {Voom: precision weights unlock linear model analysis tools for {RNA-seq} read counts},
  url = {https://doi.org/10.1186/gb-2014-15-2-r29},
  volume = {15},
  year = {2014}
}

% Bourgon, Gentleman and Huber (2010): independent filtering before multiple testing.
@article{Bourgon2010Independent,
  abstract = {{With high-dimensional data, variable-by-variable statistical testing is often used to select variables whose behavior differs across conditions. Such an approach requires adjustment for multiple testing, which can result in low statistical power. A two-stage approach that first filters variables by a criterion independent of the test statistic, and then only tests variables which pass the filter, can provide higher power. We show that use of some filter/test statistics pairs presented in the literature may, however, lead to loss of type I error control. We describe other pairs which avoid this problem. In an application to microarray data, we found that gene-by-gene filtering by overall variance followed by a t-test increased the number of discoveries by 50\%. We also show that this particular statistic pair induces a lower bound on fold-change among the set of discoveries. Independent filtering---using filter/test pairs that are independent under the null hypothesis but correlated under the alternative---is a general approach that can substantially increase the efficiency of experiments.}},
  author = {Bourgon, R. and Gentleman, R.
and Huber, W.},
  citeulike-article-id = {7203690},
  citeulike-linkout-0 = {https://doi.org/10.1073/pnas.0914005107},
  citeulike-linkout-1 = {http://www.pnas.org/content/early/2010/05/10/0914005107.abstract},
  citeulike-linkout-2 = {http://www.pnas.org/content/early/2010/05/10/0914005107.full.pdf},
  citeulike-linkout-3 = {http://www.pnas.org/cgi/content/abstract/107/21/9546},
  citeulike-linkout-4 = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2906865/},
  citeulike-linkout-5 = {http://view.ncbi.nlm.nih.gov/pubmed/20460310},
  citeulike-linkout-6 = {http://www.hubmed.org/display.cgi?uids=20460310},
  day = {11},
  doi = {10.1073/pnas.0914005107},
  issn = {1091-6490},
  journal = {Proceedings of the National Academy of Sciences},
  keywords = {deseq2, rnaguide, workflow},
  month = may,
  number = {21},
  pages = {9546--9551},
  pmcid = {PMC2906865},
  pmid = {20460310},
  posted-at = {2013-10-29 20:09:30},
  priority = {2},
  publisher = {National Academy of Sciences},
  title = {Independent filtering increases detection power for high-throughput experiments},
  url = {https://doi.org/10.1073/pnas.0914005107},
  volume = {107},
  year = {2010}
}

% Wu, Wang and Wu (2013): empirical Bayes shrinkage estimator for dispersion.
@article{Wu2013New,
  abstract = {{Recent developments in RNA-sequencing (RNA-seq) technology have led to a rapid increase in gene expression data in the form of counts. RNA-seq can be used for a variety of applications, however, identifying differential expression (DE) remains a key task in functional genomics. There have been a number of statistical methods for DE detection for RNA-seq data. One common feature of several leading methods is the use of the negative binomial (Gamma--Poisson mixture) model. That is, the unobserved gene expression is modeled by a gamma random variable and, given the expression, the sequencing read counts are modeled as Poisson. The distinct feature in various methods is how the variance, or dispersion, in the Gamma distribution is modeled and estimated. We evaluate several large public RNA-seq datasets and find that the estimated dispersion in existing methods does not adequately capture the heterogeneity of biological variance among samples. We present a new empirical Bayes shrinkage estimate of the dispersion parameters and demonstrate improved DE detection.}},
  author = {Wu, Hao and Wang, Chi and Wu, Zhijin},
  citeulike-article-id = {11345725},
  citeulike-linkout-0 = {https://doi.org/10.1093/biostatistics/kxs033},
  citeulike-linkout-1 = {http://biostatistics.oxfordjournals.org/content/early/2012/09/22/biostatistics.kxs033.abstract},
  citeulike-linkout-2 = {http://biostatistics.oxfordjournals.org/content/early/2012/09/22/biostatistics.kxs033.full.pdf},
  citeulike-linkout-3 = {http://view.ncbi.nlm.nih.gov/pubmed/23001152},
  citeulike-linkout-4 = {http://www.hubmed.org/display.cgi?uids=23001152},
  day = {01},
  doi = {10.1093/biostatistics/kxs033},
  issn = {1468-4357},
  journal = {Biostatistics},
  keywords = {deseq2, rnaseq, workflow},
  month = apr,
  number = {2},
  pages = {232--243},
  pmid = {23001152},
  posted-at = {2013-02-26 17:09:19},
  priority = {2},
  publisher = {Oxford University Press},
  title = {A new shrinkage estimator for dispersion improves differential expression detection in {RNA-seq} data},
  url = {https://doi.org/10.1093/biostatistics/kxs033},
  volume = {14},
  year = {2013}
}

% Kent et al. (2002): the UCSC human genome browser.
@article{Kent2002Human,
  abstract = {{As vertebrate genome sequences near completion and research refocuses to their analysis, the issue of effective genome annotation display becomes critical. A mature web tool for rapid and reliable display of any requested portion of the genome at any scale, together with several dozen aligned annotation tracks, is provided at http://genome.ucsc.edu.
This browser displays assembly contigs and gaps, mRNA and expressed sequence tag alignments, multiple gene predictions, cross-species homologies, single nucleotide polymorphisms, sequence-tagged sites, radiation hybrid data, transposon repeats, and more as a stack of coregistered tracks. Text and sequence-based searches provide quick and precise access to any region of specific interest. Secondary links from individual features lead to sequence details and supplementary off-site databases. One-half of the annotation tracks are computed at the University of California, Santa Cruz from publicly available sequence data; collaborators worldwide provide the rest. Users can stably add their own custom tracks to the browser for educational or research purposes. The conceptual and technical framework of the browser, its underlying MYSQL database, and overall use are described. The web site currently serves over 50,000 pages per day to over 3000 different users.}},
  author = {Kent, W. James and Sugnet, Charles W. and Furey, Terrence S. and Roskin, Krishna M. and Pringle, Tom H. and Zahler, Alan M. and Haussler, David},
  citeulike-article-id = {2009259},
  citeulike-linkout-0 = {https://doi.org/10.1101/gr.229102},
  citeulike-linkout-1 = {https://doi.org/10.1101/gr.229102.\%20article\%20published\%20online\%20before\%20print\%20in\%20may\%202002},
  citeulike-linkout-2 = {http://genome.cshlp.org/content/12/6/996.full.abstract},
  citeulike-linkout-3 = {http://genome.cshlp.org/content/12/6/996.full.full.pdf},
  citeulike-linkout-4 = {http://www.genome.org/cgi/content/abstract/12/6/996},
  citeulike-linkout-5 = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC186604/},
  citeulike-linkout-6 = {http://view.ncbi.nlm.nih.gov/pubmed/12045153},
  citeulike-linkout-7 = {http://www.hubmed.org/display.cgi?uids=12045153},
  day = {1},
  doi = {10.1101/gr.229102},
  issn = {1088-9051},
  journal = {Genome Research},
  keywords = {ctsca, workflow},
  month = jun,
  number = {6},
  pages = {996--1006},
  pmcid = {PMC186604},
  pmid = {12045153},
  posted-at = {2012-07-26 16:04:05},
  priority = {2},
  publisher = {Cold Spring Harbor Laboratory Press},
  title = {The human genome browser at {UCSC}},
  url = {https://doi.org/10.1101/gr.229102},
  volume = {12},
  year = {2002}
}

% Robinson, McCarthy and Smyth: edgeR Bioconductor package.
% NOTE(review): year/month (2009, nov) sit beside volume 26, number 1; this looks like
% the online-first date next to the print issue -- confirm which one should be cited.
@article{Robinson2009EdgeR,
  abstract = {{It is expected that emerging digital gene expression (DGE) technologies will overtake microarray technologies in the near future for many functional genomics applications. One of the fundamental data analysis tasks, especially for gene expression studies, involves determining whether there is evidence that counts for a transcript or exon are significantly different across experimental conditions. edgeR is a Bioconductor software package for examining differential expression of replicated count data. An overdispersed Poisson model is used to account for both biological and technical variability. Empirical Bayes methods are used to moderate the degree of overdispersion across transcripts, improving the reliability of inference. The methodology can be used even with the most minimal levels of replication, provided at least one phenotype or experimental condition is replicated. The software may have other applications beyond sequencing data, such as proteome peptide count data. The package is freely available under the LGPL licence from the Bioconductor web site (http://bioconductor.org).}},
  author = {Robinson, M. D. and McCarthy, D. J. and Smyth, G. K.},
  citeulike-article-id = {6109634},
  citeulike-linkout-0 = {https://doi.org/10.1093/bioinformatics/btp616},
  citeulike-linkout-1 = {http://bioinformatics.oxfordjournals.org/content/btp616v1/.abstract},
  citeulike-linkout-2 = {http://bioinformatics.oxfordjournals.org/content/btp616v1/.full.pdf},
  citeulike-linkout-3 = {http://bioinformatics.oxfordjournals.org/cgi/content/abstract/26/1/139},
  citeulike-linkout-4 = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2796818/},
  citeulike-linkout-5 = {http://view.ncbi.nlm.nih.gov/pubmed/19910308},
  citeulike-linkout-6 = {http://www.hubmed.org/display.cgi?uids=19910308},
  day = {11},
  doi = {10.1093/bioinformatics/btp616},
  issn = {1460-2059},
  journal = {Bioinformatics},
  keywords = {cnv, deseq2, overdispersion, rnaseq, workflow},
  month = nov,
  number = {1},
  pages = {139--140},
  pmcid = {PMC2796818},
  pmid = {19910308},
  posted-at = {2011-06-25 18:43:51},
  priority = {2},
  publisher = {Oxford University Press},
  title = {{edgeR}: a {Bioconductor} package for differential expression analysis of digital gene expression data},
  url = {https://doi.org/10.1093/bioinformatics/btp616},
  volume = {26},
  year = {2009}
}

% Hardcastle and Kelly (2010): baySeq empirical Bayesian differential expression.
@article{Hardcastle2010BaySeq,
  abstract = {{BACKGROUND:High throughput sequencing has become an important technology for studying expression levels in many types of genomic, and particularly transcriptomic, data.
One key way of analysing such data is to look for elements of the data which display particular patterns of differential expression in order to take these forward for further analysis and validation.RESULTS:We propose a framework for defining patterns of differential expression and develop a novel algorithm, baySeq, which uses an empirical Bayes approach to detect these patterns of differential expression within a set of sequencing samples. The method assumes a negative binomial distribution for the data and derives an empirically determined prior distribution from the entire dataset. We examine the performance of the method on real and simulated data.CONCLUSIONS:Our method performs at least as well, and often better, than existing methods for analyses of pairwise differential expression in both real and simulated data. When we compare methods for the analysis of data from experimental designs involving multiple sample groups, our method again shows substantial gains in performance. We believe that this approach thus represents an important step forward for the analysis of count data from sequencing experiments.}},
  author = {Hardcastle, Thomas and Kelly, Krystyna},
  citeulike-article-id = {7610091},
  citeulike-linkout-0 = {https://doi.org/10.1186/1471-2105-11-422},
  citeulike-linkout-1 = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2928208/},
  citeulike-linkout-2 = {http://view.ncbi.nlm.nih.gov/pubmed/20698981},
  citeulike-linkout-3 = {http://www.hubmed.org/display.cgi?uids=20698981},
  doi = {10.1186/1471-2105-11-422},
  issn = {1471-2105},
  journal = {BMC Bioinformatics},
  keywords = {bayes, deseq2, rnaseq, workflow},
  number = {1},
  pages = {422},
  pmcid = {PMC2928208},
  pmid = {20698981},
  posted-at = {2011-04-05 09:08:06},
  priority = {2},
  title = {{baySeq}: Empirical {Bayesian} methods for identifying differential expression in sequence count data},
  url = {https://doi.org/10.1186/1471-2105-11-422},
  volume = {11},
  year = {2010}
}

% Anders and Huber (2010): DESeq, negative binomial model for sequence count data.
% `pages` holds the bare article identifier, without the exporter's trailing plus sign.
@article{Anders2010Differential,
  abstract = {{High-throughput sequencing assays such as RNA-Seq, ChIP-Seq or barcode counting provide quantitative readouts in the form of count data. To infer differential signal in such data correctly and with good statistical power, estimation of data variability throughout the dynamic range and a suitable error model are required. We propose a method based on the negative binomial distribution, with variance and mean linked by local regression and present an implementation, DESeq, as an R/Bioconductor package.}},
  author = {Anders, Simon and Huber, Wolfgang},
  citeulike-article-id = {8132834},
  citeulike-linkout-0 = {https://doi.org/10.1186/gb-2010-11-10-r106},
  citeulike-linkout-1 = {http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3218662/},
  citeulike-linkout-2 = {http://view.ncbi.nlm.nih.gov/pubmed/20979621},
  citeulike-linkout-3 = {http://www.hubmed.org/display.cgi?uids=20979621},
  day = {27},
  doi = {10.1186/gb-2010-11-10-r106},
  issn = {1465-6906},
  journal = {Genome Biology},
  keywords = {bioconductor, cnv, ctsca, deseq2, overdispersion, rnaguide, rnaseq, variance, workflow},
  month = oct,
  number = {10},
  pages = {R106},
  pmcid = {PMC3218662},
  pmid = {20979621},
  posted-at = {2011-01-20 22:51:25},
  priority = {2},
  publisher = {BioMed Central Ltd},
  title = {Differential expression analysis for sequence count data},
  url = {https://doi.org/10.1186/gb-2010-11-10-r106},
  volume = {11},
  year = {2010}
}

% Soneson, Love and Robinson (2015): transcript-level estimates for gene-level inference.
@article{Soneson2015Differential,
  url = {https://doi.org/10.12688/f1000research.7563.1},
  doi = {10.12688/f1000research.7563.1},
  author = {Soneson, Charlotte and Love, Michael I.
and Robinson, Mark},
  title = {Differential analyses for {RNA-seq}: transcript-level estimates improve gene-level inferences},
  journal = {F1000Research},
  year = 2015,
  volume = 4,
  pages = {1521}
}

% Li and Dewey (2011): RSEM transcript quantification.
% The DOI agrees with the resolver URL stored in `url`.
@article{Li2011RSEM,
  author = {Li, Bo and Dewey, Colin N.},
  doi = {10.1186/1471-2105-12-323},
  journal = {BMC Bioinformatics},
  pages = {323},
  title = {{RSEM}: accurate transcript quantification from {RNA-Seq} data with or without a reference genome},
  url = {https://doi.org/10.1186/1471-2105-12-323},
  volume = {12},
  year = {2011}
}

% Patro, Mount and Kingsford (2014): Sailfish alignment-free isoform quantification.
@article{Patro2014Sailfish,
  author = {Patro, Rob and Mount, Stephen M. and Kingsford, Carl},
  journal = {Nature Biotechnology},
  pages = {462--464},
  title = {Sailfish enables alignment-free isoform quantification from {RNA-seq} reads using lightweight algorithms},
  doi = {10.1038/nbt.2862},
  url = {https://doi.org/10.1038/nbt.2862},
  volume = {32},
  year = {2014}
}

% Patro et al. (2017): Salmon bias-aware transcript quantification.
@article{Patro2017Salmon,
  title = {Salmon provides fast and bias-aware quantification of transcript expression},
  author = {Patro, Rob and Duggal, Geet and Love, Michael I and Irizarry, Rafael A and Kingsford, Carl},
  affiliation = {Department of Computer Science, Stony Brook University, Stony Brook, New York, USA. DNAnexus, Mountain View, California, USA. Department of Biostatistics and Computational Biology, Dana-Farber Cancer Institute, Cambridge, Massachusetts, USA. Department of Biostatistics, Harvard T.H. Chan School of Public Health, Cambridge, Massachusetts, USA. Department of Biostatistics and Computational Biology, Dana-Farber Cancer Institute, Cambridge, Massachusetts, USA. Department of Biostatistics, Harvard T.H. Chan School of Public Health, Cambridge, Massachusetts, USA. Computational Biology Department, Carnegie Mellon University, Pittsburgh, Pennsylvania, USA.},
  abstract = {We introduce Salmon, a lightweight method for quantifying transcript abundance from RNA-seq reads. Salmon combines a new dual-phase parallel inference algorithm and feature-rich bias models with an ultra-fast read mapping procedure. It is the first transcriptome-wide quantifier to correct for fragment GC-content bias, which, as we demonstrate here, substantially improves the accuracy of abundance estimates and the sensitivity of subsequent differential expression analysis.},
  journal = {Nat. Methods},
  volume = 14,
  pages = {417--419},
  month = "6~" # mar,
  year = 2017
}

% Bray et al. (2016): kallisto pseudoalignment-based quantification.
@article{Bray2016Near,
  title = "Near-optimal {RNA-Seq} quantification",
  author = "Bray, Nicolas L and Pimentel, Harold and Melsted, P{\'a}ll and Pachter, Lior",
  abstract = "We present kallisto, an RNA-seq quantification program that is two orders of magnitude faster than previous approaches and achieves similar accuracy. Kallisto pseudoaligns reads to a reference, producing a list of transcripts that are compatible with each read while avoiding alignment of individual bases. We use kallisto to analyze 30 million unaligned paired-end RNA-seq reads in <10 min on a standard laptop computer. This removes a major computational bottleneck in RNA-seq analysis.",
  journal = "Nat.
Biotechnol.", year = 2016 }

% Robert and Watson (2015): RNA-Seq quantification errors in disease-relevant genes.
@article{Robert2015Errors,
  author = {Robert, Christelle and Watson, Mick},
  doi = {10.1186/s13059-015-0734-x},
  journal = {Genome Biology},
  title = {Errors in {RNA-Seq} quantification affect genes of relevance to human disease},
  url = {https://doi.org/10.1186/s13059-015-0734-x},
  year = {2015}
}

% Trapnell et al. (2013): differential analysis at transcript resolution.
@article{Trapnell2013Differential,
  author = {Trapnell, Cole and Hendrickson, David G and Sauvageau, Martin and Goff, Loyal and Rinn, John L and Pachter, Lior},
  doi = {10.1038/nbt.2450},
  journal = {Nature Biotechnology},
  title = {Differential analysis of gene regulation at transcript resolution with {RNA-seq}},
  url = {https://doi.org/10.1038/nbt.2450},
  year = {2013}
}

% Love et al. (2015): gene-level RNA-Seq workflow.
@article{Love2015RNASeq,
  title = {{RNA}-{Seq} workflow: gene-level exploratory analysis and differential expression},
  issn = {2046-1402},
  shorttitle = {{RNA}-{Seq} workflow},
  url = {http://f1000research.com/articles/4-1070/v1},
  doi = {10.12688/f1000research.7035.1},
  language = {en},
  urldate = {2016-06-30},
  journal = {F1000Research},
  author = {Love, Michael I. and Anders, Simon and Kim, Vladislav and Huber, Wolfgang},
  month = oct,
  year = {2015}
}

% Alexa, Rahnenfuehrer and Lengauer (2006): GO scoring with decorrelated graph structure.
@article{Alexa2006Improved,
  title = {Improved scoring of functional groups from gene expression data by decorrelating {GO} graph structure},
  volume = {22},
  issn = {1367-4803, 1460-2059},
  url = {http://bioinformatics.oxfordjournals.org/cgi/doi/10.1093/bioinformatics/btl140},
  doi = {10.1093/bioinformatics/btl140},
  language = {en},
  number = {13},
  urldate = {2016-06-30},
  journal = {Bioinformatics},
  author = {Alexa, A. and Rahnenf{\"u}hrer, J. and Lengauer, T.},
  month = jul,
  year = {2006},
  pages = {1600--1607}
}

% Di Tommaso et al. (2017): Nextflow workflow framework.
@article{Di2017Nextflow,
  title = {Nextflow enables reproducible computational workflows},
  author = {Di Tommaso, Paolo and Chatzou, Maria and Floden, Evan W and Barja, Pablo Prieto and Palumbo, Emilio and Notredame, Cedric},
  journal = {Nature Biotechnology},
  volume = {35},
  number = {4},
  pages = {316--319},
  year = {2017},
  publisher = {Nature Research}
}

% Koester and Rahmann (2012): Snakemake workflow engine.
% The dash in the title is written as an ASCII `---` ligature.
@article{Koster2012Snakemake,
  title = {Snakemake---a scalable bioinformatics workflow engine},
  author = {K{\"o}ster, Johannes and Rahmann, Sven},
  journal = {Bioinformatics},
  volume = {28},
  number = {19},
  pages = {2520--2522},
  year = {2012},
  publisher = {Oxford University Press}
}

% Ignatiadis et al. (2016): data-driven hypothesis weighting.
@article{Ignatiadis2016,
  author = {Ignatiadis, Nikolaos and Klaus, Bernd and Zaugg, Judith and Huber, Wolfgang},
  journal = {Nature Methods},
  title = {Data-driven hypothesis weighting increases detection power in genome-scale multiple testing},
  url = {https://doi.org/10.1038/nmeth.3885},
  year = 2016
}

% Rainer, Gatto and Weichenberger (2019): ensembldb annotation package.
% NOTE(review): volume/number/pages/month give the final issue (35(17):3151--3153, Sep 2019)
% in place of the advance-access placeholders; confirm against the DOI record.
@article{Rainer:2019jd,
  author = {Rainer, Johannes and Gatto, Laurent and Weichenberger, Christian X},
  title = {{ensembldb}: an {R} package to create and use {Ensembl}-based annotation resources},
  journal = {Bioinformatics},
  year = {2019},
  volume = {35},
  number = {17},
  pages = {3151--3153},
  month = sep,
  affiliation = {Institute for Biomedicine, Eurac Research, Affiliated Institute of the University of L{\"u}beck, Bolzano, Italy.},
  doi = {10.1093/bioinformatics/btz031},
  pmid = {30689724},
  abstract = {Summary:Bioinformatics research frequently involves handling gene-centric data such as exons, transcripts, proteins, and their positions relative to a reference coordinate system. The ensembldb Bioconductor package retrieves and stores Ensembl-based genetic annotations and positional information, and furthermore offers identifier conversion and coordinate mappings for gene-associated data. In support of reproducible research, data are tied to Ensembl releases and are kept separately from the software. Premade data packages are available for a variety of genomes and Ensembl releases. Three examples demonstrate typical use cases of this software. Availability:ensembldb is part of Bioconductor (https://bioconductor.org/packages/ensembldb). Supplementary Information:Supplementary Data are available at Bioinformatics Online.},
  url = {https://academic.oup.com/bioinformatics/advance-article/doi/10.1093/bioinformatics/btz031/5301311},
}