2014

  • [DOI] M. H. Schulz, D. Weese, M. Holtgrewe, V. Dimitrova, S. Niu, K. Reinert, and H. Richard, “Fiona: a parallel and automatic strategy for read error correction,” Bioinformatics (Oxford, England), vol. 30, iss. 17, pp. i356–i363, 2014.
    [Bibtex]
    @comment{Schulz et al. 2014, Fiona read error correction, Bioinformatics 30(17). Only the tool name is brace-protected in the title; accent and ampersand escapes carry their backslashes; affiliation and attachment are listed once.}
    @article{Schulz:2014dm,
    author = {Schulz, Marcel H and Weese, David and Holtgrewe, Manuel and Dimitrova, V and Niu, Sijia and Reinert, Knut and Richard, Hugues},
    title = {{Fiona}: a parallel and automatic strategy for read error correction},
    journal = {Bioinformatics (Oxford, England)},
    year = {2014},
    volume = {30},
    number = {17},
    pages = {i356--i363},
    month = sep,
    publisher = {Oxford University Press},
    affiliation = {'Multimodal Computing and Interaction', Saarland University {\&} Department for Computational Biology and Applied Computing, Max Planck Institute for Informatics, Saarbr{\"u}cken, 66123 Saarland, Germany, Ray and Stephanie Lane Center for Computational Biology, Carnegie Mellon University, Pittsburgh, 15206 PA, USA, Department of Mathematics and Computer Science, Freie Universit{\"a}t Berlin, 14195 Berlin, Germany, Universit{\'e} Pierre et Marie Curie, UMR7238, CNRS-UPMC, Paris, France and CNRS, UMR7238, Laboratory of Computational and Quantitative Biology, Paris, France.},
    doi = {10.1093/bioinformatics/btu440},
    pmid = {25161220},
    pmcid = {PMC4147893},
    language = {English},
    read = {Yes},
    rating = {0},
    date-added = {2014-09-08T12:32:23GMT},
    date-modified = {2016-01-14T20:09:03GMT},
    abstract = {MOTIVATION: Automatic error correction of high-throughput sequencing data can have a dramatic impact on the amount of usable base pairs and their quality. It has been shown that the performance of tasks such as de novo genome assembly and SNP calling can be dramatically improved after read error correction. While a large number of methods specialized for correcting substitution errors as found in Illumina data exist, few methods for the correction of indel errors, common to technologies like 454 or Ion Torrent, have been proposed.
    RESULTS: We present Fiona, a new stand-alone read error-correction method. Fiona provides a new statistical approach for sequencing error detection and optimal error correction and estimates its parameters automatically. Fiona is able to correct substitution, insertion and deletion errors and can be applied to any sequencing technology. It uses an efficient implementation of the partial suffix array to detect read overlaps with different seed lengths in parallel. We tested Fiona on several real datasets from a variety of organisms with different read lengths and compared its performance with state-of-the-art methods. Fiona shows a constantly higher correction accuracy over a broad range of datasets from 454 and Ion Torrent sequencers, without compromise in speed.
    CONCLUSION: Fiona is an accurate parameter-free read error-correction method that can be run on inexpensive hardware and can make use of multicore parallelization whenever available. Fiona was implemented using the SeqAn library for sequence analysis and is publicly available for download at http://www.seqan.de/projects/fiona.
    CONTACT: mschulz@mmci.uni-saarland.de or hugues.richard@upmc.fr
    SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.},
    url = {http://bioinformatics.oxfordjournals.org/content/30/17/i356.full},
    local-url = {file://localhost/Users/reinert/Dropbox/Library.papers3/Files/75/75795684-ABC8-488D-BB7B-330F2F28B93C},
    file = {{75795684-ABC8-488D-BB7B-330F2F28B93C:/Users/reinert/Dropbox/Library.papers3/Files/75/75795684-ABC8-488D-BB7B-330F2F28B93C:application/pdf}},
    uri = {\url{papers3://publication/doi/10.1093/bioinformatics/btu440}}
    }
  • [DOI] H. Hauswedell, J. Singer, and K. Reinert, “Lambda: the local aligner for massive biological data,” Bioinformatics (Oxford, England), vol. 30, iss. 17, pp. i349–i355, 2014.
    [Bibtex]
    @comment{Hauswedell et al. 2014, Lambda local aligner, Bioinformatics 30(17). Only the tool name is brace-protected in the title; the umlaut escape carries its backslash; the attachment is listed once.}
    @article{Hauswedell:2014bt,
    author = {Hauswedell, Hannes and Singer, Jochen and Reinert, Knut},
    title = {{Lambda}: the local aligner for massive biological data},
    journal = {Bioinformatics (Oxford, England)},
    year = {2014},
    volume = {30},
    number = {17},
    pages = {i349--i355},
    month = sep,
    publisher = {Oxford University Press},
    affiliation = {Department of Mathematics and Computer Science, Freie Universit{\"a}t Berlin, Takustr. 9, 14195 Berlin, Germany.},
    doi = {10.1093/bioinformatics/btu439},
    pmid = {25161219},
    pmcid = {PMC4147892},
    language = {English},
    read = {Yes},
    rating = {0},
    date-added = {2014-09-08T12:33:47GMT},
    date-modified = {2016-01-14T20:09:11GMT},
    abstract = {MOTIVATION: Next-generation sequencing technologies produce unprecedented amounts of data, leading to completely new research fields. One of these is metagenomics, the study of large-size DNA samples containing a multitude of diverse organisms. A key problem in metagenomics is to functionally and taxonomically classify the sequenced DNA, to which end the well-known BLAST program is usually used. But BLAST has dramatic resource requirements at metagenomic scales of data, imposing a high financial or technical burden on the researcher. Multiple attempts have been made to overcome these limitations and present a viable alternative to BLAST.
    RESULTS: In this work we present Lambda, our own alternative for BLAST in the context of sequence classification. In our tests, Lambda often outperforms the best tools at reproducing BLAST's results and is the fastest compared with the current state of the art at comparable levels of sensitivity.
    AVAILABILITY AND IMPLEMENTATION: Lambda was implemented in the SeqAn open-source C++ library for sequence analysis and is publicly available for download at http://www.seqan.de/projects/lambda.
    CONTACT: hannes.hauswedell@fu-berlin.de
    SUPPLEMENTARY INFORMATION: Supplementary data are available at Bioinformatics online.},
    url = {http://bioinformatics.oxfordjournals.org/content/30/17/i349.full},
    local-url = {file://localhost/Users/reinert/Dropbox/Library.papers3/Files/94/9488230A-A39B-4CC4-9E72-D226E28C7C90},
    file = {{9488230A-A39B-4CC4-9E72-D226E28C7C90:/Users/reinert/Dropbox/Library.papers3/Files/94/9488230A-A39B-4CC4-9E72-D226E28C7C90:application/pdf}},
    uri = {\url{papers3://publication/doi/10.1093/bioinformatics/btu439}}
    }
  • [DOI] R. Rahn, D. Weese, and K. Reinert, “Journaled string tree—a scalable data structure for analyzing thousands of similar genomes on your laptop,” Bioinformatics (Oxford, England), p. btu438, 2014.
    [Bibtex]
    @comment{Rahn et al. 2014, Journaled string tree, Bioinformatics advance-access id btu438. The title dash is written as a TeX em dash; the umlaut escape carries its backslash; the attachment is listed once.}
    @article{Rahn:2014bb,
    author = {Rahn, R and Weese, David and Reinert, Knut},
    title = {Journaled string tree---a scalable data structure for analyzing thousands of similar genomes on your laptop},
    journal = {Bioinformatics (Oxford, England)},
    year = {2014},
    pages = {btu438},
    month = jul,
    publisher = {Oxford University Press},
    affiliation = {Department of Mathematics and Computer Science, Freie Universit{\"a}t Berlin, Takustr. 9, 14195 Berlin, Germany.},
    doi = {10.1093/bioinformatics/btu438},
    pmid = {25028723},
    language = {English},
    read = {Yes},
    rating = {0},
    date-added = {2014-08-14T06:36:08GMT},
    date-modified = {2016-01-14T20:09:14GMT},
    abstract = {MOTIVATION: Next-generation sequencing (NGS) has revolutionized biomedical research in the past decade and led to a continuous stream of developments in bioinformatics, addressing the need for fast and space-efficient solutions for analyzing NGS data. Often researchers need to analyze a set of genomic sequences that stem from closely related species or are indeed individuals of the same species. Hence, the analyzed sequences are similar. For analyses where local changes in the examined sequence induce only local changes in the results, it is obviously desirable to examine identical or similar regions not repeatedly.
    RESULTS: In this work, we provide a datatype that exploits data parallelism inherent in a set of similar sequences by analyzing shared regions only once. In real-world experiments, we show that algorithms that otherwise would scan each reference sequentially can be speeded up by a factor of 115. Availability: The data structure and associated tools are publicly available at http://www.seqan.de/projects/jst and are part of SeqAn, the C++ template library for sequence analysis.
    CONTACT: rene.rahn@fu-berlin.de.},
    url = {http://bioinformatics.oxfordjournals.org/cgi/doi/10.1093/bioinformatics/btu438},
    local-url = {file://localhost/Users/reinert/Dropbox/Library.papers3/Articles/2014/Rahn/Bioinformatics%202014%20Rahn.pdf},
    file = {{Bioinformatics 2014 Rahn.pdf:/Users/reinert/Dropbox/Library.papers3/Articles/2014/Rahn/Bioinformatics 2014 Rahn.pdf:application/pdf}},
    uri = {\url{papers3://publication/doi/10.1093/bioinformatics/btu438}}
    }
  • [DOI] K. Trappe, A. Emde, H. C. Ehrlich, and K. Reinert, “Gustaf: Detecting and correctly classifying SVs in the NGS twilight zone,” Bioinformatics (Oxford, England), p. btu431, 2014.
    [Bibtex]
    @comment{Trappe et al. 2014, Gustaf SV detection, Bioinformatics advance-access id btu431. The tool name and acronyms are brace-protected in the title; the umlaut and math escapes carry their backslashes; affiliation and attachment are listed once.}
    @article{Trappe:2014bf,
    author = {Trappe, Kathrin and Emde, A and Ehrlich, H C and Reinert, Knut},
    title = {{Gustaf}: Detecting and correctly classifying {SVs} in the {NGS} twilight zone},
    journal = {Bioinformatics (Oxford, England)},
    year = {2014},
    pages = {btu431},
    month = jul,
    publisher = {Oxford University Press},
    affiliation = {Department of Computer Science, Freie Universit{\"a}t Berlin, 14195 Berlin, Germany, Research Group Bioinformatics (NG4), Robert Koch Institute, 13353 Berlin, Germany and New York Genome Center, New York, NY 10013, USA.},
    doi = {10.1093/bioinformatics/btu431},
    pmid = {25028727},
    language = {English},
    read = {Yes},
    rating = {4},
    date-added = {2014-08-14T06:40:59GMT},
    date-modified = {2016-01-14T20:09:17GMT},
    abstract = {MOTIVATION: The landscape of structural variation (SV) including complex duplication and translocation patterns is far from resolved. SV detection tools usually exhibit low agreement, are often geared toward certain types or size ranges of variation and struggle to correctly classify the type and exact size of SVs.
    RESULTS: We present Gustaf (Generic mUlti-SpliT Alignment Finder), a sound generic multi-split SV detection tool that detects and classifies deletions, inversions, dispersed duplications and translocations of $\ge$30 bp. Our approach is based on a generic multi-split alignment strategy that can identify SV breakpoints with base pair resolution. We show that Gustaf correctly identifies SVs, especially in the range from 30 to 100 bp, which we call the next-generation sequencing (NGS) twilight zone of SVs, as well as larger SVs >500 bp. Gustaf performs better than similar tools in our benchmark and is furthermore able to correctly identify size and location of dispersed duplications and translocations, which otherwise might be wrongly classified, for example, as large deletions. Availability and implementation: Project information, paper benchmark and source code are available via http://www.seqan.de/projects/gustaf/.
    CONTACT: kathrin.trappe@fu-berlin.de.},
    url = {http://bioinformatics.oxfordjournals.org/content/early/2014/07/29/bioinformatics.btu431.full},
    local-url = {file://localhost/Users/reinert/Dropbox/Library.papers3/Articles/2014/Trappe/Bioinformatics%202014%20Trappe.pdf},
    file = {{Bioinformatics 2014 Trappe.pdf:/Users/reinert/Dropbox/Library.papers3/Articles/2014/Trappe/Bioinformatics 2014 Trappe.pdf:application/pdf}},
    uri = {\url{papers3://publication/doi/10.1093/bioinformatics/btu431}}
    }
2016-02-16T11:02:40+00:00