Publications

­
  • E. Audain, J. Uszkoreit, T. Sachsenberg, J. Pfeuffer, X. Liang, Henning Hermjakob, A. Sanchez, M. Eisenacher, K. Reinert, D. L. Tabb, O. Kohlbacher, and Yasset Perez-Riverol, “In-depth analysis of protein inference algorithms using multiple search engines and well-defined metrics,” Journal of proteomics, vol. 150, pp. 170-182, 2017.
    [Bibtex]
    @article{fu_mi_publications1939,
      author    = {Audain, Enrique and Uszkoreit, Julian and Sachsenberg, Timo and Pfeuffer, Julianus and Liang, Xiao and
        Hermjakob, Henning and Sanchez, Aniel and Eisenacher, Martin and Reinert, Knut and Tabb, David L. and
        Kohlbacher, Oliver and Perez-Riverol, Yasset},
      title     = {In-depth analysis of protein inference algorithms using multiple search engines and well-defined metrics},
      journal   = {Journal of Proteomics},
      year      = {2017},
      month     = jan,
      volume    = {150},
      pages     = {170--182},
      publisher = {Elsevier},
      abstract  = {In mass spectrometry-based shotgun proteomics, protein identifications are usually the desired result.
        However, most of the analytical methods are based on the identification of reliable peptides and not the direct
        identification of intact proteins. Thus, assembling peptides identified from tandem mass spectra into a list of
        proteins, referred to as protein inference, is a critical step in proteomics research. Currently, different protein
        inference algorithms and tools are available for the proteomics community. Here, we evaluated five software tools for
        protein inference (PIA, ProteinProphet, Fido, ProteinLP, MSBayesPro) using three popular database search engines:
        Mascot, X!Tandem, and MS-GF +. All the algorithms were evaluated using a highly customizable KNIME workflow using four
        different public datasets with varying complexities (different sample preparation, species and analytical instruments).
        We defined a set of quality control metrics to evaluate the performance of each combination of search engines, protein
        inference algorithm, and parameters on each dataset. We show that the results for complex samples vary not only
        regarding the actual numbers of reported protein groups but also concerning the actual composition of groups.
        Furthermore, the robustness of reported proteins when using databases of differing complexities is strongly dependant on
        the applied inference algorithm. Finally, merging the identifications of multiple search engines does not necessarily
        increase the number of reported proteins, but does increase the number of peptides per protein and thus can generally be
        recommended.},
      url       = {http://publications.imp.fu-berlin.de/1939/}
    }
  • S. Canzar, S. Andreotti, D. Weese, K. Reinert, and G. W. Klau, “CIDANE: comprehensive isoform discovery and abundance estimation,” Genome biology, vol. 17, iss. 1, 2016.
    [Bibtex]
    @article{fu_mi_publications1830,
      author    = {Canzar, S. and Andreotti, S. and Weese, D. and Reinert, K. and Klau, G. W.},
      title     = {{CIDANE}: comprehensive isoform discovery and abundance estimation},
      journal   = {Genome Biology},
      year      = {2016},
      month     = jan,
      volume    = {17},
      number    = {1},
      publisher = {BioMed Central, Springer Science+Business Media},
      abstract  = {We present CIDANE, a novel framework for genome-based transcript reconstruction and quantification from
        RNA-seq reads. CIDANE assembles transcripts efficiently with significantly higher sensitivity and precision than
        existing tools. Its algorithmic core not only reconstructs transcripts ab initio, but also allows the use of the growing
        annotation of known splice sites, transcription start and end sites, or full-length transcripts, which are available for
        most model organisms. CIDANE supports the integrated analysis of RNA-seq and additional gene-boundary data and recovers
        splice junctions that are invisible to other methods. CIDANE is available at
        http://ccb.jhu.edu/software/cidane/.},
      url       = {http://publications.imp.fu-berlin.de/1830/}
    }
  • M. Jäger, M. Schubach, T. Zemojtel, K. Reinert, D. M. Church, and P. N. Robinson, “Alternate-locus aware variant calling in whole genome sequencing,” Genome medicine, vol. 8, iss. 1, 2016.
    [Bibtex]
    @article{fu_mi_publications2004,
      author    = {J{\"a}ger, Marten and Schubach, Max and Zemojtel, Tomasz and Reinert, Knut and Church, Deanna M. and
        Robinson, Peter N.},
      title     = {Alternate-locus aware variant calling in whole genome sequencing},
      journal   = {Genome Medicine},
      year      = {2016},
      month     = dec,
      volume    = {8},
      number    = {1},
      publisher = {BioMed Central (Springer Nature)},
      abstract  = {
        Background
        The last two human genome assemblies have extended the previous linear golden-path paradigm of the human genome to a
        graph-like model to better represent regions with a high degree of structural variability. The new model offers
        opportunities to improve the technical validity of variant calling in whole-genome sequencing (WGS).
        Methods
        We developed an algorithm that analyzes the patterns of variant calls in the 178 structurally variable regions of
        the GRCh38 genome assembly, and infers whether a given sample is most likely to contain sequences from the primary
        assembly, an alternate locus, or their heterozygous combination at each of these 178 regions. We investigate 121
        in-house WGS datasets that have been aligned to the GRCh37 and GRCh38 assemblies.
        Results
        We show that stretches of sequences that are largely but not entirely identical between the primary assembly and an
        alternate locus can result in multiple variant calls against regions of the primary assembly. In WGS analysis, this
        results in characteristic and recognizable patterns of variant calls at positions that we term alignable
        scaffold-discrepant positions (ASDPs). In 121 in-house genomes, on average 51.8{$\pm$}3.8 of the 178 regions were
        found to correspond best to an alternate locus rather than the primary assembly sequence, and filtering these
        genomes with our algorithm led to the identification of 7863 variant calls per genome that colocalized with ASDPs.
        Additionally, we found that 437 of 791 genome-wide association study hits located within one of the regions
        corresponded to ASDPs.
        Conclusions
        Our algorithm uses the information contained in the 178 structurally variable regions of the GRCh38 genome assembly
        to avoid spurious variant calls in cases where samples contain an alternate locus rather than the corresponding
        segment of the primary assembly. These results suggest the great potential of fully incorporating the resources of
        graph-like genome assemblies into variant calling, but also underscore the importance of developing computational
        resources that will allow a full reconstruction of the genotype in personal genomes. Our algorithm is freely
        available at https://github.com/charite/asdpex.},
      url       = {http://publications.imp.fu-berlin.de/2004/}
    }
  • T. Marschall, K. Reinert, and others (59 authors in total), “Computational pan-genomics: status, promises and challenges,” Briefings in bioinformatics, 2016.
    [Bibtex]
    @article{fu_mi_publications1981,
      author    = {Marschall, T. and Reinert, K. and others},
      title     = {Computational pan-genomics: status, promises and challenges},
      journal   = {Briefings in Bioinformatics},
      year      = {2016},
      month     = oct,
      publisher = {Oxford Journals},
      note      = {59 authors in total},
      abstract  = {Many disciplines, from human genetics and oncology to plant breeding, microbiology and virology, commonly
        face the challenge of analyzing rapidly increasing numbers of genomes. In case of Homo sapiens, the number of sequenced
        genomes will approach hundreds of thousands in the next few years. Simply scaling up established bioinformatics
        pipelines will not be sufficient for leveraging the full potential of such rich genomic data sets. Instead, novel,
        qualitatively different computational methods and paradigms are needed. We will witness the rapid extension of
        computational pan-genomics, a new sub-area of research in computational biology. In this article, we generalize existing
        definitions and understand a pan-genome as any collection of genomic sequences to be analyzed jointly or to be used as a
        reference. We examine already available approaches to construct and use pan-genomes, discuss the potential benefits of
        future technologies and methodologies and review open challenges from the vantage point of the above-mentioned
        biological disciplines. As a prominent example for a computational paradigm shift, we particularly highlight the
        transition from the representation of reference genomes as strings to representations as graphs. We outline how this and
        other challenges from different application domains translate into common computational problems, point out relevant
        bioinformatics techniques and identify open problems in computer science. With this review, we aim to increase awareness
        that a joint approach to computational pan-genomics can help address many of the problems currently faced in various
        domains.},
      url       = {http://publications.imp.fu-berlin.de/1981/}
    }
  • B. Vatansever, A. Muñoz, C. L. Klein, and K. Reinert, “Development and optimisation of a generic micro LC-ESI-MS method for the qualitative and quantitative determination of 30-mer toxic gliadin peptides in wheat flour for food analysis,” Analytical and bioanalytical chemistry, pp. 1-9, 2016.
    [Bibtex]
    @article{fu_mi_publications1976,
      author    = {Vatansever, B. and Mu{\~n}oz, A. and Klein, C. L. and Reinert, K.},
      title     = {Development and optimisation of a generic micro {LC-ESI-MS} method for the qualitative and quantitative
        determination of 30-mer toxic gliadin peptides in wheat flour for food analysis},
      journal   = {Analytical and Bioanalytical Chemistry},
      year      = {2016},
      month     = oct,
      pages     = {1--9},
      publisher = {Springer Berlin Heidelberg},
      abstract  = {We sometimes see manufactured bakery products on the market which are labelled as being gluten free. Why is
        the content of such gluten proteins of importance for the fabrication of bakery industry and for the products? The
        gluten proteins represent up to 80 \% of wheat proteins, and they are conventionally subdivided into gliadins and
        glutenins. Gliadins belong to the proline and glutamine-rich prolamin family. Its role in human gluten intolerance, as a
        consequence of its harmful effects, is well documented in the scientific literature. The only known therapy so far is a
        gluten-free diet, and hence, it is important to develop robust and reliable analytical methods to quantitatively assess
        the presence of the identified peptides causing the so-called coeliac disease. This work describes the development of a
        new, fast and robust micro ion pair-LC-MS analytical method for the qualitative and quantitative determination of 30-mer
        toxic gliadin peptides in wheat flour. The use of RapiGest SF as a denaturation reagent prior to the enzymatic
        digestion showed to shorten the measuring time. During the optimisation of the enzymatic digestion step, the best 30-mer
        toxic peptide was identified from the maximum recovery after 3 h of digestion time. The lower limit of quantification
        was determined to be 0.25 ng/{\ensuremath{\mu}}L. The method has shown to be linear for the selected concentration range
        of 0.25--3.0 ng/{\ensuremath{\mu}}L. The uncertainty related to reproducibility of measurement procedure, excluding the
        extraction step, has shown to be 5.0 \% (N = 12). Finally, this method was successfully applied to the quantification of
        30-mer toxic peptides from commercial wheat flour with an overall uncertainty under reproducibility conditions of 6.4 \%
        including the extraction of the gliadin fraction. The results were always expressed as the average of the values from
        all standard concentrations. Subsequently, the final concentration of the 30-mer toxic peptide in the flour was
        calculated and expressed in milligrams per gram unit. The determined, calculated concentration of the 30-mer toxic
        peptide in the flour was found to be 1.29 {$\pm$} 0.37 {\ensuremath{\mu}}g/g in flour (N = 25, sy = 545,075, f = 25 $-$ 2
        (t = 2.069), P = 95 \%, two-sided).},
      url       = {http://publications.imp.fu-berlin.de/1976/}
    }
  • [DOI] K. Reinert, B. Langmead, D. Weese, and D. J. Evers, “Alignment of Next-Generation Sequencing Reads.,” Annual review of genomics and human genetics, vol. 16, pp. 133-151, 2015.
    [Bibtex]
    @article{Reinert:2015ds,
      author        = {Reinert, Knut and Langmead, Ben and Weese, David and Evers, Dirk J},
      title         = {Alignment of Next-Generation Sequencing Reads},
      journal       = {Annual Review of Genomics and Human Genetics},
      year          = {2015},
      volume        = {16},
      pages         = {133--151},
      month         = aug,
      affiliation   = {Department of Mathematics and Computer Science, Freie Universit{\"a}t Berlin, 14195 Berlin, Germany; email: knut.reinert@fu-berlin.de , david.weese@fu-berlin.de.},
      doi           = {10.1146/annurev-genom-090413-025358},
      pmid          = {25939052},
      language      = {English},
      read          = {Yes},
      rating        = {0},
      date-added    = {2015-09-08T22:46:25GMT},
      date-modified = {2015-11-26T15:38:48GMT},
      abstract      = {High-throughput DNA sequencing has considerably changed the possibilities for conducting biomedical research by measuring billions of short DNA or RNA fragments. A central computational problem, and for many applications a first step, consists of determining where the fragments came from in the original genome. In this article, we review the main techniques for generating the fragments, the main applications, and the main algorithmic ideas for computing a solution to the read alignment problem. In addition, we describe pitfalls and difficulties connected to determining the correct positions of reads.},
      url           = {http://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi?dbfrom=pubmed&id=25939052&retmode=ref&cmd=prlinks},
      local-url     = {file://localhost/Users/reinert/Dropbox/Library.papers3/Files/75/7559E08C-FEF5-4C6C-8B06-256C0CF662A7},
      file          = {{7559E08C-FEF5-4C6C-8B06-256C0CF662A7:/Users/reinert/Dropbox/Library.papers3/Files/75/7559E08C-FEF5-4C6C-8B06-256C0CF662A7:application/pdf}},
      uri           = {\url{papers3://publication/doi/10.1146/annurev-genom-090413-025358}}
    }
  • [DOI] M. Holtgrewe, L. Kuchenbecker, and K. Reinert, “Methods for the detection and assembly of novel sequence in high-throughput sequencing data.,” Bioinformatics (oxford, england), vol. 31, iss. 12, pp. 1904-1912, 2015.
    [Bibtex]
    @article{Holtgrewe:2015bna,
      author        = {Holtgrewe, Manuel and Kuchenbecker, Leon and Reinert, Knut},
      title         = {Methods for the detection and assembly of novel sequence in high-throughput sequencing data},
      journal       = {Bioinformatics (Oxford, England)},
      year          = {2015},
      volume        = {31},
      number        = {12},
      pages         = {1904--1912},
      month         = jun,
      publisher     = {Oxford University Press},
      affiliation   = {Department of Computer Science, Freie Universit{\"a}t Berlin and Max Planck Institute for Molecular Genetics, Berlin, Germany.},
      doi           = {10.1093/bioinformatics/btv051},
      pmid          = {25649620},
      language      = {English},
      read          = {Yes},
      rating        = {0},
      date-added    = {2015-11-11T13:51:39GMT},
      date-modified = {2016-02-10T10:55:48GMT},
      abstract      = {MOTIVATION:Large insertions of novel sequence are an important type of structural variants. Previous studies used traditional de novo assemblers for assembling non-mapping high-throughput sequencing (HTS) or capillary reads and then tried to anchor them in the reference using paired read information.
        RESULTS:We present approaches for detecting insertion breakpoints and targeted assembly of large insertions from HTS paired data: BASIL and ANISE. On near identity repeats that are hard for assemblers, ANISE employs a repeat resolution step. This results in far better reconstructions than obtained by the compared methods. On simulated data, we found our insert assembler to be competitive with the de novo assemblers ABYSS and SGA while yielding already anchored inserted sequence as opposed to unanchored contigs as from ABYSS/SGA. On real-world data, we detected novel sequence in a human individual and thoroughly validated the assembled sequence. ANISE was found to be superior to the competing tool MindTheGap on both simulated and real-world data.
        AVAILABILITY AND IMPLEMENTATION:ANISE and BASIL are available for download at http://www.seqan.de/projects/herbarium under a permissive open source license.},
      url           = {http://bioinformatics.oxfordjournals.org/content/31/12/1904.full},
      local-url     = {file://localhost/Users/reinert/Dropbox/Library.papers3/Files/28/28042B27-15F4-4BEC-A9DB-B1D0A22E6000},
      file          = {{28042B27-15F4-4BEC-A9DB-B1D0A22E6000:/Users/reinert/Dropbox/Library.papers3/Files/28/28042B27-15F4-4BEC-A9DB-B1D0A22E6000:application/pdf}},
      uri           = {\url{papers3://publication/doi/10.1093/bioinformatics/btv051}}
    }
  • [DOI] L. Kuchenbecker, M. Nienen, J. Hecht, A. U. Neumann, N. Babel, K. Reinert, and P. N. Robinson, “IMSEQ – a fast and error aware approach to immunogenetic sequence analysis,” Bioinformatics (oxford, england), vol. 31, iss. 18, pp. 2963–2971, 2015.
    [Bibtex]
    @article{Kuchenbecker:2015kz,
      author        = {Kuchenbecker, Leon and Nienen, Mikalai and Hecht, Jochen and Neumann, Avidan U and Babel, Nina and Reinert, Knut and Robinson, Peter N},
      title         = {{IMSEQ} -- a fast and error aware approach to immunogenetic sequence analysis},
      journal       = {Bioinformatics (Oxford, England)},
      year          = {2015},
      volume        = {31},
      number        = {18},
      pages         = {2963--2971},
      month         = may,
      publisher     = {Oxford University Press},
      affiliation   = {Berlin-Brandenburg Center for Regenerative Therapies, Charit{\'e} Universit{\"a}tsmedizin, Berlin, Department of Computer Science, Freie Universit{\"a}t, Berlin, Max Planck Institute for Molecular Genetics, Ihnestrasse 63-73, 14195 Berlin, Germany, Goodman Faculty of Life Sciences, Bar-Ilan University, Ramat Gan, Israel, Marien Hospital Herne, Ruhr University Bochum, Bochum and Institute of Medical Genetics and Human Genetics, Charit{\'e} Universit{\"a}tsmedizin Berlin, Berlin, Germany.},
      doi           = {10.1093/bioinformatics/btv309},
      pmid          = {25987567},
      language      = {English},
      read          = {Yes},
      rating        = {0},
      date-added    = {2015-09-10T13:08:52GMT},
      date-modified = {2016-01-05T21:09:17GMT},
      abstract      = {Abstract Motivation: Recombined T and B cell receptor repertoires are increasingly being studied using next generation sequencing (NGS) in order to interrogate the repertoire composition as well as changes in the distribution of receptor clones under different ...},
      url           = {http://bioinformatics.oxfordjournals.org/lookup/doi/10.1093/bioinformatics/btv309},
      local-url     = {file://localhost/Users/reinert/Dropbox/Library.papers3/Files/6A/6A37250E-2353-4A74-B15E-9B2E1287153F},
      file          = {{6A37250E-2353-4A74-B15E-9B2E1287153F:/Users/reinert/Dropbox/Library.papers3/Files/6A/6A37250E-2353-4A74-B15E-9B2E1287153F:application/pdf}},
      uri           = {\url{papers3://publication/doi/10.1093/bioinformatics/btv309}}
    }
  • [DOI] M. H. Schulz, D. Weese, M. Holtgrewe, V. Dimitrova, S. Niu, K. Reinert, and H. Richard, “Fiona: a parallel and automatic strategy for read error correction.,” Bioinformatics (oxford, england), vol. 30, iss. 17, p. i356–i363, 2014.
    [Bibtex]
    @article{Schulz:2014dm,
      author        = {Schulz, Marcel H and Weese, David and Holtgrewe, Manuel and Dimitrova, V and Niu, Sijia and Reinert, Knut and Richard, Hugues},
      title         = {{Fiona}: a parallel and automatic strategy for read error correction},
      journal       = {Bioinformatics (Oxford, England)},
      year          = {2014},
      volume        = {30},
      number        = {17},
      pages         = {i356--i363},
      month         = sep,
      publisher     = {Oxford University Press},
      affiliation   = {'Multimodal Computing and Interaction', Saarland University {\&} Department for Computational Biology and Applied Computing, Max Planck Institute for Informatics, Saarbr{\"u}cken, 66123 Saarland, Germany, Ray and Stephanie Lane Center for Computational Biology, Carnegie Mellon University, Pittsburgh, 15206 PA, USA, Department of Mathematics and Computer Science, Freie Universit{\"a}t Berlin, 14195 Berlin, Germany, Universit{\'e} Pierre et Marie Curie, UMR7238, CNRS-UPMC, Paris, France and CNRS, UMR7238, Laboratory of Computational and Quantitative Biology, Paris, France.},
      doi           = {10.1093/bioinformatics/btu440},
      pmid          = {25161220},
      pmcid         = {PMC4147893},
      language      = {English},
      read          = {Yes},
      rating        = {0},
      date-added    = {2014-09-08T12:32:23GMT},
      date-modified = {2016-01-14T20:09:03GMT},
      abstract      = {MOTIVATION:Automatic error correction of high-throughput sequencing data can have a dramatic impact on the amount of usable base pairs and their quality. It has been shown that the performance of tasks such as de novo genome assembly and SNP calling can be dramatically improved after read error correction. While a large number of methods specialized for correcting substitution errors as found in Illumina data exist, few methods for the correction of indel errors, common to technologies like 454 or Ion Torrent, have been proposed.
        RESULTS:We present Fiona, a new stand-alone read error-correction method. Fiona provides a new statistical approach for sequencing error detection and optimal error correction and estimates its parameters automatically. Fiona is able to correct substitution, insertion and deletion errors and can be applied to any sequencing technology. It uses an efficient implementation of the partial suffix array to detect read overlaps with different seed lengths in parallel. We tested Fiona on several real datasets from a variety of organisms with different read lengths and compared its performance with state-of-the-art methods. Fiona shows a constantly higher correction accuracy over a broad range of datasets from 454 and Ion Torrent sequencers, without compromise in speed.
        CONCLUSION:Fiona is an accurate parameter-free read error-correction method that can be run on inexpensive hardware and can make use of multicore parallelization whenever available. Fiona was implemented using the SeqAn library for sequence analysis and is publicly available for download at http://www.seqan.de/projects/fiona.
        CONTACT:mschulz@mmci.uni-saarland.de or hugues.richard@upmc.fr
        SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.},
      url           = {http://bioinformatics.oxfordjournals.org/content/30/17/i356.full},
      local-url     = {file://localhost/Users/reinert/Dropbox/Library.papers3/Files/75/75795684-ABC8-488D-BB7B-330F2F28B93C},
      file          = {{75795684-ABC8-488D-BB7B-330F2F28B93C:/Users/reinert/Dropbox/Library.papers3/Files/75/75795684-ABC8-488D-BB7B-330F2F28B93C:application/pdf}},
      uri           = {\url{papers3://publication/doi/10.1093/bioinformatics/btu440}}
    }
  • [DOI] H. Hauswedell, J. Singer, and K. Reinert, “Lambda: the local aligner for massive biological data.,” Bioinformatics (oxford, england), vol. 30, iss. 17, p. i349–i355, 2014.
    [Bibtex]
    @article{Hauswedell:2014bt,
      author        = {Hauswedell, Hannes and Singer, Jochen and Reinert, Knut},
      title         = {{Lambda}: the local aligner for massive biological data},
      journal       = {Bioinformatics (Oxford, England)},
      year          = {2014},
      volume        = {30},
      number        = {17},
      pages         = {i349--i355},
      month         = sep,
      publisher     = {Oxford University Press},
      affiliation   = {Department of Mathematics and Computer Science, Freie Universit{\"a}t Berlin, Takustr. 9, 14195 Berlin, Germany.},
      doi           = {10.1093/bioinformatics/btu439},
      pmid          = {25161219},
      pmcid         = {PMC4147892},
      language      = {English},
      read          = {Yes},
      rating        = {0},
      date-added    = {2014-09-08T12:33:47GMT},
      date-modified = {2016-01-14T20:09:11GMT},
      abstract      = {MOTIVATION:Next-generation sequencing technologies produce unprecedented amounts of data, leading to completely new research fields. One of these is metagenomics, the study of large-size DNA samples containing a multitude of diverse organisms. A key problem in metagenomics is to functionally and taxonomically classify the sequenced DNA, to which end the well-known BLAST program is usually used. But BLAST has dramatic resource requirements at metagenomic scales of data, imposing a high financial or technical burden on the researcher. Multiple attempts have been made to overcome these limitations and present a viable alternative to BLAST.
        RESULTS:In this work we present Lambda, our own alternative for BLAST in the context of sequence classification. In our tests, Lambda often outperforms the best tools at reproducing BLAST's results and is the fastest compared with the current state of the art at comparable levels of sensitivity.
        AVAILABILITY AND IMPLEMENTATION:Lambda was implemented in the SeqAn open-source C++ library for sequence analysis and is publicly available for download at http://www.seqan.de/projects/lambda.
        CONTACT:hannes.hauswedell@fu-berlin.de
        SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.},
      url           = {http://bioinformatics.oxfordjournals.org/content/30/17/i349.full},
      local-url     = {file://localhost/Users/reinert/Dropbox/Library.papers3/Files/94/9488230A-A39B-4CC4-9E72-D226E28C7C90},
      file          = {{9488230A-A39B-4CC4-9E72-D226E28C7C90:/Users/reinert/Dropbox/Library.papers3/Files/94/9488230A-A39B-4CC4-9E72-D226E28C7C90:application/pdf}},
      uri           = {\url{papers3://publication/doi/10.1093/bioinformatics/btu439}}
    }
  • [DOI] R. Rahn, D. Weese, and K. Reinert, “Journaled string tree-a scalable data structure for analyzing thousands of similar genomes on your laptop.,” Bioinformatics (oxford, england), p. btu438, 2014.
    [Bibtex]
    @article{Rahn:2014bb,
      author        = {Rahn, R and Weese, David and Reinert, Knut},
      title         = {Journaled string tree---a scalable data structure for analyzing thousands of similar genomes on your laptop},
      journal       = {Bioinformatics (Oxford, England)},
      year          = {2014},
      pages         = {btu438},
      month         = jul,
      publisher     = {Oxford University Press},
      affiliation   = {Department of Mathematics and Computer Science, Freie Universit{\"a}t Berlin, Takustr. 9, 14195 Berlin, Germany.},
      doi           = {10.1093/bioinformatics/btu438},
      pmid          = {25028723},
      language      = {English},
      read          = {Yes},
      rating        = {0},
      date-added    = {2014-08-14T06:36:08GMT},
      date-modified = {2016-01-14T20:09:14GMT},
      abstract      = {MOTIVATION:Next-generation sequencing (NGS) has revolutionized biomedical research in the past decade and led to a continuous stream of developments in bioinformatics, addressing the need for fast and space-efficient solutions for analyzing NGS data. Often researchers need to analyze a set of genomic sequences that stem from closely related species or are indeed individuals of the same species. Hence, the analyzed sequences are similar. For analyses where local changes in the examined sequence induce only local changes in the results, it is obviously desirable to examine identical or similar regions not repeatedly.
        RESULTS:In this work, we provide a datatype that exploits data parallelism inherent in a set of similar sequences by analyzing shared regions only once. In real-world experiments, we show that algorithms that otherwise would scan each reference sequentially can be speeded up by a factor of 115. Availability: The data structure and associated tools are publicly available at http://www.seqan.de/projects/jst and are part of SeqAn, the C++ template library for sequence analysis.
        CONTACT:rene.rahn@fu-berlin.de.},
      url           = {http://bioinformatics.oxfordjournals.org/cgi/doi/10.1093/bioinformatics/btu438},
      local-url     = {file://localhost/Users/reinert/Dropbox/Library.papers3/Articles/2014/Rahn/Bioinformatics%202014%20Rahn.pdf},
      file          = {{Bioinformatics 2014 Rahn.pdf:/Users/reinert/Dropbox/Library.papers3/Articles/2014/Rahn/Bioinformatics 2014 Rahn.pdf:application/pdf}},
      uri           = {\url{papers3://publication/doi/10.1093/bioinformatics/btu438}}
    }
  • [DOI] K. Trappe, A. Emde, H. C. Ehrlich, and K. Reinert, “Gustaf: Detecting and correctly classifying SVs in the NGS twilight zone.,” Bioinformatics (oxford, england), p. btu431, 2014.
    [Bibtex]
    @article{Trappe:2014bf,
      author        = {Trappe, Kathrin and Emde, A and Ehrlich, H C and Reinert, Knut},
      title         = {{Gustaf}: Detecting and correctly classifying {SVs} in the {NGS} twilight zone},
      journal       = {Bioinformatics (Oxford, England)},
      year          = {2014},
      pages         = {btu431},
      month         = jul,
      publisher     = {Oxford University Press},
      affiliation   = {Department of Computer Science, Freie Universit{\"a}t Berlin, 14195 Berlin, Germany, Research Group Bioinformatics (NG4), Robert Koch Institute, 13353 Berlin, Germany and New York Genome Center, New York, NY 10013, USA.},
      doi           = {10.1093/bioinformatics/btu431},
      pmid          = {25028727},
      language      = {English},
      read          = {Yes},
      rating        = {4},
      date-added    = {2014-08-14T06:40:59GMT},
      date-modified = {2016-01-14T20:09:17GMT},
      abstract      = {MOTIVATION:The landscape of structural variation (SV) including complex duplication and translocation patterns is far from resolved. SV detection tools usually exhibit low agreement, are often geared toward certain types or size ranges of variation and struggle to correctly classify the type and exact size of SVs.
        RESULTS:We present Gustaf (Generic mUlti-SpliT Alignment Finder), a sound generic multi-split SV detection tool that detects and classifies deletions, inversions, dispersed duplications and translocations of $\ge$30 bp. Our approach is based on a generic multi-split alignment strategy that can identify SV breakpoints with base pair resolution. We show that Gustaf correctly identifies SVs, especially in the range from 30 to 100 bp, which we call the next-generation sequencing (NGS) twilight zone of SVs, as well as larger SVs >500 bp. Gustaf performs better than similar tools in our benchmark and is furthermore able to correctly identify size and location of dispersed duplications and translocations, which otherwise might be wrongly classified, for example, as large deletions. Availability and implementation: Project information, paper benchmark and source code are available via http://www.seqan.de/projects/gustaf/.
        CONTACT:kathrin.trappe@fu-berlin.de.},
      url           = {http://bioinformatics.oxfordjournals.org/content/early/2014/07/29/bioinformatics.btu431.full},
      local-url     = {file://localhost/Users/reinert/Dropbox/Library.papers3/Articles/2014/Trappe/Bioinformatics%202014%20Trappe.pdf},
      file          = {{Bioinformatics 2014 Trappe.pdf:/Users/reinert/Dropbox/Library.papers3/Articles/2014/Trappe/Bioinformatics 2014 Trappe.pdf:application/pdf}},
      uri           = {\url{papers3://publication/doi/10.1093/bioinformatics/btu431}}
    }
  • [DOI] E. Siragusa, D. Weese, and K. Reinert, “Scalable string similarity search/join with approximate seeds and multiple backtracking,” in EDBT ’13: Proceedings of the Joint EDBT/ICDT 2013 Workshops, New York, New York, USA, 2013, pp. 370-374.
    [Bibtex]
    @inproceedings{Siragusa:2013dx,
    author = {Siragusa, Enrico and Weese, David and Reinert, Knut},
    title = {Scalable string similarity search/join with approximate seeds and multiple backtracking},
    booktitle = {{EDBT} '13: Proceedings of the Joint {EDBT/ICDT} 2013 Workshops},
    year = {2013},
    pages = {370--374},
    publisher = {ACM},
    address = {New York, New York, USA},
    month = mar,
    doi = {10.1145/2457317.2457386},
    isbn = {9781450315999},
    read = {Yes},
    rating = {0},
    date-added = {2014-04-05T07:22:02GMT},
    date-modified = {2016-01-21T08:51:13GMT},
    abstract = {We present in this paper scalable algorithms for optimal string similarity search and join. Our methods are variations of those applied in Masai [15], our recently published tool for mapping high-throughput DNA sequencing data with unpreceded},
    url = {http://dl.acm.org/citation.cfm?doid=2457317.2457386},
    local-url = {file://localhost/Users/reinert/Dropbox/Library.papers3/Files/EC/EC14F5DB-5321-44F8-BD50-ACC9917030B7.pdf},
    file = {{EC14F5DB-5321-44F8-BD50-ACC9917030B7.pdf:/Users/reinert/Dropbox/Library.papers3/Files/EC/EC14F5DB-5321-44F8-BD50-ACC9917030B7.pdf:application/pdf}},
    uri = {\url{papers3://publication/doi/10.1145/2457317.2457386}}
    }
  • [DOI] E. Siragusa, D. Weese, and K. Reinert, “Fast and accurate read mapping with approximate seeds and multiple backtracking,” Nucleic Acids Research, vol. 41, iss. 7, p. e78, 2013.
    [Bibtex]
    @article{Siragusa:2013ir,
    author = {Siragusa, Enrico and Weese, David and Reinert, Knut},
    title = {Fast and accurate read mapping with approximate seeds and multiple backtracking},
    journal = {Nucleic Acids Research},
    year = {2013},
    volume = {41},
    number = {7},
    pages = {e78},
    month = jan,
    publisher = {Oxford University Press},
    affiliation = {Department of Mathematics and Computer Science, Freie Universit{\"a}t Berlin, Takustr. 9, 14195 Berlin, Germany and Max Planck Institute for Molecular Genetics, Ihnestr. 63-73, 14195 Berlin, Germany.},
    keywords = {read mapping},
    doi = {10.1093/nar/gkt005},
    pmid = {23358824},
    pmcid = {PMC3627565},
    language = {English},
    read = {Yes},
    rating = {5},
    date-added = {2013-02-01T08:24:19GMT},
    date-modified = {2016-01-14T20:09:32GMT},
    abstract = {We present Masai, a read mapper representing the state-of-the-art in terms of speed and accuracy. Our tool is an order of magnitude faster than RazerS 3 and mrFAST, 2-4 times faster and more accurate than Bowtie 2 and BWA. The novelties of our read mapper are filtration with approximate seeds and a method for multiple backtracking. Approximate seeds, compared with exact seeds, increase filtration specificity while preserving sensitivity. Multiple backtracking amortizes the cost of searching a large set of seeds by taking advantage of the repetitiveness of next-generation sequencing data. Combined together, these two methods significantly speed up approximate search on genomic data sets. Masai is implemented in C++ using the SeqAn library. The source code is distributed under the BSD license and binaries for Linux, Mac OS X and Windows can be freely downloaded from http://www.seqan.de/projects/masai.},
    url = {http://nar.oxfordjournals.org/content/41/7/e78.full},
    local-url = {file://localhost/Users/reinert/Dropbox/Library.papers3/Articles/2013/Siragusa/Nucleic%20Acids%20Res%202013%20Siragusa.pdf},
    file = {{Nucleic Acids Res 2013 Siragusa.pdf:/Users/reinert/Dropbox/Library.papers3/Articles/2013/Siragusa/Nucleic Acids Res 2013 Siragusa.pdf:application/pdf}},
    uri = {\url{papers3://publication/livfe/id/115946}}
    }
  • [DOI] D. Weese, M. Holtgrewe, and K. Reinert, “RazerS 3: faster, fully sensitive read mapping,” Bioinformatics (Oxford, England), vol. 28, iss. 20, pp. 2592-2599, 2012.
    [Bibtex]
    @article{Weese:2012byb,
    author = {Weese, David and Holtgrewe, Manuel and Reinert, Knut},
    title = {{RazerS} 3: faster, fully sensitive read mapping},
    journal = {Bioinformatics (Oxford, England)},
    year = {2012},
    volume = {28},
    number = {20},
    pages = {2592--2599},
    month = oct,
    publisher = {Oxford University Press},
    affiliation = {Department of Mathematics and Computer Science, Freie Universit{\"a}t Berlin, Berlin, Germany. david.weese@fu-berlin.de},
    doi = {10.1093/bioinformatics/bts505},
    pmid = {22923295},
    language = {English},
    read = {Yes},
    rating = {0},
    date-added = {2014-06-18T11:28:14GMT},
    date-modified = {2016-01-14T20:10:02GMT},
    abstract = {MOTIVATION:During the past years, next-generation sequencing has become a key technology for many applications in the biomedical sciences. Throughput continues to increase and new protocols provide longer reads than currently available. In almost all applications, read mapping is a first step. Hence, it is crucial to have algorithms and implementations that perform fast, with high sensitivity, and are able to deal with long reads and a large absolute number of insertions and deletions.
    RESULTS:RazerS is a read mapping program with adjustable sensitivity based on counting q-grams. In this work, we propose the successor RazerS 3, which now supports shared-memory parallelism, an additional seed-based filter with adjustable sensitivity, a much faster, banded version of the Myers' bit-vector algorithm for verification, memory-saving measures and support for the SAM output format. This leads to a much improved performance for mapping reads, in particular, long reads with many errors. We extensively compare RazerS 3 with other popular read mappers and show that its results are often superior to them in terms of sensitivity while exhibiting practical and often competitive run times. In addition, RazerS 3 works without a pre-computed index.
    AVAILABILITY AND IMPLEMENTATION:Source code and binaries are freely available for download at http://www.seqan.de/projects/razers. RazerS 3 is implemented in C++ and OpenMP under a GPL license using the SeqAn library and supports Linux, Mac OS X and Windows.},
    url = {http://bioinformatics.oxfordjournals.org/cgi/doi/10.1093/bioinformatics/bts505},
    local-url = {file://localhost/Users/reinert/Dropbox/Library.papers3/Articles/2012/Weese/Bioinformatics%202012%20Weese-1.pdf},
    file = {{Bioinformatics 2012 Weese-1.pdf:/Users/reinert/Dropbox/Library.papers3/Articles/2012/Weese/Bioinformatics 2012 Weese-1.pdf:application/pdf}},
    uri = {\url{papers3://publication/doi/10.1093/bioinformatics/bts505}}
    }
  • [DOI] A. Emde, M. H. Schulz, D. Weese, R. Sun, M. Vingron, V. M. Kalscheuer, S. A. Haas, and K. Reinert, “Detecting genomic indel variants with exact breakpoints in single- and paired-end sequencing data using SplazerS,” Bioinformatics (Oxford, England), vol. 28, iss. 5, pp. 619-627, 2012.
    [Bibtex]
    @article{Emde:2012ui,
    author = {Emde, Anne-Katrin and Schulz, Marcel H and Weese, David and Sun, Ruping and Vingron, Martin and Kalscheuer, Vera M and Haas, Stefan A and Reinert, Knut},
    title = {Detecting genomic indel variants with exact breakpoints in single- and paired-end sequencing data using {SplazerS}},
    journal = {Bioinformatics (Oxford, England)},
    year = {2012},
    volume = {28},
    number = {5},
    pages = {619--627},
    month = mar,
    publisher = {Oxford University Press},
    affiliation = {Department of Computer Science, Freie Universit{\"a}t Berlin, Takustrasse 9, Max-Planck-Institute for Molecular Genetics, Berlin, Germany. emde@inf.fu-berlin.de},
    doi = {10.1093/bioinformatics/bts019},
    pmid = {22238266},
    language = {English},
    read = {Yes},
    rating = {5},
    date-added = {2012-01-24T21:49:19GMT},
    date-modified = {2016-01-14T20:10:20GMT},
    abstract = {MOTIVATION:The reliable detection of genomic variation in resequencing data is still a major challenge, especially for variants larger than a few base pairs. Sequencing reads crossing boundaries of structural variation carry the potential for their identification, but are difficult to map.
    RESULTS:Here we present a method for 'split' read mapping, where prefix and suffix match of a read may be interrupted by a longer gap in the read-to-reference alignment. We use this method to accurately detect medium-sized insertions and long deletions with precise breakpoints in genomic resequencing data. Compared with alternative split mapping methods, SplazerS significantly improves sensitivity for detecting large indel events, especially in variant-rich regions. Our method is robust in the presence of sequencing errors as well as alignment errors due to genomic mutations/divergence, and can be used on reads of variable lengths. Our analysis shows that SplazerS is a versatile tool applicable to unanchored or single-end as well as anchored paired-end reads. In addition, application of SplazerS to targeted resequencing data led to the interesting discovery of a complete, possibly functional gene retrocopy variant.
    AVAILABILITY:SplazerS is available from http://www.seqan.de/projects/splazers.
    SUPPLEMENTARY INFORMATION:Supplementary data are available at Bioinformatics online.},
    url = {http://bioinformatics.oxfordjournals.org/content/28/5/619.full},
    local-url = {file://localhost/Users/reinert/Dropbox/Library.papers3/Articles/2012/Emde/Bioinformatics%202012%20Emde.pdf},
    file = {{Bioinformatics 2012 Emde.pdf:/Users/reinert/Dropbox/Library.papers3/Articles/2012/Emde/Bioinformatics 2012 Emde.pdf:application/pdf}},
    uri = {\url{papers3://publication/doi/10.1093/bioinformatics/bts019}}
    }
  • B. Kehr, D. Weese, and K. Reinert, “STELLAR: fast and exact local alignments,” BMC Bioinformatics, vol. 12, iss. Suppl 9, p. S15, 2011.
    [Bibtex]
    @article{Kehr:2011vo,
    author = {Kehr, Birte and Weese, David and Reinert, Knut},
    title = {{STELLAR}: fast and exact local alignments},
    journal = {BMC Bioinformatics},
    year = {2011},
    volume = {12},
    number = {Suppl 9},
    pages = {S15},
    publisher = {BioMed Central Ltd},
    doi = {10.1186/1471-2105-12-S9-S15},
    pmid = {22151882},
    pmcid = {PMC3283304},
    read = {Yes},
    rating = {0},
    date-added = {2011-12-01T08:31:26GMT},
    date-modified = {2016-01-14T20:14:14GMT},
    url = {http://www.biomedcentral.com/qc/1471-2105/12/S9/S15/},
    local-url = {file://localhost/Users/reinert/Dropbox/Library.papers3/Articles/2011/Kehr/BMC%20Bioinformatics%202011%20Kehr.pdf},
    file = {{BMC Bioinformatics 2011 Kehr.pdf:/Users/reinert/Dropbox/Library.papers3/Articles/2011/Kehr/BMC Bioinformatics 2011 Kehr.pdf:application/pdf}},
    uri = {\url{papers3://publication/uuid/630CE1B9-6448-4291-920F-CE4C53F31751}}
    }
  • [DOI] M. Holtgrewe, A. Emde, D. Weese, and K. Reinert, “A Novel And Well-Defined Benchmarking Method For Second Generation Read Mapping,” BMC Bioinformatics, vol. 12, iss. 1, p. 210, 2011.
    [Bibtex]
    @article{Holtgrewe:2011fj,
    author = {Holtgrewe, Manuel and Emde, Anne-Katrin and Weese, David and Reinert, Knut},
    title = {A Novel And Well-Defined Benchmarking Method For Second Generation Read Mapping},
    journal = {BMC Bioinformatics},
    year = {2011},
    volume = {12},
    number = {1},
    pages = {210},
    publisher = {BioMed Central Ltd},
    affiliation = {Department of Computer Science, Free University of Berlin, Takustr, Germany. holtgrewe@inf.fu-berlin.de},
    doi = {10.1186/1471-2105-12-210},
    pmid = {21615913},
    pmcid = {PMC3128034},
    language = {English},
    read = {Yes},
    rating = {0},
    date-added = {2011-06-08T20:42:08GMT},
    date-modified = {2016-01-14T20:14:11GMT},
    abstract = {Second generation sequencing technologies yield DNA sequence data at ultra high-throughput. Common to most biological applications is a mapping of the reads to an almost identical or highly similar reference genome. The assessment of the quality of read mapping results is not straightforward and has not been formalized so far. Hence, it has not been easy to compare different read mapping approaches in a unified way and to determine which program is the best for what task. We present a new benchmark method, called Rabema (Read Alignment BEnchMArk), for read mappers. It consists of a strict definition of the read mapping problem and of tools to evaluate the result of arbitrary read mappers supporting the SAM output format. We show the usefulness of the benchmark program by performing a comparison of popular read mappers. The tools supporting the benchmark are licensed under the GPL and available from http://www.seqan.de/projects/rabema.html.},
    url = {http://www.biomedcentral.com/1471-2105/12/210/abstract},
    local-url = {file://localhost/Users/reinert/Dropbox/Library.papers3/Articles/2011/Holtgrewe/BMC%20Bioinformatics%202011%20Holtgrewe.pdf},
    file = {{BMC Bioinformatics 2011 Holtgrewe.pdf:/Users/reinert/Dropbox/Library.papers3/Articles/2011/Holtgrewe/BMC Bioinformatics 2011 Holtgrewe.pdf:application/pdf}},
    uri = {\url{papers3://publication/doi/10.1186/1471-2105-12-210}}
    }
  • [DOI] A. Emde, M. Grunert, D. Weese, K. Reinert, and S. R. Sperling, “MicroRazerS: rapid alignment of small RNA reads,” Bioinformatics (Oxford, England), vol. 26, iss. 1, pp. 123-124, 2010.
    [Bibtex]
    @article{Emde:2010fw,
    author = {Emde, Anne-Katrin and Grunert, Marcel and Weese, David and Reinert, Knut and Sperling, Silke R},
    title = {{MicroRazerS}: rapid alignment of small {RNA} reads},
    journal = {Bioinformatics (Oxford, England)},
    year = {2010},
    volume = {26},
    number = {1},
    pages = {123--124},
    month = jan,
    affiliation = {Department of Computer Science, Free University of Berlin, Takustr. 9, Berlin, Germany. emde@inf.fu-berlin.de},
    doi = {10.1093/bioinformatics/btp601},
    pmid = {19880369},
    language = {English},
    read = {Yes},
    rating = {0},
    date-added = {2010-03-24T08:35:42GMT},
    date-modified = {2016-01-14T20:14:26GMT},
    abstract = {MOTIVATION:Deep sequencing has become the method of choice for determining the small RNA content of a cell. Mapping the sequenced reads onto their reference genome serves as the basis for all further analyses, namely for identification and quantification. A method frequently used is Mega BLAST followed by several filtering steps, even though it is slow and inefficient for this task. Also, none of the currently available short read aligners has established itself for the particular task of small RNA mapping.
    RESULTS:We present MicroRazerS, a tool optimized for mapping small RNAs onto a reference genome. It is an order of magnitude faster than Mega BLAST and comparable in speed with other short read mapping tools. In addition, it is more sensitive and easy to handle and adjust.
    AVAILABILITY:MicroRazerS is part of the SeqAn C++ library and can be downloaded from http://www.seqan.de/projects/MicroRazerS.html.},
    url = {http://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi?dbfrom=pubmed&id=19880369&retmode=ref&cmd=prlinks},
    local-url = {file://localhost/Users/reinert/Dropbox/Library.papers3/Articles/2010/Emde/Bioinformatics%202010%20Emde.pdf},
    file = {{Bioinformatics 2010 Emde.pdf:/Users/reinert/Dropbox/Library.papers3/Articles/2010/Emde/Bioinformatics 2010 Emde.pdf:application/pdf}},
    uri = {\url{papers3://publication/doi/10.1093/bioinformatics/btp601}}
    }
  • [DOI] D. Hüser, A. Gogol-Döring, T. Lutter, S. Weger, K. Winter, E. Hammer, T. Cathomen, K. Reinert, and R. Heilbronn, “Integration preferences of wildtype AAV-2 for consensus rep-binding sites at numerous loci in the human genome,” PLoS Pathogens, vol. 6, iss. 7, p. e1000985, 2010.
    [Bibtex]
    @article{Huser:2010id,
    author = {H{\"u}ser, Daniela and Gogol-D{\"o}ring, Andreas and Lutter, Timo and Weger, Stefan and Winter, Kerstin and Hammer, Eva-Maria and Cathomen, Toni and Reinert, Knut and Heilbronn, Regine},
    title = {Integration preferences of wildtype {AAV-2} for consensus rep-binding sites at numerous loci in the human genome},
    journal = {PLoS Pathogens},
    year = {2010},
    volume = {6},
    number = {7},
    pages = {e1000985},
    affiliation = {Institute of Virology, Campus Benjamin Franklin, Charit{\'e}-Universit{\"a}tsmedizin Berlin, Berlin, Germany.},
    doi = {10.1371/journal.ppat.1000985},
    pmid = {20628575},
    pmcid = {PMC2900306},
    language = {English},
    read = {Yes},
    rating = {0},
    date-added = {2010-11-22T20:41:42GMT},
    date-modified = {2016-01-14T20:14:31GMT},
    abstract = {Adeno-associated virus type 2 (AAV) is known to establish latency by preferential integration in human chromosome 19q13.42. The AAV non-structural protein Rep appears to target a site called AAVS1 by simultaneously binding to Rep-binding sites (RBS) present on the AAV genome and within AAVS1. In the absence of Rep, as is the case with AAV vectors, chromosomal integration is rare and random. For a genome-wide survey of wildtype AAV integration a linker-selection-mediated (LSM)-PCR strategy was designed to retrieve AAV-chromosomal junctions. DNA sequence determination revealed wildtype AAV integration sites scattered over the entire human genome. The bioinformatic analysis of these integration sites compared to those of rep-deficient AAV vectors revealed a highly significant overrepresentation of integration events near to consensus RBS. Integration hotspots included AAVS1 with 10% of total events. Novel hotspots near consensus RBS were identified on chromosome 5p13.3 denoted AAVS2 and on chromosome 3p24.3 denoted AAVS3. AAVS2 displayed seven independent junctions clustered within only 14 bp of a consensus RBS which proved to bind Rep in vitro similar to the RBS in AAVS3. Expression of Rep in the presence of rep-deficient AAV vectors shifted targeting preferences from random integration back to the neighbourhood of consensus RBS at hotspots and numerous additional sites in the human genome. In summary, targeted AAV integration is not as specific for AAVS1 as previously assumed. Rather, Rep targets AAV to integrate into open chromatin regions in the reach of various, consensus RBS homologues in the human genome.},
    url = {http://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi?dbfrom=pubmed&id=20628575&retmode=ref&cmd=prlinks},
    local-url = {file://localhost/Users/reinert/Dropbox/Library.papers3/Articles/2010/H%C3%BCser/PLoS%20Pathog%202010%20H%C3%BCser.pdf},
    file = {{PLoS Pathog 2010 Hüser.pdf:/Users/reinert/Dropbox/Library.papers3/Articles/2010/Hüser/PLoS Pathog 2010 Hüser.pdf:application/pdf}},
    uri = {\url{papers3://publication/doi/10.1371/journal.ppat.1000985}}
    }
  • A. Gogol-Döring and K. Reinert, Biological sequence analysis using the SeqAn C++ library, CRC, 2009.
    [Bibtex]
    @book{GogolDoring:2009wx,
    author = {Gogol-D{\"o}ring, Andreas and Reinert, Knut},
    title = {Biological sequence analysis using the {SeqAn} {C++} library},
    publisher = {CRC},
    year = {2009},
    month = oct,
    isbn = {9781420076233},
    language = {English},
    rating = {0},
    date-added = {2011-06-08T20:46:19GMT},
    date-modified = {2016-01-14T20:14:34GMT},
    abstract = {A key to that invaluable resource, this book provides a highly accessible way for the rapid prototyping of algorithms in the field.},
    url = {http://books.google.com/books?hl=en&lr=&id=Qf98t1LOiBYC&oi=fnd&pg=PP1&dq=seqan+reinert&ots=4y9iyK_zEv&sig=P6EZHMUrqI1Czy6VqcwgIr4qPBg},
    uri = {\url{papers3://publication/uuid/E63A4075-298E-4848-908E-DD78EAB143E5}}
    }
  • [DOI] D. Weese, A. Emde, T. Rausch, A. Döring, and K. Reinert, “RazerS—fast read mapping with sensitivity control,” Genome Research, vol. 19, iss. 9, pp. 1646-1654, 2009.
    [Bibtex]
    @article{Weese:2009iw,
    author = {Weese, David and Emde, Anne-Katrin and Rausch, Tobias and D{\"o}ring, Andreas and Reinert, Knut},
    title = {{RazerS}---fast read mapping with sensitivity control},
    journal = {Genome Research},
    year = {2009},
    volume = {19},
    number = {9},
    pages = {1646--1654},
    month = sep,
    affiliation = {Department of Computer Science, Free University of Berlin, 14195 Berlin, Germany. weese@inf.fu-berlin.de},
    doi = {10.1101/gr.088823.108},
    pmid = {19592482},
    pmcid = {PMC2752123},
    language = {English},
    read = {Yes},
    rating = {0},
    date-added = {2009-08-14T13:54:02GMT},
    date-modified = {2016-01-14T20:14:40GMT},
    abstract = {Second-generation sequencing technologies deliver DNA sequence data at unprecedented high throughput. Common to most biological applications is a mapping of the reads to an almost identical or highly similar reference genome. Due to the large amounts of data, efficient algorithms and implementations are crucial for this task. We present an efficient read mapping tool called RazerS. It allows the user to align sequencing reads of arbitrary length using either the Hamming distance or the edit distance. Our tool can work either lossless or with a user-defined loss rate at higher speeds. Given the loss rate, we present an approach that guarantees not to lose more reads than specified. This enables the user to adapt to the problem at hand and provides a seamless tradeoff between sensitivity and running time.},
    url = {http://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi?dbfrom=pubmed&id=19592482&retmode=ref&cmd=prlinks},
    local-url = {file://localhost/Users/reinert/Dropbox/Library.papers3/Articles/2009/Weese/Genome%20Res.%202009%20Weese.pdf},
    file = {{Genome Res. 2009 Weese.pdf:/Users/reinert/Dropbox/Library.papers3/Articles/2009/Weese/Genome Res. 2009 Weese.pdf:application/pdf}},
    uri = {\url{papers3://publication/doi/10.1101/gr.088823.108}}
    }
  • [DOI] T. Rausch, S. Koren, G. Denisov, D. Weese, A. Emde, A. Döring, and K. Reinert, “A consistency-based consensus algorithm for de novo and reference-guided sequence assembly of short reads,” Bioinformatics (Oxford, England), vol. 25, iss. 9, pp. 1118-1124, 2009.
    [Bibtex]
    @article{Rausch:2009hq,
    author = {Rausch, Tobias and Koren, Sergey and Denisov, Gennady and Weese, David and Emde, Anne-Katrin and D{\"o}ring, Andreas and Reinert, Knut},
    title = {A consistency-based consensus algorithm for de novo and reference-guided sequence assembly of short reads},
    journal = {Bioinformatics (Oxford, England)},
    year = {2009},
    volume = {25},
    number = {9},
    pages = {1118--1124},
    month = may,
    affiliation = {International Max Planck Research School for Computational Biology and Scientific Computing, Ihnestr. 63-73, Algorithmische Bioinformatik, Institut f{\"u}r Informatik, Takustr. 9, 14195 Berlin, Germany. rausch@inf.fu-berlin.de},
    doi = {10.1093/bioinformatics/btp131},
    pmid = {19269990},
    pmcid = {PMC2732307},
    language = {English},
    read = {Yes},
    rating = {0},
    date-added = {2009-03-11T08:51:34GMT},
    date-modified = {2016-01-14T20:14:42GMT},
    abstract = {MOTIVATION:Novel high-throughput sequencing technologies pose new algorithmic challenges in handling massive amounts of short-read, high-coverage data. A robust and versatile consensus tool is of particular interest for such data since a sound multi-read alignment is a prerequisite for variation analyses, accurate genome assemblies and insert sequencing.
    RESULTS:A multi-read alignment algorithm for de novo or reference-guided genome assembly is presented. The program identifies segments shared by multiple reads and then aligns these segments using a consistency-enhanced alignment graph. On real de novo sequencing data obtained from the newly established NCBI Short Read Archive, the program performs similarly in quality to other comparable programs. On more challenging simulated datasets for insert sequencing and variation analyses, our program outperforms the other tools.
    AVAILABILITY:The consensus program can be downloaded from http://www.seqan.de/projects/consensus.html. It can be used stand-alone or in conjunction with the Celera Assembler. Both application scenarios as well as the usage of the tool are described in the documentation.},
    url = {http://bioinformatics.oxfordjournals.org/cgi/content/short/25/9/1118},
    local-url = {file://localhost/Users/reinert/Dropbox/Library.papers3/Articles/2009/Rausch/Bioinformatics%202009%20Rausch.pdf},
    file = {{Bioinformatics 2009 Rausch.pdf:/Users/reinert/Dropbox/Library.papers3/Articles/2009/Rausch/Bioinformatics 2009 Rausch.pdf:application/pdf}},
    uri = {\url{papers3://publication/doi/10.1093/bioinformatics/btp131}}
    }
  • T. Rausch and K. Reinert, “The problem solving handbook for computational biology and bioinformatics,” L. S. Heath and N. Ramakrishnan, Eds., Springer, 2009.
    [Bibtex]
    @incollection{Rausch2009,
    author = {Rausch, Tobias and Reinert, Knut},
    title = {Practical multiple sequence alignment},
    booktitle = {The Problem Solving Handbook for Computational Biology and Bioinformatics},
    year = {2009},
    editor = {Heath, Lenwood S. and Ramakrishnan, Naren},
    publisher = {Springer},
    review-note = {NOTE(review): the original export stored the handbook title in the title field and had no booktitle. The chapter title given here is assumed from the publisher's listing of this handbook; confirm chapter title, year and pages before citing.},
    rating = {0},
    date-added = {2013-09-04T14:04:09GMT},
    date-modified = {2015-07-12T09:51:09GMT},
    uri = {\url{papers3://publication/uuid/A9C736AD-428B-4BD0-9E47-9FEBC190E3A9}}
    }
  • A. Emde, T. Rausch, A. Döring, and K. Reinert, “RazerS—fast read mapping with sensitivity control,” Genome …, 2009.
    [Bibtex]
    @article{Emde:2009wq,
    author = {Weese, David and Emde, Anne-Katrin and Rausch, Tobias and D{\"o}ring, Andreas and Reinert, Knut},
    title = {{RazerS}---fast read mapping with sensitivity control},
    journal = {Genome Research},
    year = {2009},
    volume = {19},
    number = {9},
    pages = {1646--1654},
    internal-note = {Duplicate of Weese:2009iw (same article, imported a second time with truncated metadata); journal, volume, number, pages and first author were completed from the url and that entry. Consider merging the two keys.},
    rating = {0},
    date-added = {2015-09-08T22:50:00GMT},
    date-modified = {2015-11-26T15:38:49GMT},
    abstract = {Abstract Second-generation sequencing technologies deliver DNA sequence data at unprecedented high throughput. Common to most biological applications is a mapping of the reads to an almost identical or highly similar reference genome. Due to the large ...
    },
    url = {http://genome.cshlp.org/content/19/9/1646.short},
    uri = {\url{papers3://publication/uuid/FF1EEB49-88E0-4C15-B291-F9A5F896E20C}}
    }
  • [DOI] T. Rausch, A. Emde, D. Weese, A. Döring, C. Notredame, and K. Reinert, “Segment-based multiple sequence alignment,” Bioinformatics (Oxford, England), vol. 24, iss. 16, pp. i187–i192, 2008.
    [Bibtex]
    @article{Rausch:2008bk,
    author = {Rausch, Tobias and Emde, Anne-Katrin and Weese, David and D{\"o}ring, Andreas and Notredame, Cedric and Reinert, Knut},
    title = {Segment-based multiple sequence alignment},
    journal = {Bioinformatics (Oxford, England)},
    year = {2008},
    volume = {24},
    number = {16},
    pages = {i187--i192},
    month = aug,
    affiliation = {International Max Planck Research School for Computational Biology and Scientific Computing, Ihnestr 63-73, 14195 Berlin, Germany. rausch@inf.fu-berlin.de},
    doi = {10.1093/bioinformatics/btn281},
    pmid = {18689823},
    language = {English},
    read = {Yes},
    rating = {0},
    date-added = {2008-10-31T11:26:05GMT},
    date-modified = {2016-01-14T20:15:17GMT},
    abstract = {MOTIVATION:Many multiple sequence alignment tools have been developed in the past, progressing either in speed or alignment accuracy. Given the importance and wide-spread use of alignment tools, progress in both categories is a contribution to the community and has driven research in the field so far.
    RESULTS:We introduce a graph-based extension to the consistency-based, progressive alignment strategy. We apply the consistency notion to segments instead of single characters. The main problem we solve in this context is to define segments of the sequences in such a way that a graph-based alignment is possible. We implemented the algorithm using the SeqAn library and report results on amino acid and DNA sequences. The benefit of our approach is threefold: (1) sequences with conserved blocks can be rapidly aligned, (2) the implementation is conceptually easy, generic and fast and (3) the consistency idea can be extended to align multiple genomic sequences.
    AVAILABILITY:The segment-based multiple sequence alignment tool can be downloaded from http://www.seqan.de/projects/msa.html. A novel version of T-Coffee interfaced with the tool is available from http://www.tcoffee.org. The usage of the tool is described in both documentations.},
    url = {http://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi?dbfrom=pubmed&id=18689823&retmode=ref&cmd=prlinks},
    local-url = {file://localhost/Users/reinert/Dropbox/Library.papers3/Articles/2008/Rausch/Bioinformatics%202008%20Rausch.pdf},
    file = {{Bioinformatics 2008 Rausch.pdf:/Users/reinert/Dropbox/Library.papers3/Articles/2008/Rausch/Bioinformatics 2008 Rausch.pdf:application/pdf}},
    uri = {\url{papers3://publication/doi/10.1093/bioinformatics/btn281}}
    }
  • [DOI] A. Döring, D. Weese, T. Rausch, and K. Reinert, “SeqAn an efficient, generic C++ library for sequence analysis,” BMC Bioinformatics, vol. 9, iss. 1, p. 11, 2008.
    [Bibtex]
    @article{doring_08_seqan,
      author        = {D{\"o}ring, Andreas and Weese, David and Rausch, Tobias and Reinert, Knut},
      title         = {{SeqAn} an efficient, generic {C++} library for sequence analysis},
      journal       = {BMC Bioinformatics},
      year          = {2008},
      volume        = {9},
      number        = {1},
      pages         = {11},
      publisher     = {BioMed Central Ltd},
      affiliation   = {Algorithmische Bioinformatik, Institut f{\"u}r Informatik, Takustr, 9, 14195 Berlin, Germany. doering@inf.fu-berlin.de},
      doi           = {10.1186/1471-2105-9-11},
      pmid          = {18184432},
      pmcid         = {PMC2246154},
      language      = {English},
      read          = {Yes},
      rating        = {0},
      date-added    = {2009-11-24T15:28:08GMT},
      date-modified = {2016-01-14T20:15:25GMT},
      abstract      = {BACKGROUND: The use of novel algorithmic techniques is pivotal to many important problems in life science. For example the sequencing of the human genome would not have been possible without advanced assembly algorithms. However, owing to the high speed of technological progress and the urgent need for bioinformatics tools, there is a widening gap between state-of-the-art algorithmic techniques and the actual algorithmic components of tools that are in widespread use.
    RESULTS: To remedy this trend we propose the use of SeqAn, a library of efficient data types and algorithms for sequence analysis in computational biology. SeqAn comprises implementations of existing, practical state-of-the-art algorithmic components to provide a sound basis for algorithm testing and development. In this paper we describe the design and content of SeqAn and demonstrate its use by giving two examples. In the first example we show an application of SeqAn as an experimental platform by comparing different exact string matching algorithms. The second example is a simple version of the well-known MUMmer tool rewritten in SeqAn. Results indicate that our implementation is very efficient and versatile to use.
    CONCLUSION: We anticipate that SeqAn greatly simplifies the rapid development of new bioinformatics tools by providing a collection of readily usable, well-designed algorithmic components which are fundamental for the field of sequence analysis. This leverages not only the implementation of new algorithms, but also enables a sound analysis and comparison of existing algorithms.},
      url           = {http://www.biomedcentral.com/1471-2105/9/11},
      local-url     = {file://localhost/Users/reinert/Dropbox/Library.papers3/Articles/2008/oring/BMC%20Bioinformatics%202008%20oring.pdf},
      file          = {{BMC Bioinformatics 2008 oring.pdf:/Users/reinert/Dropbox/Library.papers3/Articles/2008/oring/BMC Bioinformatics 2008 oring.pdf:application/pdf}},
      uri           = {\url{papers3://publication/doi/10.1186/1471-2105-9-11}}
    }
  • T. Rausch, A. Emde, and K. Reinert, “Robust consensus computation,” BMC Bioinformatics, vol. 9, iss. Suppl 10, p. P4, 2008.
    [Bibtex]
    @article{Rausch:2008ws,
      author        = {Rausch, Tobias and Emde, Anne-Katrin and Reinert, Knut},
      title         = {Robust consensus computation},
      journal       = {BMC Bioinformatics},
      year          = {2008},
      volume        = {9},
      number        = {Suppl 10},
      pages         = {P4},
      rating        = {0},
      date-added    = {2013-09-04T14:04:14GMT},
      date-modified = {2016-01-14T20:15:30GMT},
      internal-note = {The exported abstract field was dropped: it held only fragments of the PDF title page (journal banner and author list), not an abstract.},
      url           = {http://www.biomedcentral.com/content/pdf/1471-2105-9-s10-p4.pdf},
      local-url     = {file://localhost/Users/reinert/Dropbox/Library.papers3/Articles/2008/Rausch/BMC%20Bioinformatics%202008%20Rausch.pdf},
      file          = {{BMC Bioinformatics 2008 Rausch.pdf:/Users/reinert/Dropbox/Library.papers3/Articles/2008/Rausch/BMC Bioinformatics 2008 Rausch.pdf:application/pdf}},
      uri           = {\url{papers3://publication/uuid/BE347016-88AA-4642-81DC-E1EE604F9E2E}}
    }