• S. Canzar, S. Andreotti, D. Weese, K. Reinert, and G. W. Klau, “CIDANE: comprehensive isoform discovery and abundance estimation,” Genome Biology, vol. 17, iss. 1, 2016.
[Bibtex]
@comment{CIDANE paper (Genome Biology 2016). The acronym in the title is brace-protected so that sentence-casing styles keep it in capitals.}
@article{fu_mi_publications1830,
  author    = {Canzar, S. and Andreotti, S. and Weese, D. and Reinert, K. and Klau, G. W.},
  title     = {{CIDANE}: comprehensive isoform discovery and abundance estimation},
  journal   = {Genome Biology},
  volume    = {17},
  number    = {1},
  year      = {2016},
  month     = jan,
  publisher = {BioMed Central, Springer Science+Business Media},
  url       = {http://publications.imp.fu-berlin.de/1830/},
  abstract  = {We present CIDANE, a novel framework for genome-based transcript reconstruction and quantification from
    RNA-seq reads. CIDANE assembles transcripts efficiently with significantly higher sensitivity and precision than
    existing tools. Its algorithmic core not only reconstructs transcripts ab initio, but also allows the use of the growing
    annotation of known splice sites, transcription start and end sites, or full-length transcripts, which are available for
    most model organisms. CIDANE supports the integrated analysis of RNA-seq and additional gene-boundary data and recovers
    splice junctions that are invisible to other methods. CIDANE is available at
    http://ccb.jhu.edu/software/cidane/.},
}
• M. Jäger, M. Schubach, T. Zemojtel, K. Reinert, D. M. Church, and P. N. Robinson, “Alternate-locus aware variant calling in whole genome sequencing,” Genome Medicine, vol. 8, iss. 1, 2016.
[Bibtex]
@comment{Alternate-locus aware variant calling (Genome Medicine 2016). The abstract is a structured abstract; each section heading ends in a colon so it stays readable after whitespace is collapsed.}
@article{fu_mi_publications2004,
  author    = {J{\"a}ger, Marten and Schubach, Max and Zemojtel, Tomasz and Reinert, Knut and
    Church, Deanna M. and Robinson, Peter N.},
  title     = {Alternate-locus aware variant calling in whole genome sequencing},
  journal   = {Genome Medicine},
  volume    = {8},
  number    = {1},
  year      = {2016},
  month     = dec,
  publisher = {BioMed Central (Springer Nature)},
  url       = {http://publications.imp.fu-berlin.de/2004/},
  abstract  = {Background:
    The last two human genome assemblies have extended the previous linear golden-path paradigm of the human genome to a
    graph-like model to better represent regions with a high degree of structural variability. The new model offers
    opportunities to improve the technical validity of variant calling in whole-genome sequencing (WGS).
    Methods:
    We developed an algorithm that analyzes the patterns of variant calls in the 178 structurally variable regions of
    the GRCh38 genome assembly, and infers whether a given sample is most likely to contain sequences from the primary
    assembly, an alternate locus, or their heterozygous combination at each of these 178 regions. We investigate 121
    in-house WGS datasets that have been aligned to the GRCh37 and GRCh38 assemblies.
    Results:
    We show that stretches of sequences that are largely but not entirely identical between the primary assembly and an
    alternate locus can result in multiple variant calls against regions of the primary assembly. In WGS analysis, this
    results in characteristic and recognizable patterns of variant calls at positions that we term alignable
    scaffold-discrepant positions (ASDPs). In 121 in-house genomes, on average 51.8{$\pm$}3.8 of the 178 regions were
    found to correspond best to an alternate locus rather than the primary assembly sequence, and filtering these
    genomes with our algorithm led to the identification of 7863 variant calls per genome that colocalized with ASDPs.
    Additionally, we found that 437 of 791 genome-wide association study hits located within one of the regions
    corresponded to ASDPs.
    Conclusions:
    Our algorithm uses the information contained in the 178 structurally variable regions of the GRCh38 genome assembly
    to avoid spurious variant calls in cases where samples contain an alternate locus rather than the corresponding
    segment of the primary assembly. These results suggest the great potential of fully incorporating the resources of
    graph-like genome assemblies into variant calling, but also underscore the importance of developing computational
    resources that will allow a full reconstruction of the genotype in personal genomes. Our algorithm is freely
    available at https://github.com/charite/asdpex.},
}
• T. Marschall, K. Reinert, et al. (59 authors in total), “Computational pan-genomics: status, promises and challenges,” Briefings in Bioinformatics, 2016.
[Bibtex]
@comment{Computational pan-genomics review (Briefings in Bioinformatics 2016). The author list is truncated with the literal "and others" token so the style renders et al.; the total author count is kept in the note field.}
@article{fu_mi_publications1981,
  author    = {Marschall, T. and Reinert, K. and others},
  title     = {Computational pan-genomics: status, promises and challenges},
  journal   = {Briefings in Bioinformatics},
  year      = {2016},
  month     = oct,
  publisher = {Oxford Journals},
  note      = {59 authors in total},
  url       = {http://publications.imp.fu-berlin.de/1981/},
  abstract  = {Many disciplines, from human genetics and oncology to plant breeding, microbiology and virology, commonly
    face the challenge of analyzing rapidly increasing numbers of genomes. In case of Homo sapiens, the number of sequenced
    genomes will approach hundreds of thousands in the next few years. Simply scaling up established bioinformatics
    pipelines will not be sufficient for leveraging the full potential of such rich genomic data sets. Instead, novel,
    qualitatively different computational methods and paradigms are needed. We will witness the rapid extension of
    computational pan-genomics, a new sub-area of research in computational biology. In this article, we generalize existing
    definitions and understand a pan-genome as any collection of genomic sequences to be analyzed jointly or to be used as a
    reference. We examine already available approaches to construct and use pan-genomes, discuss the potential benefits of
    future technologies and methodologies and review open challenges from the vantage point of the above-mentioned
    biological disciplines. As a prominent example for a computational paradigm shift, we particularly highlight the
    transition from the representation of reference genomes as strings to representations as graphs. We outline how this and
    other challenges from different application domains translate into common computational problems, point out relevant
    bioinformatics techniques and identify open problems in computer science. With this review, we aim to increase awareness
    that a joint approach to computational pan-genomics can help address many of the problems currently faced in various
    domains.},
}
• B. Vatansever, A. Muñoz, C. L. Klein, and K. Reinert, “Development and optimisation of a generic micro LC-ESI-MS method for the qualitative and quantitative determination of 30-mer toxic gliadin peptides in wheat flour for food analysis,” Analytical and Bioanalytical Chemistry, p. 1–9, 2016.
[Bibtex]
@comment{Micro LC-ESI-MS method for gliadin peptides (Analytical and Bioanalytical Chemistry 2016). The acronym in the title is brace-protected against sentence-casing. NOTE(review): the trademark sign after RapiGest is restored from a mis-encoded character; confirm against the publisher's abstract.}
@article{fu_mi_publications1976,
  author    = {Vatansever, B. and Mu{\~n}oz, A. and Klein, C. L. and Reinert, K.},
  title     = {Development and optimisation of a generic micro {LC-ESI-MS} method for the qualitative and quantitative
    determination of 30-mer toxic gliadin peptides in wheat flour for food analysis},
  journal   = {Analytical and Bioanalytical Chemistry},
  pages     = {1--9},
  year      = {2016},
  month     = oct,
  publisher = {Springer Berlin Heidelberg},
  url       = {http://publications.imp.fu-berlin.de/1976/},
  abstract  = {We sometimes see manufactured bakery products on the market which are labelled as being gluten free. Why is
    the content of such gluten proteins of importance for the fabrication of bakery industry and for the products? The
    gluten proteins represent up to 80 \% of wheat proteins, and they are conventionally subdivided into gliadins and
    glutenins. Gliadins belong to the proline and glutamine-rich prolamin family. Its role in human gluten intolerance, as a
    consequence of its harmful effects, is well documented in the scientific literature. The only known therapy so far is a
    gluten-free diet, and hence, it is important to develop robust and reliable analytical methods to quantitatively assess
    the presence of the identified peptides causing the so-called coeliac disease. This work describes the development of a
    new, fast and robust micro ion pair-LC-MS analytical method for the qualitative and quantitative determination of 30-mer
    toxic gliadin peptides in wheat flour. The use of RapiGest{\texttrademark} SF as a denaturation reagent prior to the enzymatic
    digestion showed to shorten the measuring time. During the optimisation of the enzymatic digestion step, the best 30-mer
    toxic peptide was identified from the maximum recovery after 3 h of digestion time. The lower limit of quantification
    was determined to be 0.25 ng/{\ensuremath{\mu}}L. The method has shown to be linear for the selected concentration range
    of 0.25--3.0 ng/{\ensuremath{\mu}}L. The uncertainty related to reproducibility of measurement procedure, excluding the
    extraction step, has shown to be 5.0 \% (N = 12). Finally, this method was successfully applied to the quantification of
    30-mer toxic peptides from commercial wheat flour with an overall uncertainty under reproducibility conditions of 6.4 \%
    including the extraction of the gliadin fraction. The results were always expressed as the average of the values from
    all standard concentrations. Subsequently, the final concentration of the 30-mer toxic peptide in the flour was
    calculated and expressed in milligrams per gram unit. The determined, calculated concentration of the 30-mer toxic
    peptide in the flour was found to be 1.29 {$\pm$} 0.37 {\ensuremath{\mu}}g/g in flour (N = 25, sy = 545,075, f = 25 {$-$} 2
    (t = 2.069), P = 95 \%, two-sided).},
}