% Journal article: Bickhart et al., Nature Biotechnology 40(5), 711--719, 2022
% (MAGPhase; lineage-resolved metagenome-assembled genomes).
% The citation key is an opaque export identifier; it is kept unchanged so
% existing \cite commands keep resolving.
% Authors use the unambiguous "Last, First" form with unbraced given names so
% that styles can abbreviate them and the accent is a proper special character.
% The funding and copyright text from the export lives in annote, which the
% standard styles do not print; the note field would be typeset in every
% bibliography entry.
% NOTE(review): the export lists the surname of "Victoria Pascal Andreu" as
% "Andreu"; it may be the compound "Pascal Andreu" -- confirm against the paper.
@article{3151b0394bd04556af94831d1af44963,
  author    = {Bickhart, Derek M. and Kolmogorov, Mikhail and Tseng, Elizabeth and Portik, Daniel M. and Korobeynikov, Anton and Tolstoganov, Ivan and Uritskiy, Gherman and Liachko, Ivan and Sullivan, Shawn T. and Shin, Sung Bong and Zorea, Alvah and Andreu, Vict{\`o}ria Pascal and Panke-Buisse, Kevin and Medema, Marnix H. and Mizrahi, Itzhak and Pevzner, Pavel A. and Smith, Timothy P. L.},
  title     = {Generating lineage-resolved, complete metagenome-assembled genomes from complex microbial communities},
  journal   = {Nature Biotechnology},
  year      = {2022},
  month     = may,
  day       = {1},
  volume    = {40},
  number    = {5},
  pages     = {711--719},
  doi       = {10.1038/s41587-021-01130-z},
  issn      = {1087-0156},
  publisher = {Nature Publishing Group},
  language  = {English},
  abstract  = {Microbial communities might include distinct lineages of closely related organisms that complicate metagenomic assembly and prevent the generation of complete metagenome-assembled genomes (MAGs). Here we show that deep sequencing using long (HiFi) reads combined with Hi-C binning can address this challenge even for complex microbial communities. Using existing methods, we sequenced the sheep fecal metagenome and identified 428 MAGs with more than 90% completeness, including 44 MAGs in single circular contigs. To resolve closely related strains (lineages), we developed MAGPhase, which separates lineages of related organisms by discriminating variant haplotypes across hundreds of kilobases of genomic sequence. MAGPhase identified 220 lineage-resolved MAGs in our dataset. The ability to resolve closely related microbes in complex microbial communities improves the identification of biosynthetic gene clusters and the precision of assigning mobile genetic elements to host genomes. We identified 1,400 complete and 350 partial biosynthetic gene clusters, most of which are novel, as well as 424 (298) potential host–viral (host–plasmid) associations using Hi-C data.},
  annote    = {Funding Information: We thank K. McClure, K. Kuhn, B. Lee, J. Carnahan and W. Thompson for technical support. D.M.B. was supported by appropriated USDA CRIS Project 5090-31000-026-00-D. T.P.L.S. and S.B.S. were supported by appropriated USDA CRIS Project 3040-31000-100-00D. I.L., S.T.S. and G.U. were supported, in part, by NIH grants R44AI150008 and R44AI162570 to Phase Genomics. I.M. was supported by grants from the European Research Council (no. 640384) and from the Israel Science Foundation (no. 1947/19). M.K. and P.A.P. were supported by NSF/MCB-BSF grant 1715911. V.P.A. was supported by the US Defense Advanced Research Projects Agency{\textquoteright}s Living Foundries program award HR0011-15-C-0084. A.K. and I.T. were supported by St. Petersburg State University (grant ID PURE 73023672). K.P. was supported by appropriated USDA CRIS Project 5090-21000-071-000-D. We thank P. J. Weimer for helpful comments and suggestions on the manuscript. The USDA does not endorse any products or services. Mentioning of trade names is for information purposes only. The USDA is an equal opportunity employer. Publisher Copyright: {\textcopyright} 2022, This is a U.S. government work and not under copyright protection in the U.S.; foreign copyright protection may apply.},
}