% Journal article record (Scientific Reports, vol. 7, no. 1, 2017).
% The note field holds the funding acknowledgement and copyright line; LaTeX
% special characters inside it (number sign, ampersand) are escaped so the
% entry typesets under styles that print the note.
@article{c2bc7f116f4d4ff1aeaa94c5ecb06dc0,
  author    = {Wang, Qingyu and Shashikant, Cooduvalli S. and Jensen, Matthew and Altman, Naomi S. and Girirajan, Santhosh},
  title     = {Novel metrics to measure coverage in whole exome sequencing datasets reveal local and global non-uniformity},
  journal   = {Scientific Reports},
  year      = {2017},
  month     = dec,
  day       = {1},
  volume    = {7},
  number    = {1},
  publisher = {Nature Publishing Group},
  issn      = {2045-2322},
  doi       = {10.1038/s41598-017-01005-x},
  language  = {English (US)},
  abstract  = {Whole Exome Sequencing (WES) is a powerful clinical diagnostic tool for discovering the genetic basis of many diseases. A major shortcoming of WES is uneven coverage of sequence reads over the exome targets contributing to many low coverage regions, which hinders accurate variant calling. In this study, we devised two novel metrics, Cohort Coverage Sparseness (CCS) and Unevenness (UE) Scores for a detailed assessment of the distribution of coverage of sequence reads. Employing these metrics we revealed non-uniformity of coverage and low coverage regions in the WES data generated by three different platforms. This non-uniformity of coverage is both local (coverage of a given exon across different platforms) and global (coverage of all exons across the genome in the given platform). The low coverage regions encompassing functionally important genes were often associated with high GC content, repeat elements and segmental duplications. While a majority of the problems associated with WES are due to the limitations of the capture methods, further refinements in WES technologies have the potential to enhance its clinical applications.},
  note      = {Funding Information: We thank Debmalya Nandy and Emily Huber for critical reading of the manuscript. This work was supported by a Basil O'Connor Award from the March of Dimes Foundation (\#5-FY14-66), R01-MH107431, a NARSAD Young Investigator Grant from the Brain and Behavior Research Foundation, and resources from the Huck Institutes of the Life Sciences (SG), T32-GM102057 and The Pennsylvania State University Experiment Station grant AES 4586 (CCS). We acknowledge the NIH data repository, the contributing investigators, and the associated primary funding organizations for providing access to the datasets used in this study. We thank NIH Genomic Study Datasets, Christopher A. Walsh, MD, PhD. Children's Hospital Boston, Boston, MA, USA (PI of the dataset), and their funding resource: U54 HG003067. National Human Genome Research Institute, National Institutes of Health, Bethesda, MD, USA; the Department of Health Sciences, University of Milano-Bicocca, Monza, Italy (for SRP028277); Liver Cancer Institute, Zhongshan Hospital, Fudan University, Grant: ID 81272725, ``Identification of driving mutations and singling pathway alterations in intrahepatic cholangiocarcinoma'', National Natural Science Foundation of China (for SRP025150); McGill University, Cancer Research Society, Hungarian Scientific Research Fund (OTKA) contract T04639, Canadian Institutes of Health Research grant 102684, and National Research and Development Fund (NKFP) (for SRP032767). Funding support for the dataset in ``Sporadic autism exomes reveal a highly interconnected protein network of de novo mutations'' (phs000482.v1.p1) was provided by the Simons Foundation Autism Research Initiative (SFARI 137578 \& 191889), NIH HD065285 and the Howard Hughes Medical Institute. Data was originally reported by O'Roak et al. 2012, PMCID: PMC3350576. We are grateful to all of the families at the participating SFARI Simplex Collection (SSC) sites, as well as the principal investigators (A. Beaudet, R.
Bernier, J. Constantino, E. Cook, E. Fombonne, D. Geschwind, E. Hanson, D. Grice, A. Klin, R. Kochel, D. Ledbetter, C. Lord, C. Martin, D. Martin, R. Maxim, J. Miles, O. Ousley, K. Pelphrey, B. Peterson, J. Piggot, C. Saulnier, M. State, W. Stone, J. Sutcliffe, C. Walsh, Z. Warren, E. Wijsman). Publisher Copyright: {\textcopyright} The Author(s) 2017.},
}