% Conference paper, pages 7--12 of the NAACL-HLT 2009 Student Research
% Workshop and Doctoral Consortium proceedings.
% The hash-style citation key is kept as-is so existing citations still resolve.
% Names use the unambiguous "Last, First" form with unbraced given names so
% that styles can abbreviate them to initials correctly.
% The title is stored in Title Case with ASCII TeX quotes; only the proper
% noun is brace-protected against style recasing.
% booktitle holds the complete proceedings title (it is not a book series).
@inproceedings{a94074ebc6ba4119ac367cff2c1972cd,
  author    = {Huang, Jian and Taylor, Sarah M. and Smith, Jonathan L. and Fotiadis, Konstantinos A. and Giles, C. Lee},
  title     = {Solving the ``Who's {Mark Johnson}'' Puzzle: Information Extraction Based Cross Document Coreference},
  booktitle = {{NAACL-HLT} 2009 - Human Language Technologies: 2009 Annual Conference of the North American Chapter of the Association for Computational Linguistics, Proceedings of the Student Research Workshop and Doctoral Consortium},
  editor    = {Germann, Ulrich and Shah, Chirag and Stoyanchev, Svetlana and Rose, Carolyn Penstein and Sarkar, Anoop},
  publisher = {Association for Computational Linguistics (ACL)},
  year      = {2009},
  pages     = {7--12},
  language  = {English (US)},
  abstract  = {Cross Document Coreference (CDC) is the problem of resolving the underlying identity of entities across multiple documents and is a major step for document understanding. We develop a framework to efficiently determine the identity of a person based on extracted information, which includes unary properties such as gender and title, as well as binary relationships with other named entities such as co-occurrence and geo-locations. At the heart of our approach is a suite of similarity functions (specialists) for matching relationships and a relational density-based clustering algorithm that delineates name clusters based on pairwise similarity. We demonstrate the effectiveness of our methods on the WePS benchmark datasets and point out future research directions.},
  note      = {Publisher Copyright: {\textcopyright} 2009 Association for Computational Linguistics; 2009 Annual Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, NAACL-HLT 2009 ; Conference date: 01-06-2009},
}