% Conference paper from the 2nd Joint Conference on Lexical and Computational
% Semantics (2013). Names are stored as "Last, First"; the citation key is the
% exporter-generated identifier and is kept so existing citations still resolve.
@inproceedings{13925c7feb34444b8d3045e5eb5a7f5e,
title = "Choosing the Right Words: Characterizing and Reducing Error of the Word Count Approach",
abstract = "Social scientists are increasingly using the vast amount of text available on social media to measure variation in happiness and other psychological states. Such studies count words deemed to be indicators of happiness and track how the word frequencies change across locations or time. This word count approach is simple and scalable, yet often picks up false signals, as words can appear in different contexts and take on different meanings. We characterize the types of errors that occur using the word count approach, and find lexical ambiguity to be the most prevalent. We then show that one can reduce error with a simple refinement to such lexica by automatically eliminating highly ambiguous words. The resulting refined lexica improve precision as measured by human judgments of word occurrences in Facebook posts.",
author = "Schwartz, H. Andrew and Eichstaedt, Johannes and Dziurzynski, Lukasz and Blanco, Eduardo and Kern, Margaret L. and Ramones, Stephanie and Seligman, Martin and Ungar, Lyle",
note = "Publisher Copyright: {\textcopyright}2013 Association for Computational Linguistics.; 2nd Joint Conference on Lexical and Computational Semantics, *SEM 2013 ; Conference date: 13-06-2013 Through 14-06-2013",
year = "2013",
language = "English",
series = "SEM 2013 - 2nd Joint Conference on Lexical and Computational Semantics, Proceedings of the Main Conference and the Shared Task: Semantic Textual Similarity",
publisher = "Association for Computational Linguistics (ACL)",
pages = "296--305",
editor = "Diab, Mona and Baldwin, Tim and Baroni, Marco",
booktitle = "SEM 2013 - 2nd Joint Conference on Lexical and Computational Semantics, Proceedings of the Main Conference and the Shared Task",
}