% Workshop paper published in the WANLP 2021 proceedings (pages 145--153).
% Names are stored in "Last, First" form so compound surnames parse correctly.
% No `series` field: the only candidate value was identical to `booktitle`
% and would print the proceedings title twice.
% `month` is derived from the conference date recorded in `note` (19-04-2021).
@inproceedings{61f06e7911bf4eb5ab0353fee5f08378,
  author    = {Mubarak, Hamdy and Hassan, Sabit},
  title     = {{UL2C}: Mapping User Locations to Countries on {Arabic} {Twitter}},
  booktitle = {WANLP 2021 - 6th Arabic Natural Language Processing Workshop, Proceedings of the Workshop},
  editor    = {Habash, Nizar and Bouamor, Houda and Hajj, Hazem and Magdy, Walid and Zaghouani, Wajdi and Bougares, Fethi and Tomeh, Nadi and {Abu Farha}, Ibrahim and Touileb, Samia},
  pages     = {145--153},
  publisher = {Association for Computational Linguistics (ACL)},
  address   = {United States},
  year      = {2021},
  month     = apr,
  language  = {English},
  note      = {Publisher Copyright: {\textcopyright} WANLP 2021 - 6th Arabic Natural Language Processing Workshop; 6th Arabic Natural Language Processing Workshop, WANLP 2021; Conference date: 19-04-2021},
  abstract  = {Mapping user locations to countries can be useful for many applications such as dialect identification, author profiling, recommendation systems, etc. Twitter allows users to declare their locations as free text, and these user-declared locations are often noisy and hard to decipher automatically. In this paper, we present the largest manually labeled dataset for mapping user locations on Arabic Twitter to their corresponding countries. We build effective machine learning models that can automate this mapping with significantly better efficiency compared to libraries such as geopy. We also show that our dataset is more effective than data extracted from GeoNames geographical database in this task as the latter covers only locations written in formal ways.},
}