% Conference paper in the LREC 2016 proceedings (Zaghouani et al.).
% The proceedings title is stored only in booktitle; it is intentionally not
% repeated in a series field, because styles would then print it twice.
% Words wrapped in braces ({Arabic}, {LREC}) are protected from style recasing.
@inproceedings{59e4b5d8264745fe9c7694a23aa1ec0c,
  author    = {Zaghouani, Wajdi and Bouamor, Houda and Hawwari, Abdelati and Diab, Mona and Obeid, Ossama and Ghoneim, Mahmoud and Alqahtani, Sawsan and Oflazer, Kemal},
  title     = {Guidelines and framework for a large scale {Arabic} diacritized corpus},
  booktitle = {Proceedings of the 10th International Conference on Language Resources and Evaluation, {LREC} 2016},
  editor    = {Calzolari, Nicoletta and Choukri, Khalid and Mazo, Helene and Moreno, Asuncion and Declerck, Thierry and Goggi, Sara and Grobelnik, Marko and Odijk, Jan and Piperidis, Stelios and Maegaard, Bente and Mariani, Joseph},
  publisher = {European Language Resources Association (ELRA)},
  pages     = {3637--3643},
  year      = {2016},
  month     = may,
  language  = {English},
  keywords  = {Annotation, Arabic diacritization, Guidelines},
  abstract  = {This paper presents the annotation guidelines developed as part of an effort to create a large scale manually diacritized corpus for various Arabic text genres. The target size of the annotated corpus is 2 million words. We summarize the guidelines and describe issues encountered during the training of the annotators. We also discuss the challenges posed by the complexity of the Arabic language and how they are addressed. Finally, we present the diacritization annotation procedure and detail the quality of the resulting annotations.},
  note      = {10th International Conference on Language Resources and Evaluation, LREC 2016 ; Conference date: 23-05-2016 Through 28-05-2016},
}