% Conference paper from the ICNLSP 2018 proceedings (see booktitle, publisher and doi below).
% Braces in the title keep the proper noun and the acronym capitalised under sentence-casing styles.
% Authors use the "Last, First" form throughout so name parsing is unambiguous.
% A series field is intentionally omitted: repeating booktitle there makes standard styles print the venue twice.
% NOTE(review): address holds a country rather than the publisher's city -- confirm and replace.
@inproceedings{e38574062d424cf09b2787a65e572f3b,
title = "A single-model approach for {Arabic} segmentation, {POS} tagging, and named entity recognition",
abstract = "This paper presents an entirely new, one-million-word annotated corpus for a comprehensive, machine-learning-based preprocessing of text in Modern Standard Arabic. Contrary to the conventional pipeline architecture, we solve the NLP tasks of word segmentation, POS tagging and named entity recognition as a single sequence labeling task. This single-component configuration results in a faster operation and is able to provide state-of-the-art precision and recall according to our evaluations. The fine-grained output tag set output by our annotator greatly simplifies downstream tasks such as lemmatization. Provided as a trained OpenNLP component, the annotator is free for research purposes.",
keywords = "Lemmatization, Machine learning, NLP, Named entity recognition, POS tagging, Segmentation",
author = "Freihat, {Abed Alhakim} and Bella, Gabor and Mubarak, Hamdy and Giunchiglia, Fausto",
note = "Publisher Copyright: {\textcopyright} 2018 IEEE.; 2nd International Conference on Natural Language and Speech Processing, ICNLSP 2018 ; Conference date: 25-04-2018 Through 26-04-2018",
year = "2018",
month = jun,
day = "6",
doi = "10.1109/ICNLSP.2018.8374393",
language = "English",
publisher = "Institute of Electrical and Electronics Engineers Inc.",
pages = "1--8",
booktitle = "2nd International Conference on Natural Language and Speech Processing, ICNLSP 2018",
address = "United States",
}