% Workshop paper in the ANLP 2015 proceedings (held at ACL 2015).
% The month is taken from the conference date recorded in the note field.
% NOTE(review): address holds a country rather than a publisher city;
% confirm the intended value before relying on it.
@inproceedings{b7698ab27e724671a29085a30e32a3f2,
  author    = {Wray, Samantha and Mubarak, Hamdy and Ali, Ahmed},
  title     = {Best Practices for Crowdsourcing Dialectal {Arabic} Speech Transcription},
  booktitle = {2nd Workshop on {Arabic} Natural Language Processing, {ANLP} 2015 - held at 53rd Annual Meeting of the Association for Computational Linguistics, {ACL} 2015 - Proceedings},
  editor    = {Habash, Nizar and Vogel, Stephan and Darwish, Kareem},
  year      = {2015},
  month     = jul,
  pages     = {99--107},
  publisher = {Association for Computational Linguistics (ACL)},
  address   = {United States},
  doi       = {10.18653/v1/w15-3211},
  language  = {English},
  abstract  = {In this paper, we investigate different approaches in crowdsourcing transcriptions of Dialectal Arabic speech with automatic quality control to ensure good transcription at the source. Since Dialectal Arabic has no standard orthographic representation, it is very challenging to perform quality control. We propose a complete recipe for speech transcription quality control that includes using output of an Automatic Speech Recognition system. We evaluated the quality of the transcribed speech and through this recipe, we achieved a reduction in transcription error of 1.0\% compared with 13.2\% baseline with no quality control for Egyptian data, and down to 4\% compared with 7.8\% for the North African dialect.},
  note      = {Publisher Copyright: {\textcopyright} ACL 2015. All rights reserved.; 2nd Workshop on Arabic Natural Language Processing, ANLP 2015 ; Conference date: 30-07-2015},
}