% Conference paper on the MIT-QCRI Arabic dialect identification system (ASRU 2017, MGB-3 challenge).
% Review notes for maintainers:
%   - month follows the conference date recorded in the note field (16 to 20 Dec 2017). The exported
%     values (month jul, day 2) contradicted it and looked like a placeholder; TODO confirm against
%     the publisher record.
%   - the exported series field repeated booktitle verbatim and was dropped, so styles that print
%     series do not show the proceedings title twice.
%   - address is kept as exported; presumably the publisher country, not the conference venue.
%   - the percent sign in the abstract is escaped so styles that print abstracts do not break.
@inproceedings{8fa947dc80124436b7a5a4305764e6d0,
  author    = {Shon, Suwon and Ali, Ahmed and Glass, James},
  title     = {{MIT-QCRI} {Arabic} dialect identification system for the 2017 multi-genre broadcast challenge},
  booktitle = {2017 {IEEE} Automatic Speech Recognition and Understanding Workshop, {ASRU} 2017 - Proceedings},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  pages     = {374--380},
  year      = {2017},
  month     = dec,
  doi       = {10.1109/ASRU.2017.8268960},
  language  = {English},
  keywords  = {Arabic, Dialect Recognition, Domain Adaptation, MGB challenge, Siamese Network},
  note      = {Publisher Copyright: {\textcopyright} 2017 IEEE.; 2017 IEEE Automatic Speech Recognition and Understanding Workshop, ASRU 2017 ; Conference date: 16-12-2017 Through 20-12-2017},
  abstract  = {In order to successfully annotate the Arabic speech content found in open-domain media broadcasts, it is essential to be able to process a diverse set of Arabic dialects. For the 2017 Multi-Genre Broadcast challenge (MGB-3) there were two possible tasks: Arabic speech recognition, and Arabic Dialect Identification (ADI). In this paper, we describe our efforts to create an ADI system for the MGB-3 challenge, with the goal of distinguishing amongst four major Arabic dialects, as well as Modern Standard Arabic. Our research focused on dialect variability and domain mismatches between the training and test domain. In order to achieve a robust ADI system, we explored both Siamese neural network models to learn similarity and dissimilarities among Arabic dialects, as well as i-vector post-processing to adapt domain mismatches. Both Acoustic and linguistic features were used for the final MGB-3 submissions, with the best primary system achieving 75\% accuracy on the official 10hr test set.},
}