@comment{
  Conference paper by Khurana and Ali in the SLT 2016 proceedings.
  The conference ran 13-16 Dec 2016 (see note); year/month/day below record
  the 7 Feb 2017 date carried by the exported record.
  Acronyms and proper nouns in the title are brace-protected so that
  sentence-casing styles do not lowercase them.
}
@inproceedings{6cad75a939a646a7bea2cc1065dd2f64,
  author    = {Khurana, Sameer and Ali, Ahmed},
  title     = {{QCRI} advanced transcription system ({QATS}) for the {Arabic} Multi-Dialect Broadcast media recognition: {MGB-2} challenge},
  booktitle = {2016 IEEE Workshop on Spoken Language Technology, SLT 2016 - Proceedings},
  pages     = {292--298},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2017},
  month     = feb,
  day       = {7},
  doi       = {10.1109/SLT.2016.7846279},
  language  = {English},
  keywords  = {Arabic Speech Recognition, Bi-directional LSTM, Kaldi, Purely sequence trained acoustic models, QATS, RNN LM},
  abstract  = {In this paper, we describe Qatar Computing Research Institute's (QCRI) speech transcription system for the 2016 Dialectal Arabic Multi-Genre Broadcast (MGB-2) challenge. MGB-2 is a controlled evaluation using 1,200 hours audio with lightly supervised transcription. Our system, which was a combination of three purely sequence trained recognition systems, achieved the lowest WER of 14.2\% among the nine participating teams. Key features of our transcription system are: purely sequence trained acoustic models using the recently introduced Lattice free Maximum Mutual Information (LF-MMI) modeling framework; Language model rescoring using a four-gram and Recurrent Neural Network with Max-Ent connections (RNNME) language models; and system combination using Minimum Bayes Risk (MBR) decoding criterion. The whole system is built using kaldi speech recognition toolkit.},
  note      = {Publisher Copyright: {\textcopyright} 2016 IEEE.; 2016 IEEE Workshop on Spoken Language Technology, SLT 2016 ; Conference date: 13-12-2016 Through 16-12-2016},
}