% NADI 2025 shared-task system paper (ADI and ASR subtasks).
% Names are stored in "Last, First" form so compound surnames are parsed
% correctly; the DOI is stored bare (no resolver prefix).
@inproceedings{m-abdullah-etal-2025-saarland,
  author    = {Abdullah, Badr M. and Al Ghussin, Yusser and Al Khalili, Zena and {\"O}zyilmaz, {\"O}mer Tarik and Valdenegro-Toro, Matias and Ostermann, Simon and Klakow, Dietrich},
  title     = {{Saarland-Groningen} at {NADI} 2025 Shared Task: Effective Dialectal {Arabic} Speech Processing under Data Constraints},
  booktitle = {Proceedings of The Third Arabic Natural Language Processing Conference: Shared Tasks},
  editor    = {Darwish, Kareem and Ali, Ahmed and Abu Farha, Ibrahim and Touileb, Samia and Zitouni, Imed and Abdelali, Ahmed and Al-Ghamdi, Sharefah and Alkhereyf, Sakhar and Zaghouani, Wajdi and Khalifa, Salam and AlKhamissi, Badr and Almatham, Rawan and Hamed, Injy and Alyafeai, Zaid and Alowisheq, Areeb and Inoue, Go and Mrini, Khalil and Alshammari, Waad},
  year      = {2025},
  date      = {2025},
  pages     = {745--751},
  publisher = {Association for Computational Linguistics},
  address   = {Suzhou, China},
  isbn      = {979-8-89176-356-2},
  doi       = {10.18653/v1/2025.arabicnlp-sharedtasks.102},
  url       = {https://aclanthology.org/2025.arabicnlp-sharedtasks.102/},
  abstract  = {We present our systems for the NADI 2025 shared task on multidialectal Arabic speech processing, participating in both spoken dialect identification (ADI) and automatic speech recognition (ASR) subtasks. Working under data constraints by using only the provided shared task resources for dialect adaptation, we explore effective model adaptation strategies for dialectal Arabic speech. For ADI, we fine-tune w2v-BERT 2.0 and employ voice conversion as data augmentation, improving accuracy from 68.71\% to 76.40\% on a blind cross-domain test set. For ASR, we develop two complementary approaches: (1) a CTC-based model pre-trained on public Arabic speech data, and (2) Whisper-based models using two-stage fine-tuning. Our experiments show that while dialect-centric CTC models exhibit better zero-shot dialectal performance (58.89 vs 93.90 WER), Whisper achieves better performance after dialect-specific adaptation, which reduces WER from 93.89 to 39.78 WER. We also demonstrate that using character error rate (CER) as a validation criterion provides practical benefits with minimal performance tradeoffs. Despite using no external resources for dialect adaptation beyond the shared task data, our systems ranked second in ADI and third in ASR, demonstrating that careful adaptation strategies can overcome data constraints in dialectal speech processing.},
  pubstate  = {published},
  type      = {inproceedings}
}