% Suresh et al. (2026), Findings of EACL 2026 paper on turn-taking prediction
% with semantically annotated gestures (DnD Gesture++ corpus).
% Conventions: names in "Last, First" form, bare DOI (no resolver prefix),
% en-dash page range, `year` only (no duplicate `date`).
@inproceedings{suresh-etal-2026-modeling,
  author    = {Suresh, Varsha and Mughal, Muhammad Hamza and Theobalt, Christian and Demberg, Vera},
  title     = {Modeling Turn-Taking with Semantically Informed Gestures},
  editor    = {Demberg, Vera and Inui, Kentaro and Marquez, Llu{\'\i}s},
  booktitle = {Findings of the Association for Computational Linguistics: {EACL} 2026},
  year      = {2026},
  pages     = {2034--2041},
  publisher = {Association for Computational Linguistics},
  address   = {Rabat, Morocco},
  isbn      = {979-8-89176-386-9},
  doi       = {10.18653/v1/2026.findings-eacl.106},
  url       = {https://aclanthology.org/2026.findings-eacl.106/},
  abstract  = {In conversation, humans use multimodal cues, such as speech, gestures, and gaze, to manage turn-taking. While linguistic and acoustic features are informative, gestures provide complementary cues for modeling these transitions. To study this, we introduce DnD Gesture++, an extension of the multi-party DnD Gesture corpus enriched with 2,663 semantic gesture annotations spanning iconic, metaphoric, deictic, and discourse types. Using this dataset, we model turn-taking prediction through a Mixture-of-Experts framework integrating text, audio, and gestures. Experiments show that incorporating semantically guided gestures yields consistent performance gains over baselines, demonstrating their complementary role in multimodal turn-taking.},
  pubstate  = {published},
}