The effects of lexical frequency on anticipatory voice assimilation in Bulgarian obstruents Inproceedings
Grawunder, Sven (Ed.): Studientexte zur Sprachkommunikation: Elektronische Sprachsignalverarbeitung 2025, TUDpress, pp. 163-169, Dresden, 2025, ISBN 978-3-95908-803-9, ISSN 0940-6832. This study investigates the relation between the surprisal (or unpredictability) of linguistic items and anticipatory voicing assimilation in Bulgarian obstruents. Using a corpus of speech read by 140 Bulgarian speakers and word-level language models, we calculated unigram surprisal for word forms ending in obstruents followed by a word-initial obstruent of the opposite underlying [±voice] specification. Percentage of voicing was computed for 9,712 word-final obstruents. Linear mixed models were used to determine the effect of surprisal on the percentage of voicing in assimilating obstruents. The results confirm that Bulgarian obstruents do indeed in general assimilate to the voicing of a following obstruent: voiceless obstruents become voiced before voiced ones, while voiced obstruents are devoiced before voiceless ones. Crucially, however, surprisal had a significant effect on the percentage of voicing found in assimilating obstruents: in words with higher surprisal values, we found significantly lower degrees of voicing in voiceless obstruents before voiced ones, as well as significantly less devoicing of voiced obstruents before voiceless ones. This shows that assimilation is stronger in low-surprisal words, while in high-surprisal words speakers attempt to maintain the underlying [±voice] specification of an obstruent to a higher degree. Our findings add to a growing body of research that demonstrates that processes once thought of as entirely categorical in fact exhibit gradient variation in fine phonetic detail, which is attributable to speakers’ awareness of statistical patterns in language use and their response to the predictability of linguistic items in maintaining a balance between phonetic encoding and information density.
% Conference paper in the ESSV 2025 proceedings volume (Studientexte zur Sprachkommunikation), pp. 163--169.
@inproceedings{sabev_etal_essv2025,
  author    = {Sabev, Mitko and Andreeva, Bistra and M{\"o}bius, Bernd and Yuen, Ivan and Ibrahim, Omnia},
  title     = {The effects of lexical frequency on anticipatory voice assimilation in {Bulgarian} obstruents},
  editor    = {Grawunder, Sven},
  booktitle = {Studientexte zur Sprachkommunikation: Elektronische Sprachsignalverarbeitung 2025},
  publisher = {TUDpress},
  address   = {Dresden},
  year      = {2025},
  pages     = {163--169},
  isbn      = {978-3-95908-803-9},
  issn      = {0940-6832},
  url       = {https://www.essv.de/paper.php?id=1249},
  abstract  = {This study investigates the relation between the surprisal (or unpredictability) of linguistic items and anticipatory voicing assimilation in Bulgarian obstruents. Using a corpus of speech read by 140 Bulgarian speakers and word-level language models, we calculated unigram surprisal for word forms ending in obstruents followed by a word-initial obstruent of the opposite underlying [±voice] specification. Percentage of voicing was computed for 9,712 word-final obstruents. Linear mixed models were used to determine the effect of surprisal on the percentage of voicing in assimilating obstruents. The results confirm that Bulgarian obstruents do indeed in general assimilate to the voicing of a following obstruent: voiceless obstruents become voiced before voiced ones, while voiced obstruents are devoiced before voiceless ones. Crucially, however, surprisal had a significant effect on the percentage of voicing found in assimilating obstruents: in words with higher surprisal values, we found significantly lower degrees of voicing in voiceless obstruents before voiced ones, as well as significantly less devoicing of voiced obstruents before voiceless ones. This shows that assimilation is stronger in low-surprisal words, while in high-surprisal words speakers attempt to maintain the underlying [±voice] specification of an obstruent to a higher degree. Our findings add to a growing body of research that demonstrates that processes once thought of as entirely categorical in fact exhibit gradient variation in fine phonetic detail, which is attributable to speakers’ awareness of statistical patterns in language use and their response to the predictability of linguistic items in maintaining a balance between phonetic encoding and information density.},
  pubstate  = {published},
  type      = {inproceedings},
}
Project: C1