% Alves, Bagdasarov and Teich (2025): surprisal slope as a feature for MWE identification in English.
% The doi field holds the bare DOI (no resolver prefix); page ranges use an en-dash (--).
@inproceedings{alves-etal-2025-surprisal,
  author    = {Alves, Diego and Bagdasarov, Sergei and Teich, Elke},
  title     = {Surprisal Dynamics for the Detection of Multi-Word Expressions in {English}},
  editor    = {Inui, Kentaro and Sakti, Sakriani and Wang, Haofen and Wong, Derek F. and Bhattacharyya, Pushpak and Banerjee, Biplab and Ekbal, Asif and Chakraborty, Tanmoy and Singh, Dhirendra Pratap},
  booktitle = {Proceedings of the 14th International Joint Conference on Natural Language Processing and the 4th Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics},
  year      = {2025},
  pages     = {1185--1194},
  publisher = {The Asian Federation of Natural Language Processing and The Association for Computational Linguistics},
  address   = {Mumbai, India},
  isbn      = {979-8-89176-303-6},
  doi       = {10.18653/v1/2025.findings-ijcnlp.72},
  url       = {https://aclanthology.org/2025.findings-ijcnlp.72/},
  abstract  = {This work examines the potential of surprisal slope as a feature for identifying multi-word expressions (MWEs) in English, leveraging token-level surprisal estimates from the GPT-2 language model. Evaluations on the DiMSUM and SemEval-2022 datasets reveal that surprisal slope provides moderate yet meaningful discriminative power with a trade-off between specificity and coverage: while high recall indicates that surprisal slope captures many true MWEs, the slightly lower precision reflects false positives, particularly for non-MWEs that follow formulaic patterns (e.g., adjective-noun or verb-pronoun structures). The method performs particularly well for conventionalized expressions, such as idiomatic bigrams in the SemEval-2022 corpus. Both idiomatic and literal usages of these bigrams exhibit negative slopes, with idiomatic instances generally showing a more pronounced decrease. Overall, surprisal slope offers a cognitively motivated and interpretable signal that complements existing MWE identification methods, particularly for conventionalized expressions.},
  pubstate  = {published},
  type      = {inproceedings},
}