TenseLoC: Tense Localization and Control in a Multilingual LLM Inproceedings
Adelani, David Ifeoluwa; Arnett, Catherine; Ataman, Duygu; Chang, Tyler A.; Gonen, Hila; Raja, Rahul; Schmidt, Fabian; Stap, David; Wang, Jiayi (Eds.): Proceedings of the 5th Workshop on Multilingual Representation Learning (MRL 2025), Association for Computational Linguistics, pp. 243–264, Suzhou, China, 2025, ISBN 979-8-89176-345-6. Multilingual language models excel across languages, yet how they internally encode grammatical tense remains largely unclear. We investigate how decoder-only transformers represent, transfer, and control tense across eight typologically diverse languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai. We construct a synthetic tense-annotated dataset and combine probing, causal analysis, feature disentanglement, and model steering to LLaMA-3.1 8B. We show that tense emerges as a distinct signal from early layers and transfers most strongly within the same language family. Causal tracing reveals that attention outputs around layer 16 consistently carry cross-lingually transferable tense information. Leveraging sparse autoencoders in this subspace, we isolate and steer English tense-related features, improving target-tense prediction accuracy by up to 11% in a downstream cloze task.
@comment{Workshop paper in the MRL 2025 proceedings; ACL Anthology ID 2025.mrl-main.17. Names are stored in Last, First form so compound surnames parse correctly.}
@inproceedings{tumurchuluun-etal-2025-tenseloc,
  title     = {{TenseLoC}: Tense Localization and Control in a Multilingual {LLM}},
  author    = {Tumurchuluun, Ariun-Erdene and Al Ghussin, Yusser and Mare{\v{c}}ek, David and van Genabith, Josef and Dutta Chowdhury, Koel},
  editor    = {Adelani, David Ifeoluwa and Arnett, Catherine and Ataman, Duygu and Chang, Tyler A. and Gonen, Hila and Raja, Rahul and Schmidt, Fabian and Stap, David and Wang, Jiayi},
  url       = {https://aclanthology.org/2025.mrl-main.17/},
  doi       = {10.18653/v1/2025.mrl-main.17},
  year      = {2025},
  date      = {2025},
  booktitle = {Proceedings of the 5th Workshop on Multilingual Representation Learning (MRL 2025)},
  isbn      = {979-8-89176-345-6},
  pages     = {243--264},
  publisher = {Association for Computational Linguistics},
  address   = {Suzhou, China},
  abstract  = {Multilingual language models excel across languages, yet how they internally encode grammatical tense remains largely unclear. We investigate how decoder-only transformers represent, transfer, and control tense across eight typologically diverse languages: English, German, French, Italian, Portuguese, Hindi, Spanish, and Thai. We construct a synthetic tense-annotated dataset and combine probing, causal analysis, feature disentanglement, and model steering to LLaMA-3.1 8B. We show that tense emerges as a distinct signal from early layers and transfers most strongly within the same language family. Causal tracing reveals that attention outputs around layer 16 consistently carry cross-lingually transferable tense information. Leveraging sparse autoencoders in this subspace, we isolate and steer English tense-related features, improving target-tense prediction accuracy by up to 11\% in a downstream cloze task.},
  pubstate  = {published},
  type      = {inproceedings}
}
Project: B6