Publications

Aurnhammer, Christoph; Delogu, Francesca; Brouwer, Harm; Crocker, Matthew W.

The P600 as a Continuous Index of Integration Effort Journal Article Forthcoming

Psychophysiology, 2023.

@article{Aurnhammer.etal23,
  author = {Aurnhammer, Christoph and Delogu, Francesca and Brouwer, Harm and Crocker, Matthew W.},
  title = {The {P600} as a Continuous Index of Integration Effort},
  journal = {Psychophysiology},
  year = {2023},
  pubstate = {forthcoming},
  type = {article}
}

Copy BibTeX to Clipboard

Project:   A1

Demberg, Vera; Kravtchenko, Ekaterina; Loy, Jia

A systematic evaluation of factors affecting referring expression choice in passage completion tasks Journal Article

Journal of Memory and Language, 130, 104413, 2023.

Abstract:

There is a long-standing controversy around the question of whether referent predictability affects pronominalization: while there are good theoretical reasons for this prediction (e.g., Arnold, 2008), the experimental evidence has been rather mixed. We here report on three highly powered studies that manipulate a range of factors that have differed between previous studies, in order to determine more exactly under which conditions a predictability effect on pronominalization can be found. We use a constrained as well as a free reference task, and manipulate verb type, antecedent ambiguity, length of NP and whether the stimuli are presented within a story context or not. Our results find the story context to be the single important factor that allows to elicit an effect of predictability on pronoun choice, in line with (Rosa and Arnold, 2017; Weatherford and Arnold, 2021). We also propose a parametrization for a rational speech act model, that reconciles the findings between many of the experiments in the literature.

@article{Demberg.etal23,
  author = {Demberg, Vera and Kravtchenko, Ekaterina and Loy, Jia},
  title = {A systematic evaluation of factors affecting referring expression choice in passage completion tasks},
  journal = {Journal of Memory and Language},
  volume = {130},
  pages = {104413},
  year = {2023},
  url = {https://www.sciencedirect.com/science/article/pii/S0749596X23000116},
  abstract = {There is a long-standing controversy around the question of whether referent predictability affects pronominalization: while there are good theoretical reasons for this prediction (e.g., Arnold, 2008), the experimental evidence has been rather mixed. We here report on three highly powered studies that manipulate a range of factors that have differed between previous studies, in order to determine more exactly under which conditions a predictability effect on pronominalization can be found. We use a constrained as well as a free reference task, and manipulate verb type, antecedent ambiguity, length of NP and whether the stimuli are presented within a story context or not. Our results find the story context to be the single important factor that allows to elicit an effect of predictability on pronoun choice, in line with (Rosa and Arnold, 2017; Weatherford and Arnold, 2021). We also propose a parametrization for a rational speech act model, that reconciles the findings between many of the experiments in the literature.},
  pubstate = {published},
  type = {article}
}

Copy BibTeX to Clipboard

Project:   A3

Ortmann, Katrin

Computational Methods for Investigating Syntactic Change: Automatic Identification of Extraposition in Modern and Historical German PhD Thesis

Bochumer Linguistische Arbeitsberichte (BLA) 25, 2023.

Abstract
The linguistic analysis of historical German and diachronic syntactic change is traditionally based on small, manually annotated data sets. As a consequence, such studies lack the generalizability and statistical significance that quantitative approaches can offer. In this thesis, computational methods for the automatic syntactic analysis of modern and historical German are developed, which help to overcome the natural limits of manual annotation and enable the creation of large annotated data sets. The main goal of the thesis is to identify extraposition in modern and historical German, with extraposition being defined as the movement of constituents from their base position to the post-field of the sentence (Höhle 2019; Wöllstein 2018). For the automatic recognition of extraposition, two annotation steps are combined: (i) a topological field analysis for the identification of post-fields and (ii) a constituency analysis to recognize candidates for extraposition. The thesis describes experiments on topological field parsing (Ortmann 2020), chunking (Ortmann 2021a), and constituency parsing (Ortmann 2021b). The best results are achieved with statistical models trained on Part-of-Speech tags as input. Contrary to previous studies, all annotation steps are thoroughly evaluated with the newly developed FairEval method for the fine-grained error analysis and fair evaluation of labeled spans (Ortmann 2022). In an example analysis, the created methods are applied to large collections of modern and historical text to explore different factors for the extraposition of relative clauses, demonstrating the practical value of computational approaches for linguistic studies. The developed methods are released as the CLASSIG pipeline (Computational Linguistic Analysis of Syntactic Structures In German) at https://github.com/rubcompling/classig-pipeline.
Data sets, models, and evaluation results are provided for download at https://github.com/rubcompling/classig-data and https://doi.org/10.5281/zenodo.7180973.

@phdthesis{ortmann23,
  author = {Ortmann, Katrin},
  title = {Computational Methods for Investigating Syntactic Change: Automatic Identification of Extraposition in Modern and Historical {German}},
  school = {Ruhr-Universit{\"a}t Bochum},
  series = {Bochumer Linguistische Arbeitsberichte (BLA)},
  number = {25},
  year = {2023},
  url = {https://www.linguistics.rub.de/forschung/arbeitsberichte/25.pdf},
  abstract = {The linguistic analysis of historical German and diachronic syntactic change is traditionally based on small, manually annotated data sets. As a consequence, such studies lack the generalizability and statistical significance that quantitative approaches can offer. In this thesis, computational methods for the automatic syntactic analysis of modern and historical German are developed, which help to overcome the natural limits of manual annotation and enable the creation of large annotated data sets. The main goal of the thesis is to identify extraposition in modern and historical German, with extraposition being defined as the movement of constituents from their base position to the post-field of the sentence (H{\"o}hle 2019; W{\"o}llstein 2018). For the automatic recognition of extraposition, two annotation steps are combined: (i) a topological field analysis for the identification of post-fields and (ii) a constituency analysis to recognize candidates for extraposition. The thesis describes experiments on topological field parsing (Ortmann 2020), chunking (Ortmann 2021a), and constituency parsing (Ortmann 2021b). The best results are achieved with statistical models trained on Part-of-Speech tags as input. Contrary to previous studies, all annotation steps are thoroughly evaluated with the newly developed FairEval method for the fine-grained error analysis and fair evaluation of labeled spans (Ortmann 2022). In an example analysis, the created methods are applied to large collections of modern and historical text to explore different factors for the extraposition of relative clauses, demonstrating the practical value of computational approaches for linguistic studies. The developed methods are released as the CLASSIG pipeline (Computational Linguistic Analysis of Syntactic Structures In German) at https://github.com/rubcompling/classig-pipeline. Data sets, models, and evaluation results are provided for download at https://github.com/rubcompling/classig-data and https://doi.org/10.5281/zenodo.7180973.},
  pubstate = {published},
  type = {phdthesis}
}

Copy BibTeX to Clipboard

Project:   C6

Chingacham, Anupama; Demberg, Vera; Klakow, Dietrich

A Data-Driven Investigation of Noise-Adaptive Utterance Generation with Linguistic Modification Inproceedings

2022 IEEE Spoken Language Technology Workshop (SLT 2022, 9th - 12th January 2023, Doha, Qatar), 2023.

In noisy environments, speech can be hard to understand for humans. Spoken dialog systems can help to enhance the intelligibility of their output, either by modifying the speech synthesis (e.g., imitate Lombard speech) or by optimizing the language generation. We here focus on the second type of approach, by which an intended message is realized with words that are more intelligible in a specific noisy environment. By conducting a speech perception experiment, we created a dataset of 900 paraphrases in babble noise, perceived by native English speakers with normal hearing. We find that careful selection of paraphrases can improve intelligibility by 33% at SNR -5 dB. Our analysis of the data shows that the intelligibility differences between paraphrases are mainly driven by noise-robust acoustic cues. Furthermore, we propose an intelligibility-aware paraphrase ranking model, which outperforms baseline models with a relative improvement of 31.37% at SNR -5 dB.

@inproceedings{Chingachametal23,
  author = {Chingacham, Anupama and Demberg, Vera and Klakow, Dietrich},
  title = {A Data-Driven Investigation of Noise-Adaptive Utterance Generation with Linguistic Modification},
  booktitle = {2022 {IEEE} Spoken Language Technology Workshop ({SLT} 2022, 9th - 12th January 2023, Doha, Qatar)},
  year = {2023},
  doi = {10.48550/arXiv.2210.10252},
  eprint = {2210.10252},
  eprinttype = {arXiv},
  url = {https://arxiv.org/abs/2210.10252},
  abstract = {In noisy environments, speech can be hard to understand for humans. Spoken dialog systems can help to enhance the intelligibility of their output, either by modifying the speech synthesis (e.g., imitate Lombard speech) or by optimizing the language generation. We here focus on the second type of approach, by which an intended message is realized with words that are more intelligible in a specific noisy environment. By conducting a speech perception experiment, we created a dataset of 900 paraphrases in babble noise, perceived by native English speakers with normal hearing. We find that careful selection of paraphrases can improve intelligibility by 33\% at SNR -5 dB. Our analysis of the data shows that the intelligibility differences between paraphrases are mainly driven by noise-robust acoustic cues. Furthermore, we propose an intelligibility-aware paraphrase ranking model, which outperforms baseline models with a relative improvement of 31.37\% at SNR -5 dB.},
  pubstate = {published},
  type = {inproceedings}
}

Copy BibTeX to Clipboard

Project:   A4

Przybyl, Heike; Karakanta, Alina; Menzel, Katrin; Teich, Elke

Exploring linguistic variation in mediated discourse: translation vs. interpreting Book Chapter

Kajzer-Wietrzny, Marta; Bernardini, Silvia; Ferraresi, Adriano; Ivaska, Ilmari (Eds.): Mediated discourse at the European Parliament: Empirical investigations, Language Science Press, pp. 191–218, Berlin, 2022.

This paper focuses on the distinctive features of translated and interpreted texts in specific language combinations as forms of mediated discourse at the European Parliament. We aim to contribute to the long line of research on the specific properties of translation/interpreting. Specifically, we are interested in mediation effects (translation vs. interpreting) vs. effects of discourse mode (written vs. spoken). We propose a data-driven, exploratory approach to detecting and evaluating linguistic features as typical of translation/interpreting. Our approach utilizes simple word-based n-gram language models combined with the information-theoretic measure of relative entropy, a standard measure of similarity/difference between probability distributions, applied here as a method of corpus comparison. Comparing translation and interpreting (including the relation to their originals), we confirm the previously observed overall trend of written vs. spoken mode being strongly reflected in the translation and interpreting output. In addition, we detect some new features, such as a tendency towards more general lexemes in the verbal domain in interpreting or features of nominal style in translation.

@incollection{Przybyl2021exploring,
  author = {Przybyl, Heike and Karakanta, Alina and Menzel, Katrin and Teich, Elke},
  title = {Exploring linguistic variation in mediated discourse: translation vs. interpreting},
  editor = {Kajzer-Wietrzny, Marta and Bernardini, Silvia and Ferraresi, Adriano and Ivaska, Ilmari},
  booktitle = {Mediated discourse at the European Parliament: Empirical investigations},
  pages = {191--218},
  publisher = {Language Science Press},
  address = {Berlin},
  year = {2022},
  doi = {10.5281/zenodo.6977050},
  url = {https://langsci-press.org/catalog/book/343},
  abstract = {This paper focuses on the distinctive features of translated and interpreted texts in specific language combinations as forms of mediated discourse at the European Parliament. We aim to contribute to the long line of research on the specific properties of translation/interpreting. Specifically, we are interested in mediation effects (translation vs. interpreting) vs. effects of discourse mode (written vs. spoken). We propose a data-driven, exploratory approach to detecting and evaluating linguistic features as typical of translation/interpreting. Our approach utilizes simple word-based $n$-gram language models combined with the information-theoretic measure of relative entropy, a standard measure of similarity/difference between probability distributions, applied here as a method of corpus comparison. Comparing translation and interpreting (including the relation to their originals), we confirm the previously observed overall trend of written vs. spoken mode being strongly reflected in the translation and interpreting output. In addition, we detect some new features, such as a tendency towards more general lexemes in the verbal domain in interpreting or features of nominal style in translation.},
  pubstate = {published},
  type = {incollection}
}

Copy BibTeX to Clipboard

Project:   B7

Lapshinova-Koltunski, Ekaterina; Pollkläsener, Christina; Przybyl, Heike

Exploring Explicitation and Implicitation in Parallel Interpreting and Translation Corpora Journal Article

The Prague Bulletin of Mathematical Linguistics, 119, pp. 5-22, 2022, ISSN 0032-6585.

We present a study of discourse connectives in English-German and German-English translation and interpreting where we focus on the phenomena of explicitation and implicitation.
Apart from distributional analysis of translation patterns in parallel data, we also look into surprisal, i.e. an information-theoretic measure of cognitive effort, which helps us to interpret the observed tendencies.

@article{lapshinova-koltunski-pollklaesener-przybyl:2022,
  author = {Lapshinova-Koltunski, Ekaterina and Pollkl{\"a}sener, Christina and Przybyl, Heike},
  title = {Exploring Explicitation and Implicitation in Parallel Interpreting and Translation Corpora},
  journal = {The Prague Bulletin of Mathematical Linguistics},
  volume = {119},
  pages = {5--22},
  year = {2022},
  issn = {0032-6585},
  doi = {10.14712/00326585.020},
  url = {https://ufal.mff.cuni.cz/pbml/119/art-lapshinova-koltunski-pollklaesener-przybyl.pdf},
  abstract = {We present a study of discourse connectives in English-German and German-English translation and interpreting where we focus on the phenomena of explicitation and implicitation. Apart from distributional analysis of translation patterns in parallel data, we also look into surprisal, i.e. an information-theoretic measure of cognitive effort, which helps us to interpret the observed tendencies.},
  pubstate = {published},
  type = {article}
}

Copy BibTeX to Clipboard

Project:   B7

Kudera, Jacek; Stenger, Irina; Möbius, Bernd; Avgustinova, Tania; Klakow, Dietrich

Phonetic cues in auditory identification of Bulgarian, Czech, Polish, and Russian language of origin Journal Article

Language and Speech, 2022.

This work presents the results of an auditory language of origin identification experiment. Disyllabic and trisyllabic logatomes were recorded by speakers of Bulgarian, Czech, Polish, and Russian, and presented to L1 speakers of the abovementioned Slavic languages. The goals of the test were to verify the ability of lay listeners to recognize the linguistic origin of speakers, based on spoken samples with limited segmental and suprasegmental information, and to correlate the signal features with the subjects’ performance. It was found that position of word stress is not an important predictor in language recognition. However, inherent vowel characteristics such as duration and vowel space computed by the means of Pillai scores correlate with subjects’ performance. Both the linguistic profile and the familiarity with closely related languages also appear to be relevant predictors of listeners’ performance. Finally, the information-theoretic notion of surprisal applied on regular cross-linguistic sound correspondences was correlated with recognition scores; though, the correlations did not reach the threshold of statistical significance. We conclude that auditory identification of linguistic origin by lay persons, native speakers of closely related languages, is possible even when exposed to limited segmental information, which can serve as a cue in the identification of linguistic origin.

@article{kudera_etal2022_cues,
  author = {Kudera, Jacek and Stenger, Irina and M{\"o}bius, Bernd and Avgustinova, Tania and Klakow, Dietrich},
  title = {Phonetic cues in auditory identification of {Bulgarian}, {Czech}, {Polish}, and {Russian} language of origin},
  journal = {Language and Speech},
  year = {2022},
  date = {2022-09-01},
  doi = {10.1177/00238309221119098},
  url = {https://journals.sagepub.com/eprint/JJIKHP9RPEYZM2EQKFWZ/full},
  abstract = {This work presents the results of an auditory language of origin identification experiment. Disyllabic and trisyllabic logatomes were recorded by speakers of Bulgarian, Czech, Polish, and Russian, and presented to L1 speakers of the abovementioned Slavic languages. The goals of the test were to verify the ability of lay listeners to recognize the linguistic origin of speakers, based on spoken samples with limited segmental and suprasegmental information, and to correlate the signal features with the subjects' performance. It was found that position of word stress is not an important predictor in language recognition. However, inherent vowel characteristics such as duration and vowel space computed by the means of Pillai scores correlate with subjects' performance. Both the linguistic profile and the familiarity with closely related languages also appear to be relevant predictors of listeners' performance. Finally, the information-theoretic notion of surprisal applied on regular cross-linguistic sound correspondences was correlated with recognition scores; though, the correlations did not reach the threshold of statistical significance. We conclude that auditory identification of linguistic origin by lay persons, native speakers of closely related languages, is possible even when exposed to limited segmental information, which can serve as a cue in the identification of linguistic origin.},
  pubstate = {published},
  type = {article}
}

Copy BibTeX to Clipboard

Project:   C4

Ibrahim, Omnia; Yuen, Ivan; van Os, Marjolein; Andreeva, Bistra; Möbius, Bernd

The combined effects of contextual predictability and noise on the acoustic realisation of German syllables Journal Article

The Journal of the Acoustical Society of America, 152, 2022.

Speakers tend to speak clearly in noisy environments, while they tend to reserve effort by shortening word duration in predictable contexts. It is unclear how these two communicative demands are met. The current study investigates the acoustic realizations of syllables in predictable vs unpredictable contexts across different background noise levels. Thirty-eight German native speakers produced 60 CV syllables in two predictability contexts in three noise conditions (reference = quiet, 0 dB and −10 dB signal-to-noise ratio). Duration, intensity (average and range), F0 (median), and vowel formants of the target syllables were analysed. The presence of noise yielded significantly longer duration, higher average intensity, larger intensity range, and higher F0. Noise levels affected intensity (average and range) and F0. Low predictability syllables exhibited longer duration and larger intensity range. However, no interaction was found between noise and predictability. This suggests that noise-related modifications might be independent of predictability-related changes, with implications for including channel-based and message-based formulations in speech production.

@article{ibrahim_etal_jasa2022,
  author = {Ibrahim, Omnia and Yuen, Ivan and van Os, Marjolein and Andreeva, Bistra and M{\"o}bius, Bernd},
  title = {The combined effects of contextual predictability and noise on the acoustic realisation of {German} syllables},
  journal = {The Journal of the Acoustical Society of America},
  volume = {152},
  number = {2},
  year = {2022},
  date = {2022-08-10},
  doi = {10.1121/10.0013413},
  url = {https://asa.scitation.org/doi/10.1121/10.0013413},
  abstract = {Speakers tend to speak clearly in noisy environments, while they tend to reserve effort by shortening word duration in predictable contexts. It is unclear how these two communicative demands are met. The current study investigates the acoustic realizations of syllables in predictable vs unpredictable contexts across different background noise levels. Thirty-eight German native speakers produced 60 CV syllables in two predictability contexts in three noise conditions (reference = quiet, 0 dB and $-$10 dB signal-to-noise ratio). Duration, intensity (average and range), F0 (median), and vowel formants of the target syllables were analysed. The presence of noise yielded significantly longer duration, higher average intensity, larger intensity range, and higher F0. Noise levels affected intensity (average and range) and F0. Low predictability syllables exhibited longer duration and larger intensity range. However, no interaction was found between noise and predictability. This suggests that noise-related modifications might be independent of predictability-related changes, with implications for including channel-based and message-based formulations in speech production.},
  pubstate = {published},
  type = {article}
}

Copy BibTeX to Clipboard

Projects:   C1 A4

Bhandari, Pratik; Demberg, Vera; Kray, Jutta

Predictability effects in degraded speech comprehension are reduced as a function of attention Journal Article

Language and Cognition, Cambridge University Press, pp. 1-18, 2022.

The aim of this study was to examine the role of attention in understanding linguistic information even in a noisy environment. To assess the role of attention, we varied task instructions in two experiments in which participants were instructed to listen to short sentences and thereafter to type in the last word they heard or to type in the whole sentence. We were interested in how these task instructions influence the interplay between top-down prediction and bottom-up perceptual processes during language comprehension. Therefore, we created sentences that varied in the degree of predictability (low, medium, and high) as well as in the degree of speech degradation (four, six, and eight noise-vocoding channels). Results indicated better word recognition for highly predictable sentences for moderate, though not for high, levels of speech degradation, but only when attention was directed to the whole sentence. This underlines the important role of attention in language comprehension.

@article{bhandari_demberg_kray_2022,
  author = {Bhandari, Pratik and Demberg, Vera and Kray, Jutta},
  title = {Predictability effects in degraded speech comprehension are reduced as a function of attention},
  journal = {Language and Cognition},
  pages = {1--18},
  publisher = {Cambridge University Press},
  year = {2022},
  date = {2022-07-22},
  doi = {10.1017/langcog.2022.16},
  url = {https://www.cambridge.org/core/journals/language-and-cognition/article/abs/predictability-effects-in-degraded-speech-comprehension-are-reduced-as-a-function-of-attention/98F4E3A4A3FC0B7E00C8E1536D986853},
  abstract = {The aim of this study was to examine the role of attention in understanding linguistic information even in a noisy environment. To assess the role of attention, we varied task instructions in two experiments in which participants were instructed to listen to short sentences and thereafter to type in the last word they heard or to type in the whole sentence. We were interested in how these task instructions influence the interplay between top-down prediction and bottom-up perceptual processes during language comprehension. Therefore, we created sentences that varied in the degree of predictability (low, medium, and high) as well as in the degree of speech degradation (four, six, and eight noise-vocoding channels). Results indicated better word recognition for highly predictable sentences for moderate, though not for high, levels of speech degradation, but only when attention was directed to the whole sentence. This underlines the important role of attention in language comprehension.},
  pubstate = {published},
  type = {article}
}

Copy BibTeX to Clipboard

Project:   A4

Przybyl, Heike; Lapshinova-Koltunski, Ekaterina; Menzel, Katrin; Fischer, Stefan; Teich, Elke

EPIC UdS - Creation and applications of a simultaneous interpreting corpus Inproceedings

Proceedings of the 13th Conference on Language Resources and Evaluation (LREC 2022), pp. 1193–1200, Marseille, France, 20-25 June 2022, 2022.

In this paper, we describe the creation and annotation of EPIC UdS, a multilingual corpus of simultaneous interpreting for English, German and Spanish. We give an overview of the comparable and parallel, aligned corpus variants and explore various applications of the corpus. What makes EPIC UdS relevant is that it is one of the rare interpreting corpora that includes transcripts suitable for research on more than one language pair and on interpreting with regard to German. It not only contains transcribed speeches, but also rich metadata and fine-grained linguistic annotations tailored for diverse applications across a broad range of linguistic subfields.

@inproceedings{Przybyl_interpreting_2022,
  author = {Przybyl, Heike and Lapshinova-Koltunski, Ekaterina and Menzel, Katrin and Fischer, Stefan and Teich, Elke},
  title = {{EPIC} {UdS} - Creation and applications of a simultaneous interpreting corpus},
  booktitle = {Proceedings of the 13th Conference on Language Resources and Evaluation ({LREC} 2022)},
  pages = {1193--1200},
  address = {Marseille, France},
  eventdate = {2022-06-20/2022-06-25},
  month = jun,
  year = {2022},
  abstract = {In this paper, we describe the creation and annotation of EPIC UdS, a multilingual corpus of simultaneous interpreting for English, German and Spanish. We give an overview of the comparable and parallel, aligned corpus variants and explore various applications of the corpus. What makes EPIC UdS relevant is that it is one of the rare interpreting corpora that includes transcripts suitable for research on more than one language pair and on interpreting with regard to German. It not only contains transcribed speeches, but also rich metadata and fine-grained linguistic annotations tailored for diverse applications across a broad range of linguistic subfields.},
  pubstate = {published},
  type = {inproceedings}
}

Copy BibTeX to Clipboard

Project:   B7

Stenger, Irina; Georgis, Philip; Avgustinova, Tania; Möbius, Bernd; Klakow, Dietrich

Modeling the Impact of Syntactic Distance and Surprisal on Cross-Slavic Text Comprehension Inproceedings

Proceedings of the Language Resources and Evaluation Conference, European Language Resources Association, pp. 7368-7376, Marseille, France, 2022.

We focus on the syntactic variation and measure syntactic distances between nine Slavic languages (Belarusian, Bulgarian, Croatian, Czech, Polish, Slovak, Slovene, Russian, and Ukrainian) using symmetric measures of insertion, deletion and movement of syntactic units in the parallel sentences of the fable “The North Wind and the Sun”. Additionally, we investigate phonetic and orthographic asymmetries between selected languages by means of the information theoretical notion of surprisal. Syntactic distance and surprisal are, thus, considered as potential predictors of mutual intelligibility between related languages. In spoken and written cloze test experiments for Slavic native speakers, the presented predictors will be validated as to whether variations in syntax lead to a slower or impeded intercomprehension of Slavic texts.

@inproceedings{stenger-EtAl:2022:LREC,
  author = {Stenger, Irina and Georgis, Philip and Avgustinova, Tania and M{\"o}bius, Bernd and Klakow, Dietrich},
  title = {Modeling the Impact of Syntactic Distance and Surprisal on {Cross-Slavic} Text Comprehension},
  booktitle = {Proceedings of the Language Resources and Evaluation Conference},
  pages = {7368--7376},
  publisher = {European Language Resources Association},
  address = {Marseille, France},
  year = {2022},
  date = {2022-06-21},
  url = {https://aclanthology.org/2022.lrec-1.802},
  abstract = {We focus on the syntactic variation and measure syntactic distances between nine Slavic languages (Belarusian, Bulgarian, Croatian, Czech, Polish, Slovak, Slovene, Russian, and Ukrainian) using symmetric measures of insertion, deletion and movement of syntactic units in the parallel sentences of the fable ``The North Wind and the Sun''. Additionally, we investigate phonetic and orthographic asymmetries between selected languages by means of the information theoretical notion of surprisal. Syntactic distance and surprisal are, thus, considered as potential predictors of mutual intelligibility between related languages. In spoken and written cloze test experiments for Slavic native speakers, the presented predictors will be validated as to whether variations in syntax lead to a slower or impeded intercomprehension of Slavic texts.},
  pubstate = {published},
  type = {inproceedings}
}

Copy BibTeX to Clipboard

Project:   C4

Ortmann, Katrin

Fine-Grained Error Analysis and Fair Evaluation of Labeled Spans Inproceedings

Proceedings of the Language Resources and Evaluation Conference (LREC), European Language Resources Association, pp. 1400-1407, Marseille, France, 2022.

@inproceedings{ortmann2022,
  author = {Ortmann, Katrin},
  title = {Fine-Grained Error Analysis and Fair Evaluation of Labeled Spans},
  booktitle = {Proceedings of the Language Resources and Evaluation Conference ({LREC})},
  pages = {1400--1407},
  publisher = {European Language Resources Association},
  address = {Marseille, France},
  year = {2022},
  date = {2022-06-21},
  url = {https://aclanthology.org/2022.lrec-1.150},
  pubstate = {published},
  type = {inproceedings}
}

Copy BibTeX to Clipboard

Project:   C6

Menzel, Katrin; Krielke, Marie-Pauline; Degaetano-Ortlieb, Stefania

Synthetic and analytic adjective negation in English scientific journal articles: A diachronic perspective Journal Article

Lege artis. Language yesterday, today, tomorrow. The journal of University of SS Cyril and Methodius in Trnava, 2022, VII(1), Trnava: University of SS Cyril and Methodius in Trnava, pp. 157-213, 2022, ISSN 2453-8035.

@article{menzel_2022_diachronicperspective,
  author = {Menzel, Katrin and Krielke, Marie-Pauline and Degaetano-Ortlieb, Stefania},
  title = {Synthetic and analytic adjective negation in {English} scientific journal articles: A diachronic perspective},
  journal = {Lege artis. Language yesterday, today, tomorrow. The journal of University of SS Cyril and Methodius in Trnava},
  volume = {VII},
  number = {1},
  pages = {157--213},
  publisher = {University of SS Cyril and Methodius in Trnava},
  address = {Trnava},
  year = {2022},
  issn = {2453-8035},
  pubstate = {published},
  type = {article}
}

Copy BibTeX to Clipboard

Project:   B1

Scholman, Merel; Blything, Liam; Cain, Kate; Evers-Vermeul, Jacqueline

Discourse Rules: The Effects of Clause Order Principles on the Reading Process Journal Article

Language, Cognition and Neuroscience, 37(10), pp. 1277-1291, 2022, ISSN 2327-3798 .

In an eye-tracking-while-reading study, we investigated adult monolinguals’ (N=80) processing of two-clause sentences embedded in short narratives. Three principles theorized to guide comprehension of complex sentences were contrasted: one operating at the clause level, namely clause structure (main clause – subordinate clause or vice versa), and two operating at the discourse-level, namely givenness (given-new vs. new-given) and event order (chronological vs. reverse order). The results indicate that clause structure mainly affects early stages of processing, whereas the two principles operating at the discourse level are more important during later stages and for reading times of the entire sentence. Event order was found to operate relatively independently of the other principles. Givenness was found to overrule clause structure, a phenomenon that can be related to the grounding function of preposed subordinate clauses. We propose a new principle to reflect this interaction effect: the grounding principle.

@article{Merel_Rules_2022,
title = {Discourse Rules: The Effects of Clause Order Principles on the Reading Process},
author = {Merel Scholman and Liam Blything and Kate Cain and Jacqueline Evers-Vermeul},
url = {https://www.tandfonline.com/doi/full/10.1080/23273798.2022.2077971},
doi = {10.1080/23273798.2022.2077971},
year = {2022},
date = {2022},
journal = {Language, Cognition and Neuroscience},
pages = {1277--1291},
volume = {37},
number = {10},
issn = {2327-3798},
abstract = {In an eye-tracking-while-reading study, we investigated adult monolinguals’ (N=80) processing of two-clause sentences embedded in short narratives. Three principles theorized to guide comprehension of complex sentences were contrasted: one operating at the clause level, namely clause structure (main clause - subordinate clause or vice versa), and two operating at the discourse-level, namely givenness (given-new vs. new-given) and event order (chronological vs. reverse order). The results indicate that clause structure mainly affects early stages of processing, whereas the two principles operating at the discourse level are more important during later stages and for reading times of the entire sentence. Event order was found to operate relatively independently of the other principles. Givenness was found to overrule clause structure, a phenomenon that can be related to the grounding function of preposed subordinate clauses. We propose a new principle to reflect this interaction effect: the grounding principle.},
pubstate = {published},
type = {article}
}

Copy BibTeX to Clipboard

Project:   B2

Kravtchenko, Ekaterina; Demberg, Vera

Informationally redundant utterances elicit pragmatic inferences Inproceedings

Cognition. 2022 May 14, 2022.

Most theories of pragmatics and language processing predict that speakers avoid excessive informational redundancy. Informationally redundant utterances are, however, quite common in natural dialogue. From a comprehension standpoint, it remains unclear how comprehenders interpret these utterances, and whether they make attempts to reconcile the ‘dips’ in informational utility with expectations of ‘appropriate’ or ‘rational’ speaker informativity. We show that informationally redundant (overinformative) utterances can trigger pragmatic inferences that increase utterance utility in line with comprehender expectations. In a series of three studies, we look at utterances which refer to stereotyped event sequences describing common activities (scripts). When comprehenders encounter utterances describing events that can be easily inferred from prior context, they interpret them as signifying that the event conveys new, unstated information (i.e. an event otherwise assumed to be habitual, such as paying the cashier when shopping, is reinterpreted as non-habitual). We call these inferences atypicality inferences. Further, we show that the degree to which these atypicality inferences are triggered depends on the framing of the utterance. In the absence of an exclamation mark or a discourse marker indicating the speaker’s specific intent to communicate the given information, such inferences are far less likely to arise. Overall, the results demonstrate that excessive conceptual redundancy leads to comprehenders revising the conversational common ground, in an effort to accommodate unexpected dips in informational utility.

@article{Kravtchenko_redundant_2022,
title = {Informationally redundant utterances elicit pragmatic inferences},
author = {Ekaterina Kravtchenko and Vera Demberg},
url = {https://pubmed.ncbi.nlm.nih.gov/35580451/},
doi = {10.1016/j.cognition.2022.105159},
year = {2022},
date = {2022-05-14},
journal = {Cognition},
abstract = {Most theories of pragmatics and language processing predict that speakers avoid excessive informational redundancy. Informationally redundant utterances are, however, quite common in natural dialogue. From a comprehension standpoint, it remains unclear how comprehenders interpret these utterances, and whether they make attempts to reconcile the 'dips' in informational utility with expectations of 'appropriate' or 'rational' speaker informativity. We show that informationally redundant (overinformative) utterances can trigger pragmatic inferences that increase utterance utility in line with comprehender expectations. In a series of three studies, we look at utterances which refer to stereotyped event sequences describing common activities (scripts). When comprehenders encounter utterances describing events that can be easily inferred from prior context, they interpret them as signifying that the event conveys new, unstated information (i.e. an event otherwise assumed to be habitual, such as paying the cashier when shopping, is reinterpreted as non-habitual). We call these inferences atypicality inferences. Further, we show that the degree to which these atypicality inferences are triggered depends on the framing of the utterance. In the absence of an exclamation mark or a discourse marker indicating the speaker's specific intent to communicate the given information, such inferences are far less likely to arise. Overall, the results demonstrate that excessive conceptual redundancy leads to comprehenders revising the conversational common ground, in an effort to accommodate unexpected dips in informational utility.},
keywords = {Accommodation; Context-dependent implicatures; Experimental pragmatics; Psycholinguistics; Redundancy},
pubstate = {published},
type = {article}
}

Copy BibTeX to Clipboard

Project:   A3

Sommerfeld, Linda; Staudte, Maria; Kray, Jutta

Ratings of name agreement and semantic categorization of 247 colored clipart pictures by young German children Journal Article Forthcoming

Acta Psychologica, 226, pp. 103558, 2022, ISSN 0001-6918.

Developmental and longitudinal studies with children increasingly use pictorial stimuli in cognitive, psychologic, and psycholinguistic research. To enhance validity and comparability within and across those studies, the use of normed pictures is recommended. Besides, creating picture sets and evaluating them in rating studies is very time consuming, in particular regarding samples of young children in which testing time is rather limited. As there is an increasing number of studies that investigate young German children’s semantic language processing with colored clipart stimuli, this work provides a first set of 247 colored cliparts with ratings of German native speaking children aged 4 to 6 years. We assessed two central rating aspects of pictures: Name agreement (Do pictures elicit the intended name of an object?) and semantic categorization (Are objects classified as members of the intended semantic category?). Our ratings indicate that children are proficient in naming and even better in semantic categorization of objects, whereas both seems to improve with increasing age of young childhood. Finally, this paper discusses some features of pictorial objects that might be important for children’s name agreement and semantic categorization and could be considered in future picture rating studies.

 

@article{Sommerfeld_of_2022,
title = {Ratings of name agreement and semantic categorization of 247 colored clipart pictures by young German children},
author = {Linda Sommerfeld and Maria Staudte and Jutta Kray},
url = {https://www.sciencedirect.com/science/article/pii/S0001691822000737},
doi = {10.1016/j.actpsy.2022.103558},
year = {2022},
date = {2022-05-29},
journal = {Acta Psychologica},
pages = {103558},
volume = {226},
issn = {0001-6918},
abstract = {Developmental and longitudinal studies with children increasingly use pictorial stimuli in cognitive, psychologic, and psycholinguistic research. To enhance validity and comparability within and across those studies, the use of normed pictures is recommended. Besides, creating picture sets and evaluating them in rating studies is very time consuming, in particular regarding samples of young children in which testing time is rather limited. As there is an increasing number of studies that investigate young German children's semantic language processing with colored clipart stimuli, this work provides a first set of 247 colored cliparts with ratings of German native speaking children aged 4 to 6 years. We assessed two central rating aspects of pictures: Name agreement (Do pictures elicit the intended name of an object?) and semantic categorization (Are objects classified as members of the intended semantic category?). Our ratings indicate that children are proficient in naming and even better in semantic categorization of objects, whereas both seems to improve with increasing age of young childhood. Finally, this paper discusses some features of pictorial objects that might be important for children's name agreement and semantic categorization and could be considered in future picture rating studies.},
keywords = {Name agreement, Semantic categorization, Picture naming, Picture ratings, Children, Age differences},
pubstate = {forthcoming},
type = {article}
}

Copy BibTeX to Clipboard

Project:   A5

Höltje, Gerrit; Mecklinger, Axel

Benefits and costs of predictive processing: How sentential constraint and word expectedness affect memory formation Journal Article Forthcoming

Science Direct, Experimental Neuropsychology Unit, Department of Psychology, Saarland University, Saarbrücken, Germany, 2022.

@article{Höltje_and_2022,
title = {Benefits and costs of predictive processing: How sentential constraint and word expectedness affect memory formation},
author = {Gerrit H{\"o}ltje and Axel Mecklinger},
url = {https://www.sciencedirect.com/journal/brain-research},
year = {2022},
date = {2022},
journal = {Brain Research},
publisher = {Experimental Neuropsychology Unit, Department of Psychology},
address = {Saarland University, Saarbr{\"u}cken, Germany},
pubstate = {forthcoming},
type = {article}
}

Copy BibTeX to Clipboard

Project:   A6

Zouhar, Vilém; Mosbach, Marius; Zhang, Miaoran; Klakow, Dietrich

Knowledge Base Index Compression via Dimensionality and Precision Reduction Inproceedings Forthcoming

Spa-NLP workshop at ACL 2022, 22nd-27th May 2022 Dublin, Ireland, 2022.

Recently neural network based approaches to knowledge-intensive NLP tasks, such as question answering, started to rely heavily on the combination of neural retrievers and readers. Retrieval is typically performed over a large textual knowledge base (KB) which requires significant memory and compute resources, especially when scaled up. On HotpotQA we systematically investigate reducing the size of the KB index by means of dimensionality (sparse random projections, PCA, autoencoders) and numerical precision reduction.
Our results show that PCA is an easy solution that requires very little data and is only slightly worse than autoencoders, which are less stable. All methods are sensitive to pre- and post-processing and data should always be centered and normalized both before and after dimension reduction. Finally, we show that it is possible to combine PCA with using 1bit per dimension. Overall we achieve (1) 100× compression with 75%, and (2) 24× compression with 92% original retrieval performance.

@inproceedings{Zouhar_2022_Base,
title = {Knowledge Base Index Compression via Dimensionality and Precision Reduction},
author = {Vil{\'e}m Zouhar and Marius Mosbach and Miaoran Zhang and Dietrich Klakow},
url = {https://arxiv.org/abs/2204.02906},
eprint = {2204.02906},
eprinttype = {arXiv},
year = {2022},
date = {2022},
booktitle = {Spa-NLP workshop at ACL 2022},
eventdate = {2022-05-22/2022-05-27},
address = {Dublin, Ireland},
abstract = {Recently neural network based approaches to knowledge-intensive NLP tasks, such as question answering, started to rely heavily on the combination of neural retrievers and readers. Retrieval is typically performed over a large textual knowledge base (KB) which requires significant memory and compute resources, especially when scaled up. On HotpotQA we systematically investigate reducing the size of the KB index by means of dimensionality (sparse random projections, PCA, autoencoders) and numerical precision reduction. Our results show that PCA is an easy solution that requires very little data and is only slightly worse than autoencoders, which are less stable. All methods are sensitive to pre- and post-processing and data should always be centered and normalized both before and after dimension reduction. Finally, we show that it is possible to combine PCA with using 1bit per dimension. Overall we achieve (1) 100× compression with 75%, and (2) 24× compression with 92% original retrieval performance.},
pubstate = {forthcoming},
type = {inproceedings}
}

Copy BibTeX to Clipboard

Project:   B4

Dutta Chowdhury, Koel; Jalota, Rricha; van Genabith, Josef; España-Bonet, Cristina

Towards Debiasing Translation Artifacts Inproceedings

Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Association for Computational Linguistics, pp. 3983-3991, Seattle, United States, July 2022, 2022.

Cross-lingual natural language processing relies on translation, either by humans or machines, at different levels, from translating training data to translating test sets. However, compared to original texts in the same language, translations possess distinct qualities referred to as translationese. Previous research has shown that these translation artifacts influence the performance of a variety of cross-lingual tasks. In this work, we propose a novel approach to reducing translationese by extending an established bias-removal technique. We use the Iterative Null-space Projection (INLP) algorithm, and show by measuring classification accuracy before and after debiasing, that translationese is reduced at both sentence and word level. We evaluate the utility of debiasing translationese on a natural language inference (NLI) task, and show that by reducing this bias, NLI accuracy improves. To the best of our knowledge, this is the first study to debias translationese as represented in latent embedding space.

@inproceedings{Chowdhury_2022_Debiasing,
title = {Towards Debiasing Translation Artifacts},
author = {Dutta Chowdhury, Koel and Jalota, Rricha and van Genabith, Josef and Espa{\~n}a-Bonet, Cristina},
url = {https://aclanthology.org/2022.naacl-main.292/},
doi = {10.18653/v1/2022.naacl-main.292},
year = {2022},
month = jul,
date = {2022-07},
booktitle = {Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
pages = {3983--3991},
publisher = {Association for Computational Linguistics},
address = {Seattle, United States},
abstract = {Cross-lingual natural language processing relies on translation, either by humans or machines, at different levels, from translating training data to translating test sets. However, compared to original texts in the same language, translations possess distinct qualities referred to as translationese. Previous research has shown that these translation artifacts influence the performance of a variety of cross-lingual tasks. In this work, we propose a novel approach to reducing translationese by extending an established bias-removal technique. We use the Iterative Null-space Projection (INLP) algorithm, and show by measuring classification accuracy before and after debiasing, that translationese is reduced at both sentence and word level. We evaluate the utility of debiasing translationese on a natural language inference (NLI) task, and show that by reducing this bias, NLI accuracy improves. To the best of our knowledge, this is the first study to debias translationese as represented in latent embedding space.},
pubstate = {published},
type = {inproceedings}
}

Copy BibTeX to Clipboard

Project:   B6

Mayn, Alexandra; Demberg, Vera

Pragmatics of Metaphor Revisited: Modeling the Role of Degree and Salience in Metaphor Understanding Inproceedings

Proceedings of the Annual Meeting of the Cognitive Science Society, 43(43), pp. 3178ff., 2022.

Abstract: Experimental pragmatics tells us that a metaphor conveys salient features of a vehicle and that highly typical features tend to be salient. But can highly atypical features also be salient? When asking if John is loyal and hearing “John is a fox”, will the hearer conclude that John is disloyal because loyalty is saliently atypical for a fox? This prediction follows from our RSA-based model of metaphor understanding which relies on gradient salience. Our behavioral experiments corroborate the model’s predictions, providing evidence that high and low typicality are salient and result in high interpretation confidence and agreement, while average typicality is not salient and makes a metaphor confusing. Our model implements the idea that other features of a vehicle, along with possible alternative vehicles, influence metaphor interpretation. It produces a significantly better fit compared to an existing RSA model of metaphor understanding, supporting our predictions about the factors at play.

@inproceedings{Mayn_2022_of,
title = {Pragmatics of Metaphor Revisited: Modeling the Role of Degree and Salience in Metaphor Understanding},
author = {Alexandra Mayn and Vera Demberg},
url = {https://escholarship.org/uc/item/7kq207zs},
year = {2022},
date = {2022},
booktitle = {Proceedings of the Annual Meeting of the Cognitive Science Society, 43(43)},
pages = {3178ff.},
abstract = {Experimental pragmatics tells us that a metaphor conveys salient features of a vehicle and that highly typical features tend to be salient. But can highly atypical features also be salient? When asking if John is loyal and hearing “John is a fox”, will the hearer conclude that John is disloyal because loyalty is saliently atypical for a fox? This prediction follows from our RSA-based model of metaphor understanding which relies on gradient salience. Our behavioral experiments corroborate the model’s predictions, providing evidence that high and low typicality are salient and result in high interpretation confidence and agreement, while average typicality is not salient and makes a metaphor confusing. Our model implements the idea that other features of a vehicle, along with possible alternative vehicles, influence metaphor interpretation. It produces a significantly better fit compared to an existing RSA model of metaphor understanding, supporting our predictions about the factors at play.},
pubstate = {published},
type = {inproceedings}
}

Copy BibTeX to Clipboard

Project:   B2

Successfully