EuroParl-UdS: Preserving and Extending Metadata in Parliamentary Debates Inproceedings
ParlaCLARIN workshop, 11th Language Resources and Evaluation Conference (LREC2018), Miyazaki, Japan, 2018. Multilingual parliaments have been a useful source for monolingual and multilingual corpus collection. However, extra-textual information about speakers is often absent, and as a result, these resources cannot be fully used in translation studies.
In this paper we present a method for processing and building a parallel corpus consisting of parliamentary debates of the European Parliament for English into German and English into Spanish, where original language and native speaker information is available as metadata. The paper documents all necessary (pre- and post-) processing steps for creating such a valuable resource. In addition to the parallel corpora, we collect monolingual comparable corpora for English, German and Spanish using the same method.
@inproceedings{Karakanta2018b,
  author    = {Karakanta, Alina and Vela, Mihaela and Teich, Elke},
  title     = {{EuroParl-UdS}: Preserving and Extending Metadata in Parliamentary Debates},
  booktitle = {{ParlaCLARIN} workshop, 11th Language Resources and Evaluation Conference ({LREC2018})},
  address   = {Miyazaki, Japan},
  year      = {2018},
  url       = {http://lrec-conf.org/workshops/lrec2018/W2/pdf/10_W2.pdf},
  abstract  = {Multilingual parliaments have been a useful source for monolingual and multilingual corpus collection. However, extra-textual information about speakers is often absent, and as a result, these resources cannot be fully used in translation studies.
In this paper we present a method for processing and building a parallel corpus consisting of parliamentary debates of the European Parliament for English into German and English into Spanish, where original language and native speaker information is available as metadata. The paper documents all necessary (pre- and post-) processing steps for creating such a valuable resource. In addition to the parallel corpora, we collect monolingual comparable corpora for English, German and Spanish using the same method.},
  pubstate  = {published},
}
Project: B7