% Menzel2021: journal article on metadata generation and management for the
% Royal Society Corpus (RSC).
% Conventions used in this entry:
%   - journal holds only the journal name; the special-issue title is in
%     issuetitle (biblatex) and the "special issue" remark is in note.
%   - doi is the bare identifier, without a resolver prefix.
%   - names are in "Last, First" form; accented letters are written as
%     braced LaTeX escapes so that classic BibTeX sorts them correctly.
@article{Menzel2021,
  author     = {Menzel, Katrin and Knappen, J{\"o}rg and Teich, Elke},
  editor     = {S{\"a}ily, Tanja and Tyrkk{\"o}, Jukka},
  title      = {Generating linguistically relevant metadata for the {Royal Society Corpus}},
  journal    = {Research in Corpus Linguistics},
  issuetitle = {Challenges in combining structured and unstructured data in corpus development},
  volume     = {9},
  number     = {1},
  pages      = {1--18},
  year       = {2021},
  doi        = {10.32714/ricl.09.01.02},
  url        = {https://ricl.aelinco.es/index.php/ricl/article/view/158},
  note       = {Special issue},
  abstract   = {This paper provides an overview of metadata generation and management for the Royal Society Corpus (RSC), aiming to encourage discussion about the specific challenges in building substantial diachronic corpora intended to be used for linguistic and humanistic analysis. We discuss the motivations and goals of building the corpus, describe its composition and present the types of metadata it contains. Specifically, we tackle two challenges: first, integration of original metadata from the data providers (JSTOR and the Royal Society); second, derivation of additional linguistically relevant metadata regarding text structure and situational context (register).},
  pubstate   = {published},
  type       = {article},
}