@phdthesis{Mosbach-2024-Thesis,
  title     = {Analyzing pre-trained and fine-tuned language models},
  author    = {Marius Mosbach},
  url       = {https://publikationen.sulb.uni-saarland.de/handle/20.500.11880/37254},
  doi       = {10.22028/D291-41531},
  year      = {2024},
  date      = {2024-02-19},
  school    = {Saarland University},
  publisher = {Saarl{\"a}ndische Universit{\"a}ts- und Landesbibliothek},
  address   = {Saarbr{\"u}cken, Germany},
  abstract  = {The field of natural language processing (NLP) has recently undergone a paradigm shift. Since the introduction of transformer-based language models in 2018, the current generation of natural language processing models continues to demonstrate impressive capabilities on a variety of academic benchmarks and real-world applications. This paradigm shift is based on a simple but general pipeline which consists of pre-training neural language models on large quantities of text, followed by an adaptation step that fine-tunes the pre-trained model to perform a specific NLP task of interest. Despite the impressive progress on academic benchmarks and the widespread deployment of pre-trained and fine-tuned language models in industry, these models do not come without shortcomings which often have immediate consequences for the robustness and generalization of fine-tuned language models. Moreover, these shortcomings demonstrate that we still lack a fundamental understanding of how and why pre-trained and fine-tuned language models work as well as the individual steps of the pipeline that produce them. This thesis makes several contributions towards improving our understanding of pre-trained and fine-tuned language models by carrying out a detailed analysis of various parts of the modern NLP pipeline. Our contributions range from analyzing the linguistic knowledge of pre-trained language models and how it is affected by fine-tuning, to a rigorous analysis of the fine-tuning process itself and how the choice of adaptation technique affects the generalization of models. Overall, we provide new insights about previously unexplained phenomena and the capabilities of pre-trained and fine-tuned language models.}
}