% ANEA (Hedderich, Lange, Klakow; 2021) -- arXiv preprint 2102.13129.
% The citation key is kept exactly as it was (without the leading "H" of
% "Hedderich") so that existing citations of this entry keep resolving.
% The doi field holds the bare DOI (no resolver prefix); the arXiv identifier
% is additionally exposed through eprint/eprinttype for eprint-aware styles.
@article{edderich2021anea,
  author     = {Hedderich, Michael and Lange, Lukas and Klakow, Dietrich},
  title      = {{ANEA}: Distant Supervision for Low-Resource Named Entity Recognition},
  journal    = {arXiv},
  year       = {2021},
  eprint     = {2102.13129},
  eprinttype = {arXiv},
  doi        = {10.48550/arXiv.2102.13129},
  url        = {https://arxiv.org/abs/2102.13129},
  abstract   = {Distant supervision allows obtaining labeled training corpora for low-resource settings where only limited hand-annotated data exists. However, to be used effectively, the distant supervision must be easy to gather. In this work, we present ANEA, a tool to automatically annotate named entities in texts based on entity lists. It spans the whole pipeline from obtaining the lists to analyzing the errors of the distant supervision. A tuning step allows the user to improve the automatic annotation with their linguistic insights without labelling or checking all tokens manually. In six low-resource scenarios, we show that the F1-score can be increased by on average 18 points through distantly supervised data obtained by ANEA.},
  pubstate   = {published},
  type       = {article},
}