Publications

Klakow, Dietrich; Trost, Thomas

Parameter Free Hierarchical Graph-Based Clustering for Analyzing Continuous Word Embeddings Inproceedings

In Workshop Proceedings of TextGraphs-11: Graph-based Methods for Natural Language Processing (Workshop at ACL 2017), 2017.

@inproceedings{TroKla2017,
  author    = {Klakow, Dietrich and Trost, Thomas},
  title     = {Parameter Free Hierarchical Graph-Based Clustering for Analyzing Continuous Word Embeddings},
  booktitle = {Workshop Proceedings of TextGraphs-11: Graph-based Methods for Natural Language Processing (Workshop at ACL 2017)},
  year      = {2017},
  date      = {2017},
  pubstate  = {published},
  type      = {inproceedings}
}

Copy BibTeX to Clipboard

Project:   B4

Oualil, Youssef; Klakow, Dietrich

A batch noise contrastive estimation approach for training large vocabulary language models Inproceedings

18th Annual Conference of the International Speech Communication Association (INTERSPEECH), 2017.

@inproceedings{Oualil2017,
  author    = {Oualil, Youssef and Klakow, Dietrich},
  title     = {A batch noise contrastive estimation approach for training large vocabulary language models},
  booktitle = {18th Annual Conference of the International Speech Communication Association (INTERSPEECH)},
  year      = {2017},
  date      = {2017},
  pubstate  = {published},
  type      = {inproceedings}
}

Copy BibTeX to Clipboard

Project:   B4

Singh, Mittul; Greenberg, Clayton; Oualil, Youssef; Klakow, Dietrich

Sub-Word Similarity based Search for Embeddings: Inducing Rare-Word Embeddings for Word Similarity Tasks and Language Modelling Inproceedings

Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers, The COLING 2016 Organizing Committee, Osaka, Japan, 2016.

Training good word embeddings requires large amounts of data. Out-of-vocabulary words will still be encountered at test-time, leaving these words without embeddings. To overcome this lack of embeddings for rare words, existing methods leverage morphological features to generate embeddings. While the existing methods use computationally-intensive rule-based (Soricut and Och, 2015) or tool-based (Botha and Blunsom, 2014) morphological analysis to generate embeddings, our system applies a computationally-simpler sub-word search on words that have existing embeddings.

Embeddings of the sub-word search results are then combined using string similarity functions to generate rare word embeddings. We augmented pre-trained word embeddings with these novel embeddings and evaluated on a rare word similarity task, obtaining up to 3 times improvement in correlation over the original set of embeddings. Applying our technique to embeddings trained on larger datasets led to on-par performance with the existing state-of-the-art for this task. Additionally, while analysing augmented embeddings in a log-bilinear language model, we observed up to 50% reduction in rare word perplexity in comparison to other more complex language models.

@inproceedings{singh-EtAl:2016:COLING1,
  author    = {Singh, Mittul and Greenberg, Clayton and Oualil, Youssef and Klakow, Dietrich},
  title     = {Sub-Word Similarity based Search for Embeddings: Inducing Rare-Word Embeddings for Word Similarity Tasks and Language Modelling},
  booktitle = {Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  publisher = {The COLING 2016 Organizing Committee},
  address   = {Osaka, Japan},
  year      = {2016},
  date      = {2016-12-01},
  url       = {http://aclweb.org/anthology/C16-1194},
  abstract  = {Training good word embeddings requires large amounts of data. Out-of-vocabulary words will still be encountered at test-time, leaving these words without embeddings. To overcome this lack of embeddings for rare words, existing methods leverage morphological features to generate embeddings. While the existing methods use computationally-intensive rule-based (Soricut and Och, 2015) or tool-based (Botha and Blunsom, 2014) morphological analysis to generate embeddings, our system applies a computationally-simpler sub-word search on words that have existing embeddings. Embeddings of the sub-word search results are then combined using string similarity functions to generate rare word embeddings. We augmented pre-trained word embeddings with these novel embeddings and evaluated on a rare word similarity task, obtaining up to 3 times improvement in correlation over the original set of embeddings. Applying our technique to embeddings trained on larger datasets led to on-par performance with the existing state-of-the-art for this task. Additionally, while analysing augmented embeddings in a log-bilinear language model, we observed up to 50\% reduction in rare word perplexity in comparison to other more complex language models.},
  pubstate  = {published},
  type      = {inproceedings}
}

Copy BibTeX to Clipboard

Project:   B4

Singh, Mittul; Greenberg, Clayton; Klakow, Dietrich

The Custom Decay Language Model for Long Range Dependencies Book Chapter

Text, Speech, and Dialogue: 19th International Conference, TSD 2016, Brno, Czech Republic, September 12-16, 2016, Proceedings, Springer International Publishing, pp. 343-351, Cham, 2016, ISBN 978-3-319-45510-5.

@inbook{Singh2016,
  author    = {Singh, Mittul and Greenberg, Clayton and Klakow, Dietrich},
  title     = {The Custom Decay Language Model for Long Range Dependencies},
  booktitle = {Text, Speech, and Dialogue: 19th International Conference, TSD 2016, Brno, Czech Republic, September 12-16, 2016, Proceedings},
  publisher = {Springer International Publishing},
  address   = {Cham},
  pages     = {343--351},
  year      = {2016},
  date      = {2016},
  isbn      = {978-3-319-45510-5},
  doi       = {10.1007/978-3-319-45510-5_39},
  url       = {http://dx.doi.org/10.1007/978-3-319-45510-5_39},
  pubstate  = {published},
  type      = {inbook}
}

Copy BibTeX to Clipboard

Project:   B4

Oualil, Youssef; Greenberg, Clayton; Singh, Mittul; Klakow, Dietrich

Sequential recurrent neural networks for language modeling Journal Article

Interspeech 2016, pp. 3509-3513, 2016.

@article{oualil2016sequential,
  author   = {Oualil, Youssef and Greenberg, Clayton and Singh, Mittul and Klakow, Dietrich},
  title    = {Sequential recurrent neural networks for language modeling},
  journal  = {Interspeech 2016},
  pages    = {3509--3513},
  year     = {2016},
  date     = {2016},
  pubstate = {published},
  type     = {article}
}

Copy BibTeX to Clipboard

Project:   B4

Sayeed, Asad; Greenberg, Clayton; Demberg, Vera

Thematic fit evaluation: an aspect of selectional preferences Journal Article

Proceedings of the 1st Workshop on Evaluating Vector Space Representations for NLP, pp. 99-105, 2016, ISBN 9781945626142.

@article{Sayeed2016,
  author   = {Sayeed, Asad and Greenberg, Clayton and Demberg, Vera},
  title    = {Thematic fit evaluation: an aspect of selectional preferences},
  journal  = {Proceedings of the 1st Workshop on Evaluating Vector Space Representations for NLP},
  pages    = {99--105},
  year     = {2016},
  date     = {2016},
  isbn     = {9781945626142},
  pubstate = {published},
  type     = {article}
}

Copy BibTeX to Clipboard

Projects:   B2 B4

Oualil, Youssef; Singh, Mittul; Greenberg, Clayton; Klakow, Dietrich

Long-short range context neural networks for language models Inproceedings

EMNLP 2016, 2016.

@inproceedings{Oualil2016,
  author    = {Oualil, Youssef and Singh, Mittul and Greenberg, Clayton and Klakow, Dietrich},
  title     = {Long-short range context neural networks for language models},
  booktitle = {EMNLP 2016},
  year      = {2016},
  date      = {2016},
  pubstate  = {published},
  type      = {inproceedings}
}

Copy BibTeX to Clipboard

Project:   B4

Schneegass, Stefan; Oualil, Youssef; Bulling, Andreas

SkullConduct: Biometric User Identification on Eyewear Computers Using Bone Conduction Through the Skull Inproceedings

Proceedings of the 2016 CHI Conference on Human Factors in Computing Systems, CHI '16, ACM, pp. 1379-1384, New York, NY, USA, 2016, ISBN 978-1-4503-3362-7.

@inproceedings{Schneegass:2016:SBU:2858036.2858152,
  author    = {Schneegass, Stefan and Oualil, Youssef and Bulling, Andreas},
  title     = {{SkullConduct}: Biometric User Identification on Eyewear Computers Using Bone Conduction Through the Skull},
  booktitle = {Proceedings of the 2016 CHI Conference on Human Factors in Computing Systems},
  series    = {CHI '16},
  publisher = {ACM},
  address   = {New York, NY, USA},
  pages     = {1379--1384},
  year      = {2016},
  date      = {2016},
  isbn      = {978-1-4503-3362-7},
  doi       = {10.1145/2858036.2858152},
  url       = {http://doi.acm.org/10.1145/2858036.2858152},
  pubstate  = {published},
  type      = {inproceedings}
}

Copy BibTeX to Clipboard

Project:   B4

Varjokallio, Matti; Klakow, Dietrich

Unsupervised morph segmentation and statistical language models for vocabulary expansion Inproceedings

Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers), Association for Computational Linguistics, pp. 175-180, Berlin, Germany, 2016.

@inproceedings{varjokallio-klakow:2016:P16-2,
  author    = {Varjokallio, Matti and Klakow, Dietrich},
  title     = {Unsupervised morph segmentation and statistical language models for vocabulary expansion},
  booktitle = {Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)},
  publisher = {Association for Computational Linguistics},
  address   = {Berlin, Germany},
  pages     = {175--180},
  year      = {2016},
  date      = {2016-08-01},
  url       = {http://anthology.aclweb.org/P16-2029},
  pubstate  = {published},
  type      = {inproceedings}
}

Copy BibTeX to Clipboard

Project:   B4

Oualil, Youssef; Schulder, Marc; Helmke, Hartmut; Schmidt, Anna; Klakow, Dietrich

Real-Time Integration of Dynamic Context Information for Improving Automatic Speech Recognition Inproceedings

INTERSPEECH 2015, 16th Annual Conference of the International Speech Communication Association, Dresden, Germany, 2015.

@inproceedings{youalil_interspeech_2015,
  author    = {Oualil, Youssef and Schulder, Marc and Helmke, Hartmut and Schmidt, Anna and Klakow, Dietrich},
  title     = {Real-Time Integration of Dynamic Context Information for Improving Automatic Speech Recognition},
  booktitle = {INTERSPEECH 2015, 16th Annual Conference of the International Speech Communication Association, Dresden, Germany},
  year      = {2015},
  date      = {2015},
  url       = {https://core.ac.uk/display/31018097},
  pubstate  = {published},
  type      = {inproceedings}
}

Copy BibTeX to Clipboard

Project:   B4

Greenberg, Clayton; Sayeed, Asad; Demberg, Vera

Improving Unsupervised Vector-Space Thematic Fit Evaluation via Role-Filler Prototype Clustering Inproceedings

Proceedings of the 2015 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Association for Computational Linguistics, pp. 21-31, Denver, Colorado, 2015.

@inproceedings{greenberg-sayeed-demberg:2015:NAACL-HLT,
  author    = {Greenberg, Clayton and Sayeed, Asad and Demberg, Vera},
  title     = {Improving Unsupervised Vector-Space Thematic Fit Evaluation via Role-Filler Prototype Clustering},
  booktitle = {Proceedings of the 2015 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
  publisher = {Association for Computational Linguistics},
  address   = {Denver, Colorado},
  pages     = {21--31},
  year      = {2015},
  date      = {2015},
  url       = {http://www.aclweb.org/anthology/N15-1003},
  pubstate  = {published},
  type      = {inproceedings}
}

Copy BibTeX to Clipboard

Projects:   B2 B4

Greenberg, Clayton; Demberg, Vera; Sayeed, Asad

Verb Polysemy and Frequency Effects in Thematic Fit Modeling Inproceedings

Proceedings of the 6th Workshop on Cognitive Modeling and Computational Linguistics, Association for Computational Linguistics, pp. 48-57, Denver, Colorado, 2015.

@inproceedings{greenberg-demberg-sayeed:2015:CMCL,
  author    = {Greenberg, Clayton and Demberg, Vera and Sayeed, Asad},
  title     = {Verb Polysemy and Frequency Effects in Thematic Fit Modeling},
  booktitle = {Proceedings of the 6th Workshop on Cognitive Modeling and Computational Linguistics},
  publisher = {Association for Computational Linguistics},
  address   = {Denver, Colorado},
  pages     = {48--57},
  year      = {2015},
  date      = {2015-06-01},
  url       = {http://www.aclweb.org/anthology/W15-1106},
  pubstate  = {published},
  type      = {inproceedings}
}

Copy BibTeX to Clipboard

Projects:   B2 B4

Successfully