{ "algorithm": { "command": null, "id": 4, "name": "Gensim Continuous Bag-of-Words", "tool": "Gensim", "url": "https://github.com/RaRe-Technologies/gensim", "version": "3.8" }, "contents": [ { "filename": "model.txt", "format": "text" }, { "filename": "model.bin", "format": "data" }, { "filename": "meta.json", "format": "json" } ], "corpus": [ { "NER": true, "case preserved": false, "description": "Russian National Corpus", "id": 88, "language": "rus", "lemmatized": true, "public": false, "stop words removal": null, "tagger": "UDPipe 1.2", "tagset": "UPoS", "tokens": 270000000, "url": "http://ruscorpora.ru/" }, { "NER": true, "case preserved": false, "description": "Russian Wikipedia dump of December 2018", "id": 91, "language": "rus", "lemmatized": true, "public": true, "stop words removal": null, "tagger": "UDPipe 1.2", "tagset": "UPoS", "tokens": 518531000, "tool": "https://github.com/RaRe-Technologies/gensim/blob/master/gensim/scripts/segment_wiki.py", "url": "https://dumps.wikimedia.org/" }, { "NER": true, "case preserved": false, "description": "Russian News from Dialogue Evaluation 2020", "id": 114, "language": "rus", "lemmatized": true, "public": true, "stop words removal": null, "tagger": "UDPipe 1.2", "tagset": "UPoS", "tokens": 1321489104, "url": "https://competitions.codalab.org/competitions/22168" }, { "NER": true, "case preserved": false, "description": "Araneum Russicum Maximum", "id": 115, "language": "rus", "lemmatized": true, "public": true, "stop words removal": "functional PoS", "tagger": "MyStem", "tagset": "UPoS", "tokens": 10000000000, "url": "https://rusvectores.org/en/models/" } ], "creators": [ { "email": "andreku@ifi.uio.no", "name": "Andrey Kutuzov" }, { "email": "maria.kunilovskaya@wlv.ac.uk", "name": "Maria Kunilovskaya" } ], "dimensions": 300, "documentation": [ "https://github.com/kunilovskaya/hypohyper/" ], "external_id": "ruscorporawikiaraneumnews_mwe_upos_cbow_300_2_2020", "handle": "http://vectors.nlpl.eu/repository/20/204.zip", "id": 204, "iterations": 3, "vocabulary size": 998459, "window": 2 }