{
  "version": "1.0",
  "truncation": null,
  "padding": null,
  "added_tokens": [
    {
      "id": 44,
      "content": "<unk>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    }
  ],
  "normalizer": {
    "type": "Sequence",
    "normalizers": [
      {
        "type": "Lowercase"
      },
      {
        "type": "Replace",
        "pattern": {
          "Regex": "[^\u0447 \u0434\u044c\u044f\u0439\u0441\u0443\u0430\u043e20\u044a\u044b\u043d\u043b\u0436\u0445\u0448\u0437c_\u043fm\u044e\u044d\u04404\u0449o\u2013\u0444q\u0438\u0446\u043a1\u0435\u0431\\-\u0442\u0432\u0433\u043c]"
        },
        "content": ""
      },
      {
        "type": "Strip",
        "strip_left": true,
        "strip_right": true
      },
      {
        "type": "Replace",
        "pattern": {
          "Regex": "(?=.)|(?<!^)$"
        },
        "content": "\u0447"
      }
    ]
  },
  "pre_tokenizer": {
    "type": "Split",
    "pattern": {
      "Regex": ""
    },
    "behavior": "Isolated",
    "invert": false
  },
  "post_processor": null,
  "decoder": null,
  "model": {
    "vocab": {
      "\u0447": 0,
      " ": 1,
      "\u0434": 2,
      "\u044c": 3,
      "\u044f": 4,
      "\u0439": 5,
      "\u0441": 6,
      "\u0443": 7,
      "\u0430": 8,
      "\u043e": 9,
      "2": 10,
      "0": 11,
      "\u044a": 12,
      "\u044b": 13,
      "\u043d": 14,
      "\u043b": 15,
      "\u0436": 16,
      "\u0445": 17,
      "\u0448": 18,
      "\u0437": 19,
      "c": 20,
      "_": 21,
      "\u043f": 22,
      "m": 23,
      "\u044e": 24,
      "\u044d": 25,
      "\u0440": 26,
      "4": 27,
      "\u0449": 28,
      "o": 29,
      "\u2013": 30,
      "\u0444": 31,
      "q": 32,
      "\u0438": 33,
      "\u0446": 34,
      "\u043a": 35,
      "1": 36,
      "\u0435": 37,
      "\u0431": 38,
      "-": 39,
      "\u0442": 40,
      "\u0432": 41,
      "\u0433": 42,
      "\u043c": 43,
      "<unk>": 44
    }
  }
}