{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 95, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": { "type": "Sequence", "normalizers": [ { "type": "Lowercase" }, { "type": "Replace", "pattern": { "Regex": "[^\u1ee5xs\u00e8\u00ec\u1eb7\u1ee9\u01a1u\u01b0\u1eb1\u1ef3\u00e2cv\u1ec7\u1ed3_\u1ef5\u1ed9\u1ebbg\u1ea1\u0129\u1ea3\u00f5\u1ee1\u1eb3\u00f4aio\u1eeb\u1ef9\u1ed5\u00e0\u1edd\u00fd\u00f9\u2013\u1ec1\u1ec5\u1eb5\u1ecf'\u00f2\u1ed1q\u1ecd\u1ea9\u1ef1\u00e32\u1ecbe\u0111\u00f3\u1ea7\u00e9\u1edb\u1ebf\u1ee7\u1eedd\u1eef\u1eaf\u1ebd\u1eabm\u1ec3\u0169\u1ec9\u1eb9\u1ed7\u00edy\u00fa\u00e1pkt\u1ea5l\u1edf h\u1ef7\u1eadb\u0103n\u00ear\u1ee3\\-]" }, "content": "" }, { "type": "Strip", "strip_left": true, "strip_right": true }, { "type": "Replace", "pattern": { "Regex": "(?=.)|(?": 95 } } }