{
  "version": "1.0",
  "truncation": null,
  "padding": null,
  "added_tokens": [
    {
      "id": 60,
      "content": "<unk>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    }
  ],
  "normalizer": {
    "type": "Sequence",
    "normalizers": [
      {
        "type": "Lowercase"
      },
      {
        "type": "Replace",
        "pattern": {
          "Regex": "[^|\u0abe\u0ac7\u0aa4\u0aa8\u0ab0\u0acd\u0aae\u0ac0\u0a95\u0a82\u0acb\u0aaa\u0ab5\u0ac1\u0ab8\u0aaf\u0ab9\u0a9c\u0aa5\u0abf\u0ab6\u0aa3\u0a9b\u0ab2\u0aa6\u0a86\u0a93\u0a85\u0a8f\u0a88\u0aac\u0a97\u0a96\u0aa7\u0aad\u0a9f\u0ab3\u0ac2\u0a9a\u0aa1\u0ab7\u0a89\u0aab\u0a98\u0aa0\u0ac3'\u0a8a\u0a87\u0a9e\u0a9d\u0aa2\u0ac8\u0a83\u0acc\\-\u0a8b\u0a90 ]"
        },
        "content": ""
      },
      {
        "type": "Strip",
        "strip_left": true,
        "strip_right": true
      },
      {
        "type": "Replace",
        "pattern": {
          "Regex": "(?=.)|(?<!^)$"
        },
        "content": "|"
      }
    ]
  },
  "pre_tokenizer": {
    "type": "Split",
    "pattern": {
      "Regex": ""
    },
    "behavior": "Isolated",
    "invert": false
  },
  "post_processor": null,
  "decoder": null,
  "model": {
    "vocab": {
      "|": 0,
      "\u0abe": 1,
      "\u0ac7": 2,
      "\u0aa4": 3,
      "\u0aa8": 4,
      "\u0ab0": 5,
      "\u0acd": 6,
      "\u0aae": 7,
      "\u0ac0": 8,
      "\u0a95": 9,
      "\u0a82": 10,
      "\u0acb": 11,
      "\u0aaa": 12,
      "\u0ab5": 13,
      "\u0ac1": 14,
      "\u0ab8": 15,
      "\u0aaf": 16,
      "\u0ab9": 17,
      "\u0a9c": 18,
      "\u0aa5": 19,
      "\u0abf": 20,
      "\u0ab6": 21,
      "\u0aa3": 22,
      "\u0a9b": 23,
      "\u0ab2": 24,
      "\u0aa6": 25,
      "\u0a86": 26,
      "\u0a93": 27,
      "\u0a85": 28,
      "\u0a8f": 29,
      "\u0a88": 30,
      "\u0aac": 31,
      "\u0a97": 32,
      "\u0a96": 33,
      "\u0aa7": 34,
      "\u0aad": 35,
      "\u0a9f": 36,
      "\u0ab3": 37,
      "\u0ac2": 38,
      "\u0a9a": 39,
      "\u0aa1": 40,
      "\u0ab7": 41,
      "\u0a89": 42,
      "\u0aab": 43,
      "\u0a98": 44,
      "\u0aa0": 45,
      "\u0ac3": 46,
      "'": 47,
      "\u0a8a": 48,
      "\u0a87": 49,
      "\u0a9e": 50,
      "\u0a9d": 51,
      "\u0aa2": 52,
      "\u0ac8": 53,
      "\u0a83": 54,
      "\u0acc": 55,
      "-": 56,
      "\u0a8b": 57,
      "\u0a90": 58,
      " ": 59,
      "<unk>": 60
    }
  }
}