{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 76, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": { "type": "Sequence", "normalizers": [ { "type": "Lowercase" }, { "type": "Replace", "pattern": { "Regex": "[^_ !\"'(),\\-.:;?[\\]\u2019\u201c\u201dABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\u00e0\u00e2\u00e8\u00e9\u00ea\u00fc]" }, "content": "" }, { "type": "Strip", "strip_left": true, "strip_right": true }, { "type": "Replace", "pattern": { "Regex": "(?=.)|(?": 76 } } }