{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "<pad>", "single_word": false, "lstrip": true, "rstrip": true, "normalized": false, "special": true }, { "id": 1, "content": "<s>", "single_word": false, "lstrip": true, "rstrip": true, "normalized": false, "special": true }, { "id": 2, "content": "</s>", "single_word": false, "lstrip": true, "rstrip": true, "normalized": false, "special": true }, { "id": 3, "content": "<unk>", "single_word": false, "lstrip": true, "rstrip": true, "normalized": false, "special": true } ], "normalizer": { "type": "Replace", "pattern": { "String": " " }, "content": "|" }, "pre_tokenizer": { "type": "Split", "pattern": { "Regex": "" }, "behavior": "Isolated", "invert": false }, "post_processor": null, "decoder": { "type": "CTC", "pad_token": "<pad>", "word_delimiter_token": "|", "cleanup": true }, "model": { "vocab": { "<pad>": 0, "<s>": 1, "</s>": 2, "<unk>": 3, "|": 4, "'": 5, "-": 6, "a": 7, "b": 8, "c": 9, "d": 10, "e": 11, "f": 12, "g": 13, "h": 14, "i": 15, "j": 16, "k": 17, "l": 18, "m": 19, "n": 20, "o": 21, "p": 22, "q": 23, "r": 24, "s": 25, "t": 26, "u": 27, "v": 28, "w": 29, "x": 30, "y": 31, "z": 32 } } }