{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "<pad>", "single_word": false, "lstrip": true, "rstrip": true, "normalized": false, "special": true }, { "id": 1, "content": "<s>", "single_word": false, "lstrip": true, "rstrip": true, "normalized": false, "special": true }, { "id": 2, "content": "</s>", "single_word": false, "lstrip": true, "rstrip": true, "normalized": false, "special": true }, { "id": 3, "content": "<unk>", "single_word": false, "lstrip": true, "rstrip": true, "normalized": false, "special": true } ], "normalizer": { "type": "Replace", "pattern": { "String": " " }, "content": "|" }, "pre_tokenizer": { "type": "Split", "pattern": { "Regex": "" }, "behavior": "Isolated", "invert": false }, "post_processor": null, "decoder": { "type": "CTC", "pad_token": "<pad>", "word_delimiter_token": "|", "cleanup": true }, "model": { "vocab": { "<pad>": 0, "<s>": 1, "</s>": 2, "<unk>": 3, "|": 4, "E": 5, "T": 6, "A": 7, "O": 8, "N": 9, "I": 10, "H": 11, "S": 12, "R": 13, "D": 14, "L": 15, "U": 16, "M": 17, "W": 18, "C": 19, "F": 20, "G": 21, "Y": 22, "P": 23, "B": 24, "V": 25, "K": 26, "'": 27, "X": 28, "J": 29, "Q": 30, "Z": 31 } } }