{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "[UNK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "[PAD]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "[CLS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "[SEP]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 4, "content": "[MASK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": { "type": "Strip", "strip_left": true, "strip_right": true }, "pre_tokenizer": { "type": "WhitespaceSplit" }, "post_processor": null, "decoder": null, "model": { "type": "WordLevel", "vocab": { "[UNK]": 0, "[PAD]": 1, "[CLS]": 2, "[SEP]": 3, "[MASK]": 4, "6": 5, "9": 6, "2": 7, "5": 8, "1": 9, "7": 10, "4": 11, "8": 12, "0": 13, "3": 14 }, "unk_token": "[UNK]" } }