{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "([bos])", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "([eos])", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "([unk])", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "([pad])", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 4, "content": "([mask])", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": { "type": "ByteLevel", "add_prefix_space": true, "trim_offsets": true, "use_regex": true }, "post_processor": { "type": "TemplateProcessing", "single": [ { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "([eos])", "type_id": 0 } } ], "pair": [ { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "([eos])", "type_id": 0 } }, { "Sequence": { "id": "B", "type_id": 1 } }, { "SpecialToken": { "id": "([eos])", "type_id": 1 } } ], "special_tokens": { "([bos])": { "id": "([bos])", "ids": [ 0 ], "tokens": [ "([bos])" ] }, "([eos])": { "id": "([eos])", "ids": [ 1 ], "tokens": [ "([eos])" ] } } }, "decoder": { "type": "ByteLevel", "add_prefix_space": true, "trim_offsets": true, "use_regex": true }, "model": { "type": "BPE", "dropout": null, "unk_token": null, "continuing_subword_prefix": null, "end_of_word_suffix": null, "fuse_unk": false, "byte_fallback": false, "vocab": { "([bos])": 0, "([eos])": 1, "([unk])": 2, "([pad])": 3, "([mask])": 4, "'": 5, "a": 6, "b": 7, "c": 8, "d": 9, "e": 10, "f": 11, "g": 12, "h": 13, "i": 14, "j": 15, "k": 16, "l": 17, "m": 18, "n": 19, "o": 20, "p": 21, "q": 22, "r": 23, "s": 24, "t": 25, "u": 26, "v": 27, "w": 28, "x": 29, "y": 30, "z": 31, "Ġ": 32, "Ġt": 33, "he": 34, "Ġa": 35, "Ġthe": 36, "in": 37, "Ġs": 38, "Ġw": 39, "Ġo": 40, "re": 41, "nd": 42, "Ġb": 43, "Ġh": 44, "er": 45, "Ġm": 46, "Ġi": 47, "ou": 48, "Ġc": 49, "Ġf": 50, "at": 51, "ed": 52, "Ġand": 53, "en": 54, "Ġto": 55, "Ġof": 56, "on": 57, "is": 58, "Ġd": 59, "ing": 60, "Ġth": 61, "Ġp": 62, "Ġhe": 63, "or": 64, "Ġl": 65, "es": 66, "Ġin": 67, "ll": 68, "it": 69, "ar": 70, "as": 71, "an": 72, "Ġn": 73, "Ġg": 74, "om": 75, "Ġbe": 76, "Ġha": 77, "Ġe": 78, "le": 79, "ot": 80, "Ġy": 81, "ut": 82, "ow": 83, "ic": 84, "Ġwh": 85, "Ġit": 86, "ld": 87, "ve": 88, "Ġthat": 89, "ly": 90, "Ġwas": 91, "id": 92, "se": 93, "st": 94, "Ġon": 95, "gh": 96, "ent": 97, "Ġre": 98, "Ġyou": 99 }, "merges": [ "Ġ t", "h e", "Ġ a", "Ġt he", "i n", "Ġ s", "Ġ w", "Ġ o", "r e", "n d", "Ġ b", "Ġ h", "e r", "Ġ m", "Ġ i", "o u", "Ġ c", "Ġ f", "a t", "e d", "Ġa nd", "e n", "Ġt o", "Ġo f", "o n", "i s", "Ġ d", "in g", "Ġt h", "Ġ p", "Ġ he", "o r", "Ġ l", "e s", "Ġ in", "l l", "i t", "a r", "a s", "a n", "Ġ n", "Ġ g", "o m", "Ġb e", "Ġh a", "Ġ e", "l e", "o t", "Ġ y", "u t", "o w", "i c", "Ġw h", "Ġi t", "l d", "v e", "Ġth at", "l y", "Ġw as", "i d", "s e", "s t", "Ġo n", "g h", "en t", "Ġ re", "Ġy ou" ] } }