{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 1, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true } ], "normalizer": null, "pre_tokenizer": { "type": "ByteLevel", "add_prefix_space": false, "trim_offsets": true, "use_regex": true }, "post_processor": { "type": "TemplateProcessing", "single": [ { "SpecialToken": { "id": "", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } } ], "pair": [ { "SpecialToken": { "id": "", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } }, { "Sequence": { "id": "B", "type_id": 1 } } ], "special_tokens": { "": { "id": "", "ids": [ 1 ], "tokens": [ "" ] } } }, "decoder": { "type": "ByteLevel", "add_prefix_space": true, "trim_offsets": true, "use_regex": true }, "model": { "type": "BPE", "dropout": null, "unk_token": null, "continuing_subword_prefix": "", "end_of_word_suffix": "", "fuse_unk": false, "byte_fallback": false, "vocab": { "": 0, "": 1, "!": 2, "\"": 3, "#": 4, "$": 5, "%": 6, "&": 7, "'": 8, "(": 9, ")": 10, "*": 11, "+": 12, ",": 13, "-": 14, ".": 15, "/": 16, "0": 17, "1": 18, "2": 19, "3": 20, "4": 21, "5": 22, "6": 23, "7": 24, "8": 25, "9": 26, ":": 27, ";": 28, "<": 29, "=": 30, ">": 31, "?": 32, "@": 33, "A": 34, "B": 35, "C": 36, "D": 37, "E": 38, "F": 39, "G": 40, "H": 41, "I": 42, "J": 43, "K": 44, "L": 45, "M": 46, "N": 47, "O": 48, "P": 49, "Q": 50, "R": 51, "S": 52, "T": 53, "U": 54, "V": 55, "W": 56, "X": 57, "Y": 58, "Z": 59, "[": 60, "\\": 61, "]": 62, "^": 63, "_": 64, "`": 65, "a": 66, "b": 67, "c": 68, "d": 69, "e": 70, "f": 71, "g": 72, "h": 73, "i": 74, "j": 75, "k": 76, "l": 77, "m": 78, "n": 79, "o": 80, "p": 81, "q": 82, "r": 83, "s": 84, "t": 85, "u": 86, "v": 87, "w": 88, "x": 89, "y": 90, "z": 91, "{": 92, "|": 93, "}": 94, "~": 95, "¡": 96, "¢": 97, "£": 98, "¤": 99, "¥": 100, "¦": 101, "§": 102, "¨": 103, "©": 104, "ª": 105, "«": 106, "¬": 107, "®": 108, "¯": 109, "°": 110, "±": 111, "²": 112, "³": 113, "´": 114, "µ": 115, "¶": 116, "·": 117, "¸": 118, "¹": 119, "º": 120, "»": 121, "¼": 122, "½": 123, "¾": 124, "¿": 125, "À": 126, "Á": 127, "Â": 128, "Ã": 129, "Ä": 130, "Å": 131, "Æ": 132, "Ç": 133, "È": 134, "É": 135, "Ê": 136, "Ë": 137, "Ì": 138, "Í": 139, "Î": 140, "Ï": 141, "Ð": 142, "Ñ": 143, "Ò": 144, "Ó": 145, "Ô": 146, "Õ": 147, "Ö": 148, "×": 149, "Ø": 150, "Ù": 151, "Ú": 152, "Û": 153, "Ü": 154, "Ý": 155, "Þ": 156, "ß": 157, "à": 158, "á": 159, "â": 160, "ã": 161, "ä": 162, "å": 163, "æ": 164, "ç": 165, "è": 166, "é": 167, "ê": 168, "ë": 169, "ì": 170, "í": 171, "î": 172, "ï": 173, "ð": 174, "ñ": 175, "ò": 176, "ó": 177, "ô": 178, "õ": 179, "ö": 180, "÷": 181, "ø": 182, "ù": 183, "ú": 184, "û": 185, "ü": 186, "ý": 187, "þ": 188, "ÿ": 189, "Ā": 190, "ā": 191, "Ă": 192, "ă": 193, "Ą": 194, "ą": 195, "Ć": 196, "ć": 197, "Ĉ": 198, "ĉ": 199, "Ċ": 200, "ċ": 201, "Č": 202, "č": 203, "Ď": 204, "ď": 205, "Đ": 206, "đ": 207, "Ē": 208, "ē": 209, "Ĕ": 210, "ĕ": 211, "Ė": 212, "ė": 213, "Ę": 214, "ę": 215, "Ě": 216, "ě": 217, "Ĝ": 218, "ĝ": 219, "Ğ": 220, "ğ": 221, "Ġ": 222, "ġ": 223, "Ģ": 224, "ģ": 225, "Ĥ": 226, "ĥ": 227, "Ħ": 228, "ħ": 229, "Ĩ": 230, "ĩ": 231, "Ī": 232, "ī": 233, "Ĭ": 234, "ĭ": 235, "Į": 236, "į": 237, "İ": 238, "ı": 239, "IJ": 240, "ij": 241, "Ĵ": 242, "ĵ": 243, "Ķ": 244, "ķ": 245, "ĸ": 246, "Ĺ": 247, "ĺ": 248, "Ļ": 249, "ļ": 250, "Ľ": 251, "ľ": 252, "Ŀ": 253, "ŀ": 254, "Ł": 255, "ł": 256, "Ń": 257, "ab": 258, "el": 259, "in": 260, "lab": 261, "pu": 262, "inpu": 263, "label": 264, "input": 265 }, "merges": [ "a b", "e l", "i n", "l ab", "p u", "in pu", "lab el", "inpu t" ] } }