{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "<|begin_of_sequence|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "<|end_of_sequence|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "<|im_start|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "<|im_sep|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 4, "content": "<|im_end|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 5, "content": "<|semantic|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 6, "content": "<|pad|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": { "type": "ByteLevel", "add_prefix_space": false, "trim_offsets": true, "use_regex": true }, "post_processor": { "type": "ByteLevel", "add_prefix_space": true, "trim_offsets": false, "use_regex": true }, "decoder": { "type": "ByteLevel", "add_prefix_space": true, "trim_offsets": true, "use_regex": true }, "model": { "type": "BPE", "dropout": null, "unk_token": null, "continuing_subword_prefix": null, "end_of_word_suffix": null, "fuse_unk": false, "byte_fallback": false, "vocab": { "<|begin_of_sequence|>": 0, "<|end_of_sequence|>": 1, "<|im_start|>": 2, "<|im_sep|>": 3, "<|im_end|>": 4, "<|semantic|>": 5, "<|pad|>": 6, "!": 7, "\"": 8, "#": 9, "$": 10, "%": 11, "&": 12, "'": 13, "(": 14, ")": 15, "*": 16, "+": 17, ",": 18, "-": 19, ".": 20, "/": 21, "0": 22, "1": 23, "2": 24, "3": 25, "4": 26, "5": 27, "6": 28, "7": 29, "8": 30, "9": 31, ":": 32, ";": 33, "<": 34, "=": 35, ">": 36, "?": 37, "@": 38, "A": 39, "B": 40, "C": 41, "D": 42, "E": 43, "F": 44, "G": 45, "H": 46, "I": 47, "J": 48, "K": 49, "L": 50, "M": 51, "N": 52, "O": 53, "P": 54, "Q": 55, "R": 56, "S": 57, "T": 58, "U": 59, "V": 60, "W": 61, "X": 62, "Y": 63, "Z": 64, "[": 65, "\\": 66, "]": 67, "^": 68, "_": 69, "`": 70, "a": 71, "b": 72, "c": 73, "d": 74, "e": 75, "f": 76, "g": 77, "h": 78, "i": 79, "j": 80, "k": 81, "l": 82, "m": 83, "n": 84, "o": 85, "p": 86, "q": 87, "r": 88, "s": 89, "t": 90, "u": 91, "v": 92, "w": 93, "x": 94, "y": 95, "z": 96, "{": 97, "|": 98, "}": 99, "~": 100, "¡": 101, "¢": 102, "£": 103, "¤": 104, "¥": 105, "¦": 106, "§": 107, "¨": 108, "©": 109, "ª": 110, "«": 111, "¬": 112, "®": 113, "¯": 114, "°": 115, "±": 116, "²": 117, "³": 118, "´": 119, "µ": 120, "¶": 121, "·": 122, "¸": 123, "¹": 124, "º": 125, "»": 126, "¼": 127, "½": 128, "¾": 129, "¿": 130, "À": 131, "Á": 132, "Â": 133, "Ã": 134, "Ä": 135, "Å": 136, "Æ": 137, "Ç": 138, "È": 139, "É": 140, "Ê": 141, "Ë": 142, "Ì": 143, "Í": 144, "Î": 145, "Ï": 146, "Ð": 147, "Ñ": 148, "Ò": 149, "Ó": 150, "Ô": 151, "Õ": 152, "Ö": 153, "×": 154, "Ø": 155, "Ù": 156, "Ú": 157, "Û": 158, "Ü": 159, "Ý": 160, "Þ": 161, "ß": 162, "à": 163, "á": 164, "â": 165, "ã": 166, "ä": 167, "å": 168, "æ": 169, "ç": 170, "è": 171, "é": 172, "ê": 173, "ë": 174, "ì": 175, "í": 176, "î": 177, "ï": 178, "ð": 179, "ñ": 180, "ò": 181, "ó": 182, "ô": 183, "õ": 184, "ö": 185, "÷": 186, "ø": 187, "ù": 188, "ú": 189, "û": 190, "ü": 191, "ý": 192, "þ": 193, "ÿ": 194, "Ā": 195, "ā": 196, "Ă": 197, "ă": 198, "Ą": 199, "ą": 200, "Ć": 201, "ć": 202, "Ĉ": 203, "ĉ": 204, "Ċ": 205, "ċ": 206, "Č": 207, "č": 208, "Ď": 209, "ď": 210, "Đ": 211, "đ": 212, "Ē": 213, "ē": 214, "Ĕ": 215, "ĕ": 216, "Ė": 217, "ė": 218, "Ę": 219, "ę": 220, "Ě": 221, "ě": 222, "Ĝ": 223, "ĝ": 224, "Ğ": 225, "ğ": 226, "Ġ": 227, "ġ": 228, "Ģ": 229, "ģ": 230, "Ĥ": 231, "ĥ": 232, "Ħ": 233, "ħ": 234, "Ĩ": 235, "ĩ": 236, "Ī": 237, "ī": 238, "Ĭ": 239, "ĭ": 240, "Į": 241, "į": 242, "İ": 243, "ı": 244, "IJ": 245, "ij": 246, "Ĵ": 247, "ĵ": 248, "Ķ": 249, "ķ": 250, "ĸ": 251, "Ĺ": 252, "ĺ": 253, "Ļ": 254, "ļ": 255, "Ľ": 256, "ľ": 257, "Ŀ": 258, "ŀ": 259, "Ł": 260, "ł": 261, "Ń": 262 }, "merges": [] } }