{ | |
"version": "1.0", | |
"truncation": { | |
"direction": "Right", | |
"max_length": 512, | |
"strategy": "LongestFirst", | |
"stride": 0 | |
}, | |
"padding": null, | |
"added_tokens": [ | |
{ | |
"id": 0, | |
"content": "<s>", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": true, | |
"special": true | |
}, | |
{ | |
"id": 1, | |
"content": "<pad>", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": true, | |
"special": true | |
}, | |
{ | |
"id": 2, | |
"content": "</s>", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": true, | |
"special": true | |
}, | |
{ | |
"id": 3, | |
"content": "<unk>", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": true, | |
"special": true | |
}, | |
{ | |
"id": 4, | |
"content": "<mask>", | |
"single_word": false, | |
"lstrip": true, | |
"rstrip": false, | |
"normalized": true, | |
"special": true | |
} | |
], | |
"normalizer": null, | |
"pre_tokenizer": { | |
"type": "ByteLevel", | |
"add_prefix_space": false, | |
"trim_offsets": true, | |
"use_regex": true | |
}, | |
"post_processor": { | |
"type": "RobertaProcessing", | |
"sep": [ | |
"</s>", | |
2 | |
], | |
"cls": [ | |
"<s>", | |
0 | |
], | |
"trim_offsets": true, | |
"add_prefix_space": false | |
}, | |
"decoder": { | |
"type": "ByteLevel", | |
"add_prefix_space": true, | |
"trim_offsets": true, | |
"use_regex": true | |
}, | |
"model": { | |
"type": "BPE", | |
"dropout": null, | |
"unk_token": null, | |
"continuing_subword_prefix": "", | |
"end_of_word_suffix": "", | |
"fuse_unk": false, | |
"byte_fallback": false, | |
"vocab": { | |
"<s>": 0, | |
"<pad>": 1, | |
"</s>": 2, | |
"<unk>": 3, | |
"<mask>": 4, | |
"!": 5, | |
"\"": 6, | |
"#": 7, | |
"$": 8, | |
"%": 9, | |
"&": 10, | |
"'": 11, | |
"(": 12, | |
")": 13, | |
"*": 14, | |
"+": 15, | |
",": 16, | |
"-": 17, | |
".": 18, | |
"/": 19, | |
"0": 20, | |
"1": 21, | |
"2": 22, | |
"3": 23, | |
"4": 24, | |
"5": 25, | |
"6": 26, | |
"7": 27, | |
"8": 28, | |
"9": 29, | |
":": 30, | |
";": 31, | |
"<": 32, | |
"=": 33, | |
">": 34, | |
"?": 35, | |
"@": 36, | |
"A": 37, | |
"B": 38, | |
"C": 39, | |
"D": 40, | |
"E": 41, | |
"F": 42, | |
"G": 43, | |
"H": 44, | |
"I": 45, | |
"J": 46, | |
"K": 47, | |
"L": 48, | |
"M": 49, | |
"N": 50, | |
"O": 51, | |
"P": 52, | |
"Q": 53, | |
"R": 54, | |
"S": 55, | |
"T": 56, | |
"U": 57, | |
"V": 58, | |
"W": 59, | |
"X": 60, | |
"Y": 61, | |
"Z": 62, | |
"[": 63, | |
"\\": 64, | |
"]": 65, | |
"^": 66, | |
"_": 67, | |
"`": 68, | |
"a": 69, | |
"b": 70, | |
"c": 71, | |
"d": 72, | |
"e": 73, | |
"f": 74, | |
"g": 75, | |
"h": 76, | |
"i": 77, | |
"j": 78, | |
"k": 79, | |
"l": 80, | |
"m": 81, | |
"n": 82, | |
"o": 83, | |
"p": 84, | |
"q": 85, | |
"r": 86, | |
"s": 87, | |
"t": 88, | |
"u": 89, | |
"v": 90, | |
"w": 91, | |
"x": 92, | |
"y": 93, | |
"z": 94, | |
"{": 95, | |
"|": 96, | |
"}": 97, | |
"~": 98, | |
"¡": 99, | |
"¢": 100, | |
"£": 101, | |
"¤": 102, | |
"¥": 103, | |
"¦": 104, | |
"§": 105, | |
"¨": 106, | |
"©": 107, | |
"ª": 108, | |
"«": 109, | |
"¬": 110, | |
"®": 111, | |
"¯": 112, | |
"°": 113, | |
"±": 114, | |
"²": 115, | |
"³": 116, | |
"´": 117, | |
"µ": 118, | |
"¶": 119, | |
"·": 120, | |
"¸": 121, | |
"¹": 122, | |
"º": 123, | |
"»": 124, | |
"¼": 125, | |
"½": 126, | |
"¾": 127, | |
"¿": 128, | |
"À": 129, | |
"Á": 130, | |
"Â": 131, | |
"Ã": 132, | |
"Ä": 133, | |
"Å": 134, | |
"Æ": 135, | |
"Ç": 136, | |
"È": 137, | |
"É": 138, | |
"Ê": 139, | |
"Ë": 140, | |
"Ì": 141, | |
"Í": 142, | |
"Î": 143, | |
"Ï": 144, | |
"Ð": 145, | |
"Ñ": 146, | |
"Ò": 147, | |
"Ó": 148, | |
"Ô": 149, | |
"Õ": 150, | |
"Ö": 151, | |
"×": 152, | |
"Ø": 153, | |
"Ù": 154, | |
"Ú": 155, | |
"Û": 156, | |
"Ü": 157, | |
"Ý": 158, | |
"Þ": 159, | |
"ß": 160, | |
"à": 161, | |
"á": 162, | |
"â": 163, | |
"ã": 164, | |
"ä": 165, | |
"å": 166, | |
"æ": 167, | |
"ç": 168, | |
"è": 169, | |
"é": 170, | |
"ê": 171, | |
"ë": 172, | |
"ì": 173, | |
"í": 174, | |
"î": 175, | |
"ï": 176, | |
"ð": 177, | |
"ñ": 178, | |
"ò": 179, | |
"ó": 180, | |
"ô": 181, | |
"õ": 182, | |
"ö": 183, | |
"÷": 184, | |
"ø": 185, | |
"ù": 186, | |
"ú": 187, | |
"û": 188, | |
"ü": 189, | |
"ý": 190, | |
"þ": 191, | |
"ÿ": 192, | |
"Ā": 193, | |
"ā": 194, | |
"Ă": 195, | |
"ă": 196, | |
"Ą": 197, | |
"ą": 198, | |
"Ć": 199, | |
"ć": 200, | |
"Ĉ": 201, | |
"ĉ": 202, | |
"Ċ": 203, | |
"ċ": 204, | |
"Č": 205, | |
"č": 206, | |
"Ď": 207, | |
"ď": 208, | |
"Đ": 209, | |
"đ": 210, | |
"Ē": 211, | |
"ē": 212, | |
"Ĕ": 213, | |
"ĕ": 214, | |
"Ė": 215, | |
"ė": 216, | |
"Ę": 217, | |
"ę": 218, | |
"Ě": 219, | |
"ě": 220, | |
"Ĝ": 221, | |
"ĝ": 222, | |
"Ğ": 223, | |
"ğ": 224, | |
"Ġ": 225, | |
"ġ": 226, | |
"Ģ": 227, | |
"ģ": 228, | |
"Ĥ": 229, | |
"ĥ": 230, | |
"Ħ": 231, | |
"ħ": 232, | |
"Ĩ": 233, | |
"ĩ": 234, | |
"Ī": 235, | |
"ī": 236, | |
"Ĭ": 237, | |
"ĭ": 238, | |
"Į": 239, | |
"į": 240, | |
"İ": 241, | |
"ı": 242, | |
"Ĳ": 243, | |
"ĳ": 244, | |
"Ĵ": 245, | |
"ĵ": 246, | |
"Ķ": 247, | |
"ķ": 248, | |
"ĸ": 249, | |
"Ĺ": 250, | |
"ĺ": 251, | |
"Ļ": 252, | |
"ļ": 253, | |
"Ľ": 254, | |
"ľ": 255, | |
"Ŀ": 256, | |
"ŀ": 257, | |
"Ł": 258, | |
"ł": 259, | |
"Ń": 260 | |
}, | |
"merges": [] | |
} | |
} |