test-tokenizer / tokenizer.json
tnieva's picture
add tokenizer
b22c8fd
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "<|endoftext|>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
"pre_tokenizer": {
"type": "ByteLevel",
"add_prefix_space": false,
"trim_offsets": true,
"use_regex": true
},
"post_processor": {
"type": "ByteLevel",
"add_prefix_space": true,
"trim_offsets": false,
"use_regex": true
},
"decoder": {
"type": "ByteLevel",
"add_prefix_space": true,
"trim_offsets": true,
"use_regex": true
},
"model": {
"type": "BPE",
"dropout": null,
"unk_token": null,
"continuing_subword_prefix": "",
"end_of_word_suffix": "",
"fuse_unk": false,
"vocab": {
"<|endoftext|>": 0,
"!": 1,
"\"": 2,
"#": 3,
"$": 4,
"%": 5,
"&": 6,
"'": 7,
"(": 8,
")": 9,
"*": 10,
"+": 11,
",": 12,
"-": 13,
".": 14,
"/": 15,
"0": 16,
"1": 17,
"2": 18,
"3": 19,
"4": 20,
"5": 21,
"6": 22,
"7": 23,
"8": 24,
"9": 25,
":": 26,
";": 27,
"<": 28,
"=": 29,
">": 30,
"?": 31,
"@": 32,
"A": 33,
"B": 34,
"C": 35,
"D": 36,
"E": 37,
"F": 38,
"G": 39,
"H": 40,
"I": 41,
"J": 42,
"K": 43,
"L": 44,
"M": 45,
"N": 46,
"O": 47,
"P": 48,
"Q": 49,
"R": 50,
"S": 51,
"T": 52,
"U": 53,
"V": 54,
"W": 55,
"X": 56,
"Y": 57,
"Z": 58,
"[": 59,
"\\": 60,
"]": 61,
"^": 62,
"_": 63,
"`": 64,
"a": 65,
"b": 66,
"c": 67,
"d": 68,
"e": 69,
"f": 70,
"g": 71,
"h": 72,
"i": 73,
"j": 74,
"k": 75,
"l": 76,
"m": 77,
"n": 78,
"o": 79,
"p": 80,
"q": 81,
"r": 82,
"s": 83,
"t": 84,
"u": 85,
"v": 86,
"w": 87,
"x": 88,
"y": 89,
"z": 90,
"{": 91,
"|": 92,
"}": 93,
"~": 94,
"¡": 95,
"¢": 96,
"£": 97,
"¤": 98,
"¥": 99,
"¦": 100,
"§": 101,
"¨": 102,
"©": 103,
"ª": 104,
"«": 105,
"¬": 106,
"®": 107,
"¯": 108,
"°": 109,
"±": 110,
"²": 111,
"³": 112,
"´": 113,
"µ": 114,
"¶": 115,
"·": 116,
"¸": 117,
"¹": 118,
"º": 119,
"»": 120,
"¼": 121,
"½": 122,
"¾": 123,
"¿": 124,
"À": 125,
"Á": 126,
"Â": 127,
"Ã": 128,
"Ä": 129,
"Å": 130,
"Æ": 131,
"Ç": 132,
"È": 133,
"É": 134,
"Ê": 135,
"Ë": 136,
"Ì": 137,
"Í": 138,
"Î": 139,
"Ï": 140,
"Ð": 141,
"Ñ": 142,
"Ò": 143,
"Ó": 144,
"Ô": 145,
"Õ": 146,
"Ö": 147,
"×": 148,
"Ø": 149,
"Ù": 150,
"Ú": 151,
"Û": 152,
"Ü": 153,
"Ý": 154,
"Þ": 155,
"ß": 156,
"à": 157,
"á": 158,
"â": 159,
"ã": 160,
"ä": 161,
"å": 162,
"æ": 163,
"ç": 164,
"è": 165,
"é": 166,
"ê": 167,
"ë": 168,
"ì": 169,
"í": 170,
"î": 171,
"ï": 172,
"ð": 173,
"ñ": 174,
"ò": 175,
"ó": 176,
"ô": 177,
"õ": 178,
"ö": 179,
"÷": 180,
"ø": 181,
"ù": 182,
"ú": 183,
"û": 184,
"ü": 185,
"ý": 186,
"þ": 187,
"ÿ": 188,
"Ā": 189,
"ā": 190,
"Ă": 191,
"ă": 192,
"Ą": 193,
"ą": 194,
"Ć": 195,
"ć": 196,
"Ĉ": 197,
"ĉ": 198,
"Ċ": 199,
"ċ": 200,
"Č": 201,
"č": 202,
"Ď": 203,
"ď": 204,
"Đ": 205,
"đ": 206,
"Ē": 207,
"ē": 208,
"Ĕ": 209,
"ĕ": 210,
"Ė": 211,
"ė": 212,
"Ę": 213,
"ę": 214,
"Ě": 215,
"ě": 216,
"Ĝ": 217,
"ĝ": 218,
"Ğ": 219,
"ğ": 220,
"Ġ": 221,
"ġ": 222,
"Ģ": 223,
"ģ": 224,
"Ĥ": 225,
"ĥ": 226,
"Ħ": 227,
"ħ": 228,
"Ĩ": 229,
"ĩ": 230,
"Ī": 231,
"ī": 232,
"Ĭ": 233,
"ĭ": 234,
"Į": 235,
"į": 236,
"İ": 237,
"ı": 238,
"IJ": 239,
"ij": 240,
"Ĵ": 241,
"ĵ": 242,
"Ķ": 243,
"ķ": 244,
"ĸ": 245,
"Ĺ": 246,
"ĺ": 247,
"Ļ": 248,
"ļ": 249,
"Ľ": 250,
"ľ": 251,
"Ŀ": 252,
"ŀ": 253,
"Ł": 254,
"ł": 255,
"Ń": 256,
"--": 257,
"ĠĠ": 258,
"se": 259,
"in": 260,
"es": 261,
"pr": 262,
"ĊĠĠ": 263,
"Ġi": 264,
"Ġt": 265,
"Ġ--": 266,
"Ġpr": 267,
"int": 268,
"est": 269,
"Ġprint": 270,
"as": 271,
"on": 272,
"):": 273,
"un": 274,
"Ġ\"": 275,
"Ġth": 276,
"ar": 277,
"\")": 278,
"(\"": 279,
"EM": 280,
"EN": 281,
"ES": 282,
"Test": 283,
"ase": 284,
"ct": 285,
"de": 286,
"fun": 287,
"it": 288,
"ion": 289,
"lse": 290,
"ĠE": 291,
"ĠS": 292,
"Ġde": 293,
"Ġif": 294,
"Ġtha": 295,
"ction": 296,
"function": 297,
"Ġdef": 298,
"Lar": 299,
"ry": 300,
"Larry": 301,
"ea": 302,
"Jas": 303,
"Jason": 304,
"Ġ0": 305,
"),": 306,
"AS": 307,
"AT": 308,
"BL": 309,
"CI": 310,
"Case": 311,
"CAS": 312,
"ER": 313,
"Eq": 314,
"IO": 315,
"LU": 316,
"OBL": 317,
"OLU": 318,
"PR": 319,
"SE": 320,
"TES": 321,
"TAT": 322,
"TIO": 323,
"Wr": 324,
"XER": 325,
"al": 326,
"bar": 327,
"cl": 328,
"case": 329,
"else": 330,
"il": 331,
"lf": 332,
"pu": 333,
"rt": 334,
"test": 335,
"ual": 336,
"wil": 337,
"ĊĠ": 338,
"Ġ(": 339,
"Ġa": 340,
"Ġse": 341,
"Ġin": 342,
"ĠTest": 343,
"Ġfunction": 344,
"ĠCAS": 345,
"ĠPR": 346,
"ĠWr": 347,
"Ġbar": 348,
"Ġelse": 349,
"Ġwil": 350,
"ĠĠĠ": 351,
"sert": 352,
"ĊĠĠĠ": 353,
"ĊĠĠĠĠĠ": 354,
"Ġis": 355,
"Ġit": 356,
"Ġtest": 357,
"Ġprints": 358,
"ass": 359,
"assert": 360,
"unit": 361,
"Ġthe": 362,
"EMEN": 363,
"END": 364,
"TestCase": 365,
"ite": 366,
"ĠElse": 367,
"ĠEXER": 368,
"ĠSOLU": 369,
"ĠSTAT": 370,
"Ġthan": 371,
"Ġthat": 372,
"CISE": 373,
"Equal": 374,
"OBLEM": 375,
"TEST": 376,
"TION": 377,
"class": 378,
"put": 379,
"Ġself": 380,
"Ġinput": 381,
"ĠCASES": 382,
"ĠPROBLEM": 383,
"ĠWrite": 384,
"Ġwill": 385,
"assertEqual": 386,
"unittest": 387,
"EMENT": 388,
"ĠEXERCISE": 389,
"ĠSOLUTION": 390,
"ĠSTATEMENT": 391,
"assertEquals": 392,
"Ġ5": 393,
"Co": 394,
"ee": 395,
"ff": 396,
"Coff": 397,
"Coffee": 398,
"er": 399,
"gr": 400,
"ter": 401,
"Ġ>": 402,
"Ġgr": 403,
"eater": 404,
"Ġgreater": 405,
"Tea": 406,
"Ju": 407,
"ce": 408,
"ice": 409,
"Juice": 410,
"les": 411,
"Ġ<": 412,
"Ġles": 413,
"Ġless": 414,
"Ro": 415,
"bin": 416,
"Robin": 417,
"(-": 418,
"10": 419,
"Ko": 420,
"hi": 421,
"Kohi": 422
},
"merges": [
"- -",
"Ġ Ġ",
"s e",
"i n",
"e s",
"p r",
"Ċ ĠĠ",
"Ġ i",
"Ġ t",
"Ġ --",
"Ġ pr",
"in t",
"es t",
"Ġpr int",
"a s",
"o n",
") :",
"u n",
"Ġ \"",
"Ġt h",
"a r",
"\" )",
"( \"",
"E M",
"E N",
"E S",
"T est",
"a se",
"c t",
"d e",
"f un",
"i t",
"i on",
"l se",
"Ġ E",
"Ġ S",
"Ġ de",
"Ġi f",
"Ġth a",
"ct ion",
"fun ction",
"Ġde f",
"L ar",
"r y",
"Lar ry",
"e a",
"J as",
"Jas on",
"Ġ 0",
") ,",
"A S",
"A T",
"B L",
"C I",
"C ase",
"C AS",
"E R",
"E q",
"I O",
"L U",
"O BL",
"O LU",
"P R",
"S E",
"T ES",
"T AT",
"T IO",
"W r",
"X ER",
"a l",
"b ar",
"c l",
"c ase",
"e lse",
"i l",
"l f",
"p u",
"r t",
"t est",
"u al",
"w il",
"Ċ Ġ",
"Ġ (",
"Ġ a",
"Ġ se",
"Ġ in",
"Ġ Test",
"Ġ function",
"Ġ CAS",
"Ġ PR",
"Ġ Wr",
"Ġ bar",
"Ġ else",
"Ġ wil",
"ĠĠ Ġ",
"se rt",
"ĊĠĠ Ġ",
"ĊĠĠ ĠĠĠ",
"Ġi s",
"Ġi t",
"Ġt est",
"Ġprint s",
"as s",
"as sert",
"un it",
"Ġth e",
"EM EN",
"EN D",
"Test Case",
"it e",
"ĠE lse",
"ĠE XER",
"ĠS OLU",
"ĠS TAT",
"Ġtha n",
"Ġtha t",
"CI SE",
"Eq ual",
"OBL EM",
"TES T",
"TIO N",
"cl ass",
"pu t",
"Ġse lf",
"Ġin put",
"ĠCAS ES",
"ĠPR OBLEM",
"ĠWr ite",
"Ġwil l",
"assert Equal",
"unit test",
"EMEN T",
"ĠEXER CISE",
"ĠSOLU TION",
"ĠSTAT EMENT",
"assertEqual s",
"Ġ 5",
"C o",
"e e",
"f f",
"Co ff",
"Coff ee",
"e r",
"g r",
"t er",
"Ġ >",
"Ġ gr",
"ea ter",
"Ġgr eater",
"T ea",
"J u",
"c e",
"i ce",
"Ju ice",
"l es",
"Ġ <",
"Ġ les",
"Ġles s",
"R o",
"b in",
"Ro bin",
"( -",
"1 0",
"K o",
"h i",
"Ko hi"
]
}
}