BioTokenizer-BFD-WPC-400 / tokenizer.json
dotan1111's picture
Upload 2 files
d3cf8b9
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "<UNK>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": {
"type": "Lowercase"
},
"pre_tokenizer": {
"type": "Whitespace"
},
"post_processor": null,
"decoder": null,
"model": {
"type": "WordPiece",
"unk_token": "<UNK>",
"continuing_subword_prefix": "##",
"max_input_chars_per_word": 10000,
"vocab": {
"<UNK>": 0,
"a": 1,
"b": 2,
"c": 3,
"d": 4,
"e": 5,
"f": 6,
"g": 7,
"h": 8,
"i": 9,
"k": 10,
"l": 11,
"m": 12,
"n": 13,
"o": 14,
"p": 15,
"q": 16,
"r": 17,
"s": 18,
"t": 19,
"u": 20,
"v": 21,
"w": 22,
"x": 23,
"y": 24,
"z": 25,
"##t": 26,
"##i": 27,
"##p": 28,
"##g": 29,
"##a": 30,
"##f": 31,
"##v": 32,
"##h": 33,
"##n": 34,
"##w": 35,
"##l": 36,
"##d": 37,
"##q": 38,
"##y": 39,
"##e": 40,
"##s": 41,
"##r": 42,
"##k": 43,
"##m": 44,
"##c": 45,
"##x": 46,
"##b": 47,
"##o": 48,
"##u": 49,
"##z": 50,
"##aa": 51,
"##ll": 52,
"##la": 53,
"##gg": 54,
"##rr": 55,
"##va": 56,
"##ga": 57,
"##ra": 58,
"##lv": 59,
"##pa": 60,
"##lg": 61,
"##sa": 62,
"##lr": 63,
"##ea": 64,
"##vv": 65,
"##da": 66,
"##ta": 67,
"##ls": 68,
"##lp": 69,
"##ia": 70,
"##ld": 71,
"##gr": 72,
"##le": 73,
"##ss": 74,
"##gv": 75,
"##lt": 76,
"##gs": 77,
"##er": 78,
"##gt": 79,
"##gd": 80,
"##li": 81,
"##pp": 82,
"##vr": 83,
"##ge": 84,
"##qa": 85,
"##fa": 86,
"##lk": 87,
"##vt": 88,
"##vs": 89,
"##gi": 90,
"##vd": 91,
"##ve": 92,
"##lf": 93,
"##pr": 94,
"##ka": 95,
"##dr": 96,
"##lq": 97,
"##ps": 98,
"##ee": 99,
"##tt": 100,
"##gk": 101,
"##na": 102,
"##sr": 103,
"##pd": 104,
"##vi": 105,
"##pe": 106,
"##gf": 107,
"##ln": 108,
"##pt": 109,
"##gq": 110,
"##ha": 111,
"##st": 112,
"##dd": 113,
"##qr": 114,
"##gp": 115,
"##ei": 116,
"##ya": 117,
"##kk": 118,
"##gn": 119,
"##lh": 120,
"##vp": 121,
"##tr": 122,
"##vf": 123,
"##si": 124,
"##de": 125,
"##ma": 126,
"##ly": 127,
"##aaa": 128,
"##ir": 129,
"##vk": 130,
"##gy": 131,
"##ts": 132,
"##ti": 133,
"##vn": 134,
"##kr": 135,
"##gh": 136,
"##vq": 137,
"##sd": 138,
"##se": 139,
"##sf": 140,
"##ie": 141,
"##id": 142,
"##lm": 143,
"##hr": 144,
"##fr": 145,
"##laa": 146,
"##sp": 147,
"##td": 148,
"##ke": 149,
"##te": 150,
"##nr": 151,
"##fd": 152,
"##tp": 153,
"##yr": 154,
"##gm": 155,
"##ki": 156,
"##qq": 157,
"##pi": 158,
"##ff": 159,
"##pv": 160,
"##kd": 161,
"##ca": 162,
"##sn": 163,
"##ed": 164,
"##gw": 165,
"##sq": 166,
"##sv": 167,
"##lw": 168,
"##tf": 169,
"##pq": 170,
"##re": 171,
"##lla": 172,
"##tv": 173,
"##sk": 174,
"##pf": 175,
"##ii": 176,
"##eq": 177,
"##tn": 178,
"##lc": 179,
"##rd": 180,
"##vh": 181,
"##pn": 182,
"##vy": 183,
"##vg": 184,
"##kn": 185,
"##di": 186,
"##tq": 187,
"##fe": 188,
"##wa": 189,
"##sy": 190,
"##mr": 191,
"##qi": 192,
"##pk": 193,
"##ek": 194,
"##gc": 195,
"##gaa": 196,
"##ni": 197,
"##vm": 198,
"##th": 199,
"##tk": 200,
"##yd": 201,
"##fi": 202,
"##nd": 203,
"##ri": 204,
"##sh": 205,
"##ph": 206,
"##lrr": 207,
"##qe": 208,
"##lva": 209,
"##vaa": 210,
"##ty": 211,
"##gga": 212,
"##qd": 213,
"##sg": 214,
"##lga": 215,
"##wr": 216,
"##py": 217,
"##pg": 218,
"##lra": 219,
"##nn": 220,
"##vc": 221,
"##sm": 222,
"##hd": 223,
"##rra": 224,
"##lar": 225,
"##ne": 226,
"##kq": 227,
"##lgg": 228,
"##cr": 229,
"##tg": 230,
"##df": 231,
"##tm": 232,
"##ye": 233,
"##aar": 234,
"##rrr": 235,
"##fs": 236,
"##he": 237,
"##lpa": 238,
"##if": 239,
"##pm": 240,
"##dq": 241,
"##is": 242,
"##gla": 243,
"##lsa": 244,
"##vla": 245,
"##nf": 246,
"##lda": 247,
"##vw": 248,
"##rs": 249,
"##paa": 250,
"##ks": 251,
"##ef": 252,
"##kt": 253,
"##rq": 254,
"##et": 255,
"##kf": 256,
"##yf": 257,
"##lea": 258,
"##in": 259,
"##dv": 260,
"##gll": 261,
"##es": 262,
"##en": 263,
"##lta": 264,
"##it": 265,
"##ev": 266,
"##eh": 267,
"##dp": 268,
"##iv": 269,
"##qf": 270,
"##rf": 271,
"##qs": 272,
"##ky": 273,
"##ep": 274,
"##ds": 275,
"##qt": 276,
"##rp": 277,
"##hh": 278,
"##qn": 279,
"##gva": 280,
"##dy": 281,
"##gra": 282,
"##qp": 283,
"##kp": 284,
"##dt": 285,
"##em": 286,
"##lia": 287,
"##vva": 288,
"##vll": 289,
"##dh": 290,
"##rt": 291,
"##dn": 292,
"##qk": 293,
"##iy": 294,
"##sw": 295,
"##grr": 296,
"##ft": 297,
"##glv": 298,
"##glg": 299,
"##fn": 300,
"##sc": 301,
"##rh": 302,
"##km": 303,
"##ip": 304,
"##qh": 305,
"##rv": 306,
"##ey": 307,
"##ik": 308,
"##qv": 309,
"##dk": 310,
"##pw": 311,
"##ih": 312,
"##saa": 313,
"##gvv": 314,
"##rn": 315,
"##fy": 316,
"##iq": 317,
"##tw": 318,
"##taa": 319,
"##kh": 320,
"##ny": 321,
"##rar": 322,
"##llr": 323,
"##qy": 324,
"##vlv": 325,
"##gsa": 326,
"##qm": 327,
"##vga": 328,
"##dm": 329,
"##glr": 330,
"##gls": 331,
"##ggr": 332,
"##ns": 333,
"##np": 334,
"##glp": 335,
"##kv": 336,
"##fv": 337,
"##yy": 338,
"##nt": 339,
"##fh": 340,
"##lfa": 341,
"##gar": 342,
"##mi": 343,
"##gta": 344,
"##eaa": 345,
"##dw": 346,
"##lka": 347,
"##lqa": 348,
"##iaa": 349,
"##hp": 350,
"##vlg": 351,
"##tc": 352,
"##gpa": 353,
"##daa": 354,
"##lae": 355,
"##gia": 356,
"##var": 357,
"##vgg": 358,
"##ler": 359,
"##gld": 360,
"##gda": 361,
"##pc": 362,
"##glt": 363,
"##gea": 364,
"##im": 365,
"##nq": 366,
"##hf": 367,
"##sll": 368,
"##lgr": 369,
"##lad": 370,
"##vra": 371,
"##nk": 372,
"##gss": 373,
"xm": 374,
"##fp": 375,
"##fq": 376,
"##yi": 377,
"##sla": 378,
"##par": 379,
"##yt": 380,
"##hi": 381,
"##ys": 382,
"##rk": 383,
"##yn": 384,
"##pga": 385,
"##fk": 386,
"##pll": 387,
"##sga": 388,
"##sgg": 389,
"##ew": 390,
"##tla": 391,
"##gle": 392,
"##vrr": 393,
"##vlr": 394,
"##vld": 395,
"##ger": 396,
"##md": 397,
"##vsa": 398,
"##mt": 399
}
}
}