Mlxa committed on
Commit
a815421
1 Parent(s): e06d821

Upload tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +4 -0
  2. tokenizer.json +539 -0
  3. tokenizer_config.json +25 -0
special_tokens_map.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "pad_token": "[PAD]",
3
+ "unk_token": "[UNK]"
4
+ }
tokenizer.json ADDED
@@ -0,0 +1,539 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
5
+ "added_tokens": [
6
+ {
7
+ "id": 500,
8
+ "content": "[UNK]",
9
+ "single_word": false,
10
+ "lstrip": false,
11
+ "rstrip": false,
12
+ "normalized": false,
13
+ "special": true
14
+ },
15
+ {
16
+ "id": 501,
17
+ "content": "[PAD]",
18
+ "single_word": false,
19
+ "lstrip": false,
20
+ "rstrip": false,
21
+ "normalized": false,
22
+ "special": true
23
+ }
24
+ ],
25
+ "normalizer": null,
26
+ "pre_tokenizer": {
27
+ "type": "WhitespaceSplit"
28
+ },
29
+ "post_processor": null,
30
+ "decoder": null,
31
+ "model": {
32
+ "type": "WordLevel",
33
+ "vocab": {
34
+ "<1": 0,
35
+ "1>": 1,
36
+ "<2": 2,
37
+ "2>": 3,
38
+ "<3": 4,
39
+ "3>": 5,
40
+ "<4": 6,
41
+ "4>": 7,
42
+ "<5": 8,
43
+ "5>": 9,
44
+ "<6": 10,
45
+ "6>": 11,
46
+ "<7": 12,
47
+ "7>": 13,
48
+ "<8": 14,
49
+ "8>": 15,
50
+ "<9": 16,
51
+ "9>": 17,
52
+ "<10": 18,
53
+ "10>": 19,
54
+ "<11": 20,
55
+ "11>": 21,
56
+ "<12": 22,
57
+ "12>": 23,
58
+ "<13": 24,
59
+ "13>": 25,
60
+ "<14": 26,
61
+ "14>": 27,
62
+ "<15": 28,
63
+ "15>": 29,
64
+ "<16": 30,
65
+ "16>": 31,
66
+ "<17": 32,
67
+ "17>": 33,
68
+ "<18": 34,
69
+ "18>": 35,
70
+ "<19": 36,
71
+ "19>": 37,
72
+ "<20": 38,
73
+ "20>": 39,
74
+ "<21": 40,
75
+ "21>": 41,
76
+ "<22": 42,
77
+ "22>": 43,
78
+ "<23": 44,
79
+ "23>": 45,
80
+ "<24": 46,
81
+ "24>": 47,
82
+ "<25": 48,
83
+ "25>": 49,
84
+ "<26": 50,
85
+ "26>": 51,
86
+ "<27": 52,
87
+ "27>": 53,
88
+ "<28": 54,
89
+ "28>": 55,
90
+ "<29": 56,
91
+ "29>": 57,
92
+ "<30": 58,
93
+ "30>": 59,
94
+ "<31": 60,
95
+ "31>": 61,
96
+ "<32": 62,
97
+ "32>": 63,
98
+ "<33": 64,
99
+ "33>": 65,
100
+ "<34": 66,
101
+ "34>": 67,
102
+ "<35": 68,
103
+ "35>": 69,
104
+ "<36": 70,
105
+ "36>": 71,
106
+ "<37": 72,
107
+ "37>": 73,
108
+ "<38": 74,
109
+ "38>": 75,
110
+ "<39": 76,
111
+ "39>": 77,
112
+ "<40": 78,
113
+ "40>": 79,
114
+ "<41": 80,
115
+ "41>": 81,
116
+ "<42": 82,
117
+ "42>": 83,
118
+ "<43": 84,
119
+ "43>": 85,
120
+ "<44": 86,
121
+ "44>": 87,
122
+ "<45": 88,
123
+ "45>": 89,
124
+ "<46": 90,
125
+ "46>": 91,
126
+ "<47": 92,
127
+ "47>": 93,
128
+ "<48": 94,
129
+ "48>": 95,
130
+ "<49": 96,
131
+ "49>": 97,
132
+ "<50": 98,
133
+ "50>": 99,
134
+ "<51": 100,
135
+ "51>": 101,
136
+ "<52": 102,
137
+ "52>": 103,
138
+ "<53": 104,
139
+ "53>": 105,
140
+ "<54": 106,
141
+ "54>": 107,
142
+ "<55": 108,
143
+ "55>": 109,
144
+ "<56": 110,
145
+ "56>": 111,
146
+ "<57": 112,
147
+ "57>": 113,
148
+ "<58": 114,
149
+ "58>": 115,
150
+ "<59": 116,
151
+ "59>": 117,
152
+ "<60": 118,
153
+ "60>": 119,
154
+ "<61": 120,
155
+ "61>": 121,
156
+ "<62": 122,
157
+ "62>": 123,
158
+ "<63": 124,
159
+ "63>": 125,
160
+ "<64": 126,
161
+ "64>": 127,
162
+ "<65": 128,
163
+ "65>": 129,
164
+ "<66": 130,
165
+ "66>": 131,
166
+ "<67": 132,
167
+ "67>": 133,
168
+ "<68": 134,
169
+ "68>": 135,
170
+ "<69": 136,
171
+ "69>": 137,
172
+ "<70": 138,
173
+ "70>": 139,
174
+ "<71": 140,
175
+ "71>": 141,
176
+ "<72": 142,
177
+ "72>": 143,
178
+ "<73": 144,
179
+ "73>": 145,
180
+ "<74": 146,
181
+ "74>": 147,
182
+ "<75": 148,
183
+ "75>": 149,
184
+ "<76": 150,
185
+ "76>": 151,
186
+ "<77": 152,
187
+ "77>": 153,
188
+ "<78": 154,
189
+ "78>": 155,
190
+ "<79": 156,
191
+ "79>": 157,
192
+ "<80": 158,
193
+ "80>": 159,
194
+ "<81": 160,
195
+ "81>": 161,
196
+ "<82": 162,
197
+ "82>": 163,
198
+ "<83": 164,
199
+ "83>": 165,
200
+ "<84": 166,
201
+ "84>": 167,
202
+ "<85": 168,
203
+ "85>": 169,
204
+ "<86": 170,
205
+ "86>": 171,
206
+ "<87": 172,
207
+ "87>": 173,
208
+ "<88": 174,
209
+ "88>": 175,
210
+ "<89": 176,
211
+ "89>": 177,
212
+ "<90": 178,
213
+ "90>": 179,
214
+ "<91": 180,
215
+ "91>": 181,
216
+ "<92": 182,
217
+ "92>": 183,
218
+ "<93": 184,
219
+ "93>": 185,
220
+ "<94": 186,
221
+ "94>": 187,
222
+ "<95": 188,
223
+ "95>": 189,
224
+ "<96": 190,
225
+ "96>": 191,
226
+ "<97": 192,
227
+ "97>": 193,
228
+ "<98": 194,
229
+ "98>": 195,
230
+ "<99": 196,
231
+ "99>": 197,
232
+ "<100": 198,
233
+ "100>": 199,
234
+ "<101": 200,
235
+ "101>": 201,
236
+ "<102": 202,
237
+ "102>": 203,
238
+ "<103": 204,
239
+ "103>": 205,
240
+ "<104": 206,
241
+ "104>": 207,
242
+ "<105": 208,
243
+ "105>": 209,
244
+ "<106": 210,
245
+ "106>": 211,
246
+ "<107": 212,
247
+ "107>": 213,
248
+ "<108": 214,
249
+ "108>": 215,
250
+ "<109": 216,
251
+ "109>": 217,
252
+ "<110": 218,
253
+ "110>": 219,
254
+ "<111": 220,
255
+ "111>": 221,
256
+ "<112": 222,
257
+ "112>": 223,
258
+ "<113": 224,
259
+ "113>": 225,
260
+ "<114": 226,
261
+ "114>": 227,
262
+ "<115": 228,
263
+ "115>": 229,
264
+ "<116": 230,
265
+ "116>": 231,
266
+ "<117": 232,
267
+ "117>": 233,
268
+ "<118": 234,
269
+ "118>": 235,
270
+ "<119": 236,
271
+ "119>": 237,
272
+ "<120": 238,
273
+ "120>": 239,
274
+ "<121": 240,
275
+ "121>": 241,
276
+ "<122": 242,
277
+ "122>": 243,
278
+ "<123": 244,
279
+ "123>": 245,
280
+ "<124": 246,
281
+ "124>": 247,
282
+ "<125": 248,
283
+ "125>": 249,
284
+ "<126": 250,
285
+ "126>": 251,
286
+ "<127": 252,
287
+ "127>": 253,
288
+ "<128": 254,
289
+ "128>": 255,
290
+ "<129": 256,
291
+ "129>": 257,
292
+ "<130": 258,
293
+ "130>": 259,
294
+ "<131": 260,
295
+ "131>": 261,
296
+ "<132": 262,
297
+ "132>": 263,
298
+ "<133": 264,
299
+ "133>": 265,
300
+ "<134": 266,
301
+ "134>": 267,
302
+ "<135": 268,
303
+ "135>": 269,
304
+ "<136": 270,
305
+ "136>": 271,
306
+ "<137": 272,
307
+ "137>": 273,
308
+ "<138": 274,
309
+ "138>": 275,
310
+ "<139": 276,
311
+ "139>": 277,
312
+ "<140": 278,
313
+ "140>": 279,
314
+ "<141": 280,
315
+ "141>": 281,
316
+ "<142": 282,
317
+ "142>": 283,
318
+ "<143": 284,
319
+ "143>": 285,
320
+ "<144": 286,
321
+ "144>": 287,
322
+ "<145": 288,
323
+ "145>": 289,
324
+ "<146": 290,
325
+ "146>": 291,
326
+ "<147": 292,
327
+ "147>": 293,
328
+ "<148": 294,
329
+ "148>": 295,
330
+ "<149": 296,
331
+ "149>": 297,
332
+ "<150": 298,
333
+ "150>": 299,
334
+ "<151": 300,
335
+ "151>": 301,
336
+ "<152": 302,
337
+ "152>": 303,
338
+ "<153": 304,
339
+ "153>": 305,
340
+ "<154": 306,
341
+ "154>": 307,
342
+ "<155": 308,
343
+ "155>": 309,
344
+ "<156": 310,
345
+ "156>": 311,
346
+ "<157": 312,
347
+ "157>": 313,
348
+ "<158": 314,
349
+ "158>": 315,
350
+ "<159": 316,
351
+ "159>": 317,
352
+ "<160": 318,
353
+ "160>": 319,
354
+ "<161": 320,
355
+ "161>": 321,
356
+ "<162": 322,
357
+ "162>": 323,
358
+ "<163": 324,
359
+ "163>": 325,
360
+ "<164": 326,
361
+ "164>": 327,
362
+ "<165": 328,
363
+ "165>": 329,
364
+ "<166": 330,
365
+ "166>": 331,
366
+ "<167": 332,
367
+ "167>": 333,
368
+ "<168": 334,
369
+ "168>": 335,
370
+ "<169": 336,
371
+ "169>": 337,
372
+ "<170": 338,
373
+ "170>": 339,
374
+ "<171": 340,
375
+ "171>": 341,
376
+ "<172": 342,
377
+ "172>": 343,
378
+ "<173": 344,
379
+ "173>": 345,
380
+ "<174": 346,
381
+ "174>": 347,
382
+ "<175": 348,
383
+ "175>": 349,
384
+ "<176": 350,
385
+ "176>": 351,
386
+ "<177": 352,
387
+ "177>": 353,
388
+ "<178": 354,
389
+ "178>": 355,
390
+ "<179": 356,
391
+ "179>": 357,
392
+ "<180": 358,
393
+ "180>": 359,
394
+ "<181": 360,
395
+ "181>": 361,
396
+ "<182": 362,
397
+ "182>": 363,
398
+ "<183": 364,
399
+ "183>": 365,
400
+ "<184": 366,
401
+ "184>": 367,
402
+ "<185": 368,
403
+ "185>": 369,
404
+ "<186": 370,
405
+ "186>": 371,
406
+ "<187": 372,
407
+ "187>": 373,
408
+ "<188": 374,
409
+ "188>": 375,
410
+ "<189": 376,
411
+ "189>": 377,
412
+ "<190": 378,
413
+ "190>": 379,
414
+ "<191": 380,
415
+ "191>": 381,
416
+ "<192": 382,
417
+ "192>": 383,
418
+ "<193": 384,
419
+ "193>": 385,
420
+ "<194": 386,
421
+ "194>": 387,
422
+ "<195": 388,
423
+ "195>": 389,
424
+ "<196": 390,
425
+ "196>": 391,
426
+ "<197": 392,
427
+ "197>": 393,
428
+ "<198": 394,
429
+ "198>": 395,
430
+ "<199": 396,
431
+ "199>": 397,
432
+ "<200": 398,
433
+ "200>": 399,
434
+ "<201": 400,
435
+ "201>": 401,
436
+ "<202": 402,
437
+ "202>": 403,
438
+ "<203": 404,
439
+ "203>": 405,
440
+ "<204": 406,
441
+ "204>": 407,
442
+ "<205": 408,
443
+ "205>": 409,
444
+ "<206": 410,
445
+ "206>": 411,
446
+ "<207": 412,
447
+ "207>": 413,
448
+ "<208": 414,
449
+ "208>": 415,
450
+ "<209": 416,
451
+ "209>": 417,
452
+ "<210": 418,
453
+ "210>": 419,
454
+ "<211": 420,
455
+ "211>": 421,
456
+ "<212": 422,
457
+ "212>": 423,
458
+ "<213": 424,
459
+ "213>": 425,
460
+ "<214": 426,
461
+ "214>": 427,
462
+ "<215": 428,
463
+ "215>": 429,
464
+ "<216": 430,
465
+ "216>": 431,
466
+ "<217": 432,
467
+ "217>": 433,
468
+ "<218": 434,
469
+ "218>": 435,
470
+ "<219": 436,
471
+ "219>": 437,
472
+ "<220": 438,
473
+ "220>": 439,
474
+ "<221": 440,
475
+ "221>": 441,
476
+ "<222": 442,
477
+ "222>": 443,
478
+ "<223": 444,
479
+ "223>": 445,
480
+ "<224": 446,
481
+ "224>": 447,
482
+ "<225": 448,
483
+ "225>": 449,
484
+ "<226": 450,
485
+ "226>": 451,
486
+ "<227": 452,
487
+ "227>": 453,
488
+ "<228": 454,
489
+ "228>": 455,
490
+ "<229": 456,
491
+ "229>": 457,
492
+ "<230": 458,
493
+ "230>": 459,
494
+ "<231": 460,
495
+ "231>": 461,
496
+ "<232": 462,
497
+ "232>": 463,
498
+ "<233": 464,
499
+ "233>": 465,
500
+ "<234": 466,
501
+ "234>": 467,
502
+ "<235": 468,
503
+ "235>": 469,
504
+ "<236": 470,
505
+ "236>": 471,
506
+ "<237": 472,
507
+ "237>": 473,
508
+ "<238": 474,
509
+ "238>": 475,
510
+ "<239": 476,
511
+ "239>": 477,
512
+ "<240": 478,
513
+ "240>": 479,
514
+ "<241": 480,
515
+ "241>": 481,
516
+ "<242": 482,
517
+ "242>": 483,
518
+ "<243": 484,
519
+ "243>": 485,
520
+ "<244": 486,
521
+ "244>": 487,
522
+ "<245": 488,
523
+ "245>": 489,
524
+ "<246": 490,
525
+ "246>": 491,
526
+ "<247": 492,
527
+ "247>": 493,
528
+ "<248": 494,
529
+ "248>": 495,
530
+ "<249": 496,
531
+ "249>": 497,
532
+ "<250": 498,
533
+ "250>": 499,
534
+ "[UNK]": 500,
535
+ "[PAD]": 501
536
+ },
537
+ "unk_token": "[UNK]"
538
+ }
539
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "500": {
4
+ "content": "[UNK]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "501": {
12
+ "content": "[PAD]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ }
19
+ },
20
+ "clean_up_tokenization_spaces": true,
21
+ "model_max_length": 1000000000000000019884624838656,
22
+ "pad_token": "[PAD]",
23
+ "tokenizer_class": "PreTrainedTokenizerFast",
24
+ "unk_token": "[UNK]"
25
+ }