bilguun committed on
Commit
786326c
1 Parent(s): de8506d

Upload tokenizer

Browse files
Files changed (4) hide show
  1. special_tokens_map.json +8 -0
  2. spiece.model +2 -2
  3. tokenizer.json +0 -0
  4. tokenizer_config.json +110 -100
special_tokens_map.json CHANGED
@@ -1,5 +1,6 @@
1
  {
2
  "additional_special_tokens": [
 
3
  "<extra_id_0>",
4
  "<extra_id_1>",
5
  "<extra_id_2>",
@@ -101,6 +102,13 @@
101
  "<extra_id_98>",
102
  "<extra_id_99>"
103
  ],
 
 
 
 
 
 
 
104
  "eos_token": {
105
  "content": "</s>",
106
  "lstrip": false,
 
1
  {
2
  "additional_special_tokens": [
3
+ "<s>",
4
  "<extra_id_0>",
5
  "<extra_id_1>",
6
  "<extra_id_2>",
 
102
  "<extra_id_98>",
103
  "<extra_id_99>"
104
  ],
105
+ "bos_token": {
106
+ "content": "<s>",
107
+ "lstrip": false,
108
+ "normalized": false,
109
+ "rstrip": false,
110
+ "single_word": false
111
+ },
112
  "eos_token": {
113
  "content": "</s>",
114
  "lstrip": false,
spiece.model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:64164c709b102402833d23138a9013273299a8940358030e1623a16c4e0cb121
3
- size 969536
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:829ed13b97a7ef734aaf095db01da9557aa389fffa99c9a85d483ada98a4bbe6
3
+ size 413481
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -9,6 +9,14 @@
9
  "single_word": false,
10
  "special": true
11
  },
 
 
 
 
 
 
 
 
12
  "2": {
13
  "content": "</s>",
14
  "lstrip": false,
@@ -25,7 +33,7 @@
25
  "single_word": false,
26
  "special": true
27
  },
28
- "32000": {
29
  "content": "<extra_id_99>",
30
  "lstrip": false,
31
  "normalized": false,
@@ -33,7 +41,7 @@
33
  "single_word": false,
34
  "special": true
35
  },
36
- "32001": {
37
  "content": "<extra_id_98>",
38
  "lstrip": false,
39
  "normalized": false,
@@ -41,7 +49,7 @@
41
  "single_word": false,
42
  "special": true
43
  },
44
- "32002": {
45
  "content": "<extra_id_97>",
46
  "lstrip": false,
47
  "normalized": false,
@@ -49,7 +57,7 @@
49
  "single_word": false,
50
  "special": true
51
  },
52
- "32003": {
53
  "content": "<extra_id_96>",
54
  "lstrip": false,
55
  "normalized": false,
@@ -57,7 +65,7 @@
57
  "single_word": false,
58
  "special": true
59
  },
60
- "32004": {
61
  "content": "<extra_id_95>",
62
  "lstrip": false,
63
  "normalized": false,
@@ -65,7 +73,7 @@
65
  "single_word": false,
66
  "special": true
67
  },
68
- "32005": {
69
  "content": "<extra_id_94>",
70
  "lstrip": false,
71
  "normalized": false,
@@ -73,7 +81,7 @@
73
  "single_word": false,
74
  "special": true
75
  },
76
- "32006": {
77
  "content": "<extra_id_93>",
78
  "lstrip": false,
79
  "normalized": false,
@@ -81,7 +89,7 @@
81
  "single_word": false,
82
  "special": true
83
  },
84
- "32007": {
85
  "content": "<extra_id_92>",
86
  "lstrip": false,
87
  "normalized": false,
@@ -89,7 +97,7 @@
89
  "single_word": false,
90
  "special": true
91
  },
92
- "32008": {
93
  "content": "<extra_id_91>",
94
  "lstrip": false,
95
  "normalized": false,
@@ -97,7 +105,7 @@
97
  "single_word": false,
98
  "special": true
99
  },
100
- "32009": {
101
  "content": "<extra_id_90>",
102
  "lstrip": false,
103
  "normalized": false,
@@ -105,7 +113,7 @@
105
  "single_word": false,
106
  "special": true
107
  },
108
- "32010": {
109
  "content": "<extra_id_89>",
110
  "lstrip": false,
111
  "normalized": false,
@@ -113,7 +121,7 @@
113
  "single_word": false,
114
  "special": true
115
  },
116
- "32011": {
117
  "content": "<extra_id_88>",
118
  "lstrip": false,
119
  "normalized": false,
@@ -121,7 +129,7 @@
121
  "single_word": false,
122
  "special": true
123
  },
124
- "32012": {
125
  "content": "<extra_id_87>",
126
  "lstrip": false,
127
  "normalized": false,
@@ -129,7 +137,7 @@
129
  "single_word": false,
130
  "special": true
131
  },
132
- "32013": {
133
  "content": "<extra_id_86>",
134
  "lstrip": false,
135
  "normalized": false,
@@ -137,7 +145,7 @@
137
  "single_word": false,
138
  "special": true
139
  },
140
- "32014": {
141
  "content": "<extra_id_85>",
142
  "lstrip": false,
143
  "normalized": false,
@@ -145,7 +153,7 @@
145
  "single_word": false,
146
  "special": true
147
  },
148
- "32015": {
149
  "content": "<extra_id_84>",
150
  "lstrip": false,
151
  "normalized": false,
@@ -153,7 +161,7 @@
153
  "single_word": false,
154
  "special": true
155
  },
156
- "32016": {
157
  "content": "<extra_id_83>",
158
  "lstrip": false,
159
  "normalized": false,
@@ -161,7 +169,7 @@
161
  "single_word": false,
162
  "special": true
163
  },
164
- "32017": {
165
  "content": "<extra_id_82>",
166
  "lstrip": false,
167
  "normalized": false,
@@ -169,7 +177,7 @@
169
  "single_word": false,
170
  "special": true
171
  },
172
- "32018": {
173
  "content": "<extra_id_81>",
174
  "lstrip": false,
175
  "normalized": false,
@@ -177,7 +185,7 @@
177
  "single_word": false,
178
  "special": true
179
  },
180
- "32019": {
181
  "content": "<extra_id_80>",
182
  "lstrip": false,
183
  "normalized": false,
@@ -185,7 +193,7 @@
185
  "single_word": false,
186
  "special": true
187
  },
188
- "32020": {
189
  "content": "<extra_id_79>",
190
  "lstrip": false,
191
  "normalized": false,
@@ -193,7 +201,7 @@
193
  "single_word": false,
194
  "special": true
195
  },
196
- "32021": {
197
  "content": "<extra_id_78>",
198
  "lstrip": false,
199
  "normalized": false,
@@ -201,7 +209,7 @@
201
  "single_word": false,
202
  "special": true
203
  },
204
- "32022": {
205
  "content": "<extra_id_77>",
206
  "lstrip": false,
207
  "normalized": false,
@@ -209,7 +217,7 @@
209
  "single_word": false,
210
  "special": true
211
  },
212
- "32023": {
213
  "content": "<extra_id_76>",
214
  "lstrip": false,
215
  "normalized": false,
@@ -217,7 +225,7 @@
217
  "single_word": false,
218
  "special": true
219
  },
220
- "32024": {
221
  "content": "<extra_id_75>",
222
  "lstrip": false,
223
  "normalized": false,
@@ -225,7 +233,7 @@
225
  "single_word": false,
226
  "special": true
227
  },
228
- "32025": {
229
  "content": "<extra_id_74>",
230
  "lstrip": false,
231
  "normalized": false,
@@ -233,7 +241,7 @@
233
  "single_word": false,
234
  "special": true
235
  },
236
- "32026": {
237
  "content": "<extra_id_73>",
238
  "lstrip": false,
239
  "normalized": false,
@@ -241,7 +249,7 @@
241
  "single_word": false,
242
  "special": true
243
  },
244
- "32027": {
245
  "content": "<extra_id_72>",
246
  "lstrip": false,
247
  "normalized": false,
@@ -249,7 +257,7 @@
249
  "single_word": false,
250
  "special": true
251
  },
252
- "32028": {
253
  "content": "<extra_id_71>",
254
  "lstrip": false,
255
  "normalized": false,
@@ -257,7 +265,7 @@
257
  "single_word": false,
258
  "special": true
259
  },
260
- "32029": {
261
  "content": "<extra_id_70>",
262
  "lstrip": false,
263
  "normalized": false,
@@ -265,7 +273,7 @@
265
  "single_word": false,
266
  "special": true
267
  },
268
- "32030": {
269
  "content": "<extra_id_69>",
270
  "lstrip": false,
271
  "normalized": false,
@@ -273,7 +281,7 @@
273
  "single_word": false,
274
  "special": true
275
  },
276
- "32031": {
277
  "content": "<extra_id_68>",
278
  "lstrip": false,
279
  "normalized": false,
@@ -281,7 +289,7 @@
281
  "single_word": false,
282
  "special": true
283
  },
284
- "32032": {
285
  "content": "<extra_id_67>",
286
  "lstrip": false,
287
  "normalized": false,
@@ -289,7 +297,7 @@
289
  "single_word": false,
290
  "special": true
291
  },
292
- "32033": {
293
  "content": "<extra_id_66>",
294
  "lstrip": false,
295
  "normalized": false,
@@ -297,7 +305,7 @@
297
  "single_word": false,
298
  "special": true
299
  },
300
- "32034": {
301
  "content": "<extra_id_65>",
302
  "lstrip": false,
303
  "normalized": false,
@@ -305,7 +313,7 @@
305
  "single_word": false,
306
  "special": true
307
  },
308
- "32035": {
309
  "content": "<extra_id_64>",
310
  "lstrip": false,
311
  "normalized": false,
@@ -313,7 +321,7 @@
313
  "single_word": false,
314
  "special": true
315
  },
316
- "32036": {
317
  "content": "<extra_id_63>",
318
  "lstrip": false,
319
  "normalized": false,
@@ -321,7 +329,7 @@
321
  "single_word": false,
322
  "special": true
323
  },
324
- "32037": {
325
  "content": "<extra_id_62>",
326
  "lstrip": false,
327
  "normalized": false,
@@ -329,7 +337,7 @@
329
  "single_word": false,
330
  "special": true
331
  },
332
- "32038": {
333
  "content": "<extra_id_61>",
334
  "lstrip": false,
335
  "normalized": false,
@@ -337,7 +345,7 @@
337
  "single_word": false,
338
  "special": true
339
  },
340
- "32039": {
341
  "content": "<extra_id_60>",
342
  "lstrip": false,
343
  "normalized": false,
@@ -345,7 +353,7 @@
345
  "single_word": false,
346
  "special": true
347
  },
348
- "32040": {
349
  "content": "<extra_id_59>",
350
  "lstrip": false,
351
  "normalized": false,
@@ -353,7 +361,7 @@
353
  "single_word": false,
354
  "special": true
355
  },
356
- "32041": {
357
  "content": "<extra_id_58>",
358
  "lstrip": false,
359
  "normalized": false,
@@ -361,7 +369,7 @@
361
  "single_word": false,
362
  "special": true
363
  },
364
- "32042": {
365
  "content": "<extra_id_57>",
366
  "lstrip": false,
367
  "normalized": false,
@@ -369,7 +377,7 @@
369
  "single_word": false,
370
  "special": true
371
  },
372
- "32043": {
373
  "content": "<extra_id_56>",
374
  "lstrip": false,
375
  "normalized": false,
@@ -377,7 +385,7 @@
377
  "single_word": false,
378
  "special": true
379
  },
380
- "32044": {
381
  "content": "<extra_id_55>",
382
  "lstrip": false,
383
  "normalized": false,
@@ -385,7 +393,7 @@
385
  "single_word": false,
386
  "special": true
387
  },
388
- "32045": {
389
  "content": "<extra_id_54>",
390
  "lstrip": false,
391
  "normalized": false,
@@ -393,7 +401,7 @@
393
  "single_word": false,
394
  "special": true
395
  },
396
- "32046": {
397
  "content": "<extra_id_53>",
398
  "lstrip": false,
399
  "normalized": false,
@@ -401,7 +409,7 @@
401
  "single_word": false,
402
  "special": true
403
  },
404
- "32047": {
405
  "content": "<extra_id_52>",
406
  "lstrip": false,
407
  "normalized": false,
@@ -409,7 +417,7 @@
409
  "single_word": false,
410
  "special": true
411
  },
412
- "32048": {
413
  "content": "<extra_id_51>",
414
  "lstrip": false,
415
  "normalized": false,
@@ -417,7 +425,7 @@
417
  "single_word": false,
418
  "special": true
419
  },
420
- "32049": {
421
  "content": "<extra_id_50>",
422
  "lstrip": false,
423
  "normalized": false,
@@ -425,7 +433,7 @@
425
  "single_word": false,
426
  "special": true
427
  },
428
- "32050": {
429
  "content": "<extra_id_49>",
430
  "lstrip": false,
431
  "normalized": false,
@@ -433,7 +441,7 @@
433
  "single_word": false,
434
  "special": true
435
  },
436
- "32051": {
437
  "content": "<extra_id_48>",
438
  "lstrip": false,
439
  "normalized": false,
@@ -441,7 +449,7 @@
441
  "single_word": false,
442
  "special": true
443
  },
444
- "32052": {
445
  "content": "<extra_id_47>",
446
  "lstrip": false,
447
  "normalized": false,
@@ -449,7 +457,7 @@
449
  "single_word": false,
450
  "special": true
451
  },
452
- "32053": {
453
  "content": "<extra_id_46>",
454
  "lstrip": false,
455
  "normalized": false,
@@ -457,7 +465,7 @@
457
  "single_word": false,
458
  "special": true
459
  },
460
- "32054": {
461
  "content": "<extra_id_45>",
462
  "lstrip": false,
463
  "normalized": false,
@@ -465,7 +473,7 @@
465
  "single_word": false,
466
  "special": true
467
  },
468
- "32055": {
469
  "content": "<extra_id_44>",
470
  "lstrip": false,
471
  "normalized": false,
@@ -473,7 +481,7 @@
473
  "single_word": false,
474
  "special": true
475
  },
476
- "32056": {
477
  "content": "<extra_id_43>",
478
  "lstrip": false,
479
  "normalized": false,
@@ -481,7 +489,7 @@
481
  "single_word": false,
482
  "special": true
483
  },
484
- "32057": {
485
  "content": "<extra_id_42>",
486
  "lstrip": false,
487
  "normalized": false,
@@ -489,7 +497,7 @@
489
  "single_word": false,
490
  "special": true
491
  },
492
- "32058": {
493
  "content": "<extra_id_41>",
494
  "lstrip": false,
495
  "normalized": false,
@@ -497,7 +505,7 @@
497
  "single_word": false,
498
  "special": true
499
  },
500
- "32059": {
501
  "content": "<extra_id_40>",
502
  "lstrip": false,
503
  "normalized": false,
@@ -505,7 +513,7 @@
505
  "single_word": false,
506
  "special": true
507
  },
508
- "32060": {
509
  "content": "<extra_id_39>",
510
  "lstrip": false,
511
  "normalized": false,
@@ -513,7 +521,7 @@
513
  "single_word": false,
514
  "special": true
515
  },
516
- "32061": {
517
  "content": "<extra_id_38>",
518
  "lstrip": false,
519
  "normalized": false,
@@ -521,7 +529,7 @@
521
  "single_word": false,
522
  "special": true
523
  },
524
- "32062": {
525
  "content": "<extra_id_37>",
526
  "lstrip": false,
527
  "normalized": false,
@@ -529,7 +537,7 @@
529
  "single_word": false,
530
  "special": true
531
  },
532
- "32063": {
533
  "content": "<extra_id_36>",
534
  "lstrip": false,
535
  "normalized": false,
@@ -537,7 +545,7 @@
537
  "single_word": false,
538
  "special": true
539
  },
540
- "32064": {
541
  "content": "<extra_id_35>",
542
  "lstrip": false,
543
  "normalized": false,
@@ -545,7 +553,7 @@
545
  "single_word": false,
546
  "special": true
547
  },
548
- "32065": {
549
  "content": "<extra_id_34>",
550
  "lstrip": false,
551
  "normalized": false,
@@ -553,7 +561,7 @@
553
  "single_word": false,
554
  "special": true
555
  },
556
- "32066": {
557
  "content": "<extra_id_33>",
558
  "lstrip": false,
559
  "normalized": false,
@@ -561,7 +569,7 @@
561
  "single_word": false,
562
  "special": true
563
  },
564
- "32067": {
565
  "content": "<extra_id_32>",
566
  "lstrip": false,
567
  "normalized": false,
@@ -569,7 +577,7 @@
569
  "single_word": false,
570
  "special": true
571
  },
572
- "32068": {
573
  "content": "<extra_id_31>",
574
  "lstrip": false,
575
  "normalized": false,
@@ -577,7 +585,7 @@
577
  "single_word": false,
578
  "special": true
579
  },
580
- "32069": {
581
  "content": "<extra_id_30>",
582
  "lstrip": false,
583
  "normalized": false,
@@ -585,7 +593,7 @@
585
  "single_word": false,
586
  "special": true
587
  },
588
- "32070": {
589
  "content": "<extra_id_29>",
590
  "lstrip": false,
591
  "normalized": false,
@@ -593,7 +601,7 @@
593
  "single_word": false,
594
  "special": true
595
  },
596
- "32071": {
597
  "content": "<extra_id_28>",
598
  "lstrip": false,
599
  "normalized": false,
@@ -601,7 +609,7 @@
601
  "single_word": false,
602
  "special": true
603
  },
604
- "32072": {
605
  "content": "<extra_id_27>",
606
  "lstrip": false,
607
  "normalized": false,
@@ -609,7 +617,7 @@
609
  "single_word": false,
610
  "special": true
611
  },
612
- "32073": {
613
  "content": "<extra_id_26>",
614
  "lstrip": false,
615
  "normalized": false,
@@ -617,7 +625,7 @@
617
  "single_word": false,
618
  "special": true
619
  },
620
- "32074": {
621
  "content": "<extra_id_25>",
622
  "lstrip": false,
623
  "normalized": false,
@@ -625,7 +633,7 @@
625
  "single_word": false,
626
  "special": true
627
  },
628
- "32075": {
629
  "content": "<extra_id_24>",
630
  "lstrip": false,
631
  "normalized": false,
@@ -633,7 +641,7 @@
633
  "single_word": false,
634
  "special": true
635
  },
636
- "32076": {
637
  "content": "<extra_id_23>",
638
  "lstrip": false,
639
  "normalized": false,
@@ -641,7 +649,7 @@
641
  "single_word": false,
642
  "special": true
643
  },
644
- "32077": {
645
  "content": "<extra_id_22>",
646
  "lstrip": false,
647
  "normalized": false,
@@ -649,7 +657,7 @@
649
  "single_word": false,
650
  "special": true
651
  },
652
- "32078": {
653
  "content": "<extra_id_21>",
654
  "lstrip": false,
655
  "normalized": false,
@@ -657,7 +665,7 @@
657
  "single_word": false,
658
  "special": true
659
  },
660
- "32079": {
661
  "content": "<extra_id_20>",
662
  "lstrip": false,
663
  "normalized": false,
@@ -665,7 +673,7 @@
665
  "single_word": false,
666
  "special": true
667
  },
668
- "32080": {
669
  "content": "<extra_id_19>",
670
  "lstrip": false,
671
  "normalized": false,
@@ -673,7 +681,7 @@
673
  "single_word": false,
674
  "special": true
675
  },
676
- "32081": {
677
  "content": "<extra_id_18>",
678
  "lstrip": false,
679
  "normalized": false,
@@ -681,7 +689,7 @@
681
  "single_word": false,
682
  "special": true
683
  },
684
- "32082": {
685
  "content": "<extra_id_17>",
686
  "lstrip": false,
687
  "normalized": false,
@@ -689,7 +697,7 @@
689
  "single_word": false,
690
  "special": true
691
  },
692
- "32083": {
693
  "content": "<extra_id_16>",
694
  "lstrip": false,
695
  "normalized": false,
@@ -697,7 +705,7 @@
697
  "single_word": false,
698
  "special": true
699
  },
700
- "32084": {
701
  "content": "<extra_id_15>",
702
  "lstrip": false,
703
  "normalized": false,
@@ -705,7 +713,7 @@
705
  "single_word": false,
706
  "special": true
707
  },
708
- "32085": {
709
  "content": "<extra_id_14>",
710
  "lstrip": false,
711
  "normalized": false,
@@ -713,7 +721,7 @@
713
  "single_word": false,
714
  "special": true
715
  },
716
- "32086": {
717
  "content": "<extra_id_13>",
718
  "lstrip": false,
719
  "normalized": false,
@@ -721,7 +729,7 @@
721
  "single_word": false,
722
  "special": true
723
  },
724
- "32087": {
725
  "content": "<extra_id_12>",
726
  "lstrip": false,
727
  "normalized": false,
@@ -729,7 +737,7 @@
729
  "single_word": false,
730
  "special": true
731
  },
732
- "32088": {
733
  "content": "<extra_id_11>",
734
  "lstrip": false,
735
  "normalized": false,
@@ -737,7 +745,7 @@
737
  "single_word": false,
738
  "special": true
739
  },
740
- "32089": {
741
  "content": "<extra_id_10>",
742
  "lstrip": false,
743
  "normalized": false,
@@ -745,7 +753,7 @@
745
  "single_word": false,
746
  "special": true
747
  },
748
- "32090": {
749
  "content": "<extra_id_9>",
750
  "lstrip": false,
751
  "normalized": false,
@@ -753,7 +761,7 @@
753
  "single_word": false,
754
  "special": true
755
  },
756
- "32091": {
757
  "content": "<extra_id_8>",
758
  "lstrip": false,
759
  "normalized": false,
@@ -761,7 +769,7 @@
761
  "single_word": false,
762
  "special": true
763
  },
764
- "32092": {
765
  "content": "<extra_id_7>",
766
  "lstrip": false,
767
  "normalized": false,
@@ -769,7 +777,7 @@
769
  "single_word": false,
770
  "special": true
771
  },
772
- "32093": {
773
  "content": "<extra_id_6>",
774
  "lstrip": false,
775
  "normalized": false,
@@ -777,7 +785,7 @@
777
  "single_word": false,
778
  "special": true
779
  },
780
- "32094": {
781
  "content": "<extra_id_5>",
782
  "lstrip": false,
783
  "normalized": false,
@@ -785,7 +793,7 @@
785
  "single_word": false,
786
  "special": true
787
  },
788
- "32095": {
789
  "content": "<extra_id_4>",
790
  "lstrip": false,
791
  "normalized": false,
@@ -793,7 +801,7 @@
793
  "single_word": false,
794
  "special": true
795
  },
796
- "32096": {
797
  "content": "<extra_id_3>",
798
  "lstrip": false,
799
  "normalized": false,
@@ -801,7 +809,7 @@
801
  "single_word": false,
802
  "special": true
803
  },
804
- "32097": {
805
  "content": "<extra_id_2>",
806
  "lstrip": false,
807
  "normalized": false,
@@ -809,7 +817,7 @@
809
  "single_word": false,
810
  "special": true
811
  },
812
- "32098": {
813
  "content": "<extra_id_1>",
814
  "lstrip": false,
815
  "normalized": false,
@@ -817,7 +825,7 @@
817
  "single_word": false,
818
  "special": true
819
  },
820
- "32099": {
821
  "content": "<extra_id_0>",
822
  "lstrip": false,
823
  "normalized": false,
@@ -827,6 +835,7 @@
827
  }
828
  },
829
  "additional_special_tokens": [
 
830
  "<extra_id_0>",
831
  "<extra_id_1>",
832
  "<extra_id_2>",
@@ -928,6 +937,7 @@
928
  "<extra_id_98>",
929
  "<extra_id_99>"
930
  ],
 
931
  "clean_up_tokenization_spaces": true,
932
  "eos_token": "</s>",
933
  "extra_ids": 100,
 
9
  "single_word": false,
10
  "special": true
11
  },
12
+ "1": {
13
+ "content": "<s>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
  "2": {
21
  "content": "</s>",
22
  "lstrip": false,
 
33
  "single_word": false,
34
  "special": true
35
  },
36
+ "8000": {
37
  "content": "<extra_id_99>",
38
  "lstrip": false,
39
  "normalized": false,
 
41
  "single_word": false,
42
  "special": true
43
  },
44
+ "8001": {
45
  "content": "<extra_id_98>",
46
  "lstrip": false,
47
  "normalized": false,
 
49
  "single_word": false,
50
  "special": true
51
  },
52
+ "8002": {
53
  "content": "<extra_id_97>",
54
  "lstrip": false,
55
  "normalized": false,
 
57
  "single_word": false,
58
  "special": true
59
  },
60
+ "8003": {
61
  "content": "<extra_id_96>",
62
  "lstrip": false,
63
  "normalized": false,
 
65
  "single_word": false,
66
  "special": true
67
  },
68
+ "8004": {
69
  "content": "<extra_id_95>",
70
  "lstrip": false,
71
  "normalized": false,
 
73
  "single_word": false,
74
  "special": true
75
  },
76
+ "8005": {
77
  "content": "<extra_id_94>",
78
  "lstrip": false,
79
  "normalized": false,
 
81
  "single_word": false,
82
  "special": true
83
  },
84
+ "8006": {
85
  "content": "<extra_id_93>",
86
  "lstrip": false,
87
  "normalized": false,
 
89
  "single_word": false,
90
  "special": true
91
  },
92
+ "8007": {
93
  "content": "<extra_id_92>",
94
  "lstrip": false,
95
  "normalized": false,
 
97
  "single_word": false,
98
  "special": true
99
  },
100
+ "8008": {
101
  "content": "<extra_id_91>",
102
  "lstrip": false,
103
  "normalized": false,
 
105
  "single_word": false,
106
  "special": true
107
  },
108
+ "8009": {
109
  "content": "<extra_id_90>",
110
  "lstrip": false,
111
  "normalized": false,
 
113
  "single_word": false,
114
  "special": true
115
  },
116
+ "8010": {
117
  "content": "<extra_id_89>",
118
  "lstrip": false,
119
  "normalized": false,
 
121
  "single_word": false,
122
  "special": true
123
  },
124
+ "8011": {
125
  "content": "<extra_id_88>",
126
  "lstrip": false,
127
  "normalized": false,
 
129
  "single_word": false,
130
  "special": true
131
  },
132
+ "8012": {
133
  "content": "<extra_id_87>",
134
  "lstrip": false,
135
  "normalized": false,
 
137
  "single_word": false,
138
  "special": true
139
  },
140
+ "8013": {
141
  "content": "<extra_id_86>",
142
  "lstrip": false,
143
  "normalized": false,
 
145
  "single_word": false,
146
  "special": true
147
  },
148
+ "8014": {
149
  "content": "<extra_id_85>",
150
  "lstrip": false,
151
  "normalized": false,
 
153
  "single_word": false,
154
  "special": true
155
  },
156
+ "8015": {
157
  "content": "<extra_id_84>",
158
  "lstrip": false,
159
  "normalized": false,
 
161
  "single_word": false,
162
  "special": true
163
  },
164
+ "8016": {
165
  "content": "<extra_id_83>",
166
  "lstrip": false,
167
  "normalized": false,
 
169
  "single_word": false,
170
  "special": true
171
  },
172
+ "8017": {
173
  "content": "<extra_id_82>",
174
  "lstrip": false,
175
  "normalized": false,
 
177
  "single_word": false,
178
  "special": true
179
  },
180
+ "8018": {
181
  "content": "<extra_id_81>",
182
  "lstrip": false,
183
  "normalized": false,
 
185
  "single_word": false,
186
  "special": true
187
  },
188
+ "8019": {
189
  "content": "<extra_id_80>",
190
  "lstrip": false,
191
  "normalized": false,
 
193
  "single_word": false,
194
  "special": true
195
  },
196
+ "8020": {
197
  "content": "<extra_id_79>",
198
  "lstrip": false,
199
  "normalized": false,
 
201
  "single_word": false,
202
  "special": true
203
  },
204
+ "8021": {
205
  "content": "<extra_id_78>",
206
  "lstrip": false,
207
  "normalized": false,
 
209
  "single_word": false,
210
  "special": true
211
  },
212
+ "8022": {
213
  "content": "<extra_id_77>",
214
  "lstrip": false,
215
  "normalized": false,
 
217
  "single_word": false,
218
  "special": true
219
  },
220
+ "8023": {
221
  "content": "<extra_id_76>",
222
  "lstrip": false,
223
  "normalized": false,
 
225
  "single_word": false,
226
  "special": true
227
  },
228
+ "8024": {
229
  "content": "<extra_id_75>",
230
  "lstrip": false,
231
  "normalized": false,
 
233
  "single_word": false,
234
  "special": true
235
  },
236
+ "8025": {
237
  "content": "<extra_id_74>",
238
  "lstrip": false,
239
  "normalized": false,
 
241
  "single_word": false,
242
  "special": true
243
  },
244
+ "8026": {
245
  "content": "<extra_id_73>",
246
  "lstrip": false,
247
  "normalized": false,
 
249
  "single_word": false,
250
  "special": true
251
  },
252
+ "8027": {
253
  "content": "<extra_id_72>",
254
  "lstrip": false,
255
  "normalized": false,
 
257
  "single_word": false,
258
  "special": true
259
  },
260
+ "8028": {
261
  "content": "<extra_id_71>",
262
  "lstrip": false,
263
  "normalized": false,
 
265
  "single_word": false,
266
  "special": true
267
  },
268
+ "8029": {
269
  "content": "<extra_id_70>",
270
  "lstrip": false,
271
  "normalized": false,
 
273
  "single_word": false,
274
  "special": true
275
  },
276
+ "8030": {
277
  "content": "<extra_id_69>",
278
  "lstrip": false,
279
  "normalized": false,
 
281
  "single_word": false,
282
  "special": true
283
  },
284
+ "8031": {
285
  "content": "<extra_id_68>",
286
  "lstrip": false,
287
  "normalized": false,
 
289
  "single_word": false,
290
  "special": true
291
  },
292
+ "8032": {
293
  "content": "<extra_id_67>",
294
  "lstrip": false,
295
  "normalized": false,
 
297
  "single_word": false,
298
  "special": true
299
  },
300
+ "8033": {
301
  "content": "<extra_id_66>",
302
  "lstrip": false,
303
  "normalized": false,
 
305
  "single_word": false,
306
  "special": true
307
  },
308
+ "8034": {
309
  "content": "<extra_id_65>",
310
  "lstrip": false,
311
  "normalized": false,
 
313
  "single_word": false,
314
  "special": true
315
  },
316
+ "8035": {
317
  "content": "<extra_id_64>",
318
  "lstrip": false,
319
  "normalized": false,
 
321
  "single_word": false,
322
  "special": true
323
  },
324
+ "8036": {
325
  "content": "<extra_id_63>",
326
  "lstrip": false,
327
  "normalized": false,
 
329
  "single_word": false,
330
  "special": true
331
  },
332
+ "8037": {
333
  "content": "<extra_id_62>",
334
  "lstrip": false,
335
  "normalized": false,
 
337
  "single_word": false,
338
  "special": true
339
  },
340
+ "8038": {
341
  "content": "<extra_id_61>",
342
  "lstrip": false,
343
  "normalized": false,
 
345
  "single_word": false,
346
  "special": true
347
  },
348
+ "8039": {
349
  "content": "<extra_id_60>",
350
  "lstrip": false,
351
  "normalized": false,
 
353
  "single_word": false,
354
  "special": true
355
  },
356
+ "8040": {
357
  "content": "<extra_id_59>",
358
  "lstrip": false,
359
  "normalized": false,
 
361
  "single_word": false,
362
  "special": true
363
  },
364
+ "8041": {
365
  "content": "<extra_id_58>",
366
  "lstrip": false,
367
  "normalized": false,
 
369
  "single_word": false,
370
  "special": true
371
  },
372
+ "8042": {
373
  "content": "<extra_id_57>",
374
  "lstrip": false,
375
  "normalized": false,
 
377
  "single_word": false,
378
  "special": true
379
  },
380
+ "8043": {
381
  "content": "<extra_id_56>",
382
  "lstrip": false,
383
  "normalized": false,
 
385
  "single_word": false,
386
  "special": true
387
  },
388
+ "8044": {
389
  "content": "<extra_id_55>",
390
  "lstrip": false,
391
  "normalized": false,
 
393
  "single_word": false,
394
  "special": true
395
  },
396
+ "8045": {
397
  "content": "<extra_id_54>",
398
  "lstrip": false,
399
  "normalized": false,
 
401
  "single_word": false,
402
  "special": true
403
  },
404
+ "8046": {
405
  "content": "<extra_id_53>",
406
  "lstrip": false,
407
  "normalized": false,
 
409
  "single_word": false,
410
  "special": true
411
  },
412
+ "8047": {
413
  "content": "<extra_id_52>",
414
  "lstrip": false,
415
  "normalized": false,
 
417
  "single_word": false,
418
  "special": true
419
  },
420
+ "8048": {
421
  "content": "<extra_id_51>",
422
  "lstrip": false,
423
  "normalized": false,
 
425
  "single_word": false,
426
  "special": true
427
  },
428
+ "8049": {
429
  "content": "<extra_id_50>",
430
  "lstrip": false,
431
  "normalized": false,
 
433
  "single_word": false,
434
  "special": true
435
  },
436
+ "8050": {
437
  "content": "<extra_id_49>",
438
  "lstrip": false,
439
  "normalized": false,
 
441
  "single_word": false,
442
  "special": true
443
  },
444
+ "8051": {
445
  "content": "<extra_id_48>",
446
  "lstrip": false,
447
  "normalized": false,
 
449
  "single_word": false,
450
  "special": true
451
  },
452
+ "8052": {
453
  "content": "<extra_id_47>",
454
  "lstrip": false,
455
  "normalized": false,
 
457
  "single_word": false,
458
  "special": true
459
  },
460
+ "8053": {
461
  "content": "<extra_id_46>",
462
  "lstrip": false,
463
  "normalized": false,
 
465
  "single_word": false,
466
  "special": true
467
  },
468
+ "8054": {
469
  "content": "<extra_id_45>",
470
  "lstrip": false,
471
  "normalized": false,
 
473
  "single_word": false,
474
  "special": true
475
  },
476
+ "8055": {
477
  "content": "<extra_id_44>",
478
  "lstrip": false,
479
  "normalized": false,
 
481
  "single_word": false,
482
  "special": true
483
  },
484
+ "8056": {
485
  "content": "<extra_id_43>",
486
  "lstrip": false,
487
  "normalized": false,
 
489
  "single_word": false,
490
  "special": true
491
  },
492
+ "8057": {
493
  "content": "<extra_id_42>",
494
  "lstrip": false,
495
  "normalized": false,
 
497
  "single_word": false,
498
  "special": true
499
  },
500
+ "8058": {
501
  "content": "<extra_id_41>",
502
  "lstrip": false,
503
  "normalized": false,
 
505
  "single_word": false,
506
  "special": true
507
  },
508
+ "8059": {
509
  "content": "<extra_id_40>",
510
  "lstrip": false,
511
  "normalized": false,
 
513
  "single_word": false,
514
  "special": true
515
  },
516
+ "8060": {
517
  "content": "<extra_id_39>",
518
  "lstrip": false,
519
  "normalized": false,
 
521
  "single_word": false,
522
  "special": true
523
  },
524
+ "8061": {
525
  "content": "<extra_id_38>",
526
  "lstrip": false,
527
  "normalized": false,
 
529
  "single_word": false,
530
  "special": true
531
  },
532
+ "8062": {
533
  "content": "<extra_id_37>",
534
  "lstrip": false,
535
  "normalized": false,
 
537
  "single_word": false,
538
  "special": true
539
  },
540
+ "8063": {
541
  "content": "<extra_id_36>",
542
  "lstrip": false,
543
  "normalized": false,
 
545
  "single_word": false,
546
  "special": true
547
  },
548
+ "8064": {
549
  "content": "<extra_id_35>",
550
  "lstrip": false,
551
  "normalized": false,
 
553
  "single_word": false,
554
  "special": true
555
  },
556
+ "8065": {
557
  "content": "<extra_id_34>",
558
  "lstrip": false,
559
  "normalized": false,
 
561
  "single_word": false,
562
  "special": true
563
  },
564
+ "8066": {
565
  "content": "<extra_id_33>",
566
  "lstrip": false,
567
  "normalized": false,
 
569
  "single_word": false,
570
  "special": true
571
  },
572
+ "8067": {
573
  "content": "<extra_id_32>",
574
  "lstrip": false,
575
  "normalized": false,
 
577
  "single_word": false,
578
  "special": true
579
  },
580
+ "8068": {
581
  "content": "<extra_id_31>",
582
  "lstrip": false,
583
  "normalized": false,
 
585
  "single_word": false,
586
  "special": true
587
  },
588
+ "8069": {
589
  "content": "<extra_id_30>",
590
  "lstrip": false,
591
  "normalized": false,
 
593
  "single_word": false,
594
  "special": true
595
  },
596
+ "8070": {
597
  "content": "<extra_id_29>",
598
  "lstrip": false,
599
  "normalized": false,
 
601
  "single_word": false,
602
  "special": true
603
  },
604
+ "8071": {
605
  "content": "<extra_id_28>",
606
  "lstrip": false,
607
  "normalized": false,
 
609
  "single_word": false,
610
  "special": true
611
  },
612
+ "8072": {
613
  "content": "<extra_id_27>",
614
  "lstrip": false,
615
  "normalized": false,
 
617
  "single_word": false,
618
  "special": true
619
  },
620
+ "8073": {
621
  "content": "<extra_id_26>",
622
  "lstrip": false,
623
  "normalized": false,
 
625
  "single_word": false,
626
  "special": true
627
  },
628
+ "8074": {
629
  "content": "<extra_id_25>",
630
  "lstrip": false,
631
  "normalized": false,
 
633
  "single_word": false,
634
  "special": true
635
  },
636
+ "8075": {
637
  "content": "<extra_id_24>",
638
  "lstrip": false,
639
  "normalized": false,
 
641
  "single_word": false,
642
  "special": true
643
  },
644
+ "8076": {
645
  "content": "<extra_id_23>",
646
  "lstrip": false,
647
  "normalized": false,
 
649
  "single_word": false,
650
  "special": true
651
  },
652
+ "8077": {
653
  "content": "<extra_id_22>",
654
  "lstrip": false,
655
  "normalized": false,
 
657
  "single_word": false,
658
  "special": true
659
  },
660
+ "8078": {
661
  "content": "<extra_id_21>",
662
  "lstrip": false,
663
  "normalized": false,
 
665
  "single_word": false,
666
  "special": true
667
  },
668
+ "8079": {
669
  "content": "<extra_id_20>",
670
  "lstrip": false,
671
  "normalized": false,
 
673
  "single_word": false,
674
  "special": true
675
  },
676
+ "8080": {
677
  "content": "<extra_id_19>",
678
  "lstrip": false,
679
  "normalized": false,
 
681
  "single_word": false,
682
  "special": true
683
  },
684
+ "8081": {
685
  "content": "<extra_id_18>",
686
  "lstrip": false,
687
  "normalized": false,
 
689
  "single_word": false,
690
  "special": true
691
  },
692
+ "8082": {
693
  "content": "<extra_id_17>",
694
  "lstrip": false,
695
  "normalized": false,
 
697
  "single_word": false,
698
  "special": true
699
  },
700
+ "8083": {
701
  "content": "<extra_id_16>",
702
  "lstrip": false,
703
  "normalized": false,
 
705
  "single_word": false,
706
  "special": true
707
  },
708
+ "8084": {
709
  "content": "<extra_id_15>",
710
  "lstrip": false,
711
  "normalized": false,
 
713
  "single_word": false,
714
  "special": true
715
  },
716
+ "8085": {
717
  "content": "<extra_id_14>",
718
  "lstrip": false,
719
  "normalized": false,
 
721
  "single_word": false,
722
  "special": true
723
  },
724
+ "8086": {
725
  "content": "<extra_id_13>",
726
  "lstrip": false,
727
  "normalized": false,
 
729
  "single_word": false,
730
  "special": true
731
  },
732
+ "8087": {
733
  "content": "<extra_id_12>",
734
  "lstrip": false,
735
  "normalized": false,
 
737
  "single_word": false,
738
  "special": true
739
  },
740
+ "8088": {
741
  "content": "<extra_id_11>",
742
  "lstrip": false,
743
  "normalized": false,
 
745
  "single_word": false,
746
  "special": true
747
  },
748
+ "8089": {
749
  "content": "<extra_id_10>",
750
  "lstrip": false,
751
  "normalized": false,
 
753
  "single_word": false,
754
  "special": true
755
  },
756
+ "8090": {
757
  "content": "<extra_id_9>",
758
  "lstrip": false,
759
  "normalized": false,
 
761
  "single_word": false,
762
  "special": true
763
  },
764
+ "8091": {
765
  "content": "<extra_id_8>",
766
  "lstrip": false,
767
  "normalized": false,
 
769
  "single_word": false,
770
  "special": true
771
  },
772
+ "8092": {
773
  "content": "<extra_id_7>",
774
  "lstrip": false,
775
  "normalized": false,
 
777
  "single_word": false,
778
  "special": true
779
  },
780
+ "8093": {
781
  "content": "<extra_id_6>",
782
  "lstrip": false,
783
  "normalized": false,
 
785
  "single_word": false,
786
  "special": true
787
  },
788
+ "8094": {
789
  "content": "<extra_id_5>",
790
  "lstrip": false,
791
  "normalized": false,
 
793
  "single_word": false,
794
  "special": true
795
  },
796
+ "8095": {
797
  "content": "<extra_id_4>",
798
  "lstrip": false,
799
  "normalized": false,
 
801
  "single_word": false,
802
  "special": true
803
  },
804
+ "8096": {
805
  "content": "<extra_id_3>",
806
  "lstrip": false,
807
  "normalized": false,
 
809
  "single_word": false,
810
  "special": true
811
  },
812
+ "8097": {
813
  "content": "<extra_id_2>",
814
  "lstrip": false,
815
  "normalized": false,
 
817
  "single_word": false,
818
  "special": true
819
  },
820
+ "8098": {
821
  "content": "<extra_id_1>",
822
  "lstrip": false,
823
  "normalized": false,
 
825
  "single_word": false,
826
  "special": true
827
  },
828
+ "8099": {
829
  "content": "<extra_id_0>",
830
  "lstrip": false,
831
  "normalized": false,
 
835
  }
836
  },
837
  "additional_special_tokens": [
838
+ "<s>",
839
  "<extra_id_0>",
840
  "<extra_id_1>",
841
  "<extra_id_2>",
 
937
  "<extra_id_98>",
938
  "<extra_id_99>"
939
  ],
940
+ "bos_token": "<s>",
941
  "clean_up_tokenization_spaces": true,
942
  "eos_token": "</s>",
943
  "extra_ids": 100,