dennispark committed on
Commit
6d16c5c
1 Parent(s): 791bf1d

Update tokenizer without extra_ids

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +1 -1
  2. tokenizer.json +1 -901
  3. tokenizer_config.json +1 -1
special_tokens_map.json CHANGED
@@ -1 +1 @@
1
- {"eos_token": "</s>", "unk_token": "<pad>", "pad_token": "<pad>", "additional_special_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "<extra_id_4>", "<extra_id_5>", "<extra_id_6>", "<extra_id_7>", "<extra_id_8>", "<extra_id_9>", "<extra_id_10>", "<extra_id_11>", "<extra_id_12>", "<extra_id_13>", "<extra_id_14>", "<extra_id_15>", "<extra_id_16>", "<extra_id_17>", "<extra_id_18>", "<extra_id_19>", "<extra_id_20>", "<extra_id_21>", "<extra_id_22>", "<extra_id_23>", "<extra_id_24>", "<extra_id_25>", "<extra_id_26>", "<extra_id_27>", "<extra_id_28>", "<extra_id_29>", "<extra_id_30>", "<extra_id_31>", "<extra_id_32>", "<extra_id_33>", "<extra_id_34>", "<extra_id_35>", "<extra_id_36>", "<extra_id_37>", "<extra_id_38>", "<extra_id_39>", "<extra_id_40>", "<extra_id_41>", "<extra_id_42>", "<extra_id_43>", "<extra_id_44>", "<extra_id_45>", "<extra_id_46>", "<extra_id_47>", "<extra_id_48>", "<extra_id_49>", "<extra_id_50>", "<extra_id_51>", "<extra_id_52>", "<extra_id_53>", "<extra_id_54>", "<extra_id_55>", "<extra_id_56>", "<extra_id_57>", "<extra_id_58>", "<extra_id_59>", "<extra_id_60>", "<extra_id_61>", "<extra_id_62>", "<extra_id_63>", "<extra_id_64>", "<extra_id_65>", "<extra_id_66>", "<extra_id_67>", "<extra_id_68>", "<extra_id_69>", "<extra_id_70>", "<extra_id_71>", "<extra_id_72>", "<extra_id_73>", "<extra_id_74>", "<extra_id_75>", "<extra_id_76>", "<extra_id_77>", "<extra_id_78>", "<extra_id_79>", "<extra_id_80>", "<extra_id_81>", "<extra_id_82>", "<extra_id_83>", "<extra_id_84>", "<extra_id_85>", "<extra_id_86>", "<extra_id_87>", "<extra_id_88>", "<extra_id_89>", "<extra_id_90>", "<extra_id_91>", "<extra_id_92>", "<extra_id_93>", "<extra_id_94>", "<extra_id_95>", "<extra_id_96>", "<extra_id_97>", "<extra_id_98>", "<extra_id_99>"]}
 
1
+ {"eos_token": "</s>", "unk_token": "<pad>", "pad_token": "<pad>"}
tokenizer.json CHANGED
@@ -27,906 +27,6 @@
27
  "rstrip": false,
28
  "normalized": false,
29
  "special": true
30
- },
31
- {
32
- "id": 50258,
33
- "content": "<extra_id_0>",
34
- "single_word": false,
35
- "lstrip": false,
36
- "rstrip": false,
37
- "normalized": false,
38
- "special": true
39
- },
40
- {
41
- "id": 50259,
42
- "content": "<extra_id_1>",
43
- "single_word": false,
44
- "lstrip": false,
45
- "rstrip": false,
46
- "normalized": false,
47
- "special": true
48
- },
49
- {
50
- "id": 50260,
51
- "content": "<extra_id_2>",
52
- "single_word": false,
53
- "lstrip": false,
54
- "rstrip": false,
55
- "normalized": false,
56
- "special": true
57
- },
58
- {
59
- "id": 50261,
60
- "content": "<extra_id_3>",
61
- "single_word": false,
62
- "lstrip": false,
63
- "rstrip": false,
64
- "normalized": false,
65
- "special": true
66
- },
67
- {
68
- "id": 50262,
69
- "content": "<extra_id_4>",
70
- "single_word": false,
71
- "lstrip": false,
72
- "rstrip": false,
73
- "normalized": false,
74
- "special": true
75
- },
76
- {
77
- "id": 50263,
78
- "content": "<extra_id_5>",
79
- "single_word": false,
80
- "lstrip": false,
81
- "rstrip": false,
82
- "normalized": false,
83
- "special": true
84
- },
85
- {
86
- "id": 50264,
87
- "content": "<extra_id_6>",
88
- "single_word": false,
89
- "lstrip": false,
90
- "rstrip": false,
91
- "normalized": false,
92
- "special": true
93
- },
94
- {
95
- "id": 50265,
96
- "content": "<extra_id_7>",
97
- "single_word": false,
98
- "lstrip": false,
99
- "rstrip": false,
100
- "normalized": false,
101
- "special": true
102
- },
103
- {
104
- "id": 50266,
105
- "content": "<extra_id_8>",
106
- "single_word": false,
107
- "lstrip": false,
108
- "rstrip": false,
109
- "normalized": false,
110
- "special": true
111
- },
112
- {
113
- "id": 50267,
114
- "content": "<extra_id_9>",
115
- "single_word": false,
116
- "lstrip": false,
117
- "rstrip": false,
118
- "normalized": false,
119
- "special": true
120
- },
121
- {
122
- "id": 50268,
123
- "content": "<extra_id_10>",
124
- "single_word": false,
125
- "lstrip": false,
126
- "rstrip": false,
127
- "normalized": false,
128
- "special": true
129
- },
130
- {
131
- "id": 50269,
132
- "content": "<extra_id_11>",
133
- "single_word": false,
134
- "lstrip": false,
135
- "rstrip": false,
136
- "normalized": false,
137
- "special": true
138
- },
139
- {
140
- "id": 50270,
141
- "content": "<extra_id_12>",
142
- "single_word": false,
143
- "lstrip": false,
144
- "rstrip": false,
145
- "normalized": false,
146
- "special": true
147
- },
148
- {
149
- "id": 50271,
150
- "content": "<extra_id_13>",
151
- "single_word": false,
152
- "lstrip": false,
153
- "rstrip": false,
154
- "normalized": false,
155
- "special": true
156
- },
157
- {
158
- "id": 50272,
159
- "content": "<extra_id_14>",
160
- "single_word": false,
161
- "lstrip": false,
162
- "rstrip": false,
163
- "normalized": false,
164
- "special": true
165
- },
166
- {
167
- "id": 50273,
168
- "content": "<extra_id_15>",
169
- "single_word": false,
170
- "lstrip": false,
171
- "rstrip": false,
172
- "normalized": false,
173
- "special": true
174
- },
175
- {
176
- "id": 50274,
177
- "content": "<extra_id_16>",
178
- "single_word": false,
179
- "lstrip": false,
180
- "rstrip": false,
181
- "normalized": false,
182
- "special": true
183
- },
184
- {
185
- "id": 50275,
186
- "content": "<extra_id_17>",
187
- "single_word": false,
188
- "lstrip": false,
189
- "rstrip": false,
190
- "normalized": false,
191
- "special": true
192
- },
193
- {
194
- "id": 50276,
195
- "content": "<extra_id_18>",
196
- "single_word": false,
197
- "lstrip": false,
198
- "rstrip": false,
199
- "normalized": false,
200
- "special": true
201
- },
202
- {
203
- "id": 50277,
204
- "content": "<extra_id_19>",
205
- "single_word": false,
206
- "lstrip": false,
207
- "rstrip": false,
208
- "normalized": false,
209
- "special": true
210
- },
211
- {
212
- "id": 50278,
213
- "content": "<extra_id_20>",
214
- "single_word": false,
215
- "lstrip": false,
216
- "rstrip": false,
217
- "normalized": false,
218
- "special": true
219
- },
220
- {
221
- "id": 50279,
222
- "content": "<extra_id_21>",
223
- "single_word": false,
224
- "lstrip": false,
225
- "rstrip": false,
226
- "normalized": false,
227
- "special": true
228
- },
229
- {
230
- "id": 50280,
231
- "content": "<extra_id_22>",
232
- "single_word": false,
233
- "lstrip": false,
234
- "rstrip": false,
235
- "normalized": false,
236
- "special": true
237
- },
238
- {
239
- "id": 50281,
240
- "content": "<extra_id_23>",
241
- "single_word": false,
242
- "lstrip": false,
243
- "rstrip": false,
244
- "normalized": false,
245
- "special": true
246
- },
247
- {
248
- "id": 50282,
249
- "content": "<extra_id_24>",
250
- "single_word": false,
251
- "lstrip": false,
252
- "rstrip": false,
253
- "normalized": false,
254
- "special": true
255
- },
256
- {
257
- "id": 50283,
258
- "content": "<extra_id_25>",
259
- "single_word": false,
260
- "lstrip": false,
261
- "rstrip": false,
262
- "normalized": false,
263
- "special": true
264
- },
265
- {
266
- "id": 50284,
267
- "content": "<extra_id_26>",
268
- "single_word": false,
269
- "lstrip": false,
270
- "rstrip": false,
271
- "normalized": false,
272
- "special": true
273
- },
274
- {
275
- "id": 50285,
276
- "content": "<extra_id_27>",
277
- "single_word": false,
278
- "lstrip": false,
279
- "rstrip": false,
280
- "normalized": false,
281
- "special": true
282
- },
283
- {
284
- "id": 50286,
285
- "content": "<extra_id_28>",
286
- "single_word": false,
287
- "lstrip": false,
288
- "rstrip": false,
289
- "normalized": false,
290
- "special": true
291
- },
292
- {
293
- "id": 50287,
294
- "content": "<extra_id_29>",
295
- "single_word": false,
296
- "lstrip": false,
297
- "rstrip": false,
298
- "normalized": false,
299
- "special": true
300
- },
301
- {
302
- "id": 50288,
303
- "content": "<extra_id_30>",
304
- "single_word": false,
305
- "lstrip": false,
306
- "rstrip": false,
307
- "normalized": false,
308
- "special": true
309
- },
310
- {
311
- "id": 50289,
312
- "content": "<extra_id_31>",
313
- "single_word": false,
314
- "lstrip": false,
315
- "rstrip": false,
316
- "normalized": false,
317
- "special": true
318
- },
319
- {
320
- "id": 50290,
321
- "content": "<extra_id_32>",
322
- "single_word": false,
323
- "lstrip": false,
324
- "rstrip": false,
325
- "normalized": false,
326
- "special": true
327
- },
328
- {
329
- "id": 50291,
330
- "content": "<extra_id_33>",
331
- "single_word": false,
332
- "lstrip": false,
333
- "rstrip": false,
334
- "normalized": false,
335
- "special": true
336
- },
337
- {
338
- "id": 50292,
339
- "content": "<extra_id_34>",
340
- "single_word": false,
341
- "lstrip": false,
342
- "rstrip": false,
343
- "normalized": false,
344
- "special": true
345
- },
346
- {
347
- "id": 50293,
348
- "content": "<extra_id_35>",
349
- "single_word": false,
350
- "lstrip": false,
351
- "rstrip": false,
352
- "normalized": false,
353
- "special": true
354
- },
355
- {
356
- "id": 50294,
357
- "content": "<extra_id_36>",
358
- "single_word": false,
359
- "lstrip": false,
360
- "rstrip": false,
361
- "normalized": false,
362
- "special": true
363
- },
364
- {
365
- "id": 50295,
366
- "content": "<extra_id_37>",
367
- "single_word": false,
368
- "lstrip": false,
369
- "rstrip": false,
370
- "normalized": false,
371
- "special": true
372
- },
373
- {
374
- "id": 50296,
375
- "content": "<extra_id_38>",
376
- "single_word": false,
377
- "lstrip": false,
378
- "rstrip": false,
379
- "normalized": false,
380
- "special": true
381
- },
382
- {
383
- "id": 50297,
384
- "content": "<extra_id_39>",
385
- "single_word": false,
386
- "lstrip": false,
387
- "rstrip": false,
388
- "normalized": false,
389
- "special": true
390
- },
391
- {
392
- "id": 50298,
393
- "content": "<extra_id_40>",
394
- "single_word": false,
395
- "lstrip": false,
396
- "rstrip": false,
397
- "normalized": false,
398
- "special": true
399
- },
400
- {
401
- "id": 50299,
402
- "content": "<extra_id_41>",
403
- "single_word": false,
404
- "lstrip": false,
405
- "rstrip": false,
406
- "normalized": false,
407
- "special": true
408
- },
409
- {
410
- "id": 50300,
411
- "content": "<extra_id_42>",
412
- "single_word": false,
413
- "lstrip": false,
414
- "rstrip": false,
415
- "normalized": false,
416
- "special": true
417
- },
418
- {
419
- "id": 50301,
420
- "content": "<extra_id_43>",
421
- "single_word": false,
422
- "lstrip": false,
423
- "rstrip": false,
424
- "normalized": false,
425
- "special": true
426
- },
427
- {
428
- "id": 50302,
429
- "content": "<extra_id_44>",
430
- "single_word": false,
431
- "lstrip": false,
432
- "rstrip": false,
433
- "normalized": false,
434
- "special": true
435
- },
436
- {
437
- "id": 50303,
438
- "content": "<extra_id_45>",
439
- "single_word": false,
440
- "lstrip": false,
441
- "rstrip": false,
442
- "normalized": false,
443
- "special": true
444
- },
445
- {
446
- "id": 50304,
447
- "content": "<extra_id_46>",
448
- "single_word": false,
449
- "lstrip": false,
450
- "rstrip": false,
451
- "normalized": false,
452
- "special": true
453
- },
454
- {
455
- "id": 50305,
456
- "content": "<extra_id_47>",
457
- "single_word": false,
458
- "lstrip": false,
459
- "rstrip": false,
460
- "normalized": false,
461
- "special": true
462
- },
463
- {
464
- "id": 50306,
465
- "content": "<extra_id_48>",
466
- "single_word": false,
467
- "lstrip": false,
468
- "rstrip": false,
469
- "normalized": false,
470
- "special": true
471
- },
472
- {
473
- "id": 50307,
474
- "content": "<extra_id_49>",
475
- "single_word": false,
476
- "lstrip": false,
477
- "rstrip": false,
478
- "normalized": false,
479
- "special": true
480
- },
481
- {
482
- "id": 50308,
483
- "content": "<extra_id_50>",
484
- "single_word": false,
485
- "lstrip": false,
486
- "rstrip": false,
487
- "normalized": false,
488
- "special": true
489
- },
490
- {
491
- "id": 50309,
492
- "content": "<extra_id_51>",
493
- "single_word": false,
494
- "lstrip": false,
495
- "rstrip": false,
496
- "normalized": false,
497
- "special": true
498
- },
499
- {
500
- "id": 50310,
501
- "content": "<extra_id_52>",
502
- "single_word": false,
503
- "lstrip": false,
504
- "rstrip": false,
505
- "normalized": false,
506
- "special": true
507
- },
508
- {
509
- "id": 50311,
510
- "content": "<extra_id_53>",
511
- "single_word": false,
512
- "lstrip": false,
513
- "rstrip": false,
514
- "normalized": false,
515
- "special": true
516
- },
517
- {
518
- "id": 50312,
519
- "content": "<extra_id_54>",
520
- "single_word": false,
521
- "lstrip": false,
522
- "rstrip": false,
523
- "normalized": false,
524
- "special": true
525
- },
526
- {
527
- "id": 50313,
528
- "content": "<extra_id_55>",
529
- "single_word": false,
530
- "lstrip": false,
531
- "rstrip": false,
532
- "normalized": false,
533
- "special": true
534
- },
535
- {
536
- "id": 50314,
537
- "content": "<extra_id_56>",
538
- "single_word": false,
539
- "lstrip": false,
540
- "rstrip": false,
541
- "normalized": false,
542
- "special": true
543
- },
544
- {
545
- "id": 50315,
546
- "content": "<extra_id_57>",
547
- "single_word": false,
548
- "lstrip": false,
549
- "rstrip": false,
550
- "normalized": false,
551
- "special": true
552
- },
553
- {
554
- "id": 50316,
555
- "content": "<extra_id_58>",
556
- "single_word": false,
557
- "lstrip": false,
558
- "rstrip": false,
559
- "normalized": false,
560
- "special": true
561
- },
562
- {
563
- "id": 50317,
564
- "content": "<extra_id_59>",
565
- "single_word": false,
566
- "lstrip": false,
567
- "rstrip": false,
568
- "normalized": false,
569
- "special": true
570
- },
571
- {
572
- "id": 50318,
573
- "content": "<extra_id_60>",
574
- "single_word": false,
575
- "lstrip": false,
576
- "rstrip": false,
577
- "normalized": false,
578
- "special": true
579
- },
580
- {
581
- "id": 50319,
582
- "content": "<extra_id_61>",
583
- "single_word": false,
584
- "lstrip": false,
585
- "rstrip": false,
586
- "normalized": false,
587
- "special": true
588
- },
589
- {
590
- "id": 50320,
591
- "content": "<extra_id_62>",
592
- "single_word": false,
593
- "lstrip": false,
594
- "rstrip": false,
595
- "normalized": false,
596
- "special": true
597
- },
598
- {
599
- "id": 50321,
600
- "content": "<extra_id_63>",
601
- "single_word": false,
602
- "lstrip": false,
603
- "rstrip": false,
604
- "normalized": false,
605
- "special": true
606
- },
607
- {
608
- "id": 50322,
609
- "content": "<extra_id_64>",
610
- "single_word": false,
611
- "lstrip": false,
612
- "rstrip": false,
613
- "normalized": false,
614
- "special": true
615
- },
616
- {
617
- "id": 50323,
618
- "content": "<extra_id_65>",
619
- "single_word": false,
620
- "lstrip": false,
621
- "rstrip": false,
622
- "normalized": false,
623
- "special": true
624
- },
625
- {
626
- "id": 50324,
627
- "content": "<extra_id_66>",
628
- "single_word": false,
629
- "lstrip": false,
630
- "rstrip": false,
631
- "normalized": false,
632
- "special": true
633
- },
634
- {
635
- "id": 50325,
636
- "content": "<extra_id_67>",
637
- "single_word": false,
638
- "lstrip": false,
639
- "rstrip": false,
640
- "normalized": false,
641
- "special": true
642
- },
643
- {
644
- "id": 50326,
645
- "content": "<extra_id_68>",
646
- "single_word": false,
647
- "lstrip": false,
648
- "rstrip": false,
649
- "normalized": false,
650
- "special": true
651
- },
652
- {
653
- "id": 50327,
654
- "content": "<extra_id_69>",
655
- "single_word": false,
656
- "lstrip": false,
657
- "rstrip": false,
658
- "normalized": false,
659
- "special": true
660
- },
661
- {
662
- "id": 50328,
663
- "content": "<extra_id_70>",
664
- "single_word": false,
665
- "lstrip": false,
666
- "rstrip": false,
667
- "normalized": false,
668
- "special": true
669
- },
670
- {
671
- "id": 50329,
672
- "content": "<extra_id_71>",
673
- "single_word": false,
674
- "lstrip": false,
675
- "rstrip": false,
676
- "normalized": false,
677
- "special": true
678
- },
679
- {
680
- "id": 50330,
681
- "content": "<extra_id_72>",
682
- "single_word": false,
683
- "lstrip": false,
684
- "rstrip": false,
685
- "normalized": false,
686
- "special": true
687
- },
688
- {
689
- "id": 50331,
690
- "content": "<extra_id_73>",
691
- "single_word": false,
692
- "lstrip": false,
693
- "rstrip": false,
694
- "normalized": false,
695
- "special": true
696
- },
697
- {
698
- "id": 50332,
699
- "content": "<extra_id_74>",
700
- "single_word": false,
701
- "lstrip": false,
702
- "rstrip": false,
703
- "normalized": false,
704
- "special": true
705
- },
706
- {
707
- "id": 50333,
708
- "content": "<extra_id_75>",
709
- "single_word": false,
710
- "lstrip": false,
711
- "rstrip": false,
712
- "normalized": false,
713
- "special": true
714
- },
715
- {
716
- "id": 50334,
717
- "content": "<extra_id_76>",
718
- "single_word": false,
719
- "lstrip": false,
720
- "rstrip": false,
721
- "normalized": false,
722
- "special": true
723
- },
724
- {
725
- "id": 50335,
726
- "content": "<extra_id_77>",
727
- "single_word": false,
728
- "lstrip": false,
729
- "rstrip": false,
730
- "normalized": false,
731
- "special": true
732
- },
733
- {
734
- "id": 50336,
735
- "content": "<extra_id_78>",
736
- "single_word": false,
737
- "lstrip": false,
738
- "rstrip": false,
739
- "normalized": false,
740
- "special": true
741
- },
742
- {
743
- "id": 50337,
744
- "content": "<extra_id_79>",
745
- "single_word": false,
746
- "lstrip": false,
747
- "rstrip": false,
748
- "normalized": false,
749
- "special": true
750
- },
751
- {
752
- "id": 50338,
753
- "content": "<extra_id_80>",
754
- "single_word": false,
755
- "lstrip": false,
756
- "rstrip": false,
757
- "normalized": false,
758
- "special": true
759
- },
760
- {
761
- "id": 50339,
762
- "content": "<extra_id_81>",
763
- "single_word": false,
764
- "lstrip": false,
765
- "rstrip": false,
766
- "normalized": false,
767
- "special": true
768
- },
769
- {
770
- "id": 50340,
771
- "content": "<extra_id_82>",
772
- "single_word": false,
773
- "lstrip": false,
774
- "rstrip": false,
775
- "normalized": false,
776
- "special": true
777
- },
778
- {
779
- "id": 50341,
780
- "content": "<extra_id_83>",
781
- "single_word": false,
782
- "lstrip": false,
783
- "rstrip": false,
784
- "normalized": false,
785
- "special": true
786
- },
787
- {
788
- "id": 50342,
789
- "content": "<extra_id_84>",
790
- "single_word": false,
791
- "lstrip": false,
792
- "rstrip": false,
793
- "normalized": false,
794
- "special": true
795
- },
796
- {
797
- "id": 50343,
798
- "content": "<extra_id_85>",
799
- "single_word": false,
800
- "lstrip": false,
801
- "rstrip": false,
802
- "normalized": false,
803
- "special": true
804
- },
805
- {
806
- "id": 50344,
807
- "content": "<extra_id_86>",
808
- "single_word": false,
809
- "lstrip": false,
810
- "rstrip": false,
811
- "normalized": false,
812
- "special": true
813
- },
814
- {
815
- "id": 50345,
816
- "content": "<extra_id_87>",
817
- "single_word": false,
818
- "lstrip": false,
819
- "rstrip": false,
820
- "normalized": false,
821
- "special": true
822
- },
823
- {
824
- "id": 50346,
825
- "content": "<extra_id_88>",
826
- "single_word": false,
827
- "lstrip": false,
828
- "rstrip": false,
829
- "normalized": false,
830
- "special": true
831
- },
832
- {
833
- "id": 50347,
834
- "content": "<extra_id_89>",
835
- "single_word": false,
836
- "lstrip": false,
837
- "rstrip": false,
838
- "normalized": false,
839
- "special": true
840
- },
841
- {
842
- "id": 50348,
843
- "content": "<extra_id_90>",
844
- "single_word": false,
845
- "lstrip": false,
846
- "rstrip": false,
847
- "normalized": false,
848
- "special": true
849
- },
850
- {
851
- "id": 50349,
852
- "content": "<extra_id_91>",
853
- "single_word": false,
854
- "lstrip": false,
855
- "rstrip": false,
856
- "normalized": false,
857
- "special": true
858
- },
859
- {
860
- "id": 50350,
861
- "content": "<extra_id_92>",
862
- "single_word": false,
863
- "lstrip": false,
864
- "rstrip": false,
865
- "normalized": false,
866
- "special": true
867
- },
868
- {
869
- "id": 50351,
870
- "content": "<extra_id_93>",
871
- "single_word": false,
872
- "lstrip": false,
873
- "rstrip": false,
874
- "normalized": false,
875
- "special": true
876
- },
877
- {
878
- "id": 50352,
879
- "content": "<extra_id_94>",
880
- "single_word": false,
881
- "lstrip": false,
882
- "rstrip": false,
883
- "normalized": false,
884
- "special": true
885
- },
886
- {
887
- "id": 50353,
888
- "content": "<extra_id_95>",
889
- "single_word": false,
890
- "lstrip": false,
891
- "rstrip": false,
892
- "normalized": false,
893
- "special": true
894
- },
895
- {
896
- "id": 50354,
897
- "content": "<extra_id_96>",
898
- "single_word": false,
899
- "lstrip": false,
900
- "rstrip": false,
901
- "normalized": false,
902
- "special": true
903
- },
904
- {
905
- "id": 50355,
906
- "content": "<extra_id_97>",
907
- "single_word": false,
908
- "lstrip": false,
909
- "rstrip": false,
910
- "normalized": false,
911
- "special": true
912
- },
913
- {
914
- "id": 50356,
915
- "content": "<extra_id_98>",
916
- "single_word": false,
917
- "lstrip": false,
918
- "rstrip": false,
919
- "normalized": false,
920
- "special": true
921
- },
922
- {
923
- "id": 50357,
924
- "content": "<extra_id_99>",
925
- "single_word": false,
926
- "lstrip": false,
927
- "rstrip": false,
928
- "normalized": false,
929
- "special": true
930
  }
931
  ],
932
  "normalizer": {
@@ -101262,4 +100362,4 @@
101262
  "ìĤ¬ìĭľ ëĬĶëį°"
101263
  ]
101264
  }
101265
- }
 
27
  "rstrip": false,
28
  "normalized": false,
29
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  }
31
  ],
32
  "normalizer": {
 
100362
  "ìĤ¬ìĭľ ëĬĶëį°"
100363
  ]
100364
  }
100365
+ }
tokenizer_config.json CHANGED
@@ -1 +1 @@
1
- {"eos_token": "</s>", "unk_token": "<pad>", "pad_token": "<pad>", "extra_ids": 100, "additional_special_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "<extra_id_4>", "<extra_id_5>", "<extra_id_6>", "<extra_id_7>", "<extra_id_8>", "<extra_id_9>", "<extra_id_10>", "<extra_id_11>", "<extra_id_12>", "<extra_id_13>", "<extra_id_14>", "<extra_id_15>", "<extra_id_16>", "<extra_id_17>", "<extra_id_18>", "<extra_id_19>", "<extra_id_20>", "<extra_id_21>", "<extra_id_22>", "<extra_id_23>", "<extra_id_24>", "<extra_id_25>", "<extra_id_26>", "<extra_id_27>", "<extra_id_28>", "<extra_id_29>", "<extra_id_30>", "<extra_id_31>", "<extra_id_32>", "<extra_id_33>", "<extra_id_34>", "<extra_id_35>", "<extra_id_36>", "<extra_id_37>", "<extra_id_38>", "<extra_id_39>", "<extra_id_40>", "<extra_id_41>", "<extra_id_42>", "<extra_id_43>", "<extra_id_44>", "<extra_id_45>", "<extra_id_46>", "<extra_id_47>", "<extra_id_48>", "<extra_id_49>", "<extra_id_50>", "<extra_id_51>", "<extra_id_52>", "<extra_id_53>", "<extra_id_54>", "<extra_id_55>", "<extra_id_56>", "<extra_id_57>", "<extra_id_58>", "<extra_id_59>", "<extra_id_60>", "<extra_id_61>", "<extra_id_62>", "<extra_id_63>", "<extra_id_64>", "<extra_id_65>", "<extra_id_66>", "<extra_id_67>", "<extra_id_68>", "<extra_id_69>", "<extra_id_70>", "<extra_id_71>", "<extra_id_72>", "<extra_id_73>", "<extra_id_74>", "<extra_id_75>", "<extra_id_76>", "<extra_id_77>", "<extra_id_78>", "<extra_id_79>", "<extra_id_80>", "<extra_id_81>", "<extra_id_82>", "<extra_id_83>", "<extra_id_84>", "<extra_id_85>", "<extra_id_86>", "<extra_id_87>", "<extra_id_88>", "<extra_id_89>", "<extra_id_90>", "<extra_id_91>", "<extra_id_92>", "<extra_id_93>", "<extra_id_94>", "<extra_id_95>", "<extra_id_96>", "<extra_id_97>", "<extra_id_98>", "<extra_id_99>"], "special_tokens_map_file": "./models/pko-t5/special_tokens_map.json", "name_or_path": "./models/pko-t5", "tokenizer_class": "T5TokenizerFast"}
 
1
+ {"eos_token": "</s>", "unk_token": "<pad>", "pad_token": "<pad>", "special_tokens_map_file": "./models/pko-t5/special_tokens_map.json", "name_or_path": "./models/pko-t5", "tokenizer_class": "T5TokenizerFast"}