Xenova HF staff committed on
Commit
944d506
1 Parent(s): 2b50336

Upload folder using huggingface_hub

Browse files
config.json CHANGED
@@ -133,11 +133,14 @@
133
  47282,
134
  49146,
135
  50257,
 
 
136
  50359,
137
  50360,
138
  50361
139
  ],
140
- "transformers_version": "4.27.2",
 
141
  "use_cache": true,
142
  "use_weighted_layer_sum": false,
143
  "vocab_size": 51864
 
133
  47282,
134
  49146,
135
  50257,
136
+ 50357,
137
+ 50358,
138
  50359,
139
  50360,
140
  50361
141
  ],
142
+ "torch_dtype": "float32",
143
+ "transformers_version": "4.29.2",
144
  "use_cache": true,
145
  "use_weighted_layer_sum": false,
146
  "vocab_size": 51864
generation_config.json CHANGED
@@ -103,9 +103,11 @@
103
  47282,
104
  49146,
105
  50257,
 
 
106
  50359,
107
  50360,
108
  50361
109
  ],
110
- "transformers_version": "4.27.0.dev0"
111
  }
 
103
  47282,
104
  49146,
105
  50257,
106
+ 50357,
107
+ 50358,
108
  50359,
109
  50360,
110
  50361
111
  ],
112
+ "transformers_version": "4.29.2"
113
  }
merges.txt CHANGED
@@ -1,4 +1,4 @@
1
- #version: 0.2 - Trained by `huggingface/tokenizers`
2
  Ġ t
3
  Ġ a
4
  h e
 
1
+ #version: 0.2
2
  Ġ t
3
  Ġ a
4
  h e
onnx/decoder_model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea38750c855e4a27256f08057fd9fb91f4ebb4705d6717522728ac9789a71394
3
+ size 774153408
onnx/decoder_model_merged.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4416ce1dec9f8ed7860ed13a0da8f8ef5ecc36875335ba5858442eeee6a23be
3
+ size 774588347
onnx/decoder_model_merged_quantized.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f190877dc4864b3be49117d3119c0f6be5ea3b5094fe62883de7dd176ad606a8
3
- size 196987486
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af2f890cd2d7e7e4911dcb15736d9f231ebc2301f7e12f7e3087d44f43dca91f
3
+ size 196431022
onnx/decoder_model_quantized.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:65f4f9490108aed882d00c037ef6dfec10a688da713d80d17a0caf27ea81d046
3
- size 196596817
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:053b11d598b22f489e0075714ca9191f4016c356544131b908f2eb87648d72c2
3
+ size 195737631
onnx/decoder_with_past_model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d605634ae33667a23a37100bb2d014d5cb99f774f3fe5891d0d4d777264b5990
3
+ size 717416719
onnx/decoder_with_past_model_quantized.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e9b6fc022fc14723de0769485040b034280a0685328ee3ea44f1922d2effb3bd
3
- size 182188068
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5b511413b04bb36148c2b45799223c14d5075bd6b1f16d16145d14fb6adc57f
3
+ size 181417229
onnx/encoder_model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6590ebbed3fe0be31ca86ab0b8b6e086114e4da0f81421a4d06aeec5335d97d3
3
+ size 352812843
onnx/encoder_model_quantized.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f25862e6297bed816b7ec75f51b0c1f937e312e1032362b579f4cd6e1c8b4395
3
- size 92704740
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc7c82e4908c040e156b4063f6cc319b5f31d68ff5812a7e4ac05659cb88ac5c
3
+ size 92289703
preprocessor_config.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer.json CHANGED
@@ -5,973 +5,974 @@
5
  "added_tokens": [
6
  {
7
  "id": 50256,
8
- "special": true,
9
  "content": "<|endoftext|>",
10
  "single_word": false,
11
  "lstrip": false,
12
  "rstrip": false,
13
- "normalized": false
 
14
  },
15
  {
16
  "id": 50257,
17
- "special": true,
18
  "content": "<|startoftranscript|>",
19
  "single_word": false,
20
  "lstrip": false,
21
  "rstrip": false,
22
- "normalized": false
 
23
  },
24
  {
25
  "id": 50258,
26
- "special": true,
27
  "content": "<|en|>",
28
  "single_word": false,
29
  "lstrip": false,
30
  "rstrip": false,
31
- "normalized": false
 
32
  },
33
  {
34
  "id": 50259,
35
- "special": true,
36
  "content": "<|zh|>",
37
  "single_word": false,
38
  "lstrip": false,
39
  "rstrip": false,
40
- "normalized": false
 
41
  },
42
  {
43
  "id": 50260,
44
- "special": true,
45
  "content": "<|de|>",
46
  "single_word": false,
47
  "lstrip": false,
48
  "rstrip": false,
49
- "normalized": false
 
50
  },
51
  {
52
  "id": 50261,
53
- "special": true,
54
  "content": "<|es|>",
55
  "single_word": false,
56
  "lstrip": false,
57
  "rstrip": false,
58
- "normalized": false
 
59
  },
60
  {
61
  "id": 50262,
62
- "special": true,
63
  "content": "<|ru|>",
64
  "single_word": false,
65
  "lstrip": false,
66
  "rstrip": false,
67
- "normalized": false
 
68
  },
69
  {
70
  "id": 50263,
71
- "special": true,
72
  "content": "<|ko|>",
73
  "single_word": false,
74
  "lstrip": false,
75
  "rstrip": false,
76
- "normalized": false
 
77
  },
78
  {
79
  "id": 50264,
80
- "special": true,
81
  "content": "<|fr|>",
82
  "single_word": false,
83
  "lstrip": false,
84
  "rstrip": false,
85
- "normalized": false
 
86
  },
87
  {
88
  "id": 50265,
89
- "special": true,
90
  "content": "<|ja|>",
91
  "single_word": false,
92
  "lstrip": false,
93
  "rstrip": false,
94
- "normalized": false
 
95
  },
96
  {
97
  "id": 50266,
98
- "special": true,
99
  "content": "<|pt|>",
100
  "single_word": false,
101
  "lstrip": false,
102
  "rstrip": false,
103
- "normalized": false
 
104
  },
105
  {
106
  "id": 50267,
107
- "special": true,
108
  "content": "<|tr|>",
109
  "single_word": false,
110
  "lstrip": false,
111
  "rstrip": false,
112
- "normalized": false
 
113
  },
114
  {
115
  "id": 50268,
116
- "special": true,
117
  "content": "<|pl|>",
118
  "single_word": false,
119
  "lstrip": false,
120
  "rstrip": false,
121
- "normalized": false
 
122
  },
123
  {
124
  "id": 50269,
125
- "special": true,
126
  "content": "<|ca|>",
127
  "single_word": false,
128
  "lstrip": false,
129
  "rstrip": false,
130
- "normalized": false
 
131
  },
132
  {
133
  "id": 50270,
134
- "special": true,
135
  "content": "<|nl|>",
136
  "single_word": false,
137
  "lstrip": false,
138
  "rstrip": false,
139
- "normalized": false
 
140
  },
141
  {
142
  "id": 50271,
143
- "special": true,
144
  "content": "<|ar|>",
145
  "single_word": false,
146
  "lstrip": false,
147
  "rstrip": false,
148
- "normalized": false
 
149
  },
150
  {
151
  "id": 50272,
152
- "special": true,
153
  "content": "<|sv|>",
154
  "single_word": false,
155
  "lstrip": false,
156
  "rstrip": false,
157
- "normalized": false
 
158
  },
159
  {
160
  "id": 50273,
161
- "special": true,
162
  "content": "<|it|>",
163
  "single_word": false,
164
  "lstrip": false,
165
  "rstrip": false,
166
- "normalized": false
 
167
  },
168
  {
169
  "id": 50274,
170
- "special": true,
171
  "content": "<|id|>",
172
  "single_word": false,
173
  "lstrip": false,
174
  "rstrip": false,
175
- "normalized": false
 
176
  },
177
  {
178
  "id": 50275,
179
- "special": true,
180
  "content": "<|hi|>",
181
  "single_word": false,
182
  "lstrip": false,
183
  "rstrip": false,
184
- "normalized": false
 
185
  },
186
  {
187
  "id": 50276,
188
- "special": true,
189
  "content": "<|fi|>",
190
  "single_word": false,
191
  "lstrip": false,
192
  "rstrip": false,
193
- "normalized": false
 
194
  },
195
  {
196
  "id": 50277,
197
- "special": true,
198
  "content": "<|vi|>",
199
  "single_word": false,
200
  "lstrip": false,
201
  "rstrip": false,
202
- "normalized": false
 
203
  },
204
  {
205
  "id": 50278,
206
- "special": true,
207
  "content": "<|iw|>",
208
  "single_word": false,
209
  "lstrip": false,
210
  "rstrip": false,
211
- "normalized": false
 
212
  },
213
  {
214
  "id": 50279,
215
- "special": true,
216
  "content": "<|uk|>",
217
  "single_word": false,
218
  "lstrip": false,
219
  "rstrip": false,
220
- "normalized": false
 
221
  },
222
  {
223
  "id": 50280,
224
- "special": true,
225
  "content": "<|el|>",
226
  "single_word": false,
227
  "lstrip": false,
228
  "rstrip": false,
229
- "normalized": false
 
230
  },
231
  {
232
  "id": 50281,
233
- "special": true,
234
  "content": "<|ms|>",
235
  "single_word": false,
236
  "lstrip": false,
237
  "rstrip": false,
238
- "normalized": false
 
239
  },
240
  {
241
  "id": 50282,
242
- "special": true,
243
  "content": "<|cs|>",
244
  "single_word": false,
245
  "lstrip": false,
246
  "rstrip": false,
247
- "normalized": false
 
248
  },
249
  {
250
  "id": 50283,
251
- "special": true,
252
  "content": "<|ro|>",
253
  "single_word": false,
254
  "lstrip": false,
255
  "rstrip": false,
256
- "normalized": false
 
257
  },
258
  {
259
  "id": 50284,
260
- "special": true,
261
  "content": "<|da|>",
262
  "single_word": false,
263
  "lstrip": false,
264
  "rstrip": false,
265
- "normalized": false
 
266
  },
267
  {
268
  "id": 50285,
269
- "special": true,
270
  "content": "<|hu|>",
271
  "single_word": false,
272
  "lstrip": false,
273
  "rstrip": false,
274
- "normalized": false
 
275
  },
276
  {
277
  "id": 50286,
278
- "special": true,
279
  "content": "<|ta|>",
280
  "single_word": false,
281
  "lstrip": false,
282
  "rstrip": false,
283
- "normalized": false
 
284
  },
285
  {
286
  "id": 50287,
287
- "special": true,
288
  "content": "<|no|>",
289
  "single_word": false,
290
  "lstrip": false,
291
  "rstrip": false,
292
- "normalized": false
 
293
  },
294
  {
295
  "id": 50288,
296
- "special": true,
297
  "content": "<|th|>",
298
  "single_word": false,
299
  "lstrip": false,
300
  "rstrip": false,
301
- "normalized": false
 
302
  },
303
  {
304
  "id": 50289,
305
- "special": true,
306
  "content": "<|ur|>",
307
  "single_word": false,
308
  "lstrip": false,
309
  "rstrip": false,
310
- "normalized": false
 
311
  },
312
  {
313
  "id": 50290,
314
- "special": true,
315
  "content": "<|hr|>",
316
  "single_word": false,
317
  "lstrip": false,
318
  "rstrip": false,
319
- "normalized": false
 
320
  },
321
  {
322
  "id": 50291,
323
- "special": true,
324
  "content": "<|bg|>",
325
  "single_word": false,
326
  "lstrip": false,
327
  "rstrip": false,
328
- "normalized": false
 
329
  },
330
  {
331
  "id": 50292,
332
- "special": true,
333
  "content": "<|lt|>",
334
  "single_word": false,
335
  "lstrip": false,
336
  "rstrip": false,
337
- "normalized": false
 
338
  },
339
  {
340
  "id": 50293,
341
- "special": true,
342
  "content": "<|la|>",
343
  "single_word": false,
344
  "lstrip": false,
345
  "rstrip": false,
346
- "normalized": false
 
347
  },
348
  {
349
  "id": 50294,
350
- "special": true,
351
  "content": "<|mi|>",
352
  "single_word": false,
353
  "lstrip": false,
354
  "rstrip": false,
355
- "normalized": false
 
356
  },
357
  {
358
  "id": 50295,
359
- "special": true,
360
  "content": "<|ml|>",
361
  "single_word": false,
362
  "lstrip": false,
363
  "rstrip": false,
364
- "normalized": false
 
365
  },
366
  {
367
  "id": 50296,
368
- "special": true,
369
  "content": "<|cy|>",
370
  "single_word": false,
371
  "lstrip": false,
372
  "rstrip": false,
373
- "normalized": false
 
374
  },
375
  {
376
  "id": 50297,
377
- "special": true,
378
  "content": "<|sk|>",
379
  "single_word": false,
380
  "lstrip": false,
381
  "rstrip": false,
382
- "normalized": false
 
383
  },
384
  {
385
  "id": 50298,
386
- "special": true,
387
  "content": "<|te|>",
388
  "single_word": false,
389
  "lstrip": false,
390
  "rstrip": false,
391
- "normalized": false
 
392
  },
393
  {
394
  "id": 50299,
395
- "special": true,
396
  "content": "<|fa|>",
397
  "single_word": false,
398
  "lstrip": false,
399
  "rstrip": false,
400
- "normalized": false
 
401
  },
402
  {
403
  "id": 50300,
404
- "special": true,
405
  "content": "<|lv|>",
406
  "single_word": false,
407
  "lstrip": false,
408
  "rstrip": false,
409
- "normalized": false
 
410
  },
411
  {
412
  "id": 50301,
413
- "special": true,
414
  "content": "<|bn|>",
415
  "single_word": false,
416
  "lstrip": false,
417
  "rstrip": false,
418
- "normalized": false
 
419
  },
420
  {
421
  "id": 50302,
422
- "special": true,
423
  "content": "<|sr|>",
424
  "single_word": false,
425
  "lstrip": false,
426
  "rstrip": false,
427
- "normalized": false
 
428
  },
429
  {
430
  "id": 50303,
431
- "special": true,
432
  "content": "<|az|>",
433
  "single_word": false,
434
  "lstrip": false,
435
  "rstrip": false,
436
- "normalized": false
 
437
  },
438
  {
439
  "id": 50304,
440
- "special": true,
441
  "content": "<|sl|>",
442
  "single_word": false,
443
  "lstrip": false,
444
  "rstrip": false,
445
- "normalized": false
 
446
  },
447
  {
448
  "id": 50305,
449
- "special": true,
450
  "content": "<|kn|>",
451
  "single_word": false,
452
  "lstrip": false,
453
  "rstrip": false,
454
- "normalized": false
 
455
  },
456
  {
457
  "id": 50306,
458
- "special": true,
459
  "content": "<|et|>",
460
  "single_word": false,
461
  "lstrip": false,
462
  "rstrip": false,
463
- "normalized": false
 
464
  },
465
  {
466
  "id": 50307,
467
- "special": true,
468
  "content": "<|mk|>",
469
  "single_word": false,
470
  "lstrip": false,
471
  "rstrip": false,
472
- "normalized": false
 
473
  },
474
  {
475
  "id": 50308,
476
- "special": true,
477
  "content": "<|br|>",
478
  "single_word": false,
479
  "lstrip": false,
480
  "rstrip": false,
481
- "normalized": false
 
482
  },
483
  {
484
  "id": 50309,
485
- "special": true,
486
  "content": "<|eu|>",
487
  "single_word": false,
488
  "lstrip": false,
489
  "rstrip": false,
490
- "normalized": false
 
491
  },
492
  {
493
  "id": 50310,
494
- "special": true,
495
  "content": "<|is|>",
496
  "single_word": false,
497
  "lstrip": false,
498
  "rstrip": false,
499
- "normalized": false
 
500
  },
501
  {
502
  "id": 50311,
503
- "special": true,
504
  "content": "<|hy|>",
505
  "single_word": false,
506
  "lstrip": false,
507
  "rstrip": false,
508
- "normalized": false
 
509
  },
510
  {
511
  "id": 50312,
512
- "special": true,
513
  "content": "<|ne|>",
514
  "single_word": false,
515
  "lstrip": false,
516
  "rstrip": false,
517
- "normalized": false
 
518
  },
519
  {
520
  "id": 50313,
521
- "special": true,
522
  "content": "<|mn|>",
523
  "single_word": false,
524
  "lstrip": false,
525
  "rstrip": false,
526
- "normalized": false
 
527
  },
528
  {
529
  "id": 50314,
530
- "special": true,
531
  "content": "<|bs|>",
532
  "single_word": false,
533
  "lstrip": false,
534
  "rstrip": false,
535
- "normalized": false
 
536
  },
537
  {
538
  "id": 50315,
539
- "special": true,
540
  "content": "<|kk|>",
541
  "single_word": false,
542
  "lstrip": false,
543
  "rstrip": false,
544
- "normalized": false
 
545
  },
546
  {
547
  "id": 50316,
548
- "special": true,
549
  "content": "<|sq|>",
550
  "single_word": false,
551
  "lstrip": false,
552
  "rstrip": false,
553
- "normalized": false
 
554
  },
555
  {
556
  "id": 50317,
557
- "special": true,
558
  "content": "<|sw|>",
559
  "single_word": false,
560
  "lstrip": false,
561
  "rstrip": false,
562
- "normalized": false
 
563
  },
564
  {
565
  "id": 50318,
566
- "special": true,
567
  "content": "<|gl|>",
568
  "single_word": false,
569
  "lstrip": false,
570
  "rstrip": false,
571
- "normalized": false
 
572
  },
573
  {
574
  "id": 50319,
575
- "special": true,
576
  "content": "<|mr|>",
577
  "single_word": false,
578
  "lstrip": false,
579
  "rstrip": false,
580
- "normalized": false
 
581
  },
582
  {
583
  "id": 50320,
584
- "special": true,
585
  "content": "<|pa|>",
586
  "single_word": false,
587
  "lstrip": false,
588
  "rstrip": false,
589
- "normalized": false
 
590
  },
591
  {
592
  "id": 50321,
593
- "special": true,
594
  "content": "<|si|>",
595
  "single_word": false,
596
  "lstrip": false,
597
  "rstrip": false,
598
- "normalized": false
 
599
  },
600
  {
601
  "id": 50322,
602
- "special": true,
603
  "content": "<|km|>",
604
  "single_word": false,
605
  "lstrip": false,
606
  "rstrip": false,
607
- "normalized": false
 
608
  },
609
  {
610
  "id": 50323,
611
- "special": true,
612
  "content": "<|sn|>",
613
  "single_word": false,
614
  "lstrip": false,
615
  "rstrip": false,
616
- "normalized": false
 
617
  },
618
  {
619
  "id": 50324,
620
- "special": true,
621
  "content": "<|yo|>",
622
  "single_word": false,
623
  "lstrip": false,
624
  "rstrip": false,
625
- "normalized": false
 
626
  },
627
  {
628
  "id": 50325,
629
- "special": true,
630
  "content": "<|so|>",
631
  "single_word": false,
632
  "lstrip": false,
633
  "rstrip": false,
634
- "normalized": false
 
635
  },
636
  {
637
  "id": 50326,
638
- "special": true,
639
  "content": "<|af|>",
640
  "single_word": false,
641
  "lstrip": false,
642
  "rstrip": false,
643
- "normalized": false
 
644
  },
645
  {
646
  "id": 50327,
647
- "special": true,
648
  "content": "<|oc|>",
649
  "single_word": false,
650
  "lstrip": false,
651
  "rstrip": false,
652
- "normalized": false
 
653
  },
654
  {
655
  "id": 50328,
656
- "special": true,
657
  "content": "<|ka|>",
658
  "single_word": false,
659
  "lstrip": false,
660
  "rstrip": false,
661
- "normalized": false
 
662
  },
663
  {
664
  "id": 50329,
665
- "special": true,
666
  "content": "<|be|>",
667
  "single_word": false,
668
  "lstrip": false,
669
  "rstrip": false,
670
- "normalized": false
 
671
  },
672
  {
673
  "id": 50330,
674
- "special": true,
675
  "content": "<|tg|>",
676
  "single_word": false,
677
  "lstrip": false,
678
  "rstrip": false,
679
- "normalized": false
 
680
  },
681
  {
682
  "id": 50331,
683
- "special": true,
684
  "content": "<|sd|>",
685
  "single_word": false,
686
  "lstrip": false,
687
  "rstrip": false,
688
- "normalized": false
 
689
  },
690
  {
691
  "id": 50332,
692
- "special": true,
693
  "content": "<|gu|>",
694
  "single_word": false,
695
  "lstrip": false,
696
  "rstrip": false,
697
- "normalized": false
 
698
  },
699
  {
700
  "id": 50333,
701
- "special": true,
702
  "content": "<|am|>",
703
  "single_word": false,
704
  "lstrip": false,
705
  "rstrip": false,
706
- "normalized": false
 
707
  },
708
  {
709
  "id": 50334,
710
- "special": true,
711
  "content": "<|yi|>",
712
  "single_word": false,
713
  "lstrip": false,
714
  "rstrip": false,
715
- "normalized": false
 
716
  },
717
  {
718
  "id": 50335,
719
- "special": true,
720
  "content": "<|lo|>",
721
  "single_word": false,
722
  "lstrip": false,
723
  "rstrip": false,
724
- "normalized": false
 
725
  },
726
  {
727
  "id": 50336,
728
- "special": true,
729
  "content": "<|uz|>",
730
  "single_word": false,
731
  "lstrip": false,
732
  "rstrip": false,
733
- "normalized": false
 
734
  },
735
  {
736
  "id": 50337,
737
- "special": true,
738
  "content": "<|fo|>",
739
  "single_word": false,
740
  "lstrip": false,
741
  "rstrip": false,
742
- "normalized": false
 
743
  },
744
  {
745
  "id": 50338,
746
- "special": true,
747
  "content": "<|ht|>",
748
  "single_word": false,
749
  "lstrip": false,
750
  "rstrip": false,
751
- "normalized": false
 
752
  },
753
  {
754
  "id": 50339,
755
- "special": true,
756
  "content": "<|ps|>",
757
  "single_word": false,
758
  "lstrip": false,
759
  "rstrip": false,
760
- "normalized": false
 
761
  },
762
  {
763
  "id": 50340,
764
- "special": true,
765
  "content": "<|tk|>",
766
  "single_word": false,
767
  "lstrip": false,
768
  "rstrip": false,
769
- "normalized": false
 
770
  },
771
  {
772
  "id": 50341,
773
- "special": true,
774
  "content": "<|nn|>",
775
  "single_word": false,
776
  "lstrip": false,
777
  "rstrip": false,
778
- "normalized": false
 
779
  },
780
  {
781
  "id": 50342,
782
- "special": true,
783
  "content": "<|mt|>",
784
  "single_word": false,
785
  "lstrip": false,
786
  "rstrip": false,
787
- "normalized": false
 
788
  },
789
  {
790
  "id": 50343,
791
- "special": true,
792
  "content": "<|sa|>",
793
  "single_word": false,
794
  "lstrip": false,
795
  "rstrip": false,
796
- "normalized": false
 
797
  },
798
  {
799
  "id": 50344,
800
- "special": true,
801
  "content": "<|lb|>",
802
  "single_word": false,
803
  "lstrip": false,
804
  "rstrip": false,
805
- "normalized": false
 
806
  },
807
  {
808
  "id": 50345,
809
- "special": true,
810
  "content": "<|my|>",
811
  "single_word": false,
812
  "lstrip": false,
813
  "rstrip": false,
814
- "normalized": false
 
815
  },
816
  {
817
  "id": 50346,
818
- "special": true,
819
  "content": "<|bo|>",
820
  "single_word": false,
821
  "lstrip": false,
822
  "rstrip": false,
823
- "normalized": false
 
824
  },
825
  {
826
  "id": 50347,
827
- "special": true,
828
  "content": "<|tl|>",
829
  "single_word": false,
830
  "lstrip": false,
831
  "rstrip": false,
832
- "normalized": false
 
833
  },
834
  {
835
  "id": 50348,
836
- "special": true,
837
  "content": "<|mg|>",
838
  "single_word": false,
839
  "lstrip": false,
840
  "rstrip": false,
841
- "normalized": false
 
842
  },
843
  {
844
  "id": 50349,
845
- "special": true,
846
  "content": "<|as|>",
847
  "single_word": false,
848
  "lstrip": false,
849
  "rstrip": false,
850
- "normalized": false
 
851
  },
852
  {
853
  "id": 50350,
854
- "special": true,
855
  "content": "<|tt|>",
856
  "single_word": false,
857
  "lstrip": false,
858
  "rstrip": false,
859
- "normalized": false
 
860
  },
861
  {
862
  "id": 50351,
863
- "special": true,
864
  "content": "<|haw|>",
865
  "single_word": false,
866
  "lstrip": false,
867
  "rstrip": false,
868
- "normalized": false
 
869
  },
870
  {
871
  "id": 50352,
872
- "special": true,
873
  "content": "<|ln|>",
874
  "single_word": false,
875
  "lstrip": false,
876
  "rstrip": false,
877
- "normalized": false
 
878
  },
879
  {
880
  "id": 50353,
881
- "special": true,
882
  "content": "<|ha|>",
883
  "single_word": false,
884
  "lstrip": false,
885
  "rstrip": false,
886
- "normalized": false
 
887
  },
888
  {
889
  "id": 50354,
890
- "special": true,
891
  "content": "<|ba|>",
892
  "single_word": false,
893
  "lstrip": false,
894
  "rstrip": false,
895
- "normalized": false
 
896
  },
897
  {
898
  "id": 50355,
899
- "special": true,
900
  "content": "<|jw|>",
901
  "single_word": false,
902
  "lstrip": false,
903
  "rstrip": false,
904
- "normalized": false
 
905
  },
906
  {
907
  "id": 50356,
908
- "special": true,
909
  "content": "<|su|>",
910
  "single_word": false,
911
  "lstrip": false,
912
  "rstrip": false,
913
- "normalized": false
 
914
  },
915
  {
916
  "id": 50357,
917
- "special": true,
918
  "content": "<|translate|>",
919
  "single_word": false,
920
  "lstrip": false,
921
  "rstrip": false,
922
- "normalized": false
 
923
  },
924
  {
925
  "id": 50358,
926
- "special": true,
927
  "content": "<|transcribe|>",
928
  "single_word": false,
929
  "lstrip": false,
930
  "rstrip": false,
931
- "normalized": false
 
932
  },
933
  {
934
  "id": 50359,
935
- "special": true,
936
  "content": "<|startoflm|>",
937
  "single_word": false,
938
  "lstrip": false,
939
  "rstrip": false,
940
- "normalized": false
 
941
  },
942
  {
943
  "id": 50360,
944
- "special": true,
945
  "content": "<|startofprev|>",
946
  "single_word": false,
947
  "lstrip": false,
948
  "rstrip": false,
949
- "normalized": false
 
950
  },
951
  {
952
  "id": 50361,
953
- "special": true,
954
  "content": "<|nocaptions|>",
955
  "single_word": false,
956
  "lstrip": false,
957
  "rstrip": false,
958
- "normalized": false
 
959
  },
960
  {
961
  "id": 50362,
962
- "special": true,
963
  "content": "<|notimestamps|>",
964
  "single_word": false,
965
  "lstrip": false,
966
  "rstrip": false,
967
- "normalized": false
 
968
  }
969
  ],
970
  "normalizer": null,
971
  "pre_tokenizer": {
972
  "type": "ByteLevel",
973
  "add_prefix_space": false,
974
- "trim_offsets": true
 
975
  },
976
  "post_processor": {
977
  "type": "TemplateProcessing",
@@ -1066,7 +1067,8 @@
1066
  "decoder": {
1067
  "type": "ByteLevel",
1068
  "add_prefix_space": true,
1069
- "trim_offsets": true
 
1070
  },
1071
  "model": {
1072
  "type": "BPE",
@@ -1075,6 +1077,7 @@
1075
  "continuing_subword_prefix": "",
1076
  "end_of_word_suffix": "",
1077
  "fuse_unk": false,
 
1078
  "vocab": {
1079
  "!": 0,
1080
  "\"": 1,
 
5
  "added_tokens": [
6
  {
7
  "id": 50256,
 
8
  "content": "<|endoftext|>",
9
  "single_word": false,
10
  "lstrip": false,
11
  "rstrip": false,
12
+ "normalized": false,
13
+ "special": true
14
  },
15
  {
16
  "id": 50257,
 
17
  "content": "<|startoftranscript|>",
18
  "single_word": false,
19
  "lstrip": false,
20
  "rstrip": false,
21
+ "normalized": false,
22
+ "special": true
23
  },
24
  {
25
  "id": 50258,
 
26
  "content": "<|en|>",
27
  "single_word": false,
28
  "lstrip": false,
29
  "rstrip": false,
30
+ "normalized": false,
31
+ "special": true
32
  },
33
  {
34
  "id": 50259,
 
35
  "content": "<|zh|>",
36
  "single_word": false,
37
  "lstrip": false,
38
  "rstrip": false,
39
+ "normalized": false,
40
+ "special": true
41
  },
42
  {
43
  "id": 50260,
 
44
  "content": "<|de|>",
45
  "single_word": false,
46
  "lstrip": false,
47
  "rstrip": false,
48
+ "normalized": false,
49
+ "special": true
50
  },
51
  {
52
  "id": 50261,
 
53
  "content": "<|es|>",
54
  "single_word": false,
55
  "lstrip": false,
56
  "rstrip": false,
57
+ "normalized": false,
58
+ "special": true
59
  },
60
  {
61
  "id": 50262,
 
62
  "content": "<|ru|>",
63
  "single_word": false,
64
  "lstrip": false,
65
  "rstrip": false,
66
+ "normalized": false,
67
+ "special": true
68
  },
69
  {
70
  "id": 50263,
 
71
  "content": "<|ko|>",
72
  "single_word": false,
73
  "lstrip": false,
74
  "rstrip": false,
75
+ "normalized": false,
76
+ "special": true
77
  },
78
  {
79
  "id": 50264,
 
80
  "content": "<|fr|>",
81
  "single_word": false,
82
  "lstrip": false,
83
  "rstrip": false,
84
+ "normalized": false,
85
+ "special": true
86
  },
87
  {
88
  "id": 50265,
 
89
  "content": "<|ja|>",
90
  "single_word": false,
91
  "lstrip": false,
92
  "rstrip": false,
93
+ "normalized": false,
94
+ "special": true
95
  },
96
  {
97
  "id": 50266,
 
98
  "content": "<|pt|>",
99
  "single_word": false,
100
  "lstrip": false,
101
  "rstrip": false,
102
+ "normalized": false,
103
+ "special": true
104
  },
105
  {
106
  "id": 50267,
 
107
  "content": "<|tr|>",
108
  "single_word": false,
109
  "lstrip": false,
110
  "rstrip": false,
111
+ "normalized": false,
112
+ "special": true
113
  },
114
  {
115
  "id": 50268,
 
116
  "content": "<|pl|>",
117
  "single_word": false,
118
  "lstrip": false,
119
  "rstrip": false,
120
+ "normalized": false,
121
+ "special": true
122
  },
123
  {
124
  "id": 50269,
 
125
  "content": "<|ca|>",
126
  "single_word": false,
127
  "lstrip": false,
128
  "rstrip": false,
129
+ "normalized": false,
130
+ "special": true
131
  },
132
  {
133
  "id": 50270,
 
134
  "content": "<|nl|>",
135
  "single_word": false,
136
  "lstrip": false,
137
  "rstrip": false,
138
+ "normalized": false,
139
+ "special": true
140
  },
141
  {
142
  "id": 50271,
 
143
  "content": "<|ar|>",
144
  "single_word": false,
145
  "lstrip": false,
146
  "rstrip": false,
147
+ "normalized": false,
148
+ "special": true
149
  },
150
  {
151
  "id": 50272,
 
152
  "content": "<|sv|>",
153
  "single_word": false,
154
  "lstrip": false,
155
  "rstrip": false,
156
+ "normalized": false,
157
+ "special": true
158
  },
159
  {
160
  "id": 50273,
 
161
  "content": "<|it|>",
162
  "single_word": false,
163
  "lstrip": false,
164
  "rstrip": false,
165
+ "normalized": false,
166
+ "special": true
167
  },
168
  {
169
  "id": 50274,
 
170
  "content": "<|id|>",
171
  "single_word": false,
172
  "lstrip": false,
173
  "rstrip": false,
174
+ "normalized": false,
175
+ "special": true
176
  },
177
  {
178
  "id": 50275,
 
179
  "content": "<|hi|>",
180
  "single_word": false,
181
  "lstrip": false,
182
  "rstrip": false,
183
+ "normalized": false,
184
+ "special": true
185
  },
186
  {
187
  "id": 50276,
 
188
  "content": "<|fi|>",
189
  "single_word": false,
190
  "lstrip": false,
191
  "rstrip": false,
192
+ "normalized": false,
193
+ "special": true
194
  },
195
  {
196
  "id": 50277,
 
197
  "content": "<|vi|>",
198
  "single_word": false,
199
  "lstrip": false,
200
  "rstrip": false,
201
+ "normalized": false,
202
+ "special": true
203
  },
204
  {
205
  "id": 50278,
 
206
  "content": "<|iw|>",
207
  "single_word": false,
208
  "lstrip": false,
209
  "rstrip": false,
210
+ "normalized": false,
211
+ "special": true
212
  },
213
  {
214
  "id": 50279,
 
215
  "content": "<|uk|>",
216
  "single_word": false,
217
  "lstrip": false,
218
  "rstrip": false,
219
+ "normalized": false,
220
+ "special": true
221
  },
222
  {
223
  "id": 50280,
 
224
  "content": "<|el|>",
225
  "single_word": false,
226
  "lstrip": false,
227
  "rstrip": false,
228
+ "normalized": false,
229
+ "special": true
230
  },
231
  {
232
  "id": 50281,
 
233
  "content": "<|ms|>",
234
  "single_word": false,
235
  "lstrip": false,
236
  "rstrip": false,
237
+ "normalized": false,
238
+ "special": true
239
  },
240
  {
241
  "id": 50282,
 
242
  "content": "<|cs|>",
243
  "single_word": false,
244
  "lstrip": false,
245
  "rstrip": false,
246
+ "normalized": false,
247
+ "special": true
248
  },
249
  {
250
  "id": 50283,
 
251
  "content": "<|ro|>",
252
  "single_word": false,
253
  "lstrip": false,
254
  "rstrip": false,
255
+ "normalized": false,
256
+ "special": true
257
  },
258
  {
259
  "id": 50284,
 
260
  "content": "<|da|>",
261
  "single_word": false,
262
  "lstrip": false,
263
  "rstrip": false,
264
+ "normalized": false,
265
+ "special": true
266
  },
267
  {
268
  "id": 50285,
 
269
  "content": "<|hu|>",
270
  "single_word": false,
271
  "lstrip": false,
272
  "rstrip": false,
273
+ "normalized": false,
274
+ "special": true
275
  },
276
  {
277
  "id": 50286,
 
278
  "content": "<|ta|>",
279
  "single_word": false,
280
  "lstrip": false,
281
  "rstrip": false,
282
+ "normalized": false,
283
+ "special": true
284
  },
285
  {
286
  "id": 50287,
 
287
  "content": "<|no|>",
288
  "single_word": false,
289
  "lstrip": false,
290
  "rstrip": false,
291
+ "normalized": false,
292
+ "special": true
293
  },
294
  {
295
  "id": 50288,
 
296
  "content": "<|th|>",
297
  "single_word": false,
298
  "lstrip": false,
299
  "rstrip": false,
300
+ "normalized": false,
301
+ "special": true
302
  },
303
  {
304
  "id": 50289,
 
305
  "content": "<|ur|>",
306
  "single_word": false,
307
  "lstrip": false,
308
  "rstrip": false,
309
+ "normalized": false,
310
+ "special": true
311
  },
312
  {
313
  "id": 50290,
 
314
  "content": "<|hr|>",
315
  "single_word": false,
316
  "lstrip": false,
317
  "rstrip": false,
318
+ "normalized": false,
319
+ "special": true
320
  },
321
  {
322
  "id": 50291,
 
323
  "content": "<|bg|>",
324
  "single_word": false,
325
  "lstrip": false,
326
  "rstrip": false,
327
+ "normalized": false,
328
+ "special": true
329
  },
330
  {
331
  "id": 50292,
 
332
  "content": "<|lt|>",
333
  "single_word": false,
334
  "lstrip": false,
335
  "rstrip": false,
336
+ "normalized": false,
337
+ "special": true
338
  },
339
  {
340
  "id": 50293,
 
341
  "content": "<|la|>",
342
  "single_word": false,
343
  "lstrip": false,
344
  "rstrip": false,
345
+ "normalized": false,
346
+ "special": true
347
  },
348
  {
349
  "id": 50294,
 
350
  "content": "<|mi|>",
351
  "single_word": false,
352
  "lstrip": false,
353
  "rstrip": false,
354
+ "normalized": false,
355
+ "special": true
356
  },
357
  {
358
  "id": 50295,
 
359
  "content": "<|ml|>",
360
  "single_word": false,
361
  "lstrip": false,
362
  "rstrip": false,
363
+ "normalized": false,
364
+ "special": true
365
  },
366
  {
367
  "id": 50296,
 
368
  "content": "<|cy|>",
369
  "single_word": false,
370
  "lstrip": false,
371
  "rstrip": false,
372
+ "normalized": false,
373
+ "special": true
374
  },
375
  {
376
  "id": 50297,
 
377
  "content": "<|sk|>",
378
  "single_word": false,
379
  "lstrip": false,
380
  "rstrip": false,
381
+ "normalized": false,
382
+ "special": true
383
  },
384
  {
385
  "id": 50298,
 
386
  "content": "<|te|>",
387
  "single_word": false,
388
  "lstrip": false,
389
  "rstrip": false,
390
+ "normalized": false,
391
+ "special": true
392
  },
393
  {
394
  "id": 50299,
 
395
  "content": "<|fa|>",
396
  "single_word": false,
397
  "lstrip": false,
398
  "rstrip": false,
399
+ "normalized": false,
400
+ "special": true
401
  },
402
  {
403
  "id": 50300,
 
404
  "content": "<|lv|>",
405
  "single_word": false,
406
  "lstrip": false,
407
  "rstrip": false,
408
+ "normalized": false,
409
+ "special": true
410
  },
411
  {
412
  "id": 50301,
 
413
  "content": "<|bn|>",
414
  "single_word": false,
415
  "lstrip": false,
416
  "rstrip": false,
417
+ "normalized": false,
418
+ "special": true
419
  },
420
  {
421
  "id": 50302,
 
422
  "content": "<|sr|>",
423
  "single_word": false,
424
  "lstrip": false,
425
  "rstrip": false,
426
+ "normalized": false,
427
+ "special": true
428
  },
429
  {
430
  "id": 50303,
 
431
  "content": "<|az|>",
432
  "single_word": false,
433
  "lstrip": false,
434
  "rstrip": false,
435
+ "normalized": false,
436
+ "special": true
437
  },
438
  {
439
  "id": 50304,
 
440
  "content": "<|sl|>",
441
  "single_word": false,
442
  "lstrip": false,
443
  "rstrip": false,
444
+ "normalized": false,
445
+ "special": true
446
  },
447
  {
448
  "id": 50305,
 
449
  "content": "<|kn|>",
450
  "single_word": false,
451
  "lstrip": false,
452
  "rstrip": false,
453
+ "normalized": false,
454
+ "special": true
455
  },
456
  {
457
  "id": 50306,
 
458
  "content": "<|et|>",
459
  "single_word": false,
460
  "lstrip": false,
461
  "rstrip": false,
462
+ "normalized": false,
463
+ "special": true
464
  },
465
  {
466
  "id": 50307,
 
467
  "content": "<|mk|>",
468
  "single_word": false,
469
  "lstrip": false,
470
  "rstrip": false,
471
+ "normalized": false,
472
+ "special": true
473
  },
474
  {
475
  "id": 50308,
 
476
  "content": "<|br|>",
477
  "single_word": false,
478
  "lstrip": false,
479
  "rstrip": false,
480
+ "normalized": false,
481
+ "special": true
482
  },
483
  {
484
  "id": 50309,
 
485
  "content": "<|eu|>",
486
  "single_word": false,
487
  "lstrip": false,
488
  "rstrip": false,
489
+ "normalized": false,
490
+ "special": true
491
  },
492
  {
493
  "id": 50310,
 
494
  "content": "<|is|>",
495
  "single_word": false,
496
  "lstrip": false,
497
  "rstrip": false,
498
+ "normalized": false,
499
+ "special": true
500
  },
501
  {
502
  "id": 50311,
 
503
  "content": "<|hy|>",
504
  "single_word": false,
505
  "lstrip": false,
506
  "rstrip": false,
507
+ "normalized": false,
508
+ "special": true
509
  },
510
  {
511
  "id": 50312,
 
512
  "content": "<|ne|>",
513
  "single_word": false,
514
  "lstrip": false,
515
  "rstrip": false,
516
+ "normalized": false,
517
+ "special": true
518
  },
519
  {
520
  "id": 50313,
 
521
  "content": "<|mn|>",
522
  "single_word": false,
523
  "lstrip": false,
524
  "rstrip": false,
525
+ "normalized": false,
526
+ "special": true
527
  },
528
  {
529
  "id": 50314,
 
530
  "content": "<|bs|>",
531
  "single_word": false,
532
  "lstrip": false,
533
  "rstrip": false,
534
+ "normalized": false,
535
+ "special": true
536
  },
537
  {
538
  "id": 50315,
 
539
  "content": "<|kk|>",
540
  "single_word": false,
541
  "lstrip": false,
542
  "rstrip": false,
543
+ "normalized": false,
544
+ "special": true
545
  },
546
  {
547
  "id": 50316,
 
548
  "content": "<|sq|>",
549
  "single_word": false,
550
  "lstrip": false,
551
  "rstrip": false,
552
+ "normalized": false,
553
+ "special": true
554
  },
555
  {
556
  "id": 50317,
 
557
  "content": "<|sw|>",
558
  "single_word": false,
559
  "lstrip": false,
560
  "rstrip": false,
561
+ "normalized": false,
562
+ "special": true
563
  },
564
  {
565
  "id": 50318,
 
566
  "content": "<|gl|>",
567
  "single_word": false,
568
  "lstrip": false,
569
  "rstrip": false,
570
+ "normalized": false,
571
+ "special": true
572
  },
573
  {
574
  "id": 50319,
 
575
  "content": "<|mr|>",
576
  "single_word": false,
577
  "lstrip": false,
578
  "rstrip": false,
579
+ "normalized": false,
580
+ "special": true
581
  },
582
  {
583
  "id": 50320,
 
584
  "content": "<|pa|>",
585
  "single_word": false,
586
  "lstrip": false,
587
  "rstrip": false,
588
+ "normalized": false,
589
+ "special": true
590
  },
591
  {
592
  "id": 50321,
 
593
  "content": "<|si|>",
594
  "single_word": false,
595
  "lstrip": false,
596
  "rstrip": false,
597
+ "normalized": false,
598
+ "special": true
599
  },
600
  {
601
  "id": 50322,
 
602
  "content": "<|km|>",
603
  "single_word": false,
604
  "lstrip": false,
605
  "rstrip": false,
606
+ "normalized": false,
607
+ "special": true
608
  },
609
  {
610
  "id": 50323,
 
611
  "content": "<|sn|>",
612
  "single_word": false,
613
  "lstrip": false,
614
  "rstrip": false,
615
+ "normalized": false,
616
+ "special": true
617
  },
618
  {
619
  "id": 50324,
 
620
  "content": "<|yo|>",
621
  "single_word": false,
622
  "lstrip": false,
623
  "rstrip": false,
624
+ "normalized": false,
625
+ "special": true
626
  },
627
  {
628
  "id": 50325,
 
629
  "content": "<|so|>",
630
  "single_word": false,
631
  "lstrip": false,
632
  "rstrip": false,
633
+ "normalized": false,
634
+ "special": true
635
  },
636
  {
637
  "id": 50326,
 
638
  "content": "<|af|>",
639
  "single_word": false,
640
  "lstrip": false,
641
  "rstrip": false,
642
+ "normalized": false,
643
+ "special": true
644
  },
645
  {
646
  "id": 50327,
 
647
  "content": "<|oc|>",
648
  "single_word": false,
649
  "lstrip": false,
650
  "rstrip": false,
651
+ "normalized": false,
652
+ "special": true
653
  },
654
  {
655
  "id": 50328,
 
656
  "content": "<|ka|>",
657
  "single_word": false,
658
  "lstrip": false,
659
  "rstrip": false,
660
+ "normalized": false,
661
+ "special": true
662
  },
663
  {
664
  "id": 50329,
 
665
  "content": "<|be|>",
666
  "single_word": false,
667
  "lstrip": false,
668
  "rstrip": false,
669
+ "normalized": false,
670
+ "special": true
671
  },
672
  {
673
  "id": 50330,
 
674
  "content": "<|tg|>",
675
  "single_word": false,
676
  "lstrip": false,
677
  "rstrip": false,
678
+ "normalized": false,
679
+ "special": true
680
  },
681
  {
682
  "id": 50331,
 
683
  "content": "<|sd|>",
684
  "single_word": false,
685
  "lstrip": false,
686
  "rstrip": false,
687
+ "normalized": false,
688
+ "special": true
689
  },
690
  {
691
  "id": 50332,
 
692
  "content": "<|gu|>",
693
  "single_word": false,
694
  "lstrip": false,
695
  "rstrip": false,
696
+ "normalized": false,
697
+ "special": true
698
  },
699
  {
700
  "id": 50333,
 
701
  "content": "<|am|>",
702
  "single_word": false,
703
  "lstrip": false,
704
  "rstrip": false,
705
+ "normalized": false,
706
+ "special": true
707
  },
708
  {
709
  "id": 50334,
 
710
  "content": "<|yi|>",
711
  "single_word": false,
712
  "lstrip": false,
713
  "rstrip": false,
714
+ "normalized": false,
715
+ "special": true
716
  },
717
  {
718
  "id": 50335,
 
719
  "content": "<|lo|>",
720
  "single_word": false,
721
  "lstrip": false,
722
  "rstrip": false,
723
+ "normalized": false,
724
+ "special": true
725
  },
726
  {
727
  "id": 50336,
 
728
  "content": "<|uz|>",
729
  "single_word": false,
730
  "lstrip": false,
731
  "rstrip": false,
732
+ "normalized": false,
733
+ "special": true
734
  },
735
  {
736
  "id": 50337,
 
737
  "content": "<|fo|>",
738
  "single_word": false,
739
  "lstrip": false,
740
  "rstrip": false,
741
+ "normalized": false,
742
+ "special": true
743
  },
744
  {
745
  "id": 50338,
 
746
  "content": "<|ht|>",
747
  "single_word": false,
748
  "lstrip": false,
749
  "rstrip": false,
750
+ "normalized": false,
751
+ "special": true
752
  },
753
  {
754
  "id": 50339,
 
755
  "content": "<|ps|>",
756
  "single_word": false,
757
  "lstrip": false,
758
  "rstrip": false,
759
+ "normalized": false,
760
+ "special": true
761
  },
762
  {
763
  "id": 50340,
 
764
  "content": "<|tk|>",
765
  "single_word": false,
766
  "lstrip": false,
767
  "rstrip": false,
768
+ "normalized": false,
769
+ "special": true
770
  },
771
  {
772
  "id": 50341,
 
773
  "content": "<|nn|>",
774
  "single_word": false,
775
  "lstrip": false,
776
  "rstrip": false,
777
+ "normalized": false,
778
+ "special": true
779
  },
780
  {
781
  "id": 50342,
 
782
  "content": "<|mt|>",
783
  "single_word": false,
784
  "lstrip": false,
785
  "rstrip": false,
786
+ "normalized": false,
787
+ "special": true
788
  },
789
  {
790
  "id": 50343,
 
791
  "content": "<|sa|>",
792
  "single_word": false,
793
  "lstrip": false,
794
  "rstrip": false,
795
+ "normalized": false,
796
+ "special": true
797
  },
798
  {
799
  "id": 50344,
 
800
  "content": "<|lb|>",
801
  "single_word": false,
802
  "lstrip": false,
803
  "rstrip": false,
804
+ "normalized": false,
805
+ "special": true
806
  },
807
  {
808
  "id": 50345,
 
809
  "content": "<|my|>",
810
  "single_word": false,
811
  "lstrip": false,
812
  "rstrip": false,
813
+ "normalized": false,
814
+ "special": true
815
  },
816
  {
817
  "id": 50346,
 
818
  "content": "<|bo|>",
819
  "single_word": false,
820
  "lstrip": false,
821
  "rstrip": false,
822
+ "normalized": false,
823
+ "special": true
824
  },
825
  {
826
  "id": 50347,
 
827
  "content": "<|tl|>",
828
  "single_word": false,
829
  "lstrip": false,
830
  "rstrip": false,
831
+ "normalized": false,
832
+ "special": true
833
  },
834
  {
835
  "id": 50348,
 
836
  "content": "<|mg|>",
837
  "single_word": false,
838
  "lstrip": false,
839
  "rstrip": false,
840
+ "normalized": false,
841
+ "special": true
842
  },
843
  {
844
  "id": 50349,
 
845
  "content": "<|as|>",
846
  "single_word": false,
847
  "lstrip": false,
848
  "rstrip": false,
849
+ "normalized": false,
850
+ "special": true
851
  },
852
  {
853
  "id": 50350,
 
854
  "content": "<|tt|>",
855
  "single_word": false,
856
  "lstrip": false,
857
  "rstrip": false,
858
+ "normalized": false,
859
+ "special": true
860
  },
861
  {
862
  "id": 50351,
 
863
  "content": "<|haw|>",
864
  "single_word": false,
865
  "lstrip": false,
866
  "rstrip": false,
867
+ "normalized": false,
868
+ "special": true
869
  },
870
  {
871
  "id": 50352,
 
872
  "content": "<|ln|>",
873
  "single_word": false,
874
  "lstrip": false,
875
  "rstrip": false,
876
+ "normalized": false,
877
+ "special": true
878
  },
879
  {
880
  "id": 50353,
 
881
  "content": "<|ha|>",
882
  "single_word": false,
883
  "lstrip": false,
884
  "rstrip": false,
885
+ "normalized": false,
886
+ "special": true
887
  },
888
  {
889
  "id": 50354,
 
890
  "content": "<|ba|>",
891
  "single_word": false,
892
  "lstrip": false,
893
  "rstrip": false,
894
+ "normalized": false,
895
+ "special": true
896
  },
897
  {
898
  "id": 50355,
 
899
  "content": "<|jw|>",
900
  "single_word": false,
901
  "lstrip": false,
902
  "rstrip": false,
903
+ "normalized": false,
904
+ "special": true
905
  },
906
  {
907
  "id": 50356,
 
908
  "content": "<|su|>",
909
  "single_word": false,
910
  "lstrip": false,
911
  "rstrip": false,
912
+ "normalized": false,
913
+ "special": true
914
  },
915
  {
916
  "id": 50357,
 
917
  "content": "<|translate|>",
918
  "single_word": false,
919
  "lstrip": false,
920
  "rstrip": false,
921
+ "normalized": false,
922
+ "special": true
923
  },
924
  {
925
  "id": 50358,
 
926
  "content": "<|transcribe|>",
927
  "single_word": false,
928
  "lstrip": false,
929
  "rstrip": false,
930
+ "normalized": false,
931
+ "special": true
932
  },
933
  {
934
  "id": 50359,
 
935
  "content": "<|startoflm|>",
936
  "single_word": false,
937
  "lstrip": false,
938
  "rstrip": false,
939
+ "normalized": false,
940
+ "special": true
941
  },
942
  {
943
  "id": 50360,
 
944
  "content": "<|startofprev|>",
945
  "single_word": false,
946
  "lstrip": false,
947
  "rstrip": false,
948
+ "normalized": false,
949
+ "special": true
950
  },
951
  {
952
  "id": 50361,
 
953
  "content": "<|nocaptions|>",
954
  "single_word": false,
955
  "lstrip": false,
956
  "rstrip": false,
957
+ "normalized": false,
958
+ "special": true
959
  },
960
  {
961
  "id": 50362,
 
962
  "content": "<|notimestamps|>",
963
  "single_word": false,
964
  "lstrip": false,
965
  "rstrip": false,
966
+ "normalized": false,
967
+ "special": true
968
  }
969
  ],
970
  "normalizer": null,
971
  "pre_tokenizer": {
972
  "type": "ByteLevel",
973
  "add_prefix_space": false,
974
+ "trim_offsets": true,
975
+ "use_regex": true
976
  },
977
  "post_processor": {
978
  "type": "TemplateProcessing",
 
1067
  "decoder": {
1068
  "type": "ByteLevel",
1069
  "add_prefix_space": true,
1070
+ "trim_offsets": true,
1071
+ "use_regex": true
1072
  },
1073
  "model": {
1074
  "type": "BPE",
 
1077
  "continuing_subword_prefix": "",
1078
  "end_of_word_suffix": "",
1079
  "fuse_unk": false,
1080
+ "byte_fallback": false,
1081
  "vocab": {
1082
  "!": 0,
1083
  "\"": 1,
tokenizer_config.json CHANGED
@@ -9,6 +9,7 @@
9
  "rstrip": false,
10
  "single_word": false
11
  },
 
12
  "eos_token": {
13
  "__type": "AddedToken",
14
  "content": "<|endoftext|>",
@@ -22,8 +23,8 @@
22
  "pad_token": null,
23
  "processor_class": "WhisperProcessor",
24
  "return_attention_mask": false,
25
- "special_tokens_map_file": null,
26
  "tokenizer_class": "WhisperTokenizer",
 
27
  "unk_token": {
28
  "__type": "AddedToken",
29
  "content": "<|endoftext|>",
 
9
  "rstrip": false,
10
  "single_word": false
11
  },
12
+ "clean_up_tokenization_spaces": true,
13
  "eos_token": {
14
  "__type": "AddedToken",
15
  "content": "<|endoftext|>",
 
23
  "pad_token": null,
24
  "processor_class": "WhisperProcessor",
25
  "return_attention_mask": false,
 
26
  "tokenizer_class": "WhisperTokenizer",
27
+ "trust_remote_code": false,
28
  "unk_token": {
29
  "__type": "AddedToken",
30
  "content": "<|endoftext|>",
vocab.json CHANGED
The diff for this file is too large to render. See raw diff