Joshua Lochner committed on
Commit
26c7c20
1 Parent(s): ce06bcf

Update whisper models

Browse files
This view is limited to 50 files because the commit contains too many changes. See the raw diff.
Files changed (50) hide show
  1. quantized/openai/whisper-base.en/default/config.json +10 -1
  2. quantized/openai/whisper-base.en/default/generation_config.json +0 -112
  3. quantized/openai/whisper-base.en/default/merges.txt +1 -1
  4. quantized/openai/whisper-base.en/default/{decoder_model.onnx → model.onnx} +2 -2
  5. quantized/openai/whisper-base.en/default/tokenizer.json +216 -218
  6. quantized/openai/whisper-base.en/default/tokenizer_config.json +0 -1
  7. quantized/openai/whisper-base.en/default/vocab.json +0 -0
  8. quantized/openai/whisper-base.en/speech2seq-lm-with-past/config.json +10 -1
  9. quantized/openai/whisper-base.en/speech2seq-lm-with-past/decoder_model.onnx +2 -2
  10. quantized/openai/whisper-base.en/speech2seq-lm-with-past/decoder_model_merged.onnx +2 -2
  11. quantized/openai/whisper-base.en/speech2seq-lm-with-past/decoder_with_past_model.onnx +2 -2
  12. quantized/openai/whisper-base.en/speech2seq-lm-with-past/encoder_model.onnx +2 -2
  13. quantized/openai/whisper-base.en/speech2seq-lm-with-past/merges.txt +1 -1
  14. quantized/openai/whisper-base.en/speech2seq-lm-with-past/tokenizer.json +216 -218
  15. quantized/openai/whisper-base.en/speech2seq-lm-with-past/tokenizer_config.json +0 -1
  16. quantized/openai/whisper-base.en/speech2seq-lm-with-past/vocab.json +0 -0
  17. quantized/openai/whisper-base/default/config.json +10 -1
  18. quantized/openai/whisper-base/default/generation_config.json +0 -219
  19. quantized/openai/whisper-base/default/merges.txt +1 -1
  20. quantized/openai/{whisper-base.en/default/encoder_model.onnx → whisper-base/default/model.onnx} +2 -2
  21. quantized/openai/whisper-base/default/tokenizer.json +216 -218
  22. quantized/openai/whisper-base/default/tokenizer_config.json +0 -1
  23. quantized/openai/whisper-base/default/vocab.json +0 -0
  24. quantized/openai/whisper-base/speech2seq-lm-with-past/config.json +10 -1
  25. quantized/openai/whisper-base/speech2seq-lm-with-past/decoder_model.onnx +2 -2
  26. quantized/openai/whisper-base/speech2seq-lm-with-past/decoder_model_merged.onnx +2 -2
  27. quantized/openai/whisper-base/speech2seq-lm-with-past/decoder_with_past_model.onnx +2 -2
  28. quantized/openai/whisper-base/speech2seq-lm-with-past/encoder_model.onnx +2 -2
  29. quantized/openai/whisper-base/speech2seq-lm-with-past/merges.txt +1 -1
  30. quantized/openai/whisper-base/speech2seq-lm-with-past/tokenizer.json +216 -218
  31. quantized/openai/whisper-base/speech2seq-lm-with-past/tokenizer_config.json +0 -1
  32. quantized/openai/whisper-base/speech2seq-lm-with-past/vocab.json +0 -0
  33. quantized/openai/whisper-small.en/default/config.json +10 -1
  34. quantized/openai/whisper-small.en/default/decoder_model.onnx +0 -3
  35. quantized/openai/whisper-small.en/default/encoder_model.onnx +0 -3
  36. quantized/openai/whisper-small.en/default/generation_config.json +0 -111
  37. quantized/openai/whisper-small.en/default/merges.txt +1 -1
  38. quantized/openai/whisper-small.en/default/model.onnx +3 -0
  39. quantized/openai/whisper-small.en/default/tokenizer.json +216 -218
  40. quantized/openai/whisper-small.en/default/tokenizer_config.json +0 -1
  41. quantized/openai/whisper-small.en/default/vocab.json +0 -0
  42. quantized/openai/whisper-small.en/speech2seq-lm-with-past/config.json +10 -1
  43. quantized/openai/whisper-small.en/speech2seq-lm-with-past/decoder_model.onnx +2 -2
  44. quantized/openai/whisper-small.en/speech2seq-lm-with-past/decoder_model_merged.onnx +2 -2
  45. quantized/openai/whisper-small.en/speech2seq-lm-with-past/decoder_with_past_model.onnx +2 -2
  46. quantized/openai/whisper-small.en/speech2seq-lm-with-past/encoder_model.onnx +2 -2
  47. quantized/openai/whisper-small.en/speech2seq-lm-with-past/merges.txt +1 -1
  48. quantized/openai/whisper-small.en/speech2seq-lm-with-past/tokenizer.json +216 -218
  49. quantized/openai/whisper-small.en/speech2seq-lm-with-past/tokenizer_config.json +0 -1
  50. quantized/openai/whisper-small.en/speech2seq-lm-with-past/vocab.json +0 -0
quantized/openai/whisper-base.en/default/config.json CHANGED
@@ -2,6 +2,7 @@
2
  "_name_or_path": "openai/whisper-base.en",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
 
5
  "architectures": [
6
  "WhisperForConditionalGeneration"
7
  ],
@@ -11,6 +12,7 @@
11
  50256
12
  ],
13
  "bos_token_id": 50257,
 
14
  "d_model": 512,
15
  "decoder_attention_heads": 8,
16
  "decoder_ffn_dim": 2048,
@@ -31,6 +33,12 @@
31
  ],
32
  "init_std": 0.02,
33
  "is_encoder_decoder": true,
 
 
 
 
 
 
34
  "max_length": 448,
35
  "max_source_positions": 1500,
36
  "max_target_positions": 448,
@@ -129,7 +137,8 @@
129
  50360,
130
  50361
131
  ],
132
- "transformers_version": "4.26.1",
133
  "use_cache": true,
 
134
  "vocab_size": 51864
135
  }
2
  "_name_or_path": "openai/whisper-base.en",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
5
+ "apply_spec_augment": false,
6
  "architectures": [
7
  "WhisperForConditionalGeneration"
8
  ],
12
  50256
13
  ],
14
  "bos_token_id": 50257,
15
+ "classifier_proj_size": 256,
16
  "d_model": 512,
17
  "decoder_attention_heads": 8,
18
  "decoder_ffn_dim": 2048,
33
  ],
34
  "init_std": 0.02,
35
  "is_encoder_decoder": true,
36
+ "mask_feature_length": 10,
37
+ "mask_feature_min_masks": 0,
38
+ "mask_feature_prob": 0.0,
39
+ "mask_time_length": 10,
40
+ "mask_time_min_masks": 2,
41
+ "mask_time_prob": 0.05,
42
  "max_length": 448,
43
  "max_source_positions": 1500,
44
  "max_target_positions": 448,
137
  50360,
138
  50361
139
  ],
140
+ "transformers_version": "4.27.2",
141
  "use_cache": true,
142
+ "use_weighted_layer_sum": false,
143
  "vocab_size": 51864
144
  }
quantized/openai/whisper-base.en/default/generation_config.json DELETED
@@ -1,112 +0,0 @@
1
- {
2
- "begin_suppress_tokens": [
3
- 220,
4
- 50256
5
- ],
6
- "bos_token_id": 50257,
7
- "decoder_start_token_id": 50257,
8
- "eos_token_id": 50256,
9
- "forced_decoder_ids": [
10
- [
11
- 1,
12
- 50362
13
- ]
14
- ],
15
- "is_multilingual": false,
16
- "max_initial_timestamp_index": 1,
17
- "max_length": 448,
18
- "no_timestamps_token_id": 50362,
19
- "pad_token_id": 50256,
20
- "return_timestamps": false,
21
- "suppress_tokens": [
22
- 1,
23
- 2,
24
- 7,
25
- 8,
26
- 9,
27
- 10,
28
- 14,
29
- 25,
30
- 26,
31
- 27,
32
- 28,
33
- 29,
34
- 31,
35
- 58,
36
- 59,
37
- 60,
38
- 61,
39
- 62,
40
- 63,
41
- 90,
42
- 91,
43
- 92,
44
- 93,
45
- 357,
46
- 366,
47
- 438,
48
- 532,
49
- 685,
50
- 705,
51
- 796,
52
- 930,
53
- 1058,
54
- 1220,
55
- 1267,
56
- 1279,
57
- 1303,
58
- 1343,
59
- 1377,
60
- 1391,
61
- 1635,
62
- 1782,
63
- 1875,
64
- 2162,
65
- 2361,
66
- 2488,
67
- 3467,
68
- 4008,
69
- 4211,
70
- 4600,
71
- 4808,
72
- 5299,
73
- 5855,
74
- 6329,
75
- 7203,
76
- 9609,
77
- 9959,
78
- 10563,
79
- 10786,
80
- 11420,
81
- 11709,
82
- 11907,
83
- 13163,
84
- 13697,
85
- 13700,
86
- 14808,
87
- 15306,
88
- 16410,
89
- 16791,
90
- 17992,
91
- 19203,
92
- 19510,
93
- 20724,
94
- 22305,
95
- 22935,
96
- 27007,
97
- 30109,
98
- 30420,
99
- 33409,
100
- 34949,
101
- 40283,
102
- 40493,
103
- 40549,
104
- 47282,
105
- 49146,
106
- 50257,
107
- 50359,
108
- 50360,
109
- 50361
110
- ],
111
- "transformers_version": "4.27.0.dev0"
112
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
quantized/openai/whisper-base.en/default/merges.txt CHANGED
@@ -1,4 +1,4 @@
1
- #version: 0.2
2
  Ġ t
3
  Ġ a
4
  h e
1
+ #version: 0.2 - Trained by `huggingface/tokenizers`
2
  Ġ t
3
  Ġ a
4
  h e
quantized/openai/whisper-base.en/default/{decoder_model.onnx → model.onnx} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:90e2a5e52df832bbc35fb104cfc63deb2a3731bfe3c997df73f5fa234ee9e363
3
- size 53464941
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72b7e5633c375bd177e5c7370be5606f093578b30bcb9e584c58a73e92f41701
3
+ size 76778687
quantized/openai/whisper-base.en/default/tokenizer.json CHANGED
@@ -5,974 +5,973 @@
5
  "added_tokens": [
6
  {
7
  "id": 50256,
 
8
  "content": "<|endoftext|>",
9
  "single_word": false,
10
  "lstrip": false,
11
  "rstrip": false,
12
- "normalized": false,
13
- "special": true
14
  },
15
  {
16
  "id": 50257,
 
17
  "content": "<|startoftranscript|>",
18
  "single_word": false,
19
  "lstrip": false,
20
  "rstrip": false,
21
- "normalized": false,
22
- "special": true
23
  },
24
  {
25
  "id": 50258,
 
26
  "content": "<|en|>",
27
  "single_word": false,
28
  "lstrip": false,
29
  "rstrip": false,
30
- "normalized": false,
31
- "special": true
32
  },
33
  {
34
  "id": 50259,
 
35
  "content": "<|zh|>",
36
  "single_word": false,
37
  "lstrip": false,
38
  "rstrip": false,
39
- "normalized": false,
40
- "special": true
41
  },
42
  {
43
  "id": 50260,
 
44
  "content": "<|de|>",
45
  "single_word": false,
46
  "lstrip": false,
47
  "rstrip": false,
48
- "normalized": false,
49
- "special": true
50
  },
51
  {
52
  "id": 50261,
 
53
  "content": "<|es|>",
54
  "single_word": false,
55
  "lstrip": false,
56
  "rstrip": false,
57
- "normalized": false,
58
- "special": true
59
  },
60
  {
61
  "id": 50262,
 
62
  "content": "<|ru|>",
63
  "single_word": false,
64
  "lstrip": false,
65
  "rstrip": false,
66
- "normalized": false,
67
- "special": true
68
  },
69
  {
70
  "id": 50263,
 
71
  "content": "<|ko|>",
72
  "single_word": false,
73
  "lstrip": false,
74
  "rstrip": false,
75
- "normalized": false,
76
- "special": true
77
  },
78
  {
79
  "id": 50264,
 
80
  "content": "<|fr|>",
81
  "single_word": false,
82
  "lstrip": false,
83
  "rstrip": false,
84
- "normalized": false,
85
- "special": true
86
  },
87
  {
88
  "id": 50265,
 
89
  "content": "<|ja|>",
90
  "single_word": false,
91
  "lstrip": false,
92
  "rstrip": false,
93
- "normalized": false,
94
- "special": true
95
  },
96
  {
97
  "id": 50266,
 
98
  "content": "<|pt|>",
99
  "single_word": false,
100
  "lstrip": false,
101
  "rstrip": false,
102
- "normalized": false,
103
- "special": true
104
  },
105
  {
106
  "id": 50267,
 
107
  "content": "<|tr|>",
108
  "single_word": false,
109
  "lstrip": false,
110
  "rstrip": false,
111
- "normalized": false,
112
- "special": true
113
  },
114
  {
115
  "id": 50268,
 
116
  "content": "<|pl|>",
117
  "single_word": false,
118
  "lstrip": false,
119
  "rstrip": false,
120
- "normalized": false,
121
- "special": true
122
  },
123
  {
124
  "id": 50269,
 
125
  "content": "<|ca|>",
126
  "single_word": false,
127
  "lstrip": false,
128
  "rstrip": false,
129
- "normalized": false,
130
- "special": true
131
  },
132
  {
133
  "id": 50270,
 
134
  "content": "<|nl|>",
135
  "single_word": false,
136
  "lstrip": false,
137
  "rstrip": false,
138
- "normalized": false,
139
- "special": true
140
  },
141
  {
142
  "id": 50271,
 
143
  "content": "<|ar|>",
144
  "single_word": false,
145
  "lstrip": false,
146
  "rstrip": false,
147
- "normalized": false,
148
- "special": true
149
  },
150
  {
151
  "id": 50272,
 
152
  "content": "<|sv|>",
153
  "single_word": false,
154
  "lstrip": false,
155
  "rstrip": false,
156
- "normalized": false,
157
- "special": true
158
  },
159
  {
160
  "id": 50273,
 
161
  "content": "<|it|>",
162
  "single_word": false,
163
  "lstrip": false,
164
  "rstrip": false,
165
- "normalized": false,
166
- "special": true
167
  },
168
  {
169
  "id": 50274,
 
170
  "content": "<|id|>",
171
  "single_word": false,
172
  "lstrip": false,
173
  "rstrip": false,
174
- "normalized": false,
175
- "special": true
176
  },
177
  {
178
  "id": 50275,
 
179
  "content": "<|hi|>",
180
  "single_word": false,
181
  "lstrip": false,
182
  "rstrip": false,
183
- "normalized": false,
184
- "special": true
185
  },
186
  {
187
  "id": 50276,
 
188
  "content": "<|fi|>",
189
  "single_word": false,
190
  "lstrip": false,
191
  "rstrip": false,
192
- "normalized": false,
193
- "special": true
194
  },
195
  {
196
  "id": 50277,
 
197
  "content": "<|vi|>",
198
  "single_word": false,
199
  "lstrip": false,
200
  "rstrip": false,
201
- "normalized": false,
202
- "special": true
203
  },
204
  {
205
  "id": 50278,
 
206
  "content": "<|iw|>",
207
  "single_word": false,
208
  "lstrip": false,
209
  "rstrip": false,
210
- "normalized": false,
211
- "special": true
212
  },
213
  {
214
  "id": 50279,
 
215
  "content": "<|uk|>",
216
  "single_word": false,
217
  "lstrip": false,
218
  "rstrip": false,
219
- "normalized": false,
220
- "special": true
221
  },
222
  {
223
  "id": 50280,
 
224
  "content": "<|el|>",
225
  "single_word": false,
226
  "lstrip": false,
227
  "rstrip": false,
228
- "normalized": false,
229
- "special": true
230
  },
231
  {
232
  "id": 50281,
 
233
  "content": "<|ms|>",
234
  "single_word": false,
235
  "lstrip": false,
236
  "rstrip": false,
237
- "normalized": false,
238
- "special": true
239
  },
240
  {
241
  "id": 50282,
 
242
  "content": "<|cs|>",
243
  "single_word": false,
244
  "lstrip": false,
245
  "rstrip": false,
246
- "normalized": false,
247
- "special": true
248
  },
249
  {
250
  "id": 50283,
 
251
  "content": "<|ro|>",
252
  "single_word": false,
253
  "lstrip": false,
254
  "rstrip": false,
255
- "normalized": false,
256
- "special": true
257
  },
258
  {
259
  "id": 50284,
 
260
  "content": "<|da|>",
261
  "single_word": false,
262
  "lstrip": false,
263
  "rstrip": false,
264
- "normalized": false,
265
- "special": true
266
  },
267
  {
268
  "id": 50285,
 
269
  "content": "<|hu|>",
270
  "single_word": false,
271
  "lstrip": false,
272
  "rstrip": false,
273
- "normalized": false,
274
- "special": true
275
  },
276
  {
277
  "id": 50286,
 
278
  "content": "<|ta|>",
279
  "single_word": false,
280
  "lstrip": false,
281
  "rstrip": false,
282
- "normalized": false,
283
- "special": true
284
  },
285
  {
286
  "id": 50287,
 
287
  "content": "<|no|>",
288
  "single_word": false,
289
  "lstrip": false,
290
  "rstrip": false,
291
- "normalized": false,
292
- "special": true
293
  },
294
  {
295
  "id": 50288,
 
296
  "content": "<|th|>",
297
  "single_word": false,
298
  "lstrip": false,
299
  "rstrip": false,
300
- "normalized": false,
301
- "special": true
302
  },
303
  {
304
  "id": 50289,
 
305
  "content": "<|ur|>",
306
  "single_word": false,
307
  "lstrip": false,
308
  "rstrip": false,
309
- "normalized": false,
310
- "special": true
311
  },
312
  {
313
  "id": 50290,
 
314
  "content": "<|hr|>",
315
  "single_word": false,
316
  "lstrip": false,
317
  "rstrip": false,
318
- "normalized": false,
319
- "special": true
320
  },
321
  {
322
  "id": 50291,
 
323
  "content": "<|bg|>",
324
  "single_word": false,
325
  "lstrip": false,
326
  "rstrip": false,
327
- "normalized": false,
328
- "special": true
329
  },
330
  {
331
  "id": 50292,
 
332
  "content": "<|lt|>",
333
  "single_word": false,
334
  "lstrip": false,
335
  "rstrip": false,
336
- "normalized": false,
337
- "special": true
338
  },
339
  {
340
  "id": 50293,
 
341
  "content": "<|la|>",
342
  "single_word": false,
343
  "lstrip": false,
344
  "rstrip": false,
345
- "normalized": false,
346
- "special": true
347
  },
348
  {
349
  "id": 50294,
 
350
  "content": "<|mi|>",
351
  "single_word": false,
352
  "lstrip": false,
353
  "rstrip": false,
354
- "normalized": false,
355
- "special": true
356
  },
357
  {
358
  "id": 50295,
 
359
  "content": "<|ml|>",
360
  "single_word": false,
361
  "lstrip": false,
362
  "rstrip": false,
363
- "normalized": false,
364
- "special": true
365
  },
366
  {
367
  "id": 50296,
 
368
  "content": "<|cy|>",
369
  "single_word": false,
370
  "lstrip": false,
371
  "rstrip": false,
372
- "normalized": false,
373
- "special": true
374
  },
375
  {
376
  "id": 50297,
 
377
  "content": "<|sk|>",
378
  "single_word": false,
379
  "lstrip": false,
380
  "rstrip": false,
381
- "normalized": false,
382
- "special": true
383
  },
384
  {
385
  "id": 50298,
 
386
  "content": "<|te|>",
387
  "single_word": false,
388
  "lstrip": false,
389
  "rstrip": false,
390
- "normalized": false,
391
- "special": true
392
  },
393
  {
394
  "id": 50299,
 
395
  "content": "<|fa|>",
396
  "single_word": false,
397
  "lstrip": false,
398
  "rstrip": false,
399
- "normalized": false,
400
- "special": true
401
  },
402
  {
403
  "id": 50300,
 
404
  "content": "<|lv|>",
405
  "single_word": false,
406
  "lstrip": false,
407
  "rstrip": false,
408
- "normalized": false,
409
- "special": true
410
  },
411
  {
412
  "id": 50301,
 
413
  "content": "<|bn|>",
414
  "single_word": false,
415
  "lstrip": false,
416
  "rstrip": false,
417
- "normalized": false,
418
- "special": true
419
  },
420
  {
421
  "id": 50302,
 
422
  "content": "<|sr|>",
423
  "single_word": false,
424
  "lstrip": false,
425
  "rstrip": false,
426
- "normalized": false,
427
- "special": true
428
  },
429
  {
430
  "id": 50303,
 
431
  "content": "<|az|>",
432
  "single_word": false,
433
  "lstrip": false,
434
  "rstrip": false,
435
- "normalized": false,
436
- "special": true
437
  },
438
  {
439
  "id": 50304,
 
440
  "content": "<|sl|>",
441
  "single_word": false,
442
  "lstrip": false,
443
  "rstrip": false,
444
- "normalized": false,
445
- "special": true
446
  },
447
  {
448
  "id": 50305,
 
449
  "content": "<|kn|>",
450
  "single_word": false,
451
  "lstrip": false,
452
  "rstrip": false,
453
- "normalized": false,
454
- "special": true
455
  },
456
  {
457
  "id": 50306,
 
458
  "content": "<|et|>",
459
  "single_word": false,
460
  "lstrip": false,
461
  "rstrip": false,
462
- "normalized": false,
463
- "special": true
464
  },
465
  {
466
  "id": 50307,
 
467
  "content": "<|mk|>",
468
  "single_word": false,
469
  "lstrip": false,
470
  "rstrip": false,
471
- "normalized": false,
472
- "special": true
473
  },
474
  {
475
  "id": 50308,
 
476
  "content": "<|br|>",
477
  "single_word": false,
478
  "lstrip": false,
479
  "rstrip": false,
480
- "normalized": false,
481
- "special": true
482
  },
483
  {
484
  "id": 50309,
 
485
  "content": "<|eu|>",
486
  "single_word": false,
487
  "lstrip": false,
488
  "rstrip": false,
489
- "normalized": false,
490
- "special": true
491
  },
492
  {
493
  "id": 50310,
 
494
  "content": "<|is|>",
495
  "single_word": false,
496
  "lstrip": false,
497
  "rstrip": false,
498
- "normalized": false,
499
- "special": true
500
  },
501
  {
502
  "id": 50311,
 
503
  "content": "<|hy|>",
504
  "single_word": false,
505
  "lstrip": false,
506
  "rstrip": false,
507
- "normalized": false,
508
- "special": true
509
  },
510
  {
511
  "id": 50312,
 
512
  "content": "<|ne|>",
513
  "single_word": false,
514
  "lstrip": false,
515
  "rstrip": false,
516
- "normalized": false,
517
- "special": true
518
  },
519
  {
520
  "id": 50313,
 
521
  "content": "<|mn|>",
522
  "single_word": false,
523
  "lstrip": false,
524
  "rstrip": false,
525
- "normalized": false,
526
- "special": true
527
  },
528
  {
529
  "id": 50314,
 
530
  "content": "<|bs|>",
531
  "single_word": false,
532
  "lstrip": false,
533
  "rstrip": false,
534
- "normalized": false,
535
- "special": true
536
  },
537
  {
538
  "id": 50315,
 
539
  "content": "<|kk|>",
540
  "single_word": false,
541
  "lstrip": false,
542
  "rstrip": false,
543
- "normalized": false,
544
- "special": true
545
  },
546
  {
547
  "id": 50316,
 
548
  "content": "<|sq|>",
549
  "single_word": false,
550
  "lstrip": false,
551
  "rstrip": false,
552
- "normalized": false,
553
- "special": true
554
  },
555
  {
556
  "id": 50317,
 
557
  "content": "<|sw|>",
558
  "single_word": false,
559
  "lstrip": false,
560
  "rstrip": false,
561
- "normalized": false,
562
- "special": true
563
  },
564
  {
565
  "id": 50318,
 
566
  "content": "<|gl|>",
567
  "single_word": false,
568
  "lstrip": false,
569
  "rstrip": false,
570
- "normalized": false,
571
- "special": true
572
  },
573
  {
574
  "id": 50319,
 
575
  "content": "<|mr|>",
576
  "single_word": false,
577
  "lstrip": false,
578
  "rstrip": false,
579
- "normalized": false,
580
- "special": true
581
  },
582
  {
583
  "id": 50320,
 
584
  "content": "<|pa|>",
585
  "single_word": false,
586
  "lstrip": false,
587
  "rstrip": false,
588
- "normalized": false,
589
- "special": true
590
  },
591
  {
592
  "id": 50321,
 
593
  "content": "<|si|>",
594
  "single_word": false,
595
  "lstrip": false,
596
  "rstrip": false,
597
- "normalized": false,
598
- "special": true
599
  },
600
  {
601
  "id": 50322,
 
602
  "content": "<|km|>",
603
  "single_word": false,
604
  "lstrip": false,
605
  "rstrip": false,
606
- "normalized": false,
607
- "special": true
608
  },
609
  {
610
  "id": 50323,
 
611
  "content": "<|sn|>",
612
  "single_word": false,
613
  "lstrip": false,
614
  "rstrip": false,
615
- "normalized": false,
616
- "special": true
617
  },
618
  {
619
  "id": 50324,
 
620
  "content": "<|yo|>",
621
  "single_word": false,
622
  "lstrip": false,
623
  "rstrip": false,
624
- "normalized": false,
625
- "special": true
626
  },
627
  {
628
  "id": 50325,
 
629
  "content": "<|so|>",
630
  "single_word": false,
631
  "lstrip": false,
632
  "rstrip": false,
633
- "normalized": false,
634
- "special": true
635
  },
636
  {
637
  "id": 50326,
 
638
  "content": "<|af|>",
639
  "single_word": false,
640
  "lstrip": false,
641
  "rstrip": false,
642
- "normalized": false,
643
- "special": true
644
  },
645
  {
646
  "id": 50327,
 
647
  "content": "<|oc|>",
648
  "single_word": false,
649
  "lstrip": false,
650
  "rstrip": false,
651
- "normalized": false,
652
- "special": true
653
  },
654
  {
655
  "id": 50328,
 
656
  "content": "<|ka|>",
657
  "single_word": false,
658
  "lstrip": false,
659
  "rstrip": false,
660
- "normalized": false,
661
- "special": true
662
  },
663
  {
664
  "id": 50329,
 
665
  "content": "<|be|>",
666
  "single_word": false,
667
  "lstrip": false,
668
  "rstrip": false,
669
- "normalized": false,
670
- "special": true
671
  },
672
  {
673
  "id": 50330,
 
674
  "content": "<|tg|>",
675
  "single_word": false,
676
  "lstrip": false,
677
  "rstrip": false,
678
- "normalized": false,
679
- "special": true
680
  },
681
  {
682
  "id": 50331,
 
683
  "content": "<|sd|>",
684
  "single_word": false,
685
  "lstrip": false,
686
  "rstrip": false,
687
- "normalized": false,
688
- "special": true
689
  },
690
  {
691
  "id": 50332,
 
692
  "content": "<|gu|>",
693
  "single_word": false,
694
  "lstrip": false,
695
  "rstrip": false,
696
- "normalized": false,
697
- "special": true
698
  },
699
  {
700
  "id": 50333,
 
701
  "content": "<|am|>",
702
  "single_word": false,
703
  "lstrip": false,
704
  "rstrip": false,
705
- "normalized": false,
706
- "special": true
707
  },
708
  {
709
  "id": 50334,
 
710
  "content": "<|yi|>",
711
  "single_word": false,
712
  "lstrip": false,
713
  "rstrip": false,
714
- "normalized": false,
715
- "special": true
716
  },
717
  {
718
  "id": 50335,
 
719
  "content": "<|lo|>",
720
  "single_word": false,
721
  "lstrip": false,
722
  "rstrip": false,
723
- "normalized": false,
724
- "special": true
725
  },
726
  {
727
  "id": 50336,
 
728
  "content": "<|uz|>",
729
  "single_word": false,
730
  "lstrip": false,
731
  "rstrip": false,
732
- "normalized": false,
733
- "special": true
734
  },
735
  {
736
  "id": 50337,
 
737
  "content": "<|fo|>",
738
  "single_word": false,
739
  "lstrip": false,
740
  "rstrip": false,
741
- "normalized": false,
742
- "special": true
743
  },
744
  {
745
  "id": 50338,
 
746
  "content": "<|ht|>",
747
  "single_word": false,
748
  "lstrip": false,
749
  "rstrip": false,
750
- "normalized": false,
751
- "special": true
752
  },
753
  {
754
  "id": 50339,
 
755
  "content": "<|ps|>",
756
  "single_word": false,
757
  "lstrip": false,
758
  "rstrip": false,
759
- "normalized": false,
760
- "special": true
761
  },
762
  {
763
  "id": 50340,
 
764
  "content": "<|tk|>",
765
  "single_word": false,
766
  "lstrip": false,
767
  "rstrip": false,
768
- "normalized": false,
769
- "special": true
770
  },
771
  {
772
  "id": 50341,
 
773
  "content": "<|nn|>",
774
  "single_word": false,
775
  "lstrip": false,
776
  "rstrip": false,
777
- "normalized": false,
778
- "special": true
779
  },
780
  {
781
  "id": 50342,
 
782
  "content": "<|mt|>",
783
  "single_word": false,
784
  "lstrip": false,
785
  "rstrip": false,
786
- "normalized": false,
787
- "special": true
788
  },
789
  {
790
  "id": 50343,
 
791
  "content": "<|sa|>",
792
  "single_word": false,
793
  "lstrip": false,
794
  "rstrip": false,
795
- "normalized": false,
796
- "special": true
797
  },
798
  {
799
  "id": 50344,
 
800
  "content": "<|lb|>",
801
  "single_word": false,
802
  "lstrip": false,
803
  "rstrip": false,
804
- "normalized": false,
805
- "special": true
806
  },
807
  {
808
  "id": 50345,
 
809
  "content": "<|my|>",
810
  "single_word": false,
811
  "lstrip": false,
812
  "rstrip": false,
813
- "normalized": false,
814
- "special": true
815
  },
816
  {
817
  "id": 50346,
 
818
  "content": "<|bo|>",
819
  "single_word": false,
820
  "lstrip": false,
821
  "rstrip": false,
822
- "normalized": false,
823
- "special": true
824
  },
825
  {
826
  "id": 50347,
 
827
  "content": "<|tl|>",
828
  "single_word": false,
829
  "lstrip": false,
830
  "rstrip": false,
831
- "normalized": false,
832
- "special": true
833
  },
834
  {
835
  "id": 50348,
 
836
  "content": "<|mg|>",
837
  "single_word": false,
838
  "lstrip": false,
839
  "rstrip": false,
840
- "normalized": false,
841
- "special": true
842
  },
843
  {
844
  "id": 50349,
 
845
  "content": "<|as|>",
846
  "single_word": false,
847
  "lstrip": false,
848
  "rstrip": false,
849
- "normalized": false,
850
- "special": true
851
  },
852
  {
853
  "id": 50350,
 
854
  "content": "<|tt|>",
855
  "single_word": false,
856
  "lstrip": false,
857
  "rstrip": false,
858
- "normalized": false,
859
- "special": true
860
  },
861
  {
862
  "id": 50351,
 
863
  "content": "<|haw|>",
864
  "single_word": false,
865
  "lstrip": false,
866
  "rstrip": false,
867
- "normalized": false,
868
- "special": true
869
  },
870
  {
871
  "id": 50352,
 
872
  "content": "<|ln|>",
873
  "single_word": false,
874
  "lstrip": false,
875
  "rstrip": false,
876
- "normalized": false,
877
- "special": true
878
  },
879
  {
880
  "id": 50353,
 
881
  "content": "<|ha|>",
882
  "single_word": false,
883
  "lstrip": false,
884
  "rstrip": false,
885
- "normalized": false,
886
- "special": true
887
  },
888
  {
889
  "id": 50354,
 
890
  "content": "<|ba|>",
891
  "single_word": false,
892
  "lstrip": false,
893
  "rstrip": false,
894
- "normalized": false,
895
- "special": true
896
  },
897
  {
898
  "id": 50355,
 
899
  "content": "<|jw|>",
900
  "single_word": false,
901
  "lstrip": false,
902
  "rstrip": false,
903
- "normalized": false,
904
- "special": true
905
  },
906
  {
907
  "id": 50356,
 
908
  "content": "<|su|>",
909
  "single_word": false,
910
  "lstrip": false,
911
  "rstrip": false,
912
- "normalized": false,
913
- "special": true
914
  },
915
  {
916
  "id": 50357,
 
917
  "content": "<|translate|>",
918
  "single_word": false,
919
  "lstrip": false,
920
  "rstrip": false,
921
- "normalized": false,
922
- "special": true
923
  },
924
  {
925
  "id": 50358,
 
926
  "content": "<|transcribe|>",
927
  "single_word": false,
928
  "lstrip": false,
929
  "rstrip": false,
930
- "normalized": false,
931
- "special": true
932
  },
933
  {
934
  "id": 50359,
 
935
  "content": "<|startoflm|>",
936
  "single_word": false,
937
  "lstrip": false,
938
  "rstrip": false,
939
- "normalized": false,
940
- "special": true
941
  },
942
  {
943
  "id": 50360,
 
944
  "content": "<|startofprev|>",
945
  "single_word": false,
946
  "lstrip": false,
947
  "rstrip": false,
948
- "normalized": false,
949
- "special": true
950
  },
951
  {
952
  "id": 50361,
 
953
  "content": "<|nocaptions|>",
954
  "single_word": false,
955
  "lstrip": false,
956
  "rstrip": false,
957
- "normalized": false,
958
- "special": true
959
  },
960
  {
961
  "id": 50362,
 
962
  "content": "<|notimestamps|>",
963
  "single_word": false,
964
  "lstrip": false,
965
  "rstrip": false,
966
- "normalized": false,
967
- "special": true
968
  }
969
  ],
970
  "normalizer": null,
971
  "pre_tokenizer": {
972
  "type": "ByteLevel",
973
  "add_prefix_space": false,
974
- "trim_offsets": true,
975
- "use_regex": true
976
  },
977
  "post_processor": {
978
  "type": "TemplateProcessing",
@@ -1067,8 +1066,7 @@
1067
  "decoder": {
1068
  "type": "ByteLevel",
1069
  "add_prefix_space": true,
1070
- "trim_offsets": true,
1071
- "use_regex": true
1072
  },
1073
  "model": {
1074
  "type": "BPE",
5
  "added_tokens": [
6
  {
7
  "id": 50256,
8
+ "special": true,
9
  "content": "<|endoftext|>",
10
  "single_word": false,
11
  "lstrip": false,
12
  "rstrip": false,
13
+ "normalized": false
 
14
  },
15
  {
16
  "id": 50257,
17
+ "special": true,
18
  "content": "<|startoftranscript|>",
19
  "single_word": false,
20
  "lstrip": false,
21
  "rstrip": false,
22
+ "normalized": false
 
23
  },
24
  {
25
  "id": 50258,
26
+ "special": true,
27
  "content": "<|en|>",
28
  "single_word": false,
29
  "lstrip": false,
30
  "rstrip": false,
31
+ "normalized": false
 
32
  },
33
  {
34
  "id": 50259,
35
+ "special": true,
36
  "content": "<|zh|>",
37
  "single_word": false,
38
  "lstrip": false,
39
  "rstrip": false,
40
+ "normalized": false
 
41
  },
42
  {
43
  "id": 50260,
44
+ "special": true,
45
  "content": "<|de|>",
46
  "single_word": false,
47
  "lstrip": false,
48
  "rstrip": false,
49
+ "normalized": false
 
50
  },
51
  {
52
  "id": 50261,
53
+ "special": true,
54
  "content": "<|es|>",
55
  "single_word": false,
56
  "lstrip": false,
57
  "rstrip": false,
58
+ "normalized": false
 
59
  },
60
  {
61
  "id": 50262,
62
+ "special": true,
63
  "content": "<|ru|>",
64
  "single_word": false,
65
  "lstrip": false,
66
  "rstrip": false,
67
+ "normalized": false
 
68
  },
69
  {
70
  "id": 50263,
71
+ "special": true,
72
  "content": "<|ko|>",
73
  "single_word": false,
74
  "lstrip": false,
75
  "rstrip": false,
76
+ "normalized": false
 
77
  },
78
  {
79
  "id": 50264,
80
+ "special": true,
81
  "content": "<|fr|>",
82
  "single_word": false,
83
  "lstrip": false,
84
  "rstrip": false,
85
+ "normalized": false
 
86
  },
87
  {
88
  "id": 50265,
89
+ "special": true,
90
  "content": "<|ja|>",
91
  "single_word": false,
92
  "lstrip": false,
93
  "rstrip": false,
94
+ "normalized": false
 
95
  },
96
  {
97
  "id": 50266,
98
+ "special": true,
99
  "content": "<|pt|>",
100
  "single_word": false,
101
  "lstrip": false,
102
  "rstrip": false,
103
+ "normalized": false
 
104
  },
105
  {
106
  "id": 50267,
107
+ "special": true,
108
  "content": "<|tr|>",
109
  "single_word": false,
110
  "lstrip": false,
111
  "rstrip": false,
112
+ "normalized": false
 
113
  },
114
  {
115
  "id": 50268,
116
+ "special": true,
117
  "content": "<|pl|>",
118
  "single_word": false,
119
  "lstrip": false,
120
  "rstrip": false,
121
+ "normalized": false
 
122
  },
123
  {
124
  "id": 50269,
125
+ "special": true,
126
  "content": "<|ca|>",
127
  "single_word": false,
128
  "lstrip": false,
129
  "rstrip": false,
130
+ "normalized": false
 
131
  },
132
  {
133
  "id": 50270,
134
+ "special": true,
135
  "content": "<|nl|>",
136
  "single_word": false,
137
  "lstrip": false,
138
  "rstrip": false,
139
+ "normalized": false
 
140
  },
141
  {
142
  "id": 50271,
143
+ "special": true,
144
  "content": "<|ar|>",
145
  "single_word": false,
146
  "lstrip": false,
147
  "rstrip": false,
148
+ "normalized": false
 
149
  },
150
  {
151
  "id": 50272,
152
+ "special": true,
153
  "content": "<|sv|>",
154
  "single_word": false,
155
  "lstrip": false,
156
  "rstrip": false,
157
+ "normalized": false
 
158
  },
159
  {
160
  "id": 50273,
161
+ "special": true,
162
  "content": "<|it|>",
163
  "single_word": false,
164
  "lstrip": false,
165
  "rstrip": false,
166
+ "normalized": false
 
167
  },
168
  {
169
  "id": 50274,
170
+ "special": true,
171
  "content": "<|id|>",
172
  "single_word": false,
173
  "lstrip": false,
174
  "rstrip": false,
175
+ "normalized": false
 
176
  },
177
  {
178
  "id": 50275,
179
+ "special": true,
180
  "content": "<|hi|>",
181
  "single_word": false,
182
  "lstrip": false,
183
  "rstrip": false,
184
+ "normalized": false
 
185
  },
186
  {
187
  "id": 50276,
188
+ "special": true,
189
  "content": "<|fi|>",
190
  "single_word": false,
191
  "lstrip": false,
192
  "rstrip": false,
193
+ "normalized": false
 
194
  },
195
  {
196
  "id": 50277,
197
+ "special": true,
198
  "content": "<|vi|>",
199
  "single_word": false,
200
  "lstrip": false,
201
  "rstrip": false,
202
+ "normalized": false
 
203
  },
204
  {
205
  "id": 50278,
206
+ "special": true,
207
  "content": "<|iw|>",
208
  "single_word": false,
209
  "lstrip": false,
210
  "rstrip": false,
211
+ "normalized": false
 
212
  },
213
  {
214
  "id": 50279,
215
+ "special": true,
216
  "content": "<|uk|>",
217
  "single_word": false,
218
  "lstrip": false,
219
  "rstrip": false,
220
+ "normalized": false
 
221
  },
222
  {
223
  "id": 50280,
224
+ "special": true,
225
  "content": "<|el|>",
226
  "single_word": false,
227
  "lstrip": false,
228
  "rstrip": false,
229
+ "normalized": false
 
230
  },
231
  {
232
  "id": 50281,
233
+ "special": true,
234
  "content": "<|ms|>",
235
  "single_word": false,
236
  "lstrip": false,
237
  "rstrip": false,
238
+ "normalized": false
 
239
  },
240
  {
241
  "id": 50282,
242
+ "special": true,
243
  "content": "<|cs|>",
244
  "single_word": false,
245
  "lstrip": false,
246
  "rstrip": false,
247
+ "normalized": false
 
248
  },
249
  {
250
  "id": 50283,
251
+ "special": true,
252
  "content": "<|ro|>",
253
  "single_word": false,
254
  "lstrip": false,
255
  "rstrip": false,
256
+ "normalized": false
 
257
  },
258
  {
259
  "id": 50284,
260
+ "special": true,
261
  "content": "<|da|>",
262
  "single_word": false,
263
  "lstrip": false,
264
  "rstrip": false,
265
+ "normalized": false
 
266
  },
267
  {
268
  "id": 50285,
269
+ "special": true,
270
  "content": "<|hu|>",
271
  "single_word": false,
272
  "lstrip": false,
273
  "rstrip": false,
274
+ "normalized": false
 
275
  },
276
  {
277
  "id": 50286,
278
+ "special": true,
279
  "content": "<|ta|>",
280
  "single_word": false,
281
  "lstrip": false,
282
  "rstrip": false,
283
+ "normalized": false
 
284
  },
285
  {
286
  "id": 50287,
287
+ "special": true,
288
  "content": "<|no|>",
289
  "single_word": false,
290
  "lstrip": false,
291
  "rstrip": false,
292
+ "normalized": false
 
293
  },
294
  {
295
  "id": 50288,
296
+ "special": true,
297
  "content": "<|th|>",
298
  "single_word": false,
299
  "lstrip": false,
300
  "rstrip": false,
301
+ "normalized": false
 
302
  },
303
  {
304
  "id": 50289,
305
+ "special": true,
306
  "content": "<|ur|>",
307
  "single_word": false,
308
  "lstrip": false,
309
  "rstrip": false,
310
+ "normalized": false
 
311
  },
312
  {
313
  "id": 50290,
314
+ "special": true,
315
  "content": "<|hr|>",
316
  "single_word": false,
317
  "lstrip": false,
318
  "rstrip": false,
319
+ "normalized": false
 
320
  },
321
  {
322
  "id": 50291,
323
+ "special": true,
324
  "content": "<|bg|>",
325
  "single_word": false,
326
  "lstrip": false,
327
  "rstrip": false,
328
+ "normalized": false
 
329
  },
330
  {
331
  "id": 50292,
332
+ "special": true,
333
  "content": "<|lt|>",
334
  "single_word": false,
335
  "lstrip": false,
336
  "rstrip": false,
337
+ "normalized": false
 
338
  },
339
  {
340
  "id": 50293,
341
+ "special": true,
342
  "content": "<|la|>",
343
  "single_word": false,
344
  "lstrip": false,
345
  "rstrip": false,
346
+ "normalized": false
 
347
  },
348
  {
349
  "id": 50294,
350
+ "special": true,
351
  "content": "<|mi|>",
352
  "single_word": false,
353
  "lstrip": false,
354
  "rstrip": false,
355
+ "normalized": false
 
356
  },
357
  {
358
  "id": 50295,
359
+ "special": true,
360
  "content": "<|ml|>",
361
  "single_word": false,
362
  "lstrip": false,
363
  "rstrip": false,
364
+ "normalized": false
 
365
  },
366
  {
367
  "id": 50296,
368
+ "special": true,
369
  "content": "<|cy|>",
370
  "single_word": false,
371
  "lstrip": false,
372
  "rstrip": false,
373
+ "normalized": false
 
374
  },
375
  {
376
  "id": 50297,
377
+ "special": true,
378
  "content": "<|sk|>",
379
  "single_word": false,
380
  "lstrip": false,
381
  "rstrip": false,
382
+ "normalized": false
 
383
  },
384
  {
385
  "id": 50298,
386
+ "special": true,
387
  "content": "<|te|>",
388
  "single_word": false,
389
  "lstrip": false,
390
  "rstrip": false,
391
+ "normalized": false
 
392
  },
393
  {
394
  "id": 50299,
395
+ "special": true,
396
  "content": "<|fa|>",
397
  "single_word": false,
398
  "lstrip": false,
399
  "rstrip": false,
400
+ "normalized": false
 
401
  },
402
  {
403
  "id": 50300,
404
+ "special": true,
405
  "content": "<|lv|>",
406
  "single_word": false,
407
  "lstrip": false,
408
  "rstrip": false,
409
+ "normalized": false
 
410
  },
411
  {
412
  "id": 50301,
413
+ "special": true,
414
  "content": "<|bn|>",
415
  "single_word": false,
416
  "lstrip": false,
417
  "rstrip": false,
418
+ "normalized": false
 
419
  },
420
  {
421
  "id": 50302,
422
+ "special": true,
423
  "content": "<|sr|>",
424
  "single_word": false,
425
  "lstrip": false,
426
  "rstrip": false,
427
+ "normalized": false
 
428
  },
429
  {
430
  "id": 50303,
431
+ "special": true,
432
  "content": "<|az|>",
433
  "single_word": false,
434
  "lstrip": false,
435
  "rstrip": false,
436
+ "normalized": false
 
437
  },
438
  {
439
  "id": 50304,
440
+ "special": true,
441
  "content": "<|sl|>",
442
  "single_word": false,
443
  "lstrip": false,
444
  "rstrip": false,
445
+ "normalized": false
 
446
  },
447
  {
448
  "id": 50305,
449
+ "special": true,
450
  "content": "<|kn|>",
451
  "single_word": false,
452
  "lstrip": false,
453
  "rstrip": false,
454
+ "normalized": false
 
455
  },
456
  {
457
  "id": 50306,
458
+ "special": true,
459
  "content": "<|et|>",
460
  "single_word": false,
461
  "lstrip": false,
462
  "rstrip": false,
463
+ "normalized": false
 
464
  },
465
  {
466
  "id": 50307,
467
+ "special": true,
468
  "content": "<|mk|>",
469
  "single_word": false,
470
  "lstrip": false,
471
  "rstrip": false,
472
+ "normalized": false
 
473
  },
474
  {
475
  "id": 50308,
476
+ "special": true,
477
  "content": "<|br|>",
478
  "single_word": false,
479
  "lstrip": false,
480
  "rstrip": false,
481
+ "normalized": false
 
482
  },
483
  {
484
  "id": 50309,
485
+ "special": true,
486
  "content": "<|eu|>",
487
  "single_word": false,
488
  "lstrip": false,
489
  "rstrip": false,
490
+ "normalized": false
 
491
  },
492
  {
493
  "id": 50310,
494
+ "special": true,
495
  "content": "<|is|>",
496
  "single_word": false,
497
  "lstrip": false,
498
  "rstrip": false,
499
+ "normalized": false
 
500
  },
501
  {
502
  "id": 50311,
503
+ "special": true,
504
  "content": "<|hy|>",
505
  "single_word": false,
506
  "lstrip": false,
507
  "rstrip": false,
508
+ "normalized": false
 
509
  },
510
  {
511
  "id": 50312,
512
+ "special": true,
513
  "content": "<|ne|>",
514
  "single_word": false,
515
  "lstrip": false,
516
  "rstrip": false,
517
+ "normalized": false
 
518
  },
519
  {
520
  "id": 50313,
521
+ "special": true,
522
  "content": "<|mn|>",
523
  "single_word": false,
524
  "lstrip": false,
525
  "rstrip": false,
526
+ "normalized": false
 
527
  },
528
  {
529
  "id": 50314,
530
+ "special": true,
531
  "content": "<|bs|>",
532
  "single_word": false,
533
  "lstrip": false,
534
  "rstrip": false,
535
+ "normalized": false
 
536
  },
537
  {
538
  "id": 50315,
539
+ "special": true,
540
  "content": "<|kk|>",
541
  "single_word": false,
542
  "lstrip": false,
543
  "rstrip": false,
544
+ "normalized": false
 
545
  },
546
  {
547
  "id": 50316,
548
+ "special": true,
549
  "content": "<|sq|>",
550
  "single_word": false,
551
  "lstrip": false,
552
  "rstrip": false,
553
+ "normalized": false
 
554
  },
555
  {
556
  "id": 50317,
557
+ "special": true,
558
  "content": "<|sw|>",
559
  "single_word": false,
560
  "lstrip": false,
561
  "rstrip": false,
562
+ "normalized": false
 
563
  },
564
  {
565
  "id": 50318,
566
+ "special": true,
567
  "content": "<|gl|>",
568
  "single_word": false,
569
  "lstrip": false,
570
  "rstrip": false,
571
+ "normalized": false
 
572
  },
573
  {
574
  "id": 50319,
575
+ "special": true,
576
  "content": "<|mr|>",
577
  "single_word": false,
578
  "lstrip": false,
579
  "rstrip": false,
580
+ "normalized": false
 
581
  },
582
  {
583
  "id": 50320,
584
+ "special": true,
585
  "content": "<|pa|>",
586
  "single_word": false,
587
  "lstrip": false,
588
  "rstrip": false,
589
+ "normalized": false
 
590
  },
591
  {
592
  "id": 50321,
593
+ "special": true,
594
  "content": "<|si|>",
595
  "single_word": false,
596
  "lstrip": false,
597
  "rstrip": false,
598
+ "normalized": false
 
599
  },
600
  {
601
  "id": 50322,
602
+ "special": true,
603
  "content": "<|km|>",
604
  "single_word": false,
605
  "lstrip": false,
606
  "rstrip": false,
607
+ "normalized": false
 
608
  },
609
  {
610
  "id": 50323,
611
+ "special": true,
612
  "content": "<|sn|>",
613
  "single_word": false,
614
  "lstrip": false,
615
  "rstrip": false,
616
+ "normalized": false
 
617
  },
618
  {
619
  "id": 50324,
620
+ "special": true,
621
  "content": "<|yo|>",
622
  "single_word": false,
623
  "lstrip": false,
624
  "rstrip": false,
625
+ "normalized": false
 
626
  },
627
  {
628
  "id": 50325,
629
+ "special": true,
630
  "content": "<|so|>",
631
  "single_word": false,
632
  "lstrip": false,
633
  "rstrip": false,
634
+ "normalized": false
 
635
  },
636
  {
637
  "id": 50326,
638
+ "special": true,
639
  "content": "<|af|>",
640
  "single_word": false,
641
  "lstrip": false,
642
  "rstrip": false,
643
+ "normalized": false
 
644
  },
645
  {
646
  "id": 50327,
647
+ "special": true,
648
  "content": "<|oc|>",
649
  "single_word": false,
650
  "lstrip": false,
651
  "rstrip": false,
652
+ "normalized": false
 
653
  },
654
  {
655
  "id": 50328,
656
+ "special": true,
657
  "content": "<|ka|>",
658
  "single_word": false,
659
  "lstrip": false,
660
  "rstrip": false,
661
+ "normalized": false
 
662
  },
663
  {
664
  "id": 50329,
665
+ "special": true,
666
  "content": "<|be|>",
667
  "single_word": false,
668
  "lstrip": false,
669
  "rstrip": false,
670
+ "normalized": false
 
671
  },
672
  {
673
  "id": 50330,
674
+ "special": true,
675
  "content": "<|tg|>",
676
  "single_word": false,
677
  "lstrip": false,
678
  "rstrip": false,
679
+ "normalized": false
 
680
  },
681
  {
682
  "id": 50331,
683
+ "special": true,
684
  "content": "<|sd|>",
685
  "single_word": false,
686
  "lstrip": false,
687
  "rstrip": false,
688
+ "normalized": false
 
689
  },
690
  {
691
  "id": 50332,
692
+ "special": true,
693
  "content": "<|gu|>",
694
  "single_word": false,
695
  "lstrip": false,
696
  "rstrip": false,
697
+ "normalized": false
 
698
  },
699
  {
700
  "id": 50333,
701
+ "special": true,
702
  "content": "<|am|>",
703
  "single_word": false,
704
  "lstrip": false,
705
  "rstrip": false,
706
+ "normalized": false
 
707
  },
708
  {
709
  "id": 50334,
710
+ "special": true,
711
  "content": "<|yi|>",
712
  "single_word": false,
713
  "lstrip": false,
714
  "rstrip": false,
715
+ "normalized": false
 
716
  },
717
  {
718
  "id": 50335,
719
+ "special": true,
720
  "content": "<|lo|>",
721
  "single_word": false,
722
  "lstrip": false,
723
  "rstrip": false,
724
+ "normalized": false
 
725
  },
726
  {
727
  "id": 50336,
728
+ "special": true,
729
  "content": "<|uz|>",
730
  "single_word": false,
731
  "lstrip": false,
732
  "rstrip": false,
733
+ "normalized": false
 
734
  },
735
  {
736
  "id": 50337,
737
+ "special": true,
738
  "content": "<|fo|>",
739
  "single_word": false,
740
  "lstrip": false,
741
  "rstrip": false,
742
+ "normalized": false
 
743
  },
744
  {
745
  "id": 50338,
746
+ "special": true,
747
  "content": "<|ht|>",
748
  "single_word": false,
749
  "lstrip": false,
750
  "rstrip": false,
751
+ "normalized": false
 
752
  },
753
  {
754
  "id": 50339,
755
+ "special": true,
756
  "content": "<|ps|>",
757
  "single_word": false,
758
  "lstrip": false,
759
  "rstrip": false,
760
+ "normalized": false
 
761
  },
762
  {
763
  "id": 50340,
764
+ "special": true,
765
  "content": "<|tk|>",
766
  "single_word": false,
767
  "lstrip": false,
768
  "rstrip": false,
769
+ "normalized": false
 
770
  },
771
  {
772
  "id": 50341,
773
+ "special": true,
774
  "content": "<|nn|>",
775
  "single_word": false,
776
  "lstrip": false,
777
  "rstrip": false,
778
+ "normalized": false
 
779
  },
780
  {
781
  "id": 50342,
782
+ "special": true,
783
  "content": "<|mt|>",
784
  "single_word": false,
785
  "lstrip": false,
786
  "rstrip": false,
787
+ "normalized": false
 
788
  },
789
  {
790
  "id": 50343,
791
+ "special": true,
792
  "content": "<|sa|>",
793
  "single_word": false,
794
  "lstrip": false,
795
  "rstrip": false,
796
+ "normalized": false
 
797
  },
798
  {
799
  "id": 50344,
800
+ "special": true,
801
  "content": "<|lb|>",
802
  "single_word": false,
803
  "lstrip": false,
804
  "rstrip": false,
805
+ "normalized": false
 
806
  },
807
  {
808
  "id": 50345,
809
+ "special": true,
810
  "content": "<|my|>",
811
  "single_word": false,
812
  "lstrip": false,
813
  "rstrip": false,
814
+ "normalized": false
 
815
  },
816
  {
817
  "id": 50346,
818
+ "special": true,
819
  "content": "<|bo|>",
820
  "single_word": false,
821
  "lstrip": false,
822
  "rstrip": false,
823
+ "normalized": false
 
824
  },
825
  {
826
  "id": 50347,
827
+ "special": true,
828
  "content": "<|tl|>",
829
  "single_word": false,
830
  "lstrip": false,
831
  "rstrip": false,
832
+ "normalized": false
 
833
  },
834
  {
835
  "id": 50348,
836
+ "special": true,
837
  "content": "<|mg|>",
838
  "single_word": false,
839
  "lstrip": false,
840
  "rstrip": false,
841
+ "normalized": false
 
842
  },
843
  {
844
  "id": 50349,
845
+ "special": true,
846
  "content": "<|as|>",
847
  "single_word": false,
848
  "lstrip": false,
849
  "rstrip": false,
850
+ "normalized": false
 
851
  },
852
  {
853
  "id": 50350,
854
+ "special": true,
855
  "content": "<|tt|>",
856
  "single_word": false,
857
  "lstrip": false,
858
  "rstrip": false,
859
+ "normalized": false
 
860
  },
861
  {
862
  "id": 50351,
863
+ "special": true,
864
  "content": "<|haw|>",
865
  "single_word": false,
866
  "lstrip": false,
867
  "rstrip": false,
868
+ "normalized": false
 
869
  },
870
  {
871
  "id": 50352,
872
+ "special": true,
873
  "content": "<|ln|>",
874
  "single_word": false,
875
  "lstrip": false,
876
  "rstrip": false,
877
+ "normalized": false
 
878
  },
879
  {
880
  "id": 50353,
881
+ "special": true,
882
  "content": "<|ha|>",
883
  "single_word": false,
884
  "lstrip": false,
885
  "rstrip": false,
886
+ "normalized": false
 
887
  },
888
  {
889
  "id": 50354,
890
+ "special": true,
891
  "content": "<|ba|>",
892
  "single_word": false,
893
  "lstrip": false,
894
  "rstrip": false,
895
+ "normalized": false
 
896
  },
897
  {
898
  "id": 50355,
899
+ "special": true,
900
  "content": "<|jw|>",
901
  "single_word": false,
902
  "lstrip": false,
903
  "rstrip": false,
904
+ "normalized": false
 
905
  },
906
  {
907
  "id": 50356,
908
+ "special": true,
909
  "content": "<|su|>",
910
  "single_word": false,
911
  "lstrip": false,
912
  "rstrip": false,
913
+ "normalized": false
 
914
  },
915
  {
916
  "id": 50357,
917
+ "special": true,
918
  "content": "<|translate|>",
919
  "single_word": false,
920
  "lstrip": false,
921
  "rstrip": false,
922
+ "normalized": false
 
923
  },
924
  {
925
  "id": 50358,
926
+ "special": true,
927
  "content": "<|transcribe|>",
928
  "single_word": false,
929
  "lstrip": false,
930
  "rstrip": false,
931
+ "normalized": false
 
932
  },
933
  {
934
  "id": 50359,
935
+ "special": true,
936
  "content": "<|startoflm|>",
937
  "single_word": false,
938
  "lstrip": false,
939
  "rstrip": false,
940
+ "normalized": false
 
941
  },
942
  {
943
  "id": 50360,
944
+ "special": true,
945
  "content": "<|startofprev|>",
946
  "single_word": false,
947
  "lstrip": false,
948
  "rstrip": false,
949
+ "normalized": false
 
950
  },
951
  {
952
  "id": 50361,
953
+ "special": true,
954
  "content": "<|nocaptions|>",
955
  "single_word": false,
956
  "lstrip": false,
957
  "rstrip": false,
958
+ "normalized": false
 
959
  },
960
  {
961
  "id": 50362,
962
+ "special": true,
963
  "content": "<|notimestamps|>",
964
  "single_word": false,
965
  "lstrip": false,
966
  "rstrip": false,
967
+ "normalized": false
 
968
  }
969
  ],
970
  "normalizer": null,
971
  "pre_tokenizer": {
972
  "type": "ByteLevel",
973
  "add_prefix_space": false,
974
+ "trim_offsets": true
 
975
  },
976
  "post_processor": {
977
  "type": "TemplateProcessing",
1066
  "decoder": {
1067
  "type": "ByteLevel",
1068
  "add_prefix_space": true,
1069
+ "trim_offsets": true
 
1070
  },
1071
  "model": {
1072
  "type": "BPE",
quantized/openai/whisper-base.en/default/tokenizer_config.json CHANGED
@@ -19,7 +19,6 @@
19
  },
20
  "errors": "replace",
21
  "model_max_length": 1024,
22
- "name_or_path": "openai/whisper-base.en",
23
  "pad_token": null,
24
  "processor_class": "WhisperProcessor",
25
  "return_attention_mask": false,
19
  },
20
  "errors": "replace",
21
  "model_max_length": 1024,
 
22
  "pad_token": null,
23
  "processor_class": "WhisperProcessor",
24
  "return_attention_mask": false,
quantized/openai/whisper-base.en/default/vocab.json CHANGED
The diff for this file is too large to render. See raw diff
quantized/openai/whisper-base.en/speech2seq-lm-with-past/config.json CHANGED
@@ -2,6 +2,7 @@
2
  "_name_or_path": "openai/whisper-base.en",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
 
5
  "architectures": [
6
  "WhisperForConditionalGeneration"
7
  ],
@@ -11,6 +12,7 @@
11
  50256
12
  ],
13
  "bos_token_id": 50257,
 
14
  "d_model": 512,
15
  "decoder_attention_heads": 8,
16
  "decoder_ffn_dim": 2048,
@@ -31,6 +33,12 @@
31
  ],
32
  "init_std": 0.02,
33
  "is_encoder_decoder": true,
 
 
 
 
 
 
34
  "max_length": 448,
35
  "max_source_positions": 1500,
36
  "max_target_positions": 448,
@@ -129,7 +137,8 @@
129
  50360,
130
  50361
131
  ],
132
- "transformers_version": "4.26.1",
133
  "use_cache": true,
 
134
  "vocab_size": 51864
135
  }
2
  "_name_or_path": "openai/whisper-base.en",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
5
+ "apply_spec_augment": false,
6
  "architectures": [
7
  "WhisperForConditionalGeneration"
8
  ],
12
  50256
13
  ],
14
  "bos_token_id": 50257,
15
+ "classifier_proj_size": 256,
16
  "d_model": 512,
17
  "decoder_attention_heads": 8,
18
  "decoder_ffn_dim": 2048,
33
  ],
34
  "init_std": 0.02,
35
  "is_encoder_decoder": true,
36
+ "mask_feature_length": 10,
37
+ "mask_feature_min_masks": 0,
38
+ "mask_feature_prob": 0.0,
39
+ "mask_time_length": 10,
40
+ "mask_time_min_masks": 2,
41
+ "mask_time_prob": 0.05,
42
  "max_length": 448,
43
  "max_source_positions": 1500,
44
  "max_target_positions": 448,
137
  50360,
138
  50361
139
  ],
140
+ "transformers_version": "4.27.2",
141
  "use_cache": true,
142
+ "use_weighted_layer_sum": false,
143
  "vocab_size": 51864
144
  }
quantized/openai/whisper-base.en/speech2seq-lm-with-past/decoder_model.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d1a4eb50199211fafcccf5efb8176ab1f3422597fc767f05d30c559ce091abe6
3
- size 80321068
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdbb51eee5ded5368f2892aae40f332440c73c28f805b0f814d6bede9b89f597
3
+ size 80281417
quantized/openai/whisper-base.en/speech2seq-lm-with-past/decoder_model_merged.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b2396a1c4079f5b541d05ccfd21bef6d20af75e48f9ac5c34bc4b67f3751fb5
3
- size 80556990
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1dc1286247e416f25c6d7569e97369ced423c7e1253c9cfcbf20631a267a91ec
3
+ size 80477622
quantized/openai/whisper-base.en/speech2seq-lm-with-past/decoder_with_past_model.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:65b70d66fa9dbb1a61c863065100a08bf58a689383b212b1e91646738cc97424
3
- size 77066299
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19c03bc1a84dc5cbc36a0c37b035e08775c72eae4f9b39828c2d5e653fa389c2
3
+ size 77026648
quantized/openai/whisper-base.en/speech2seq-lm-with-past/encoder_model.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5ceadcf4cb45d3e07507174a5f281f745085f01ea01b32f11dcb14f20e81291b
3
- size 23337235
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e5c7694b53a299a30beb6c6b9b4934f814cf3382bec0957b612ee0934b78858
3
+ size 23321703
quantized/openai/whisper-base.en/speech2seq-lm-with-past/merges.txt CHANGED
@@ -1,4 +1,4 @@
1
- #version: 0.2
2
  Ġ t
3
  Ġ a
4
  h e
1
+ #version: 0.2 - Trained by `huggingface/tokenizers`
2
  Ġ t
3
  Ġ a
4
  h e
quantized/openai/whisper-base.en/speech2seq-lm-with-past/tokenizer.json CHANGED
@@ -5,974 +5,973 @@
5
  "added_tokens": [
6
  {
7
  "id": 50256,
 
8
  "content": "<|endoftext|>",
9
  "single_word": false,
10
  "lstrip": false,
11
  "rstrip": false,
12
- "normalized": false,
13
- "special": true
14
  },
15
  {
16
  "id": 50257,
 
17
  "content": "<|startoftranscript|>",
18
  "single_word": false,
19
  "lstrip": false,
20
  "rstrip": false,
21
- "normalized": false,
22
- "special": true
23
  },
24
  {
25
  "id": 50258,
 
26
  "content": "<|en|>",
27
  "single_word": false,
28
  "lstrip": false,
29
  "rstrip": false,
30
- "normalized": false,
31
- "special": true
32
  },
33
  {
34
  "id": 50259,
 
35
  "content": "<|zh|>",
36
  "single_word": false,
37
  "lstrip": false,
38
  "rstrip": false,
39
- "normalized": false,
40
- "special": true
41
  },
42
  {
43
  "id": 50260,
 
44
  "content": "<|de|>",
45
  "single_word": false,
46
  "lstrip": false,
47
  "rstrip": false,
48
- "normalized": false,
49
- "special": true
50
  },
51
  {
52
  "id": 50261,
 
53
  "content": "<|es|>",
54
  "single_word": false,
55
  "lstrip": false,
56
  "rstrip": false,
57
- "normalized": false,
58
- "special": true
59
  },
60
  {
61
  "id": 50262,
 
62
  "content": "<|ru|>",
63
  "single_word": false,
64
  "lstrip": false,
65
  "rstrip": false,
66
- "normalized": false,
67
- "special": true
68
  },
69
  {
70
  "id": 50263,
 
71
  "content": "<|ko|>",
72
  "single_word": false,
73
  "lstrip": false,
74
  "rstrip": false,
75
- "normalized": false,
76
- "special": true
77
  },
78
  {
79
  "id": 50264,
 
80
  "content": "<|fr|>",
81
  "single_word": false,
82
  "lstrip": false,
83
  "rstrip": false,
84
- "normalized": false,
85
- "special": true
86
  },
87
  {
88
  "id": 50265,
 
89
  "content": "<|ja|>",
90
  "single_word": false,
91
  "lstrip": false,
92
  "rstrip": false,
93
- "normalized": false,
94
- "special": true
95
  },
96
  {
97
  "id": 50266,
 
98
  "content": "<|pt|>",
99
  "single_word": false,
100
  "lstrip": false,
101
  "rstrip": false,
102
- "normalized": false,
103
- "special": true
104
  },
105
  {
106
  "id": 50267,
 
107
  "content": "<|tr|>",
108
  "single_word": false,
109
  "lstrip": false,
110
  "rstrip": false,
111
- "normalized": false,
112
- "special": true
113
  },
114
  {
115
  "id": 50268,
 
116
  "content": "<|pl|>",
117
  "single_word": false,
118
  "lstrip": false,
119
  "rstrip": false,
120
- "normalized": false,
121
- "special": true
122
  },
123
  {
124
  "id": 50269,
 
125
  "content": "<|ca|>",
126
  "single_word": false,
127
  "lstrip": false,
128
  "rstrip": false,
129
- "normalized": false,
130
- "special": true
131
  },
132
  {
133
  "id": 50270,
 
134
  "content": "<|nl|>",
135
  "single_word": false,
136
  "lstrip": false,
137
  "rstrip": false,
138
- "normalized": false,
139
- "special": true
140
  },
141
  {
142
  "id": 50271,
 
143
  "content": "<|ar|>",
144
  "single_word": false,
145
  "lstrip": false,
146
  "rstrip": false,
147
- "normalized": false,
148
- "special": true
149
  },
150
  {
151
  "id": 50272,
 
152
  "content": "<|sv|>",
153
  "single_word": false,
154
  "lstrip": false,
155
  "rstrip": false,
156
- "normalized": false,
157
- "special": true
158
  },
159
  {
160
  "id": 50273,
 
161
  "content": "<|it|>",
162
  "single_word": false,
163
  "lstrip": false,
164
  "rstrip": false,
165
- "normalized": false,
166
- "special": true
167
  },
168
  {
169
  "id": 50274,
 
170
  "content": "<|id|>",
171
  "single_word": false,
172
  "lstrip": false,
173
  "rstrip": false,
174
- "normalized": false,
175
- "special": true
176
  },
177
  {
178
  "id": 50275,
 
179
  "content": "<|hi|>",
180
  "single_word": false,
181
  "lstrip": false,
182
  "rstrip": false,
183
- "normalized": false,
184
- "special": true
185
  },
186
  {
187
  "id": 50276,
 
188
  "content": "<|fi|>",
189
  "single_word": false,
190
  "lstrip": false,
191
  "rstrip": false,
192
- "normalized": false,
193
- "special": true
194
  },
195
  {
196
  "id": 50277,
 
197
  "content": "<|vi|>",
198
  "single_word": false,
199
  "lstrip": false,
200
  "rstrip": false,
201
- "normalized": false,
202
- "special": true
203
  },
204
  {
205
  "id": 50278,
 
206
  "content": "<|iw|>",
207
  "single_word": false,
208
  "lstrip": false,
209
  "rstrip": false,
210
- "normalized": false,
211
- "special": true
212
  },
213
  {
214
  "id": 50279,
 
215
  "content": "<|uk|>",
216
  "single_word": false,
217
  "lstrip": false,
218
  "rstrip": false,
219
- "normalized": false,
220
- "special": true
221
  },
222
  {
223
  "id": 50280,
 
224
  "content": "<|el|>",
225
  "single_word": false,
226
  "lstrip": false,
227
  "rstrip": false,
228
- "normalized": false,
229
- "special": true
230
  },
231
  {
232
  "id": 50281,
 
233
  "content": "<|ms|>",
234
  "single_word": false,
235
  "lstrip": false,
236
  "rstrip": false,
237
- "normalized": false,
238
- "special": true
239
  },
240
  {
241
  "id": 50282,
 
242
  "content": "<|cs|>",
243
  "single_word": false,
244
  "lstrip": false,
245
  "rstrip": false,
246
- "normalized": false,
247
- "special": true
248
  },
249
  {
250
  "id": 50283,
 
251
  "content": "<|ro|>",
252
  "single_word": false,
253
  "lstrip": false,
254
  "rstrip": false,
255
- "normalized": false,
256
- "special": true
257
  },
258
  {
259
  "id": 50284,
 
260
  "content": "<|da|>",
261
  "single_word": false,
262
  "lstrip": false,
263
  "rstrip": false,
264
- "normalized": false,
265
- "special": true
266
  },
267
  {
268
  "id": 50285,
 
269
  "content": "<|hu|>",
270
  "single_word": false,
271
  "lstrip": false,
272
  "rstrip": false,
273
- "normalized": false,
274
- "special": true
275
  },
276
  {
277
  "id": 50286,
 
278
  "content": "<|ta|>",
279
  "single_word": false,
280
  "lstrip": false,
281
  "rstrip": false,
282
- "normalized": false,
283
- "special": true
284
  },
285
  {
286
  "id": 50287,
 
287
  "content": "<|no|>",
288
  "single_word": false,
289
  "lstrip": false,
290
  "rstrip": false,
291
- "normalized": false,
292
- "special": true
293
  },
294
  {
295
  "id": 50288,
 
296
  "content": "<|th|>",
297
  "single_word": false,
298
  "lstrip": false,
299
  "rstrip": false,
300
- "normalized": false,
301
- "special": true
302
  },
303
  {
304
  "id": 50289,
 
305
  "content": "<|ur|>",
306
  "single_word": false,
307
  "lstrip": false,
308
  "rstrip": false,
309
- "normalized": false,
310
- "special": true
311
  },
312
  {
313
  "id": 50290,
 
314
  "content": "<|hr|>",
315
  "single_word": false,
316
  "lstrip": false,
317
  "rstrip": false,
318
- "normalized": false,
319
- "special": true
320
  },
321
  {
322
  "id": 50291,
 
323
  "content": "<|bg|>",
324
  "single_word": false,
325
  "lstrip": false,
326
  "rstrip": false,
327
- "normalized": false,
328
- "special": true
329
  },
330
  {
331
  "id": 50292,
 
332
  "content": "<|lt|>",
333
  "single_word": false,
334
  "lstrip": false,
335
  "rstrip": false,
336
- "normalized": false,
337
- "special": true
338
  },
339
  {
340
  "id": 50293,
 
341
  "content": "<|la|>",
342
  "single_word": false,
343
  "lstrip": false,
344
  "rstrip": false,
345
- "normalized": false,
346
- "special": true
347
  },
348
  {
349
  "id": 50294,
 
350
  "content": "<|mi|>",
351
  "single_word": false,
352
  "lstrip": false,
353
  "rstrip": false,
354
- "normalized": false,
355
- "special": true
356
  },
357
  {
358
  "id": 50295,
 
359
  "content": "<|ml|>",
360
  "single_word": false,
361
  "lstrip": false,
362
  "rstrip": false,
363
- "normalized": false,
364
- "special": true
365
  },
366
  {
367
  "id": 50296,
 
368
  "content": "<|cy|>",
369
  "single_word": false,
370
  "lstrip": false,
371
  "rstrip": false,
372
- "normalized": false,
373
- "special": true
374
  },
375
  {
376
  "id": 50297,
 
377
  "content": "<|sk|>",
378
  "single_word": false,
379
  "lstrip": false,
380
  "rstrip": false,
381
- "normalized": false,
382
- "special": true
383
  },
384
  {
385
  "id": 50298,
 
386
  "content": "<|te|>",
387
  "single_word": false,
388
  "lstrip": false,
389
  "rstrip": false,
390
- "normalized": false,
391
- "special": true
392
  },
393
  {
394
  "id": 50299,
 
395
  "content": "<|fa|>",
396
  "single_word": false,
397
  "lstrip": false,
398
  "rstrip": false,
399
- "normalized": false,
400
- "special": true
401
  },
402
  {
403
  "id": 50300,
 
404
  "content": "<|lv|>",
405
  "single_word": false,
406
  "lstrip": false,
407
  "rstrip": false,
408
- "normalized": false,
409
- "special": true
410
  },
411
  {
412
  "id": 50301,
 
413
  "content": "<|bn|>",
414
  "single_word": false,
415
  "lstrip": false,
416
  "rstrip": false,
417
- "normalized": false,
418
- "special": true
419
  },
420
  {
421
  "id": 50302,
 
422
  "content": "<|sr|>",
423
  "single_word": false,
424
  "lstrip": false,
425
  "rstrip": false,
426
- "normalized": false,
427
- "special": true
428
  },
429
  {
430
  "id": 50303,
 
431
  "content": "<|az|>",
432
  "single_word": false,
433
  "lstrip": false,
434
  "rstrip": false,
435
- "normalized": false,
436
- "special": true
437
  },
438
  {
439
  "id": 50304,
 
440
  "content": "<|sl|>",
441
  "single_word": false,
442
  "lstrip": false,
443
  "rstrip": false,
444
- "normalized": false,
445
- "special": true
446
  },
447
  {
448
  "id": 50305,
 
449
  "content": "<|kn|>",
450
  "single_word": false,
451
  "lstrip": false,
452
  "rstrip": false,
453
- "normalized": false,
454
- "special": true
455
  },
456
  {
457
  "id": 50306,
 
458
  "content": "<|et|>",
459
  "single_word": false,
460
  "lstrip": false,
461
  "rstrip": false,
462
- "normalized": false,
463
- "special": true
464
  },
465
  {
466
  "id": 50307,
 
467
  "content": "<|mk|>",
468
  "single_word": false,
469
  "lstrip": false,
470
  "rstrip": false,
471
- "normalized": false,
472
- "special": true
473
  },
474
  {
475
  "id": 50308,
 
476
  "content": "<|br|>",
477
  "single_word": false,
478
  "lstrip": false,
479
  "rstrip": false,
480
- "normalized": false,
481
- "special": true
482
  },
483
  {
484
  "id": 50309,
 
485
  "content": "<|eu|>",
486
  "single_word": false,
487
  "lstrip": false,
488
  "rstrip": false,
489
- "normalized": false,
490
- "special": true
491
  },
492
  {
493
  "id": 50310,
 
494
  "content": "<|is|>",
495
  "single_word": false,
496
  "lstrip": false,
497
  "rstrip": false,
498
- "normalized": false,
499
- "special": true
500
  },
501
  {
502
  "id": 50311,
 
503
  "content": "<|hy|>",
504
  "single_word": false,
505
  "lstrip": false,
506
  "rstrip": false,
507
- "normalized": false,
508
- "special": true
509
  },
510
  {
511
  "id": 50312,
 
512
  "content": "<|ne|>",
513
  "single_word": false,
514
  "lstrip": false,
515
  "rstrip": false,
516
- "normalized": false,
517
- "special": true
518
  },
519
  {
520
  "id": 50313,
 
521
  "content": "<|mn|>",
522
  "single_word": false,
523
  "lstrip": false,
524
  "rstrip": false,
525
- "normalized": false,
526
- "special": true
527
  },
528
  {
529
  "id": 50314,
 
530
  "content": "<|bs|>",
531
  "single_word": false,
532
  "lstrip": false,
533
  "rstrip": false,
534
- "normalized": false,
535
- "special": true
536
  },
537
  {
538
  "id": 50315,
 
539
  "content": "<|kk|>",
540
  "single_word": false,
541
  "lstrip": false,
542
  "rstrip": false,
543
- "normalized": false,
544
- "special": true
545
  },
546
  {
547
  "id": 50316,
 
548
  "content": "<|sq|>",
549
  "single_word": false,
550
  "lstrip": false,
551
  "rstrip": false,
552
- "normalized": false,
553
- "special": true
554
  },
555
  {
556
  "id": 50317,
 
557
  "content": "<|sw|>",
558
  "single_word": false,
559
  "lstrip": false,
560
  "rstrip": false,
561
- "normalized": false,
562
- "special": true
563
  },
564
  {
565
  "id": 50318,
 
566
  "content": "<|gl|>",
567
  "single_word": false,
568
  "lstrip": false,
569
  "rstrip": false,
570
- "normalized": false,
571
- "special": true
572
  },
573
  {
574
  "id": 50319,
 
575
  "content": "<|mr|>",
576
  "single_word": false,
577
  "lstrip": false,
578
  "rstrip": false,
579
- "normalized": false,
580
- "special": true
581
  },
582
  {
583
  "id": 50320,
 
584
  "content": "<|pa|>",
585
  "single_word": false,
586
  "lstrip": false,
587
  "rstrip": false,
588
- "normalized": false,
589
- "special": true
590
  },
591
  {
592
  "id": 50321,
 
593
  "content": "<|si|>",
594
  "single_word": false,
595
  "lstrip": false,
596
  "rstrip": false,
597
- "normalized": false,
598
- "special": true
599
  },
600
  {
601
  "id": 50322,
 
602
  "content": "<|km|>",
603
  "single_word": false,
604
  "lstrip": false,
605
  "rstrip": false,
606
- "normalized": false,
607
- "special": true
608
  },
609
  {
610
  "id": 50323,
 
611
  "content": "<|sn|>",
612
  "single_word": false,
613
  "lstrip": false,
614
  "rstrip": false,
615
- "normalized": false,
616
- "special": true
617
  },
618
  {
619
  "id": 50324,
 
620
  "content": "<|yo|>",
621
  "single_word": false,
622
  "lstrip": false,
623
  "rstrip": false,
624
- "normalized": false,
625
- "special": true
626
  },
627
  {
628
  "id": 50325,
 
629
  "content": "<|so|>",
630
  "single_word": false,
631
  "lstrip": false,
632
  "rstrip": false,
633
- "normalized": false,
634
- "special": true
635
  },
636
  {
637
  "id": 50326,
 
638
  "content": "<|af|>",
639
  "single_word": false,
640
  "lstrip": false,
641
  "rstrip": false,
642
- "normalized": false,
643
- "special": true
644
  },
645
  {
646
  "id": 50327,
 
647
  "content": "<|oc|>",
648
  "single_word": false,
649
  "lstrip": false,
650
  "rstrip": false,
651
- "normalized": false,
652
- "special": true
653
  },
654
  {
655
  "id": 50328,
 
656
  "content": "<|ka|>",
657
  "single_word": false,
658
  "lstrip": false,
659
  "rstrip": false,
660
- "normalized": false,
661
- "special": true
662
  },
663
  {
664
  "id": 50329,
 
665
  "content": "<|be|>",
666
  "single_word": false,
667
  "lstrip": false,
668
  "rstrip": false,
669
- "normalized": false,
670
- "special": true
671
  },
672
  {
673
  "id": 50330,
 
674
  "content": "<|tg|>",
675
  "single_word": false,
676
  "lstrip": false,
677
  "rstrip": false,
678
- "normalized": false,
679
- "special": true
680
  },
681
  {
682
  "id": 50331,
 
683
  "content": "<|sd|>",
684
  "single_word": false,
685
  "lstrip": false,
686
  "rstrip": false,
687
- "normalized": false,
688
- "special": true
689
  },
690
  {
691
  "id": 50332,
 
692
  "content": "<|gu|>",
693
  "single_word": false,
694
  "lstrip": false,
695
  "rstrip": false,
696
- "normalized": false,
697
- "special": true
698
  },
699
  {
700
  "id": 50333,
 
701
  "content": "<|am|>",
702
  "single_word": false,
703
  "lstrip": false,
704
  "rstrip": false,
705
- "normalized": false,
706
- "special": true
707
  },
708
  {
709
  "id": 50334,
 
710
  "content": "<|yi|>",
711
  "single_word": false,
712
  "lstrip": false,
713
  "rstrip": false,
714
- "normalized": false,
715
- "special": true
716
  },
717
  {
718
  "id": 50335,
 
719
  "content": "<|lo|>",
720
  "single_word": false,
721
  "lstrip": false,
722
  "rstrip": false,
723
- "normalized": false,
724
- "special": true
725
  },
726
  {
727
  "id": 50336,
 
728
  "content": "<|uz|>",
729
  "single_word": false,
730
  "lstrip": false,
731
  "rstrip": false,
732
- "normalized": false,
733
- "special": true
734
  },
735
  {
736
  "id": 50337,
 
737
  "content": "<|fo|>",
738
  "single_word": false,
739
  "lstrip": false,
740
  "rstrip": false,
741
- "normalized": false,
742
- "special": true
743
  },
744
  {
745
  "id": 50338,
 
746
  "content": "<|ht|>",
747
  "single_word": false,
748
  "lstrip": false,
749
  "rstrip": false,
750
- "normalized": false,
751
- "special": true
752
  },
753
  {
754
  "id": 50339,
 
755
  "content": "<|ps|>",
756
  "single_word": false,
757
  "lstrip": false,
758
  "rstrip": false,
759
- "normalized": false,
760
- "special": true
761
  },
762
  {
763
  "id": 50340,
 
764
  "content": "<|tk|>",
765
  "single_word": false,
766
  "lstrip": false,
767
  "rstrip": false,
768
- "normalized": false,
769
- "special": true
770
  },
771
  {
772
  "id": 50341,
 
773
  "content": "<|nn|>",
774
  "single_word": false,
775
  "lstrip": false,
776
  "rstrip": false,
777
- "normalized": false,
778
- "special": true
779
  },
780
  {
781
  "id": 50342,
 
782
  "content": "<|mt|>",
783
  "single_word": false,
784
  "lstrip": false,
785
  "rstrip": false,
786
- "normalized": false,
787
- "special": true
788
  },
789
  {
790
  "id": 50343,
 
791
  "content": "<|sa|>",
792
  "single_word": false,
793
  "lstrip": false,
794
  "rstrip": false,
795
- "normalized": false,
796
- "special": true
797
  },
798
  {
799
  "id": 50344,
 
800
  "content": "<|lb|>",
801
  "single_word": false,
802
  "lstrip": false,
803
  "rstrip": false,
804
- "normalized": false,
805
- "special": true
806
  },
807
  {
808
  "id": 50345,
 
809
  "content": "<|my|>",
810
  "single_word": false,
811
  "lstrip": false,
812
  "rstrip": false,
813
- "normalized": false,
814
- "special": true
815
  },
816
  {
817
  "id": 50346,
 
818
  "content": "<|bo|>",
819
  "single_word": false,
820
  "lstrip": false,
821
  "rstrip": false,
822
- "normalized": false,
823
- "special": true
824
  },
825
  {
826
  "id": 50347,
 
827
  "content": "<|tl|>",
828
  "single_word": false,
829
  "lstrip": false,
830
  "rstrip": false,
831
- "normalized": false,
832
- "special": true
833
  },
834
  {
835
  "id": 50348,
 
836
  "content": "<|mg|>",
837
  "single_word": false,
838
  "lstrip": false,
839
  "rstrip": false,
840
- "normalized": false,
841
- "special": true
842
  },
843
  {
844
  "id": 50349,
 
845
  "content": "<|as|>",
846
  "single_word": false,
847
  "lstrip": false,
848
  "rstrip": false,
849
- "normalized": false,
850
- "special": true
851
  },
852
  {
853
  "id": 50350,
 
854
  "content": "<|tt|>",
855
  "single_word": false,
856
  "lstrip": false,
857
  "rstrip": false,
858
- "normalized": false,
859
- "special": true
860
  },
861
  {
862
  "id": 50351,
 
863
  "content": "<|haw|>",
864
  "single_word": false,
865
  "lstrip": false,
866
  "rstrip": false,
867
- "normalized": false,
868
- "special": true
869
  },
870
  {
871
  "id": 50352,
 
872
  "content": "<|ln|>",
873
  "single_word": false,
874
  "lstrip": false,
875
  "rstrip": false,
876
- "normalized": false,
877
- "special": true
878
  },
879
  {
880
  "id": 50353,
 
881
  "content": "<|ha|>",
882
  "single_word": false,
883
  "lstrip": false,
884
  "rstrip": false,
885
- "normalized": false,
886
- "special": true
887
  },
888
  {
889
  "id": 50354,
 
890
  "content": "<|ba|>",
891
  "single_word": false,
892
  "lstrip": false,
893
  "rstrip": false,
894
- "normalized": false,
895
- "special": true
896
  },
897
  {
898
  "id": 50355,
 
899
  "content": "<|jw|>",
900
  "single_word": false,
901
  "lstrip": false,
902
  "rstrip": false,
903
- "normalized": false,
904
- "special": true
905
  },
906
  {
907
  "id": 50356,
 
908
  "content": "<|su|>",
909
  "single_word": false,
910
  "lstrip": false,
911
  "rstrip": false,
912
- "normalized": false,
913
- "special": true
914
  },
915
  {
916
  "id": 50357,
 
917
  "content": "<|translate|>",
918
  "single_word": false,
919
  "lstrip": false,
920
  "rstrip": false,
921
- "normalized": false,
922
- "special": true
923
  },
924
  {
925
  "id": 50358,
 
926
  "content": "<|transcribe|>",
927
  "single_word": false,
928
  "lstrip": false,
929
  "rstrip": false,
930
- "normalized": false,
931
- "special": true
932
  },
933
  {
934
  "id": 50359,
 
935
  "content": "<|startoflm|>",
936
  "single_word": false,
937
  "lstrip": false,
938
  "rstrip": false,
939
- "normalized": false,
940
- "special": true
941
  },
942
  {
943
  "id": 50360,
 
944
  "content": "<|startofprev|>",
945
  "single_word": false,
946
  "lstrip": false,
947
  "rstrip": false,
948
- "normalized": false,
949
- "special": true
950
  },
951
  {
952
  "id": 50361,
 
953
  "content": "<|nocaptions|>",
954
  "single_word": false,
955
  "lstrip": false,
956
  "rstrip": false,
957
- "normalized": false,
958
- "special": true
959
  },
960
  {
961
  "id": 50362,
 
962
  "content": "<|notimestamps|>",
963
  "single_word": false,
964
  "lstrip": false,
965
  "rstrip": false,
966
- "normalized": false,
967
- "special": true
968
  }
969
  ],
970
  "normalizer": null,
971
  "pre_tokenizer": {
972
  "type": "ByteLevel",
973
  "add_prefix_space": false,
974
- "trim_offsets": true,
975
- "use_regex": true
976
  },
977
  "post_processor": {
978
  "type": "TemplateProcessing",
@@ -1067,8 +1066,7 @@
1067
  "decoder": {
1068
  "type": "ByteLevel",
1069
  "add_prefix_space": true,
1070
- "trim_offsets": true,
1071
- "use_regex": true
1072
  },
1073
  "model": {
1074
  "type": "BPE",
5
  "added_tokens": [
6
  {
7
  "id": 50256,
8
+ "special": true,
9
  "content": "<|endoftext|>",
10
  "single_word": false,
11
  "lstrip": false,
12
  "rstrip": false,
13
+ "normalized": false
 
14
  },
15
  {
16
  "id": 50257,
17
+ "special": true,
18
  "content": "<|startoftranscript|>",
19
  "single_word": false,
20
  "lstrip": false,
21
  "rstrip": false,
22
+ "normalized": false
 
23
  },
24
  {
25
  "id": 50258,
26
+ "special": true,
27
  "content": "<|en|>",
28
  "single_word": false,
29
  "lstrip": false,
30
  "rstrip": false,
31
+ "normalized": false
 
32
  },
33
  {
34
  "id": 50259,
35
+ "special": true,
36
  "content": "<|zh|>",
37
  "single_word": false,
38
  "lstrip": false,
39
  "rstrip": false,
40
+ "normalized": false
 
41
  },
42
  {
43
  "id": 50260,
44
+ "special": true,
45
  "content": "<|de|>",
46
  "single_word": false,
47
  "lstrip": false,
48
  "rstrip": false,
49
+ "normalized": false
 
50
  },
51
  {
52
  "id": 50261,
53
+ "special": true,
54
  "content": "<|es|>",
55
  "single_word": false,
56
  "lstrip": false,
57
  "rstrip": false,
58
+ "normalized": false
 
59
  },
60
  {
61
  "id": 50262,
62
+ "special": true,
63
  "content": "<|ru|>",
64
  "single_word": false,
65
  "lstrip": false,
66
  "rstrip": false,
67
+ "normalized": false
 
68
  },
69
  {
70
  "id": 50263,
71
+ "special": true,
72
  "content": "<|ko|>",
73
  "single_word": false,
74
  "lstrip": false,
75
  "rstrip": false,
76
+ "normalized": false
 
77
  },
78
  {
79
  "id": 50264,
80
+ "special": true,
81
  "content": "<|fr|>",
82
  "single_word": false,
83
  "lstrip": false,
84
  "rstrip": false,
85
+ "normalized": false
 
86
  },
87
  {
88
  "id": 50265,
89
+ "special": true,
90
  "content": "<|ja|>",
91
  "single_word": false,
92
  "lstrip": false,
93
  "rstrip": false,
94
+ "normalized": false
 
95
  },
96
  {
97
  "id": 50266,
98
+ "special": true,
99
  "content": "<|pt|>",
100
  "single_word": false,
101
  "lstrip": false,
102
  "rstrip": false,
103
+ "normalized": false
 
104
  },
105
  {
106
  "id": 50267,
107
+ "special": true,
108
  "content": "<|tr|>",
109
  "single_word": false,
110
  "lstrip": false,
111
  "rstrip": false,
112
+ "normalized": false
 
113
  },
114
  {
115
  "id": 50268,
116
+ "special": true,
117
  "content": "<|pl|>",
118
  "single_word": false,
119
  "lstrip": false,
120
  "rstrip": false,
121
+ "normalized": false
 
122
  },
123
  {
124
  "id": 50269,
125
+ "special": true,
126
  "content": "<|ca|>",
127
  "single_word": false,
128
  "lstrip": false,
129
  "rstrip": false,
130
+ "normalized": false
 
131
  },
132
  {
133
  "id": 50270,
134
+ "special": true,
135
  "content": "<|nl|>",
136
  "single_word": false,
137
  "lstrip": false,
138
  "rstrip": false,
139
+ "normalized": false
 
140
  },
141
  {
142
  "id": 50271,
143
+ "special": true,
144
  "content": "<|ar|>",
145
  "single_word": false,
146
  "lstrip": false,
147
  "rstrip": false,
148
+ "normalized": false
 
149
  },
150
  {
151
  "id": 50272,
152
+ "special": true,
153
  "content": "<|sv|>",
154
  "single_word": false,
155
  "lstrip": false,
156
  "rstrip": false,
157
+ "normalized": false
 
158
  },
159
  {
160
  "id": 50273,
161
+ "special": true,
162
  "content": "<|it|>",
163
  "single_word": false,
164
  "lstrip": false,
165
  "rstrip": false,
166
+ "normalized": false
 
167
  },
168
  {
169
  "id": 50274,
170
+ "special": true,
171
  "content": "<|id|>",
172
  "single_word": false,
173
  "lstrip": false,
174
  "rstrip": false,
175
+ "normalized": false
 
176
  },
177
  {
178
  "id": 50275,
179
+ "special": true,
180
  "content": "<|hi|>",
181
  "single_word": false,
182
  "lstrip": false,
183
  "rstrip": false,
184
+ "normalized": false
 
185
  },
186
  {
187
  "id": 50276,
188
+ "special": true,
189
  "content": "<|fi|>",
190
  "single_word": false,
191
  "lstrip": false,
192
  "rstrip": false,
193
+ "normalized": false
 
194
  },
195
  {
196
  "id": 50277,
197
+ "special": true,
198
  "content": "<|vi|>",
199
  "single_word": false,
200
  "lstrip": false,
201
  "rstrip": false,
202
+ "normalized": false
 
203
  },
204
  {
205
  "id": 50278,
206
+ "special": true,
207
  "content": "<|iw|>",
208
  "single_word": false,
209
  "lstrip": false,
210
  "rstrip": false,
211
+ "normalized": false
 
212
  },
213
  {
214
  "id": 50279,
215
+ "special": true,
216
  "content": "<|uk|>",
217
  "single_word": false,
218
  "lstrip": false,
219
  "rstrip": false,
220
+ "normalized": false
 
221
  },
222
  {
223
  "id": 50280,
224
+ "special": true,
225
  "content": "<|el|>",
226
  "single_word": false,
227
  "lstrip": false,
228
  "rstrip": false,
229
+ "normalized": false
 
230
  },
231
  {
232
  "id": 50281,
233
+ "special": true,
234
  "content": "<|ms|>",
235
  "single_word": false,
236
  "lstrip": false,
237
  "rstrip": false,
238
+ "normalized": false
 
239
  },
240
  {
241
  "id": 50282,
242
+ "special": true,
243
  "content": "<|cs|>",
244
  "single_word": false,
245
  "lstrip": false,
246
  "rstrip": false,
247
+ "normalized": false
 
248
  },
249
  {
250
  "id": 50283,
251
+ "special": true,
252
  "content": "<|ro|>",
253
  "single_word": false,
254
  "lstrip": false,
255
  "rstrip": false,
256
+ "normalized": false
 
257
  },
258
  {
259
  "id": 50284,
260
+ "special": true,
261
  "content": "<|da|>",
262
  "single_word": false,
263
  "lstrip": false,
264
  "rstrip": false,
265
+ "normalized": false
 
266
  },
267
  {
268
  "id": 50285,
269
+ "special": true,
270
  "content": "<|hu|>",
271
  "single_word": false,
272
  "lstrip": false,
273
  "rstrip": false,
274
+ "normalized": false
 
275
  },
276
  {
277
  "id": 50286,
278
+ "special": true,
279
  "content": "<|ta|>",
280
  "single_word": false,
281
  "lstrip": false,
282
  "rstrip": false,
283
+ "normalized": false
 
284
  },
285
  {
286
  "id": 50287,
287
+ "special": true,
288
  "content": "<|no|>",
289
  "single_word": false,
290
  "lstrip": false,
291
  "rstrip": false,
292
+ "normalized": false
 
293
  },
294
  {
295
  "id": 50288,
296
+ "special": true,
297
  "content": "<|th|>",
298
  "single_word": false,
299
  "lstrip": false,
300
  "rstrip": false,
301
+ "normalized": false
 
302
  },
303
  {
304
  "id": 50289,
305
+ "special": true,
306
  "content": "<|ur|>",
307
  "single_word": false,
308
  "lstrip": false,
309
  "rstrip": false,
310
+ "normalized": false
 
311
  },
312
  {
313
  "id": 50290,
314
+ "special": true,
315
  "content": "<|hr|>",
316
  "single_word": false,
317
  "lstrip": false,
318
  "rstrip": false,
319
+ "normalized": false
 
320
  },
321
  {
322
  "id": 50291,
323
+ "special": true,
324
  "content": "<|bg|>",
325
  "single_word": false,
326
  "lstrip": false,
327
  "rstrip": false,
328
+ "normalized": false
 
329
  },
330
  {
331
  "id": 50292,
332
+ "special": true,
333
  "content": "<|lt|>",
334
  "single_word": false,
335
  "lstrip": false,
336
  "rstrip": false,
337
+ "normalized": false
 
338
  },
339
  {
340
  "id": 50293,
341
+ "special": true,
342
  "content": "<|la|>",
343
  "single_word": false,
344
  "lstrip": false,
345
  "rstrip": false,
346
+ "normalized": false
 
347
  },
348
  {
349
  "id": 50294,
350
+ "special": true,
351
  "content": "<|mi|>",
352
  "single_word": false,
353
  "lstrip": false,
354
  "rstrip": false,
355
+ "normalized": false
 
356
  },
357
  {
358
  "id": 50295,
359
+ "special": true,
360
  "content": "<|ml|>",
361
  "single_word": false,
362
  "lstrip": false,
363
  "rstrip": false,
364
+ "normalized": false
 
365
  },
366
  {
367
  "id": 50296,
368
+ "special": true,
369
  "content": "<|cy|>",
370
  "single_word": false,
371
  "lstrip": false,
372
  "rstrip": false,
373
+ "normalized": false
 
374
  },
375
  {
376
  "id": 50297,
377
+ "special": true,
378
  "content": "<|sk|>",
379
  "single_word": false,
380
  "lstrip": false,
381
  "rstrip": false,
382
+ "normalized": false
 
383
  },
384
  {
385
  "id": 50298,
386
+ "special": true,
387
  "content": "<|te|>",
388
  "single_word": false,
389
  "lstrip": false,
390
  "rstrip": false,
391
+ "normalized": false
 
392
  },
393
  {
394
  "id": 50299,
395
+ "special": true,
396
  "content": "<|fa|>",
397
  "single_word": false,
398
  "lstrip": false,
399
  "rstrip": false,
400
+ "normalized": false
 
401
  },
402
  {
403
  "id": 50300,
404
+ "special": true,
405
  "content": "<|lv|>",
406
  "single_word": false,
407
  "lstrip": false,
408
  "rstrip": false,
409
+ "normalized": false
 
410
  },
411
  {
412
  "id": 50301,
413
+ "special": true,
414
  "content": "<|bn|>",
415
  "single_word": false,
416
  "lstrip": false,
417
  "rstrip": false,
418
+ "normalized": false
 
419
  },
420
  {
421
  "id": 50302,
422
+ "special": true,
423
  "content": "<|sr|>",
424
  "single_word": false,
425
  "lstrip": false,
426
  "rstrip": false,
427
+ "normalized": false
 
428
  },
429
  {
430
  "id": 50303,
431
+ "special": true,
432
  "content": "<|az|>",
433
  "single_word": false,
434
  "lstrip": false,
435
  "rstrip": false,
436
+ "normalized": false
 
437
  },
438
  {
439
  "id": 50304,
440
+ "special": true,
441
  "content": "<|sl|>",
442
  "single_word": false,
443
  "lstrip": false,
444
  "rstrip": false,
445
+ "normalized": false
 
446
  },
447
  {
448
  "id": 50305,
449
+ "special": true,
450
  "content": "<|kn|>",
451
  "single_word": false,
452
  "lstrip": false,
453
  "rstrip": false,
454
+ "normalized": false
 
455
  },
456
  {
457
  "id": 50306,
458
+ "special": true,
459
  "content": "<|et|>",
460
  "single_word": false,
461
  "lstrip": false,
462
  "rstrip": false,
463
+ "normalized": false
 
464
  },
465
  {
466
  "id": 50307,
467
+ "special": true,
468
  "content": "<|mk|>",
469
  "single_word": false,
470
  "lstrip": false,
471
  "rstrip": false,
472
+ "normalized": false
 
473
  },
474
  {
475
  "id": 50308,
476
+ "special": true,
477
  "content": "<|br|>",
478
  "single_word": false,
479
  "lstrip": false,
480
  "rstrip": false,
481
+ "normalized": false
 
482
  },
483
  {
484
  "id": 50309,
485
+ "special": true,
486
  "content": "<|eu|>",
487
  "single_word": false,
488
  "lstrip": false,
489
  "rstrip": false,
490
+ "normalized": false
 
491
  },
492
  {
493
  "id": 50310,
494
+ "special": true,
495
  "content": "<|is|>",
496
  "single_word": false,
497
  "lstrip": false,
498
  "rstrip": false,
499
+ "normalized": false
 
500
  },
501
  {
502
  "id": 50311,
503
+ "special": true,
504
  "content": "<|hy|>",
505
  "single_word": false,
506
  "lstrip": false,
507
  "rstrip": false,
508
+ "normalized": false
 
509
  },
510
  {
511
  "id": 50312,
512
+ "special": true,
513
  "content": "<|ne|>",
514
  "single_word": false,
515
  "lstrip": false,
516
  "rstrip": false,
517
+ "normalized": false
 
518
  },
519
  {
520
  "id": 50313,
521
+ "special": true,
522
  "content": "<|mn|>",
523
  "single_word": false,
524
  "lstrip": false,
525
  "rstrip": false,
526
+ "normalized": false
 
527
  },
528
  {
529
  "id": 50314,
530
+ "special": true,
531
  "content": "<|bs|>",
532
  "single_word": false,
533
  "lstrip": false,
534
  "rstrip": false,
535
+ "normalized": false
 
536
  },
537
  {
538
  "id": 50315,
539
+ "special": true,
540
  "content": "<|kk|>",
541
  "single_word": false,
542
  "lstrip": false,
543
  "rstrip": false,
544
+ "normalized": false
 
545
  },
546
  {
547
  "id": 50316,
548
+ "special": true,
549
  "content": "<|sq|>",
550
  "single_word": false,
551
  "lstrip": false,
552
  "rstrip": false,
553
+ "normalized": false
 
554
  },
555
  {
556
  "id": 50317,
557
+ "special": true,
558
  "content": "<|sw|>",
559
  "single_word": false,
560
  "lstrip": false,
561
  "rstrip": false,
562
+ "normalized": false
 
563
  },
564
  {
565
  "id": 50318,
566
+ "special": true,
567
  "content": "<|gl|>",
568
  "single_word": false,
569
  "lstrip": false,
570
  "rstrip": false,
571
+ "normalized": false
 
572
  },
573
  {
574
  "id": 50319,
575
+ "special": true,
576
  "content": "<|mr|>",
577
  "single_word": false,
578
  "lstrip": false,
579
  "rstrip": false,
580
+ "normalized": false
 
581
  },
582
  {
583
  "id": 50320,
584
+ "special": true,
585
  "content": "<|pa|>",
586
  "single_word": false,
587
  "lstrip": false,
588
  "rstrip": false,
589
+ "normalized": false
 
590
  },
591
  {
592
  "id": 50321,
593
+ "special": true,
594
  "content": "<|si|>",
595
  "single_word": false,
596
  "lstrip": false,
597
  "rstrip": false,
598
+ "normalized": false
 
599
  },
600
  {
601
  "id": 50322,
602
+ "special": true,
603
  "content": "<|km|>",
604
  "single_word": false,
605
  "lstrip": false,
606
  "rstrip": false,
607
+ "normalized": false
 
608
  },
609
  {
610
  "id": 50323,
611
+ "special": true,
612
  "content": "<|sn|>",
613
  "single_word": false,
614
  "lstrip": false,
615
  "rstrip": false,
616
+ "normalized": false
 
617
  },
618
  {
619
  "id": 50324,
620
+ "special": true,
621
  "content": "<|yo|>",
622
  "single_word": false,
623
  "lstrip": false,
624
  "rstrip": false,
625
+ "normalized": false
 
626
  },
627
  {
628
  "id": 50325,
629
+ "special": true,
630
  "content": "<|so|>",
631
  "single_word": false,
632
  "lstrip": false,
633
  "rstrip": false,
634
+ "normalized": false
 
635
  },
636
  {
637
  "id": 50326,
638
+ "special": true,
639
  "content": "<|af|>",
640
  "single_word": false,
641
  "lstrip": false,
642
  "rstrip": false,
643
+ "normalized": false
 
644
  },
645
  {
646
  "id": 50327,
647
+ "special": true,
648
  "content": "<|oc|>",
649
  "single_word": false,
650
  "lstrip": false,
651
  "rstrip": false,
652
+ "normalized": false
 
653
  },
654
  {
655
  "id": 50328,
656
+ "special": true,
657
  "content": "<|ka|>",
658
  "single_word": false,
659
  "lstrip": false,
660
  "rstrip": false,
661
+ "normalized": false
 
662
  },
663
  {
664
  "id": 50329,
665
+ "special": true,
666
  "content": "<|be|>",
667
  "single_word": false,
668
  "lstrip": false,
669
  "rstrip": false,
670
+ "normalized": false
 
671
  },
672
  {
673
  "id": 50330,
674
+ "special": true,
675
  "content": "<|tg|>",
676
  "single_word": false,
677
  "lstrip": false,
678
  "rstrip": false,
679
+ "normalized": false
 
680
  },
681
  {
682
  "id": 50331,
683
+ "special": true,
684
  "content": "<|sd|>",
685
  "single_word": false,
686
  "lstrip": false,
687
  "rstrip": false,
688
+ "normalized": false
 
689
  },
690
  {
691
  "id": 50332,
692
+ "special": true,
693
  "content": "<|gu|>",
694
  "single_word": false,
695
  "lstrip": false,
696
  "rstrip": false,
697
+ "normalized": false
 
698
  },
699
  {
700
  "id": 50333,
701
+ "special": true,
702
  "content": "<|am|>",
703
  "single_word": false,
704
  "lstrip": false,
705
  "rstrip": false,
706
+ "normalized": false
 
707
  },
708
  {
709
  "id": 50334,
710
+ "special": true,
711
  "content": "<|yi|>",
712
  "single_word": false,
713
  "lstrip": false,
714
  "rstrip": false,
715
+ "normalized": false
 
716
  },
717
  {
718
  "id": 50335,
719
+ "special": true,
720
  "content": "<|lo|>",
721
  "single_word": false,
722
  "lstrip": false,
723
  "rstrip": false,
724
+ "normalized": false
 
725
  },
726
  {
727
  "id": 50336,
728
+ "special": true,
729
  "content": "<|uz|>",
730
  "single_word": false,
731
  "lstrip": false,
732
  "rstrip": false,
733
+ "normalized": false
 
734
  },
735
  {
736
  "id": 50337,
737
+ "special": true,
738
  "content": "<|fo|>",
739
  "single_word": false,
740
  "lstrip": false,
741
  "rstrip": false,
742
+ "normalized": false
 
743
  },
744
  {
745
  "id": 50338,
746
+ "special": true,
747
  "content": "<|ht|>",
748
  "single_word": false,
749
  "lstrip": false,
750
  "rstrip": false,
751
+ "normalized": false
 
752
  },
753
  {
754
  "id": 50339,
755
+ "special": true,
756
  "content": "<|ps|>",
757
  "single_word": false,
758
  "lstrip": false,
759
  "rstrip": false,
760
+ "normalized": false
 
761
  },
762
  {
763
  "id": 50340,
764
+ "special": true,
765
  "content": "<|tk|>",
766
  "single_word": false,
767
  "lstrip": false,
768
  "rstrip": false,
769
+ "normalized": false
 
770
  },
771
  {
772
  "id": 50341,
773
+ "special": true,
774
  "content": "<|nn|>",
775
  "single_word": false,
776
  "lstrip": false,
777
  "rstrip": false,
778
+ "normalized": false
 
779
  },
780
  {
781
  "id": 50342,
782
+ "special": true,
783
  "content": "<|mt|>",
784
  "single_word": false,
785
  "lstrip": false,
786
  "rstrip": false,
787
+ "normalized": false
 
788
  },
789
  {
790
  "id": 50343,
791
+ "special": true,
792
  "content": "<|sa|>",
793
  "single_word": false,
794
  "lstrip": false,
795
  "rstrip": false,
796
+ "normalized": false
 
797
  },
798
  {
799
  "id": 50344,
800
+ "special": true,
801
  "content": "<|lb|>",
802
  "single_word": false,
803
  "lstrip": false,
804
  "rstrip": false,
805
+ "normalized": false
 
806
  },
807
  {
808
  "id": 50345,
809
+ "special": true,
810
  "content": "<|my|>",
811
  "single_word": false,
812
  "lstrip": false,
813
  "rstrip": false,
814
+ "normalized": false
 
815
  },
816
  {
817
  "id": 50346,
818
+ "special": true,
819
  "content": "<|bo|>",
820
  "single_word": false,
821
  "lstrip": false,
822
  "rstrip": false,
823
+ "normalized": false
 
824
  },
825
  {
826
  "id": 50347,
827
+ "special": true,
828
  "content": "<|tl|>",
829
  "single_word": false,
830
  "lstrip": false,
831
  "rstrip": false,
832
+ "normalized": false
 
833
  },
834
  {
835
  "id": 50348,
836
+ "special": true,
837
  "content": "<|mg|>",
838
  "single_word": false,
839
  "lstrip": false,
840
  "rstrip": false,
841
+ "normalized": false
 
842
  },
843
  {
844
  "id": 50349,
845
+ "special": true,
846
  "content": "<|as|>",
847
  "single_word": false,
848
  "lstrip": false,
849
  "rstrip": false,
850
+ "normalized": false
 
851
  },
852
  {
853
  "id": 50350,
854
+ "special": true,
855
  "content": "<|tt|>",
856
  "single_word": false,
857
  "lstrip": false,
858
  "rstrip": false,
859
+ "normalized": false
 
860
  },
861
  {
862
  "id": 50351,
863
+ "special": true,
864
  "content": "<|haw|>",
865
  "single_word": false,
866
  "lstrip": false,
867
  "rstrip": false,
868
+ "normalized": false
 
869
  },
870
  {
871
  "id": 50352,
872
+ "special": true,
873
  "content": "<|ln|>",
874
  "single_word": false,
875
  "lstrip": false,
876
  "rstrip": false,
877
+ "normalized": false
 
878
  },
879
  {
880
  "id": 50353,
881
+ "special": true,
882
  "content": "<|ha|>",
883
  "single_word": false,
884
  "lstrip": false,
885
  "rstrip": false,
886
+ "normalized": false
 
887
  },
888
  {
889
  "id": 50354,
890
+ "special": true,
891
  "content": "<|ba|>",
892
  "single_word": false,
893
  "lstrip": false,
894
  "rstrip": false,
895
+ "normalized": false
 
896
  },
897
  {
898
  "id": 50355,
899
+ "special": true,
900
  "content": "<|jw|>",
901
  "single_word": false,
902
  "lstrip": false,
903
  "rstrip": false,
904
+ "normalized": false
 
905
  },
906
  {
907
  "id": 50356,
908
+ "special": true,
909
  "content": "<|su|>",
910
  "single_word": false,
911
  "lstrip": false,
912
  "rstrip": false,
913
+ "normalized": false
 
914
  },
915
  {
916
  "id": 50357,
917
+ "special": true,
918
  "content": "<|translate|>",
919
  "single_word": false,
920
  "lstrip": false,
921
  "rstrip": false,
922
+ "normalized": false
 
923
  },
924
  {
925
  "id": 50358,
926
+ "special": true,
927
  "content": "<|transcribe|>",
928
  "single_word": false,
929
  "lstrip": false,
930
  "rstrip": false,
931
+ "normalized": false
 
932
  },
933
  {
934
  "id": 50359,
935
+ "special": true,
936
  "content": "<|startoflm|>",
937
  "single_word": false,
938
  "lstrip": false,
939
  "rstrip": false,
940
+ "normalized": false
 
941
  },
942
  {
943
  "id": 50360,
944
+ "special": true,
945
  "content": "<|startofprev|>",
946
  "single_word": false,
947
  "lstrip": false,
948
  "rstrip": false,
949
+ "normalized": false
 
950
  },
951
  {
952
  "id": 50361,
953
+ "special": true,
954
  "content": "<|nocaptions|>",
955
  "single_word": false,
956
  "lstrip": false,
957
  "rstrip": false,
958
+ "normalized": false
 
959
  },
960
  {
961
  "id": 50362,
962
+ "special": true,
963
  "content": "<|notimestamps|>",
964
  "single_word": false,
965
  "lstrip": false,
966
  "rstrip": false,
967
+ "normalized": false
 
968
  }
969
  ],
970
  "normalizer": null,
971
  "pre_tokenizer": {
972
  "type": "ByteLevel",
973
  "add_prefix_space": false,
974
+ "trim_offsets": true
 
975
  },
976
  "post_processor": {
977
  "type": "TemplateProcessing",
1066
  "decoder": {
1067
  "type": "ByteLevel",
1068
  "add_prefix_space": true,
1069
+ "trim_offsets": true
 
1070
  },
1071
  "model": {
1072
  "type": "BPE",
quantized/openai/whisper-base.en/speech2seq-lm-with-past/tokenizer_config.json CHANGED
@@ -19,7 +19,6 @@
19
  },
20
  "errors": "replace",
21
  "model_max_length": 1024,
22
- "name_or_path": "openai/whisper-base.en",
23
  "pad_token": null,
24
  "processor_class": "WhisperProcessor",
25
  "return_attention_mask": false,
19
  },
20
  "errors": "replace",
21
  "model_max_length": 1024,
 
22
  "pad_token": null,
23
  "processor_class": "WhisperProcessor",
24
  "return_attention_mask": false,
quantized/openai/whisper-base.en/speech2seq-lm-with-past/vocab.json CHANGED
The diff for this file is too large to render. See raw diff
quantized/openai/whisper-base/default/config.json CHANGED
@@ -2,6 +2,7 @@
2
  "_name_or_path": "openai/whisper-base",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
 
5
  "architectures": [
6
  "WhisperForConditionalGeneration"
7
  ],
@@ -11,6 +12,7 @@
11
  50257
12
  ],
13
  "bos_token_id": 50257,
 
14
  "d_model": 512,
15
  "decoder_attention_heads": 8,
16
  "decoder_ffn_dim": 2048,
@@ -39,6 +41,12 @@
39
  ],
40
  "init_std": 0.02,
41
  "is_encoder_decoder": true,
 
 
 
 
 
 
42
  "max_length": 448,
43
  "max_source_positions": 1500,
44
  "max_target_positions": 448,
@@ -135,7 +143,8 @@
135
  50361,
136
  50362
137
  ],
138
- "transformers_version": "4.26.1",
139
  "use_cache": true,
 
140
  "vocab_size": 51865
141
  }
2
  "_name_or_path": "openai/whisper-base",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
5
+ "apply_spec_augment": false,
6
  "architectures": [
7
  "WhisperForConditionalGeneration"
8
  ],
12
  50257
13
  ],
14
  "bos_token_id": 50257,
15
+ "classifier_proj_size": 256,
16
  "d_model": 512,
17
  "decoder_attention_heads": 8,
18
  "decoder_ffn_dim": 2048,
41
  ],
42
  "init_std": 0.02,
43
  "is_encoder_decoder": true,
44
+ "mask_feature_length": 10,
45
+ "mask_feature_min_masks": 0,
46
+ "mask_feature_prob": 0.0,
47
+ "mask_time_length": 10,
48
+ "mask_time_min_masks": 2,
49
+ "mask_time_prob": 0.05,
50
  "max_length": 448,
51
  "max_source_positions": 1500,
52
  "max_target_positions": 448,
143
  50361,
144
  50362
145
  ],
146
+ "transformers_version": "4.27.2",
147
  "use_cache": true,
148
+ "use_weighted_layer_sum": false,
149
  "vocab_size": 51865
150
  }
quantized/openai/whisper-base/default/generation_config.json DELETED
@@ -1,219 +0,0 @@
1
- {
2
- "begin_suppress_tokens": [
3
- 220,
4
- 50257
5
- ],
6
- "bos_token_id": 50257,
7
- "decoder_start_token_id": 50258,
8
- "eos_token_id": 50257,
9
- "forced_decoder_ids": [
10
- [
11
- 1,
12
- null
13
- ],
14
- [
15
- 2,
16
- 50359
17
- ]
18
- ],
19
- "is_multilingual": true,
20
- "lang_to_id": {
21
- "<|af|>": 50327,
22
- "<|am|>": 50334,
23
- "<|ar|>": 50272,
24
- "<|as|>": 50350,
25
- "<|az|>": 50304,
26
- "<|ba|>": 50355,
27
- "<|be|>": 50330,
28
- "<|bg|>": 50292,
29
- "<|bn|>": 50302,
30
- "<|bo|>": 50347,
31
- "<|br|>": 50309,
32
- "<|bs|>": 50315,
33
- "<|ca|>": 50270,
34
- "<|cs|>": 50283,
35
- "<|cy|>": 50297,
36
- "<|da|>": 50285,
37
- "<|de|>": 50261,
38
- "<|el|>": 50281,
39
- "<|en|>": 50259,
40
- "<|es|>": 50262,
41
- "<|et|>": 50307,
42
- "<|eu|>": 50310,
43
- "<|fa|>": 50300,
44
- "<|fi|>": 50277,
45
- "<|fo|>": 50338,
46
- "<|fr|>": 50265,
47
- "<|gl|>": 50319,
48
- "<|gu|>": 50333,
49
- "<|haw|>": 50352,
50
- "<|ha|>": 50354,
51
- "<|he|>": 50279,
52
- "<|hi|>": 50276,
53
- "<|hr|>": 50291,
54
- "<|ht|>": 50339,
55
- "<|hu|>": 50286,
56
- "<|hy|>": 50312,
57
- "<|id|>": 50275,
58
- "<|is|>": 50311,
59
- "<|it|>": 50274,
60
- "<|ja|>": 50266,
61
- "<|jw|>": 50356,
62
- "<|ka|>": 50329,
63
- "<|kk|>": 50316,
64
- "<|km|>": 50323,
65
- "<|kn|>": 50306,
66
- "<|ko|>": 50264,
67
- "<|la|>": 50294,
68
- "<|lb|>": 50345,
69
- "<|ln|>": 50353,
70
- "<|lo|>": 50336,
71
- "<|lt|>": 50293,
72
- "<|lv|>": 50301,
73
- "<|mg|>": 50349,
74
- "<|mi|>": 50295,
75
- "<|mk|>": 50308,
76
- "<|ml|>": 50296,
77
- "<|mn|>": 50314,
78
- "<|mr|>": 50320,
79
- "<|ms|>": 50282,
80
- "<|mt|>": 50343,
81
- "<|my|>": 50346,
82
- "<|ne|>": 50313,
83
- "<|nl|>": 50271,
84
- "<|nn|>": 50342,
85
- "<|no|>": 50288,
86
- "<|oc|>": 50328,
87
- "<|pa|>": 50321,
88
- "<|pl|>": 50269,
89
- "<|ps|>": 50340,
90
- "<|pt|>": 50267,
91
- "<|ro|>": 50284,
92
- "<|ru|>": 50263,
93
- "<|sa|>": 50344,
94
- "<|sd|>": 50332,
95
- "<|si|>": 50322,
96
- "<|sk|>": 50298,
97
- "<|sl|>": 50305,
98
- "<|sn|>": 50324,
99
- "<|so|>": 50326,
100
- "<|sq|>": 50317,
101
- "<|sr|>": 50303,
102
- "<|su|>": 50357,
103
- "<|sv|>": 50273,
104
- "<|sw|>": 50318,
105
- "<|ta|>": 50287,
106
- "<|te|>": 50299,
107
- "<|tg|>": 50331,
108
- "<|th|>": 50289,
109
- "<|tk|>": 50341,
110
- "<|tl|>": 50348,
111
- "<|tr|>": 50268,
112
- "<|tt|>": 50351,
113
- "<|uk|>": 50280,
114
- "<|ur|>": 50290,
115
- "<|uz|>": 50337,
116
- "<|vi|>": 50278,
117
- "<|yi|>": 50335,
118
- "<|yo|>": 50325,
119
- "<|zh|>": 50260
120
- },
121
- "max_initial_timestamp_index": 1,
122
- "max_length": 448,
123
- "no_timestamps_token_id": 50363,
124
- "pad_token_id": 50257,
125
- "return_timestamps": false,
126
- "suppress_tokens": [
127
- 1,
128
- 2,
129
- 7,
130
- 8,
131
- 9,
132
- 10,
133
- 14,
134
- 25,
135
- 26,
136
- 27,
137
- 28,
138
- 29,
139
- 31,
140
- 58,
141
- 59,
142
- 60,
143
- 61,
144
- 62,
145
- 63,
146
- 90,
147
- 91,
148
- 92,
149
- 93,
150
- 359,
151
- 503,
152
- 522,
153
- 542,
154
- 873,
155
- 893,
156
- 902,
157
- 918,
158
- 922,
159
- 931,
160
- 1350,
161
- 1853,
162
- 1982,
163
- 2460,
164
- 2627,
165
- 3246,
166
- 3253,
167
- 3268,
168
- 3536,
169
- 3846,
170
- 3961,
171
- 4183,
172
- 4667,
173
- 6585,
174
- 6647,
175
- 7273,
176
- 9061,
177
- 9383,
178
- 10428,
179
- 10929,
180
- 11938,
181
- 12033,
182
- 12331,
183
- 12562,
184
- 13793,
185
- 14157,
186
- 14635,
187
- 15265,
188
- 15618,
189
- 16553,
190
- 16604,
191
- 18362,
192
- 18956,
193
- 20075,
194
- 21675,
195
- 22520,
196
- 26130,
197
- 26161,
198
- 26435,
199
- 28279,
200
- 29464,
201
- 31650,
202
- 32302,
203
- 32470,
204
- 36865,
205
- 42863,
206
- 47425,
207
- 49870,
208
- 50254,
209
- 50258,
210
- 50360,
211
- 50361,
212
- 50362
213
- ],
214
- "task_to_id": {
215
- "transcribe": 50359,
216
- "translate": 50358
217
- },
218
- "transformers_version": "4.27.0.dev0"
219
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
quantized/openai/whisper-base/default/merges.txt CHANGED
@@ -1,4 +1,4 @@
1
- #version: 0.2
2
  Ġ a
3
  Ġt h
4
  i n
1
+ #version: 0.2 - Trained by `huggingface/tokenizers`
2
  Ġ a
3
  Ġt h
4
  i n
quantized/openai/{whisper-base.en/default/encoder_model.onnx → whisper-base/default/model.onnx} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5ceadcf4cb45d3e07507174a5f281f745085f01ea01b32f11dcb14f20e81291b
3
- size 23337235
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc8ec8a5545484b07635fb781c0e7b590af576c6a0068bdbf298227ddf6bed16
3
+ size 76779199
quantized/openai/whisper-base/default/tokenizer.json CHANGED
@@ -5,974 +5,973 @@
5
  "added_tokens": [
6
  {
7
  "id": 50257,
 
8
  "content": "<|endoftext|>",
9
  "single_word": false,
10
  "lstrip": false,
11
  "rstrip": false,
12
- "normalized": false,
13
- "special": true
14
  },
15
  {
16
  "id": 50258,
 
17
  "content": "<|startoftranscript|>",
18
  "single_word": false,
19
  "lstrip": false,
20
  "rstrip": false,
21
- "normalized": false,
22
- "special": true
23
  },
24
  {
25
  "id": 50259,
 
26
  "content": "<|en|>",
27
  "single_word": false,
28
  "lstrip": false,
29
  "rstrip": false,
30
- "normalized": false,
31
- "special": true
32
  },
33
  {
34
  "id": 50260,
 
35
  "content": "<|zh|>",
36
  "single_word": false,
37
  "lstrip": false,
38
  "rstrip": false,
39
- "normalized": false,
40
- "special": true
41
  },
42
  {
43
  "id": 50261,
 
44
  "content": "<|de|>",
45
  "single_word": false,
46
  "lstrip": false,
47
  "rstrip": false,
48
- "normalized": false,
49
- "special": true
50
  },
51
  {
52
  "id": 50262,
 
53
  "content": "<|es|>",
54
  "single_word": false,
55
  "lstrip": false,
56
  "rstrip": false,
57
- "normalized": false,
58
- "special": true
59
  },
60
  {
61
  "id": 50263,
 
62
  "content": "<|ru|>",
63
  "single_word": false,
64
  "lstrip": false,
65
  "rstrip": false,
66
- "normalized": false,
67
- "special": true
68
  },
69
  {
70
  "id": 50264,
 
71
  "content": "<|ko|>",
72
  "single_word": false,
73
  "lstrip": false,
74
  "rstrip": false,
75
- "normalized": false,
76
- "special": true
77
  },
78
  {
79
  "id": 50265,
 
80
  "content": "<|fr|>",
81
  "single_word": false,
82
  "lstrip": false,
83
  "rstrip": false,
84
- "normalized": false,
85
- "special": true
86
  },
87
  {
88
  "id": 50266,
 
89
  "content": "<|ja|>",
90
  "single_word": false,
91
  "lstrip": false,
92
  "rstrip": false,
93
- "normalized": false,
94
- "special": true
95
  },
96
  {
97
  "id": 50267,
 
98
  "content": "<|pt|>",
99
  "single_word": false,
100
  "lstrip": false,
101
  "rstrip": false,
102
- "normalized": false,
103
- "special": true
104
  },
105
  {
106
  "id": 50268,
 
107
  "content": "<|tr|>",
108
  "single_word": false,
109
  "lstrip": false,
110
  "rstrip": false,
111
- "normalized": false,
112
- "special": true
113
  },
114
  {
115
  "id": 50269,
 
116
  "content": "<|pl|>",
117
  "single_word": false,
118
  "lstrip": false,
119
  "rstrip": false,
120
- "normalized": false,
121
- "special": true
122
  },
123
  {
124
  "id": 50270,
 
125
  "content": "<|ca|>",
126
  "single_word": false,
127
  "lstrip": false,
128
  "rstrip": false,
129
- "normalized": false,
130
- "special": true
131
  },
132
  {
133
  "id": 50271,
 
134
  "content": "<|nl|>",
135
  "single_word": false,
136
  "lstrip": false,
137
  "rstrip": false,
138
- "normalized": false,
139
- "special": true
140
  },
141
  {
142
  "id": 50272,
 
143
  "content": "<|ar|>",
144
  "single_word": false,
145
  "lstrip": false,
146
  "rstrip": false,
147
- "normalized": false,
148
- "special": true
149
  },
150
  {
151
  "id": 50273,
 
152
  "content": "<|sv|>",
153
  "single_word": false,
154
  "lstrip": false,
155
  "rstrip": false,
156
- "normalized": false,
157
- "special": true
158
  },
159
  {
160
  "id": 50274,
 
161
  "content": "<|it|>",
162
  "single_word": false,
163
  "lstrip": false,
164
  "rstrip": false,
165
- "normalized": false,
166
- "special": true
167
  },
168
  {
169
  "id": 50275,
 
170
  "content": "<|id|>",
171
  "single_word": false,
172
  "lstrip": false,
173
  "rstrip": false,
174
- "normalized": false,
175
- "special": true
176
  },
177
  {
178
  "id": 50276,
 
179
  "content": "<|hi|>",
180
  "single_word": false,
181
  "lstrip": false,
182
  "rstrip": false,
183
- "normalized": false,
184
- "special": true
185
  },
186
  {
187
  "id": 50277,
 
188
  "content": "<|fi|>",
189
  "single_word": false,
190
  "lstrip": false,
191
  "rstrip": false,
192
- "normalized": false,
193
- "special": true
194
  },
195
  {
196
  "id": 50278,
 
197
  "content": "<|vi|>",
198
  "single_word": false,
199
  "lstrip": false,
200
  "rstrip": false,
201
- "normalized": false,
202
- "special": true
203
  },
204
  {
205
  "id": 50279,
 
206
  "content": "<|he|>",
207
  "single_word": false,
208
  "lstrip": false,
209
  "rstrip": false,
210
- "normalized": false,
211
- "special": true
212
  },
213
  {
214
  "id": 50280,
 
215
  "content": "<|uk|>",
216
  "single_word": false,
217
  "lstrip": false,
218
  "rstrip": false,
219
- "normalized": false,
220
- "special": true
221
  },
222
  {
223
  "id": 50281,
 
224
  "content": "<|el|>",
225
  "single_word": false,
226
  "lstrip": false,
227
  "rstrip": false,
228
- "normalized": false,
229
- "special": true
230
  },
231
  {
232
  "id": 50282,
 
233
  "content": "<|ms|>",
234
  "single_word": false,
235
  "lstrip": false,
236
  "rstrip": false,
237
- "normalized": false,
238
- "special": true
239
  },
240
  {
241
  "id": 50283,
 
242
  "content": "<|cs|>",
243
  "single_word": false,
244
  "lstrip": false,
245
  "rstrip": false,
246
- "normalized": false,
247
- "special": true
248
  },
249
  {
250
  "id": 50284,
 
251
  "content": "<|ro|>",
252
  "single_word": false,
253
  "lstrip": false,
254
  "rstrip": false,
255
- "normalized": false,
256
- "special": true
257
  },
258
  {
259
  "id": 50285,
 
260
  "content": "<|da|>",
261
  "single_word": false,
262
  "lstrip": false,
263
  "rstrip": false,
264
- "normalized": false,
265
- "special": true
266
  },
267
  {
268
  "id": 50286,
 
269
  "content": "<|hu|>",
270
  "single_word": false,
271
  "lstrip": false,
272
  "rstrip": false,
273
- "normalized": false,
274
- "special": true
275
  },
276
  {
277
  "id": 50287,
 
278
  "content": "<|ta|>",
279
  "single_word": false,
280
  "lstrip": false,
281
  "rstrip": false,
282
- "normalized": false,
283
- "special": true
284
  },
285
  {
286
  "id": 50288,
 
287
  "content": "<|no|>",
288
  "single_word": false,
289
  "lstrip": false,
290
  "rstrip": false,
291
- "normalized": false,
292
- "special": true
293
  },
294
  {
295
  "id": 50289,
 
296
  "content": "<|th|>",
297
  "single_word": false,
298
  "lstrip": false,
299
  "rstrip": false,
300
- "normalized": false,
301
- "special": true
302
  },
303
  {
304
  "id": 50290,
 
305
  "content": "<|ur|>",
306
  "single_word": false,
307
  "lstrip": false,
308
  "rstrip": false,
309
- "normalized": false,
310
- "special": true
311
  },
312
  {
313
  "id": 50291,
 
314
  "content": "<|hr|>",
315
  "single_word": false,
316
  "lstrip": false,
317
  "rstrip": false,
318
- "normalized": false,
319
- "special": true
320
  },
321
  {
322
  "id": 50292,
 
323
  "content": "<|bg|>",
324
  "single_word": false,
325
  "lstrip": false,
326
  "rstrip": false,
327
- "normalized": false,
328
- "special": true
329
  },
330
  {
331
  "id": 50293,
 
332
  "content": "<|lt|>",
333
  "single_word": false,
334
  "lstrip": false,
335
  "rstrip": false,
336
- "normalized": false,
337
- "special": true
338
  },
339
  {
340
  "id": 50294,
 
341
  "content": "<|la|>",
342
  "single_word": false,
343
  "lstrip": false,
344
  "rstrip": false,
345
- "normalized": false,
346
- "special": true
347
  },
348
  {
349
  "id": 50295,
 
350
  "content": "<|mi|>",
351
  "single_word": false,
352
  "lstrip": false,
353
  "rstrip": false,
354
- "normalized": false,
355
- "special": true
356
  },
357
  {
358
  "id": 50296,
 
359
  "content": "<|ml|>",
360
  "single_word": false,
361
  "lstrip": false,
362
  "rstrip": false,
363
- "normalized": false,
364
- "special": true
365
  },
366
  {
367
  "id": 50297,
 
368
  "content": "<|cy|>",
369
  "single_word": false,
370
  "lstrip": false,
371
  "rstrip": false,
372
- "normalized": false,
373
- "special": true
374
  },
375
  {
376
  "id": 50298,
 
377
  "content": "<|sk|>",
378
  "single_word": false,
379
  "lstrip": false,
380
  "rstrip": false,
381
- "normalized": false,
382
- "special": true
383
  },
384
  {
385
  "id": 50299,
 
386
  "content": "<|te|>",
387
  "single_word": false,
388
  "lstrip": false,
389
  "rstrip": false,
390
- "normalized": false,
391
- "special": true
392
  },
393
  {
394
  "id": 50300,
 
395
  "content": "<|fa|>",
396
  "single_word": false,
397
  "lstrip": false,
398
  "rstrip": false,
399
- "normalized": false,
400
- "special": true
401
  },
402
  {
403
  "id": 50301,
 
404
  "content": "<|lv|>",
405
  "single_word": false,
406
  "lstrip": false,
407
  "rstrip": false,
408
- "normalized": false,
409
- "special": true
410
  },
411
  {
412
  "id": 50302,
 
413
  "content": "<|bn|>",
414
  "single_word": false,
415
  "lstrip": false,
416
  "rstrip": false,
417
- "normalized": false,
418
- "special": true
419
  },
420
  {
421
  "id": 50303,
 
422
  "content": "<|sr|>",
423
  "single_word": false,
424
  "lstrip": false,
425
  "rstrip": false,
426
- "normalized": false,
427
- "special": true
428
  },
429
  {
430
  "id": 50304,
 
431
  "content": "<|az|>",
432
  "single_word": false,
433
  "lstrip": false,
434
  "rstrip": false,
435
- "normalized": false,
436
- "special": true
437
  },
438
  {
439
  "id": 50305,
 
440
  "content": "<|sl|>",
441
  "single_word": false,
442
  "lstrip": false,
443
  "rstrip": false,
444
- "normalized": false,
445
- "special": true
446
  },
447
  {
448
  "id": 50306,
 
449
  "content": "<|kn|>",
450
  "single_word": false,
451
  "lstrip": false,
452
  "rstrip": false,
453
- "normalized": false,
454
- "special": true
455
  },
456
  {
457
  "id": 50307,
 
458
  "content": "<|et|>",
459
  "single_word": false,
460
  "lstrip": false,
461
  "rstrip": false,
462
- "normalized": false,
463
- "special": true
464
  },
465
  {
466
  "id": 50308,
 
467
  "content": "<|mk|>",
468
  "single_word": false,
469
  "lstrip": false,
470
  "rstrip": false,
471
- "normalized": false,
472
- "special": true
473
  },
474
  {
475
  "id": 50309,
 
476
  "content": "<|br|>",
477
  "single_word": false,
478
  "lstrip": false,
479
  "rstrip": false,
480
- "normalized": false,
481
- "special": true
482
  },
483
  {
484
  "id": 50310,
 
485
  "content": "<|eu|>",
486
  "single_word": false,
487
  "lstrip": false,
488
  "rstrip": false,
489
- "normalized": false,
490
- "special": true
491
  },
492
  {
493
  "id": 50311,
 
494
  "content": "<|is|>",
495
  "single_word": false,
496
  "lstrip": false,
497
  "rstrip": false,
498
- "normalized": false,
499
- "special": true
500
  },
501
  {
502
  "id": 50312,
 
503
  "content": "<|hy|>",
504
  "single_word": false,
505
  "lstrip": false,
506
  "rstrip": false,
507
- "normalized": false,
508
- "special": true
509
  },
510
  {
511
  "id": 50313,
 
512
  "content": "<|ne|>",
513
  "single_word": false,
514
  "lstrip": false,
515
  "rstrip": false,
516
- "normalized": false,
517
- "special": true
518
  },
519
  {
520
  "id": 50314,
 
521
  "content": "<|mn|>",
522
  "single_word": false,
523
  "lstrip": false,
524
  "rstrip": false,
525
- "normalized": false,
526
- "special": true
527
  },
528
  {
529
  "id": 50315,
 
530
  "content": "<|bs|>",
531
  "single_word": false,
532
  "lstrip": false,
533
  "rstrip": false,
534
- "normalized": false,
535
- "special": true
536
  },
537
  {
538
  "id": 50316,
 
539
  "content": "<|kk|>",
540
  "single_word": false,
541
  "lstrip": false,
542
  "rstrip": false,
543
- "normalized": false,
544
- "special": true
545
  },
546
  {
547
  "id": 50317,
 
548
  "content": "<|sq|>",
549
  "single_word": false,
550
  "lstrip": false,
551
  "rstrip": false,
552
- "normalized": false,
553
- "special": true
554
  },
555
  {
556
  "id": 50318,
 
557
  "content": "<|sw|>",
558
  "single_word": false,
559
  "lstrip": false,
560
  "rstrip": false,
561
- "normalized": false,
562
- "special": true
563
  },
564
  {
565
  "id": 50319,
 
566
  "content": "<|gl|>",
567
  "single_word": false,
568
  "lstrip": false,
569
  "rstrip": false,
570
- "normalized": false,
571
- "special": true
572
  },
573
  {
574
  "id": 50320,
 
575
  "content": "<|mr|>",
576
  "single_word": false,
577
  "lstrip": false,
578
  "rstrip": false,
579
- "normalized": false,
580
- "special": true
581
  },
582
  {
583
  "id": 50321,
 
584
  "content": "<|pa|>",
585
  "single_word": false,
586
  "lstrip": false,
587
  "rstrip": false,
588
- "normalized": false,
589
- "special": true
590
  },
591
  {
592
  "id": 50322,
 
593
  "content": "<|si|>",
594
  "single_word": false,
595
  "lstrip": false,
596
  "rstrip": false,
597
- "normalized": false,
598
- "special": true
599
  },
600
  {
601
  "id": 50323,
 
602
  "content": "<|km|>",
603
  "single_word": false,
604
  "lstrip": false,
605
  "rstrip": false,
606
- "normalized": false,
607
- "special": true
608
  },
609
  {
610
  "id": 50324,
 
611
  "content": "<|sn|>",
612
  "single_word": false,
613
  "lstrip": false,
614
  "rstrip": false,
615
- "normalized": false,
616
- "special": true
617
  },
618
  {
619
  "id": 50325,
 
620
  "content": "<|yo|>",
621
  "single_word": false,
622
  "lstrip": false,
623
  "rstrip": false,
624
- "normalized": false,
625
- "special": true
626
  },
627
  {
628
  "id": 50326,
 
629
  "content": "<|so|>",
630
  "single_word": false,
631
  "lstrip": false,
632
  "rstrip": false,
633
- "normalized": false,
634
- "special": true
635
  },
636
  {
637
  "id": 50327,
 
638
  "content": "<|af|>",
639
  "single_word": false,
640
  "lstrip": false,
641
  "rstrip": false,
642
- "normalized": false,
643
- "special": true
644
  },
645
  {
646
  "id": 50328,
 
647
  "content": "<|oc|>",
648
  "single_word": false,
649
  "lstrip": false,
650
  "rstrip": false,
651
- "normalized": false,
652
- "special": true
653
  },
654
  {
655
  "id": 50329,
 
656
  "content": "<|ka|>",
657
  "single_word": false,
658
  "lstrip": false,
659
  "rstrip": false,
660
- "normalized": false,
661
- "special": true
662
  },
663
  {
664
  "id": 50330,
 
665
  "content": "<|be|>",
666
  "single_word": false,
667
  "lstrip": false,
668
  "rstrip": false,
669
- "normalized": false,
670
- "special": true
671
  },
672
  {
673
  "id": 50331,
 
674
  "content": "<|tg|>",
675
  "single_word": false,
676
  "lstrip": false,
677
  "rstrip": false,
678
- "normalized": false,
679
- "special": true
680
  },
681
  {
682
  "id": 50332,
 
683
  "content": "<|sd|>",
684
  "single_word": false,
685
  "lstrip": false,
686
  "rstrip": false,
687
- "normalized": false,
688
- "special": true
689
  },
690
  {
691
  "id": 50333,
 
692
  "content": "<|gu|>",
693
  "single_word": false,
694
  "lstrip": false,
695
  "rstrip": false,
696
- "normalized": false,
697
- "special": true
698
  },
699
  {
700
  "id": 50334,
 
701
  "content": "<|am|>",
702
  "single_word": false,
703
  "lstrip": false,
704
  "rstrip": false,
705
- "normalized": false,
706
- "special": true
707
  },
708
  {
709
  "id": 50335,
 
710
  "content": "<|yi|>",
711
  "single_word": false,
712
  "lstrip": false,
713
  "rstrip": false,
714
- "normalized": false,
715
- "special": true
716
  },
717
  {
718
  "id": 50336,
 
719
  "content": "<|lo|>",
720
  "single_word": false,
721
  "lstrip": false,
722
  "rstrip": false,
723
- "normalized": false,
724
- "special": true
725
  },
726
  {
727
  "id": 50337,
 
728
  "content": "<|uz|>",
729
  "single_word": false,
730
  "lstrip": false,
731
  "rstrip": false,
732
- "normalized": false,
733
- "special": true
734
  },
735
  {
736
  "id": 50338,
 
737
  "content": "<|fo|>",
738
  "single_word": false,
739
  "lstrip": false,
740
  "rstrip": false,
741
- "normalized": false,
742
- "special": true
743
  },
744
  {
745
  "id": 50339,
 
746
  "content": "<|ht|>",
747
  "single_word": false,
748
  "lstrip": false,
749
  "rstrip": false,
750
- "normalized": false,
751
- "special": true
752
  },
753
  {
754
  "id": 50340,
 
755
  "content": "<|ps|>",
756
  "single_word": false,
757
  "lstrip": false,
758
  "rstrip": false,
759
- "normalized": false,
760
- "special": true
761
  },
762
  {
763
  "id": 50341,
 
764
  "content": "<|tk|>",
765
  "single_word": false,
766
  "lstrip": false,
767
  "rstrip": false,
768
- "normalized": false,
769
- "special": true
770
  },
771
  {
772
  "id": 50342,
 
773
  "content": "<|nn|>",
774
  "single_word": false,
775
  "lstrip": false,
776
  "rstrip": false,
777
- "normalized": false,
778
- "special": true
779
  },
780
  {
781
  "id": 50343,
 
782
  "content": "<|mt|>",
783
  "single_word": false,
784
  "lstrip": false,
785
  "rstrip": false,
786
- "normalized": false,
787
- "special": true
788
  },
789
  {
790
  "id": 50344,
 
791
  "content": "<|sa|>",
792
  "single_word": false,
793
  "lstrip": false,
794
  "rstrip": false,
795
- "normalized": false,
796
- "special": true
797
  },
798
  {
799
  "id": 50345,
 
800
  "content": "<|lb|>",
801
  "single_word": false,
802
  "lstrip": false,
803
  "rstrip": false,
804
- "normalized": false,
805
- "special": true
806
  },
807
  {
808
  "id": 50346,
 
809
  "content": "<|my|>",
810
  "single_word": false,
811
  "lstrip": false,
812
  "rstrip": false,
813
- "normalized": false,
814
- "special": true
815
  },
816
  {
817
  "id": 50347,
 
818
  "content": "<|bo|>",
819
  "single_word": false,
820
  "lstrip": false,
821
  "rstrip": false,
822
- "normalized": false,
823
- "special": true
824
  },
825
  {
826
  "id": 50348,
 
827
  "content": "<|tl|>",
828
  "single_word": false,
829
  "lstrip": false,
830
  "rstrip": false,
831
- "normalized": false,
832
- "special": true
833
  },
834
  {
835
  "id": 50349,
 
836
  "content": "<|mg|>",
837
  "single_word": false,
838
  "lstrip": false,
839
  "rstrip": false,
840
- "normalized": false,
841
- "special": true
842
  },
843
  {
844
  "id": 50350,
 
845
  "content": "<|as|>",
846
  "single_word": false,
847
  "lstrip": false,
848
  "rstrip": false,
849
- "normalized": false,
850
- "special": true
851
  },
852
  {
853
  "id": 50351,
 
854
  "content": "<|tt|>",
855
  "single_word": false,
856
  "lstrip": false,
857
  "rstrip": false,
858
- "normalized": false,
859
- "special": true
860
  },
861
  {
862
  "id": 50352,
 
863
  "content": "<|haw|>",
864
  "single_word": false,
865
  "lstrip": false,
866
  "rstrip": false,
867
- "normalized": false,
868
- "special": true
869
  },
870
  {
871
  "id": 50353,
 
872
  "content": "<|ln|>",
873
  "single_word": false,
874
  "lstrip": false,
875
  "rstrip": false,
876
- "normalized": false,
877
- "special": true
878
  },
879
  {
880
  "id": 50354,
 
881
  "content": "<|ha|>",
882
  "single_word": false,
883
  "lstrip": false,
884
  "rstrip": false,
885
- "normalized": false,
886
- "special": true
887
  },
888
  {
889
  "id": 50355,
 
890
  "content": "<|ba|>",
891
  "single_word": false,
892
  "lstrip": false,
893
  "rstrip": false,
894
- "normalized": false,
895
- "special": true
896
  },
897
  {
898
  "id": 50356,
 
899
  "content": "<|jw|>",
900
  "single_word": false,
901
  "lstrip": false,
902
  "rstrip": false,
903
- "normalized": false,
904
- "special": true
905
  },
906
  {
907
  "id": 50357,
 
908
  "content": "<|su|>",
909
  "single_word": false,
910
  "lstrip": false,
911
  "rstrip": false,
912
- "normalized": false,
913
- "special": true
914
  },
915
  {
916
  "id": 50358,
 
917
  "content": "<|translate|>",
918
  "single_word": false,
919
  "lstrip": false,
920
  "rstrip": false,
921
- "normalized": false,
922
- "special": true
923
  },
924
  {
925
  "id": 50359,
 
926
  "content": "<|transcribe|>",
927
  "single_word": false,
928
  "lstrip": false,
929
  "rstrip": false,
930
- "normalized": false,
931
- "special": true
932
  },
933
  {
934
  "id": 50360,
 
935
  "content": "<|startoflm|>",
936
  "single_word": false,
937
  "lstrip": false,
938
  "rstrip": false,
939
- "normalized": false,
940
- "special": true
941
  },
942
  {
943
  "id": 50361,
 
944
  "content": "<|startofprev|>",
945
  "single_word": false,
946
  "lstrip": false,
947
  "rstrip": false,
948
- "normalized": false,
949
- "special": true
950
  },
951
  {
952
  "id": 50362,
 
953
  "content": "<|nocaptions|>",
954
  "single_word": false,
955
  "lstrip": false,
956
  "rstrip": false,
957
- "normalized": false,
958
- "special": true
959
  },
960
  {
961
  "id": 50363,
 
962
  "content": "<|notimestamps|>",
963
  "single_word": false,
964
  "lstrip": false,
965
  "rstrip": false,
966
- "normalized": false,
967
- "special": true
968
  }
969
  ],
970
  "normalizer": null,
971
  "pre_tokenizer": {
972
  "type": "ByteLevel",
973
  "add_prefix_space": false,
974
- "trim_offsets": true,
975
- "use_regex": true
976
  },
977
  "post_processor": {
978
  "type": "TemplateProcessing",
@@ -1067,8 +1066,7 @@
1067
  "decoder": {
1068
  "type": "ByteLevel",
1069
  "add_prefix_space": true,
1070
- "trim_offsets": true,
1071
- "use_regex": true
1072
  },
1073
  "model": {
1074
  "type": "BPE",
5
  "added_tokens": [
6
  {
7
  "id": 50257,
8
+ "special": true,
9
  "content": "<|endoftext|>",
10
  "single_word": false,
11
  "lstrip": false,
12
  "rstrip": false,
13
+ "normalized": false
 
14
  },
15
  {
16
  "id": 50258,
17
+ "special": true,
18
  "content": "<|startoftranscript|>",
19
  "single_word": false,
20
  "lstrip": false,
21
  "rstrip": false,
22
+ "normalized": false
 
23
  },
24
  {
25
  "id": 50259,
26
+ "special": true,
27
  "content": "<|en|>",
28
  "single_word": false,
29
  "lstrip": false,
30
  "rstrip": false,
31
+ "normalized": false
 
32
  },
33
  {
34
  "id": 50260,
35
+ "special": true,
36
  "content": "<|zh|>",
37
  "single_word": false,
38
  "lstrip": false,
39
  "rstrip": false,
40
+ "normalized": false
 
41
  },
42
  {
43
  "id": 50261,
44
+ "special": true,
45
  "content": "<|de|>",
46
  "single_word": false,
47
  "lstrip": false,
48
  "rstrip": false,
49
+ "normalized": false
 
50
  },
51
  {
52
  "id": 50262,
53
+ "special": true,
54
  "content": "<|es|>",
55
  "single_word": false,
56
  "lstrip": false,
57
  "rstrip": false,
58
+ "normalized": false
 
59
  },
60
  {
61
  "id": 50263,
62
+ "special": true,
63
  "content": "<|ru|>",
64
  "single_word": false,
65
  "lstrip": false,
66
  "rstrip": false,
67
+ "normalized": false
 
68
  },
69
  {
70
  "id": 50264,
71
+ "special": true,
72
  "content": "<|ko|>",
73
  "single_word": false,
74
  "lstrip": false,
75
  "rstrip": false,
76
+ "normalized": false
 
77
  },
78
  {
79
  "id": 50265,
80
+ "special": true,
81
  "content": "<|fr|>",
82
  "single_word": false,
83
  "lstrip": false,
84
  "rstrip": false,
85
+ "normalized": false
 
86
  },
87
  {
88
  "id": 50266,
89
+ "special": true,
90
  "content": "<|ja|>",
91
  "single_word": false,
92
  "lstrip": false,
93
  "rstrip": false,
94
+ "normalized": false
 
95
  },
96
  {
97
  "id": 50267,
98
+ "special": true,
99
  "content": "<|pt|>",
100
  "single_word": false,
101
  "lstrip": false,
102
  "rstrip": false,
103
+ "normalized": false
 
104
  },
105
  {
106
  "id": 50268,
107
+ "special": true,
108
  "content": "<|tr|>",
109
  "single_word": false,
110
  "lstrip": false,
111
  "rstrip": false,
112
+ "normalized": false
 
113
  },
114
  {
115
  "id": 50269,
116
+ "special": true,
117
  "content": "<|pl|>",
118
  "single_word": false,
119
  "lstrip": false,
120
  "rstrip": false,
121
+ "normalized": false
 
122
  },
123
  {
124
  "id": 50270,
125
+ "special": true,
126
  "content": "<|ca|>",
127
  "single_word": false,
128
  "lstrip": false,
129
  "rstrip": false,
130
+ "normalized": false
 
131
  },
132
  {
133
  "id": 50271,
134
+ "special": true,
135
  "content": "<|nl|>",
136
  "single_word": false,
137
  "lstrip": false,
138
  "rstrip": false,
139
+ "normalized": false
 
140
  },
141
  {
142
  "id": 50272,
143
+ "special": true,
144
  "content": "<|ar|>",
145
  "single_word": false,
146
  "lstrip": false,
147
  "rstrip": false,
148
+ "normalized": false
 
149
  },
150
  {
151
  "id": 50273,
152
+ "special": true,
153
  "content": "<|sv|>",
154
  "single_word": false,
155
  "lstrip": false,
156
  "rstrip": false,
157
+ "normalized": false
 
158
  },
159
  {
160
  "id": 50274,
161
+ "special": true,
162
  "content": "<|it|>",
163
  "single_word": false,
164
  "lstrip": false,
165
  "rstrip": false,
166
+ "normalized": false
 
167
  },
168
  {
169
  "id": 50275,
170
+ "special": true,
171
  "content": "<|id|>",
172
  "single_word": false,
173
  "lstrip": false,
174
  "rstrip": false,
175
+ "normalized": false
 
176
  },
177
  {
178
  "id": 50276,
179
+ "special": true,
180
  "content": "<|hi|>",
181
  "single_word": false,
182
  "lstrip": false,
183
  "rstrip": false,
184
+ "normalized": false
 
185
  },
186
  {
187
  "id": 50277,
188
+ "special": true,
189
  "content": "<|fi|>",
190
  "single_word": false,
191
  "lstrip": false,
192
  "rstrip": false,
193
+ "normalized": false
 
194
  },
195
  {
196
  "id": 50278,
197
+ "special": true,
198
  "content": "<|vi|>",
199
  "single_word": false,
200
  "lstrip": false,
201
  "rstrip": false,
202
+ "normalized": false
 
203
  },
204
  {
205
  "id": 50279,
206
+ "special": true,
207
  "content": "<|he|>",
208
  "single_word": false,
209
  "lstrip": false,
210
  "rstrip": false,
211
+ "normalized": false
 
212
  },
213
  {
214
  "id": 50280,
215
+ "special": true,
216
  "content": "<|uk|>",
217
  "single_word": false,
218
  "lstrip": false,
219
  "rstrip": false,
220
+ "normalized": false
 
221
  },
222
  {
223
  "id": 50281,
224
+ "special": true,
225
  "content": "<|el|>",
226
  "single_word": false,
227
  "lstrip": false,
228
  "rstrip": false,
229
+ "normalized": false
 
230
  },
231
  {
232
  "id": 50282,
233
+ "special": true,
234
  "content": "<|ms|>",
235
  "single_word": false,
236
  "lstrip": false,
237
  "rstrip": false,
238
+ "normalized": false
 
239
  },
240
  {
241
  "id": 50283,
242
+ "special": true,
243
  "content": "<|cs|>",
244
  "single_word": false,
245
  "lstrip": false,
246
  "rstrip": false,
247
+ "normalized": false
 
248
  },
249
  {
250
  "id": 50284,
251
+ "special": true,
252
  "content": "<|ro|>",
253
  "single_word": false,
254
  "lstrip": false,
255
  "rstrip": false,
256
+ "normalized": false
 
257
  },
258
  {
259
  "id": 50285,
260
+ "special": true,
261
  "content": "<|da|>",
262
  "single_word": false,
263
  "lstrip": false,
264
  "rstrip": false,
265
+ "normalized": false
 
266
  },
267
  {
268
  "id": 50286,
269
+ "special": true,
270
  "content": "<|hu|>",
271
  "single_word": false,
272
  "lstrip": false,
273
  "rstrip": false,
274
+ "normalized": false
 
275
  },
276
  {
277
  "id": 50287,
278
+ "special": true,
279
  "content": "<|ta|>",
280
  "single_word": false,
281
  "lstrip": false,
282
  "rstrip": false,
283
+ "normalized": false
 
284
  },
285
  {
286
  "id": 50288,
287
+ "special": true,
288
  "content": "<|no|>",
289
  "single_word": false,
290
  "lstrip": false,
291
  "rstrip": false,
292
+ "normalized": false
 
293
  },
294
  {
295
  "id": 50289,
296
+ "special": true,
297
  "content": "<|th|>",
298
  "single_word": false,
299
  "lstrip": false,
300
  "rstrip": false,
301
+ "normalized": false
 
302
  },
303
  {
304
  "id": 50290,
305
+ "special": true,
306
  "content": "<|ur|>",
307
  "single_word": false,
308
  "lstrip": false,
309
  "rstrip": false,
310
+ "normalized": false
 
311
  },
312
  {
313
  "id": 50291,
314
+ "special": true,
315
  "content": "<|hr|>",
316
  "single_word": false,
317
  "lstrip": false,
318
  "rstrip": false,
319
+ "normalized": false
 
320
  },
321
  {
322
  "id": 50292,
323
+ "special": true,
324
  "content": "<|bg|>",
325
  "single_word": false,
326
  "lstrip": false,
327
  "rstrip": false,
328
+ "normalized": false
 
329
  },
330
  {
331
  "id": 50293,
332
+ "special": true,
333
  "content": "<|lt|>",
334
  "single_word": false,
335
  "lstrip": false,
336
  "rstrip": false,
337
+ "normalized": false
 
338
  },
339
  {
340
  "id": 50294,
341
+ "special": true,
342
  "content": "<|la|>",
343
  "single_word": false,
344
  "lstrip": false,
345
  "rstrip": false,
346
+ "normalized": false
 
347
  },
348
  {
349
  "id": 50295,
350
+ "special": true,
351
  "content": "<|mi|>",
352
  "single_word": false,
353
  "lstrip": false,
354
  "rstrip": false,
355
+ "normalized": false
 
356
  },
357
  {
358
  "id": 50296,
359
+ "special": true,
360
  "content": "<|ml|>",
361
  "single_word": false,
362
  "lstrip": false,
363
  "rstrip": false,
364
+ "normalized": false
 
365
  },
366
  {
367
  "id": 50297,
368
+ "special": true,
369
  "content": "<|cy|>",
370
  "single_word": false,
371
  "lstrip": false,
372
  "rstrip": false,
373
+ "normalized": false
 
374
  },
375
  {
376
  "id": 50298,
377
+ "special": true,
378
  "content": "<|sk|>",
379
  "single_word": false,
380
  "lstrip": false,
381
  "rstrip": false,
382
+ "normalized": false
 
383
  },
384
  {
385
  "id": 50299,
386
+ "special": true,
387
  "content": "<|te|>",
388
  "single_word": false,
389
  "lstrip": false,
390
  "rstrip": false,
391
+ "normalized": false
 
392
  },
393
  {
394
  "id": 50300,
395
+ "special": true,
396
  "content": "<|fa|>",
397
  "single_word": false,
398
  "lstrip": false,
399
  "rstrip": false,
400
+ "normalized": false
 
401
  },
402
  {
403
  "id": 50301,
404
+ "special": true,
405
  "content": "<|lv|>",
406
  "single_word": false,
407
  "lstrip": false,
408
  "rstrip": false,
409
+ "normalized": false
 
410
  },
411
  {
412
  "id": 50302,
413
+ "special": true,
414
  "content": "<|bn|>",
415
  "single_word": false,
416
  "lstrip": false,
417
  "rstrip": false,
418
+ "normalized": false
 
419
  },
420
  {
421
  "id": 50303,
422
+ "special": true,
423
  "content": "<|sr|>",
424
  "single_word": false,
425
  "lstrip": false,
426
  "rstrip": false,
427
+ "normalized": false
 
428
  },
429
  {
430
  "id": 50304,
431
+ "special": true,
432
  "content": "<|az|>",
433
  "single_word": false,
434
  "lstrip": false,
435
  "rstrip": false,
436
+ "normalized": false
 
437
  },
438
  {
439
  "id": 50305,
440
+ "special": true,
441
  "content": "<|sl|>",
442
  "single_word": false,
443
  "lstrip": false,
444
  "rstrip": false,
445
+ "normalized": false
 
446
  },
447
  {
448
  "id": 50306,
449
+ "special": true,
450
  "content": "<|kn|>",
451
  "single_word": false,
452
  "lstrip": false,
453
  "rstrip": false,
454
+ "normalized": false
 
455
  },
456
  {
457
  "id": 50307,
458
+ "special": true,
459
  "content": "<|et|>",
460
  "single_word": false,
461
  "lstrip": false,
462
  "rstrip": false,
463
+ "normalized": false
 
464
  },
465
  {
466
  "id": 50308,
467
+ "special": true,
468
  "content": "<|mk|>",
469
  "single_word": false,
470
  "lstrip": false,
471
  "rstrip": false,
472
+ "normalized": false
 
473
  },
474
  {
475
  "id": 50309,
476
+ "special": true,
477
  "content": "<|br|>",
478
  "single_word": false,
479
  "lstrip": false,
480
  "rstrip": false,
481
+ "normalized": false
 
482
  },
483
  {
484
  "id": 50310,
485
+ "special": true,
486
  "content": "<|eu|>",
487
  "single_word": false,
488
  "lstrip": false,
489
  "rstrip": false,
490
+ "normalized": false
 
491
  },
492
  {
493
  "id": 50311,
494
+ "special": true,
495
  "content": "<|is|>",
496
  "single_word": false,
497
  "lstrip": false,
498
  "rstrip": false,
499
+ "normalized": false
 
500
  },
501
  {
502
  "id": 50312,
503
+ "special": true,
504
  "content": "<|hy|>",
505
  "single_word": false,
506
  "lstrip": false,
507
  "rstrip": false,
508
+ "normalized": false
 
509
  },
510
  {
511
  "id": 50313,
512
+ "special": true,
513
  "content": "<|ne|>",
514
  "single_word": false,
515
  "lstrip": false,
516
  "rstrip": false,
517
+ "normalized": false
 
518
  },
519
  {
520
  "id": 50314,
521
+ "special": true,
522
  "content": "<|mn|>",
523
  "single_word": false,
524
  "lstrip": false,
525
  "rstrip": false,
526
+ "normalized": false
 
527
  },
528
  {
529
  "id": 50315,
530
+ "special": true,
531
  "content": "<|bs|>",
532
  "single_word": false,
533
  "lstrip": false,
534
  "rstrip": false,
535
+ "normalized": false
 
536
  },
537
  {
538
  "id": 50316,
539
+ "special": true,
540
  "content": "<|kk|>",
541
  "single_word": false,
542
  "lstrip": false,
543
  "rstrip": false,
544
+ "normalized": false
 
545
  },
546
  {
547
  "id": 50317,
548
+ "special": true,
549
  "content": "<|sq|>",
550
  "single_word": false,
551
  "lstrip": false,
552
  "rstrip": false,
553
+ "normalized": false
 
554
  },
555
  {
556
  "id": 50318,
557
+ "special": true,
558
  "content": "<|sw|>",
559
  "single_word": false,
560
  "lstrip": false,
561
  "rstrip": false,
562
+ "normalized": false
 
563
  },
564
  {
565
  "id": 50319,
566
+ "special": true,
567
  "content": "<|gl|>",
568
  "single_word": false,
569
  "lstrip": false,
570
  "rstrip": false,
571
+ "normalized": false
 
572
  },
573
  {
574
  "id": 50320,
575
+ "special": true,
576
  "content": "<|mr|>",
577
  "single_word": false,
578
  "lstrip": false,
579
  "rstrip": false,
580
+ "normalized": false
 
581
  },
582
  {
583
  "id": 50321,
584
+ "special": true,
585
  "content": "<|pa|>",
586
  "single_word": false,
587
  "lstrip": false,
588
  "rstrip": false,
589
+ "normalized": false
 
590
  },
591
  {
592
  "id": 50322,
593
+ "special": true,
594
  "content": "<|si|>",
595
  "single_word": false,
596
  "lstrip": false,
597
  "rstrip": false,
598
+ "normalized": false
 
599
  },
600
  {
601
  "id": 50323,
602
+ "special": true,
603
  "content": "<|km|>",
604
  "single_word": false,
605
  "lstrip": false,
606
  "rstrip": false,
607
+ "normalized": false
 
608
  },
609
  {
610
  "id": 50324,
611
+ "special": true,
612
  "content": "<|sn|>",
613
  "single_word": false,
614
  "lstrip": false,
615
  "rstrip": false,
616
+ "normalized": false
 
617
  },
618
  {
619
  "id": 50325,
620
+ "special": true,
621
  "content": "<|yo|>",
622
  "single_word": false,
623
  "lstrip": false,
624
  "rstrip": false,
625
+ "normalized": false
 
626
  },
627
  {
628
  "id": 50326,
629
+ "special": true,
630
  "content": "<|so|>",
631
  "single_word": false,
632
  "lstrip": false,
633
  "rstrip": false,
634
+ "normalized": false
 
635
  },
636
  {
637
  "id": 50327,
638
+ "special": true,
639
  "content": "<|af|>",
640
  "single_word": false,
641
  "lstrip": false,
642
  "rstrip": false,
643
+ "normalized": false
 
644
  },
645
  {
646
  "id": 50328,
647
+ "special": true,
648
  "content": "<|oc|>",
649
  "single_word": false,
650
  "lstrip": false,
651
  "rstrip": false,
652
+ "normalized": false
 
653
  },
654
  {
655
  "id": 50329,
656
+ "special": true,
657
  "content": "<|ka|>",
658
  "single_word": false,
659
  "lstrip": false,
660
  "rstrip": false,
661
+ "normalized": false
 
662
  },
663
  {
664
  "id": 50330,
665
+ "special": true,
666
  "content": "<|be|>",
667
  "single_word": false,
668
  "lstrip": false,
669
  "rstrip": false,
670
+ "normalized": false
 
671
  },
672
  {
673
  "id": 50331,
674
+ "special": true,
675
  "content": "<|tg|>",
676
  "single_word": false,
677
  "lstrip": false,
678
  "rstrip": false,
679
+ "normalized": false
 
680
  },
681
  {
682
  "id": 50332,
683
+ "special": true,
684
  "content": "<|sd|>",
685
  "single_word": false,
686
  "lstrip": false,
687
  "rstrip": false,
688
+ "normalized": false
 
689
  },
690
  {
691
  "id": 50333,
692
+ "special": true,
693
  "content": "<|gu|>",
694
  "single_word": false,
695
  "lstrip": false,
696
  "rstrip": false,
697
+ "normalized": false
 
698
  },
699
  {
700
  "id": 50334,
701
+ "special": true,
702
  "content": "<|am|>",
703
  "single_word": false,
704
  "lstrip": false,
705
  "rstrip": false,
706
+ "normalized": false
 
707
  },
708
  {
709
  "id": 50335,
710
+ "special": true,
711
  "content": "<|yi|>",
712
  "single_word": false,
713
  "lstrip": false,
714
  "rstrip": false,
715
+ "normalized": false
 
716
  },
717
  {
718
  "id": 50336,
719
+ "special": true,
720
  "content": "<|lo|>",
721
  "single_word": false,
722
  "lstrip": false,
723
  "rstrip": false,
724
+ "normalized": false
 
725
  },
726
  {
727
  "id": 50337,
728
+ "special": true,
729
  "content": "<|uz|>",
730
  "single_word": false,
731
  "lstrip": false,
732
  "rstrip": false,
733
+ "normalized": false
 
734
  },
735
  {
736
  "id": 50338,
737
+ "special": true,
738
  "content": "<|fo|>",
739
  "single_word": false,
740
  "lstrip": false,
741
  "rstrip": false,
742
+ "normalized": false
 
743
  },
744
  {
745
  "id": 50339,
746
+ "special": true,
747
  "content": "<|ht|>",
748
  "single_word": false,
749
  "lstrip": false,
750
  "rstrip": false,
751
+ "normalized": false
 
752
  },
753
  {
754
  "id": 50340,
755
+ "special": true,
756
  "content": "<|ps|>",
757
  "single_word": false,
758
  "lstrip": false,
759
  "rstrip": false,
760
+ "normalized": false
 
761
  },
762
  {
763
  "id": 50341,
764
+ "special": true,
765
  "content": "<|tk|>",
766
  "single_word": false,
767
  "lstrip": false,
768
  "rstrip": false,
769
+ "normalized": false
 
770
  },
771
  {
772
  "id": 50342,
773
+ "special": true,
774
  "content": "<|nn|>",
775
  "single_word": false,
776
  "lstrip": false,
777
  "rstrip": false,
778
+ "normalized": false
 
779
  },
780
  {
781
  "id": 50343,
782
+ "special": true,
783
  "content": "<|mt|>",
784
  "single_word": false,
785
  "lstrip": false,
786
  "rstrip": false,
787
+ "normalized": false
 
788
  },
789
  {
790
  "id": 50344,
791
+ "special": true,
792
  "content": "<|sa|>",
793
  "single_word": false,
794
  "lstrip": false,
795
  "rstrip": false,
796
+ "normalized": false
 
797
  },
798
  {
799
  "id": 50345,
800
+ "special": true,
801
  "content": "<|lb|>",
802
  "single_word": false,
803
  "lstrip": false,
804
  "rstrip": false,
805
+ "normalized": false
 
806
  },
807
  {
808
  "id": 50346,
809
+ "special": true,
810
  "content": "<|my|>",
811
  "single_word": false,
812
  "lstrip": false,
813
  "rstrip": false,
814
+ "normalized": false
 
815
  },
816
  {
817
  "id": 50347,
818
+ "special": true,
819
  "content": "<|bo|>",
820
  "single_word": false,
821
  "lstrip": false,
822
  "rstrip": false,
823
+ "normalized": false
 
824
  },
825
  {
826
  "id": 50348,
827
+ "special": true,
828
  "content": "<|tl|>",
829
  "single_word": false,
830
  "lstrip": false,
831
  "rstrip": false,
832
+ "normalized": false
 
833
  },
834
  {
835
  "id": 50349,
836
+ "special": true,
837
  "content": "<|mg|>",
838
  "single_word": false,
839
  "lstrip": false,
840
  "rstrip": false,
841
+ "normalized": false
 
842
  },
843
  {
844
  "id": 50350,
845
+ "special": true,
846
  "content": "<|as|>",
847
  "single_word": false,
848
  "lstrip": false,
849
  "rstrip": false,
850
+ "normalized": false
 
851
  },
852
  {
853
  "id": 50351,
854
+ "special": true,
855
  "content": "<|tt|>",
856
  "single_word": false,
857
  "lstrip": false,
858
  "rstrip": false,
859
+ "normalized": false
 
860
  },
861
  {
862
  "id": 50352,
863
+ "special": true,
864
  "content": "<|haw|>",
865
  "single_word": false,
866
  "lstrip": false,
867
  "rstrip": false,
868
+ "normalized": false
 
869
  },
870
  {
871
  "id": 50353,
872
+ "special": true,
873
  "content": "<|ln|>",
874
  "single_word": false,
875
  "lstrip": false,
876
  "rstrip": false,
877
+ "normalized": false
 
878
  },
879
  {
880
  "id": 50354,
881
+ "special": true,
882
  "content": "<|ha|>",
883
  "single_word": false,
884
  "lstrip": false,
885
  "rstrip": false,
886
+ "normalized": false
 
887
  },
888
  {
889
  "id": 50355,
890
+ "special": true,
891
  "content": "<|ba|>",
892
  "single_word": false,
893
  "lstrip": false,
894
  "rstrip": false,
895
+ "normalized": false
 
896
  },
897
  {
898
  "id": 50356,
899
+ "special": true,
900
  "content": "<|jw|>",
901
  "single_word": false,
902
  "lstrip": false,
903
  "rstrip": false,
904
+ "normalized": false
 
905
  },
906
  {
907
  "id": 50357,
908
+ "special": true,
909
  "content": "<|su|>",
910
  "single_word": false,
911
  "lstrip": false,
912
  "rstrip": false,
913
+ "normalized": false
 
914
  },
915
  {
916
  "id": 50358,
917
+ "special": true,
918
  "content": "<|translate|>",
919
  "single_word": false,
920
  "lstrip": false,
921
  "rstrip": false,
922
+ "normalized": false
 
923
  },
924
  {
925
  "id": 50359,
926
+ "special": true,
927
  "content": "<|transcribe|>",
928
  "single_word": false,
929
  "lstrip": false,
930
  "rstrip": false,
931
+ "normalized": false
 
932
  },
933
  {
934
  "id": 50360,
935
+ "special": true,
936
  "content": "<|startoflm|>",
937
  "single_word": false,
938
  "lstrip": false,
939
  "rstrip": false,
940
+ "normalized": false
 
941
  },
942
  {
943
  "id": 50361,
944
+ "special": true,
945
  "content": "<|startofprev|>",
946
  "single_word": false,
947
  "lstrip": false,
948
  "rstrip": false,
949
+ "normalized": false
 
950
  },
951
  {
952
  "id": 50362,
953
+ "special": true,
954
  "content": "<|nocaptions|>",
955
  "single_word": false,
956
  "lstrip": false,
957
  "rstrip": false,
958
+ "normalized": false
 
959
  },
960
  {
961
  "id": 50363,
962
+ "special": true,
963
  "content": "<|notimestamps|>",
964
  "single_word": false,
965
  "lstrip": false,
966
  "rstrip": false,
967
+ "normalized": false
 
968
  }
969
  ],
970
  "normalizer": null,
971
  "pre_tokenizer": {
972
  "type": "ByteLevel",
973
  "add_prefix_space": false,
974
+ "trim_offsets": true
 
975
  },
976
  "post_processor": {
977
  "type": "TemplateProcessing",
1066
  "decoder": {
1067
  "type": "ByteLevel",
1068
  "add_prefix_space": true,
1069
+ "trim_offsets": true
 
1070
  },
1071
  "model": {
1072
  "type": "BPE",
quantized/openai/whisper-base/default/tokenizer_config.json CHANGED
@@ -19,7 +19,6 @@
19
  },
20
  "errors": "replace",
21
  "model_max_length": 448,
22
- "name_or_path": "openai/whisper-base",
23
  "pad_token": null,
24
  "processor_class": "WhisperProcessor",
25
  "return_attention_mask": false,
19
  },
20
  "errors": "replace",
21
  "model_max_length": 448,
 
22
  "pad_token": null,
23
  "processor_class": "WhisperProcessor",
24
  "return_attention_mask": false,
quantized/openai/whisper-base/default/vocab.json CHANGED
The diff for this file is too large to render. See raw diff
quantized/openai/whisper-base/speech2seq-lm-with-past/config.json CHANGED
@@ -2,6 +2,7 @@
2
  "_name_or_path": "openai/whisper-base",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
 
5
  "architectures": [
6
  "WhisperForConditionalGeneration"
7
  ],
@@ -11,6 +12,7 @@
11
  50257
12
  ],
13
  "bos_token_id": 50257,
 
14
  "d_model": 512,
15
  "decoder_attention_heads": 8,
16
  "decoder_ffn_dim": 2048,
@@ -39,6 +41,12 @@
39
  ],
40
  "init_std": 0.02,
41
  "is_encoder_decoder": true,
 
 
 
 
 
 
42
  "max_length": 448,
43
  "max_source_positions": 1500,
44
  "max_target_positions": 448,
@@ -135,7 +143,8 @@
135
  50361,
136
  50362
137
  ],
138
- "transformers_version": "4.26.1",
139
  "use_cache": true,
 
140
  "vocab_size": 51865
141
  }
2
  "_name_or_path": "openai/whisper-base",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
5
+ "apply_spec_augment": false,
6
  "architectures": [
7
  "WhisperForConditionalGeneration"
8
  ],
12
  50257
13
  ],
14
  "bos_token_id": 50257,
15
+ "classifier_proj_size": 256,
16
  "d_model": 512,
17
  "decoder_attention_heads": 8,
18
  "decoder_ffn_dim": 2048,
41
  ],
42
  "init_std": 0.02,
43
  "is_encoder_decoder": true,
44
+ "mask_feature_length": 10,
45
+ "mask_feature_min_masks": 0,
46
+ "mask_feature_prob": 0.0,
47
+ "mask_time_length": 10,
48
+ "mask_time_min_masks": 2,
49
+ "mask_time_prob": 0.05,
50
  "max_length": 448,
51
  "max_source_positions": 1500,
52
  "max_target_positions": 448,
143
  50361,
144
  50362
145
  ],
146
+ "transformers_version": "4.27.2",
147
  "use_cache": true,
148
+ "use_weighted_layer_sum": false,
149
  "vocab_size": 51865
150
  }
quantized/openai/whisper-base/speech2seq-lm-with-past/decoder_model.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0800f9c5cd63139166bf2031993b8e136bb754a76a94aaf25a9e9b0dd9cc73bf
3
- size 80322097
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:884b84248916bf5250a668a64617810285b586683f22e841a9ba8f2220fc31c2
3
+ size 80282446
quantized/openai/whisper-base/speech2seq-lm-with-past/decoder_model_merged.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:62aac7f57c5f696817983bb46f45b694d9e870727dcade1c2be707146b1e215e
3
- size 80558019
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30f3e2924bdf44a0fa980d0420b126785925bae478b382baa5812af74bf89299
3
+ size 80478651
quantized/openai/whisper-base/speech2seq-lm-with-past/decoder_with_past_model.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f1e5181fd479407e403f50594bb14a38bca8f6c86e27b656c4066aaf9cd0a72
3
- size 77067328
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a168553192c8c05f3af164ea4dd4cedefe4a37cee46057d775584efeef15742d
3
+ size 77027677
quantized/openai/whisper-base/speech2seq-lm-with-past/encoder_model.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1990e063ab168d67092cbbd32588acb425e6d767bd3af38ecf36137fe3914b41
3
- size 23337235
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:589ca79a883dd2dfae23acd7f801df018d3ffef9ed3eaa63eecfacff6bd18a6d
3
+ size 23321703
quantized/openai/whisper-base/speech2seq-lm-with-past/merges.txt CHANGED
@@ -1,4 +1,4 @@
1
- #version: 0.2
2
  Ġ a
3
  Ġt h
4
  i n
1
+ #version: 0.2 - Trained by `huggingface/tokenizers`
2
  Ġ a
3
  Ġt h
4
  i n
quantized/openai/whisper-base/speech2seq-lm-with-past/tokenizer.json CHANGED
@@ -5,974 +5,973 @@
5
  "added_tokens": [
6
  {
7
  "id": 50257,
 
8
  "content": "<|endoftext|>",
9
  "single_word": false,
10
  "lstrip": false,
11
  "rstrip": false,
12
- "normalized": false,
13
- "special": true
14
  },
15
  {
16
  "id": 50258,
 
17
  "content": "<|startoftranscript|>",
18
  "single_word": false,
19
  "lstrip": false,
20
  "rstrip": false,
21
- "normalized": false,
22
- "special": true
23
  },
24
  {
25
  "id": 50259,
 
26
  "content": "<|en|>",
27
  "single_word": false,
28
  "lstrip": false,
29
  "rstrip": false,
30
- "normalized": false,
31
- "special": true
32
  },
33
  {
34
  "id": 50260,
 
35
  "content": "<|zh|>",
36
  "single_word": false,
37
  "lstrip": false,
38
  "rstrip": false,
39
- "normalized": false,
40
- "special": true
41
  },
42
  {
43
  "id": 50261,
 
44
  "content": "<|de|>",
45
  "single_word": false,
46
  "lstrip": false,
47
  "rstrip": false,
48
- "normalized": false,
49
- "special": true
50
  },
51
  {
52
  "id": 50262,
 
53
  "content": "<|es|>",
54
  "single_word": false,
55
  "lstrip": false,
56
  "rstrip": false,
57
- "normalized": false,
58
- "special": true
59
  },
60
  {
61
  "id": 50263,
 
62
  "content": "<|ru|>",
63
  "single_word": false,
64
  "lstrip": false,
65
  "rstrip": false,
66
- "normalized": false,
67
- "special": true
68
  },
69
  {
70
  "id": 50264,
 
71
  "content": "<|ko|>",
72
  "single_word": false,
73
  "lstrip": false,
74
  "rstrip": false,
75
- "normalized": false,
76
- "special": true
77
  },
78
  {
79
  "id": 50265,
 
80
  "content": "<|fr|>",
81
  "single_word": false,
82
  "lstrip": false,
83
  "rstrip": false,
84
- "normalized": false,
85
- "special": true
86
  },
87
  {
88
  "id": 50266,
 
89
  "content": "<|ja|>",
90
  "single_word": false,
91
  "lstrip": false,
92
  "rstrip": false,
93
- "normalized": false,
94
- "special": true
95
  },
96
  {
97
  "id": 50267,
 
98
  "content": "<|pt|>",
99
  "single_word": false,
100
  "lstrip": false,
101
  "rstrip": false,
102
- "normalized": false,
103
- "special": true
104
  },
105
  {
106
  "id": 50268,
 
107
  "content": "<|tr|>",
108
  "single_word": false,
109
  "lstrip": false,
110
  "rstrip": false,
111
- "normalized": false,
112
- "special": true
113
  },
114
  {
115
  "id": 50269,
 
116
  "content": "<|pl|>",
117
  "single_word": false,
118
  "lstrip": false,
119
  "rstrip": false,
120
- "normalized": false,
121
- "special": true
122
  },
123
  {
124
  "id": 50270,
 
125
  "content": "<|ca|>",
126
  "single_word": false,
127
  "lstrip": false,
128
  "rstrip": false,
129
- "normalized": false,
130
- "special": true
131
  },
132
  {
133
  "id": 50271,
 
134
  "content": "<|nl|>",
135
  "single_word": false,
136
  "lstrip": false,
137
  "rstrip": false,
138
- "normalized": false,
139
- "special": true
140
  },
141
  {
142
  "id": 50272,
 
143
  "content": "<|ar|>",
144
  "single_word": false,
145
  "lstrip": false,
146
  "rstrip": false,
147
- "normalized": false,
148
- "special": true
149
  },
150
  {
151
  "id": 50273,
 
152
  "content": "<|sv|>",
153
  "single_word": false,
154
  "lstrip": false,
155
  "rstrip": false,
156
- "normalized": false,
157
- "special": true
158
  },
159
  {
160
  "id": 50274,
 
161
  "content": "<|it|>",
162
  "single_word": false,
163
  "lstrip": false,
164
  "rstrip": false,
165
- "normalized": false,
166
- "special": true
167
  },
168
  {
169
  "id": 50275,
 
170
  "content": "<|id|>",
171
  "single_word": false,
172
  "lstrip": false,
173
  "rstrip": false,
174
- "normalized": false,
175
- "special": true
176
  },
177
  {
178
  "id": 50276,
 
179
  "content": "<|hi|>",
180
  "single_word": false,
181
  "lstrip": false,
182
  "rstrip": false,
183
- "normalized": false,
184
- "special": true
185
  },
186
  {
187
  "id": 50277,
 
188
  "content": "<|fi|>",
189
  "single_word": false,
190
  "lstrip": false,
191
  "rstrip": false,
192
- "normalized": false,
193
- "special": true
194
  },
195
  {
196
  "id": 50278,
 
197
  "content": "<|vi|>",
198
  "single_word": false,
199
  "lstrip": false,
200
  "rstrip": false,
201
- "normalized": false,
202
- "special": true
203
  },
204
  {
205
  "id": 50279,
 
206
  "content": "<|he|>",
207
  "single_word": false,
208
  "lstrip": false,
209
  "rstrip": false,
210
- "normalized": false,
211
- "special": true
212
  },
213
  {
214
  "id": 50280,
 
215
  "content": "<|uk|>",
216
  "single_word": false,
217
  "lstrip": false,
218
  "rstrip": false,
219
- "normalized": false,
220
- "special": true
221
  },
222
  {
223
  "id": 50281,
 
224
  "content": "<|el|>",
225
  "single_word": false,
226
  "lstrip": false,
227
  "rstrip": false,
228
- "normalized": false,
229
- "special": true
230
  },
231
  {
232
  "id": 50282,
 
233
  "content": "<|ms|>",
234
  "single_word": false,
235
  "lstrip": false,
236
  "rstrip": false,
237
- "normalized": false,
238
- "special": true
239
  },
240
  {
241
  "id": 50283,
 
242
  "content": "<|cs|>",
243
  "single_word": false,
244
  "lstrip": false,
245
  "rstrip": false,
246
- "normalized": false,
247
- "special": true
248
  },
249
  {
250
  "id": 50284,
 
251
  "content": "<|ro|>",
252
  "single_word": false,
253
  "lstrip": false,
254
  "rstrip": false,
255
- "normalized": false,
256
- "special": true
257
  },
258
  {
259
  "id": 50285,
 
260
  "content": "<|da|>",
261
  "single_word": false,
262
  "lstrip": false,
263
  "rstrip": false,
264
- "normalized": false,
265
- "special": true
266
  },
267
  {
268
  "id": 50286,
 
269
  "content": "<|hu|>",
270
  "single_word": false,
271
  "lstrip": false,
272
  "rstrip": false,
273
- "normalized": false,
274
- "special": true
275
  },
276
  {
277
  "id": 50287,
 
278
  "content": "<|ta|>",
279
  "single_word": false,
280
  "lstrip": false,
281
  "rstrip": false,
282
- "normalized": false,
283
- "special": true
284
  },
285
  {
286
  "id": 50288,
 
287
  "content": "<|no|>",
288
  "single_word": false,
289
  "lstrip": false,
290
  "rstrip": false,
291
- "normalized": false,
292
- "special": true
293
  },
294
  {
295
  "id": 50289,
 
296
  "content": "<|th|>",
297
  "single_word": false,
298
  "lstrip": false,
299
  "rstrip": false,
300
- "normalized": false,
301
- "special": true
302
  },
303
  {
304
  "id": 50290,
 
305
  "content": "<|ur|>",
306
  "single_word": false,
307
  "lstrip": false,
308
  "rstrip": false,
309
- "normalized": false,
310
- "special": true
311
  },
312
  {
313
  "id": 50291,
 
314
  "content": "<|hr|>",
315
  "single_word": false,
316
  "lstrip": false,
317
  "rstrip": false,
318
- "normalized": false,
319
- "special": true
320
  },
321
  {
322
  "id": 50292,
 
323
  "content": "<|bg|>",
324
  "single_word": false,
325
  "lstrip": false,
326
  "rstrip": false,
327
- "normalized": false,
328
- "special": true
329
  },
330
  {
331
  "id": 50293,
 
332
  "content": "<|lt|>",
333
  "single_word": false,
334
  "lstrip": false,
335
  "rstrip": false,
336
- "normalized": false,
337
- "special": true
338
  },
339
  {
340
  "id": 50294,
 
341
  "content": "<|la|>",
342
  "single_word": false,
343
  "lstrip": false,
344
  "rstrip": false,
345
- "normalized": false,
346
- "special": true
347
  },
348
  {
349
  "id": 50295,
 
350
  "content": "<|mi|>",
351
  "single_word": false,
352
  "lstrip": false,
353
  "rstrip": false,
354
- "normalized": false,
355
- "special": true
356
  },
357
  {
358
  "id": 50296,
 
359
  "content": "<|ml|>",
360
  "single_word": false,
361
  "lstrip": false,
362
  "rstrip": false,
363
- "normalized": false,
364
- "special": true
365
  },
366
  {
367
  "id": 50297,
 
368
  "content": "<|cy|>",
369
  "single_word": false,
370
  "lstrip": false,
371
  "rstrip": false,
372
- "normalized": false,
373
- "special": true
374
  },
375
  {
376
  "id": 50298,
 
377
  "content": "<|sk|>",
378
  "single_word": false,
379
  "lstrip": false,
380
  "rstrip": false,
381
- "normalized": false,
382
- "special": true
383
  },
384
  {
385
  "id": 50299,
 
386
  "content": "<|te|>",
387
  "single_word": false,
388
  "lstrip": false,
389
  "rstrip": false,
390
- "normalized": false,
391
- "special": true
392
  },
393
  {
394
  "id": 50300,
 
395
  "content": "<|fa|>",
396
  "single_word": false,
397
  "lstrip": false,
398
  "rstrip": false,
399
- "normalized": false,
400
- "special": true
401
  },
402
  {
403
  "id": 50301,
 
404
  "content": "<|lv|>",
405
  "single_word": false,
406
  "lstrip": false,
407
  "rstrip": false,
408
- "normalized": false,
409
- "special": true
410
  },
411
  {
412
  "id": 50302,
 
413
  "content": "<|bn|>",
414
  "single_word": false,
415
  "lstrip": false,
416
  "rstrip": false,
417
- "normalized": false,
418
- "special": true
419
  },
420
  {
421
  "id": 50303,
 
422
  "content": "<|sr|>",
423
  "single_word": false,
424
  "lstrip": false,
425
  "rstrip": false,
426
- "normalized": false,
427
- "special": true
428
  },
429
  {
430
  "id": 50304,
 
431
  "content": "<|az|>",
432
  "single_word": false,
433
  "lstrip": false,
434
  "rstrip": false,
435
- "normalized": false,
436
- "special": true
437
  },
438
  {
439
  "id": 50305,
 
440
  "content": "<|sl|>",
441
  "single_word": false,
442
  "lstrip": false,
443
  "rstrip": false,
444
- "normalized": false,
445
- "special": true
446
  },
447
  {
448
  "id": 50306,
 
449
  "content": "<|kn|>",
450
  "single_word": false,
451
  "lstrip": false,
452
  "rstrip": false,
453
- "normalized": false,
454
- "special": true
455
  },
456
  {
457
  "id": 50307,
 
458
  "content": "<|et|>",
459
  "single_word": false,
460
  "lstrip": false,
461
  "rstrip": false,
462
- "normalized": false,
463
- "special": true
464
  },
465
  {
466
  "id": 50308,
 
467
  "content": "<|mk|>",
468
  "single_word": false,
469
  "lstrip": false,
470
  "rstrip": false,
471
- "normalized": false,
472
- "special": true
473
  },
474
  {
475
  "id": 50309,
 
476
  "content": "<|br|>",
477
  "single_word": false,
478
  "lstrip": false,
479
  "rstrip": false,
480
- "normalized": false,
481
- "special": true
482
  },
483
  {
484
  "id": 50310,
 
485
  "content": "<|eu|>",
486
  "single_word": false,
487
  "lstrip": false,
488
  "rstrip": false,
489
- "normalized": false,
490
- "special": true
491
  },
492
  {
493
  "id": 50311,
 
494
  "content": "<|is|>",
495
  "single_word": false,
496
  "lstrip": false,
497
  "rstrip": false,
498
- "normalized": false,
499
- "special": true
500
  },
501
  {
502
  "id": 50312,
 
503
  "content": "<|hy|>",
504
  "single_word": false,
505
  "lstrip": false,
506
  "rstrip": false,
507
- "normalized": false,
508
- "special": true
509
  },
510
  {
511
  "id": 50313,
 
512
  "content": "<|ne|>",
513
  "single_word": false,
514
  "lstrip": false,
515
  "rstrip": false,
516
- "normalized": false,
517
- "special": true
518
  },
519
  {
520
  "id": 50314,
 
521
  "content": "<|mn|>",
522
  "single_word": false,
523
  "lstrip": false,
524
  "rstrip": false,
525
- "normalized": false,
526
- "special": true
527
  },
528
  {
529
  "id": 50315,
 
530
  "content": "<|bs|>",
531
  "single_word": false,
532
  "lstrip": false,
533
  "rstrip": false,
534
- "normalized": false,
535
- "special": true
536
  },
537
  {
538
  "id": 50316,
 
539
  "content": "<|kk|>",
540
  "single_word": false,
541
  "lstrip": false,
542
  "rstrip": false,
543
- "normalized": false,
544
- "special": true
545
  },
546
  {
547
  "id": 50317,
 
548
  "content": "<|sq|>",
549
  "single_word": false,
550
  "lstrip": false,
551
  "rstrip": false,
552
- "normalized": false,
553
- "special": true
554
  },
555
  {
556
  "id": 50318,
 
557
  "content": "<|sw|>",
558
  "single_word": false,
559
  "lstrip": false,
560
  "rstrip": false,
561
- "normalized": false,
562
- "special": true
563
  },
564
  {
565
  "id": 50319,
 
566
  "content": "<|gl|>",
567
  "single_word": false,
568
  "lstrip": false,
569
  "rstrip": false,
570
- "normalized": false,
571
- "special": true
572
  },
573
  {
574
  "id": 50320,
 
575
  "content": "<|mr|>",
576
  "single_word": false,
577
  "lstrip": false,
578
  "rstrip": false,
579
- "normalized": false,
580
- "special": true
581
  },
582
  {
583
  "id": 50321,
 
584
  "content": "<|pa|>",
585
  "single_word": false,
586
  "lstrip": false,
587
  "rstrip": false,
588
- "normalized": false,
589
- "special": true
590
  },
591
  {
592
  "id": 50322,
 
593
  "content": "<|si|>",
594
  "single_word": false,
595
  "lstrip": false,
596
  "rstrip": false,
597
- "normalized": false,
598
- "special": true
599
  },
600
  {
601
  "id": 50323,
 
602
  "content": "<|km|>",
603
  "single_word": false,
604
  "lstrip": false,
605
  "rstrip": false,
606
- "normalized": false,
607
- "special": true
608
  },
609
  {
610
  "id": 50324,
 
611
  "content": "<|sn|>",
612
  "single_word": false,
613
  "lstrip": false,
614
  "rstrip": false,
615
- "normalized": false,
616
- "special": true
617
  },
618
  {
619
  "id": 50325,
 
620
  "content": "<|yo|>",
621
  "single_word": false,
622
  "lstrip": false,
623
  "rstrip": false,
624
- "normalized": false,
625
- "special": true
626
  },
627
  {
628
  "id": 50326,
 
629
  "content": "<|so|>",
630
  "single_word": false,
631
  "lstrip": false,
632
  "rstrip": false,
633
- "normalized": false,
634
- "special": true
635
  },
636
  {
637
  "id": 50327,
 
638
  "content": "<|af|>",
639
  "single_word": false,
640
  "lstrip": false,
641
  "rstrip": false,
642
- "normalized": false,
643
- "special": true
644
  },
645
  {
646
  "id": 50328,
 
647
  "content": "<|oc|>",
648
  "single_word": false,
649
  "lstrip": false,
650
  "rstrip": false,
651
- "normalized": false,
652
- "special": true
653
  },
654
  {
655
  "id": 50329,
 
656
  "content": "<|ka|>",
657
  "single_word": false,
658
  "lstrip": false,
659
  "rstrip": false,
660
- "normalized": false,
661
- "special": true
662
  },
663
  {
664
  "id": 50330,
 
665
  "content": "<|be|>",
666
  "single_word": false,
667
  "lstrip": false,
668
  "rstrip": false,
669
- "normalized": false,
670
- "special": true
671
  },
672
  {
673
  "id": 50331,
 
674
  "content": "<|tg|>",
675
  "single_word": false,
676
  "lstrip": false,
677
  "rstrip": false,
678
- "normalized": false,
679
- "special": true
680
  },
681
  {
682
  "id": 50332,
 
683
  "content": "<|sd|>",
684
  "single_word": false,
685
  "lstrip": false,
686
  "rstrip": false,
687
- "normalized": false,
688
- "special": true
689
  },
690
  {
691
  "id": 50333,
 
692
  "content": "<|gu|>",
693
  "single_word": false,
694
  "lstrip": false,
695
  "rstrip": false,
696
- "normalized": false,
697
- "special": true
698
  },
699
  {
700
  "id": 50334,
 
701
  "content": "<|am|>",
702
  "single_word": false,
703
  "lstrip": false,
704
  "rstrip": false,
705
- "normalized": false,
706
- "special": true
707
  },
708
  {
709
  "id": 50335,
 
710
  "content": "<|yi|>",
711
  "single_word": false,
712
  "lstrip": false,
713
  "rstrip": false,
714
- "normalized": false,
715
- "special": true
716
  },
717
  {
718
  "id": 50336,
 
719
  "content": "<|lo|>",
720
  "single_word": false,
721
  "lstrip": false,
722
  "rstrip": false,
723
- "normalized": false,
724
- "special": true
725
  },
726
  {
727
  "id": 50337,
 
728
  "content": "<|uz|>",
729
  "single_word": false,
730
  "lstrip": false,
731
  "rstrip": false,
732
- "normalized": false,
733
- "special": true
734
  },
735
  {
736
  "id": 50338,
 
737
  "content": "<|fo|>",
738
  "single_word": false,
739
  "lstrip": false,
740
  "rstrip": false,
741
- "normalized": false,
742
- "special": true
743
  },
744
  {
745
  "id": 50339,
 
746
  "content": "<|ht|>",
747
  "single_word": false,
748
  "lstrip": false,
749
  "rstrip": false,
750
- "normalized": false,
751
- "special": true
752
  },
753
  {
754
  "id": 50340,
 
755
  "content": "<|ps|>",
756
  "single_word": false,
757
  "lstrip": false,
758
  "rstrip": false,
759
- "normalized": false,
760
- "special": true
761
  },
762
  {
763
  "id": 50341,
 
764
  "content": "<|tk|>",
765
  "single_word": false,
766
  "lstrip": false,
767
  "rstrip": false,
768
- "normalized": false,
769
- "special": true
770
  },
771
  {
772
  "id": 50342,
 
773
  "content": "<|nn|>",
774
  "single_word": false,
775
  "lstrip": false,
776
  "rstrip": false,
777
- "normalized": false,
778
- "special": true
779
  },
780
  {
781
  "id": 50343,
 
782
  "content": "<|mt|>",
783
  "single_word": false,
784
  "lstrip": false,
785
  "rstrip": false,
786
- "normalized": false,
787
- "special": true
788
  },
789
  {
790
  "id": 50344,
 
791
  "content": "<|sa|>",
792
  "single_word": false,
793
  "lstrip": false,
794
  "rstrip": false,
795
- "normalized": false,
796
- "special": true
797
  },
798
  {
799
  "id": 50345,
 
800
  "content": "<|lb|>",
801
  "single_word": false,
802
  "lstrip": false,
803
  "rstrip": false,
804
- "normalized": false,
805
- "special": true
806
  },
807
  {
808
  "id": 50346,
 
809
  "content": "<|my|>",
810
  "single_word": false,
811
  "lstrip": false,
812
  "rstrip": false,
813
- "normalized": false,
814
- "special": true
815
  },
816
  {
817
  "id": 50347,
 
818
  "content": "<|bo|>",
819
  "single_word": false,
820
  "lstrip": false,
821
  "rstrip": false,
822
- "normalized": false,
823
- "special": true
824
  },
825
  {
826
  "id": 50348,
 
827
  "content": "<|tl|>",
828
  "single_word": false,
829
  "lstrip": false,
830
  "rstrip": false,
831
- "normalized": false,
832
- "special": true
833
  },
834
  {
835
  "id": 50349,
 
836
  "content": "<|mg|>",
837
  "single_word": false,
838
  "lstrip": false,
839
  "rstrip": false,
840
- "normalized": false,
841
- "special": true
842
  },
843
  {
844
  "id": 50350,
 
845
  "content": "<|as|>",
846
  "single_word": false,
847
  "lstrip": false,
848
  "rstrip": false,
849
- "normalized": false,
850
- "special": true
851
  },
852
  {
853
  "id": 50351,
 
854
  "content": "<|tt|>",
855
  "single_word": false,
856
  "lstrip": false,
857
  "rstrip": false,
858
- "normalized": false,
859
- "special": true
860
  },
861
  {
862
  "id": 50352,
 
863
  "content": "<|haw|>",
864
  "single_word": false,
865
  "lstrip": false,
866
  "rstrip": false,
867
- "normalized": false,
868
- "special": true
869
  },
870
  {
871
  "id": 50353,
 
872
  "content": "<|ln|>",
873
  "single_word": false,
874
  "lstrip": false,
875
  "rstrip": false,
876
- "normalized": false,
877
- "special": true
878
  },
879
  {
880
  "id": 50354,
 
881
  "content": "<|ha|>",
882
  "single_word": false,
883
  "lstrip": false,
884
  "rstrip": false,
885
- "normalized": false,
886
- "special": true
887
  },
888
  {
889
  "id": 50355,
 
890
  "content": "<|ba|>",
891
  "single_word": false,
892
  "lstrip": false,
893
  "rstrip": false,
894
- "normalized": false,
895
- "special": true
896
  },
897
  {
898
  "id": 50356,
 
899
  "content": "<|jw|>",
900
  "single_word": false,
901
  "lstrip": false,
902
  "rstrip": false,
903
- "normalized": false,
904
- "special": true
905
  },
906
  {
907
  "id": 50357,
 
908
  "content": "<|su|>",
909
  "single_word": false,
910
  "lstrip": false,
911
  "rstrip": false,
912
- "normalized": false,
913
- "special": true
914
  },
915
  {
916
  "id": 50358,
 
917
  "content": "<|translate|>",
918
  "single_word": false,
919
  "lstrip": false,
920
  "rstrip": false,
921
- "normalized": false,
922
- "special": true
923
  },
924
  {
925
  "id": 50359,
 
926
  "content": "<|transcribe|>",
927
  "single_word": false,
928
  "lstrip": false,
929
  "rstrip": false,
930
- "normalized": false,
931
- "special": true
932
  },
933
  {
934
  "id": 50360,
 
935
  "content": "<|startoflm|>",
936
  "single_word": false,
937
  "lstrip": false,
938
  "rstrip": false,
939
- "normalized": false,
940
- "special": true
941
  },
942
  {
943
  "id": 50361,
 
944
  "content": "<|startofprev|>",
945
  "single_word": false,
946
  "lstrip": false,
947
  "rstrip": false,
948
- "normalized": false,
949
- "special": true
950
  },
951
  {
952
  "id": 50362,
 
953
  "content": "<|nocaptions|>",
954
  "single_word": false,
955
  "lstrip": false,
956
  "rstrip": false,
957
- "normalized": false,
958
- "special": true
959
  },
960
  {
961
  "id": 50363,
 
962
  "content": "<|notimestamps|>",
963
  "single_word": false,
964
  "lstrip": false,
965
  "rstrip": false,
966
- "normalized": false,
967
- "special": true
968
  }
969
  ],
970
  "normalizer": null,
971
  "pre_tokenizer": {
972
  "type": "ByteLevel",
973
  "add_prefix_space": false,
974
- "trim_offsets": true,
975
- "use_regex": true
976
  },
977
  "post_processor": {
978
  "type": "TemplateProcessing",
@@ -1067,8 +1066,7 @@
1067
  "decoder": {
1068
  "type": "ByteLevel",
1069
  "add_prefix_space": true,
1070
- "trim_offsets": true,
1071
- "use_regex": true
1072
  },
1073
  "model": {
1074
  "type": "BPE",
5
  "added_tokens": [
6
  {
7
  "id": 50257,
8
+ "special": true,
9
  "content": "<|endoftext|>",
10
  "single_word": false,
11
  "lstrip": false,
12
  "rstrip": false,
13
+ "normalized": false
 
14
  },
15
  {
16
  "id": 50258,
17
+ "special": true,
18
  "content": "<|startoftranscript|>",
19
  "single_word": false,
20
  "lstrip": false,
21
  "rstrip": false,
22
+ "normalized": false
 
23
  },
24
  {
25
  "id": 50259,
26
+ "special": true,
27
  "content": "<|en|>",
28
  "single_word": false,
29
  "lstrip": false,
30
  "rstrip": false,
31
+ "normalized": false
 
32
  },
33
  {
34
  "id": 50260,
35
+ "special": true,
36
  "content": "<|zh|>",
37
  "single_word": false,
38
  "lstrip": false,
39
  "rstrip": false,
40
+ "normalized": false
 
41
  },
42
  {
43
  "id": 50261,
44
+ "special": true,
45
  "content": "<|de|>",
46
  "single_word": false,
47
  "lstrip": false,
48
  "rstrip": false,
49
+ "normalized": false
 
50
  },
51
  {
52
  "id": 50262,
53
+ "special": true,
54
  "content": "<|es|>",
55
  "single_word": false,
56
  "lstrip": false,
57
  "rstrip": false,
58
+ "normalized": false
 
59
  },
60
  {
61
  "id": 50263,
62
+ "special": true,
63
  "content": "<|ru|>",
64
  "single_word": false,
65
  "lstrip": false,
66
  "rstrip": false,
67
+ "normalized": false
 
68
  },
69
  {
70
  "id": 50264,
71
+ "special": true,
72
  "content": "<|ko|>",
73
  "single_word": false,
74
  "lstrip": false,
75
  "rstrip": false,
76
+ "normalized": false
 
77
  },
78
  {
79
  "id": 50265,
80
+ "special": true,
81
  "content": "<|fr|>",
82
  "single_word": false,
83
  "lstrip": false,
84
  "rstrip": false,
85
+ "normalized": false
 
86
  },
87
  {
88
  "id": 50266,
89
+ "special": true,
90
  "content": "<|ja|>",
91
  "single_word": false,
92
  "lstrip": false,
93
  "rstrip": false,
94
+ "normalized": false
 
95
  },
96
  {
97
  "id": 50267,
98
+ "special": true,
99
  "content": "<|pt|>",
100
  "single_word": false,
101
  "lstrip": false,
102
  "rstrip": false,
103
+ "normalized": false
 
104
  },
105
  {
106
  "id": 50268,
107
+ "special": true,
108
  "content": "<|tr|>",
109
  "single_word": false,
110
  "lstrip": false,
111
  "rstrip": false,
112
+ "normalized": false
 
113
  },
114
  {
115
  "id": 50269,
116
+ "special": true,
117
  "content": "<|pl|>",
118
  "single_word": false,
119
  "lstrip": false,
120
  "rstrip": false,
121
+ "normalized": false
 
122
  },
123
  {
124
  "id": 50270,
125
+ "special": true,
126
  "content": "<|ca|>",
127
  "single_word": false,
128
  "lstrip": false,
129
  "rstrip": false,
130
+ "normalized": false
 
131
  },
132
  {
133
  "id": 50271,
134
+ "special": true,
135
  "content": "<|nl|>",
136
  "single_word": false,
137
  "lstrip": false,
138
  "rstrip": false,
139
+ "normalized": false
 
140
  },
141
  {
142
  "id": 50272,
143
+ "special": true,
144
  "content": "<|ar|>",
145
  "single_word": false,
146
  "lstrip": false,
147
  "rstrip": false,
148
+ "normalized": false
 
149
  },
150
  {
151
  "id": 50273,
152
+ "special": true,
153
  "content": "<|sv|>",
154
  "single_word": false,
155
  "lstrip": false,
156
  "rstrip": false,
157
+ "normalized": false
 
158
  },
159
  {
160
  "id": 50274,
161
+ "special": true,
162
  "content": "<|it|>",
163
  "single_word": false,
164
  "lstrip": false,
165
  "rstrip": false,
166
+ "normalized": false
 
167
  },
168
  {
169
  "id": 50275,
170
+ "special": true,
171
  "content": "<|id|>",
172
  "single_word": false,
173
  "lstrip": false,
174
  "rstrip": false,
175
+ "normalized": false
 
176
  },
177
  {
178
  "id": 50276,
179
+ "special": true,
180
  "content": "<|hi|>",
181
  "single_word": false,
182
  "lstrip": false,
183
  "rstrip": false,
184
+ "normalized": false
 
185
  },
186
  {
187
  "id": 50277,
188
+ "special": true,
189
  "content": "<|fi|>",
190
  "single_word": false,
191
  "lstrip": false,
192
  "rstrip": false,
193
+ "normalized": false
 
194
  },
195
  {
196
  "id": 50278,
197
+ "special": true,
198
  "content": "<|vi|>",
199
  "single_word": false,
200
  "lstrip": false,
201
  "rstrip": false,
202
+ "normalized": false
 
203
  },
204
  {
205
  "id": 50279,
206
+ "special": true,
207
  "content": "<|he|>",
208
  "single_word": false,
209
  "lstrip": false,
210
  "rstrip": false,
211
+ "normalized": false
 
212
  },
213
  {
214
  "id": 50280,
215
+ "special": true,
216
  "content": "<|uk|>",
217
  "single_word": false,
218
  "lstrip": false,
219
  "rstrip": false,
220
+ "normalized": false
 
221
  },
222
  {
223
  "id": 50281,
224
+ "special": true,
225
  "content": "<|el|>",
226
  "single_word": false,
227
  "lstrip": false,
228
  "rstrip": false,
229
+ "normalized": false
 
230
  },
231
  {
232
  "id": 50282,
233
+ "special": true,
234
  "content": "<|ms|>",
235
  "single_word": false,
236
  "lstrip": false,
237
  "rstrip": false,
238
+ "normalized": false
 
239
  },
240
  {
241
  "id": 50283,
242
+ "special": true,
243
  "content": "<|cs|>",
244
  "single_word": false,
245
  "lstrip": false,
246
  "rstrip": false,
247
+ "normalized": false
 
248
  },
249
  {
250
  "id": 50284,
251
+ "special": true,
252
  "content": "<|ro|>",
253
  "single_word": false,
254
  "lstrip": false,
255
  "rstrip": false,
256
+ "normalized": false
 
257
  },
258
  {
259
  "id": 50285,
260
+ "special": true,
261
  "content": "<|da|>",
262
  "single_word": false,
263
  "lstrip": false,
264
  "rstrip": false,
265
+ "normalized": false
 
266
  },
267
  {
268
  "id": 50286,
269
+ "special": true,
270
  "content": "<|hu|>",
271
  "single_word": false,
272
  "lstrip": false,
273
  "rstrip": false,
274
+ "normalized": false
 
275
  },
276
  {
277
  "id": 50287,
278
+ "special": true,
279
  "content": "<|ta|>",
280
  "single_word": false,
281
  "lstrip": false,
282
  "rstrip": false,
283
+ "normalized": false
 
284
  },
285
  {
286
  "id": 50288,
287
+ "special": true,
288
  "content": "<|no|>",
289
  "single_word": false,
290
  "lstrip": false,
291
  "rstrip": false,
292
+ "normalized": false
 
293
  },
294
  {
295
  "id": 50289,
296
+ "special": true,
297
  "content": "<|th|>",
298
  "single_word": false,
299
  "lstrip": false,
300
  "rstrip": false,
301
+ "normalized": false
 
302
  },
303
  {
304
  "id": 50290,
305
+ "special": true,
306
  "content": "<|ur|>",
307
  "single_word": false,
308
  "lstrip": false,
309
  "rstrip": false,
310
+ "normalized": false
 
311
  },
312
  {
313
  "id": 50291,
314
+ "special": true,
315
  "content": "<|hr|>",
316
  "single_word": false,
317
  "lstrip": false,
318
  "rstrip": false,
319
+ "normalized": false
 
320
  },
321
  {
322
  "id": 50292,
323
+ "special": true,
324
  "content": "<|bg|>",
325
  "single_word": false,
326
  "lstrip": false,
327
  "rstrip": false,
328
+ "normalized": false
 
329
  },
330
  {
331
  "id": 50293,
332
+ "special": true,
333
  "content": "<|lt|>",
334
  "single_word": false,
335
  "lstrip": false,
336
  "rstrip": false,
337
+ "normalized": false
 
338
  },
339
  {
340
  "id": 50294,
341
+ "special": true,
342
  "content": "<|la|>",
343
  "single_word": false,
344
  "lstrip": false,
345
  "rstrip": false,
346
+ "normalized": false
 
347
  },
348
  {
349
  "id": 50295,
350
+ "special": true,
351
  "content": "<|mi|>",
352
  "single_word": false,
353
  "lstrip": false,
354
  "rstrip": false,
355
+ "normalized": false
 
356
  },
357
  {
358
  "id": 50296,
359
+ "special": true,
360
  "content": "<|ml|>",
361
  "single_word": false,
362
  "lstrip": false,
363
  "rstrip": false,
364
+ "normalized": false
 
365
  },
366
  {
367
  "id": 50297,
368
+ "special": true,
369
  "content": "<|cy|>",
370
  "single_word": false,
371
  "lstrip": false,
372
  "rstrip": false,
373
+ "normalized": false
 
374
  },
375
  {
376
  "id": 50298,
377
+ "special": true,
378
  "content": "<|sk|>",
379
  "single_word": false,
380
  "lstrip": false,
381
  "rstrip": false,
382
+ "normalized": false
 
383
  },
384
  {
385
  "id": 50299,
386
+ "special": true,
387
  "content": "<|te|>",
388
  "single_word": false,
389
  "lstrip": false,
390
  "rstrip": false,
391
+ "normalized": false
 
392
  },
393
  {
394
  "id": 50300,
395
+ "special": true,
396
  "content": "<|fa|>",
397
  "single_word": false,
398
  "lstrip": false,
399
  "rstrip": false,
400
+ "normalized": false
 
401
  },
402
  {
403
  "id": 50301,
404
+ "special": true,
405
  "content": "<|lv|>",
406
  "single_word": false,
407
  "lstrip": false,
408
  "rstrip": false,
409
+ "normalized": false
 
410
  },
411
  {
412
  "id": 50302,
413
+ "special": true,
414
  "content": "<|bn|>",
415
  "single_word": false,
416
  "lstrip": false,
417
  "rstrip": false,
418
+ "normalized": false
 
419
  },
420
  {
421
  "id": 50303,
422
+ "special": true,
423
  "content": "<|sr|>",
424
  "single_word": false,
425
  "lstrip": false,
426
  "rstrip": false,
427
+ "normalized": false
 
428
  },
429
  {
430
  "id": 50304,
431
+ "special": true,
432
  "content": "<|az|>",
433
  "single_word": false,
434
  "lstrip": false,
435
  "rstrip": false,
436
+ "normalized": false
 
437
  },
438
  {
439
  "id": 50305,
440
+ "special": true,
441
  "content": "<|sl|>",
442
  "single_word": false,
443
  "lstrip": false,
444
  "rstrip": false,
445
+ "normalized": false
 
446
  },
447
  {
448
  "id": 50306,
449
+ "special": true,
450
  "content": "<|kn|>",
451
  "single_word": false,
452
  "lstrip": false,
453
  "rstrip": false,
454
+ "normalized": false
 
455
  },
456
  {
457
  "id": 50307,
458
+ "special": true,
459
  "content": "<|et|>",
460
  "single_word": false,
461
  "lstrip": false,
462
  "rstrip": false,
463
+ "normalized": false
 
464
  },
465
  {
466
  "id": 50308,
467
+ "special": true,
468
  "content": "<|mk|>",
469
  "single_word": false,
470
  "lstrip": false,
471
  "rstrip": false,
472
+ "normalized": false
 
473
  },
474
  {
475
  "id": 50309,
476
+ "special": true,
477
  "content": "<|br|>",
478
  "single_word": false,
479
  "lstrip": false,
480
  "rstrip": false,
481
+ "normalized": false
 
482
  },
483
  {
484
  "id": 50310,
485
+ "special": true,
486
  "content": "<|eu|>",
487
  "single_word": false,
488
  "lstrip": false,
489
  "rstrip": false,
490
+ "normalized": false
 
491
  },
492
  {
493
  "id": 50311,
494
+ "special": true,
495
  "content": "<|is|>",
496
  "single_word": false,
497
  "lstrip": false,
498
  "rstrip": false,
499
+ "normalized": false
 
500
  },
501
  {
502
  "id": 50312,
503
+ "special": true,
504
  "content": "<|hy|>",
505
  "single_word": false,
506
  "lstrip": false,
507
  "rstrip": false,
508
+ "normalized": false
 
509
  },
510
  {
511
  "id": 50313,
512
+ "special": true,
513
  "content": "<|ne|>",
514
  "single_word": false,
515
  "lstrip": false,
516
  "rstrip": false,
517
+ "normalized": false
 
518
  },
519
  {
520
  "id": 50314,
521
+ "special": true,
522
  "content": "<|mn|>",
523
  "single_word": false,
524
  "lstrip": false,
525
  "rstrip": false,
526
+ "normalized": false
 
527
  },
528
  {
529
  "id": 50315,
530
+ "special": true,
531
  "content": "<|bs|>",
532
  "single_word": false,
533
  "lstrip": false,
534
  "rstrip": false,
535
+ "normalized": false
 
536
  },
537
  {
538
  "id": 50316,
539
+ "special": true,
540
  "content": "<|kk|>",
541
  "single_word": false,
542
  "lstrip": false,
543
  "rstrip": false,
544
+ "normalized": false
 
545
  },
546
  {
547
  "id": 50317,
548
+ "special": true,
549
  "content": "<|sq|>",
550
  "single_word": false,
551
  "lstrip": false,
552
  "rstrip": false,
553
+ "normalized": false
 
554
  },
555
  {
556
  "id": 50318,
557
+ "special": true,
558
  "content": "<|sw|>",
559
  "single_word": false,
560
  "lstrip": false,
561
  "rstrip": false,
562
+ "normalized": false
 
563
  },
564
  {
565
  "id": 50319,
566
+ "special": true,
567
  "content": "<|gl|>",
568
  "single_word": false,
569
  "lstrip": false,
570
  "rstrip": false,
571
+ "normalized": false
 
572
  },
573
  {
574
  "id": 50320,
575
+ "special": true,
576
  "content": "<|mr|>",
577
  "single_word": false,
578
  "lstrip": false,
579
  "rstrip": false,
580
+ "normalized": false
 
581
  },
582
  {
583
  "id": 50321,
584
+ "special": true,
585
  "content": "<|pa|>",
586
  "single_word": false,
587
  "lstrip": false,
588
  "rstrip": false,
589
+ "normalized": false
 
590
  },
591
  {
592
  "id": 50322,
593
+ "special": true,
594
  "content": "<|si|>",
595
  "single_word": false,
596
  "lstrip": false,
597
  "rstrip": false,
598
+ "normalized": false
 
599
  },
600
  {
601
  "id": 50323,
602
+ "special": true,
603
  "content": "<|km|>",
604
  "single_word": false,
605
  "lstrip": false,
606
  "rstrip": false,
607
+ "normalized": false
 
608
  },
609
  {
610
  "id": 50324,
611
+ "special": true,
612
  "content": "<|sn|>",
613
  "single_word": false,
614
  "lstrip": false,
615
  "rstrip": false,
616
+ "normalized": false
 
617
  },
618
  {
619
  "id": 50325,
620
+ "special": true,
621
  "content": "<|yo|>",
622
  "single_word": false,
623
  "lstrip": false,
624
  "rstrip": false,
625
+ "normalized": false
 
626
  },
627
  {
628
  "id": 50326,
629
+ "special": true,
630
  "content": "<|so|>",
631
  "single_word": false,
632
  "lstrip": false,
633
  "rstrip": false,
634
+ "normalized": false
 
635
  },
636
  {
637
  "id": 50327,
638
+ "special": true,
639
  "content": "<|af|>",
640
  "single_word": false,
641
  "lstrip": false,
642
  "rstrip": false,
643
+ "normalized": false
 
644
  },
645
  {
646
  "id": 50328,
647
+ "special": true,
648
  "content": "<|oc|>",
649
  "single_word": false,
650
  "lstrip": false,
651
  "rstrip": false,
652
+ "normalized": false
 
653
  },
654
  {
655
  "id": 50329,
656
+ "special": true,
657
  "content": "<|ka|>",
658
  "single_word": false,
659
  "lstrip": false,
660
  "rstrip": false,
661
+ "normalized": false
 
662
  },
663
  {
664
  "id": 50330,
665
+ "special": true,
666
  "content": "<|be|>",
667
  "single_word": false,
668
  "lstrip": false,
669
  "rstrip": false,
670
+ "normalized": false
 
671
  },
672
  {
673
  "id": 50331,
674
+ "special": true,
675
  "content": "<|tg|>",
676
  "single_word": false,
677
  "lstrip": false,
678
  "rstrip": false,
679
+ "normalized": false
 
680
  },
681
  {
682
  "id": 50332,
683
+ "special": true,
684
  "content": "<|sd|>",
685
  "single_word": false,
686
  "lstrip": false,
687
  "rstrip": false,
688
+ "normalized": false
 
689
  },
690
  {
691
  "id": 50333,
692
+ "special": true,
693
  "content": "<|gu|>",
694
  "single_word": false,
695
  "lstrip": false,
696
  "rstrip": false,
697
+ "normalized": false
 
698
  },
699
  {
700
  "id": 50334,
701
+ "special": true,
702
  "content": "<|am|>",
703
  "single_word": false,
704
  "lstrip": false,
705
  "rstrip": false,
706
+ "normalized": false
 
707
  },
708
  {
709
  "id": 50335,
710
+ "special": true,
711
  "content": "<|yi|>",
712
  "single_word": false,
713
  "lstrip": false,
714
  "rstrip": false,
715
+ "normalized": false
 
716
  },
717
  {
718
  "id": 50336,
719
+ "special": true,
720
  "content": "<|lo|>",
721
  "single_word": false,
722
  "lstrip": false,
723
  "rstrip": false,
724
+ "normalized": false
 
725
  },
726
  {
727
  "id": 50337,
728
+ "special": true,
729
  "content": "<|uz|>",
730
  "single_word": false,
731
  "lstrip": false,
732
  "rstrip": false,
733
+ "normalized": false
 
734
  },
735
  {
736
  "id": 50338,
737
+ "special": true,
738
  "content": "<|fo|>",
739
  "single_word": false,
740
  "lstrip": false,
741
  "rstrip": false,
742
+ "normalized": false
 
743
  },
744
  {
745
  "id": 50339,
746
+ "special": true,
747
  "content": "<|ht|>",
748
  "single_word": false,
749
  "lstrip": false,
750
  "rstrip": false,
751
+ "normalized": false
 
752
  },
753
  {
754
  "id": 50340,
755
+ "special": true,
756
  "content": "<|ps|>",
757
  "single_word": false,
758
  "lstrip": false,
759
  "rstrip": false,
760
+ "normalized": false
 
761
  },
762
  {
763
  "id": 50341,
764
+ "special": true,
765
  "content": "<|tk|>",
766
  "single_word": false,
767
  "lstrip": false,
768
  "rstrip": false,
769
+ "normalized": false
 
770
  },
771
  {
772
  "id": 50342,
773
+ "special": true,
774
  "content": "<|nn|>",
775
  "single_word": false,
776
  "lstrip": false,
777
  "rstrip": false,
778
+ "normalized": false
 
779
  },
780
  {
781
  "id": 50343,
782
+ "special": true,
783
  "content": "<|mt|>",
784
  "single_word": false,
785
  "lstrip": false,
786
  "rstrip": false,
787
+ "normalized": false
 
788
  },
789
  {
790
  "id": 50344,
791
+ "special": true,
792
  "content": "<|sa|>",
793
  "single_word": false,
794
  "lstrip": false,
795
  "rstrip": false,
796
+ "normalized": false
 
797
  },
798
  {
799
  "id": 50345,
800
+ "special": true,
801
  "content": "<|lb|>",
802
  "single_word": false,
803
  "lstrip": false,
804
  "rstrip": false,
805
+ "normalized": false
 
806
  },
807
  {
808
  "id": 50346,
809
+ "special": true,
810
  "content": "<|my|>",
811
  "single_word": false,
812
  "lstrip": false,
813
  "rstrip": false,
814
+ "normalized": false
 
815
  },
816
  {
817
  "id": 50347,
818
+ "special": true,
819
  "content": "<|bo|>",
820
  "single_word": false,
821
  "lstrip": false,
822
  "rstrip": false,
823
+ "normalized": false
 
824
  },
825
  {
826
  "id": 50348,
827
+ "special": true,
828
  "content": "<|tl|>",
829
  "single_word": false,
830
  "lstrip": false,
831
  "rstrip": false,
832
+ "normalized": false
 
833
  },
834
  {
835
  "id": 50349,
836
+ "special": true,
837
  "content": "<|mg|>",
838
  "single_word": false,
839
  "lstrip": false,
840
  "rstrip": false,
841
+ "normalized": false
 
842
  },
843
  {
844
  "id": 50350,
845
+ "special": true,
846
  "content": "<|as|>",
847
  "single_word": false,
848
  "lstrip": false,
849
  "rstrip": false,
850
+ "normalized": false
 
851
  },
852
  {
853
  "id": 50351,
854
+ "special": true,
855
  "content": "<|tt|>",
856
  "single_word": false,
857
  "lstrip": false,
858
  "rstrip": false,
859
+ "normalized": false
 
860
  },
861
  {
862
  "id": 50352,
863
+ "special": true,
864
  "content": "<|haw|>",
865
  "single_word": false,
866
  "lstrip": false,
867
  "rstrip": false,
868
+ "normalized": false
 
869
  },
870
  {
871
  "id": 50353,
872
+ "special": true,
873
  "content": "<|ln|>",
874
  "single_word": false,
875
  "lstrip": false,
876
  "rstrip": false,
877
+ "normalized": false
 
878
  },
879
  {
880
  "id": 50354,
881
+ "special": true,
882
  "content": "<|ha|>",
883
  "single_word": false,
884
  "lstrip": false,
885
  "rstrip": false,
886
+ "normalized": false
 
887
  },
888
  {
889
  "id": 50355,
890
+ "special": true,
891
  "content": "<|ba|>",
892
  "single_word": false,
893
  "lstrip": false,
894
  "rstrip": false,
895
+ "normalized": false
 
896
  },
897
  {
898
  "id": 50356,
899
+ "special": true,
900
  "content": "<|jw|>",
901
  "single_word": false,
902
  "lstrip": false,
903
  "rstrip": false,
904
+ "normalized": false
 
905
  },
906
  {
907
  "id": 50357,
908
+ "special": true,
909
  "content": "<|su|>",
910
  "single_word": false,
911
  "lstrip": false,
912
  "rstrip": false,
913
+ "normalized": false
 
914
  },
915
  {
916
  "id": 50358,
917
+ "special": true,
918
  "content": "<|translate|>",
919
  "single_word": false,
920
  "lstrip": false,
921
  "rstrip": false,
922
+ "normalized": false
 
923
  },
924
  {
925
  "id": 50359,
926
+ "special": true,
927
  "content": "<|transcribe|>",
928
  "single_word": false,
929
  "lstrip": false,
930
  "rstrip": false,
931
+ "normalized": false
 
932
  },
933
  {
934
  "id": 50360,
935
+ "special": true,
936
  "content": "<|startoflm|>",
937
  "single_word": false,
938
  "lstrip": false,
939
  "rstrip": false,
940
+ "normalized": false
 
941
  },
942
  {
943
  "id": 50361,
944
+ "special": true,
945
  "content": "<|startofprev|>",
946
  "single_word": false,
947
  "lstrip": false,
948
  "rstrip": false,
949
+ "normalized": false
 
950
  },
951
  {
952
  "id": 50362,
953
+ "special": true,
954
  "content": "<|nocaptions|>",
955
  "single_word": false,
956
  "lstrip": false,
957
  "rstrip": false,
958
+ "normalized": false
 
959
  },
960
  {
961
  "id": 50363,
962
+ "special": true,
963
  "content": "<|notimestamps|>",
964
  "single_word": false,
965
  "lstrip": false,
966
  "rstrip": false,
967
+ "normalized": false
 
968
  }
969
  ],
970
  "normalizer": null,
971
  "pre_tokenizer": {
972
  "type": "ByteLevel",
973
  "add_prefix_space": false,
974
+ "trim_offsets": true
 
975
  },
976
  "post_processor": {
977
  "type": "TemplateProcessing",
1066
  "decoder": {
1067
  "type": "ByteLevel",
1068
  "add_prefix_space": true,
1069
+ "trim_offsets": true
 
1070
  },
1071
  "model": {
1072
  "type": "BPE",
quantized/openai/whisper-base/speech2seq-lm-with-past/tokenizer_config.json CHANGED
@@ -19,7 +19,6 @@
19
  },
20
  "errors": "replace",
21
  "model_max_length": 448,
22
- "name_or_path": "openai/whisper-base",
23
  "pad_token": null,
24
  "processor_class": "WhisperProcessor",
25
  "return_attention_mask": false,
19
  },
20
  "errors": "replace",
21
  "model_max_length": 448,
 
22
  "pad_token": null,
23
  "processor_class": "WhisperProcessor",
24
  "return_attention_mask": false,
quantized/openai/whisper-base/speech2seq-lm-with-past/vocab.json CHANGED
The diff for this file is too large to render. See raw diff
quantized/openai/whisper-small.en/default/config.json CHANGED
@@ -2,6 +2,7 @@
2
  "_name_or_path": "openai/whisper-small.en",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
 
5
  "architectures": [
6
  "WhisperForConditionalGeneration"
7
  ],
@@ -11,6 +12,7 @@
11
  50256
12
  ],
13
  "bos_token_id": 50257,
 
14
  "d_model": 768,
15
  "decoder_attention_heads": 12,
16
  "decoder_ffn_dim": 3072,
@@ -31,6 +33,12 @@
31
  ],
32
  "init_std": 0.02,
33
  "is_encoder_decoder": true,
 
 
 
 
 
 
34
  "max_length": 448,
35
  "max_source_positions": 1500,
36
  "max_target_positions": 448,
@@ -129,7 +137,8 @@
129
  50360,
130
  50361
131
  ],
132
- "transformers_version": "4.26.1",
133
  "use_cache": true,
 
134
  "vocab_size": 51864
135
  }
2
  "_name_or_path": "openai/whisper-small.en",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
5
+ "apply_spec_augment": false,
6
  "architectures": [
7
  "WhisperForConditionalGeneration"
8
  ],
12
  50256
13
  ],
14
  "bos_token_id": 50257,
15
+ "classifier_proj_size": 256,
16
  "d_model": 768,
17
  "decoder_attention_heads": 12,
18
  "decoder_ffn_dim": 3072,
33
  ],
34
  "init_std": 0.02,
35
  "is_encoder_decoder": true,
36
+ "mask_feature_length": 10,
37
+ "mask_feature_min_masks": 0,
38
+ "mask_feature_prob": 0.0,
39
+ "mask_time_length": 10,
40
+ "mask_time_min_masks": 2,
41
+ "mask_time_prob": 0.05,
42
  "max_length": 448,
43
  "max_source_positions": 1500,
44
  "max_target_positions": 448,
137
  50360,
138
  50361
139
  ],
140
+ "transformers_version": "4.27.2",
141
  "use_cache": true,
142
+ "use_weighted_layer_sum": false,
143
  "vocab_size": 51864
144
  }
quantized/openai/whisper-small.en/default/decoder_model.onnx DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:8af023c75b14ede43fa2904e5d702d1ea36673709cb6a0f849d2fb7c498f67f7
3
- size 156503755
 
 
 
quantized/openai/whisper-small.en/default/encoder_model.onnx DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:6792ba696e505bd3527efa62327024bc04db8bf9558d47e16a2fff7e3810440f
3
- size 92735928
 
 
 
quantized/openai/whisper-small.en/default/generation_config.json DELETED
@@ -1,111 +0,0 @@
1
- {
2
- "begin_suppress_tokens": [
3
- 220,
4
- 50256
5
- ],
6
- "bos_token_id": 50257,
7
- "decoder_start_token_id": 50257,
8
- "eos_token_id": 50256,
9
- "forced_decoder_ids": [
10
- [
11
- 1,
12
- 50362
13
- ]
14
- ],
15
- "is_multilingual": false,
16
- "max_initial_timestamp_index": 1,
17
- "max_length": 448,
18
- "no_timestamps_token_id": 50362,
19
- "pad_token_id": 50256,
20
- "suppress_tokens": [
21
- 1,
22
- 2,
23
- 7,
24
- 8,
25
- 9,
26
- 10,
27
- 14,
28
- 25,
29
- 26,
30
- 27,
31
- 28,
32
- 29,
33
- 31,
34
- 58,
35
- 59,
36
- 60,
37
- 61,
38
- 62,
39
- 63,
40
- 90,
41
- 91,
42
- 92,
43
- 93,
44
- 357,
45
- 366,
46
- 438,
47
- 532,
48
- 685,
49
- 705,
50
- 796,
51
- 930,
52
- 1058,
53
- 1220,
54
- 1267,
55
- 1279,
56
- 1303,
57
- 1343,
58
- 1377,
59
- 1391,
60
- 1635,
61
- 1782,
62
- 1875,
63
- 2162,
64
- 2361,
65
- 2488,
66
- 3467,
67
- 4008,
68
- 4211,
69
- 4600,
70
- 4808,
71
- 5299,
72
- 5855,
73
- 6329,
74
- 7203,
75
- 9609,
76
- 9959,
77
- 10563,
78
- 10786,
79
- 11420,
80
- 11709,
81
- 11907,
82
- 13163,
83
- 13697,
84
- 13700,
85
- 14808,
86
- 15306,
87
- 16410,
88
- 16791,
89
- 17992,
90
- 19203,
91
- 19510,
92
- 20724,
93
- 22305,
94
- 22935,
95
- 27007,
96
- 30109,
97
- 30420,
98
- 33409,
99
- 34949,
100
- 40283,
101
- 40493,
102
- 40549,
103
- 47282,
104
- 49146,
105
- 50257,
106
- 50359,
107
- 50360,
108
- 50361
109
- ],
110
- "transformers_version": "4.27.0.dev0"
111
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
quantized/openai/whisper-small.en/default/merges.txt CHANGED
@@ -1,4 +1,4 @@
1
- #version: 0.2
2
  Ġ t
3
  Ġ a
4
  h e
1
+ #version: 0.2 - Trained by `huggingface/tokenizers`
2
  Ġ t
3
  Ġ a
4
  h e
quantized/openai/whisper-small.en/default/model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c95ab3abd5aec74aefe228e1523cd05f5f28bc6c6f8db7d9a5f8d5c37b14aae6
3
+ size 249190690
quantized/openai/whisper-small.en/default/tokenizer.json CHANGED
@@ -5,974 +5,973 @@
5
  "added_tokens": [
6
  {
7
  "id": 50256,
 
8
  "content": "<|endoftext|>",
9
  "single_word": false,
10
  "lstrip": false,
11
  "rstrip": false,
12
- "normalized": false,
13
- "special": true
14
  },
15
  {
16
  "id": 50257,
 
17
  "content": "<|startoftranscript|>",
18
  "single_word": false,
19
  "lstrip": false,
20
  "rstrip": false,
21
- "normalized": false,
22
- "special": true
23
  },
24
  {
25
  "id": 50258,
 
26
  "content": "<|en|>",
27
  "single_word": false,
28
  "lstrip": false,
29
  "rstrip": false,
30
- "normalized": false,
31
- "special": true
32
  },
33
  {
34
  "id": 50259,
 
35
  "content": "<|zh|>",
36
  "single_word": false,
37
  "lstrip": false,
38
  "rstrip": false,
39
- "normalized": false,
40
- "special": true
41
  },
42
  {
43
  "id": 50260,
 
44
  "content": "<|de|>",
45
  "single_word": false,
46
  "lstrip": false,
47
  "rstrip": false,
48
- "normalized": false,
49
- "special": true
50
  },
51
  {
52
  "id": 50261,
 
53
  "content": "<|es|>",
54
  "single_word": false,
55
  "lstrip": false,
56
  "rstrip": false,
57
- "normalized": false,
58
- "special": true
59
  },
60
  {
61
  "id": 50262,
 
62
  "content": "<|ru|>",
63
  "single_word": false,
64
  "lstrip": false,
65
  "rstrip": false,
66
- "normalized": false,
67
- "special": true
68
  },
69
  {
70
  "id": 50263,
 
71
  "content": "<|ko|>",
72
  "single_word": false,
73
  "lstrip": false,
74
  "rstrip": false,
75
- "normalized": false,
76
- "special": true
77
  },
78
  {
79
  "id": 50264,
 
80
  "content": "<|fr|>",
81
  "single_word": false,
82
  "lstrip": false,
83
  "rstrip": false,
84
- "normalized": false,
85
- "special": true
86
  },
87
  {
88
  "id": 50265,
 
89
  "content": "<|ja|>",
90
  "single_word": false,
91
  "lstrip": false,
92
  "rstrip": false,
93
- "normalized": false,
94
- "special": true
95
  },
96
  {
97
  "id": 50266,
 
98
  "content": "<|pt|>",
99
  "single_word": false,
100
  "lstrip": false,
101
  "rstrip": false,
102
- "normalized": false,
103
- "special": true
104
  },
105
  {
106
  "id": 50267,
 
107
  "content": "<|tr|>",
108
  "single_word": false,
109
  "lstrip": false,
110
  "rstrip": false,
111
- "normalized": false,
112
- "special": true
113
  },
114
  {
115
  "id": 50268,
 
116
  "content": "<|pl|>",
117
  "single_word": false,
118
  "lstrip": false,
119
  "rstrip": false,
120
- "normalized": false,
121
- "special": true
122
  },
123
  {
124
  "id": 50269,
 
125
  "content": "<|ca|>",
126
  "single_word": false,
127
  "lstrip": false,
128
  "rstrip": false,
129
- "normalized": false,
130
- "special": true
131
  },
132
  {
133
  "id": 50270,
 
134
  "content": "<|nl|>",
135
  "single_word": false,
136
  "lstrip": false,
137
  "rstrip": false,
138
- "normalized": false,
139
- "special": true
140
  },
141
  {
142
  "id": 50271,
 
143
  "content": "<|ar|>",
144
  "single_word": false,
145
  "lstrip": false,
146
  "rstrip": false,
147
- "normalized": false,
148
- "special": true
149
  },
150
  {
151
  "id": 50272,
 
152
  "content": "<|sv|>",
153
  "single_word": false,
154
  "lstrip": false,
155
  "rstrip": false,
156
- "normalized": false,
157
- "special": true
158
  },
159
  {
160
  "id": 50273,
 
161
  "content": "<|it|>",
162
  "single_word": false,
163
  "lstrip": false,
164
  "rstrip": false,
165
- "normalized": false,
166
- "special": true
167
  },
168
  {
169
  "id": 50274,
 
170
  "content": "<|id|>",
171
  "single_word": false,
172
  "lstrip": false,
173
  "rstrip": false,
174
- "normalized": false,
175
- "special": true
176
  },
177
  {
178
  "id": 50275,
 
179
  "content": "<|hi|>",
180
  "single_word": false,
181
  "lstrip": false,
182
  "rstrip": false,
183
- "normalized": false,
184
- "special": true
185
  },
186
  {
187
  "id": 50276,
 
188
  "content": "<|fi|>",
189
  "single_word": false,
190
  "lstrip": false,
191
  "rstrip": false,
192
- "normalized": false,
193
- "special": true
194
  },
195
  {
196
  "id": 50277,
 
197
  "content": "<|vi|>",
198
  "single_word": false,
199
  "lstrip": false,
200
  "rstrip": false,
201
- "normalized": false,
202
- "special": true
203
  },
204
  {
205
  "id": 50278,
 
206
  "content": "<|iw|>",
207
  "single_word": false,
208
  "lstrip": false,
209
  "rstrip": false,
210
- "normalized": false,
211
- "special": true
212
  },
213
  {
214
  "id": 50279,
 
215
  "content": "<|uk|>",
216
  "single_word": false,
217
  "lstrip": false,
218
  "rstrip": false,
219
- "normalized": false,
220
- "special": true
221
  },
222
  {
223
  "id": 50280,
 
224
  "content": "<|el|>",
225
  "single_word": false,
226
  "lstrip": false,
227
  "rstrip": false,
228
- "normalized": false,
229
- "special": true
230
  },
231
  {
232
  "id": 50281,
 
233
  "content": "<|ms|>",
234
  "single_word": false,
235
  "lstrip": false,
236
  "rstrip": false,
237
- "normalized": false,
238
- "special": true
239
  },
240
  {
241
  "id": 50282,
 
242
  "content": "<|cs|>",
243
  "single_word": false,
244
  "lstrip": false,
245
  "rstrip": false,
246
- "normalized": false,
247
- "special": true
248
  },
249
  {
250
  "id": 50283,
 
251
  "content": "<|ro|>",
252
  "single_word": false,
253
  "lstrip": false,
254
  "rstrip": false,
255
- "normalized": false,
256
- "special": true
257
  },
258
  {
259
  "id": 50284,
 
260
  "content": "<|da|>",
261
  "single_word": false,
262
  "lstrip": false,
263
  "rstrip": false,
264
- "normalized": false,
265
- "special": true
266
  },
267
  {
268
  "id": 50285,
 
269
  "content": "<|hu|>",
270
  "single_word": false,
271
  "lstrip": false,
272
  "rstrip": false,
273
- "normalized": false,
274
- "special": true
275
  },
276
  {
277
  "id": 50286,
 
278
  "content": "<|ta|>",
279
  "single_word": false,
280
  "lstrip": false,
281
  "rstrip": false,
282
- "normalized": false,
283
- "special": true
284
  },
285
  {
286
  "id": 50287,
 
287
  "content": "<|no|>",
288
  "single_word": false,
289
  "lstrip": false,
290
  "rstrip": false,
291
- "normalized": false,
292
- "special": true
293
  },
294
  {
295
  "id": 50288,
 
296
  "content": "<|th|>",
297
  "single_word": false,
298
  "lstrip": false,
299
  "rstrip": false,
300
- "normalized": false,
301
- "special": true
302
  },
303
  {
304
  "id": 50289,
 
305
  "content": "<|ur|>",
306
  "single_word": false,
307
  "lstrip": false,
308
  "rstrip": false,
309
- "normalized": false,
310
- "special": true
311
  },
312
  {
313
  "id": 50290,
 
314
  "content": "<|hr|>",
315
  "single_word": false,
316
  "lstrip": false,
317
  "rstrip": false,
318
- "normalized": false,
319
- "special": true
320
  },
321
  {
322
  "id": 50291,
 
323
  "content": "<|bg|>",
324
  "single_word": false,
325
  "lstrip": false,
326
  "rstrip": false,
327
- "normalized": false,
328
- "special": true
329
  },
330
  {
331
  "id": 50292,
 
332
  "content": "<|lt|>",
333
  "single_word": false,
334
  "lstrip": false,
335
  "rstrip": false,
336
- "normalized": false,
337
- "special": true
338
  },
339
  {
340
  "id": 50293,
 
341
  "content": "<|la|>",
342
  "single_word": false,
343
  "lstrip": false,
344
  "rstrip": false,
345
- "normalized": false,
346
- "special": true
347
  },
348
  {
349
  "id": 50294,
 
350
  "content": "<|mi|>",
351
  "single_word": false,
352
  "lstrip": false,
353
  "rstrip": false,
354
- "normalized": false,
355
- "special": true
356
  },
357
  {
358
  "id": 50295,
 
359
  "content": "<|ml|>",
360
  "single_word": false,
361
  "lstrip": false,
362
  "rstrip": false,
363
- "normalized": false,
364
- "special": true
365
  },
366
  {
367
  "id": 50296,
 
368
  "content": "<|cy|>",
369
  "single_word": false,
370
  "lstrip": false,
371
  "rstrip": false,
372
- "normalized": false,
373
- "special": true
374
  },
375
  {
376
  "id": 50297,
 
377
  "content": "<|sk|>",
378
  "single_word": false,
379
  "lstrip": false,
380
  "rstrip": false,
381
- "normalized": false,
382
- "special": true
383
  },
384
  {
385
  "id": 50298,
 
386
  "content": "<|te|>",
387
  "single_word": false,
388
  "lstrip": false,
389
  "rstrip": false,
390
- "normalized": false,
391
- "special": true
392
  },
393
  {
394
  "id": 50299,
 
395
  "content": "<|fa|>",
396
  "single_word": false,
397
  "lstrip": false,
398
  "rstrip": false,
399
- "normalized": false,
400
- "special": true
401
  },
402
  {
403
  "id": 50300,
 
404
  "content": "<|lv|>",
405
  "single_word": false,
406
  "lstrip": false,
407
  "rstrip": false,
408
- "normalized": false,
409
- "special": true
410
  },
411
  {
412
  "id": 50301,
 
413
  "content": "<|bn|>",
414
  "single_word": false,
415
  "lstrip": false,
416
  "rstrip": false,
417
- "normalized": false,
418
- "special": true
419
  },
420
  {
421
  "id": 50302,
 
422
  "content": "<|sr|>",
423
  "single_word": false,
424
  "lstrip": false,
425
  "rstrip": false,
426
- "normalized": false,
427
- "special": true
428
  },
429
  {
430
  "id": 50303,
 
431
  "content": "<|az|>",
432
  "single_word": false,
433
  "lstrip": false,
434
  "rstrip": false,
435
- "normalized": false,
436
- "special": true
437
  },
438
  {
439
  "id": 50304,
 
440
  "content": "<|sl|>",
441
  "single_word": false,
442
  "lstrip": false,
443
  "rstrip": false,
444
- "normalized": false,
445
- "special": true
446
  },
447
  {
448
  "id": 50305,
 
449
  "content": "<|kn|>",
450
  "single_word": false,
451
  "lstrip": false,
452
  "rstrip": false,
453
- "normalized": false,
454
- "special": true
455
  },
456
  {
457
  "id": 50306,
 
458
  "content": "<|et|>",
459
  "single_word": false,
460
  "lstrip": false,
461
  "rstrip": false,
462
- "normalized": false,
463
- "special": true
464
  },
465
  {
466
  "id": 50307,
 
467
  "content": "<|mk|>",
468
  "single_word": false,
469
  "lstrip": false,
470
  "rstrip": false,
471
- "normalized": false,
472
- "special": true
473
  },
474
  {
475
  "id": 50308,
 
476
  "content": "<|br|>",
477
  "single_word": false,
478
  "lstrip": false,
479
  "rstrip": false,
480
- "normalized": false,
481
- "special": true
482
  },
483
  {
484
  "id": 50309,
 
485
  "content": "<|eu|>",
486
  "single_word": false,
487
  "lstrip": false,
488
  "rstrip": false,
489
- "normalized": false,
490
- "special": true
491
  },
492
  {
493
  "id": 50310,
 
494
  "content": "<|is|>",
495
  "single_word": false,
496
  "lstrip": false,
497
  "rstrip": false,
498
- "normalized": false,
499
- "special": true
500
  },
501
  {
502
  "id": 50311,
 
503
  "content": "<|hy|>",
504
  "single_word": false,
505
  "lstrip": false,
506
  "rstrip": false,
507
- "normalized": false,
508
- "special": true
509
  },
510
  {
511
  "id": 50312,
 
512
  "content": "<|ne|>",
513
  "single_word": false,
514
  "lstrip": false,
515
  "rstrip": false,
516
- "normalized": false,
517
- "special": true
518
  },
519
  {
520
  "id": 50313,
 
521
  "content": "<|mn|>",
522
  "single_word": false,
523
  "lstrip": false,
524
  "rstrip": false,
525
- "normalized": false,
526
- "special": true
527
  },
528
  {
529
  "id": 50314,
 
530
  "content": "<|bs|>",
531
  "single_word": false,
532
  "lstrip": false,
533
  "rstrip": false,
534
- "normalized": false,
535
- "special": true
536
  },
537
  {
538
  "id": 50315,
 
539
  "content": "<|kk|>",
540
  "single_word": false,
541
  "lstrip": false,
542
  "rstrip": false,
543
- "normalized": false,
544
- "special": true
545
  },
546
  {
547
  "id": 50316,
 
548
  "content": "<|sq|>",
549
  "single_word": false,
550
  "lstrip": false,
551
  "rstrip": false,
552
- "normalized": false,
553
- "special": true
554
  },
555
  {
556
  "id": 50317,
 
557
  "content": "<|sw|>",
558
  "single_word": false,
559
  "lstrip": false,
560
  "rstrip": false,
561
- "normalized": false,
562
- "special": true
563
  },
564
  {
565
  "id": 50318,
 
566
  "content": "<|gl|>",
567
  "single_word": false,
568
  "lstrip": false,
569
  "rstrip": false,
570
- "normalized": false,
571
- "special": true
572
  },
573
  {
574
  "id": 50319,
 
575
  "content": "<|mr|>",
576
  "single_word": false,
577
  "lstrip": false,
578
  "rstrip": false,
579
- "normalized": false,
580
- "special": true
581
  },
582
  {
583
  "id": 50320,
 
584
  "content": "<|pa|>",
585
  "single_word": false,
586
  "lstrip": false,
587
  "rstrip": false,
588
- "normalized": false,
589
- "special": true
590
  },
591
  {
592
  "id": 50321,
 
593
  "content": "<|si|>",
594
  "single_word": false,
595
  "lstrip": false,
596
  "rstrip": false,
597
- "normalized": false,
598
- "special": true
599
  },
600
  {
601
  "id": 50322,
 
602
  "content": "<|km|>",
603
  "single_word": false,
604
  "lstrip": false,
605
  "rstrip": false,
606
- "normalized": false,
607
- "special": true
608
  },
609
  {
610
  "id": 50323,
 
611
  "content": "<|sn|>",
612
  "single_word": false,
613
  "lstrip": false,
614
  "rstrip": false,
615
- "normalized": false,
616
- "special": true
617
  },
618
  {
619
  "id": 50324,
 
620
  "content": "<|yo|>",
621
  "single_word": false,
622
  "lstrip": false,
623
  "rstrip": false,
624
- "normalized": false,
625
- "special": true
626
  },
627
  {
628
  "id": 50325,
 
629
  "content": "<|so|>",
630
  "single_word": false,
631
  "lstrip": false,
632
  "rstrip": false,
633
- "normalized": false,
634
- "special": true
635
  },
636
  {
637
  "id": 50326,
 
638
  "content": "<|af|>",
639
  "single_word": false,
640
  "lstrip": false,
641
  "rstrip": false,
642
- "normalized": false,
643
- "special": true
644
  },
645
  {
646
  "id": 50327,
 
647
  "content": "<|oc|>",
648
  "single_word": false,
649
  "lstrip": false,
650
  "rstrip": false,
651
- "normalized": false,
652
- "special": true
653
  },
654
  {
655
  "id": 50328,
 
656
  "content": "<|ka|>",
657
  "single_word": false,
658
  "lstrip": false,
659
  "rstrip": false,
660
- "normalized": false,
661
- "special": true
662
  },
663
  {
664
  "id": 50329,
 
665
  "content": "<|be|>",
666
  "single_word": false,
667
  "lstrip": false,
668
  "rstrip": false,
669
- "normalized": false,
670
- "special": true
671
  },
672
  {
673
  "id": 50330,
 
674
  "content": "<|tg|>",
675
  "single_word": false,
676
  "lstrip": false,
677
  "rstrip": false,
678
- "normalized": false,
679
- "special": true
680
  },
681
  {
682
  "id": 50331,
 
683
  "content": "<|sd|>",
684
  "single_word": false,
685
  "lstrip": false,
686
  "rstrip": false,
687
- "normalized": false,
688
- "special": true
689
  },
690
  {
691
  "id": 50332,
 
692
  "content": "<|gu|>",
693
  "single_word": false,
694
  "lstrip": false,
695
  "rstrip": false,
696
- "normalized": false,
697
- "special": true
698
  },
699
  {
700
  "id": 50333,
 
701
  "content": "<|am|>",
702
  "single_word": false,
703
  "lstrip": false,
704
  "rstrip": false,
705
- "normalized": false,
706
- "special": true
707
  },
708
  {
709
  "id": 50334,
 
710
  "content": "<|yi|>",
711
  "single_word": false,
712
  "lstrip": false,
713
  "rstrip": false,
714
- "normalized": false,
715
- "special": true
716
  },
717
  {
718
  "id": 50335,
 
719
  "content": "<|lo|>",
720
  "single_word": false,
721
  "lstrip": false,
722
  "rstrip": false,
723
- "normalized": false,
724
- "special": true
725
  },
726
  {
727
  "id": 50336,
 
728
  "content": "<|uz|>",
729
  "single_word": false,
730
  "lstrip": false,
731
  "rstrip": false,
732
- "normalized": false,
733
- "special": true
734
  },
735
  {
736
  "id": 50337,
 
737
  "content": "<|fo|>",
738
  "single_word": false,
739
  "lstrip": false,
740
  "rstrip": false,
741
- "normalized": false,
742
- "special": true
743
  },
744
  {
745
  "id": 50338,
 
746
  "content": "<|ht|>",
747
  "single_word": false,
748
  "lstrip": false,
749
  "rstrip": false,
750
- "normalized": false,
751
- "special": true
752
  },
753
  {
754
  "id": 50339,
 
755
  "content": "<|ps|>",
756
  "single_word": false,
757
  "lstrip": false,
758
  "rstrip": false,
759
- "normalized": false,
760
- "special": true
761
  },
762
  {
763
  "id": 50340,
 
764
  "content": "<|tk|>",
765
  "single_word": false,
766
  "lstrip": false,
767
  "rstrip": false,
768
- "normalized": false,
769
- "special": true
770
  },
771
  {
772
  "id": 50341,
 
773
  "content": "<|nn|>",
774
  "single_word": false,
775
  "lstrip": false,
776
  "rstrip": false,
777
- "normalized": false,
778
- "special": true
779
  },
780
  {
781
  "id": 50342,
 
782
  "content": "<|mt|>",
783
  "single_word": false,
784
  "lstrip": false,
785
  "rstrip": false,
786
- "normalized": false,
787
- "special": true
788
  },
789
  {
790
  "id": 50343,
 
791
  "content": "<|sa|>",
792
  "single_word": false,
793
  "lstrip": false,
794
  "rstrip": false,
795
- "normalized": false,
796
- "special": true
797
  },
798
  {
799
  "id": 50344,
 
800
  "content": "<|lb|>",
801
  "single_word": false,
802
  "lstrip": false,
803
  "rstrip": false,
804
- "normalized": false,
805
- "special": true
806
  },
807
  {
808
  "id": 50345,
 
809
  "content": "<|my|>",
810
  "single_word": false,
811
  "lstrip": false,
812
  "rstrip": false,
813
- "normalized": false,
814
- "special": true
815
  },
816
  {
817
  "id": 50346,
 
818
  "content": "<|bo|>",
819
  "single_word": false,
820
  "lstrip": false,
821
  "rstrip": false,
822
- "normalized": false,
823
- "special": true
824
  },
825
  {
826
  "id": 50347,
 
827
  "content": "<|tl|>",
828
  "single_word": false,
829
  "lstrip": false,
830
  "rstrip": false,
831
- "normalized": false,
832
- "special": true
833
  },
834
  {
835
  "id": 50348,
 
836
  "content": "<|mg|>",
837
  "single_word": false,
838
  "lstrip": false,
839
  "rstrip": false,
840
- "normalized": false,
841
- "special": true
842
  },
843
  {
844
  "id": 50349,
 
845
  "content": "<|as|>",
846
  "single_word": false,
847
  "lstrip": false,
848
  "rstrip": false,
849
- "normalized": false,
850
- "special": true
851
  },
852
  {
853
  "id": 50350,
 
854
  "content": "<|tt|>",
855
  "single_word": false,
856
  "lstrip": false,
857
  "rstrip": false,
858
- "normalized": false,
859
- "special": true
860
  },
861
  {
862
  "id": 50351,
 
863
  "content": "<|haw|>",
864
  "single_word": false,
865
  "lstrip": false,
866
  "rstrip": false,
867
- "normalized": false,
868
- "special": true
869
  },
870
  {
871
  "id": 50352,
 
872
  "content": "<|ln|>",
873
  "single_word": false,
874
  "lstrip": false,
875
  "rstrip": false,
876
- "normalized": false,
877
- "special": true
878
  },
879
  {
880
  "id": 50353,
 
881
  "content": "<|ha|>",
882
  "single_word": false,
883
  "lstrip": false,
884
  "rstrip": false,
885
- "normalized": false,
886
- "special": true
887
  },
888
  {
889
  "id": 50354,
 
890
  "content": "<|ba|>",
891
  "single_word": false,
892
  "lstrip": false,
893
  "rstrip": false,
894
- "normalized": false,
895
- "special": true
896
  },
897
  {
898
  "id": 50355,
 
899
  "content": "<|jw|>",
900
  "single_word": false,
901
  "lstrip": false,
902
  "rstrip": false,
903
- "normalized": false,
904
- "special": true
905
  },
906
  {
907
  "id": 50356,
 
908
  "content": "<|su|>",
909
  "single_word": false,
910
  "lstrip": false,
911
  "rstrip": false,
912
- "normalized": false,
913
- "special": true
914
  },
915
  {
916
  "id": 50357,
 
917
  "content": "<|translate|>",
918
  "single_word": false,
919
  "lstrip": false,
920
  "rstrip": false,
921
- "normalized": false,
922
- "special": true
923
  },
924
  {
925
  "id": 50358,
 
926
  "content": "<|transcribe|>",
927
  "single_word": false,
928
  "lstrip": false,
929
  "rstrip": false,
930
- "normalized": false,
931
- "special": true
932
  },
933
  {
934
  "id": 50359,
 
935
  "content": "<|startoflm|>",
936
  "single_word": false,
937
  "lstrip": false,
938
  "rstrip": false,
939
- "normalized": false,
940
- "special": true
941
  },
942
  {
943
  "id": 50360,
 
944
  "content": "<|startofprev|>",
945
  "single_word": false,
946
  "lstrip": false,
947
  "rstrip": false,
948
- "normalized": false,
949
- "special": true
950
  },
951
  {
952
  "id": 50361,
 
953
  "content": "<|nocaptions|>",
954
  "single_word": false,
955
  "lstrip": false,
956
  "rstrip": false,
957
- "normalized": false,
958
- "special": true
959
  },
960
  {
961
  "id": 50362,
 
962
  "content": "<|notimestamps|>",
963
  "single_word": false,
964
  "lstrip": false,
965
  "rstrip": false,
966
- "normalized": false,
967
- "special": true
968
  }
969
  ],
970
  "normalizer": null,
971
  "pre_tokenizer": {
972
  "type": "ByteLevel",
973
  "add_prefix_space": false,
974
- "trim_offsets": true,
975
- "use_regex": true
976
  },
977
  "post_processor": {
978
  "type": "TemplateProcessing",
@@ -1067,8 +1066,7 @@
1067
  "decoder": {
1068
  "type": "ByteLevel",
1069
  "add_prefix_space": true,
1070
- "trim_offsets": true,
1071
- "use_regex": true
1072
  },
1073
  "model": {
1074
  "type": "BPE",
5
  "added_tokens": [
6
  {
7
  "id": 50256,
8
+ "special": true,
9
  "content": "<|endoftext|>",
10
  "single_word": false,
11
  "lstrip": false,
12
  "rstrip": false,
13
+ "normalized": false
 
14
  },
15
  {
16
  "id": 50257,
17
+ "special": true,
18
  "content": "<|startoftranscript|>",
19
  "single_word": false,
20
  "lstrip": false,
21
  "rstrip": false,
22
+ "normalized": false
 
23
  },
24
  {
25
  "id": 50258,
26
+ "special": true,
27
  "content": "<|en|>",
28
  "single_word": false,
29
  "lstrip": false,
30
  "rstrip": false,
31
+ "normalized": false
 
32
  },
33
  {
34
  "id": 50259,
35
+ "special": true,
36
  "content": "<|zh|>",
37
  "single_word": false,
38
  "lstrip": false,
39
  "rstrip": false,
40
+ "normalized": false
 
41
  },
42
  {
43
  "id": 50260,
44
+ "special": true,
45
  "content": "<|de|>",
46
  "single_word": false,
47
  "lstrip": false,
48
  "rstrip": false,
49
+ "normalized": false
 
50
  },
51
  {
52
  "id": 50261,
53
+ "special": true,
54
  "content": "<|es|>",
55
  "single_word": false,
56
  "lstrip": false,
57
  "rstrip": false,
58
+ "normalized": false
 
59
  },
60
  {
61
  "id": 50262,
62
+ "special": true,
63
  "content": "<|ru|>",
64
  "single_word": false,
65
  "lstrip": false,
66
  "rstrip": false,
67
+ "normalized": false
 
68
  },
69
  {
70
  "id": 50263,
71
+ "special": true,
72
  "content": "<|ko|>",
73
  "single_word": false,
74
  "lstrip": false,
75
  "rstrip": false,
76
+ "normalized": false
 
77
  },
78
  {
79
  "id": 50264,
80
+ "special": true,
81
  "content": "<|fr|>",
82
  "single_word": false,
83
  "lstrip": false,
84
  "rstrip": false,
85
+ "normalized": false
 
86
  },
87
  {
88
  "id": 50265,
89
+ "special": true,
90
  "content": "<|ja|>",
91
  "single_word": false,
92
  "lstrip": false,
93
  "rstrip": false,
94
+ "normalized": false
 
95
  },
96
  {
97
  "id": 50266,
98
+ "special": true,
99
  "content": "<|pt|>",
100
  "single_word": false,
101
  "lstrip": false,
102
  "rstrip": false,
103
+ "normalized": false
 
104
  },
105
  {
106
  "id": 50267,
107
+ "special": true,
108
  "content": "<|tr|>",
109
  "single_word": false,
110
  "lstrip": false,
111
  "rstrip": false,
112
+ "normalized": false
 
113
  },
114
  {
115
  "id": 50268,
116
+ "special": true,
117
  "content": "<|pl|>",
118
  "single_word": false,
119
  "lstrip": false,
120
  "rstrip": false,
121
+ "normalized": false
 
122
  },
123
  {
124
  "id": 50269,
125
+ "special": true,
126
  "content": "<|ca|>",
127
  "single_word": false,
128
  "lstrip": false,
129
  "rstrip": false,
130
+ "normalized": false
 
131
  },
132
  {
133
  "id": 50270,
134
+ "special": true,
135
  "content": "<|nl|>",
136
  "single_word": false,
137
  "lstrip": false,
138
  "rstrip": false,
139
+ "normalized": false
 
140
  },
141
  {
142
  "id": 50271,
143
+ "special": true,
144
  "content": "<|ar|>",
145
  "single_word": false,
146
  "lstrip": false,
147
  "rstrip": false,
148
+ "normalized": false
 
149
  },
150
  {
151
  "id": 50272,
152
+ "special": true,
153
  "content": "<|sv|>",
154
  "single_word": false,
155
  "lstrip": false,
156
  "rstrip": false,
157
+ "normalized": false
 
158
  },
159
  {
160
  "id": 50273,
161
+ "special": true,
162
  "content": "<|it|>",
163
  "single_word": false,
164
  "lstrip": false,
165
  "rstrip": false,
166
+ "normalized": false
 
167
  },
168
  {
169
  "id": 50274,
170
+ "special": true,
171
  "content": "<|id|>",
172
  "single_word": false,
173
  "lstrip": false,
174
  "rstrip": false,
175
+ "normalized": false
 
176
  },
177
  {
178
  "id": 50275,
179
+ "special": true,
180
  "content": "<|hi|>",
181
  "single_word": false,
182
  "lstrip": false,
183
  "rstrip": false,
184
+ "normalized": false
 
185
  },
186
  {
187
  "id": 50276,
188
+ "special": true,
189
  "content": "<|fi|>",
190
  "single_word": false,
191
  "lstrip": false,
192
  "rstrip": false,
193
+ "normalized": false
 
194
  },
195
  {
196
  "id": 50277,
197
+ "special": true,
198
  "content": "<|vi|>",
199
  "single_word": false,
200
  "lstrip": false,
201
  "rstrip": false,
202
+ "normalized": false
 
203
  },
204
  {
205
  "id": 50278,
206
+ "special": true,
207
  "content": "<|iw|>",
208
  "single_word": false,
209
  "lstrip": false,
210
  "rstrip": false,
211
+ "normalized": false
 
212
  },
213
  {
214
  "id": 50279,
215
+ "special": true,
216
  "content": "<|uk|>",
217
  "single_word": false,
218
  "lstrip": false,
219
  "rstrip": false,
220
+ "normalized": false
 
221
  },
222
  {
223
  "id": 50280,
224
+ "special": true,
225
  "content": "<|el|>",
226
  "single_word": false,
227
  "lstrip": false,
228
  "rstrip": false,
229
+ "normalized": false
 
230
  },
231
  {
232
  "id": 50281,
233
+ "special": true,
234
  "content": "<|ms|>",
235
  "single_word": false,
236
  "lstrip": false,
237
  "rstrip": false,
238
+ "normalized": false
 
239
  },
240
  {
241
  "id": 50282,
242
+ "special": true,
243
  "content": "<|cs|>",
244
  "single_word": false,
245
  "lstrip": false,
246
  "rstrip": false,
247
+ "normalized": false
 
248
  },
249
  {
250
  "id": 50283,
251
+ "special": true,
252
  "content": "<|ro|>",
253
  "single_word": false,
254
  "lstrip": false,
255
  "rstrip": false,
256
+ "normalized": false
 
257
  },
258
  {
259
  "id": 50284,
260
+ "special": true,
261
  "content": "<|da|>",
262
  "single_word": false,
263
  "lstrip": false,
264
  "rstrip": false,
265
+ "normalized": false
 
266
  },
267
  {
268
  "id": 50285,
269
+ "special": true,
270
  "content": "<|hu|>",
271
  "single_word": false,
272
  "lstrip": false,
273
  "rstrip": false,
274
+ "normalized": false
 
275
  },
276
  {
277
  "id": 50286,
278
+ "special": true,
279
  "content": "<|ta|>",
280
  "single_word": false,
281
  "lstrip": false,
282
  "rstrip": false,
283
+ "normalized": false
 
284
  },
285
  {
286
  "id": 50287,
287
+ "special": true,
288
  "content": "<|no|>",
289
  "single_word": false,
290
  "lstrip": false,
291
  "rstrip": false,
292
+ "normalized": false
 
293
  },
294
  {
295
  "id": 50288,
296
+ "special": true,
297
  "content": "<|th|>",
298
  "single_word": false,
299
  "lstrip": false,
300
  "rstrip": false,
301
+ "normalized": false
 
302
  },
303
  {
304
  "id": 50289,
305
+ "special": true,
306
  "content": "<|ur|>",
307
  "single_word": false,
308
  "lstrip": false,
309
  "rstrip": false,
310
+ "normalized": false
 
311
  },
312
  {
313
  "id": 50290,
314
+ "special": true,
315
  "content": "<|hr|>",
316
  "single_word": false,
317
  "lstrip": false,
318
  "rstrip": false,
319
+ "normalized": false
 
320
  },
321
  {
322
  "id": 50291,
323
+ "special": true,
324
  "content": "<|bg|>",
325
  "single_word": false,
326
  "lstrip": false,
327
  "rstrip": false,
328
+ "normalized": false
 
329
  },
330
  {
331
  "id": 50292,
332
+ "special": true,
333
  "content": "<|lt|>",
334
  "single_word": false,
335
  "lstrip": false,
336
  "rstrip": false,
337
+ "normalized": false
 
338
  },
339
  {
340
  "id": 50293,
341
+ "special": true,
342
  "content": "<|la|>",
343
  "single_word": false,
344
  "lstrip": false,
345
  "rstrip": false,
346
+ "normalized": false
 
347
  },
348
  {
349
  "id": 50294,
350
+ "special": true,
351
  "content": "<|mi|>",
352
  "single_word": false,
353
  "lstrip": false,
354
  "rstrip": false,
355
+ "normalized": false
 
356
  },
357
  {
358
  "id": 50295,
359
+ "special": true,
360
  "content": "<|ml|>",
361
  "single_word": false,
362
  "lstrip": false,
363
  "rstrip": false,
364
+ "normalized": false
 
365
  },
366
  {
367
  "id": 50296,
368
+ "special": true,
369
  "content": "<|cy|>",
370
  "single_word": false,
371
  "lstrip": false,
372
  "rstrip": false,
373
+ "normalized": false
 
374
  },
375
  {
376
  "id": 50297,
377
+ "special": true,
378
  "content": "<|sk|>",
379
  "single_word": false,
380
  "lstrip": false,
381
  "rstrip": false,
382
+ "normalized": false
 
383
  },
384
  {
385
  "id": 50298,
386
+ "special": true,
387
  "content": "<|te|>",
388
  "single_word": false,
389
  "lstrip": false,
390
  "rstrip": false,
391
+ "normalized": false
 
392
  },
393
  {
394
  "id": 50299,
395
+ "special": true,
396
  "content": "<|fa|>",
397
  "single_word": false,
398
  "lstrip": false,
399
  "rstrip": false,
400
+ "normalized": false
 
401
  },
402
  {
403
  "id": 50300,
404
+ "special": true,
405
  "content": "<|lv|>",
406
  "single_word": false,
407
  "lstrip": false,
408
  "rstrip": false,
409
+ "normalized": false
 
410
  },
411
  {
412
  "id": 50301,
413
+ "special": true,
414
  "content": "<|bn|>",
415
  "single_word": false,
416
  "lstrip": false,
417
  "rstrip": false,
418
+ "normalized": false
 
419
  },
420
  {
421
  "id": 50302,
422
+ "special": true,
423
  "content": "<|sr|>",
424
  "single_word": false,
425
  "lstrip": false,
426
  "rstrip": false,
427
+ "normalized": false
 
428
  },
429
  {
430
  "id": 50303,
431
+ "special": true,
432
  "content": "<|az|>",
433
  "single_word": false,
434
  "lstrip": false,
435
  "rstrip": false,
436
+ "normalized": false
 
437
  },
438
  {
439
  "id": 50304,
440
+ "special": true,
441
  "content": "<|sl|>",
442
  "single_word": false,
443
  "lstrip": false,
444
  "rstrip": false,
445
+ "normalized": false
 
446
  },
447
  {
448
  "id": 50305,
449
+ "special": true,
450
  "content": "<|kn|>",
451
  "single_word": false,
452
  "lstrip": false,
453
  "rstrip": false,
454
+ "normalized": false
 
455
  },
456
  {
457
  "id": 50306,
458
+ "special": true,
459
  "content": "<|et|>",
460
  "single_word": false,
461
  "lstrip": false,
462
  "rstrip": false,
463
+ "normalized": false
 
464
  },
465
  {
466
  "id": 50307,
467
+ "special": true,
468
  "content": "<|mk|>",
469
  "single_word": false,
470
  "lstrip": false,
471
  "rstrip": false,
472
+ "normalized": false
 
473
  },
474
  {
475
  "id": 50308,
476
+ "special": true,
477
  "content": "<|br|>",
478
  "single_word": false,
479
  "lstrip": false,
480
  "rstrip": false,
481
+ "normalized": false
 
482
  },
483
  {
484
  "id": 50309,
485
+ "special": true,
486
  "content": "<|eu|>",
487
  "single_word": false,
488
  "lstrip": false,
489
  "rstrip": false,
490
+ "normalized": false
 
491
  },
492
  {
493
  "id": 50310,
494
+ "special": true,
495
  "content": "<|is|>",
496
  "single_word": false,
497
  "lstrip": false,
498
  "rstrip": false,
499
+ "normalized": false
 
500
  },
501
  {
502
  "id": 50311,
503
+ "special": true,
504
  "content": "<|hy|>",
505
  "single_word": false,
506
  "lstrip": false,
507
  "rstrip": false,
508
+ "normalized": false
 
509
  },
510
  {
511
  "id": 50312,
512
+ "special": true,
513
  "content": "<|ne|>",
514
  "single_word": false,
515
  "lstrip": false,
516
  "rstrip": false,
517
+ "normalized": false
 
518
  },
519
  {
520
  "id": 50313,
521
+ "special": true,
522
  "content": "<|mn|>",
523
  "single_word": false,
524
  "lstrip": false,
525
  "rstrip": false,
526
+ "normalized": false
 
527
  },
528
  {
529
  "id": 50314,
530
+ "special": true,
531
  "content": "<|bs|>",
532
  "single_word": false,
533
  "lstrip": false,
534
  "rstrip": false,
535
+ "normalized": false
 
536
  },
537
  {
538
  "id": 50315,
539
+ "special": true,
540
  "content": "<|kk|>",
541
  "single_word": false,
542
  "lstrip": false,
543
  "rstrip": false,
544
+ "normalized": false
 
545
  },
546
  {
547
  "id": 50316,
548
+ "special": true,
549
  "content": "<|sq|>",
550
  "single_word": false,
551
  "lstrip": false,
552
  "rstrip": false,
553
+ "normalized": false
 
554
  },
555
  {
556
  "id": 50317,
557
+ "special": true,
558
  "content": "<|sw|>",
559
  "single_word": false,
560
  "lstrip": false,
561
  "rstrip": false,
562
+ "normalized": false
 
563
  },
564
  {
565
  "id": 50318,
566
+ "special": true,
567
  "content": "<|gl|>",
568
  "single_word": false,
569
  "lstrip": false,
570
  "rstrip": false,
571
+ "normalized": false
 
572
  },
573
  {
574
  "id": 50319,
575
+ "special": true,
576
  "content": "<|mr|>",
577
  "single_word": false,
578
  "lstrip": false,
579
  "rstrip": false,
580
+ "normalized": false
 
581
  },
582
  {
583
  "id": 50320,
584
+ "special": true,
585
  "content": "<|pa|>",
586
  "single_word": false,
587
  "lstrip": false,
588
  "rstrip": false,
589
+ "normalized": false
 
590
  },
591
  {
592
  "id": 50321,
593
+ "special": true,
594
  "content": "<|si|>",
595
  "single_word": false,
596
  "lstrip": false,
597
  "rstrip": false,
598
+ "normalized": false
 
599
  },
600
  {
601
  "id": 50322,
602
+ "special": true,
603
  "content": "<|km|>",
604
  "single_word": false,
605
  "lstrip": false,
606
  "rstrip": false,
607
+ "normalized": false
 
608
  },
609
  {
610
  "id": 50323,
611
+ "special": true,
612
  "content": "<|sn|>",
613
  "single_word": false,
614
  "lstrip": false,
615
  "rstrip": false,
616
+ "normalized": false
 
617
  },
618
  {
619
  "id": 50324,
620
+ "special": true,
621
  "content": "<|yo|>",
622
  "single_word": false,
623
  "lstrip": false,
624
  "rstrip": false,
625
+ "normalized": false
 
626
  },
627
  {
628
  "id": 50325,
629
+ "special": true,
630
  "content": "<|so|>",
631
  "single_word": false,
632
  "lstrip": false,
633
  "rstrip": false,
634
+ "normalized": false
 
635
  },
636
  {
637
  "id": 50326,
638
+ "special": true,
639
  "content": "<|af|>",
640
  "single_word": false,
641
  "lstrip": false,
642
  "rstrip": false,
643
+ "normalized": false
 
644
  },
645
  {
646
  "id": 50327,
647
+ "special": true,
648
  "content": "<|oc|>",
649
  "single_word": false,
650
  "lstrip": false,
651
  "rstrip": false,
652
+ "normalized": false
 
653
  },
654
  {
655
  "id": 50328,
656
+ "special": true,
657
  "content": "<|ka|>",
658
  "single_word": false,
659
  "lstrip": false,
660
  "rstrip": false,
661
+ "normalized": false
 
662
  },
663
  {
664
  "id": 50329,
665
+ "special": true,
666
  "content": "<|be|>",
667
  "single_word": false,
668
  "lstrip": false,
669
  "rstrip": false,
670
+ "normalized": false
 
671
  },
672
  {
673
  "id": 50330,
674
+ "special": true,
675
  "content": "<|tg|>",
676
  "single_word": false,
677
  "lstrip": false,
678
  "rstrip": false,
679
+ "normalized": false
 
680
  },
681
  {
682
  "id": 50331,
683
+ "special": true,
684
  "content": "<|sd|>",
685
  "single_word": false,
686
  "lstrip": false,
687
  "rstrip": false,
688
+ "normalized": false
 
689
  },
690
  {
691
  "id": 50332,
692
+ "special": true,
693
  "content": "<|gu|>",
694
  "single_word": false,
695
  "lstrip": false,
696
  "rstrip": false,
697
+ "normalized": false
 
698
  },
699
  {
700
  "id": 50333,
701
+ "special": true,
702
  "content": "<|am|>",
703
  "single_word": false,
704
  "lstrip": false,
705
  "rstrip": false,
706
+ "normalized": false
 
707
  },
708
  {
709
  "id": 50334,
710
+ "special": true,
711
  "content": "<|yi|>",
712
  "single_word": false,
713
  "lstrip": false,
714
  "rstrip": false,
715
+ "normalized": false
 
716
  },
717
  {
718
  "id": 50335,
719
+ "special": true,
720
  "content": "<|lo|>",
721
  "single_word": false,
722
  "lstrip": false,
723
  "rstrip": false,
724
+ "normalized": false
 
725
  },
726
  {
727
  "id": 50336,
728
+ "special": true,
729
  "content": "<|uz|>",
730
  "single_word": false,
731
  "lstrip": false,
732
  "rstrip": false,
733
+ "normalized": false
 
734
  },
735
  {
736
  "id": 50337,
737
+ "special": true,
738
  "content": "<|fo|>",
739
  "single_word": false,
740
  "lstrip": false,
741
  "rstrip": false,
742
+ "normalized": false
 
743
  },
744
  {
745
  "id": 50338,
746
+ "special": true,
747
  "content": "<|ht|>",
748
  "single_word": false,
749
  "lstrip": false,
750
  "rstrip": false,
751
+ "normalized": false
 
752
  },
753
  {
754
  "id": 50339,
755
+ "special": true,
756
  "content": "<|ps|>",
757
  "single_word": false,
758
  "lstrip": false,
759
  "rstrip": false,
760
+ "normalized": false
 
761
  },
762
  {
763
  "id": 50340,
764
+ "special": true,
765
  "content": "<|tk|>",
766
  "single_word": false,
767
  "lstrip": false,
768
  "rstrip": false,
769
+ "normalized": false
 
770
  },
771
  {
772
  "id": 50341,
773
+ "special": true,
774
  "content": "<|nn|>",
775
  "single_word": false,
776
  "lstrip": false,
777
  "rstrip": false,
778
+ "normalized": false
 
779
  },
780
  {
781
  "id": 50342,
782
+ "special": true,
783
  "content": "<|mt|>",
784
  "single_word": false,
785
  "lstrip": false,
786
  "rstrip": false,
787
+ "normalized": false
 
788
  },
789
  {
790
  "id": 50343,
791
+ "special": true,
792
  "content": "<|sa|>",
793
  "single_word": false,
794
  "lstrip": false,
795
  "rstrip": false,
796
+ "normalized": false
 
797
  },
798
  {
799
  "id": 50344,
800
+ "special": true,
801
  "content": "<|lb|>",
802
  "single_word": false,
803
  "lstrip": false,
804
  "rstrip": false,
805
+ "normalized": false
 
806
  },
807
  {
808
  "id": 50345,
809
+ "special": true,
810
  "content": "<|my|>",
811
  "single_word": false,
812
  "lstrip": false,
813
  "rstrip": false,
814
+ "normalized": false
 
815
  },
816
  {
817
  "id": 50346,
818
+ "special": true,
819
  "content": "<|bo|>",
820
  "single_word": false,
821
  "lstrip": false,
822
  "rstrip": false,
823
+ "normalized": false
 
824
  },
825
  {
826
  "id": 50347,
827
+ "special": true,
828
  "content": "<|tl|>",
829
  "single_word": false,
830
  "lstrip": false,
831
  "rstrip": false,
832
+ "normalized": false
 
833
  },
834
  {
835
  "id": 50348,
836
+ "special": true,
837
  "content": "<|mg|>",
838
  "single_word": false,
839
  "lstrip": false,
840
  "rstrip": false,
841
+ "normalized": false
 
842
  },
843
  {
844
  "id": 50349,
845
+ "special": true,
846
  "content": "<|as|>",
847
  "single_word": false,
848
  "lstrip": false,
849
  "rstrip": false,
850
+ "normalized": false
 
851
  },
852
  {
853
  "id": 50350,
854
+ "special": true,
855
  "content": "<|tt|>",
856
  "single_word": false,
857
  "lstrip": false,
858
  "rstrip": false,
859
+ "normalized": false
 
860
  },
861
  {
862
  "id": 50351,
863
+ "special": true,
864
  "content": "<|haw|>",
865
  "single_word": false,
866
  "lstrip": false,
867
  "rstrip": false,
868
+ "normalized": false
 
869
  },
870
  {
871
  "id": 50352,
872
+ "special": true,
873
  "content": "<|ln|>",
874
  "single_word": false,
875
  "lstrip": false,
876
  "rstrip": false,
877
+ "normalized": false
 
878
  },
879
  {
880
  "id": 50353,
881
+ "special": true,
882
  "content": "<|ha|>",
883
  "single_word": false,
884
  "lstrip": false,
885
  "rstrip": false,
886
+ "normalized": false
 
887
  },
888
  {
889
  "id": 50354,
890
+ "special": true,
891
  "content": "<|ba|>",
892
  "single_word": false,
893
  "lstrip": false,
894
  "rstrip": false,
895
+ "normalized": false
 
896
  },
897
  {
898
  "id": 50355,
899
+ "special": true,
900
  "content": "<|jw|>",
901
  "single_word": false,
902
  "lstrip": false,
903
  "rstrip": false,
904
+ "normalized": false
 
905
  },
906
  {
907
  "id": 50356,
908
+ "special": true,
909
  "content": "<|su|>",
910
  "single_word": false,
911
  "lstrip": false,
912
  "rstrip": false,
913
+ "normalized": false
 
914
  },
915
  {
916
  "id": 50357,
917
+ "special": true,
918
  "content": "<|translate|>",
919
  "single_word": false,
920
  "lstrip": false,
921
  "rstrip": false,
922
+ "normalized": false
 
923
  },
924
  {
925
  "id": 50358,
926
+ "special": true,
927
  "content": "<|transcribe|>",
928
  "single_word": false,
929
  "lstrip": false,
930
  "rstrip": false,
931
+ "normalized": false
 
932
  },
933
  {
934
  "id": 50359,
935
+ "special": true,
936
  "content": "<|startoflm|>",
937
  "single_word": false,
938
  "lstrip": false,
939
  "rstrip": false,
940
+ "normalized": false
 
941
  },
942
  {
943
  "id": 50360,
944
+ "special": true,
945
  "content": "<|startofprev|>",
946
  "single_word": false,
947
  "lstrip": false,
948
  "rstrip": false,
949
+ "normalized": false
 
950
  },
951
  {
952
  "id": 50361,
953
+ "special": true,
954
  "content": "<|nocaptions|>",
955
  "single_word": false,
956
  "lstrip": false,
957
  "rstrip": false,
958
+ "normalized": false
 
959
  },
960
  {
961
  "id": 50362,
962
+ "special": true,
963
  "content": "<|notimestamps|>",
964
  "single_word": false,
965
  "lstrip": false,
966
  "rstrip": false,
967
+ "normalized": false
 
968
  }
969
  ],
970
  "normalizer": null,
971
  "pre_tokenizer": {
972
  "type": "ByteLevel",
973
  "add_prefix_space": false,
974
+ "trim_offsets": true
 
975
  },
976
  "post_processor": {
977
  "type": "TemplateProcessing",
1066
  "decoder": {
1067
  "type": "ByteLevel",
1068
  "add_prefix_space": true,
1069
+ "trim_offsets": true
 
1070
  },
1071
  "model": {
1072
  "type": "BPE",
quantized/openai/whisper-small.en/default/tokenizer_config.json CHANGED
@@ -19,7 +19,6 @@
19
  },
20
  "errors": "replace",
21
  "model_max_length": 1024,
22
- "name_or_path": "openai/whisper-small.en",
23
  "pad_token": null,
24
  "processor_class": "WhisperProcessor",
25
  "return_attention_mask": false,
19
  },
20
  "errors": "replace",
21
  "model_max_length": 1024,
 
22
  "pad_token": null,
23
  "processor_class": "WhisperProcessor",
24
  "return_attention_mask": false,
quantized/openai/whisper-small.en/default/vocab.json CHANGED
The diff for this file is too large to render. See raw diff
quantized/openai/whisper-small.en/speech2seq-lm-with-past/config.json CHANGED
@@ -2,6 +2,7 @@
2
  "_name_or_path": "openai/whisper-small.en",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
 
5
  "architectures": [
6
  "WhisperForConditionalGeneration"
7
  ],
@@ -11,6 +12,7 @@
11
  50256
12
  ],
13
  "bos_token_id": 50257,
 
14
  "d_model": 768,
15
  "decoder_attention_heads": 12,
16
  "decoder_ffn_dim": 3072,
@@ -31,6 +33,12 @@
31
  ],
32
  "init_std": 0.02,
33
  "is_encoder_decoder": true,
 
 
 
 
 
 
34
  "max_length": 448,
35
  "max_source_positions": 1500,
36
  "max_target_positions": 448,
@@ -129,7 +137,8 @@
129
  50360,
130
  50361
131
  ],
132
- "transformers_version": "4.26.1",
133
  "use_cache": true,
 
134
  "vocab_size": 51864
135
  }
2
  "_name_or_path": "openai/whisper-small.en",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
5
+ "apply_spec_augment": false,
6
  "architectures": [
7
  "WhisperForConditionalGeneration"
8
  ],
12
  50256
13
  ],
14
  "bos_token_id": 50257,
15
+ "classifier_proj_size": 256,
16
  "d_model": 768,
17
  "decoder_attention_heads": 12,
18
  "decoder_ffn_dim": 3072,
33
  ],
34
  "init_std": 0.02,
35
  "is_encoder_decoder": true,
36
+ "mask_feature_length": 10,
37
+ "mask_feature_min_masks": 0,
38
+ "mask_feature_prob": 0.0,
39
+ "mask_time_length": 10,
40
+ "mask_time_min_masks": 2,
41
+ "mask_time_prob": 0.05,
42
  "max_length": 448,
43
  "max_source_positions": 1500,
44
  "max_target_positions": 448,
137
  50360,
138
  50361
139
  ],
140
+ "transformers_version": "4.27.2",
141
  "use_cache": true,
142
+ "use_weighted_layer_sum": false,
143
  "vocab_size": 51864
144
  }
quantized/openai/whisper-small.en/speech2seq-lm-with-past/decoder_model.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0105edbe0037f4c7ac11f020f53f581b6ed10c9f680124d2448f255ec52c28c3
3
- size 196676336
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65f4f9490108aed882d00c037ef6dfec10a688da713d80d17a0caf27ea81d046
3
+ size 196596817
quantized/openai/whisper-small.en/speech2seq-lm-with-past/decoder_model_merged.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8defd52aed2a7843e174afb4a42c44ae4dfb8e85c200e9955fb88a68ef8df0f7
3
- size 197146590
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f190877dc4864b3be49117d3119c0f6be5ea3b5094fe62883de7dd176ad606a8
3
+ size 196987486
quantized/openai/whisper-small.en/speech2seq-lm-with-past/decoder_with_past_model.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a6ced91a295a5a5d10abebd872a1da952b441e12240cf2f1f680b24ae9e53029
3
- size 182267587
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9b6fc022fc14723de0769485040b034280a0685328ee3ea44f1922d2effb3bd
3
+ size 182188068
quantized/openai/whisper-small.en/speech2seq-lm-with-past/encoder_model.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6792ba696e505bd3527efa62327024bc04db8bf9558d47e16a2fff7e3810440f
3
- size 92735928
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f25862e6297bed816b7ec75f51b0c1f937e312e1032362b579f4cd6e1c8b4395
3
+ size 92704740
quantized/openai/whisper-small.en/speech2seq-lm-with-past/merges.txt CHANGED
@@ -1,4 +1,4 @@
1
- #version: 0.2
2
  Ġ t
3
  Ġ a
4
  h e
1
+ #version: 0.2 - Trained by `huggingface/tokenizers`
2
  Ġ t
3
  Ġ a
4
  h e
quantized/openai/whisper-small.en/speech2seq-lm-with-past/tokenizer.json CHANGED
@@ -5,974 +5,973 @@
5
  "added_tokens": [
6
  {
7
  "id": 50256,
 
8
  "content": "<|endoftext|>",
9
  "single_word": false,
10
  "lstrip": false,
11
  "rstrip": false,
12
- "normalized": false,
13
- "special": true
14
  },
15
  {
16
  "id": 50257,
 
17
  "content": "<|startoftranscript|>",
18
  "single_word": false,
19
  "lstrip": false,
20
  "rstrip": false,
21
- "normalized": false,
22
- "special": true
23
  },
24
  {
25
  "id": 50258,
 
26
  "content": "<|en|>",
27
  "single_word": false,
28
  "lstrip": false,
29
  "rstrip": false,
30
- "normalized": false,
31
- "special": true
32
  },
33
  {
34
  "id": 50259,
 
35
  "content": "<|zh|>",
36
  "single_word": false,
37
  "lstrip": false,
38
  "rstrip": false,
39
- "normalized": false,
40
- "special": true
41
  },
42
  {
43
  "id": 50260,
 
44
  "content": "<|de|>",
45
  "single_word": false,
46
  "lstrip": false,
47
  "rstrip": false,
48
- "normalized": false,
49
- "special": true
50
  },
51
  {
52
  "id": 50261,
 
53
  "content": "<|es|>",
54
  "single_word": false,
55
  "lstrip": false,
56
  "rstrip": false,
57
- "normalized": false,
58
- "special": true
59
  },
60
  {
61
  "id": 50262,
 
62
  "content": "<|ru|>",
63
  "single_word": false,
64
  "lstrip": false,
65
  "rstrip": false,
66
- "normalized": false,
67
- "special": true
68
  },
69
  {
70
  "id": 50263,
 
71
  "content": "<|ko|>",
72
  "single_word": false,
73
  "lstrip": false,
74
  "rstrip": false,
75
- "normalized": false,
76
- "special": true
77
  },
78
  {
79
  "id": 50264,
 
80
  "content": "<|fr|>",
81
  "single_word": false,
82
  "lstrip": false,
83
  "rstrip": false,
84
- "normalized": false,
85
- "special": true
86
  },
87
  {
88
  "id": 50265,
 
89
  "content": "<|ja|>",
90
  "single_word": false,
91
  "lstrip": false,
92
  "rstrip": false,
93
- "normalized": false,
94
- "special": true
95
  },
96
  {
97
  "id": 50266,
 
98
  "content": "<|pt|>",
99
  "single_word": false,
100
  "lstrip": false,
101
  "rstrip": false,
102
- "normalized": false,
103
- "special": true
104
  },
105
  {
106
  "id": 50267,
 
107
  "content": "<|tr|>",
108
  "single_word": false,
109
  "lstrip": false,
110
  "rstrip": false,
111
- "normalized": false,
112
- "special": true
113
  },
114
  {
115
  "id": 50268,
 
116
  "content": "<|pl|>",
117
  "single_word": false,
118
  "lstrip": false,
119
  "rstrip": false,
120
- "normalized": false,
121
- "special": true
122
  },
123
  {
124
  "id": 50269,
 
125
  "content": "<|ca|>",
126
  "single_word": false,
127
  "lstrip": false,
128
  "rstrip": false,
129
- "normalized": false,
130
- "special": true
131
  },
132
  {
133
  "id": 50270,
 
134
  "content": "<|nl|>",
135
  "single_word": false,
136
  "lstrip": false,
137
  "rstrip": false,
138
- "normalized": false,
139
- "special": true
140
  },
141
  {
142
  "id": 50271,
 
143
  "content": "<|ar|>",
144
  "single_word": false,
145
  "lstrip": false,
146
  "rstrip": false,
147
- "normalized": false,
148
- "special": true
149
  },
150
  {
151
  "id": 50272,
 
152
  "content": "<|sv|>",
153
  "single_word": false,
154
  "lstrip": false,
155
  "rstrip": false,
156
- "normalized": false,
157
- "special": true
158
  },
159
  {
160
  "id": 50273,
 
161
  "content": "<|it|>",
162
  "single_word": false,
163
  "lstrip": false,
164
  "rstrip": false,
165
- "normalized": false,
166
- "special": true
167
  },
168
  {
169
  "id": 50274,
 
170
  "content": "<|id|>",
171
  "single_word": false,
172
  "lstrip": false,
173
  "rstrip": false,
174
- "normalized": false,
175
- "special": true
176
  },
177
  {
178
  "id": 50275,
 
179
  "content": "<|hi|>",
180
  "single_word": false,
181
  "lstrip": false,
182
  "rstrip": false,
183
- "normalized": false,
184
- "special": true
185
  },
186
  {
187
  "id": 50276,
 
188
  "content": "<|fi|>",
189
  "single_word": false,
190
  "lstrip": false,
191
  "rstrip": false,
192
- "normalized": false,
193
- "special": true
194
  },
195
  {
196
  "id": 50277,
 
197
  "content": "<|vi|>",
198
  "single_word": false,
199
  "lstrip": false,
200
  "rstrip": false,
201
- "normalized": false,
202
- "special": true
203
  },
204
  {
205
  "id": 50278,
 
206
  "content": "<|iw|>",
207
  "single_word": false,
208
  "lstrip": false,
209
  "rstrip": false,
210
- "normalized": false,
211
- "special": true
212
  },
213
  {
214
  "id": 50279,
 
215
  "content": "<|uk|>",
216
  "single_word": false,
217
  "lstrip": false,
218
  "rstrip": false,
219
- "normalized": false,
220
- "special": true
221
  },
222
  {
223
  "id": 50280,
 
224
  "content": "<|el|>",
225
  "single_word": false,
226
  "lstrip": false,
227
  "rstrip": false,
228
- "normalized": false,
229
- "special": true
230
  },
231
  {
232
  "id": 50281,
 
233
  "content": "<|ms|>",
234
  "single_word": false,
235
  "lstrip": false,
236
  "rstrip": false,
237
- "normalized": false,
238
- "special": true
239
  },
240
  {
241
  "id": 50282,
 
242
  "content": "<|cs|>",
243
  "single_word": false,
244
  "lstrip": false,
245
  "rstrip": false,
246
- "normalized": false,
247
- "special": true
248
  },
249
  {
250
  "id": 50283,
 
251
  "content": "<|ro|>",
252
  "single_word": false,
253
  "lstrip": false,
254
  "rstrip": false,
255
- "normalized": false,
256
- "special": true
257
  },
258
  {
259
  "id": 50284,
 
260
  "content": "<|da|>",
261
  "single_word": false,
262
  "lstrip": false,
263
  "rstrip": false,
264
- "normalized": false,
265
- "special": true
266
  },
267
  {
268
  "id": 50285,
 
269
  "content": "<|hu|>",
270
  "single_word": false,
271
  "lstrip": false,
272
  "rstrip": false,
273
- "normalized": false,
274
- "special": true
275
  },
276
  {
277
  "id": 50286,
 
278
  "content": "<|ta|>",
279
  "single_word": false,
280
  "lstrip": false,
281
  "rstrip": false,
282
- "normalized": false,
283
- "special": true
284
  },
285
  {
286
  "id": 50287,
 
287
  "content": "<|no|>",
288
  "single_word": false,
289
  "lstrip": false,
290
  "rstrip": false,
291
- "normalized": false,
292
- "special": true
293
  },
294
  {
295
  "id": 50288,
 
296
  "content": "<|th|>",
297
  "single_word": false,
298
  "lstrip": false,
299
  "rstrip": false,
300
- "normalized": false,
301
- "special": true
302
  },
303
  {
304
  "id": 50289,
 
305
  "content": "<|ur|>",
306
  "single_word": false,
307
  "lstrip": false,
308
  "rstrip": false,
309
- "normalized": false,
310
- "special": true
311
  },
312
  {
313
  "id": 50290,
 
314
  "content": "<|hr|>",
315
  "single_word": false,
316
  "lstrip": false,
317
  "rstrip": false,
318
- "normalized": false,
319
- "special": true
320
  },
321
  {
322
  "id": 50291,
 
323
  "content": "<|bg|>",
324
  "single_word": false,
325
  "lstrip": false,
326
  "rstrip": false,
327
- "normalized": false,
328
- "special": true
329
  },
330
  {
331
  "id": 50292,
 
332
  "content": "<|lt|>",
333
  "single_word": false,
334
  "lstrip": false,
335
  "rstrip": false,
336
- "normalized": false,
337
- "special": true
338
  },
339
  {
340
  "id": 50293,
 
341
  "content": "<|la|>",
342
  "single_word": false,
343
  "lstrip": false,
344
  "rstrip": false,
345
- "normalized": false,
346
- "special": true
347
  },
348
  {
349
  "id": 50294,
 
350
  "content": "<|mi|>",
351
  "single_word": false,
352
  "lstrip": false,
353
  "rstrip": false,
354
- "normalized": false,
355
- "special": true
356
  },
357
  {
358
  "id": 50295,
 
359
  "content": "<|ml|>",
360
  "single_word": false,
361
  "lstrip": false,
362
  "rstrip": false,
363
- "normalized": false,
364
- "special": true
365
  },
366
  {
367
  "id": 50296,
 
368
  "content": "<|cy|>",
369
  "single_word": false,
370
  "lstrip": false,
371
  "rstrip": false,
372
- "normalized": false,
373
- "special": true
374
  },
375
  {
376
  "id": 50297,
 
377
  "content": "<|sk|>",
378
  "single_word": false,
379
  "lstrip": false,
380
  "rstrip": false,
381
- "normalized": false,
382
- "special": true
383
  },
384
  {
385
  "id": 50298,
 
386
  "content": "<|te|>",
387
  "single_word": false,
388
  "lstrip": false,
389
  "rstrip": false,
390
- "normalized": false,
391
- "special": true
392
  },
393
  {
394
  "id": 50299,
 
395
  "content": "<|fa|>",
396
  "single_word": false,
397
  "lstrip": false,
398
  "rstrip": false,
399
- "normalized": false,
400
- "special": true
401
  },
402
  {
403
  "id": 50300,
 
404
  "content": "<|lv|>",
405
  "single_word": false,
406
  "lstrip": false,
407
  "rstrip": false,
408
- "normalized": false,
409
- "special": true
410
  },
411
  {
412
  "id": 50301,
 
413
  "content": "<|bn|>",
414
  "single_word": false,
415
  "lstrip": false,
416
  "rstrip": false,
417
- "normalized": false,
418
- "special": true
419
  },
420
  {
421
  "id": 50302,
 
422
  "content": "<|sr|>",
423
  "single_word": false,
424
  "lstrip": false,
425
  "rstrip": false,
426
- "normalized": false,
427
- "special": true
428
  },
429
  {
430
  "id": 50303,
 
431
  "content": "<|az|>",
432
  "single_word": false,
433
  "lstrip": false,
434
  "rstrip": false,
435
- "normalized": false,
436
- "special": true
437
  },
438
  {
439
  "id": 50304,
 
440
  "content": "<|sl|>",
441
  "single_word": false,
442
  "lstrip": false,
443
  "rstrip": false,
444
- "normalized": false,
445
- "special": true
446
  },
447
  {
448
  "id": 50305,
 
449
  "content": "<|kn|>",
450
  "single_word": false,
451
  "lstrip": false,
452
  "rstrip": false,
453
- "normalized": false,
454
- "special": true
455
  },
456
  {
457
  "id": 50306,
 
458
  "content": "<|et|>",
459
  "single_word": false,
460
  "lstrip": false,
461
  "rstrip": false,
462
- "normalized": false,
463
- "special": true
464
  },
465
  {
466
  "id": 50307,
 
467
  "content": "<|mk|>",
468
  "single_word": false,
469
  "lstrip": false,
470
  "rstrip": false,
471
- "normalized": false,
472
- "special": true
473
  },
474
  {
475
  "id": 50308,
 
476
  "content": "<|br|>",
477
  "single_word": false,
478
  "lstrip": false,
479
  "rstrip": false,
480
- "normalized": false,
481
- "special": true
482
  },
483
  {
484
  "id": 50309,
 
485
  "content": "<|eu|>",
486
  "single_word": false,
487
  "lstrip": false,
488
  "rstrip": false,
489
- "normalized": false,
490
- "special": true
491
  },
492
  {
493
  "id": 50310,
 
494
  "content": "<|is|>",
495
  "single_word": false,
496
  "lstrip": false,
497
  "rstrip": false,
498
- "normalized": false,
499
- "special": true
500
  },
501
  {
502
  "id": 50311,
 
503
  "content": "<|hy|>",
504
  "single_word": false,
505
  "lstrip": false,
506
  "rstrip": false,
507
- "normalized": false,
508
- "special": true
509
  },
510
  {
511
  "id": 50312,
 
512
  "content": "<|ne|>",
513
  "single_word": false,
514
  "lstrip": false,
515
  "rstrip": false,
516
- "normalized": false,
517
- "special": true
518
  },
519
  {
520
  "id": 50313,
 
521
  "content": "<|mn|>",
522
  "single_word": false,
523
  "lstrip": false,
524
  "rstrip": false,
525
- "normalized": false,
526
- "special": true
527
  },
528
  {
529
  "id": 50314,
 
530
  "content": "<|bs|>",
531
  "single_word": false,
532
  "lstrip": false,
533
  "rstrip": false,
534
- "normalized": false,
535
- "special": true
536
  },
537
  {
538
  "id": 50315,
 
539
  "content": "<|kk|>",
540
  "single_word": false,
541
  "lstrip": false,
542
  "rstrip": false,
543
- "normalized": false,
544
- "special": true
545
  },
546
  {
547
  "id": 50316,
 
548
  "content": "<|sq|>",
549
  "single_word": false,
550
  "lstrip": false,
551
  "rstrip": false,
552
- "normalized": false,
553
- "special": true
554
  },
555
  {
556
  "id": 50317,
 
557
  "content": "<|sw|>",
558
  "single_word": false,
559
  "lstrip": false,
560
  "rstrip": false,
561
- "normalized": false,
562
- "special": true
563
  },
564
  {
565
  "id": 50318,
 
566
  "content": "<|gl|>",
567
  "single_word": false,
568
  "lstrip": false,
569
  "rstrip": false,
570
- "normalized": false,
571
- "special": true
572
  },
573
  {
574
  "id": 50319,
 
575
  "content": "<|mr|>",
576
  "single_word": false,
577
  "lstrip": false,
578
  "rstrip": false,
579
- "normalized": false,
580
- "special": true
581
  },
582
  {
583
  "id": 50320,
 
584
  "content": "<|pa|>",
585
  "single_word": false,
586
  "lstrip": false,
587
  "rstrip": false,
588
- "normalized": false,
589
- "special": true
590
  },
591
  {
592
  "id": 50321,
 
593
  "content": "<|si|>",
594
  "single_word": false,
595
  "lstrip": false,
596
  "rstrip": false,
597
- "normalized": false,
598
- "special": true
599
  },
600
  {
601
  "id": 50322,
 
602
  "content": "<|km|>",
603
  "single_word": false,
604
  "lstrip": false,
605
  "rstrip": false,
606
- "normalized": false,
607
- "special": true
608
  },
609
  {
610
  "id": 50323,
 
611
  "content": "<|sn|>",
612
  "single_word": false,
613
  "lstrip": false,
614
  "rstrip": false,
615
- "normalized": false,
616
- "special": true
617
  },
618
  {
619
  "id": 50324,
 
620
  "content": "<|yo|>",
621
  "single_word": false,
622
  "lstrip": false,
623
  "rstrip": false,
624
- "normalized": false,
625
- "special": true
626
  },
627
  {
628
  "id": 50325,
 
629
  "content": "<|so|>",
630
  "single_word": false,
631
  "lstrip": false,
632
  "rstrip": false,
633
- "normalized": false,
634
- "special": true
635
  },
636
  {
637
  "id": 50326,
 
638
  "content": "<|af|>",
639
  "single_word": false,
640
  "lstrip": false,
641
  "rstrip": false,
642
- "normalized": false,
643
- "special": true
644
  },
645
  {
646
  "id": 50327,
 
647
  "content": "<|oc|>",
648
  "single_word": false,
649
  "lstrip": false,
650
  "rstrip": false,
651
- "normalized": false,
652
- "special": true
653
  },
654
  {
655
  "id": 50328,
 
656
  "content": "<|ka|>",
657
  "single_word": false,
658
  "lstrip": false,
659
  "rstrip": false,
660
- "normalized": false,
661
- "special": true
662
  },
663
  {
664
  "id": 50329,
 
665
  "content": "<|be|>",
666
  "single_word": false,
667
  "lstrip": false,
668
  "rstrip": false,
669
- "normalized": false,
670
- "special": true
671
  },
672
  {
673
  "id": 50330,
 
674
  "content": "<|tg|>",
675
  "single_word": false,
676
  "lstrip": false,
677
  "rstrip": false,
678
- "normalized": false,
679
- "special": true
680
  },
681
  {
682
  "id": 50331,
 
683
  "content": "<|sd|>",
684
  "single_word": false,
685
  "lstrip": false,
686
  "rstrip": false,
687
- "normalized": false,
688
- "special": true
689
  },
690
  {
691
  "id": 50332,
 
692
  "content": "<|gu|>",
693
  "single_word": false,
694
  "lstrip": false,
695
  "rstrip": false,
696
- "normalized": false,
697
- "special": true
698
  },
699
  {
700
  "id": 50333,
 
701
  "content": "<|am|>",
702
  "single_word": false,
703
  "lstrip": false,
704
  "rstrip": false,
705
- "normalized": false,
706
- "special": true
707
  },
708
  {
709
  "id": 50334,
 
710
  "content": "<|yi|>",
711
  "single_word": false,
712
  "lstrip": false,
713
  "rstrip": false,
714
- "normalized": false,
715
- "special": true
716
  },
717
  {
718
  "id": 50335,
 
719
  "content": "<|lo|>",
720
  "single_word": false,
721
  "lstrip": false,
722
  "rstrip": false,
723
- "normalized": false,
724
- "special": true
725
  },
726
  {
727
  "id": 50336,
 
728
  "content": "<|uz|>",
729
  "single_word": false,
730
  "lstrip": false,
731
  "rstrip": false,
732
- "normalized": false,
733
- "special": true
734
  },
735
  {
736
  "id": 50337,
 
737
  "content": "<|fo|>",
738
  "single_word": false,
739
  "lstrip": false,
740
  "rstrip": false,
741
- "normalized": false,
742
- "special": true
743
  },
744
  {
745
  "id": 50338,
 
746
  "content": "<|ht|>",
747
  "single_word": false,
748
  "lstrip": false,
749
  "rstrip": false,
750
- "normalized": false,
751
- "special": true
752
  },
753
  {
754
  "id": 50339,
 
755
  "content": "<|ps|>",
756
  "single_word": false,
757
  "lstrip": false,
758
  "rstrip": false,
759
- "normalized": false,
760
- "special": true
761
  },
762
  {
763
  "id": 50340,
 
764
  "content": "<|tk|>",
765
  "single_word": false,
766
  "lstrip": false,
767
  "rstrip": false,
768
- "normalized": false,
769
- "special": true
770
  },
771
  {
772
  "id": 50341,
 
773
  "content": "<|nn|>",
774
  "single_word": false,
775
  "lstrip": false,
776
  "rstrip": false,
777
- "normalized": false,
778
- "special": true
779
  },
780
  {
781
  "id": 50342,
 
782
  "content": "<|mt|>",
783
  "single_word": false,
784
  "lstrip": false,
785
  "rstrip": false,
786
- "normalized": false,
787
- "special": true
788
  },
789
  {
790
  "id": 50343,
 
791
  "content": "<|sa|>",
792
  "single_word": false,
793
  "lstrip": false,
794
  "rstrip": false,
795
- "normalized": false,
796
- "special": true
797
  },
798
  {
799
  "id": 50344,
 
800
  "content": "<|lb|>",
801
  "single_word": false,
802
  "lstrip": false,
803
  "rstrip": false,
804
- "normalized": false,
805
- "special": true
806
  },
807
  {
808
  "id": 50345,
 
809
  "content": "<|my|>",
810
  "single_word": false,
811
  "lstrip": false,
812
  "rstrip": false,
813
- "normalized": false,
814
- "special": true
815
  },
816
  {
817
  "id": 50346,
 
818
  "content": "<|bo|>",
819
  "single_word": false,
820
  "lstrip": false,
821
  "rstrip": false,
822
- "normalized": false,
823
- "special": true
824
  },
825
  {
826
  "id": 50347,
 
827
  "content": "<|tl|>",
828
  "single_word": false,
829
  "lstrip": false,
830
  "rstrip": false,
831
- "normalized": false,
832
- "special": true
833
  },
834
  {
835
  "id": 50348,
 
836
  "content": "<|mg|>",
837
  "single_word": false,
838
  "lstrip": false,
839
  "rstrip": false,
840
- "normalized": false,
841
- "special": true
842
  },
843
  {
844
  "id": 50349,
 
845
  "content": "<|as|>",
846
  "single_word": false,
847
  "lstrip": false,
848
  "rstrip": false,
849
- "normalized": false,
850
- "special": true
851
  },
852
  {
853
  "id": 50350,
 
854
  "content": "<|tt|>",
855
  "single_word": false,
856
  "lstrip": false,
857
  "rstrip": false,
858
- "normalized": false,
859
- "special": true
860
  },
861
  {
862
  "id": 50351,
 
863
  "content": "<|haw|>",
864
  "single_word": false,
865
  "lstrip": false,
866
  "rstrip": false,
867
- "normalized": false,
868
- "special": true
869
  },
870
  {
871
  "id": 50352,
 
872
  "content": "<|ln|>",
873
  "single_word": false,
874
  "lstrip": false,
875
  "rstrip": false,
876
- "normalized": false,
877
- "special": true
878
  },
879
  {
880
  "id": 50353,
 
881
  "content": "<|ha|>",
882
  "single_word": false,
883
  "lstrip": false,
884
  "rstrip": false,
885
- "normalized": false,
886
- "special": true
887
  },
888
  {
889
  "id": 50354,
 
890
  "content": "<|ba|>",
891
  "single_word": false,
892
  "lstrip": false,
893
  "rstrip": false,
894
- "normalized": false,
895
- "special": true
896
  },
897
  {
898
  "id": 50355,
 
899
  "content": "<|jw|>",
900
  "single_word": false,
901
  "lstrip": false,
902
  "rstrip": false,
903
- "normalized": false,
904
- "special": true
905
  },
906
  {
907
  "id": 50356,
 
908
  "content": "<|su|>",
909
  "single_word": false,
910
  "lstrip": false,
911
  "rstrip": false,
912
- "normalized": false,
913
- "special": true
914
  },
915
  {
916
  "id": 50357,
 
917
  "content": "<|translate|>",
918
  "single_word": false,
919
  "lstrip": false,
920
  "rstrip": false,
921
- "normalized": false,
922
- "special": true
923
  },
924
  {
925
  "id": 50358,
 
926
  "content": "<|transcribe|>",
927
  "single_word": false,
928
  "lstrip": false,
929
  "rstrip": false,
930
- "normalized": false,
931
- "special": true
932
  },
933
  {
934
  "id": 50359,
 
935
  "content": "<|startoflm|>",
936
  "single_word": false,
937
  "lstrip": false,
938
  "rstrip": false,
939
- "normalized": false,
940
- "special": true
941
  },
942
  {
943
  "id": 50360,
 
944
  "content": "<|startofprev|>",
945
  "single_word": false,
946
  "lstrip": false,
947
  "rstrip": false,
948
- "normalized": false,
949
- "special": true
950
  },
951
  {
952
  "id": 50361,
 
953
  "content": "<|nocaptions|>",
954
  "single_word": false,
955
  "lstrip": false,
956
  "rstrip": false,
957
- "normalized": false,
958
- "special": true
959
  },
960
  {
961
  "id": 50362,
 
962
  "content": "<|notimestamps|>",
963
  "single_word": false,
964
  "lstrip": false,
965
  "rstrip": false,
966
- "normalized": false,
967
- "special": true
968
  }
969
  ],
970
  "normalizer": null,
971
  "pre_tokenizer": {
972
  "type": "ByteLevel",
973
  "add_prefix_space": false,
974
- "trim_offsets": true,
975
- "use_regex": true
976
  },
977
  "post_processor": {
978
  "type": "TemplateProcessing",
@@ -1067,8 +1066,7 @@
1067
  "decoder": {
1068
  "type": "ByteLevel",
1069
  "add_prefix_space": true,
1070
- "trim_offsets": true,
1071
- "use_regex": true
1072
  },
1073
  "model": {
1074
  "type": "BPE",
5
  "added_tokens": [
6
  {
7
  "id": 50256,
8
+ "special": true,
9
  "content": "<|endoftext|>",
10
  "single_word": false,
11
  "lstrip": false,
12
  "rstrip": false,
13
+ "normalized": false
 
14
  },
15
  {
16
  "id": 50257,
17
+ "special": true,
18
  "content": "<|startoftranscript|>",
19
  "single_word": false,
20
  "lstrip": false,
21
  "rstrip": false,
22
+ "normalized": false
 
23
  },
24
  {
25
  "id": 50258,
26
+ "special": true,
27
  "content": "<|en|>",
28
  "single_word": false,
29
  "lstrip": false,
30
  "rstrip": false,
31
+ "normalized": false
 
32
  },
33
  {
34
  "id": 50259,
35
+ "special": true,
36
  "content": "<|zh|>",
37
  "single_word": false,
38
  "lstrip": false,
39
  "rstrip": false,
40
+ "normalized": false
 
41
  },
42
  {
43
  "id": 50260,
44
+ "special": true,
45
  "content": "<|de|>",
46
  "single_word": false,
47
  "lstrip": false,
48
  "rstrip": false,
49
+ "normalized": false
 
50
  },
51
  {
52
  "id": 50261,
53
+ "special": true,
54
  "content": "<|es|>",
55
  "single_word": false,
56
  "lstrip": false,
57
  "rstrip": false,
58
+ "normalized": false
 
59
  },
60
  {
61
  "id": 50262,
62
+ "special": true,
63
  "content": "<|ru|>",
64
  "single_word": false,
65
  "lstrip": false,
66
  "rstrip": false,
67
+ "normalized": false
 
68
  },
69
  {
70
  "id": 50263,
71
+ "special": true,
72
  "content": "<|ko|>",
73
  "single_word": false,
74
  "lstrip": false,
75
  "rstrip": false,
76
+ "normalized": false
 
77
  },
78
  {
79
  "id": 50264,
80
+ "special": true,
81
  "content": "<|fr|>",
82
  "single_word": false,
83
  "lstrip": false,
84
  "rstrip": false,
85
+ "normalized": false
 
86
  },
87
  {
88
  "id": 50265,
89
+ "special": true,
90
  "content": "<|ja|>",
91
  "single_word": false,
92
  "lstrip": false,
93
  "rstrip": false,
94
+ "normalized": false
 
95
  },
96
  {
97
  "id": 50266,
98
+ "special": true,
99
  "content": "<|pt|>",
100
  "single_word": false,
101
  "lstrip": false,
102
  "rstrip": false,
103
+ "normalized": false
 
104
  },
105
  {
106
  "id": 50267,
107
+ "special": true,
108
  "content": "<|tr|>",
109
  "single_word": false,
110
  "lstrip": false,
111
  "rstrip": false,
112
+ "normalized": false
 
113
  },
114
  {
115
  "id": 50268,
116
+ "special": true,
117
  "content": "<|pl|>",
118
  "single_word": false,
119
  "lstrip": false,
120
  "rstrip": false,
121
+ "normalized": false
 
122
  },
123
  {
124
  "id": 50269,
125
+ "special": true,
126
  "content": "<|ca|>",
127
  "single_word": false,
128
  "lstrip": false,
129
  "rstrip": false,
130
+ "normalized": false
 
131
  },
132
  {
133
  "id": 50270,
134
+ "special": true,
135
  "content": "<|nl|>",
136
  "single_word": false,
137
  "lstrip": false,
138
  "rstrip": false,
139
+ "normalized": false
 
140
  },
141
  {
142
  "id": 50271,
143
+ "special": true,
144
  "content": "<|ar|>",
145
  "single_word": false,
146
  "lstrip": false,
147
  "rstrip": false,
148
+ "normalized": false
 
149
  },
150
  {
151
  "id": 50272,
152
+ "special": true,
153
  "content": "<|sv|>",
154
  "single_word": false,
155
  "lstrip": false,
156
  "rstrip": false,
157
+ "normalized": false
 
158
  },
159
  {
160
  "id": 50273,
161
+ "special": true,
162
  "content": "<|it|>",
163
  "single_word": false,
164
  "lstrip": false,
165
  "rstrip": false,
166
+ "normalized": false
 
167
  },
168
  {
169
  "id": 50274,
170
+ "special": true,
171
  "content": "<|id|>",
172
  "single_word": false,
173
  "lstrip": false,
174
  "rstrip": false,
175
+ "normalized": false
 
176
  },
177
  {
178
  "id": 50275,
179
+ "special": true,
180
  "content": "<|hi|>",
181
  "single_word": false,
182
  "lstrip": false,
183
  "rstrip": false,
184
+ "normalized": false
 
185
  },
186
  {
187
  "id": 50276,
188
+ "special": true,
189
  "content": "<|fi|>",
190
  "single_word": false,
191
  "lstrip": false,
192
  "rstrip": false,
193
+ "normalized": false
 
194
  },
195
  {
196
  "id": 50277,
197
+ "special": true,
198
  "content": "<|vi|>",
199
  "single_word": false,
200
  "lstrip": false,
201
  "rstrip": false,
202
+ "normalized": false
 
203
  },
204
  {
205
  "id": 50278,
206
+ "special": true,
207
  "content": "<|iw|>",
208
  "single_word": false,
209
  "lstrip": false,
210
  "rstrip": false,
211
+ "normalized": false
 
212
  },
213
  {
214
  "id": 50279,
215
+ "special": true,
216
  "content": "<|uk|>",
217
  "single_word": false,
218
  "lstrip": false,
219
  "rstrip": false,
220
+ "normalized": false
 
221
  },
222
  {
223
  "id": 50280,
224
+ "special": true,
225
  "content": "<|el|>",
226
  "single_word": false,
227
  "lstrip": false,
228
  "rstrip": false,
229
+ "normalized": false
 
230
  },
231
  {
232
  "id": 50281,
233
+ "special": true,
234
  "content": "<|ms|>",
235
  "single_word": false,
236
  "lstrip": false,
237
  "rstrip": false,
238
+ "normalized": false
 
239
  },
240
  {
241
  "id": 50282,
242
+ "special": true,
243
  "content": "<|cs|>",
244
  "single_word": false,
245
  "lstrip": false,
246
  "rstrip": false,
247
+ "normalized": false
 
248
  },
249
  {
250
  "id": 50283,
251
+ "special": true,
252
  "content": "<|ro|>",
253
  "single_word": false,
254
  "lstrip": false,
255
  "rstrip": false,
256
+ "normalized": false
 
257
  },
258
  {
259
  "id": 50284,
260
+ "special": true,
261
  "content": "<|da|>",
262
  "single_word": false,
263
  "lstrip": false,
264
  "rstrip": false,
265
+ "normalized": false
 
266
  },
267
  {
268
  "id": 50285,
269
+ "special": true,
270
  "content": "<|hu|>",
271
  "single_word": false,
272
  "lstrip": false,
273
  "rstrip": false,
274
+ "normalized": false
 
275
  },
276
  {
277
  "id": 50286,
278
+ "special": true,
279
  "content": "<|ta|>",
280
  "single_word": false,
281
  "lstrip": false,
282
  "rstrip": false,
283
+ "normalized": false
 
284
  },
285
  {
286
  "id": 50287,
287
+ "special": true,
288
  "content": "<|no|>",
289
  "single_word": false,
290
  "lstrip": false,
291
  "rstrip": false,
292
+ "normalized": false
 
293
  },
294
  {
295
  "id": 50288,
296
+ "special": true,
297
  "content": "<|th|>",
298
  "single_word": false,
299
  "lstrip": false,
300
  "rstrip": false,
301
+ "normalized": false
 
302
  },
303
  {
304
  "id": 50289,
305
+ "special": true,
306
  "content": "<|ur|>",
307
  "single_word": false,
308
  "lstrip": false,
309
  "rstrip": false,
310
+ "normalized": false
 
311
  },
312
  {
313
  "id": 50290,
314
+ "special": true,
315
  "content": "<|hr|>",
316
  "single_word": false,
317
  "lstrip": false,
318
  "rstrip": false,
319
+ "normalized": false
 
320
  },
321
  {
322
  "id": 50291,
323
+ "special": true,
324
  "content": "<|bg|>",
325
  "single_word": false,
326
  "lstrip": false,
327
  "rstrip": false,
328
+ "normalized": false
 
329
  },
330
  {
331
  "id": 50292,
332
+ "special": true,
333
  "content": "<|lt|>",
334
  "single_word": false,
335
  "lstrip": false,
336
  "rstrip": false,
337
+ "normalized": false
 
338
  },
339
  {
340
  "id": 50293,
341
+ "special": true,
342
  "content": "<|la|>",
343
  "single_word": false,
344
  "lstrip": false,
345
  "rstrip": false,
346
+ "normalized": false
 
347
  },
348
  {
349
  "id": 50294,
350
+ "special": true,
351
  "content": "<|mi|>",
352
  "single_word": false,
353
  "lstrip": false,
354
  "rstrip": false,
355
+ "normalized": false
 
356
  },
357
  {
358
  "id": 50295,
359
+ "special": true,
360
  "content": "<|ml|>",
361
  "single_word": false,
362
  "lstrip": false,
363
  "rstrip": false,
364
+ "normalized": false
 
365
  },
366
  {
367
  "id": 50296,
368
+ "special": true,
369
  "content": "<|cy|>",
370
  "single_word": false,
371
  "lstrip": false,
372
  "rstrip": false,
373
+ "normalized": false
 
374
  },
375
  {
376
  "id": 50297,
377
+ "special": true,
378
  "content": "<|sk|>",
379
  "single_word": false,
380
  "lstrip": false,
381
  "rstrip": false,
382
+ "normalized": false
 
383
  },
384
  {
385
  "id": 50298,
386
+ "special": true,
387
  "content": "<|te|>",
388
  "single_word": false,
389
  "lstrip": false,
390
  "rstrip": false,
391
+ "normalized": false
 
392
  },
393
  {
394
  "id": 50299,
395
+ "special": true,
396
  "content": "<|fa|>",
397
  "single_word": false,
398
  "lstrip": false,
399
  "rstrip": false,
400
+ "normalized": false
 
401
  },
402
  {
403
  "id": 50300,
404
+ "special": true,
405
  "content": "<|lv|>",
406
  "single_word": false,
407
  "lstrip": false,
408
  "rstrip": false,
409
+ "normalized": false
 
410
  },
411
  {
412
  "id": 50301,
413
+ "special": true,
414
  "content": "<|bn|>",
415
  "single_word": false,
416
  "lstrip": false,
417
  "rstrip": false,
418
+ "normalized": false
 
419
  },
420
  {
421
  "id": 50302,
422
+ "special": true,
423
  "content": "<|sr|>",
424
  "single_word": false,
425
  "lstrip": false,
426
  "rstrip": false,
427
+ "normalized": false
 
428
  },
429
  {
430
  "id": 50303,
431
+ "special": true,
432
  "content": "<|az|>",
433
  "single_word": false,
434
  "lstrip": false,
435
  "rstrip": false,
436
+ "normalized": false
 
437
  },
438
  {
439
  "id": 50304,
440
+ "special": true,
441
  "content": "<|sl|>",
442
  "single_word": false,
443
  "lstrip": false,
444
  "rstrip": false,
445
+ "normalized": false
 
446
  },
447
  {
448
  "id": 50305,
449
+ "special": true,
450
  "content": "<|kn|>",
451
  "single_word": false,
452
  "lstrip": false,
453
  "rstrip": false,
454
+ "normalized": false
 
455
  },
456
  {
457
  "id": 50306,
458
+ "special": true,
459
  "content": "<|et|>",
460
  "single_word": false,
461
  "lstrip": false,
462
  "rstrip": false,
463
+ "normalized": false
 
464
  },
465
  {
466
  "id": 50307,
467
+ "special": true,
468
  "content": "<|mk|>",
469
  "single_word": false,
470
  "lstrip": false,
471
  "rstrip": false,
472
+ "normalized": false
 
473
  },
474
  {
475
  "id": 50308,
476
+ "special": true,
477
  "content": "<|br|>",
478
  "single_word": false,
479
  "lstrip": false,
480
  "rstrip": false,
481
+ "normalized": false
 
482
  },
483
  {
484
  "id": 50309,
485
+ "special": true,
486
  "content": "<|eu|>",
487
  "single_word": false,
488
  "lstrip": false,
489
  "rstrip": false,
490
+ "normalized": false
 
491
  },
492
  {
493
  "id": 50310,
494
+ "special": true,
495
  "content": "<|is|>",
496
  "single_word": false,
497
  "lstrip": false,
498
  "rstrip": false,
499
+ "normalized": false
 
500
  },
501
  {
502
  "id": 50311,
503
+ "special": true,
504
  "content": "<|hy|>",
505
  "single_word": false,
506
  "lstrip": false,
507
  "rstrip": false,
508
+ "normalized": false
 
509
  },
510
  {
511
  "id": 50312,
512
+ "special": true,
513
  "content": "<|ne|>",
514
  "single_word": false,
515
  "lstrip": false,
516
  "rstrip": false,
517
+ "normalized": false
 
518
  },
519
  {
520
  "id": 50313,
521
+ "special": true,
522
  "content": "<|mn|>",
523
  "single_word": false,
524
  "lstrip": false,
525
  "rstrip": false,
526
+ "normalized": false
 
527
  },
528
  {
529
  "id": 50314,
530
+ "special": true,
531
  "content": "<|bs|>",
532
  "single_word": false,
533
  "lstrip": false,
534
  "rstrip": false,
535
+ "normalized": false
 
536
  },
537
  {
538
  "id": 50315,
539
+ "special": true,
540
  "content": "<|kk|>",
541
  "single_word": false,
542
  "lstrip": false,
543
  "rstrip": false,
544
+ "normalized": false
 
545
  },
546
  {
547
  "id": 50316,
548
+ "special": true,
549
  "content": "<|sq|>",
550
  "single_word": false,
551
  "lstrip": false,
552
  "rstrip": false,
553
+ "normalized": false
 
554
  },
555
  {
556
  "id": 50317,
557
+ "special": true,
558
  "content": "<|sw|>",
559
  "single_word": false,
560
  "lstrip": false,
561
  "rstrip": false,
562
+ "normalized": false
 
563
  },
564
  {
565
  "id": 50318,
566
+ "special": true,
567
  "content": "<|gl|>",
568
  "single_word": false,
569
  "lstrip": false,
570
  "rstrip": false,
571
+ "normalized": false
 
572
  },
573
  {
574
  "id": 50319,
575
+ "special": true,
576
  "content": "<|mr|>",
577
  "single_word": false,
578
  "lstrip": false,
579
  "rstrip": false,
580
+ "normalized": false
 
581
  },
582
  {
583
  "id": 50320,
584
+ "special": true,
585
  "content": "<|pa|>",
586
  "single_word": false,
587
  "lstrip": false,
588
  "rstrip": false,
589
+ "normalized": false
 
590
  },
591
  {
592
  "id": 50321,
593
+ "special": true,
594
  "content": "<|si|>",
595
  "single_word": false,
596
  "lstrip": false,
597
  "rstrip": false,
598
+ "normalized": false
 
599
  },
600
  {
601
  "id": 50322,
602
+ "special": true,
603
  "content": "<|km|>",
604
  "single_word": false,
605
  "lstrip": false,
606
  "rstrip": false,
607
+ "normalized": false
 
608
  },
609
  {
610
  "id": 50323,
611
+ "special": true,
612
  "content": "<|sn|>",
613
  "single_word": false,
614
  "lstrip": false,
615
  "rstrip": false,
616
+ "normalized": false
 
617
  },
618
  {
619
  "id": 50324,
620
+ "special": true,
621
  "content": "<|yo|>",
622
  "single_word": false,
623
  "lstrip": false,
624
  "rstrip": false,
625
+ "normalized": false
 
626
  },
627
  {
628
  "id": 50325,
629
+ "special": true,
630
  "content": "<|so|>",
631
  "single_word": false,
632
  "lstrip": false,
633
  "rstrip": false,
634
+ "normalized": false
 
635
  },
636
  {
637
  "id": 50326,
638
+ "special": true,
639
  "content": "<|af|>",
640
  "single_word": false,
641
  "lstrip": false,
642
  "rstrip": false,
643
+ "normalized": false
 
644
  },
645
  {
646
  "id": 50327,
647
+ "special": true,
648
  "content": "<|oc|>",
649
  "single_word": false,
650
  "lstrip": false,
651
  "rstrip": false,
652
+ "normalized": false
 
653
  },
654
  {
655
  "id": 50328,
656
+ "special": true,
657
  "content": "<|ka|>",
658
  "single_word": false,
659
  "lstrip": false,
660
  "rstrip": false,
661
+ "normalized": false
 
662
  },
663
  {
664
  "id": 50329,
665
+ "special": true,
666
  "content": "<|be|>",
667
  "single_word": false,
668
  "lstrip": false,
669
  "rstrip": false,
670
+ "normalized": false
 
671
  },
672
  {
673
  "id": 50330,
674
+ "special": true,
675
  "content": "<|tg|>",
676
  "single_word": false,
677
  "lstrip": false,
678
  "rstrip": false,
679
+ "normalized": false
 
680
  },
681
  {
682
  "id": 50331,
683
+ "special": true,
684
  "content": "<|sd|>",
685
  "single_word": false,
686
  "lstrip": false,
687
  "rstrip": false,
688
+ "normalized": false
 
689
  },
690
  {
691
  "id": 50332,
692
+ "special": true,
693
  "content": "<|gu|>",
694
  "single_word": false,
695
  "lstrip": false,
696
  "rstrip": false,
697
+ "normalized": false
 
698
  },
699
  {
700
  "id": 50333,
701
+ "special": true,
702
  "content": "<|am|>",
703
  "single_word": false,
704
  "lstrip": false,
705
  "rstrip": false,
706
+ "normalized": false
 
707
  },
708
  {
709
  "id": 50334,
710
+ "special": true,
711
  "content": "<|yi|>",
712
  "single_word": false,
713
  "lstrip": false,
714
  "rstrip": false,
715
+ "normalized": false
 
716
  },
717
  {
718
  "id": 50335,
719
+ "special": true,
720
  "content": "<|lo|>",
721
  "single_word": false,
722
  "lstrip": false,
723
  "rstrip": false,
724
+ "normalized": false
 
725
  },
726
  {
727
  "id": 50336,
728
+ "special": true,
729
  "content": "<|uz|>",
730
  "single_word": false,
731
  "lstrip": false,
732
  "rstrip": false,
733
+ "normalized": false
 
734
  },
735
  {
736
  "id": 50337,
737
+ "special": true,
738
  "content": "<|fo|>",
739
  "single_word": false,
740
  "lstrip": false,
741
  "rstrip": false,
742
+ "normalized": false
 
743
  },
744
  {
745
  "id": 50338,
746
+ "special": true,
747
  "content": "<|ht|>",
748
  "single_word": false,
749
  "lstrip": false,
750
  "rstrip": false,
751
+ "normalized": false
 
752
  },
753
  {
754
  "id": 50339,
755
+ "special": true,
756
  "content": "<|ps|>",
757
  "single_word": false,
758
  "lstrip": false,
759
  "rstrip": false,
760
+ "normalized": false
 
761
  },
762
  {
763
  "id": 50340,
764
+ "special": true,
765
  "content": "<|tk|>",
766
  "single_word": false,
767
  "lstrip": false,
768
  "rstrip": false,
769
+ "normalized": false
 
770
  },
771
  {
772
  "id": 50341,
773
+ "special": true,
774
  "content": "<|nn|>",
775
  "single_word": false,
776
  "lstrip": false,
777
  "rstrip": false,
778
+ "normalized": false
 
779
  },
780
  {
781
  "id": 50342,
782
+ "special": true,
783
  "content": "<|mt|>",
784
  "single_word": false,
785
  "lstrip": false,
786
  "rstrip": false,
787
+ "normalized": false
 
788
  },
789
  {
790
  "id": 50343,
791
+ "special": true,
792
  "content": "<|sa|>",
793
  "single_word": false,
794
  "lstrip": false,
795
  "rstrip": false,
796
+ "normalized": false
 
797
  },
798
  {
799
  "id": 50344,
800
+ "special": true,
801
  "content": "<|lb|>",
802
  "single_word": false,
803
  "lstrip": false,
804
  "rstrip": false,
805
+ "normalized": false
 
806
  },
807
  {
808
  "id": 50345,
809
+ "special": true,
810
  "content": "<|my|>",
811
  "single_word": false,
812
  "lstrip": false,
813
  "rstrip": false,
814
+ "normalized": false
 
815
  },
816
  {
817
  "id": 50346,
818
+ "special": true,
819
  "content": "<|bo|>",
820
  "single_word": false,
821
  "lstrip": false,
822
  "rstrip": false,
823
+ "normalized": false
 
824
  },
825
  {
826
  "id": 50347,
827
+ "special": true,
828
  "content": "<|tl|>",
829
  "single_word": false,
830
  "lstrip": false,
831
  "rstrip": false,
832
+ "normalized": false
 
833
  },
834
  {
835
  "id": 50348,
836
+ "special": true,
837
  "content": "<|mg|>",
838
  "single_word": false,
839
  "lstrip": false,
840
  "rstrip": false,
841
+ "normalized": false
 
842
  },
843
  {
844
  "id": 50349,
845
+ "special": true,
846
  "content": "<|as|>",
847
  "single_word": false,
848
  "lstrip": false,
849
  "rstrip": false,
850
+ "normalized": false
 
851
  },
852
  {
853
  "id": 50350,
854
+ "special": true,
855
  "content": "<|tt|>",
856
  "single_word": false,
857
  "lstrip": false,
858
  "rstrip": false,
859
+ "normalized": false
 
860
  },
861
  {
862
  "id": 50351,
863
+ "special": true,
864
  "content": "<|haw|>",
865
  "single_word": false,
866
  "lstrip": false,
867
  "rstrip": false,
868
+ "normalized": false
 
869
  },
870
  {
871
  "id": 50352,
872
+ "special": true,
873
  "content": "<|ln|>",
874
  "single_word": false,
875
  "lstrip": false,
876
  "rstrip": false,
877
+ "normalized": false
 
878
  },
879
  {
880
  "id": 50353,
881
+ "special": true,
882
  "content": "<|ha|>",
883
  "single_word": false,
884
  "lstrip": false,
885
  "rstrip": false,
886
+ "normalized": false
 
887
  },
888
  {
889
  "id": 50354,
890
+ "special": true,
891
  "content": "<|ba|>",
892
  "single_word": false,
893
  "lstrip": false,
894
  "rstrip": false,
895
+ "normalized": false
 
896
  },
897
  {
898
  "id": 50355,
899
+ "special": true,
900
  "content": "<|jw|>",
901
  "single_word": false,
902
  "lstrip": false,
903
  "rstrip": false,
904
+ "normalized": false
 
905
  },
906
  {
907
  "id": 50356,
908
+ "special": true,
909
  "content": "<|su|>",
910
  "single_word": false,
911
  "lstrip": false,
912
  "rstrip": false,
913
+ "normalized": false
 
914
  },
915
  {
916
  "id": 50357,
917
+ "special": true,
918
  "content": "<|translate|>",
919
  "single_word": false,
920
  "lstrip": false,
921
  "rstrip": false,
922
+ "normalized": false
 
923
  },
924
  {
925
  "id": 50358,
926
+ "special": true,
927
  "content": "<|transcribe|>",
928
  "single_word": false,
929
  "lstrip": false,
930
  "rstrip": false,
931
+ "normalized": false
 
932
  },
933
  {
934
  "id": 50359,
935
+ "special": true,
936
  "content": "<|startoflm|>",
937
  "single_word": false,
938
  "lstrip": false,
939
  "rstrip": false,
940
+ "normalized": false
 
941
  },
942
  {
943
  "id": 50360,
944
+ "special": true,
945
  "content": "<|startofprev|>",
946
  "single_word": false,
947
  "lstrip": false,
948
  "rstrip": false,
949
+ "normalized": false
 
950
  },
951
  {
952
  "id": 50361,
953
+ "special": true,
954
  "content": "<|nocaptions|>",
955
  "single_word": false,
956
  "lstrip": false,
957
  "rstrip": false,
958
+ "normalized": false
 
959
  },
960
  {
961
  "id": 50362,
962
+ "special": true,
963
  "content": "<|notimestamps|>",
964
  "single_word": false,
965
  "lstrip": false,
966
  "rstrip": false,
967
+ "normalized": false
 
968
  }
969
  ],
970
  "normalizer": null,
971
  "pre_tokenizer": {
972
  "type": "ByteLevel",
973
  "add_prefix_space": false,
974
+ "trim_offsets": true
 
975
  },
976
  "post_processor": {
977
  "type": "TemplateProcessing",
1066
  "decoder": {
1067
  "type": "ByteLevel",
1068
  "add_prefix_space": true,
1069
+ "trim_offsets": true
 
1070
  },
1071
  "model": {
1072
  "type": "BPE",
quantized/openai/whisper-small.en/speech2seq-lm-with-past/tokenizer_config.json CHANGED
@@ -19,7 +19,6 @@
19
  },
20
  "errors": "replace",
21
  "model_max_length": 1024,
22
- "name_or_path": "openai/whisper-small.en",
23
  "pad_token": null,
24
  "processor_class": "WhisperProcessor",
25
  "return_attention_mask": false,
19
  },
20
  "errors": "replace",
21
  "model_max_length": 1024,
 
22
  "pad_token": null,
23
  "processor_class": "WhisperProcessor",
24
  "return_attention_mask": false,
quantized/openai/whisper-small.en/speech2seq-lm-with-past/vocab.json CHANGED
The diff for this file is too large to render. See raw diff