Joshua Lochner commited on
Commit
fde2d86
1 Parent(s): 74f0b2c

Update whisper-tiny config and tokenizer files

Browse files
quantized/openai/whisper-tiny/default/config.json CHANGED
@@ -2,6 +2,7 @@
2
  "_name_or_path": "openai/whisper-tiny",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
 
5
  "architectures": [
6
  "WhisperForConditionalGeneration"
7
  ],
@@ -11,6 +12,7 @@
11
  50257
12
  ],
13
  "bos_token_id": 50257,
 
14
  "d_model": 384,
15
  "decoder_attention_heads": 6,
16
  "decoder_ffn_dim": 1536,
@@ -39,6 +41,12 @@
39
  ],
40
  "init_std": 0.02,
41
  "is_encoder_decoder": true,
 
 
 
 
 
 
42
  "max_length": 448,
43
  "max_source_positions": 1500,
44
  "max_target_positions": 448,
@@ -135,7 +143,8 @@
135
  50361,
136
  50362
137
  ],
138
- "transformers_version": "4.26.1",
139
  "use_cache": true,
 
140
  "vocab_size": 51865
141
  }
 
2
  "_name_or_path": "openai/whisper-tiny",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
5
+ "apply_spec_augment": false,
6
  "architectures": [
7
  "WhisperForConditionalGeneration"
8
  ],
 
12
  50257
13
  ],
14
  "bos_token_id": 50257,
15
+ "classifier_proj_size": 256,
16
  "d_model": 384,
17
  "decoder_attention_heads": 6,
18
  "decoder_ffn_dim": 1536,
 
41
  ],
42
  "init_std": 0.02,
43
  "is_encoder_decoder": true,
44
+ "mask_feature_length": 10,
45
+ "mask_feature_min_masks": 0,
46
+ "mask_feature_prob": 0.0,
47
+ "mask_time_length": 10,
48
+ "mask_time_min_masks": 2,
49
+ "mask_time_prob": 0.05,
50
  "max_length": 448,
51
  "max_source_positions": 1500,
52
  "max_target_positions": 448,
 
143
  50361,
144
  50362
145
  ],
146
+ "transformers_version": "4.27.0",
147
  "use_cache": true,
148
+ "use_weighted_layer_sum": false,
149
  "vocab_size": 51865
150
  }
quantized/openai/whisper-tiny/default/generation_config.json ADDED
@@ -0,0 +1,219 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "begin_suppress_tokens": [
3
+ 220,
4
+ 50257
5
+ ],
6
+ "bos_token_id": 50257,
7
+ "decoder_start_token_id": 50258,
8
+ "eos_token_id": 50257,
9
+ "forced_decoder_ids": [
10
+ [
11
+ 1,
12
+ null
13
+ ],
14
+ [
15
+ 2,
16
+ 50359
17
+ ]
18
+ ],
19
+ "is_multilingual": true,
20
+ "lang_to_id": {
21
+ "<|af|>": 50327,
22
+ "<|am|>": 50334,
23
+ "<|ar|>": 50272,
24
+ "<|as|>": 50350,
25
+ "<|az|>": 50304,
26
+ "<|ba|>": 50355,
27
+ "<|be|>": 50330,
28
+ "<|bg|>": 50292,
29
+ "<|bn|>": 50302,
30
+ "<|bo|>": 50347,
31
+ "<|br|>": 50309,
32
+ "<|bs|>": 50315,
33
+ "<|ca|>": 50270,
34
+ "<|cs|>": 50283,
35
+ "<|cy|>": 50297,
36
+ "<|da|>": 50285,
37
+ "<|de|>": 50261,
38
+ "<|el|>": 50281,
39
+ "<|en|>": 50259,
40
+ "<|es|>": 50262,
41
+ "<|et|>": 50307,
42
+ "<|eu|>": 50310,
43
+ "<|fa|>": 50300,
44
+ "<|fi|>": 50277,
45
+ "<|fo|>": 50338,
46
+ "<|fr|>": 50265,
47
+ "<|gl|>": 50319,
48
+ "<|gu|>": 50333,
49
+ "<|haw|>": 50352,
50
+ "<|ha|>": 50354,
51
+ "<|he|>": 50279,
52
+ "<|hi|>": 50276,
53
+ "<|hr|>": 50291,
54
+ "<|ht|>": 50339,
55
+ "<|hu|>": 50286,
56
+ "<|hy|>": 50312,
57
+ "<|id|>": 50275,
58
+ "<|is|>": 50311,
59
+ "<|it|>": 50274,
60
+ "<|ja|>": 50266,
61
+ "<|jw|>": 50356,
62
+ "<|ka|>": 50329,
63
+ "<|kk|>": 50316,
64
+ "<|km|>": 50323,
65
+ "<|kn|>": 50306,
66
+ "<|ko|>": 50264,
67
+ "<|la|>": 50294,
68
+ "<|lb|>": 50345,
69
+ "<|ln|>": 50353,
70
+ "<|lo|>": 50336,
71
+ "<|lt|>": 50293,
72
+ "<|lv|>": 50301,
73
+ "<|mg|>": 50349,
74
+ "<|mi|>": 50295,
75
+ "<|mk|>": 50308,
76
+ "<|ml|>": 50296,
77
+ "<|mn|>": 50314,
78
+ "<|mr|>": 50320,
79
+ "<|ms|>": 50282,
80
+ "<|mt|>": 50343,
81
+ "<|my|>": 50346,
82
+ "<|ne|>": 50313,
83
+ "<|nl|>": 50271,
84
+ "<|nn|>": 50342,
85
+ "<|no|>": 50288,
86
+ "<|oc|>": 50328,
87
+ "<|pa|>": 50321,
88
+ "<|pl|>": 50269,
89
+ "<|ps|>": 50340,
90
+ "<|pt|>": 50267,
91
+ "<|ro|>": 50284,
92
+ "<|ru|>": 50263,
93
+ "<|sa|>": 50344,
94
+ "<|sd|>": 50332,
95
+ "<|si|>": 50322,
96
+ "<|sk|>": 50298,
97
+ "<|sl|>": 50305,
98
+ "<|sn|>": 50324,
99
+ "<|so|>": 50326,
100
+ "<|sq|>": 50317,
101
+ "<|sr|>": 50303,
102
+ "<|su|>": 50357,
103
+ "<|sv|>": 50273,
104
+ "<|sw|>": 50318,
105
+ "<|ta|>": 50287,
106
+ "<|te|>": 50299,
107
+ "<|tg|>": 50331,
108
+ "<|th|>": 50289,
109
+ "<|tk|>": 50341,
110
+ "<|tl|>": 50348,
111
+ "<|tr|>": 50268,
112
+ "<|tt|>": 50351,
113
+ "<|uk|>": 50280,
114
+ "<|ur|>": 50290,
115
+ "<|uz|>": 50337,
116
+ "<|vi|>": 50278,
117
+ "<|yi|>": 50335,
118
+ "<|yo|>": 50325,
119
+ "<|zh|>": 50260
120
+ },
121
+ "max_initial_timestamp_index": 1,
122
+ "max_length": 448,
123
+ "no_timestamps_token_id": 50363,
124
+ "pad_token_id": 50257,
125
+ "return_timestamps": false,
126
+ "suppress_tokens": [
127
+ 1,
128
+ 2,
129
+ 7,
130
+ 8,
131
+ 9,
132
+ 10,
133
+ 14,
134
+ 25,
135
+ 26,
136
+ 27,
137
+ 28,
138
+ 29,
139
+ 31,
140
+ 58,
141
+ 59,
142
+ 60,
143
+ 61,
144
+ 62,
145
+ 63,
146
+ 90,
147
+ 91,
148
+ 92,
149
+ 93,
150
+ 359,
151
+ 503,
152
+ 522,
153
+ 542,
154
+ 873,
155
+ 893,
156
+ 902,
157
+ 918,
158
+ 922,
159
+ 931,
160
+ 1350,
161
+ 1853,
162
+ 1982,
163
+ 2460,
164
+ 2627,
165
+ 3246,
166
+ 3253,
167
+ 3268,
168
+ 3536,
169
+ 3846,
170
+ 3961,
171
+ 4183,
172
+ 4667,
173
+ 6585,
174
+ 6647,
175
+ 7273,
176
+ 9061,
177
+ 9383,
178
+ 10428,
179
+ 10929,
180
+ 11938,
181
+ 12033,
182
+ 12331,
183
+ 12562,
184
+ 13793,
185
+ 14157,
186
+ 14635,
187
+ 15265,
188
+ 15618,
189
+ 16553,
190
+ 16604,
191
+ 18362,
192
+ 18956,
193
+ 20075,
194
+ 21675,
195
+ 22520,
196
+ 26130,
197
+ 26161,
198
+ 26435,
199
+ 28279,
200
+ 29464,
201
+ 31650,
202
+ 32302,
203
+ 32470,
204
+ 36865,
205
+ 42863,
206
+ 47425,
207
+ 49870,
208
+ 50254,
209
+ 50258,
210
+ 50360,
211
+ 50361,
212
+ 50362
213
+ ],
214
+ "task_to_id": {
215
+ "transcribe": 50359,
216
+ "translate": 50358
217
+ },
218
+ "transformers_version": "4.27.0.dev0"
219
+ }
quantized/openai/whisper-tiny/default/merges.txt CHANGED
@@ -1,4 +1,4 @@
1
- #version: 0.2
2
  Ġ a
3
  Ġt h
4
  i n
 
1
+ #version: 0.2 - Trained by `huggingface/tokenizers`
2
  Ġ a
3
  Ġt h
4
  i n
quantized/openai/whisper-tiny/default/tokenizer.json CHANGED
@@ -5,974 +5,973 @@
5
  "added_tokens": [
6
  {
7
  "id": 50257,
 
8
  "content": "<|endoftext|>",
9
  "single_word": false,
10
  "lstrip": false,
11
  "rstrip": false,
12
- "normalized": false,
13
- "special": true
14
  },
15
  {
16
  "id": 50258,
 
17
  "content": "<|startoftranscript|>",
18
  "single_word": false,
19
  "lstrip": false,
20
  "rstrip": false,
21
- "normalized": false,
22
- "special": true
23
  },
24
  {
25
  "id": 50259,
 
26
  "content": "<|en|>",
27
  "single_word": false,
28
  "lstrip": false,
29
  "rstrip": false,
30
- "normalized": false,
31
- "special": true
32
  },
33
  {
34
  "id": 50260,
 
35
  "content": "<|zh|>",
36
  "single_word": false,
37
  "lstrip": false,
38
  "rstrip": false,
39
- "normalized": false,
40
- "special": true
41
  },
42
  {
43
  "id": 50261,
 
44
  "content": "<|de|>",
45
  "single_word": false,
46
  "lstrip": false,
47
  "rstrip": false,
48
- "normalized": false,
49
- "special": true
50
  },
51
  {
52
  "id": 50262,
 
53
  "content": "<|es|>",
54
  "single_word": false,
55
  "lstrip": false,
56
  "rstrip": false,
57
- "normalized": false,
58
- "special": true
59
  },
60
  {
61
  "id": 50263,
 
62
  "content": "<|ru|>",
63
  "single_word": false,
64
  "lstrip": false,
65
  "rstrip": false,
66
- "normalized": false,
67
- "special": true
68
  },
69
  {
70
  "id": 50264,
 
71
  "content": "<|ko|>",
72
  "single_word": false,
73
  "lstrip": false,
74
  "rstrip": false,
75
- "normalized": false,
76
- "special": true
77
  },
78
  {
79
  "id": 50265,
 
80
  "content": "<|fr|>",
81
  "single_word": false,
82
  "lstrip": false,
83
  "rstrip": false,
84
- "normalized": false,
85
- "special": true
86
  },
87
  {
88
  "id": 50266,
 
89
  "content": "<|ja|>",
90
  "single_word": false,
91
  "lstrip": false,
92
  "rstrip": false,
93
- "normalized": false,
94
- "special": true
95
  },
96
  {
97
  "id": 50267,
 
98
  "content": "<|pt|>",
99
  "single_word": false,
100
  "lstrip": false,
101
  "rstrip": false,
102
- "normalized": false,
103
- "special": true
104
  },
105
  {
106
  "id": 50268,
 
107
  "content": "<|tr|>",
108
  "single_word": false,
109
  "lstrip": false,
110
  "rstrip": false,
111
- "normalized": false,
112
- "special": true
113
  },
114
  {
115
  "id": 50269,
 
116
  "content": "<|pl|>",
117
  "single_word": false,
118
  "lstrip": false,
119
  "rstrip": false,
120
- "normalized": false,
121
- "special": true
122
  },
123
  {
124
  "id": 50270,
 
125
  "content": "<|ca|>",
126
  "single_word": false,
127
  "lstrip": false,
128
  "rstrip": false,
129
- "normalized": false,
130
- "special": true
131
  },
132
  {
133
  "id": 50271,
 
134
  "content": "<|nl|>",
135
  "single_word": false,
136
  "lstrip": false,
137
  "rstrip": false,
138
- "normalized": false,
139
- "special": true
140
  },
141
  {
142
  "id": 50272,
 
143
  "content": "<|ar|>",
144
  "single_word": false,
145
  "lstrip": false,
146
  "rstrip": false,
147
- "normalized": false,
148
- "special": true
149
  },
150
  {
151
  "id": 50273,
 
152
  "content": "<|sv|>",
153
  "single_word": false,
154
  "lstrip": false,
155
  "rstrip": false,
156
- "normalized": false,
157
- "special": true
158
  },
159
  {
160
  "id": 50274,
 
161
  "content": "<|it|>",
162
  "single_word": false,
163
  "lstrip": false,
164
  "rstrip": false,
165
- "normalized": false,
166
- "special": true
167
  },
168
  {
169
  "id": 50275,
 
170
  "content": "<|id|>",
171
  "single_word": false,
172
  "lstrip": false,
173
  "rstrip": false,
174
- "normalized": false,
175
- "special": true
176
  },
177
  {
178
  "id": 50276,
 
179
  "content": "<|hi|>",
180
  "single_word": false,
181
  "lstrip": false,
182
  "rstrip": false,
183
- "normalized": false,
184
- "special": true
185
  },
186
  {
187
  "id": 50277,
 
188
  "content": "<|fi|>",
189
  "single_word": false,
190
  "lstrip": false,
191
  "rstrip": false,
192
- "normalized": false,
193
- "special": true
194
  },
195
  {
196
  "id": 50278,
 
197
  "content": "<|vi|>",
198
  "single_word": false,
199
  "lstrip": false,
200
  "rstrip": false,
201
- "normalized": false,
202
- "special": true
203
  },
204
  {
205
  "id": 50279,
 
206
  "content": "<|he|>",
207
  "single_word": false,
208
  "lstrip": false,
209
  "rstrip": false,
210
- "normalized": false,
211
- "special": true
212
  },
213
  {
214
  "id": 50280,
 
215
  "content": "<|uk|>",
216
  "single_word": false,
217
  "lstrip": false,
218
  "rstrip": false,
219
- "normalized": false,
220
- "special": true
221
  },
222
  {
223
  "id": 50281,
 
224
  "content": "<|el|>",
225
  "single_word": false,
226
  "lstrip": false,
227
  "rstrip": false,
228
- "normalized": false,
229
- "special": true
230
  },
231
  {
232
  "id": 50282,
 
233
  "content": "<|ms|>",
234
  "single_word": false,
235
  "lstrip": false,
236
  "rstrip": false,
237
- "normalized": false,
238
- "special": true
239
  },
240
  {
241
  "id": 50283,
 
242
  "content": "<|cs|>",
243
  "single_word": false,
244
  "lstrip": false,
245
  "rstrip": false,
246
- "normalized": false,
247
- "special": true
248
  },
249
  {
250
  "id": 50284,
 
251
  "content": "<|ro|>",
252
  "single_word": false,
253
  "lstrip": false,
254
  "rstrip": false,
255
- "normalized": false,
256
- "special": true
257
  },
258
  {
259
  "id": 50285,
 
260
  "content": "<|da|>",
261
  "single_word": false,
262
  "lstrip": false,
263
  "rstrip": false,
264
- "normalized": false,
265
- "special": true
266
  },
267
  {
268
  "id": 50286,
 
269
  "content": "<|hu|>",
270
  "single_word": false,
271
  "lstrip": false,
272
  "rstrip": false,
273
- "normalized": false,
274
- "special": true
275
  },
276
  {
277
  "id": 50287,
 
278
  "content": "<|ta|>",
279
  "single_word": false,
280
  "lstrip": false,
281
  "rstrip": false,
282
- "normalized": false,
283
- "special": true
284
  },
285
  {
286
  "id": 50288,
 
287
  "content": "<|no|>",
288
  "single_word": false,
289
  "lstrip": false,
290
  "rstrip": false,
291
- "normalized": false,
292
- "special": true
293
  },
294
  {
295
  "id": 50289,
 
296
  "content": "<|th|>",
297
  "single_word": false,
298
  "lstrip": false,
299
  "rstrip": false,
300
- "normalized": false,
301
- "special": true
302
  },
303
  {
304
  "id": 50290,
 
305
  "content": "<|ur|>",
306
  "single_word": false,
307
  "lstrip": false,
308
  "rstrip": false,
309
- "normalized": false,
310
- "special": true
311
  },
312
  {
313
  "id": 50291,
 
314
  "content": "<|hr|>",
315
  "single_word": false,
316
  "lstrip": false,
317
  "rstrip": false,
318
- "normalized": false,
319
- "special": true
320
  },
321
  {
322
  "id": 50292,
 
323
  "content": "<|bg|>",
324
  "single_word": false,
325
  "lstrip": false,
326
  "rstrip": false,
327
- "normalized": false,
328
- "special": true
329
  },
330
  {
331
  "id": 50293,
 
332
  "content": "<|lt|>",
333
  "single_word": false,
334
  "lstrip": false,
335
  "rstrip": false,
336
- "normalized": false,
337
- "special": true
338
  },
339
  {
340
  "id": 50294,
 
341
  "content": "<|la|>",
342
  "single_word": false,
343
  "lstrip": false,
344
  "rstrip": false,
345
- "normalized": false,
346
- "special": true
347
  },
348
  {
349
  "id": 50295,
 
350
  "content": "<|mi|>",
351
  "single_word": false,
352
  "lstrip": false,
353
  "rstrip": false,
354
- "normalized": false,
355
- "special": true
356
  },
357
  {
358
  "id": 50296,
 
359
  "content": "<|ml|>",
360
  "single_word": false,
361
  "lstrip": false,
362
  "rstrip": false,
363
- "normalized": false,
364
- "special": true
365
  },
366
  {
367
  "id": 50297,
 
368
  "content": "<|cy|>",
369
  "single_word": false,
370
  "lstrip": false,
371
  "rstrip": false,
372
- "normalized": false,
373
- "special": true
374
  },
375
  {
376
  "id": 50298,
 
377
  "content": "<|sk|>",
378
  "single_word": false,
379
  "lstrip": false,
380
  "rstrip": false,
381
- "normalized": false,
382
- "special": true
383
  },
384
  {
385
  "id": 50299,
 
386
  "content": "<|te|>",
387
  "single_word": false,
388
  "lstrip": false,
389
  "rstrip": false,
390
- "normalized": false,
391
- "special": true
392
  },
393
  {
394
  "id": 50300,
 
395
  "content": "<|fa|>",
396
  "single_word": false,
397
  "lstrip": false,
398
  "rstrip": false,
399
- "normalized": false,
400
- "special": true
401
  },
402
  {
403
  "id": 50301,
 
404
  "content": "<|lv|>",
405
  "single_word": false,
406
  "lstrip": false,
407
  "rstrip": false,
408
- "normalized": false,
409
- "special": true
410
  },
411
  {
412
  "id": 50302,
 
413
  "content": "<|bn|>",
414
  "single_word": false,
415
  "lstrip": false,
416
  "rstrip": false,
417
- "normalized": false,
418
- "special": true
419
  },
420
  {
421
  "id": 50303,
 
422
  "content": "<|sr|>",
423
  "single_word": false,
424
  "lstrip": false,
425
  "rstrip": false,
426
- "normalized": false,
427
- "special": true
428
  },
429
  {
430
  "id": 50304,
 
431
  "content": "<|az|>",
432
  "single_word": false,
433
  "lstrip": false,
434
  "rstrip": false,
435
- "normalized": false,
436
- "special": true
437
  },
438
  {
439
  "id": 50305,
 
440
  "content": "<|sl|>",
441
  "single_word": false,
442
  "lstrip": false,
443
  "rstrip": false,
444
- "normalized": false,
445
- "special": true
446
  },
447
  {
448
  "id": 50306,
 
449
  "content": "<|kn|>",
450
  "single_word": false,
451
  "lstrip": false,
452
  "rstrip": false,
453
- "normalized": false,
454
- "special": true
455
  },
456
  {
457
  "id": 50307,
 
458
  "content": "<|et|>",
459
  "single_word": false,
460
  "lstrip": false,
461
  "rstrip": false,
462
- "normalized": false,
463
- "special": true
464
  },
465
  {
466
  "id": 50308,
 
467
  "content": "<|mk|>",
468
  "single_word": false,
469
  "lstrip": false,
470
  "rstrip": false,
471
- "normalized": false,
472
- "special": true
473
  },
474
  {
475
  "id": 50309,
 
476
  "content": "<|br|>",
477
  "single_word": false,
478
  "lstrip": false,
479
  "rstrip": false,
480
- "normalized": false,
481
- "special": true
482
  },
483
  {
484
  "id": 50310,
 
485
  "content": "<|eu|>",
486
  "single_word": false,
487
  "lstrip": false,
488
  "rstrip": false,
489
- "normalized": false,
490
- "special": true
491
  },
492
  {
493
  "id": 50311,
 
494
  "content": "<|is|>",
495
  "single_word": false,
496
  "lstrip": false,
497
  "rstrip": false,
498
- "normalized": false,
499
- "special": true
500
  },
501
  {
502
  "id": 50312,
 
503
  "content": "<|hy|>",
504
  "single_word": false,
505
  "lstrip": false,
506
  "rstrip": false,
507
- "normalized": false,
508
- "special": true
509
  },
510
  {
511
  "id": 50313,
 
512
  "content": "<|ne|>",
513
  "single_word": false,
514
  "lstrip": false,
515
  "rstrip": false,
516
- "normalized": false,
517
- "special": true
518
  },
519
  {
520
  "id": 50314,
 
521
  "content": "<|mn|>",
522
  "single_word": false,
523
  "lstrip": false,
524
  "rstrip": false,
525
- "normalized": false,
526
- "special": true
527
  },
528
  {
529
  "id": 50315,
 
530
  "content": "<|bs|>",
531
  "single_word": false,
532
  "lstrip": false,
533
  "rstrip": false,
534
- "normalized": false,
535
- "special": true
536
  },
537
  {
538
  "id": 50316,
 
539
  "content": "<|kk|>",
540
  "single_word": false,
541
  "lstrip": false,
542
  "rstrip": false,
543
- "normalized": false,
544
- "special": true
545
  },
546
  {
547
  "id": 50317,
 
548
  "content": "<|sq|>",
549
  "single_word": false,
550
  "lstrip": false,
551
  "rstrip": false,
552
- "normalized": false,
553
- "special": true
554
  },
555
  {
556
  "id": 50318,
 
557
  "content": "<|sw|>",
558
  "single_word": false,
559
  "lstrip": false,
560
  "rstrip": false,
561
- "normalized": false,
562
- "special": true
563
  },
564
  {
565
  "id": 50319,
 
566
  "content": "<|gl|>",
567
  "single_word": false,
568
  "lstrip": false,
569
  "rstrip": false,
570
- "normalized": false,
571
- "special": true
572
  },
573
  {
574
  "id": 50320,
 
575
  "content": "<|mr|>",
576
  "single_word": false,
577
  "lstrip": false,
578
  "rstrip": false,
579
- "normalized": false,
580
- "special": true
581
  },
582
  {
583
  "id": 50321,
 
584
  "content": "<|pa|>",
585
  "single_word": false,
586
  "lstrip": false,
587
  "rstrip": false,
588
- "normalized": false,
589
- "special": true
590
  },
591
  {
592
  "id": 50322,
 
593
  "content": "<|si|>",
594
  "single_word": false,
595
  "lstrip": false,
596
  "rstrip": false,
597
- "normalized": false,
598
- "special": true
599
  },
600
  {
601
  "id": 50323,
 
602
  "content": "<|km|>",
603
  "single_word": false,
604
  "lstrip": false,
605
  "rstrip": false,
606
- "normalized": false,
607
- "special": true
608
  },
609
  {
610
  "id": 50324,
 
611
  "content": "<|sn|>",
612
  "single_word": false,
613
  "lstrip": false,
614
  "rstrip": false,
615
- "normalized": false,
616
- "special": true
617
  },
618
  {
619
  "id": 50325,
 
620
  "content": "<|yo|>",
621
  "single_word": false,
622
  "lstrip": false,
623
  "rstrip": false,
624
- "normalized": false,
625
- "special": true
626
  },
627
  {
628
  "id": 50326,
 
629
  "content": "<|so|>",
630
  "single_word": false,
631
  "lstrip": false,
632
  "rstrip": false,
633
- "normalized": false,
634
- "special": true
635
  },
636
  {
637
  "id": 50327,
 
638
  "content": "<|af|>",
639
  "single_word": false,
640
  "lstrip": false,
641
  "rstrip": false,
642
- "normalized": false,
643
- "special": true
644
  },
645
  {
646
  "id": 50328,
 
647
  "content": "<|oc|>",
648
  "single_word": false,
649
  "lstrip": false,
650
  "rstrip": false,
651
- "normalized": false,
652
- "special": true
653
  },
654
  {
655
  "id": 50329,
 
656
  "content": "<|ka|>",
657
  "single_word": false,
658
  "lstrip": false,
659
  "rstrip": false,
660
- "normalized": false,
661
- "special": true
662
  },
663
  {
664
  "id": 50330,
 
665
  "content": "<|be|>",
666
  "single_word": false,
667
  "lstrip": false,
668
  "rstrip": false,
669
- "normalized": false,
670
- "special": true
671
  },
672
  {
673
  "id": 50331,
 
674
  "content": "<|tg|>",
675
  "single_word": false,
676
  "lstrip": false,
677
  "rstrip": false,
678
- "normalized": false,
679
- "special": true
680
  },
681
  {
682
  "id": 50332,
 
683
  "content": "<|sd|>",
684
  "single_word": false,
685
  "lstrip": false,
686
  "rstrip": false,
687
- "normalized": false,
688
- "special": true
689
  },
690
  {
691
  "id": 50333,
 
692
  "content": "<|gu|>",
693
  "single_word": false,
694
  "lstrip": false,
695
  "rstrip": false,
696
- "normalized": false,
697
- "special": true
698
  },
699
  {
700
  "id": 50334,
 
701
  "content": "<|am|>",
702
  "single_word": false,
703
  "lstrip": false,
704
  "rstrip": false,
705
- "normalized": false,
706
- "special": true
707
  },
708
  {
709
  "id": 50335,
 
710
  "content": "<|yi|>",
711
  "single_word": false,
712
  "lstrip": false,
713
  "rstrip": false,
714
- "normalized": false,
715
- "special": true
716
  },
717
  {
718
  "id": 50336,
 
719
  "content": "<|lo|>",
720
  "single_word": false,
721
  "lstrip": false,
722
  "rstrip": false,
723
- "normalized": false,
724
- "special": true
725
  },
726
  {
727
  "id": 50337,
 
728
  "content": "<|uz|>",
729
  "single_word": false,
730
  "lstrip": false,
731
  "rstrip": false,
732
- "normalized": false,
733
- "special": true
734
  },
735
  {
736
  "id": 50338,
 
737
  "content": "<|fo|>",
738
  "single_word": false,
739
  "lstrip": false,
740
  "rstrip": false,
741
- "normalized": false,
742
- "special": true
743
  },
744
  {
745
  "id": 50339,
 
746
  "content": "<|ht|>",
747
  "single_word": false,
748
  "lstrip": false,
749
  "rstrip": false,
750
- "normalized": false,
751
- "special": true
752
  },
753
  {
754
  "id": 50340,
 
755
  "content": "<|ps|>",
756
  "single_word": false,
757
  "lstrip": false,
758
  "rstrip": false,
759
- "normalized": false,
760
- "special": true
761
  },
762
  {
763
  "id": 50341,
 
764
  "content": "<|tk|>",
765
  "single_word": false,
766
  "lstrip": false,
767
  "rstrip": false,
768
- "normalized": false,
769
- "special": true
770
  },
771
  {
772
  "id": 50342,
 
773
  "content": "<|nn|>",
774
  "single_word": false,
775
  "lstrip": false,
776
  "rstrip": false,
777
- "normalized": false,
778
- "special": true
779
  },
780
  {
781
  "id": 50343,
 
782
  "content": "<|mt|>",
783
  "single_word": false,
784
  "lstrip": false,
785
  "rstrip": false,
786
- "normalized": false,
787
- "special": true
788
  },
789
  {
790
  "id": 50344,
 
791
  "content": "<|sa|>",
792
  "single_word": false,
793
  "lstrip": false,
794
  "rstrip": false,
795
- "normalized": false,
796
- "special": true
797
  },
798
  {
799
  "id": 50345,
 
800
  "content": "<|lb|>",
801
  "single_word": false,
802
  "lstrip": false,
803
  "rstrip": false,
804
- "normalized": false,
805
- "special": true
806
  },
807
  {
808
  "id": 50346,
 
809
  "content": "<|my|>",
810
  "single_word": false,
811
  "lstrip": false,
812
  "rstrip": false,
813
- "normalized": false,
814
- "special": true
815
  },
816
  {
817
  "id": 50347,
 
818
  "content": "<|bo|>",
819
  "single_word": false,
820
  "lstrip": false,
821
  "rstrip": false,
822
- "normalized": false,
823
- "special": true
824
  },
825
  {
826
  "id": 50348,
 
827
  "content": "<|tl|>",
828
  "single_word": false,
829
  "lstrip": false,
830
  "rstrip": false,
831
- "normalized": false,
832
- "special": true
833
  },
834
  {
835
  "id": 50349,
 
836
  "content": "<|mg|>",
837
  "single_word": false,
838
  "lstrip": false,
839
  "rstrip": false,
840
- "normalized": false,
841
- "special": true
842
  },
843
  {
844
  "id": 50350,
 
845
  "content": "<|as|>",
846
  "single_word": false,
847
  "lstrip": false,
848
  "rstrip": false,
849
- "normalized": false,
850
- "special": true
851
  },
852
  {
853
  "id": 50351,
 
854
  "content": "<|tt|>",
855
  "single_word": false,
856
  "lstrip": false,
857
  "rstrip": false,
858
- "normalized": false,
859
- "special": true
860
  },
861
  {
862
  "id": 50352,
 
863
  "content": "<|haw|>",
864
  "single_word": false,
865
  "lstrip": false,
866
  "rstrip": false,
867
- "normalized": false,
868
- "special": true
869
  },
870
  {
871
  "id": 50353,
 
872
  "content": "<|ln|>",
873
  "single_word": false,
874
  "lstrip": false,
875
  "rstrip": false,
876
- "normalized": false,
877
- "special": true
878
  },
879
  {
880
  "id": 50354,
 
881
  "content": "<|ha|>",
882
  "single_word": false,
883
  "lstrip": false,
884
  "rstrip": false,
885
- "normalized": false,
886
- "special": true
887
  },
888
  {
889
  "id": 50355,
 
890
  "content": "<|ba|>",
891
  "single_word": false,
892
  "lstrip": false,
893
  "rstrip": false,
894
- "normalized": false,
895
- "special": true
896
  },
897
  {
898
  "id": 50356,
 
899
  "content": "<|jw|>",
900
  "single_word": false,
901
  "lstrip": false,
902
  "rstrip": false,
903
- "normalized": false,
904
- "special": true
905
  },
906
  {
907
  "id": 50357,
 
908
  "content": "<|su|>",
909
  "single_word": false,
910
  "lstrip": false,
911
  "rstrip": false,
912
- "normalized": false,
913
- "special": true
914
  },
915
  {
916
  "id": 50358,
 
917
  "content": "<|translate|>",
918
  "single_word": false,
919
  "lstrip": false,
920
  "rstrip": false,
921
- "normalized": false,
922
- "special": true
923
  },
924
  {
925
  "id": 50359,
 
926
  "content": "<|transcribe|>",
927
  "single_word": false,
928
  "lstrip": false,
929
  "rstrip": false,
930
- "normalized": false,
931
- "special": true
932
  },
933
  {
934
  "id": 50360,
 
935
  "content": "<|startoflm|>",
936
  "single_word": false,
937
  "lstrip": false,
938
  "rstrip": false,
939
- "normalized": false,
940
- "special": true
941
  },
942
  {
943
  "id": 50361,
 
944
  "content": "<|startofprev|>",
945
  "single_word": false,
946
  "lstrip": false,
947
  "rstrip": false,
948
- "normalized": false,
949
- "special": true
950
  },
951
  {
952
  "id": 50362,
 
953
  "content": "<|nocaptions|>",
954
  "single_word": false,
955
  "lstrip": false,
956
  "rstrip": false,
957
- "normalized": false,
958
- "special": true
959
  },
960
  {
961
  "id": 50363,
 
962
  "content": "<|notimestamps|>",
963
  "single_word": false,
964
  "lstrip": false,
965
  "rstrip": false,
966
- "normalized": false,
967
- "special": true
968
  }
969
  ],
970
  "normalizer": null,
971
  "pre_tokenizer": {
972
  "type": "ByteLevel",
973
  "add_prefix_space": false,
974
- "trim_offsets": true,
975
- "use_regex": true
976
  },
977
  "post_processor": {
978
  "type": "TemplateProcessing",
@@ -1067,8 +1066,7 @@
1067
  "decoder": {
1068
  "type": "ByteLevel",
1069
  "add_prefix_space": true,
1070
- "trim_offsets": true,
1071
- "use_regex": true
1072
  },
1073
  "model": {
1074
  "type": "BPE",
 
5
  "added_tokens": [
6
  {
7
  "id": 50257,
8
+ "special": true,
9
  "content": "<|endoftext|>",
10
  "single_word": false,
11
  "lstrip": false,
12
  "rstrip": false,
13
+ "normalized": false
 
14
  },
15
  {
16
  "id": 50258,
17
+ "special": true,
18
  "content": "<|startoftranscript|>",
19
  "single_word": false,
20
  "lstrip": false,
21
  "rstrip": false,
22
+ "normalized": false
 
23
  },
24
  {
25
  "id": 50259,
26
+ "special": true,
27
  "content": "<|en|>",
28
  "single_word": false,
29
  "lstrip": false,
30
  "rstrip": false,
31
+ "normalized": false
 
32
  },
33
  {
34
  "id": 50260,
35
+ "special": true,
36
  "content": "<|zh|>",
37
  "single_word": false,
38
  "lstrip": false,
39
  "rstrip": false,
40
+ "normalized": false
 
41
  },
42
  {
43
  "id": 50261,
44
+ "special": true,
45
  "content": "<|de|>",
46
  "single_word": false,
47
  "lstrip": false,
48
  "rstrip": false,
49
+ "normalized": false
 
50
  },
51
  {
52
  "id": 50262,
53
+ "special": true,
54
  "content": "<|es|>",
55
  "single_word": false,
56
  "lstrip": false,
57
  "rstrip": false,
58
+ "normalized": false
 
59
  },
60
  {
61
  "id": 50263,
62
+ "special": true,
63
  "content": "<|ru|>",
64
  "single_word": false,
65
  "lstrip": false,
66
  "rstrip": false,
67
+ "normalized": false
 
68
  },
69
  {
70
  "id": 50264,
71
+ "special": true,
72
  "content": "<|ko|>",
73
  "single_word": false,
74
  "lstrip": false,
75
  "rstrip": false,
76
+ "normalized": false
 
77
  },
78
  {
79
  "id": 50265,
80
+ "special": true,
81
  "content": "<|fr|>",
82
  "single_word": false,
83
  "lstrip": false,
84
  "rstrip": false,
85
+ "normalized": false
 
86
  },
87
  {
88
  "id": 50266,
89
+ "special": true,
90
  "content": "<|ja|>",
91
  "single_word": false,
92
  "lstrip": false,
93
  "rstrip": false,
94
+ "normalized": false
 
95
  },
96
  {
97
  "id": 50267,
98
+ "special": true,
99
  "content": "<|pt|>",
100
  "single_word": false,
101
  "lstrip": false,
102
  "rstrip": false,
103
+ "normalized": false
 
104
  },
105
  {
106
  "id": 50268,
107
+ "special": true,
108
  "content": "<|tr|>",
109
  "single_word": false,
110
  "lstrip": false,
111
  "rstrip": false,
112
+ "normalized": false
 
113
  },
114
  {
115
  "id": 50269,
116
+ "special": true,
117
  "content": "<|pl|>",
118
  "single_word": false,
119
  "lstrip": false,
120
  "rstrip": false,
121
+ "normalized": false
 
122
  },
123
  {
124
  "id": 50270,
125
+ "special": true,
126
  "content": "<|ca|>",
127
  "single_word": false,
128
  "lstrip": false,
129
  "rstrip": false,
130
+ "normalized": false
 
131
  },
132
  {
133
  "id": 50271,
134
+ "special": true,
135
  "content": "<|nl|>",
136
  "single_word": false,
137
  "lstrip": false,
138
  "rstrip": false,
139
+ "normalized": false
 
140
  },
141
  {
142
  "id": 50272,
143
+ "special": true,
144
  "content": "<|ar|>",
145
  "single_word": false,
146
  "lstrip": false,
147
  "rstrip": false,
148
+ "normalized": false
 
149
  },
150
  {
151
  "id": 50273,
152
+ "special": true,
153
  "content": "<|sv|>",
154
  "single_word": false,
155
  "lstrip": false,
156
  "rstrip": false,
157
+ "normalized": false
 
158
  },
159
  {
160
  "id": 50274,
161
+ "special": true,
162
  "content": "<|it|>",
163
  "single_word": false,
164
  "lstrip": false,
165
  "rstrip": false,
166
+ "normalized": false
 
167
  },
168
  {
169
  "id": 50275,
170
+ "special": true,
171
  "content": "<|id|>",
172
  "single_word": false,
173
  "lstrip": false,
174
  "rstrip": false,
175
+ "normalized": false
 
176
  },
177
  {
178
  "id": 50276,
179
+ "special": true,
180
  "content": "<|hi|>",
181
  "single_word": false,
182
  "lstrip": false,
183
  "rstrip": false,
184
+ "normalized": false
 
185
  },
186
  {
187
  "id": 50277,
188
+ "special": true,
189
  "content": "<|fi|>",
190
  "single_word": false,
191
  "lstrip": false,
192
  "rstrip": false,
193
+ "normalized": false
 
194
  },
195
  {
196
  "id": 50278,
197
+ "special": true,
198
  "content": "<|vi|>",
199
  "single_word": false,
200
  "lstrip": false,
201
  "rstrip": false,
202
+ "normalized": false
 
203
  },
204
  {
205
  "id": 50279,
206
+ "special": true,
207
  "content": "<|he|>",
208
  "single_word": false,
209
  "lstrip": false,
210
  "rstrip": false,
211
+ "normalized": false
 
212
  },
213
  {
214
  "id": 50280,
215
+ "special": true,
216
  "content": "<|uk|>",
217
  "single_word": false,
218
  "lstrip": false,
219
  "rstrip": false,
220
+ "normalized": false
 
221
  },
222
  {
223
  "id": 50281,
224
+ "special": true,
225
  "content": "<|el|>",
226
  "single_word": false,
227
  "lstrip": false,
228
  "rstrip": false,
229
+ "normalized": false
 
230
  },
231
  {
232
  "id": 50282,
233
+ "special": true,
234
  "content": "<|ms|>",
235
  "single_word": false,
236
  "lstrip": false,
237
  "rstrip": false,
238
+ "normalized": false
 
239
  },
240
  {
241
  "id": 50283,
242
+ "special": true,
243
  "content": "<|cs|>",
244
  "single_word": false,
245
  "lstrip": false,
246
  "rstrip": false,
247
+ "normalized": false
 
248
  },
249
  {
250
  "id": 50284,
251
+ "special": true,
252
  "content": "<|ro|>",
253
  "single_word": false,
254
  "lstrip": false,
255
  "rstrip": false,
256
+ "normalized": false
 
257
  },
258
  {
259
  "id": 50285,
260
+ "special": true,
261
  "content": "<|da|>",
262
  "single_word": false,
263
  "lstrip": false,
264
  "rstrip": false,
265
+ "normalized": false
 
266
  },
267
  {
268
  "id": 50286,
269
+ "special": true,
270
  "content": "<|hu|>",
271
  "single_word": false,
272
  "lstrip": false,
273
  "rstrip": false,
274
+ "normalized": false
 
275
  },
276
  {
277
  "id": 50287,
278
+ "special": true,
279
  "content": "<|ta|>",
280
  "single_word": false,
281
  "lstrip": false,
282
  "rstrip": false,
283
+ "normalized": false
 
284
  },
285
  {
286
  "id": 50288,
287
+ "special": true,
288
  "content": "<|no|>",
289
  "single_word": false,
290
  "lstrip": false,
291
  "rstrip": false,
292
+ "normalized": false
 
293
  },
294
  {
295
  "id": 50289,
296
+ "special": true,
297
  "content": "<|th|>",
298
  "single_word": false,
299
  "lstrip": false,
300
  "rstrip": false,
301
+ "normalized": false
 
302
  },
303
  {
304
  "id": 50290,
305
+ "special": true,
306
  "content": "<|ur|>",
307
  "single_word": false,
308
  "lstrip": false,
309
  "rstrip": false,
310
+ "normalized": false
 
311
  },
312
  {
313
  "id": 50291,
314
+ "special": true,
315
  "content": "<|hr|>",
316
  "single_word": false,
317
  "lstrip": false,
318
  "rstrip": false,
319
+ "normalized": false
 
320
  },
321
  {
322
  "id": 50292,
323
+ "special": true,
324
  "content": "<|bg|>",
325
  "single_word": false,
326
  "lstrip": false,
327
  "rstrip": false,
328
+ "normalized": false
 
329
  },
330
  {
331
  "id": 50293,
332
+ "special": true,
333
  "content": "<|lt|>",
334
  "single_word": false,
335
  "lstrip": false,
336
  "rstrip": false,
337
+ "normalized": false
 
338
  },
339
  {
340
  "id": 50294,
341
+ "special": true,
342
  "content": "<|la|>",
343
  "single_word": false,
344
  "lstrip": false,
345
  "rstrip": false,
346
+ "normalized": false
 
347
  },
348
  {
349
  "id": 50295,
350
+ "special": true,
351
  "content": "<|mi|>",
352
  "single_word": false,
353
  "lstrip": false,
354
  "rstrip": false,
355
+ "normalized": false
 
356
  },
357
  {
358
  "id": 50296,
359
+ "special": true,
360
  "content": "<|ml|>",
361
  "single_word": false,
362
  "lstrip": false,
363
  "rstrip": false,
364
+ "normalized": false
 
365
  },
366
  {
367
  "id": 50297,
368
+ "special": true,
369
  "content": "<|cy|>",
370
  "single_word": false,
371
  "lstrip": false,
372
  "rstrip": false,
373
+ "normalized": false
 
374
  },
375
  {
376
  "id": 50298,
377
+ "special": true,
378
  "content": "<|sk|>",
379
  "single_word": false,
380
  "lstrip": false,
381
  "rstrip": false,
382
+ "normalized": false
 
383
  },
384
  {
385
  "id": 50299,
386
+ "special": true,
387
  "content": "<|te|>",
388
  "single_word": false,
389
  "lstrip": false,
390
  "rstrip": false,
391
+ "normalized": false
 
392
  },
393
  {
394
  "id": 50300,
395
+ "special": true,
396
  "content": "<|fa|>",
397
  "single_word": false,
398
  "lstrip": false,
399
  "rstrip": false,
400
+ "normalized": false
 
401
  },
402
  {
403
  "id": 50301,
404
+ "special": true,
405
  "content": "<|lv|>",
406
  "single_word": false,
407
  "lstrip": false,
408
  "rstrip": false,
409
+ "normalized": false
 
410
  },
411
  {
412
  "id": 50302,
413
+ "special": true,
414
  "content": "<|bn|>",
415
  "single_word": false,
416
  "lstrip": false,
417
  "rstrip": false,
418
+ "normalized": false
 
419
  },
420
  {
421
  "id": 50303,
422
+ "special": true,
423
  "content": "<|sr|>",
424
  "single_word": false,
425
  "lstrip": false,
426
  "rstrip": false,
427
+ "normalized": false
 
428
  },
429
  {
430
  "id": 50304,
431
+ "special": true,
432
  "content": "<|az|>",
433
  "single_word": false,
434
  "lstrip": false,
435
  "rstrip": false,
436
+ "normalized": false
 
437
  },
438
  {
439
  "id": 50305,
440
+ "special": true,
441
  "content": "<|sl|>",
442
  "single_word": false,
443
  "lstrip": false,
444
  "rstrip": false,
445
+ "normalized": false
 
446
  },
447
  {
448
  "id": 50306,
449
+ "special": true,
450
  "content": "<|kn|>",
451
  "single_word": false,
452
  "lstrip": false,
453
  "rstrip": false,
454
+ "normalized": false
 
455
  },
456
  {
457
  "id": 50307,
458
+ "special": true,
459
  "content": "<|et|>",
460
  "single_word": false,
461
  "lstrip": false,
462
  "rstrip": false,
463
+ "normalized": false
 
464
  },
465
  {
466
  "id": 50308,
467
+ "special": true,
468
  "content": "<|mk|>",
469
  "single_word": false,
470
  "lstrip": false,
471
  "rstrip": false,
472
+ "normalized": false
 
473
  },
474
  {
475
  "id": 50309,
476
+ "special": true,
477
  "content": "<|br|>",
478
  "single_word": false,
479
  "lstrip": false,
480
  "rstrip": false,
481
+ "normalized": false
 
482
  },
483
  {
484
  "id": 50310,
485
+ "special": true,
486
  "content": "<|eu|>",
487
  "single_word": false,
488
  "lstrip": false,
489
  "rstrip": false,
490
+ "normalized": false
 
491
  },
492
  {
493
  "id": 50311,
494
+ "special": true,
495
  "content": "<|is|>",
496
  "single_word": false,
497
  "lstrip": false,
498
  "rstrip": false,
499
+ "normalized": false
 
500
  },
501
  {
502
  "id": 50312,
503
+ "special": true,
504
  "content": "<|hy|>",
505
  "single_word": false,
506
  "lstrip": false,
507
  "rstrip": false,
508
+ "normalized": false
 
509
  },
510
  {
511
  "id": 50313,
512
+ "special": true,
513
  "content": "<|ne|>",
514
  "single_word": false,
515
  "lstrip": false,
516
  "rstrip": false,
517
+ "normalized": false
 
518
  },
519
  {
520
  "id": 50314,
521
+ "special": true,
522
  "content": "<|mn|>",
523
  "single_word": false,
524
  "lstrip": false,
525
  "rstrip": false,
526
+ "normalized": false
 
527
  },
528
  {
529
  "id": 50315,
530
+ "special": true,
531
  "content": "<|bs|>",
532
  "single_word": false,
533
  "lstrip": false,
534
  "rstrip": false,
535
+ "normalized": false
 
536
  },
537
  {
538
  "id": 50316,
539
+ "special": true,
540
  "content": "<|kk|>",
541
  "single_word": false,
542
  "lstrip": false,
543
  "rstrip": false,
544
+ "normalized": false
 
545
  },
546
  {
547
  "id": 50317,
548
+ "special": true,
549
  "content": "<|sq|>",
550
  "single_word": false,
551
  "lstrip": false,
552
  "rstrip": false,
553
+ "normalized": false
 
554
  },
555
  {
556
  "id": 50318,
557
+ "special": true,
558
  "content": "<|sw|>",
559
  "single_word": false,
560
  "lstrip": false,
561
  "rstrip": false,
562
+ "normalized": false
 
563
  },
564
  {
565
  "id": 50319,
566
+ "special": true,
567
  "content": "<|gl|>",
568
  "single_word": false,
569
  "lstrip": false,
570
  "rstrip": false,
571
+ "normalized": false
 
572
  },
573
  {
574
  "id": 50320,
575
+ "special": true,
576
  "content": "<|mr|>",
577
  "single_word": false,
578
  "lstrip": false,
579
  "rstrip": false,
580
+ "normalized": false
 
581
  },
582
  {
583
  "id": 50321,
584
+ "special": true,
585
  "content": "<|pa|>",
586
  "single_word": false,
587
  "lstrip": false,
588
  "rstrip": false,
589
+ "normalized": false
 
590
  },
591
  {
592
  "id": 50322,
593
+ "special": true,
594
  "content": "<|si|>",
595
  "single_word": false,
596
  "lstrip": false,
597
  "rstrip": false,
598
+ "normalized": false
 
599
  },
600
  {
601
  "id": 50323,
602
+ "special": true,
603
  "content": "<|km|>",
604
  "single_word": false,
605
  "lstrip": false,
606
  "rstrip": false,
607
+ "normalized": false
 
608
  },
609
  {
610
  "id": 50324,
611
+ "special": true,
612
  "content": "<|sn|>",
613
  "single_word": false,
614
  "lstrip": false,
615
  "rstrip": false,
616
+ "normalized": false
 
617
  },
618
  {
619
  "id": 50325,
620
+ "special": true,
621
  "content": "<|yo|>",
622
  "single_word": false,
623
  "lstrip": false,
624
  "rstrip": false,
625
+ "normalized": false
 
626
  },
627
  {
628
  "id": 50326,
629
+ "special": true,
630
  "content": "<|so|>",
631
  "single_word": false,
632
  "lstrip": false,
633
  "rstrip": false,
634
+ "normalized": false
 
635
  },
636
  {
637
  "id": 50327,
638
+ "special": true,
639
  "content": "<|af|>",
640
  "single_word": false,
641
  "lstrip": false,
642
  "rstrip": false,
643
+ "normalized": false
 
644
  },
645
  {
646
  "id": 50328,
647
+ "special": true,
648
  "content": "<|oc|>",
649
  "single_word": false,
650
  "lstrip": false,
651
  "rstrip": false,
652
+ "normalized": false
 
653
  },
654
  {
655
  "id": 50329,
656
+ "special": true,
657
  "content": "<|ka|>",
658
  "single_word": false,
659
  "lstrip": false,
660
  "rstrip": false,
661
+ "normalized": false
 
662
  },
663
  {
664
  "id": 50330,
665
+ "special": true,
666
  "content": "<|be|>",
667
  "single_word": false,
668
  "lstrip": false,
669
  "rstrip": false,
670
+ "normalized": false
 
671
  },
672
  {
673
  "id": 50331,
674
+ "special": true,
675
  "content": "<|tg|>",
676
  "single_word": false,
677
  "lstrip": false,
678
  "rstrip": false,
679
+ "normalized": false
 
680
  },
681
  {
682
  "id": 50332,
683
+ "special": true,
684
  "content": "<|sd|>",
685
  "single_word": false,
686
  "lstrip": false,
687
  "rstrip": false,
688
+ "normalized": false
 
689
  },
690
  {
691
  "id": 50333,
692
+ "special": true,
693
  "content": "<|gu|>",
694
  "single_word": false,
695
  "lstrip": false,
696
  "rstrip": false,
697
+ "normalized": false
 
698
  },
699
  {
700
  "id": 50334,
701
+ "special": true,
702
  "content": "<|am|>",
703
  "single_word": false,
704
  "lstrip": false,
705
  "rstrip": false,
706
+ "normalized": false
 
707
  },
708
  {
709
  "id": 50335,
710
+ "special": true,
711
  "content": "<|yi|>",
712
  "single_word": false,
713
  "lstrip": false,
714
  "rstrip": false,
715
+ "normalized": false
 
716
  },
717
  {
718
  "id": 50336,
719
+ "special": true,
720
  "content": "<|lo|>",
721
  "single_word": false,
722
  "lstrip": false,
723
  "rstrip": false,
724
+ "normalized": false
 
725
  },
726
  {
727
  "id": 50337,
728
+ "special": true,
729
  "content": "<|uz|>",
730
  "single_word": false,
731
  "lstrip": false,
732
  "rstrip": false,
733
+ "normalized": false
 
734
  },
735
  {
736
  "id": 50338,
737
+ "special": true,
738
  "content": "<|fo|>",
739
  "single_word": false,
740
  "lstrip": false,
741
  "rstrip": false,
742
+ "normalized": false
 
743
  },
744
  {
745
  "id": 50339,
746
+ "special": true,
747
  "content": "<|ht|>",
748
  "single_word": false,
749
  "lstrip": false,
750
  "rstrip": false,
751
+ "normalized": false
 
752
  },
753
  {
754
  "id": 50340,
755
+ "special": true,
756
  "content": "<|ps|>",
757
  "single_word": false,
758
  "lstrip": false,
759
  "rstrip": false,
760
+ "normalized": false
 
761
  },
762
  {
763
  "id": 50341,
764
+ "special": true,
765
  "content": "<|tk|>",
766
  "single_word": false,
767
  "lstrip": false,
768
  "rstrip": false,
769
+ "normalized": false
 
770
  },
771
  {
772
  "id": 50342,
773
+ "special": true,
774
  "content": "<|nn|>",
775
  "single_word": false,
776
  "lstrip": false,
777
  "rstrip": false,
778
+ "normalized": false
 
779
  },
780
  {
781
  "id": 50343,
782
+ "special": true,
783
  "content": "<|mt|>",
784
  "single_word": false,
785
  "lstrip": false,
786
  "rstrip": false,
787
+ "normalized": false
 
788
  },
789
  {
790
  "id": 50344,
791
+ "special": true,
792
  "content": "<|sa|>",
793
  "single_word": false,
794
  "lstrip": false,
795
  "rstrip": false,
796
+ "normalized": false
 
797
  },
798
  {
799
  "id": 50345,
800
+ "special": true,
801
  "content": "<|lb|>",
802
  "single_word": false,
803
  "lstrip": false,
804
  "rstrip": false,
805
+ "normalized": false
 
806
  },
807
  {
808
  "id": 50346,
809
+ "special": true,
810
  "content": "<|my|>",
811
  "single_word": false,
812
  "lstrip": false,
813
  "rstrip": false,
814
+ "normalized": false
 
815
  },
816
  {
817
  "id": 50347,
818
+ "special": true,
819
  "content": "<|bo|>",
820
  "single_word": false,
821
  "lstrip": false,
822
  "rstrip": false,
823
+ "normalized": false
 
824
  },
825
  {
826
  "id": 50348,
827
+ "special": true,
828
  "content": "<|tl|>",
829
  "single_word": false,
830
  "lstrip": false,
831
  "rstrip": false,
832
+ "normalized": false
 
833
  },
834
  {
835
  "id": 50349,
836
+ "special": true,
837
  "content": "<|mg|>",
838
  "single_word": false,
839
  "lstrip": false,
840
  "rstrip": false,
841
+ "normalized": false
 
842
  },
843
  {
844
  "id": 50350,
845
+ "special": true,
846
  "content": "<|as|>",
847
  "single_word": false,
848
  "lstrip": false,
849
  "rstrip": false,
850
+ "normalized": false
 
851
  },
852
  {
853
  "id": 50351,
854
+ "special": true,
855
  "content": "<|tt|>",
856
  "single_word": false,
857
  "lstrip": false,
858
  "rstrip": false,
859
+ "normalized": false
 
860
  },
861
  {
862
  "id": 50352,
863
+ "special": true,
864
  "content": "<|haw|>",
865
  "single_word": false,
866
  "lstrip": false,
867
  "rstrip": false,
868
+ "normalized": false
 
869
  },
870
  {
871
  "id": 50353,
872
+ "special": true,
873
  "content": "<|ln|>",
874
  "single_word": false,
875
  "lstrip": false,
876
  "rstrip": false,
877
+ "normalized": false
 
878
  },
879
  {
880
  "id": 50354,
881
+ "special": true,
882
  "content": "<|ha|>",
883
  "single_word": false,
884
  "lstrip": false,
885
  "rstrip": false,
886
+ "normalized": false
 
887
  },
888
  {
889
  "id": 50355,
890
+ "special": true,
891
  "content": "<|ba|>",
892
  "single_word": false,
893
  "lstrip": false,
894
  "rstrip": false,
895
+ "normalized": false
 
896
  },
897
  {
898
  "id": 50356,
899
+ "special": true,
900
  "content": "<|jw|>",
901
  "single_word": false,
902
  "lstrip": false,
903
  "rstrip": false,
904
+ "normalized": false
 
905
  },
906
  {
907
  "id": 50357,
908
+ "special": true,
909
  "content": "<|su|>",
910
  "single_word": false,
911
  "lstrip": false,
912
  "rstrip": false,
913
+ "normalized": false
 
914
  },
915
  {
916
  "id": 50358,
917
+ "special": true,
918
  "content": "<|translate|>",
919
  "single_word": false,
920
  "lstrip": false,
921
  "rstrip": false,
922
+ "normalized": false
 
923
  },
924
  {
925
  "id": 50359,
926
+ "special": true,
927
  "content": "<|transcribe|>",
928
  "single_word": false,
929
  "lstrip": false,
930
  "rstrip": false,
931
+ "normalized": false
 
932
  },
933
  {
934
  "id": 50360,
935
+ "special": true,
936
  "content": "<|startoflm|>",
937
  "single_word": false,
938
  "lstrip": false,
939
  "rstrip": false,
940
+ "normalized": false
 
941
  },
942
  {
943
  "id": 50361,
944
+ "special": true,
945
  "content": "<|startofprev|>",
946
  "single_word": false,
947
  "lstrip": false,
948
  "rstrip": false,
949
+ "normalized": false
 
950
  },
951
  {
952
  "id": 50362,
953
+ "special": true,
954
  "content": "<|nocaptions|>",
955
  "single_word": false,
956
  "lstrip": false,
957
  "rstrip": false,
958
+ "normalized": false
 
959
  },
960
  {
961
  "id": 50363,
962
+ "special": true,
963
  "content": "<|notimestamps|>",
964
  "single_word": false,
965
  "lstrip": false,
966
  "rstrip": false,
967
+ "normalized": false
 
968
  }
969
  ],
970
  "normalizer": null,
971
  "pre_tokenizer": {
972
  "type": "ByteLevel",
973
  "add_prefix_space": false,
974
+ "trim_offsets": true
 
975
  },
976
  "post_processor": {
977
  "type": "TemplateProcessing",
 
1066
  "decoder": {
1067
  "type": "ByteLevel",
1068
  "add_prefix_space": true,
1069
+ "trim_offsets": true
 
1070
  },
1071
  "model": {
1072
  "type": "BPE",
quantized/openai/whisper-tiny/default/tokenizer_config.json CHANGED
@@ -19,7 +19,6 @@
19
  },
20
  "errors": "replace",
21
  "model_max_length": 1024,
22
- "name_or_path": "openai/whisper-tiny",
23
  "pad_token": null,
24
  "processor_class": "WhisperProcessor",
25
  "return_attention_mask": false,
 
19
  },
20
  "errors": "replace",
21
  "model_max_length": 1024,
 
22
  "pad_token": null,
23
  "processor_class": "WhisperProcessor",
24
  "return_attention_mask": false,
quantized/openai/whisper-tiny/default/vocab.json CHANGED
The diff for this file is too large to render. See raw diff
 
quantized/openai/whisper-tiny/speech2seq-lm-with-past/generation_config.json ADDED
@@ -0,0 +1,219 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "begin_suppress_tokens": [
3
+ 220,
4
+ 50257
5
+ ],
6
+ "bos_token_id": 50257,
7
+ "decoder_start_token_id": 50258,
8
+ "eos_token_id": 50257,
9
+ "forced_decoder_ids": [
10
+ [
11
+ 1,
12
+ null
13
+ ],
14
+ [
15
+ 2,
16
+ 50359
17
+ ]
18
+ ],
19
+ "is_multilingual": true,
20
+ "lang_to_id": {
21
+ "<|af|>": 50327,
22
+ "<|am|>": 50334,
23
+ "<|ar|>": 50272,
24
+ "<|as|>": 50350,
25
+ "<|az|>": 50304,
26
+ "<|ba|>": 50355,
27
+ "<|be|>": 50330,
28
+ "<|bg|>": 50292,
29
+ "<|bn|>": 50302,
30
+ "<|bo|>": 50347,
31
+ "<|br|>": 50309,
32
+ "<|bs|>": 50315,
33
+ "<|ca|>": 50270,
34
+ "<|cs|>": 50283,
35
+ "<|cy|>": 50297,
36
+ "<|da|>": 50285,
37
+ "<|de|>": 50261,
38
+ "<|el|>": 50281,
39
+ "<|en|>": 50259,
40
+ "<|es|>": 50262,
41
+ "<|et|>": 50307,
42
+ "<|eu|>": 50310,
43
+ "<|fa|>": 50300,
44
+ "<|fi|>": 50277,
45
+ "<|fo|>": 50338,
46
+ "<|fr|>": 50265,
47
+ "<|gl|>": 50319,
48
+ "<|gu|>": 50333,
49
+ "<|haw|>": 50352,
50
+ "<|ha|>": 50354,
51
+ "<|he|>": 50279,
52
+ "<|hi|>": 50276,
53
+ "<|hr|>": 50291,
54
+ "<|ht|>": 50339,
55
+ "<|hu|>": 50286,
56
+ "<|hy|>": 50312,
57
+ "<|id|>": 50275,
58
+ "<|is|>": 50311,
59
+ "<|it|>": 50274,
60
+ "<|ja|>": 50266,
61
+ "<|jw|>": 50356,
62
+ "<|ka|>": 50329,
63
+ "<|kk|>": 50316,
64
+ "<|km|>": 50323,
65
+ "<|kn|>": 50306,
66
+ "<|ko|>": 50264,
67
+ "<|la|>": 50294,
68
+ "<|lb|>": 50345,
69
+ "<|ln|>": 50353,
70
+ "<|lo|>": 50336,
71
+ "<|lt|>": 50293,
72
+ "<|lv|>": 50301,
73
+ "<|mg|>": 50349,
74
+ "<|mi|>": 50295,
75
+ "<|mk|>": 50308,
76
+ "<|ml|>": 50296,
77
+ "<|mn|>": 50314,
78
+ "<|mr|>": 50320,
79
+ "<|ms|>": 50282,
80
+ "<|mt|>": 50343,
81
+ "<|my|>": 50346,
82
+ "<|ne|>": 50313,
83
+ "<|nl|>": 50271,
84
+ "<|nn|>": 50342,
85
+ "<|no|>": 50288,
86
+ "<|oc|>": 50328,
87
+ "<|pa|>": 50321,
88
+ "<|pl|>": 50269,
89
+ "<|ps|>": 50340,
90
+ "<|pt|>": 50267,
91
+ "<|ro|>": 50284,
92
+ "<|ru|>": 50263,
93
+ "<|sa|>": 50344,
94
+ "<|sd|>": 50332,
95
+ "<|si|>": 50322,
96
+ "<|sk|>": 50298,
97
+ "<|sl|>": 50305,
98
+ "<|sn|>": 50324,
99
+ "<|so|>": 50326,
100
+ "<|sq|>": 50317,
101
+ "<|sr|>": 50303,
102
+ "<|su|>": 50357,
103
+ "<|sv|>": 50273,
104
+ "<|sw|>": 50318,
105
+ "<|ta|>": 50287,
106
+ "<|te|>": 50299,
107
+ "<|tg|>": 50331,
108
+ "<|th|>": 50289,
109
+ "<|tk|>": 50341,
110
+ "<|tl|>": 50348,
111
+ "<|tr|>": 50268,
112
+ "<|tt|>": 50351,
113
+ "<|uk|>": 50280,
114
+ "<|ur|>": 50290,
115
+ "<|uz|>": 50337,
116
+ "<|vi|>": 50278,
117
+ "<|yi|>": 50335,
118
+ "<|yo|>": 50325,
119
+ "<|zh|>": 50260
120
+ },
121
+ "max_initial_timestamp_index": 1,
122
+ "max_length": 448,
123
+ "no_timestamps_token_id": 50363,
124
+ "pad_token_id": 50257,
125
+ "return_timestamps": false,
126
+ "suppress_tokens": [
127
+ 1,
128
+ 2,
129
+ 7,
130
+ 8,
131
+ 9,
132
+ 10,
133
+ 14,
134
+ 25,
135
+ 26,
136
+ 27,
137
+ 28,
138
+ 29,
139
+ 31,
140
+ 58,
141
+ 59,
142
+ 60,
143
+ 61,
144
+ 62,
145
+ 63,
146
+ 90,
147
+ 91,
148
+ 92,
149
+ 93,
150
+ 359,
151
+ 503,
152
+ 522,
153
+ 542,
154
+ 873,
155
+ 893,
156
+ 902,
157
+ 918,
158
+ 922,
159
+ 931,
160
+ 1350,
161
+ 1853,
162
+ 1982,
163
+ 2460,
164
+ 2627,
165
+ 3246,
166
+ 3253,
167
+ 3268,
168
+ 3536,
169
+ 3846,
170
+ 3961,
171
+ 4183,
172
+ 4667,
173
+ 6585,
174
+ 6647,
175
+ 7273,
176
+ 9061,
177
+ 9383,
178
+ 10428,
179
+ 10929,
180
+ 11938,
181
+ 12033,
182
+ 12331,
183
+ 12562,
184
+ 13793,
185
+ 14157,
186
+ 14635,
187
+ 15265,
188
+ 15618,
189
+ 16553,
190
+ 16604,
191
+ 18362,
192
+ 18956,
193
+ 20075,
194
+ 21675,
195
+ 22520,
196
+ 26130,
197
+ 26161,
198
+ 26435,
199
+ 28279,
200
+ 29464,
201
+ 31650,
202
+ 32302,
203
+ 32470,
204
+ 36865,
205
+ 42863,
206
+ 47425,
207
+ 49870,
208
+ 50254,
209
+ 50258,
210
+ 50360,
211
+ 50361,
212
+ 50362
213
+ ],
214
+ "task_to_id": {
215
+ "transcribe": 50359,
216
+ "translate": 50358
217
+ },
218
+ "transformers_version": "4.27.0.dev0"
219
+ }