Joshua Lochner committed on
Commit
9ca1f50
1 Parent(s): fde2d86

Update `whisper-tiny.en` model files

Browse files
quantized/openai/whisper-tiny.en/default/generation_config.json ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "begin_suppress_tokens": [
3
+ 220,
4
+ 50256
5
+ ],
6
+ "bos_token_id": 50257,
7
+ "decoder_start_token_id": 50257,
8
+ "eos_token_id": 50256,
9
+ "forced_decoder_ids": [
10
+ [
11
+ 1,
12
+ 50362
13
+ ]
14
+ ],
15
+ "is_multilingual": false,
16
+ "max_initial_timestamp_index": 1,
17
+ "max_length": 448,
18
+ "no_timestamps_token_id": 50362,
19
+ "pad_token_id": 50256,
20
+ "return_timestamps": false,
21
+ "suppress_tokens": [
22
+ 1,
23
+ 2,
24
+ 7,
25
+ 8,
26
+ 9,
27
+ 10,
28
+ 14,
29
+ 25,
30
+ 26,
31
+ 27,
32
+ 28,
33
+ 29,
34
+ 31,
35
+ 58,
36
+ 59,
37
+ 60,
38
+ 61,
39
+ 62,
40
+ 63,
41
+ 90,
42
+ 91,
43
+ 92,
44
+ 93,
45
+ 357,
46
+ 366,
47
+ 438,
48
+ 532,
49
+ 685,
50
+ 705,
51
+ 796,
52
+ 930,
53
+ 1058,
54
+ 1220,
55
+ 1267,
56
+ 1279,
57
+ 1303,
58
+ 1343,
59
+ 1377,
60
+ 1391,
61
+ 1635,
62
+ 1782,
63
+ 1875,
64
+ 2162,
65
+ 2361,
66
+ 2488,
67
+ 3467,
68
+ 4008,
69
+ 4211,
70
+ 4600,
71
+ 4808,
72
+ 5299,
73
+ 5855,
74
+ 6329,
75
+ 7203,
76
+ 9609,
77
+ 9959,
78
+ 10563,
79
+ 10786,
80
+ 11420,
81
+ 11709,
82
+ 11907,
83
+ 13163,
84
+ 13697,
85
+ 13700,
86
+ 14808,
87
+ 15306,
88
+ 16410,
89
+ 16791,
90
+ 17992,
91
+ 19203,
92
+ 19510,
93
+ 20724,
94
+ 22305,
95
+ 22935,
96
+ 27007,
97
+ 30109,
98
+ 30420,
99
+ 33409,
100
+ 34949,
101
+ 40283,
102
+ 40493,
103
+ 40549,
104
+ 47282,
105
+ 49146,
106
+ 50257,
107
+ 50359,
108
+ 50360,
109
+ 50361
110
+ ],
111
+ "transformers_version": "4.27.0.dev0"
112
+ }
quantized/openai/whisper-tiny.en/speech2seq-lm-with-past/config.json CHANGED
@@ -2,6 +2,7 @@
2
  "_name_or_path": "openai/whisper-tiny.en",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
 
5
  "architectures": [
6
  "WhisperForConditionalGeneration"
7
  ],
@@ -11,6 +12,7 @@
11
  50256
12
  ],
13
  "bos_token_id": 50257,
 
14
  "d_model": 384,
15
  "decoder_attention_heads": 6,
16
  "decoder_ffn_dim": 1536,
@@ -31,6 +33,12 @@
31
  ],
32
  "init_std": 0.02,
33
  "is_encoder_decoder": true,
 
 
 
 
 
 
34
  "max_length": 448,
35
  "max_source_positions": 1500,
36
  "max_target_positions": 448,
@@ -129,7 +137,8 @@
129
  50360,
130
  50361
131
  ],
132
- "transformers_version": "4.26.1",
133
  "use_cache": true,
 
134
  "vocab_size": 51864
135
  }
 
2
  "_name_or_path": "openai/whisper-tiny.en",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
5
+ "apply_spec_augment": false,
6
  "architectures": [
7
  "WhisperForConditionalGeneration"
8
  ],
 
12
  50256
13
  ],
14
  "bos_token_id": 50257,
15
+ "classifier_proj_size": 256,
16
  "d_model": 384,
17
  "decoder_attention_heads": 6,
18
  "decoder_ffn_dim": 1536,
 
33
  ],
34
  "init_std": 0.02,
35
  "is_encoder_decoder": true,
36
+ "mask_feature_length": 10,
37
+ "mask_feature_min_masks": 0,
38
+ "mask_feature_prob": 0.0,
39
+ "mask_time_length": 10,
40
+ "mask_time_min_masks": 2,
41
+ "mask_time_prob": 0.05,
42
  "max_length": 448,
43
  "max_source_positions": 1500,
44
  "max_target_positions": 448,
 
137
  50360,
138
  50361
139
  ],
140
+ "transformers_version": "4.27.0",
141
  "use_cache": true,
142
+ "use_weighted_layer_sum": false,
143
  "vocab_size": 51864
144
  }
quantized/openai/whisper-tiny.en/speech2seq-lm-with-past/decoder_model.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:417751a3fa339b5eb7ccd239a3ed832a6e3e8edbeec3a7d049e132ac3478bbb5
3
- size 50733783
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6b8c26eca81f2eae9c332c5accadc19075908f90744ac3e4c2f1ebf364b21ee
3
+ size 50707360
quantized/openai/whisper-tiny.en/speech2seq-lm-with-past/decoder_model_merged.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b82d9231f4fb81470f8cc2878b6dd67bcf2f737b251924da4eba45d79777bc14
3
- size 50891940
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32c0b8af80535da0057ffbd19f7ed57d92e5502da406f3476a9dd0089ae1bec2
3
+ size 50839025
quantized/openai/whisper-tiny.en/speech2seq-lm-with-past/decoder_with_past_model.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aaa60b7625c15a1c26d1f21e2898b101367f5c746692a44b3bd68bcc5d4fcc15
3
- size 49485740
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d59c2a732407d8f00ced917d1ccfaa6aafe5e4f9ca23434f7f2f7ed47878ca10
3
+ size 49459317
quantized/openai/whisper-tiny.en/speech2seq-lm-with-past/encoder_model.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:29ee3cc641cf1e3cd448644bd474a974d7bb7b2a26aa3ad3d56aa458215e9cf7
3
- size 10192819
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8149b739060eaa30b6211a5e107e33e422f5c4d914229b94357f31bac4ed68f0
3
+ size 10182475
quantized/openai/whisper-tiny.en/speech2seq-lm-with-past/generation_config.json ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "begin_suppress_tokens": [
3
+ 220,
4
+ 50256
5
+ ],
6
+ "bos_token_id": 50257,
7
+ "decoder_start_token_id": 50257,
8
+ "eos_token_id": 50256,
9
+ "forced_decoder_ids": [
10
+ [
11
+ 1,
12
+ 50362
13
+ ]
14
+ ],
15
+ "is_multilingual": false,
16
+ "max_initial_timestamp_index": 1,
17
+ "max_length": 448,
18
+ "no_timestamps_token_id": 50362,
19
+ "pad_token_id": 50256,
20
+ "return_timestamps": false,
21
+ "suppress_tokens": [
22
+ 1,
23
+ 2,
24
+ 7,
25
+ 8,
26
+ 9,
27
+ 10,
28
+ 14,
29
+ 25,
30
+ 26,
31
+ 27,
32
+ 28,
33
+ 29,
34
+ 31,
35
+ 58,
36
+ 59,
37
+ 60,
38
+ 61,
39
+ 62,
40
+ 63,
41
+ 90,
42
+ 91,
43
+ 92,
44
+ 93,
45
+ 357,
46
+ 366,
47
+ 438,
48
+ 532,
49
+ 685,
50
+ 705,
51
+ 796,
52
+ 930,
53
+ 1058,
54
+ 1220,
55
+ 1267,
56
+ 1279,
57
+ 1303,
58
+ 1343,
59
+ 1377,
60
+ 1391,
61
+ 1635,
62
+ 1782,
63
+ 1875,
64
+ 2162,
65
+ 2361,
66
+ 2488,
67
+ 3467,
68
+ 4008,
69
+ 4211,
70
+ 4600,
71
+ 4808,
72
+ 5299,
73
+ 5855,
74
+ 6329,
75
+ 7203,
76
+ 9609,
77
+ 9959,
78
+ 10563,
79
+ 10786,
80
+ 11420,
81
+ 11709,
82
+ 11907,
83
+ 13163,
84
+ 13697,
85
+ 13700,
86
+ 14808,
87
+ 15306,
88
+ 16410,
89
+ 16791,
90
+ 17992,
91
+ 19203,
92
+ 19510,
93
+ 20724,
94
+ 22305,
95
+ 22935,
96
+ 27007,
97
+ 30109,
98
+ 30420,
99
+ 33409,
100
+ 34949,
101
+ 40283,
102
+ 40493,
103
+ 40549,
104
+ 47282,
105
+ 49146,
106
+ 50257,
107
+ 50359,
108
+ 50360,
109
+ 50361
110
+ ],
111
+ "transformers_version": "4.27.0.dev0"
112
+ }
quantized/openai/whisper-tiny.en/speech2seq-lm-with-past/merges.txt CHANGED
@@ -1,4 +1,4 @@
1
- #version: 0.2
2
  Ġ t
3
  Ġ a
4
  h e
 
1
+ #version: 0.2 - Trained by `huggingface/tokenizers`
2
  Ġ t
3
  Ġ a
4
  h e
quantized/openai/whisper-tiny.en/speech2seq-lm-with-past/tokenizer.json CHANGED
@@ -5,974 +5,973 @@
5
  "added_tokens": [
6
  {
7
  "id": 50256,
 
8
  "content": "<|endoftext|>",
9
  "single_word": false,
10
  "lstrip": false,
11
  "rstrip": false,
12
- "normalized": false,
13
- "special": true
14
  },
15
  {
16
  "id": 50257,
 
17
  "content": "<|startoftranscript|>",
18
  "single_word": false,
19
  "lstrip": false,
20
  "rstrip": false,
21
- "normalized": false,
22
- "special": true
23
  },
24
  {
25
  "id": 50258,
 
26
  "content": "<|en|>",
27
  "single_word": false,
28
  "lstrip": false,
29
  "rstrip": false,
30
- "normalized": false,
31
- "special": true
32
  },
33
  {
34
  "id": 50259,
 
35
  "content": "<|zh|>",
36
  "single_word": false,
37
  "lstrip": false,
38
  "rstrip": false,
39
- "normalized": false,
40
- "special": true
41
  },
42
  {
43
  "id": 50260,
 
44
  "content": "<|de|>",
45
  "single_word": false,
46
  "lstrip": false,
47
  "rstrip": false,
48
- "normalized": false,
49
- "special": true
50
  },
51
  {
52
  "id": 50261,
 
53
  "content": "<|es|>",
54
  "single_word": false,
55
  "lstrip": false,
56
  "rstrip": false,
57
- "normalized": false,
58
- "special": true
59
  },
60
  {
61
  "id": 50262,
 
62
  "content": "<|ru|>",
63
  "single_word": false,
64
  "lstrip": false,
65
  "rstrip": false,
66
- "normalized": false,
67
- "special": true
68
  },
69
  {
70
  "id": 50263,
 
71
  "content": "<|ko|>",
72
  "single_word": false,
73
  "lstrip": false,
74
  "rstrip": false,
75
- "normalized": false,
76
- "special": true
77
  },
78
  {
79
  "id": 50264,
 
80
  "content": "<|fr|>",
81
  "single_word": false,
82
  "lstrip": false,
83
  "rstrip": false,
84
- "normalized": false,
85
- "special": true
86
  },
87
  {
88
  "id": 50265,
 
89
  "content": "<|ja|>",
90
  "single_word": false,
91
  "lstrip": false,
92
  "rstrip": false,
93
- "normalized": false,
94
- "special": true
95
  },
96
  {
97
  "id": 50266,
 
98
  "content": "<|pt|>",
99
  "single_word": false,
100
  "lstrip": false,
101
  "rstrip": false,
102
- "normalized": false,
103
- "special": true
104
  },
105
  {
106
  "id": 50267,
 
107
  "content": "<|tr|>",
108
  "single_word": false,
109
  "lstrip": false,
110
  "rstrip": false,
111
- "normalized": false,
112
- "special": true
113
  },
114
  {
115
  "id": 50268,
 
116
  "content": "<|pl|>",
117
  "single_word": false,
118
  "lstrip": false,
119
  "rstrip": false,
120
- "normalized": false,
121
- "special": true
122
  },
123
  {
124
  "id": 50269,
 
125
  "content": "<|ca|>",
126
  "single_word": false,
127
  "lstrip": false,
128
  "rstrip": false,
129
- "normalized": false,
130
- "special": true
131
  },
132
  {
133
  "id": 50270,
 
134
  "content": "<|nl|>",
135
  "single_word": false,
136
  "lstrip": false,
137
  "rstrip": false,
138
- "normalized": false,
139
- "special": true
140
  },
141
  {
142
  "id": 50271,
 
143
  "content": "<|ar|>",
144
  "single_word": false,
145
  "lstrip": false,
146
  "rstrip": false,
147
- "normalized": false,
148
- "special": true
149
  },
150
  {
151
  "id": 50272,
 
152
  "content": "<|sv|>",
153
  "single_word": false,
154
  "lstrip": false,
155
  "rstrip": false,
156
- "normalized": false,
157
- "special": true
158
  },
159
  {
160
  "id": 50273,
 
161
  "content": "<|it|>",
162
  "single_word": false,
163
  "lstrip": false,
164
  "rstrip": false,
165
- "normalized": false,
166
- "special": true
167
  },
168
  {
169
  "id": 50274,
 
170
  "content": "<|id|>",
171
  "single_word": false,
172
  "lstrip": false,
173
  "rstrip": false,
174
- "normalized": false,
175
- "special": true
176
  },
177
  {
178
  "id": 50275,
 
179
  "content": "<|hi|>",
180
  "single_word": false,
181
  "lstrip": false,
182
  "rstrip": false,
183
- "normalized": false,
184
- "special": true
185
  },
186
  {
187
  "id": 50276,
 
188
  "content": "<|fi|>",
189
  "single_word": false,
190
  "lstrip": false,
191
  "rstrip": false,
192
- "normalized": false,
193
- "special": true
194
  },
195
  {
196
  "id": 50277,
 
197
  "content": "<|vi|>",
198
  "single_word": false,
199
  "lstrip": false,
200
  "rstrip": false,
201
- "normalized": false,
202
- "special": true
203
  },
204
  {
205
  "id": 50278,
 
206
  "content": "<|iw|>",
207
  "single_word": false,
208
  "lstrip": false,
209
  "rstrip": false,
210
- "normalized": false,
211
- "special": true
212
  },
213
  {
214
  "id": 50279,
 
215
  "content": "<|uk|>",
216
  "single_word": false,
217
  "lstrip": false,
218
  "rstrip": false,
219
- "normalized": false,
220
- "special": true
221
  },
222
  {
223
  "id": 50280,
 
224
  "content": "<|el|>",
225
  "single_word": false,
226
  "lstrip": false,
227
  "rstrip": false,
228
- "normalized": false,
229
- "special": true
230
  },
231
  {
232
  "id": 50281,
 
233
  "content": "<|ms|>",
234
  "single_word": false,
235
  "lstrip": false,
236
  "rstrip": false,
237
- "normalized": false,
238
- "special": true
239
  },
240
  {
241
  "id": 50282,
 
242
  "content": "<|cs|>",
243
  "single_word": false,
244
  "lstrip": false,
245
  "rstrip": false,
246
- "normalized": false,
247
- "special": true
248
  },
249
  {
250
  "id": 50283,
 
251
  "content": "<|ro|>",
252
  "single_word": false,
253
  "lstrip": false,
254
  "rstrip": false,
255
- "normalized": false,
256
- "special": true
257
  },
258
  {
259
  "id": 50284,
 
260
  "content": "<|da|>",
261
  "single_word": false,
262
  "lstrip": false,
263
  "rstrip": false,
264
- "normalized": false,
265
- "special": true
266
  },
267
  {
268
  "id": 50285,
 
269
  "content": "<|hu|>",
270
  "single_word": false,
271
  "lstrip": false,
272
  "rstrip": false,
273
- "normalized": false,
274
- "special": true
275
  },
276
  {
277
  "id": 50286,
 
278
  "content": "<|ta|>",
279
  "single_word": false,
280
  "lstrip": false,
281
  "rstrip": false,
282
- "normalized": false,
283
- "special": true
284
  },
285
  {
286
  "id": 50287,
 
287
  "content": "<|no|>",
288
  "single_word": false,
289
  "lstrip": false,
290
  "rstrip": false,
291
- "normalized": false,
292
- "special": true
293
  },
294
  {
295
  "id": 50288,
 
296
  "content": "<|th|>",
297
  "single_word": false,
298
  "lstrip": false,
299
  "rstrip": false,
300
- "normalized": false,
301
- "special": true
302
  },
303
  {
304
  "id": 50289,
 
305
  "content": "<|ur|>",
306
  "single_word": false,
307
  "lstrip": false,
308
  "rstrip": false,
309
- "normalized": false,
310
- "special": true
311
  },
312
  {
313
  "id": 50290,
 
314
  "content": "<|hr|>",
315
  "single_word": false,
316
  "lstrip": false,
317
  "rstrip": false,
318
- "normalized": false,
319
- "special": true
320
  },
321
  {
322
  "id": 50291,
 
323
  "content": "<|bg|>",
324
  "single_word": false,
325
  "lstrip": false,
326
  "rstrip": false,
327
- "normalized": false,
328
- "special": true
329
  },
330
  {
331
  "id": 50292,
 
332
  "content": "<|lt|>",
333
  "single_word": false,
334
  "lstrip": false,
335
  "rstrip": false,
336
- "normalized": false,
337
- "special": true
338
  },
339
  {
340
  "id": 50293,
 
341
  "content": "<|la|>",
342
  "single_word": false,
343
  "lstrip": false,
344
  "rstrip": false,
345
- "normalized": false,
346
- "special": true
347
  },
348
  {
349
  "id": 50294,
 
350
  "content": "<|mi|>",
351
  "single_word": false,
352
  "lstrip": false,
353
  "rstrip": false,
354
- "normalized": false,
355
- "special": true
356
  },
357
  {
358
  "id": 50295,
 
359
  "content": "<|ml|>",
360
  "single_word": false,
361
  "lstrip": false,
362
  "rstrip": false,
363
- "normalized": false,
364
- "special": true
365
  },
366
  {
367
  "id": 50296,
 
368
  "content": "<|cy|>",
369
  "single_word": false,
370
  "lstrip": false,
371
  "rstrip": false,
372
- "normalized": false,
373
- "special": true
374
  },
375
  {
376
  "id": 50297,
 
377
  "content": "<|sk|>",
378
  "single_word": false,
379
  "lstrip": false,
380
  "rstrip": false,
381
- "normalized": false,
382
- "special": true
383
  },
384
  {
385
  "id": 50298,
 
386
  "content": "<|te|>",
387
  "single_word": false,
388
  "lstrip": false,
389
  "rstrip": false,
390
- "normalized": false,
391
- "special": true
392
  },
393
  {
394
  "id": 50299,
 
395
  "content": "<|fa|>",
396
  "single_word": false,
397
  "lstrip": false,
398
  "rstrip": false,
399
- "normalized": false,
400
- "special": true
401
  },
402
  {
403
  "id": 50300,
 
404
  "content": "<|lv|>",
405
  "single_word": false,
406
  "lstrip": false,
407
  "rstrip": false,
408
- "normalized": false,
409
- "special": true
410
  },
411
  {
412
  "id": 50301,
 
413
  "content": "<|bn|>",
414
  "single_word": false,
415
  "lstrip": false,
416
  "rstrip": false,
417
- "normalized": false,
418
- "special": true
419
  },
420
  {
421
  "id": 50302,
 
422
  "content": "<|sr|>",
423
  "single_word": false,
424
  "lstrip": false,
425
  "rstrip": false,
426
- "normalized": false,
427
- "special": true
428
  },
429
  {
430
  "id": 50303,
 
431
  "content": "<|az|>",
432
  "single_word": false,
433
  "lstrip": false,
434
  "rstrip": false,
435
- "normalized": false,
436
- "special": true
437
  },
438
  {
439
  "id": 50304,
 
440
  "content": "<|sl|>",
441
  "single_word": false,
442
  "lstrip": false,
443
  "rstrip": false,
444
- "normalized": false,
445
- "special": true
446
  },
447
  {
448
  "id": 50305,
 
449
  "content": "<|kn|>",
450
  "single_word": false,
451
  "lstrip": false,
452
  "rstrip": false,
453
- "normalized": false,
454
- "special": true
455
  },
456
  {
457
  "id": 50306,
 
458
  "content": "<|et|>",
459
  "single_word": false,
460
  "lstrip": false,
461
  "rstrip": false,
462
- "normalized": false,
463
- "special": true
464
  },
465
  {
466
  "id": 50307,
 
467
  "content": "<|mk|>",
468
  "single_word": false,
469
  "lstrip": false,
470
  "rstrip": false,
471
- "normalized": false,
472
- "special": true
473
  },
474
  {
475
  "id": 50308,
 
476
  "content": "<|br|>",
477
  "single_word": false,
478
  "lstrip": false,
479
  "rstrip": false,
480
- "normalized": false,
481
- "special": true
482
  },
483
  {
484
  "id": 50309,
 
485
  "content": "<|eu|>",
486
  "single_word": false,
487
  "lstrip": false,
488
  "rstrip": false,
489
- "normalized": false,
490
- "special": true
491
  },
492
  {
493
  "id": 50310,
 
494
  "content": "<|is|>",
495
  "single_word": false,
496
  "lstrip": false,
497
  "rstrip": false,
498
- "normalized": false,
499
- "special": true
500
  },
501
  {
502
  "id": 50311,
 
503
  "content": "<|hy|>",
504
  "single_word": false,
505
  "lstrip": false,
506
  "rstrip": false,
507
- "normalized": false,
508
- "special": true
509
  },
510
  {
511
  "id": 50312,
 
512
  "content": "<|ne|>",
513
  "single_word": false,
514
  "lstrip": false,
515
  "rstrip": false,
516
- "normalized": false,
517
- "special": true
518
  },
519
  {
520
  "id": 50313,
 
521
  "content": "<|mn|>",
522
  "single_word": false,
523
  "lstrip": false,
524
  "rstrip": false,
525
- "normalized": false,
526
- "special": true
527
  },
528
  {
529
  "id": 50314,
 
530
  "content": "<|bs|>",
531
  "single_word": false,
532
  "lstrip": false,
533
  "rstrip": false,
534
- "normalized": false,
535
- "special": true
536
  },
537
  {
538
  "id": 50315,
 
539
  "content": "<|kk|>",
540
  "single_word": false,
541
  "lstrip": false,
542
  "rstrip": false,
543
- "normalized": false,
544
- "special": true
545
  },
546
  {
547
  "id": 50316,
 
548
  "content": "<|sq|>",
549
  "single_word": false,
550
  "lstrip": false,
551
  "rstrip": false,
552
- "normalized": false,
553
- "special": true
554
  },
555
  {
556
  "id": 50317,
 
557
  "content": "<|sw|>",
558
  "single_word": false,
559
  "lstrip": false,
560
  "rstrip": false,
561
- "normalized": false,
562
- "special": true
563
  },
564
  {
565
  "id": 50318,
 
566
  "content": "<|gl|>",
567
  "single_word": false,
568
  "lstrip": false,
569
  "rstrip": false,
570
- "normalized": false,
571
- "special": true
572
  },
573
  {
574
  "id": 50319,
 
575
  "content": "<|mr|>",
576
  "single_word": false,
577
  "lstrip": false,
578
  "rstrip": false,
579
- "normalized": false,
580
- "special": true
581
  },
582
  {
583
  "id": 50320,
 
584
  "content": "<|pa|>",
585
  "single_word": false,
586
  "lstrip": false,
587
  "rstrip": false,
588
- "normalized": false,
589
- "special": true
590
  },
591
  {
592
  "id": 50321,
 
593
  "content": "<|si|>",
594
  "single_word": false,
595
  "lstrip": false,
596
  "rstrip": false,
597
- "normalized": false,
598
- "special": true
599
  },
600
  {
601
  "id": 50322,
 
602
  "content": "<|km|>",
603
  "single_word": false,
604
  "lstrip": false,
605
  "rstrip": false,
606
- "normalized": false,
607
- "special": true
608
  },
609
  {
610
  "id": 50323,
 
611
  "content": "<|sn|>",
612
  "single_word": false,
613
  "lstrip": false,
614
  "rstrip": false,
615
- "normalized": false,
616
- "special": true
617
  },
618
  {
619
  "id": 50324,
 
620
  "content": "<|yo|>",
621
  "single_word": false,
622
  "lstrip": false,
623
  "rstrip": false,
624
- "normalized": false,
625
- "special": true
626
  },
627
  {
628
  "id": 50325,
 
629
  "content": "<|so|>",
630
  "single_word": false,
631
  "lstrip": false,
632
  "rstrip": false,
633
- "normalized": false,
634
- "special": true
635
  },
636
  {
637
  "id": 50326,
 
638
  "content": "<|af|>",
639
  "single_word": false,
640
  "lstrip": false,
641
  "rstrip": false,
642
- "normalized": false,
643
- "special": true
644
  },
645
  {
646
  "id": 50327,
 
647
  "content": "<|oc|>",
648
  "single_word": false,
649
  "lstrip": false,
650
  "rstrip": false,
651
- "normalized": false,
652
- "special": true
653
  },
654
  {
655
  "id": 50328,
 
656
  "content": "<|ka|>",
657
  "single_word": false,
658
  "lstrip": false,
659
  "rstrip": false,
660
- "normalized": false,
661
- "special": true
662
  },
663
  {
664
  "id": 50329,
 
665
  "content": "<|be|>",
666
  "single_word": false,
667
  "lstrip": false,
668
  "rstrip": false,
669
- "normalized": false,
670
- "special": true
671
  },
672
  {
673
  "id": 50330,
 
674
  "content": "<|tg|>",
675
  "single_word": false,
676
  "lstrip": false,
677
  "rstrip": false,
678
- "normalized": false,
679
- "special": true
680
  },
681
  {
682
  "id": 50331,
 
683
  "content": "<|sd|>",
684
  "single_word": false,
685
  "lstrip": false,
686
  "rstrip": false,
687
- "normalized": false,
688
- "special": true
689
  },
690
  {
691
  "id": 50332,
 
692
  "content": "<|gu|>",
693
  "single_word": false,
694
  "lstrip": false,
695
  "rstrip": false,
696
- "normalized": false,
697
- "special": true
698
  },
699
  {
700
  "id": 50333,
 
701
  "content": "<|am|>",
702
  "single_word": false,
703
  "lstrip": false,
704
  "rstrip": false,
705
- "normalized": false,
706
- "special": true
707
  },
708
  {
709
  "id": 50334,
 
710
  "content": "<|yi|>",
711
  "single_word": false,
712
  "lstrip": false,
713
  "rstrip": false,
714
- "normalized": false,
715
- "special": true
716
  },
717
  {
718
  "id": 50335,
 
719
  "content": "<|lo|>",
720
  "single_word": false,
721
  "lstrip": false,
722
  "rstrip": false,
723
- "normalized": false,
724
- "special": true
725
  },
726
  {
727
  "id": 50336,
 
728
  "content": "<|uz|>",
729
  "single_word": false,
730
  "lstrip": false,
731
  "rstrip": false,
732
- "normalized": false,
733
- "special": true
734
  },
735
  {
736
  "id": 50337,
 
737
  "content": "<|fo|>",
738
  "single_word": false,
739
  "lstrip": false,
740
  "rstrip": false,
741
- "normalized": false,
742
- "special": true
743
  },
744
  {
745
  "id": 50338,
 
746
  "content": "<|ht|>",
747
  "single_word": false,
748
  "lstrip": false,
749
  "rstrip": false,
750
- "normalized": false,
751
- "special": true
752
  },
753
  {
754
  "id": 50339,
 
755
  "content": "<|ps|>",
756
  "single_word": false,
757
  "lstrip": false,
758
  "rstrip": false,
759
- "normalized": false,
760
- "special": true
761
  },
762
  {
763
  "id": 50340,
 
764
  "content": "<|tk|>",
765
  "single_word": false,
766
  "lstrip": false,
767
  "rstrip": false,
768
- "normalized": false,
769
- "special": true
770
  },
771
  {
772
  "id": 50341,
 
773
  "content": "<|nn|>",
774
  "single_word": false,
775
  "lstrip": false,
776
  "rstrip": false,
777
- "normalized": false,
778
- "special": true
779
  },
780
  {
781
  "id": 50342,
 
782
  "content": "<|mt|>",
783
  "single_word": false,
784
  "lstrip": false,
785
  "rstrip": false,
786
- "normalized": false,
787
- "special": true
788
  },
789
  {
790
  "id": 50343,
 
791
  "content": "<|sa|>",
792
  "single_word": false,
793
  "lstrip": false,
794
  "rstrip": false,
795
- "normalized": false,
796
- "special": true
797
  },
798
  {
799
  "id": 50344,
 
800
  "content": "<|lb|>",
801
  "single_word": false,
802
  "lstrip": false,
803
  "rstrip": false,
804
- "normalized": false,
805
- "special": true
806
  },
807
  {
808
  "id": 50345,
 
809
  "content": "<|my|>",
810
  "single_word": false,
811
  "lstrip": false,
812
  "rstrip": false,
813
- "normalized": false,
814
- "special": true
815
  },
816
  {
817
  "id": 50346,
 
818
  "content": "<|bo|>",
819
  "single_word": false,
820
  "lstrip": false,
821
  "rstrip": false,
822
- "normalized": false,
823
- "special": true
824
  },
825
  {
826
  "id": 50347,
 
827
  "content": "<|tl|>",
828
  "single_word": false,
829
  "lstrip": false,
830
  "rstrip": false,
831
- "normalized": false,
832
- "special": true
833
  },
834
  {
835
  "id": 50348,
 
836
  "content": "<|mg|>",
837
  "single_word": false,
838
  "lstrip": false,
839
  "rstrip": false,
840
- "normalized": false,
841
- "special": true
842
  },
843
  {
844
  "id": 50349,
 
845
  "content": "<|as|>",
846
  "single_word": false,
847
  "lstrip": false,
848
  "rstrip": false,
849
- "normalized": false,
850
- "special": true
851
  },
852
  {
853
  "id": 50350,
 
854
  "content": "<|tt|>",
855
  "single_word": false,
856
  "lstrip": false,
857
  "rstrip": false,
858
- "normalized": false,
859
- "special": true
860
  },
861
  {
862
  "id": 50351,
 
863
  "content": "<|haw|>",
864
  "single_word": false,
865
  "lstrip": false,
866
  "rstrip": false,
867
- "normalized": false,
868
- "special": true
869
  },
870
  {
871
  "id": 50352,
 
872
  "content": "<|ln|>",
873
  "single_word": false,
874
  "lstrip": false,
875
  "rstrip": false,
876
- "normalized": false,
877
- "special": true
878
  },
879
  {
880
  "id": 50353,
 
881
  "content": "<|ha|>",
882
  "single_word": false,
883
  "lstrip": false,
884
  "rstrip": false,
885
- "normalized": false,
886
- "special": true
887
  },
888
  {
889
  "id": 50354,
 
890
  "content": "<|ba|>",
891
  "single_word": false,
892
  "lstrip": false,
893
  "rstrip": false,
894
- "normalized": false,
895
- "special": true
896
  },
897
  {
898
  "id": 50355,
 
899
  "content": "<|jw|>",
900
  "single_word": false,
901
  "lstrip": false,
902
  "rstrip": false,
903
- "normalized": false,
904
- "special": true
905
  },
906
  {
907
  "id": 50356,
 
908
  "content": "<|su|>",
909
  "single_word": false,
910
  "lstrip": false,
911
  "rstrip": false,
912
- "normalized": false,
913
- "special": true
914
  },
915
  {
916
  "id": 50357,
 
917
  "content": "<|translate|>",
918
  "single_word": false,
919
  "lstrip": false,
920
  "rstrip": false,
921
- "normalized": false,
922
- "special": true
923
  },
924
  {
925
  "id": 50358,
 
926
  "content": "<|transcribe|>",
927
  "single_word": false,
928
  "lstrip": false,
929
  "rstrip": false,
930
- "normalized": false,
931
- "special": true
932
  },
933
  {
934
  "id": 50359,
 
935
  "content": "<|startoflm|>",
936
  "single_word": false,
937
  "lstrip": false,
938
  "rstrip": false,
939
- "normalized": false,
940
- "special": true
941
  },
942
  {
943
  "id": 50360,
 
944
  "content": "<|startofprev|>",
945
  "single_word": false,
946
  "lstrip": false,
947
  "rstrip": false,
948
- "normalized": false,
949
- "special": true
950
  },
951
  {
952
  "id": 50361,
 
953
  "content": "<|nocaptions|>",
954
  "single_word": false,
955
  "lstrip": false,
956
  "rstrip": false,
957
- "normalized": false,
958
- "special": true
959
  },
960
  {
961
  "id": 50362,
 
962
  "content": "<|notimestamps|>",
963
  "single_word": false,
964
  "lstrip": false,
965
  "rstrip": false,
966
- "normalized": false,
967
- "special": true
968
  }
969
  ],
970
  "normalizer": null,
971
  "pre_tokenizer": {
972
  "type": "ByteLevel",
973
  "add_prefix_space": false,
974
- "trim_offsets": true,
975
- "use_regex": true
976
  },
977
  "post_processor": {
978
  "type": "TemplateProcessing",
@@ -1067,8 +1066,7 @@
1067
  "decoder": {
1068
  "type": "ByteLevel",
1069
  "add_prefix_space": true,
1070
- "trim_offsets": true,
1071
- "use_regex": true
1072
  },
1073
  "model": {
1074
  "type": "BPE",
 
5
  "added_tokens": [
6
  {
7
  "id": 50256,
8
+ "special": true,
9
  "content": "<|endoftext|>",
10
  "single_word": false,
11
  "lstrip": false,
12
  "rstrip": false,
13
+ "normalized": false
 
14
  },
15
  {
16
  "id": 50257,
17
+ "special": true,
18
  "content": "<|startoftranscript|>",
19
  "single_word": false,
20
  "lstrip": false,
21
  "rstrip": false,
22
+ "normalized": false
 
23
  },
24
  {
25
  "id": 50258,
26
+ "special": true,
27
  "content": "<|en|>",
28
  "single_word": false,
29
  "lstrip": false,
30
  "rstrip": false,
31
+ "normalized": false
 
32
  },
33
  {
34
  "id": 50259,
35
+ "special": true,
36
  "content": "<|zh|>",
37
  "single_word": false,
38
  "lstrip": false,
39
  "rstrip": false,
40
+ "normalized": false
 
41
  },
42
  {
43
  "id": 50260,
44
+ "special": true,
45
  "content": "<|de|>",
46
  "single_word": false,
47
  "lstrip": false,
48
  "rstrip": false,
49
+ "normalized": false
 
50
  },
51
  {
52
  "id": 50261,
53
+ "special": true,
54
  "content": "<|es|>",
55
  "single_word": false,
56
  "lstrip": false,
57
  "rstrip": false,
58
+ "normalized": false
 
59
  },
60
  {
61
  "id": 50262,
62
+ "special": true,
63
  "content": "<|ru|>",
64
  "single_word": false,
65
  "lstrip": false,
66
  "rstrip": false,
67
+ "normalized": false
 
68
  },
69
  {
70
  "id": 50263,
71
+ "special": true,
72
  "content": "<|ko|>",
73
  "single_word": false,
74
  "lstrip": false,
75
  "rstrip": false,
76
+ "normalized": false
 
77
  },
78
  {
79
  "id": 50264,
80
+ "special": true,
81
  "content": "<|fr|>",
82
  "single_word": false,
83
  "lstrip": false,
84
  "rstrip": false,
85
+ "normalized": false
 
86
  },
87
  {
88
  "id": 50265,
89
+ "special": true,
90
  "content": "<|ja|>",
91
  "single_word": false,
92
  "lstrip": false,
93
  "rstrip": false,
94
+ "normalized": false
 
95
  },
96
  {
97
  "id": 50266,
98
+ "special": true,
99
  "content": "<|pt|>",
100
  "single_word": false,
101
  "lstrip": false,
102
  "rstrip": false,
103
+ "normalized": false
 
104
  },
105
  {
106
  "id": 50267,
107
+ "special": true,
108
  "content": "<|tr|>",
109
  "single_word": false,
110
  "lstrip": false,
111
  "rstrip": false,
112
+ "normalized": false
 
113
  },
114
  {
115
  "id": 50268,
116
+ "special": true,
117
  "content": "<|pl|>",
118
  "single_word": false,
119
  "lstrip": false,
120
  "rstrip": false,
121
+ "normalized": false
 
122
  },
123
  {
124
  "id": 50269,
125
+ "special": true,
126
  "content": "<|ca|>",
127
  "single_word": false,
128
  "lstrip": false,
129
  "rstrip": false,
130
+ "normalized": false
 
131
  },
132
  {
133
  "id": 50270,
134
+ "special": true,
135
  "content": "<|nl|>",
136
  "single_word": false,
137
  "lstrip": false,
138
  "rstrip": false,
139
+ "normalized": false
 
140
  },
141
  {
142
  "id": 50271,
143
+ "special": true,
144
  "content": "<|ar|>",
145
  "single_word": false,
146
  "lstrip": false,
147
  "rstrip": false,
148
+ "normalized": false
 
149
  },
150
  {
151
  "id": 50272,
152
+ "special": true,
153
  "content": "<|sv|>",
154
  "single_word": false,
155
  "lstrip": false,
156
  "rstrip": false,
157
+ "normalized": false
 
158
  },
159
  {
160
  "id": 50273,
161
+ "special": true,
162
  "content": "<|it|>",
163
  "single_word": false,
164
  "lstrip": false,
165
  "rstrip": false,
166
+ "normalized": false
 
167
  },
168
  {
169
  "id": 50274,
170
+ "special": true,
171
  "content": "<|id|>",
172
  "single_word": false,
173
  "lstrip": false,
174
  "rstrip": false,
175
+ "normalized": false
 
176
  },
177
  {
178
  "id": 50275,
179
+ "special": true,
180
  "content": "<|hi|>",
181
  "single_word": false,
182
  "lstrip": false,
183
  "rstrip": false,
184
+ "normalized": false
 
185
  },
186
  {
187
  "id": 50276,
188
+ "special": true,
189
  "content": "<|fi|>",
190
  "single_word": false,
191
  "lstrip": false,
192
  "rstrip": false,
193
+ "normalized": false
 
194
  },
195
  {
196
  "id": 50277,
197
+ "special": true,
198
  "content": "<|vi|>",
199
  "single_word": false,
200
  "lstrip": false,
201
  "rstrip": false,
202
+ "normalized": false
 
203
  },
204
  {
205
  "id": 50278,
206
+ "special": true,
207
  "content": "<|iw|>",
208
  "single_word": false,
209
  "lstrip": false,
210
  "rstrip": false,
211
+ "normalized": false
 
212
  },
213
  {
214
  "id": 50279,
215
+ "special": true,
216
  "content": "<|uk|>",
217
  "single_word": false,
218
  "lstrip": false,
219
  "rstrip": false,
220
+ "normalized": false
 
221
  },
222
  {
223
  "id": 50280,
224
+ "special": true,
225
  "content": "<|el|>",
226
  "single_word": false,
227
  "lstrip": false,
228
  "rstrip": false,
229
+ "normalized": false
 
230
  },
231
  {
232
  "id": 50281,
233
+ "special": true,
234
  "content": "<|ms|>",
235
  "single_word": false,
236
  "lstrip": false,
237
  "rstrip": false,
238
+ "normalized": false
 
239
  },
240
  {
241
  "id": 50282,
242
+ "special": true,
243
  "content": "<|cs|>",
244
  "single_word": false,
245
  "lstrip": false,
246
  "rstrip": false,
247
+ "normalized": false
 
248
  },
249
  {
250
  "id": 50283,
251
+ "special": true,
252
  "content": "<|ro|>",
253
  "single_word": false,
254
  "lstrip": false,
255
  "rstrip": false,
256
+ "normalized": false
 
257
  },
258
  {
259
  "id": 50284,
260
+ "special": true,
261
  "content": "<|da|>",
262
  "single_word": false,
263
  "lstrip": false,
264
  "rstrip": false,
265
+ "normalized": false
 
266
  },
267
  {
268
  "id": 50285,
269
+ "special": true,
270
  "content": "<|hu|>",
271
  "single_word": false,
272
  "lstrip": false,
273
  "rstrip": false,
274
+ "normalized": false
 
275
  },
276
  {
277
  "id": 50286,
278
+ "special": true,
279
  "content": "<|ta|>",
280
  "single_word": false,
281
  "lstrip": false,
282
  "rstrip": false,
283
+ "normalized": false
 
284
  },
285
  {
286
  "id": 50287,
287
+ "special": true,
288
  "content": "<|no|>",
289
  "single_word": false,
290
  "lstrip": false,
291
  "rstrip": false,
292
+ "normalized": false
 
293
  },
294
  {
295
  "id": 50288,
296
+ "special": true,
297
  "content": "<|th|>",
298
  "single_word": false,
299
  "lstrip": false,
300
  "rstrip": false,
301
+ "normalized": false
 
302
  },
303
  {
304
  "id": 50289,
305
+ "special": true,
306
  "content": "<|ur|>",
307
  "single_word": false,
308
  "lstrip": false,
309
  "rstrip": false,
310
+ "normalized": false
 
311
  },
312
  {
313
  "id": 50290,
314
+ "special": true,
315
  "content": "<|hr|>",
316
  "single_word": false,
317
  "lstrip": false,
318
  "rstrip": false,
319
+ "normalized": false
 
320
  },
321
  {
322
  "id": 50291,
323
+ "special": true,
324
  "content": "<|bg|>",
325
  "single_word": false,
326
  "lstrip": false,
327
  "rstrip": false,
328
+ "normalized": false
 
329
  },
330
  {
331
  "id": 50292,
332
+ "special": true,
333
  "content": "<|lt|>",
334
  "single_word": false,
335
  "lstrip": false,
336
  "rstrip": false,
337
+ "normalized": false
 
338
  },
339
  {
340
  "id": 50293,
341
+ "special": true,
342
  "content": "<|la|>",
343
  "single_word": false,
344
  "lstrip": false,
345
  "rstrip": false,
346
+ "normalized": false
 
347
  },
348
  {
349
  "id": 50294,
350
+ "special": true,
351
  "content": "<|mi|>",
352
  "single_word": false,
353
  "lstrip": false,
354
  "rstrip": false,
355
+ "normalized": false
 
356
  },
357
  {
358
  "id": 50295,
359
+ "special": true,
360
  "content": "<|ml|>",
361
  "single_word": false,
362
  "lstrip": false,
363
  "rstrip": false,
364
+ "normalized": false
 
365
  },
366
  {
367
  "id": 50296,
368
+ "special": true,
369
  "content": "<|cy|>",
370
  "single_word": false,
371
  "lstrip": false,
372
  "rstrip": false,
373
+ "normalized": false
 
374
  },
375
  {
376
  "id": 50297,
377
+ "special": true,
378
  "content": "<|sk|>",
379
  "single_word": false,
380
  "lstrip": false,
381
  "rstrip": false,
382
+ "normalized": false
 
383
  },
384
  {
385
  "id": 50298,
386
+ "special": true,
387
  "content": "<|te|>",
388
  "single_word": false,
389
  "lstrip": false,
390
  "rstrip": false,
391
+ "normalized": false
 
392
  },
393
  {
394
  "id": 50299,
395
+ "special": true,
396
  "content": "<|fa|>",
397
  "single_word": false,
398
  "lstrip": false,
399
  "rstrip": false,
400
+ "normalized": false
 
401
  },
402
  {
403
  "id": 50300,
404
+ "special": true,
405
  "content": "<|lv|>",
406
  "single_word": false,
407
  "lstrip": false,
408
  "rstrip": false,
409
+ "normalized": false
 
410
  },
411
  {
412
  "id": 50301,
413
+ "special": true,
414
  "content": "<|bn|>",
415
  "single_word": false,
416
  "lstrip": false,
417
  "rstrip": false,
418
+ "normalized": false
 
419
  },
420
  {
421
  "id": 50302,
422
+ "special": true,
423
  "content": "<|sr|>",
424
  "single_word": false,
425
  "lstrip": false,
426
  "rstrip": false,
427
+ "normalized": false
 
428
  },
429
  {
430
  "id": 50303,
431
+ "special": true,
432
  "content": "<|az|>",
433
  "single_word": false,
434
  "lstrip": false,
435
  "rstrip": false,
436
+ "normalized": false
 
437
  },
438
  {
439
  "id": 50304,
440
+ "special": true,
441
  "content": "<|sl|>",
442
  "single_word": false,
443
  "lstrip": false,
444
  "rstrip": false,
445
+ "normalized": false
 
446
  },
447
  {
448
  "id": 50305,
449
+ "special": true,
450
  "content": "<|kn|>",
451
  "single_word": false,
452
  "lstrip": false,
453
  "rstrip": false,
454
+ "normalized": false
 
455
  },
456
  {
457
  "id": 50306,
458
+ "special": true,
459
  "content": "<|et|>",
460
  "single_word": false,
461
  "lstrip": false,
462
  "rstrip": false,
463
+ "normalized": false
 
464
  },
465
  {
466
  "id": 50307,
467
+ "special": true,
468
  "content": "<|mk|>",
469
  "single_word": false,
470
  "lstrip": false,
471
  "rstrip": false,
472
+ "normalized": false
 
473
  },
474
  {
475
  "id": 50308,
476
+ "special": true,
477
  "content": "<|br|>",
478
  "single_word": false,
479
  "lstrip": false,
480
  "rstrip": false,
481
+ "normalized": false
 
482
  },
483
  {
484
  "id": 50309,
485
+ "special": true,
486
  "content": "<|eu|>",
487
  "single_word": false,
488
  "lstrip": false,
489
  "rstrip": false,
490
+ "normalized": false
 
491
  },
492
  {
493
  "id": 50310,
494
+ "special": true,
495
  "content": "<|is|>",
496
  "single_word": false,
497
  "lstrip": false,
498
  "rstrip": false,
499
+ "normalized": false
 
500
  },
501
  {
502
  "id": 50311,
503
+ "special": true,
504
  "content": "<|hy|>",
505
  "single_word": false,
506
  "lstrip": false,
507
  "rstrip": false,
508
+ "normalized": false
 
509
  },
510
  {
511
  "id": 50312,
512
+ "special": true,
513
  "content": "<|ne|>",
514
  "single_word": false,
515
  "lstrip": false,
516
  "rstrip": false,
517
+ "normalized": false
 
518
  },
519
  {
520
  "id": 50313,
521
+ "special": true,
522
  "content": "<|mn|>",
523
  "single_word": false,
524
  "lstrip": false,
525
  "rstrip": false,
526
+ "normalized": false
 
527
  },
528
  {
529
  "id": 50314,
530
+ "special": true,
531
  "content": "<|bs|>",
532
  "single_word": false,
533
  "lstrip": false,
534
  "rstrip": false,
535
+ "normalized": false
 
536
  },
537
  {
538
  "id": 50315,
539
+ "special": true,
540
  "content": "<|kk|>",
541
  "single_word": false,
542
  "lstrip": false,
543
  "rstrip": false,
544
+ "normalized": false
 
545
  },
546
  {
547
  "id": 50316,
548
+ "special": true,
549
  "content": "<|sq|>",
550
  "single_word": false,
551
  "lstrip": false,
552
  "rstrip": false,
553
+ "normalized": false
 
554
  },
555
  {
556
  "id": 50317,
557
+ "special": true,
558
  "content": "<|sw|>",
559
  "single_word": false,
560
  "lstrip": false,
561
  "rstrip": false,
562
+ "normalized": false
 
563
  },
564
  {
565
  "id": 50318,
566
+ "special": true,
567
  "content": "<|gl|>",
568
  "single_word": false,
569
  "lstrip": false,
570
  "rstrip": false,
571
+ "normalized": false
 
572
  },
573
  {
574
  "id": 50319,
575
+ "special": true,
576
  "content": "<|mr|>",
577
  "single_word": false,
578
  "lstrip": false,
579
  "rstrip": false,
580
+ "normalized": false
 
581
  },
582
  {
583
  "id": 50320,
584
+ "special": true,
585
  "content": "<|pa|>",
586
  "single_word": false,
587
  "lstrip": false,
588
  "rstrip": false,
589
+ "normalized": false
 
590
  },
591
  {
592
  "id": 50321,
593
+ "special": true,
594
  "content": "<|si|>",
595
  "single_word": false,
596
  "lstrip": false,
597
  "rstrip": false,
598
+ "normalized": false
 
599
  },
600
  {
601
  "id": 50322,
602
+ "special": true,
603
  "content": "<|km|>",
604
  "single_word": false,
605
  "lstrip": false,
606
  "rstrip": false,
607
+ "normalized": false
 
608
  },
609
  {
610
  "id": 50323,
611
+ "special": true,
612
  "content": "<|sn|>",
613
  "single_word": false,
614
  "lstrip": false,
615
  "rstrip": false,
616
+ "normalized": false
 
617
  },
618
  {
619
  "id": 50324,
620
+ "special": true,
621
  "content": "<|yo|>",
622
  "single_word": false,
623
  "lstrip": false,
624
  "rstrip": false,
625
+ "normalized": false
 
626
  },
627
  {
628
  "id": 50325,
629
+ "special": true,
630
  "content": "<|so|>",
631
  "single_word": false,
632
  "lstrip": false,
633
  "rstrip": false,
634
+ "normalized": false
 
635
  },
636
  {
637
  "id": 50326,
638
+ "special": true,
639
  "content": "<|af|>",
640
  "single_word": false,
641
  "lstrip": false,
642
  "rstrip": false,
643
+ "normalized": false
 
644
  },
645
  {
646
  "id": 50327,
647
+ "special": true,
648
  "content": "<|oc|>",
649
  "single_word": false,
650
  "lstrip": false,
651
  "rstrip": false,
652
+ "normalized": false
 
653
  },
654
  {
655
  "id": 50328,
656
+ "special": true,
657
  "content": "<|ka|>",
658
  "single_word": false,
659
  "lstrip": false,
660
  "rstrip": false,
661
+ "normalized": false
 
662
  },
663
  {
664
  "id": 50329,
665
+ "special": true,
666
  "content": "<|be|>",
667
  "single_word": false,
668
  "lstrip": false,
669
  "rstrip": false,
670
+ "normalized": false
 
671
  },
672
  {
673
  "id": 50330,
674
+ "special": true,
675
  "content": "<|tg|>",
676
  "single_word": false,
677
  "lstrip": false,
678
  "rstrip": false,
679
+ "normalized": false
 
680
  },
681
  {
682
  "id": 50331,
683
+ "special": true,
684
  "content": "<|sd|>",
685
  "single_word": false,
686
  "lstrip": false,
687
  "rstrip": false,
688
+ "normalized": false
 
689
  },
690
  {
691
  "id": 50332,
692
+ "special": true,
693
  "content": "<|gu|>",
694
  "single_word": false,
695
  "lstrip": false,
696
  "rstrip": false,
697
+ "normalized": false
 
698
  },
699
  {
700
  "id": 50333,
701
+ "special": true,
702
  "content": "<|am|>",
703
  "single_word": false,
704
  "lstrip": false,
705
  "rstrip": false,
706
+ "normalized": false
 
707
  },
708
  {
709
  "id": 50334,
710
+ "special": true,
711
  "content": "<|yi|>",
712
  "single_word": false,
713
  "lstrip": false,
714
  "rstrip": false,
715
+ "normalized": false
 
716
  },
717
  {
718
  "id": 50335,
719
+ "special": true,
720
  "content": "<|lo|>",
721
  "single_word": false,
722
  "lstrip": false,
723
  "rstrip": false,
724
+ "normalized": false
 
725
  },
726
  {
727
  "id": 50336,
728
+ "special": true,
729
  "content": "<|uz|>",
730
  "single_word": false,
731
  "lstrip": false,
732
  "rstrip": false,
733
+ "normalized": false
 
734
  },
735
  {
736
  "id": 50337,
737
+ "special": true,
738
  "content": "<|fo|>",
739
  "single_word": false,
740
  "lstrip": false,
741
  "rstrip": false,
742
+ "normalized": false
 
743
  },
744
  {
745
  "id": 50338,
746
+ "special": true,
747
  "content": "<|ht|>",
748
  "single_word": false,
749
  "lstrip": false,
750
  "rstrip": false,
751
+ "normalized": false
 
752
  },
753
  {
754
  "id": 50339,
755
+ "special": true,
756
  "content": "<|ps|>",
757
  "single_word": false,
758
  "lstrip": false,
759
  "rstrip": false,
760
+ "normalized": false
 
761
  },
762
  {
763
  "id": 50340,
764
+ "special": true,
765
  "content": "<|tk|>",
766
  "single_word": false,
767
  "lstrip": false,
768
  "rstrip": false,
769
+ "normalized": false
 
770
  },
771
  {
772
  "id": 50341,
773
+ "special": true,
774
  "content": "<|nn|>",
775
  "single_word": false,
776
  "lstrip": false,
777
  "rstrip": false,
778
+ "normalized": false
 
779
  },
780
  {
781
  "id": 50342,
782
+ "special": true,
783
  "content": "<|mt|>",
784
  "single_word": false,
785
  "lstrip": false,
786
  "rstrip": false,
787
+ "normalized": false
 
788
  },
789
  {
790
  "id": 50343,
791
+ "special": true,
792
  "content": "<|sa|>",
793
  "single_word": false,
794
  "lstrip": false,
795
  "rstrip": false,
796
+ "normalized": false
 
797
  },
798
  {
799
  "id": 50344,
800
+ "special": true,
801
  "content": "<|lb|>",
802
  "single_word": false,
803
  "lstrip": false,
804
  "rstrip": false,
805
+ "normalized": false
 
806
  },
807
  {
808
  "id": 50345,
809
+ "special": true,
810
  "content": "<|my|>",
811
  "single_word": false,
812
  "lstrip": false,
813
  "rstrip": false,
814
+ "normalized": false
 
815
  },
816
  {
817
  "id": 50346,
818
+ "special": true,
819
  "content": "<|bo|>",
820
  "single_word": false,
821
  "lstrip": false,
822
  "rstrip": false,
823
+ "normalized": false
 
824
  },
825
  {
826
  "id": 50347,
827
+ "special": true,
828
  "content": "<|tl|>",
829
  "single_word": false,
830
  "lstrip": false,
831
  "rstrip": false,
832
+ "normalized": false
 
833
  },
834
  {
835
  "id": 50348,
836
+ "special": true,
837
  "content": "<|mg|>",
838
  "single_word": false,
839
  "lstrip": false,
840
  "rstrip": false,
841
+ "normalized": false
 
842
  },
843
  {
844
  "id": 50349,
845
+ "special": true,
846
  "content": "<|as|>",
847
  "single_word": false,
848
  "lstrip": false,
849
  "rstrip": false,
850
+ "normalized": false
 
851
  },
852
  {
853
  "id": 50350,
854
+ "special": true,
855
  "content": "<|tt|>",
856
  "single_word": false,
857
  "lstrip": false,
858
  "rstrip": false,
859
+ "normalized": false
 
860
  },
861
  {
862
  "id": 50351,
863
+ "special": true,
864
  "content": "<|haw|>",
865
  "single_word": false,
866
  "lstrip": false,
867
  "rstrip": false,
868
+ "normalized": false
 
869
  },
870
  {
871
  "id": 50352,
872
+ "special": true,
873
  "content": "<|ln|>",
874
  "single_word": false,
875
  "lstrip": false,
876
  "rstrip": false,
877
+ "normalized": false
 
878
  },
879
  {
880
  "id": 50353,
881
+ "special": true,
882
  "content": "<|ha|>",
883
  "single_word": false,
884
  "lstrip": false,
885
  "rstrip": false,
886
+ "normalized": false
 
887
  },
888
  {
889
  "id": 50354,
890
+ "special": true,
891
  "content": "<|ba|>",
892
  "single_word": false,
893
  "lstrip": false,
894
  "rstrip": false,
895
+ "normalized": false
 
896
  },
897
  {
898
  "id": 50355,
899
+ "special": true,
900
  "content": "<|jw|>",
901
  "single_word": false,
902
  "lstrip": false,
903
  "rstrip": false,
904
+ "normalized": false
 
905
  },
906
  {
907
  "id": 50356,
908
+ "special": true,
909
  "content": "<|su|>",
910
  "single_word": false,
911
  "lstrip": false,
912
  "rstrip": false,
913
+ "normalized": false
 
914
  },
915
  {
916
  "id": 50357,
917
+ "special": true,
918
  "content": "<|translate|>",
919
  "single_word": false,
920
  "lstrip": false,
921
  "rstrip": false,
922
+ "normalized": false
 
923
  },
924
  {
925
  "id": 50358,
926
+ "special": true,
927
  "content": "<|transcribe|>",
928
  "single_word": false,
929
  "lstrip": false,
930
  "rstrip": false,
931
+ "normalized": false
 
932
  },
933
  {
934
  "id": 50359,
935
+ "special": true,
936
  "content": "<|startoflm|>",
937
  "single_word": false,
938
  "lstrip": false,
939
  "rstrip": false,
940
+ "normalized": false
 
941
  },
942
  {
943
  "id": 50360,
944
+ "special": true,
945
  "content": "<|startofprev|>",
946
  "single_word": false,
947
  "lstrip": false,
948
  "rstrip": false,
949
+ "normalized": false
 
950
  },
951
  {
952
  "id": 50361,
953
+ "special": true,
954
  "content": "<|nocaptions|>",
955
  "single_word": false,
956
  "lstrip": false,
957
  "rstrip": false,
958
+ "normalized": false
 
959
  },
960
  {
961
  "id": 50362,
962
+ "special": true,
963
  "content": "<|notimestamps|>",
964
  "single_word": false,
965
  "lstrip": false,
966
  "rstrip": false,
967
+ "normalized": false
 
968
  }
969
  ],
970
  "normalizer": null,
971
  "pre_tokenizer": {
972
  "type": "ByteLevel",
973
  "add_prefix_space": false,
974
+ "trim_offsets": true
 
975
  },
976
  "post_processor": {
977
  "type": "TemplateProcessing",
 
1066
  "decoder": {
1067
  "type": "ByteLevel",
1068
  "add_prefix_space": true,
1069
+ "trim_offsets": true
 
1070
  },
1071
  "model": {
1072
  "type": "BPE",
quantized/openai/whisper-tiny.en/speech2seq-lm-with-past/tokenizer_config.json CHANGED
@@ -19,7 +19,6 @@
19
  },
20
  "errors": "replace",
21
  "model_max_length": 1024,
22
- "name_or_path": "openai/whisper-tiny.en",
23
  "pad_token": null,
24
  "processor_class": "WhisperProcessor",
25
  "return_attention_mask": false,
 
19
  },
20
  "errors": "replace",
21
  "model_max_length": 1024,
 
22
  "pad_token": null,
23
  "processor_class": "WhisperProcessor",
24
  "return_attention_mask": false,
quantized/openai/whisper-tiny.en/speech2seq-lm-with-past/vocab.json CHANGED
The diff for this file is too large to render. See raw diff