huuquyet commited on
Commit
77dd221
1 Parent(s): c796daa

chore: clone vinai/PhoWhisper-base

Browse files
.gitattributes CHANGED
@@ -1 +1,2 @@
 
1
  *.onnx filter=lfs diff=lfs merge=lfs -text
 
1
+ *.bin filter=lfs diff=lfs merge=lfs -text
2
  *.onnx filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,25 +1,3 @@
1
- ---
2
- language:
3
- - vi
4
- tags:
5
- - audio
6
- - automatic-speech-recognition
7
- - hf-asr-leaderboard
8
- library_name: transformers.js
9
- pipeline_tag: automatic-speech-recognition
10
- widget:
11
- - example_title: Librispeech sample 1
12
- src: https://cdn-media.huggingface.co/speech_samples/sample1.flac
13
- - example_title: Librispeech sample 2
14
- src: https://cdn-media.huggingface.co/speech_samples/sample2.flac
15
- license: wtfpl
16
- ---
17
-
18
- https://hf.co/vinai/PhoWhisper-base with ONNX weights to be compatible with Transformers.js.
19
-
20
- Please check out this demo using the model:
21
- [![Open in Spaces](https://huggingface.co/datasets/huggingface/badges/resolve/main/open-in-hf-spaces-md-dark.svg)](https://huggingface.co/spaces/huuquyet/PhoWhisper-next)
22
-
23
  # PhoWhisper: Automatic Speech Recognition for Vietnamese
24
 
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # PhoWhisper: Automatic Speech Recognition for Vietnamese
2
 
3
 
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "vinai/PhoWhisper-base",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
5
  "apply_spec_augment": false,
@@ -43,7 +43,8 @@
43
  "num_mel_bins": 80,
44
  "pad_token_id": 50257,
45
  "scale_embedding": false,
46
- "transformers_version": "4.38.2",
 
47
  "use_cache": true,
48
  "use_weighted_layer_sum": false,
49
  "vocab_size": 51865
 
1
  {
2
+ "_name_or_path": "./finetuned_models_FINAL/whisper-base-vi/checkpoint-6000",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
5
  "apply_spec_augment": false,
 
43
  "num_mel_bins": 80,
44
  "pad_token_id": 50257,
45
  "scale_embedding": false,
46
+ "torch_dtype": "float32",
47
+ "transformers_version": "4.31.0",
48
  "use_cache": true,
49
  "use_weighted_layer_sum": false,
50
  "vocab_size": 51865
generation_config.json CHANGED
@@ -251,6 +251,5 @@
251
  "transcribe": 50359,
252
  "translate": 50358
253
  },
254
- "transformers_version": "4.38.2",
255
- "trust_remote_code": false
256
  }
 
251
  "transcribe": 50359,
252
  "translate": 50358
253
  },
254
+ "transformers_version": "4.31.0"
 
255
  }
onnx/decoder_model_bnb4.onnx DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:68c2e286d0c3e95e5620ab9dd487dce34b091c94348504b624bf8723e7ed79fd
3
- size 121791631
 
 
 
 
onnx/decoder_model_fp16.onnx DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:046420e8db940c1efb4c7d2be78eae59c26a89fbe48e502ba8346cbaee038457
3
- size 104396174
 
 
 
 
onnx/decoder_model_int8.onnx DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a0e142815b58c4515d6aa7d67c9dd2c668da2afaeab152d655652b650507d450
3
- size 53310178
 
 
 
 
onnx/decoder_model_merged.onnx DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f5ab9b5333682f91ca4ca13012149e89d906c94036b37ad5d8d9c1a7f9ce894a
3
- size 208521528
 
 
 
 
onnx/decoder_model_merged_bnb4.onnx DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:77bfcbaaf6465eaa18a5127d8c18b5a9f48544283e0f8833d24b18583496edb6
3
- size 122030467
 
 
 
 
onnx/decoder_model_merged_fp16.onnx DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:4399583669d3d6802708a014a14022e4737a999039f70377862ff484af5d82f0
3
- size 104722354
 
 
 
 
onnx/decoder_model_merged_int8.onnx DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:09824fa165c230d6af1df9d1e30148a08dcef080447bb4bd5108853a5950a41a
3
- size 53693315
 
 
 
 
onnx/decoder_model_merged_q4.onnx DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:4da59eed743c0df7302e7e3ed16288a81fea3583b6c47e6dd9914fda16934b9e
3
- size 123602419
 
 
 
 
onnx/decoder_model_merged_quantized.onnx DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:09824fa165c230d6af1df9d1e30148a08dcef080447bb4bd5108853a5950a41a
3
- size 53693315
 
 
 
 
onnx/decoder_model_merged_uint8.onnx DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:5adcec696b04effbe4d0355b6f6217121356715f26216d8bb8be3803e7cde4ea
3
- size 53693344
 
 
 
 
onnx/decoder_model_q4.onnx DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d6685fdbbb667bfbfbe3a7935d7824e59aaee03d82ee3d434983ddec7516143
3
- size 123364015
 
 
 
 
onnx/decoder_model_quantized.onnx DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a0e142815b58c4515d6aa7d67c9dd2c668da2afaeab152d655652b650507d450
3
- size 53310178
 
 
 
 
onnx/decoder_model_uint8.onnx DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:9c079dc6b427ea83cdcd4013d5b16c3be62e66deed5495a6b54cd7077cbd847d
3
- size 53310207
 
 
 
 
onnx/decoder_with_past_model.onnx DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:dd365ad7252f648e1d0c1f2ef82e16e82e2843c89a4e544c3c30bba9571c9642
3
- size 195647494
 
 
 
 
onnx/decoder_with_past_model_bnb4.onnx DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:636c8a6bdc8f973d8c76bc9de7d57ae3b86fab1e83ef4e3baf5ecba07823fd34
3
- size 119960957
 
 
 
 
onnx/decoder_with_past_model_fp16.onnx DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:bb24e77b63a49cd3716537985d9dfc595ce0d452caaf7921e535224bbbabe688
3
- size 98035209
 
 
 
 
onnx/decoder_with_past_model_int8.onnx DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a44b2ea755073236dfe9cb6d8f6ba44b46fd71b3e18dbacaa1dd869d8666d0b9
3
- size 50075136
 
 
 
 
onnx/decoder_with_past_model_q4.onnx DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:43568aa3a7a094615e80d1692a57c80a49b6352937ce662126314c8fe7905053
3
- size 121336829
 
 
 
 
onnx/decoder_with_past_model_quantized.onnx DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a44b2ea755073236dfe9cb6d8f6ba44b46fd71b3e18dbacaa1dd869d8666d0b9
3
- size 50075136
 
 
 
 
onnx/decoder_with_past_model_uint8.onnx DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a52ef68fbdbe098b458bff482070348e7620d3926ddce79b72ad4cc45f3f131
3
- size 50075160
 
 
 
 
onnx/encoder_model.onnx DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:052ef143ad5ef3278479b47e9e960c2510c19163cd9774e2ce275c5e1ee38859
3
- size 82468078
 
 
 
 
onnx/encoder_model_bnb4.onnx DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:897d4397f5546baba8ea94caa2d7ffef3d966b9815c0d6eae173ef33c230dbf4
3
- size 17593091
 
 
 
 
onnx/encoder_model_fp16.onnx DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:bfd0409adedf3dd1a2acc0f9c471ef605dfadd0f99151faba9a0672b3b05abe6
3
- size 41332612
 
 
 
 
onnx/encoder_model_int8.onnx DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f9eb3dfd8fcbba04ef5497aa5cd62d1cb1973e5481b0e88a3424679d41416f20
3
- size 23201297
 
 
 
 
onnx/encoder_model_q4.onnx DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:fba51e812ef4ec3fea85cf5babcecaca79ad9c3776ff3db99ec3d0287175f183
3
- size 18772451
 
 
 
 
onnx/encoder_model_quantized.onnx DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8ec714b54851c844b30539204c883f8a3ef072a2df9b010aea8845964009e9d
3
- size 23201315
 
 
 
 
onnx/encoder_model_uint8.onnx DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8ec714b54851c844b30539204c883f8a3ef072a2df9b010aea8845964009e9d
3
- size 23201315
 
 
 
 
onnx/decoder_model.onnx → pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:282eac34e5ad2154168e9874357e33edf785d54abc9a9e3156f39be4b8447597
3
- size 208289724
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:910b9e8875497078efa83dc506ea7ee378e4e759a206ceecbf23e088aec9b9f8
3
+ size 290458785
special_tokens_map.json CHANGED
@@ -111,28 +111,22 @@
111
  "bos_token": {
112
  "content": "<|endoftext|>",
113
  "lstrip": false,
114
- "normalized": false,
115
  "rstrip": false,
116
  "single_word": false
117
  },
118
  "eos_token": {
119
  "content": "<|endoftext|>",
120
  "lstrip": false,
121
- "normalized": false,
122
- "rstrip": false,
123
- "single_word": false
124
- },
125
- "pad_token": {
126
- "content": "<|endoftext|>",
127
- "lstrip": false,
128
- "normalized": false,
129
  "rstrip": false,
130
  "single_word": false
131
  },
 
132
  "unk_token": {
133
  "content": "<|endoftext|>",
134
  "lstrip": false,
135
- "normalized": false,
136
  "rstrip": false,
137
  "single_word": false
138
  }
 
111
  "bos_token": {
112
  "content": "<|endoftext|>",
113
  "lstrip": false,
114
+ "normalized": true,
115
  "rstrip": false,
116
  "single_word": false
117
  },
118
  "eos_token": {
119
  "content": "<|endoftext|>",
120
  "lstrip": false,
121
+ "normalized": true,
 
 
 
 
 
 
 
122
  "rstrip": false,
123
  "single_word": false
124
  },
125
+ "pad_token": "<|endoftext|>",
126
  "unk_token": {
127
  "content": "<|endoftext|>",
128
  "lstrip": false,
129
+ "normalized": true,
130
  "rstrip": false,
131
  "single_word": false
132
  }
tokenizer_config.json CHANGED
@@ -1,981 +1,35 @@
1
  {
2
  "add_bos_token": false,
3
  "add_prefix_space": false,
4
- "added_tokens_decoder": {
5
- "50257": {
6
- "content": "<|endoftext|>",
7
- "lstrip": false,
8
- "normalized": false,
9
- "rstrip": false,
10
- "single_word": false,
11
- "special": true
12
- },
13
- "50258": {
14
- "content": "<|startoftranscript|>",
15
- "lstrip": false,
16
- "normalized": false,
17
- "rstrip": false,
18
- "single_word": false,
19
- "special": true
20
- },
21
- "50259": {
22
- "content": "<|en|>",
23
- "lstrip": false,
24
- "normalized": false,
25
- "rstrip": false,
26
- "single_word": false,
27
- "special": true
28
- },
29
- "50260": {
30
- "content": "<|zh|>",
31
- "lstrip": false,
32
- "normalized": false,
33
- "rstrip": false,
34
- "single_word": false,
35
- "special": true
36
- },
37
- "50261": {
38
- "content": "<|de|>",
39
- "lstrip": false,
40
- "normalized": false,
41
- "rstrip": false,
42
- "single_word": false,
43
- "special": true
44
- },
45
- "50262": {
46
- "content": "<|es|>",
47
- "lstrip": false,
48
- "normalized": false,
49
- "rstrip": false,
50
- "single_word": false,
51
- "special": true
52
- },
53
- "50263": {
54
- "content": "<|ru|>",
55
- "lstrip": false,
56
- "normalized": false,
57
- "rstrip": false,
58
- "single_word": false,
59
- "special": true
60
- },
61
- "50264": {
62
- "content": "<|ko|>",
63
- "lstrip": false,
64
- "normalized": false,
65
- "rstrip": false,
66
- "single_word": false,
67
- "special": true
68
- },
69
- "50265": {
70
- "content": "<|fr|>",
71
- "lstrip": false,
72
- "normalized": false,
73
- "rstrip": false,
74
- "single_word": false,
75
- "special": true
76
- },
77
- "50266": {
78
- "content": "<|ja|>",
79
- "lstrip": false,
80
- "normalized": false,
81
- "rstrip": false,
82
- "single_word": false,
83
- "special": true
84
- },
85
- "50267": {
86
- "content": "<|pt|>",
87
- "lstrip": false,
88
- "normalized": false,
89
- "rstrip": false,
90
- "single_word": false,
91
- "special": true
92
- },
93
- "50268": {
94
- "content": "<|tr|>",
95
- "lstrip": false,
96
- "normalized": false,
97
- "rstrip": false,
98
- "single_word": false,
99
- "special": true
100
- },
101
- "50269": {
102
- "content": "<|pl|>",
103
- "lstrip": false,
104
- "normalized": false,
105
- "rstrip": false,
106
- "single_word": false,
107
- "special": true
108
- },
109
- "50270": {
110
- "content": "<|ca|>",
111
- "lstrip": false,
112
- "normalized": false,
113
- "rstrip": false,
114
- "single_word": false,
115
- "special": true
116
- },
117
- "50271": {
118
- "content": "<|nl|>",
119
- "lstrip": false,
120
- "normalized": false,
121
- "rstrip": false,
122
- "single_word": false,
123
- "special": true
124
- },
125
- "50272": {
126
- "content": "<|ar|>",
127
- "lstrip": false,
128
- "normalized": false,
129
- "rstrip": false,
130
- "single_word": false,
131
- "special": true
132
- },
133
- "50273": {
134
- "content": "<|sv|>",
135
- "lstrip": false,
136
- "normalized": false,
137
- "rstrip": false,
138
- "single_word": false,
139
- "special": true
140
- },
141
- "50274": {
142
- "content": "<|it|>",
143
- "lstrip": false,
144
- "normalized": false,
145
- "rstrip": false,
146
- "single_word": false,
147
- "special": true
148
- },
149
- "50275": {
150
- "content": "<|id|>",
151
- "lstrip": false,
152
- "normalized": false,
153
- "rstrip": false,
154
- "single_word": false,
155
- "special": true
156
- },
157
- "50276": {
158
- "content": "<|hi|>",
159
- "lstrip": false,
160
- "normalized": false,
161
- "rstrip": false,
162
- "single_word": false,
163
- "special": true
164
- },
165
- "50277": {
166
- "content": "<|fi|>",
167
- "lstrip": false,
168
- "normalized": false,
169
- "rstrip": false,
170
- "single_word": false,
171
- "special": true
172
- },
173
- "50278": {
174
- "content": "<|vi|>",
175
- "lstrip": false,
176
- "normalized": false,
177
- "rstrip": false,
178
- "single_word": false,
179
- "special": true
180
- },
181
- "50279": {
182
- "content": "<|he|>",
183
- "lstrip": false,
184
- "normalized": false,
185
- "rstrip": false,
186
- "single_word": false,
187
- "special": true
188
- },
189
- "50280": {
190
- "content": "<|uk|>",
191
- "lstrip": false,
192
- "normalized": false,
193
- "rstrip": false,
194
- "single_word": false,
195
- "special": true
196
- },
197
- "50281": {
198
- "content": "<|el|>",
199
- "lstrip": false,
200
- "normalized": false,
201
- "rstrip": false,
202
- "single_word": false,
203
- "special": true
204
- },
205
- "50282": {
206
- "content": "<|ms|>",
207
- "lstrip": false,
208
- "normalized": false,
209
- "rstrip": false,
210
- "single_word": false,
211
- "special": true
212
- },
213
- "50283": {
214
- "content": "<|cs|>",
215
- "lstrip": false,
216
- "normalized": false,
217
- "rstrip": false,
218
- "single_word": false,
219
- "special": true
220
- },
221
- "50284": {
222
- "content": "<|ro|>",
223
- "lstrip": false,
224
- "normalized": false,
225
- "rstrip": false,
226
- "single_word": false,
227
- "special": true
228
- },
229
- "50285": {
230
- "content": "<|da|>",
231
- "lstrip": false,
232
- "normalized": false,
233
- "rstrip": false,
234
- "single_word": false,
235
- "special": true
236
- },
237
- "50286": {
238
- "content": "<|hu|>",
239
- "lstrip": false,
240
- "normalized": false,
241
- "rstrip": false,
242
- "single_word": false,
243
- "special": true
244
- },
245
- "50287": {
246
- "content": "<|ta|>",
247
- "lstrip": false,
248
- "normalized": false,
249
- "rstrip": false,
250
- "single_word": false,
251
- "special": true
252
- },
253
- "50288": {
254
- "content": "<|no|>",
255
- "lstrip": false,
256
- "normalized": false,
257
- "rstrip": false,
258
- "single_word": false,
259
- "special": true
260
- },
261
- "50289": {
262
- "content": "<|th|>",
263
- "lstrip": false,
264
- "normalized": false,
265
- "rstrip": false,
266
- "single_word": false,
267
- "special": true
268
- },
269
- "50290": {
270
- "content": "<|ur|>",
271
- "lstrip": false,
272
- "normalized": false,
273
- "rstrip": false,
274
- "single_word": false,
275
- "special": true
276
- },
277
- "50291": {
278
- "content": "<|hr|>",
279
- "lstrip": false,
280
- "normalized": false,
281
- "rstrip": false,
282
- "single_word": false,
283
- "special": true
284
- },
285
- "50292": {
286
- "content": "<|bg|>",
287
- "lstrip": false,
288
- "normalized": false,
289
- "rstrip": false,
290
- "single_word": false,
291
- "special": true
292
- },
293
- "50293": {
294
- "content": "<|lt|>",
295
- "lstrip": false,
296
- "normalized": false,
297
- "rstrip": false,
298
- "single_word": false,
299
- "special": true
300
- },
301
- "50294": {
302
- "content": "<|la|>",
303
- "lstrip": false,
304
- "normalized": false,
305
- "rstrip": false,
306
- "single_word": false,
307
- "special": true
308
- },
309
- "50295": {
310
- "content": "<|mi|>",
311
- "lstrip": false,
312
- "normalized": false,
313
- "rstrip": false,
314
- "single_word": false,
315
- "special": true
316
- },
317
- "50296": {
318
- "content": "<|ml|>",
319
- "lstrip": false,
320
- "normalized": false,
321
- "rstrip": false,
322
- "single_word": false,
323
- "special": true
324
- },
325
- "50297": {
326
- "content": "<|cy|>",
327
- "lstrip": false,
328
- "normalized": false,
329
- "rstrip": false,
330
- "single_word": false,
331
- "special": true
332
- },
333
- "50298": {
334
- "content": "<|sk|>",
335
- "lstrip": false,
336
- "normalized": false,
337
- "rstrip": false,
338
- "single_word": false,
339
- "special": true
340
- },
341
- "50299": {
342
- "content": "<|te|>",
343
- "lstrip": false,
344
- "normalized": false,
345
- "rstrip": false,
346
- "single_word": false,
347
- "special": true
348
- },
349
- "50300": {
350
- "content": "<|fa|>",
351
- "lstrip": false,
352
- "normalized": false,
353
- "rstrip": false,
354
- "single_word": false,
355
- "special": true
356
- },
357
- "50301": {
358
- "content": "<|lv|>",
359
- "lstrip": false,
360
- "normalized": false,
361
- "rstrip": false,
362
- "single_word": false,
363
- "special": true
364
- },
365
- "50302": {
366
- "content": "<|bn|>",
367
- "lstrip": false,
368
- "normalized": false,
369
- "rstrip": false,
370
- "single_word": false,
371
- "special": true
372
- },
373
- "50303": {
374
- "content": "<|sr|>",
375
- "lstrip": false,
376
- "normalized": false,
377
- "rstrip": false,
378
- "single_word": false,
379
- "special": true
380
- },
381
- "50304": {
382
- "content": "<|az|>",
383
- "lstrip": false,
384
- "normalized": false,
385
- "rstrip": false,
386
- "single_word": false,
387
- "special": true
388
- },
389
- "50305": {
390
- "content": "<|sl|>",
391
- "lstrip": false,
392
- "normalized": false,
393
- "rstrip": false,
394
- "single_word": false,
395
- "special": true
396
- },
397
- "50306": {
398
- "content": "<|kn|>",
399
- "lstrip": false,
400
- "normalized": false,
401
- "rstrip": false,
402
- "single_word": false,
403
- "special": true
404
- },
405
- "50307": {
406
- "content": "<|et|>",
407
- "lstrip": false,
408
- "normalized": false,
409
- "rstrip": false,
410
- "single_word": false,
411
- "special": true
412
- },
413
- "50308": {
414
- "content": "<|mk|>",
415
- "lstrip": false,
416
- "normalized": false,
417
- "rstrip": false,
418
- "single_word": false,
419
- "special": true
420
- },
421
- "50309": {
422
- "content": "<|br|>",
423
- "lstrip": false,
424
- "normalized": false,
425
- "rstrip": false,
426
- "single_word": false,
427
- "special": true
428
- },
429
- "50310": {
430
- "content": "<|eu|>",
431
- "lstrip": false,
432
- "normalized": false,
433
- "rstrip": false,
434
- "single_word": false,
435
- "special": true
436
- },
437
- "50311": {
438
- "content": "<|is|>",
439
- "lstrip": false,
440
- "normalized": false,
441
- "rstrip": false,
442
- "single_word": false,
443
- "special": true
444
- },
445
- "50312": {
446
- "content": "<|hy|>",
447
- "lstrip": false,
448
- "normalized": false,
449
- "rstrip": false,
450
- "single_word": false,
451
- "special": true
452
- },
453
- "50313": {
454
- "content": "<|ne|>",
455
- "lstrip": false,
456
- "normalized": false,
457
- "rstrip": false,
458
- "single_word": false,
459
- "special": true
460
- },
461
- "50314": {
462
- "content": "<|mn|>",
463
- "lstrip": false,
464
- "normalized": false,
465
- "rstrip": false,
466
- "single_word": false,
467
- "special": true
468
- },
469
- "50315": {
470
- "content": "<|bs|>",
471
- "lstrip": false,
472
- "normalized": false,
473
- "rstrip": false,
474
- "single_word": false,
475
- "special": true
476
- },
477
- "50316": {
478
- "content": "<|kk|>",
479
- "lstrip": false,
480
- "normalized": false,
481
- "rstrip": false,
482
- "single_word": false,
483
- "special": true
484
- },
485
- "50317": {
486
- "content": "<|sq|>",
487
- "lstrip": false,
488
- "normalized": false,
489
- "rstrip": false,
490
- "single_word": false,
491
- "special": true
492
- },
493
- "50318": {
494
- "content": "<|sw|>",
495
- "lstrip": false,
496
- "normalized": false,
497
- "rstrip": false,
498
- "single_word": false,
499
- "special": true
500
- },
501
- "50319": {
502
- "content": "<|gl|>",
503
- "lstrip": false,
504
- "normalized": false,
505
- "rstrip": false,
506
- "single_word": false,
507
- "special": true
508
- },
509
- "50320": {
510
- "content": "<|mr|>",
511
- "lstrip": false,
512
- "normalized": false,
513
- "rstrip": false,
514
- "single_word": false,
515
- "special": true
516
- },
517
- "50321": {
518
- "content": "<|pa|>",
519
- "lstrip": false,
520
- "normalized": false,
521
- "rstrip": false,
522
- "single_word": false,
523
- "special": true
524
- },
525
- "50322": {
526
- "content": "<|si|>",
527
- "lstrip": false,
528
- "normalized": false,
529
- "rstrip": false,
530
- "single_word": false,
531
- "special": true
532
- },
533
- "50323": {
534
- "content": "<|km|>",
535
- "lstrip": false,
536
- "normalized": false,
537
- "rstrip": false,
538
- "single_word": false,
539
- "special": true
540
- },
541
- "50324": {
542
- "content": "<|sn|>",
543
- "lstrip": false,
544
- "normalized": false,
545
- "rstrip": false,
546
- "single_word": false,
547
- "special": true
548
- },
549
- "50325": {
550
- "content": "<|yo|>",
551
- "lstrip": false,
552
- "normalized": false,
553
- "rstrip": false,
554
- "single_word": false,
555
- "special": true
556
- },
557
- "50326": {
558
- "content": "<|so|>",
559
- "lstrip": false,
560
- "normalized": false,
561
- "rstrip": false,
562
- "single_word": false,
563
- "special": true
564
- },
565
- "50327": {
566
- "content": "<|af|>",
567
- "lstrip": false,
568
- "normalized": false,
569
- "rstrip": false,
570
- "single_word": false,
571
- "special": true
572
- },
573
- "50328": {
574
- "content": "<|oc|>",
575
- "lstrip": false,
576
- "normalized": false,
577
- "rstrip": false,
578
- "single_word": false,
579
- "special": true
580
- },
581
- "50329": {
582
- "content": "<|ka|>",
583
- "lstrip": false,
584
- "normalized": false,
585
- "rstrip": false,
586
- "single_word": false,
587
- "special": true
588
- },
589
- "50330": {
590
- "content": "<|be|>",
591
- "lstrip": false,
592
- "normalized": false,
593
- "rstrip": false,
594
- "single_word": false,
595
- "special": true
596
- },
597
- "50331": {
598
- "content": "<|tg|>",
599
- "lstrip": false,
600
- "normalized": false,
601
- "rstrip": false,
602
- "single_word": false,
603
- "special": true
604
- },
605
- "50332": {
606
- "content": "<|sd|>",
607
- "lstrip": false,
608
- "normalized": false,
609
- "rstrip": false,
610
- "single_word": false,
611
- "special": true
612
- },
613
- "50333": {
614
- "content": "<|gu|>",
615
- "lstrip": false,
616
- "normalized": false,
617
- "rstrip": false,
618
- "single_word": false,
619
- "special": true
620
- },
621
- "50334": {
622
- "content": "<|am|>",
623
- "lstrip": false,
624
- "normalized": false,
625
- "rstrip": false,
626
- "single_word": false,
627
- "special": true
628
- },
629
- "50335": {
630
- "content": "<|yi|>",
631
- "lstrip": false,
632
- "normalized": false,
633
- "rstrip": false,
634
- "single_word": false,
635
- "special": true
636
- },
637
- "50336": {
638
- "content": "<|lo|>",
639
- "lstrip": false,
640
- "normalized": false,
641
- "rstrip": false,
642
- "single_word": false,
643
- "special": true
644
- },
645
- "50337": {
646
- "content": "<|uz|>",
647
- "lstrip": false,
648
- "normalized": false,
649
- "rstrip": false,
650
- "single_word": false,
651
- "special": true
652
- },
653
- "50338": {
654
- "content": "<|fo|>",
655
- "lstrip": false,
656
- "normalized": false,
657
- "rstrip": false,
658
- "single_word": false,
659
- "special": true
660
- },
661
- "50339": {
662
- "content": "<|ht|>",
663
- "lstrip": false,
664
- "normalized": false,
665
- "rstrip": false,
666
- "single_word": false,
667
- "special": true
668
- },
669
- "50340": {
670
- "content": "<|ps|>",
671
- "lstrip": false,
672
- "normalized": false,
673
- "rstrip": false,
674
- "single_word": false,
675
- "special": true
676
- },
677
- "50341": {
678
- "content": "<|tk|>",
679
- "lstrip": false,
680
- "normalized": false,
681
- "rstrip": false,
682
- "single_word": false,
683
- "special": true
684
- },
685
- "50342": {
686
- "content": "<|nn|>",
687
- "lstrip": false,
688
- "normalized": false,
689
- "rstrip": false,
690
- "single_word": false,
691
- "special": true
692
- },
693
- "50343": {
694
- "content": "<|mt|>",
695
- "lstrip": false,
696
- "normalized": false,
697
- "rstrip": false,
698
- "single_word": false,
699
- "special": true
700
- },
701
- "50344": {
702
- "content": "<|sa|>",
703
- "lstrip": false,
704
- "normalized": false,
705
- "rstrip": false,
706
- "single_word": false,
707
- "special": true
708
- },
709
- "50345": {
710
- "content": "<|lb|>",
711
- "lstrip": false,
712
- "normalized": false,
713
- "rstrip": false,
714
- "single_word": false,
715
- "special": true
716
- },
717
- "50346": {
718
- "content": "<|my|>",
719
- "lstrip": false,
720
- "normalized": false,
721
- "rstrip": false,
722
- "single_word": false,
723
- "special": true
724
- },
725
- "50347": {
726
- "content": "<|bo|>",
727
- "lstrip": false,
728
- "normalized": false,
729
- "rstrip": false,
730
- "single_word": false,
731
- "special": true
732
- },
733
- "50348": {
734
- "content": "<|tl|>",
735
- "lstrip": false,
736
- "normalized": false,
737
- "rstrip": false,
738
- "single_word": false,
739
- "special": true
740
- },
741
- "50349": {
742
- "content": "<|mg|>",
743
- "lstrip": false,
744
- "normalized": false,
745
- "rstrip": false,
746
- "single_word": false,
747
- "special": true
748
- },
749
- "50350": {
750
- "content": "<|as|>",
751
- "lstrip": false,
752
- "normalized": false,
753
- "rstrip": false,
754
- "single_word": false,
755
- "special": true
756
- },
757
- "50351": {
758
- "content": "<|tt|>",
759
- "lstrip": false,
760
- "normalized": false,
761
- "rstrip": false,
762
- "single_word": false,
763
- "special": true
764
- },
765
- "50352": {
766
- "content": "<|haw|>",
767
- "lstrip": false,
768
- "normalized": false,
769
- "rstrip": false,
770
- "single_word": false,
771
- "special": true
772
- },
773
- "50353": {
774
- "content": "<|ln|>",
775
- "lstrip": false,
776
- "normalized": false,
777
- "rstrip": false,
778
- "single_word": false,
779
- "special": true
780
- },
781
- "50354": {
782
- "content": "<|ha|>",
783
- "lstrip": false,
784
- "normalized": false,
785
- "rstrip": false,
786
- "single_word": false,
787
- "special": true
788
- },
789
- "50355": {
790
- "content": "<|ba|>",
791
- "lstrip": false,
792
- "normalized": false,
793
- "rstrip": false,
794
- "single_word": false,
795
- "special": true
796
- },
797
- "50356": {
798
- "content": "<|jw|>",
799
- "lstrip": false,
800
- "normalized": false,
801
- "rstrip": false,
802
- "single_word": false,
803
- "special": true
804
- },
805
- "50357": {
806
- "content": "<|su|>",
807
- "lstrip": false,
808
- "normalized": false,
809
- "rstrip": false,
810
- "single_word": false,
811
- "special": true
812
- },
813
- "50358": {
814
- "content": "<|translate|>",
815
- "lstrip": false,
816
- "normalized": false,
817
- "rstrip": false,
818
- "single_word": false,
819
- "special": true
820
- },
821
- "50359": {
822
- "content": "<|transcribe|>",
823
- "lstrip": false,
824
- "normalized": false,
825
- "rstrip": false,
826
- "single_word": false,
827
- "special": true
828
- },
829
- "50360": {
830
- "content": "<|startoflm|>",
831
- "lstrip": false,
832
- "normalized": false,
833
- "rstrip": false,
834
- "single_word": false,
835
- "special": true
836
- },
837
- "50361": {
838
- "content": "<|startofprev|>",
839
- "lstrip": false,
840
- "normalized": false,
841
- "rstrip": false,
842
- "single_word": false,
843
- "special": true
844
- },
845
- "50362": {
846
- "content": "<|nocaptions|>",
847
- "lstrip": false,
848
- "normalized": false,
849
- "rstrip": false,
850
- "single_word": false,
851
- "special": true
852
- },
853
- "50363": {
854
- "content": "<|notimestamps|>",
855
- "lstrip": false,
856
- "normalized": false,
857
- "rstrip": false,
858
- "single_word": false,
859
- "special": true
860
- }
861
  },
862
- "additional_special_tokens": [
863
- "<|endoftext|>",
864
- "<|startoftranscript|>",
865
- "<|en|>",
866
- "<|zh|>",
867
- "<|de|>",
868
- "<|es|>",
869
- "<|ru|>",
870
- "<|ko|>",
871
- "<|fr|>",
872
- "<|ja|>",
873
- "<|pt|>",
874
- "<|tr|>",
875
- "<|pl|>",
876
- "<|ca|>",
877
- "<|nl|>",
878
- "<|ar|>",
879
- "<|sv|>",
880
- "<|it|>",
881
- "<|id|>",
882
- "<|hi|>",
883
- "<|fi|>",
884
- "<|vi|>",
885
- "<|he|>",
886
- "<|uk|>",
887
- "<|el|>",
888
- "<|ms|>",
889
- "<|cs|>",
890
- "<|ro|>",
891
- "<|da|>",
892
- "<|hu|>",
893
- "<|ta|>",
894
- "<|no|>",
895
- "<|th|>",
896
- "<|ur|>",
897
- "<|hr|>",
898
- "<|bg|>",
899
- "<|lt|>",
900
- "<|la|>",
901
- "<|mi|>",
902
- "<|ml|>",
903
- "<|cy|>",
904
- "<|sk|>",
905
- "<|te|>",
906
- "<|fa|>",
907
- "<|lv|>",
908
- "<|bn|>",
909
- "<|sr|>",
910
- "<|az|>",
911
- "<|sl|>",
912
- "<|kn|>",
913
- "<|et|>",
914
- "<|mk|>",
915
- "<|br|>",
916
- "<|eu|>",
917
- "<|is|>",
918
- "<|hy|>",
919
- "<|ne|>",
920
- "<|mn|>",
921
- "<|bs|>",
922
- "<|kk|>",
923
- "<|sq|>",
924
- "<|sw|>",
925
- "<|gl|>",
926
- "<|mr|>",
927
- "<|pa|>",
928
- "<|si|>",
929
- "<|km|>",
930
- "<|sn|>",
931
- "<|yo|>",
932
- "<|so|>",
933
- "<|af|>",
934
- "<|oc|>",
935
- "<|ka|>",
936
- "<|be|>",
937
- "<|tg|>",
938
- "<|sd|>",
939
- "<|gu|>",
940
- "<|am|>",
941
- "<|yi|>",
942
- "<|lo|>",
943
- "<|uz|>",
944
- "<|fo|>",
945
- "<|ht|>",
946
- "<|ps|>",
947
- "<|tk|>",
948
- "<|nn|>",
949
- "<|mt|>",
950
- "<|sa|>",
951
- "<|lb|>",
952
- "<|my|>",
953
- "<|bo|>",
954
- "<|tl|>",
955
- "<|mg|>",
956
- "<|as|>",
957
- "<|tt|>",
958
- "<|haw|>",
959
- "<|ln|>",
960
- "<|ha|>",
961
- "<|ba|>",
962
- "<|jw|>",
963
- "<|su|>",
964
- "<|translate|>",
965
- "<|transcribe|>",
966
- "<|startoflm|>",
967
- "<|startofprev|>",
968
- "<|nocaptions|>",
969
- "<|notimestamps|>"
970
- ],
971
- "bos_token": "<|endoftext|>",
972
  "clean_up_tokenization_spaces": true,
973
- "eos_token": "<|endoftext|>",
 
 
 
 
 
 
 
974
  "errors": "replace",
975
  "model_max_length": 448,
976
- "pad_token": "<|endoftext|>",
977
  "processor_class": "WhisperProcessor",
978
  "return_attention_mask": false,
979
  "tokenizer_class": "WhisperTokenizer",
980
- "unk_token": "<|endoftext|>"
 
 
 
 
 
 
 
981
  }
 
1
  {
2
  "add_bos_token": false,
3
  "add_prefix_space": false,
4
+ "bos_token": {
5
+ "__type": "AddedToken",
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  "clean_up_tokenization_spaces": true,
13
+ "eos_token": {
14
+ "__type": "AddedToken",
15
+ "content": "<|endoftext|>",
16
+ "lstrip": false,
17
+ "normalized": true,
18
+ "rstrip": false,
19
+ "single_word": false
20
+ },
21
  "errors": "replace",
22
  "model_max_length": 448,
23
+ "pad_token": null,
24
  "processor_class": "WhisperProcessor",
25
  "return_attention_mask": false,
26
  "tokenizer_class": "WhisperTokenizer",
27
+ "unk_token": {
28
+ "__type": "AddedToken",
29
+ "content": "<|endoftext|>",
30
+ "lstrip": false,
31
+ "normalized": true,
32
+ "rstrip": false,
33
+ "single_word": false
34
+ }
35
  }
vocab.json CHANGED
The diff for this file is too large to render. See raw diff