huuquyet committed on
Commit
515c6b5
1 Parent(s): ae84a9c

feat: update converted models with config

Browse files
config.json CHANGED
@@ -144,8 +144,7 @@
144
  50361,
145
  50362
146
  ],
147
- "torch_dtype": "float32",
148
- "transformers_version": "4.38.2",
149
  "use_cache": true,
150
  "use_weighted_layer_sum": false,
151
  "vocab_size": 51865
 
144
  50361,
145
  50362
146
  ],
147
+ "transformers_version": "4.33.2",
 
148
  "use_cache": true,
149
  "use_weighted_layer_sum": false,
150
  "vocab_size": 51865
generation_config.json CHANGED
@@ -260,6 +260,6 @@
260
  "transcribe": 50359,
261
  "translate": 50358
262
  },
263
- "transformers_version": "4.38.2",
264
  "trust_remote_code": false
265
  }
 
260
  "transcribe": 50359,
261
  "translate": 50358
262
  },
263
+ "transformers_version": "4.33.2",
264
  "trust_remote_code": false
265
  }
onnx/decoder_model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0de390f93a7cf4116e50aa00b0771a5215a196cf76e597fc93e686d2d6b82381
3
+ size 614830372
onnx/decoder_model_merged.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24e79d0773eea1a54f0f491d531be8e75b1e14c794bc4c70bdace6a1393ff95e
3
+ size 615265438
onnx/decoder_model_merged_quantized.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:218ccd2063c1b1601d6d5ebae9d84a9d1059223bcd110acf284fb0b38b3fa533
3
+ size 156599474
onnx/decoder_model_quantized.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88ebce3170d2de23f8cedcd0a36118a24e88b53ec0627795fe357072bd9bfa18
3
+ size 155906420
onnx/decoder_with_past_model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67154d109fa38d6a0e1f1cf246a5555a227ad665f90cafb61c0dd8d9a347b288
3
+ size 558093683
onnx/decoder_with_past_model_quantized.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2af1909b12f3f1bdd0239067e1bfb9805e9e55486a703493b8b18a71d77dd641
3
+ size 141586018
onnx/encoder_model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab13fb271bacbf472aeba6f2114ad475da44c2d47ce76cf08b38ccfead7ccc2a
3
+ size 352812844
onnx/encoder_model_quantized.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd66a815bc5f0c452a8726b3ca48f4665d1d5352d00d7b9f4f4ef452c905de32
3
+ size 92289694
preprocessor_config.json CHANGED
The diff for this file is too large to render. See raw diff
 
quantize_config.json CHANGED
@@ -1,357 +1,115 @@
1
  {
2
- "fp16": {},
3
- "q8": {
4
- "per_model_config": {
5
- "encoder_model": {
6
- "op_types": [
7
- "Add",
8
- "Concat",
9
- "Constant",
10
- "Conv",
11
- "Div",
12
- "Erf",
13
- "Gather",
14
- "MatMul",
15
- "Mul",
16
- "Pow",
17
- "ReduceMean",
18
- "Reshape",
19
- "Shape",
20
- "Softmax",
21
- "Sqrt",
22
- "Sub",
23
- "Transpose",
24
- "Unsqueeze"
25
- ],
26
- "weight_type": "QUInt8"
27
- },
28
- "decoder_model": {
29
- "op_types": [
30
- "Add",
31
- "Cast",
32
- "Concat",
33
- "Constant",
34
- "ConstantOfShape",
35
- "Div",
36
- "Equal",
37
- "Erf",
38
- "Expand",
39
- "Gather",
40
- "Less",
41
- "MatMul",
42
- "Mul",
43
- "Pow",
44
- "Range",
45
- "ReduceMean",
46
- "Reshape",
47
- "Shape",
48
- "Slice",
49
- "Softmax",
50
- "Sqrt",
51
- "Squeeze",
52
- "Sub",
53
- "Transpose",
54
- "Unsqueeze",
55
- "Where"
56
- ],
57
- "weight_type": "QInt8"
58
- },
59
- "decoder_with_past_model": {
60
- "op_types": [
61
- "Add",
62
- "Concat",
63
- "Constant",
64
- "Div",
65
- "Erf",
66
- "Gather",
67
- "MatMul",
68
- "Mul",
69
- "Pow",
70
- "ReduceMean",
71
- "Reshape",
72
- "Shape",
73
- "Slice",
74
- "Softmax",
75
- "Sqrt",
76
- "Sub",
77
- "Transpose",
78
- "Unsqueeze"
79
- ],
80
- "weight_type": "QInt8"
81
- },
82
- "decoder_model_merged": {
83
- "op_types": [
84
- "Add",
85
- "Cast",
86
- "Concat",
87
- "Constant",
88
- "ConstantOfShape",
89
- "Div",
90
- "Equal",
91
- "Erf",
92
- "Expand",
93
- "Gather",
94
- "If",
95
- "Less",
96
- "MatMul",
97
- "Mul",
98
- "Pow",
99
- "Range",
100
- "ReduceMean",
101
- "Reshape",
102
- "Shape",
103
- "Slice",
104
- "Softmax",
105
- "Sqrt",
106
- "Squeeze",
107
- "Sub",
108
- "Transpose",
109
- "Unsqueeze",
110
- "Where"
111
- ],
112
- "weight_type": "QInt8"
113
- }
114
  },
115
- "per_channel": false,
116
- "reduce_range": false
117
- },
118
- "int8": {
119
- "per_model_config": {
120
- "encoder_model": {
121
- "op_types": [
122
- "Add",
123
- "Concat",
124
- "Constant",
125
- "Conv",
126
- "Div",
127
- "Erf",
128
- "Gather",
129
- "MatMul",
130
- "Mul",
131
- "Pow",
132
- "ReduceMean",
133
- "Reshape",
134
- "Shape",
135
- "Softmax",
136
- "Sqrt",
137
- "Sub",
138
- "Transpose",
139
- "Unsqueeze"
140
- ],
141
- "weight_type": "QInt8"
142
- },
143
- "decoder_model": {
144
- "op_types": [
145
- "Add",
146
- "Cast",
147
- "Concat",
148
- "Constant",
149
- "ConstantOfShape",
150
- "Div",
151
- "Equal",
152
- "Erf",
153
- "Expand",
154
- "Gather",
155
- "Less",
156
- "MatMul",
157
- "Mul",
158
- "Pow",
159
- "Range",
160
- "ReduceMean",
161
- "Reshape",
162
- "Shape",
163
- "Slice",
164
- "Softmax",
165
- "Sqrt",
166
- "Squeeze",
167
- "Sub",
168
- "Transpose",
169
- "Unsqueeze",
170
- "Where"
171
- ],
172
- "weight_type": "QInt8"
173
- },
174
- "decoder_with_past_model": {
175
- "op_types": [
176
- "Add",
177
- "Concat",
178
- "Constant",
179
- "Div",
180
- "Erf",
181
- "Gather",
182
- "MatMul",
183
- "Mul",
184
- "Pow",
185
- "ReduceMean",
186
- "Reshape",
187
- "Shape",
188
- "Slice",
189
- "Softmax",
190
- "Sqrt",
191
- "Sub",
192
- "Transpose",
193
- "Unsqueeze"
194
- ],
195
- "weight_type": "QInt8"
196
- },
197
- "decoder_model_merged": {
198
- "op_types": [
199
- "Add",
200
- "Cast",
201
- "Concat",
202
- "Constant",
203
- "ConstantOfShape",
204
- "Div",
205
- "Equal",
206
- "Erf",
207
- "Expand",
208
- "Gather",
209
- "If",
210
- "Less",
211
- "MatMul",
212
- "Mul",
213
- "Pow",
214
- "Range",
215
- "ReduceMean",
216
- "Reshape",
217
- "Shape",
218
- "Slice",
219
- "Softmax",
220
- "Sqrt",
221
- "Squeeze",
222
- "Sub",
223
- "Transpose",
224
- "Unsqueeze",
225
- "Where"
226
- ],
227
- "weight_type": "QInt8"
228
- }
229
  },
230
- "per_channel": false,
231
- "reduce_range": false
232
- },
233
- "uint8": {
234
- "per_model_config": {
235
- "encoder_model": {
236
- "op_types": [
237
- "Add",
238
- "Concat",
239
- "Constant",
240
- "Conv",
241
- "Div",
242
- "Erf",
243
- "Gather",
244
- "MatMul",
245
- "Mul",
246
- "Pow",
247
- "ReduceMean",
248
- "Reshape",
249
- "Shape",
250
- "Softmax",
251
- "Sqrt",
252
- "Sub",
253
- "Transpose",
254
- "Unsqueeze"
255
- ],
256
- "weight_type": "QUInt8"
257
- },
258
- "decoder_model": {
259
- "op_types": [
260
- "Add",
261
- "Cast",
262
- "Concat",
263
- "Constant",
264
- "ConstantOfShape",
265
- "Div",
266
- "Equal",
267
- "Erf",
268
- "Expand",
269
- "Gather",
270
- "Less",
271
- "MatMul",
272
- "Mul",
273
- "Pow",
274
- "Range",
275
- "ReduceMean",
276
- "Reshape",
277
- "Shape",
278
- "Slice",
279
- "Softmax",
280
- "Sqrt",
281
- "Squeeze",
282
- "Sub",
283
- "Transpose",
284
- "Unsqueeze",
285
- "Where"
286
- ],
287
- "weight_type": "QUInt8"
288
- },
289
- "decoder_with_past_model": {
290
- "op_types": [
291
- "Add",
292
- "Concat",
293
- "Constant",
294
- "Div",
295
- "Erf",
296
- "Gather",
297
- "MatMul",
298
- "Mul",
299
- "Pow",
300
- "ReduceMean",
301
- "Reshape",
302
- "Shape",
303
- "Slice",
304
- "Softmax",
305
- "Sqrt",
306
- "Sub",
307
- "Transpose",
308
- "Unsqueeze"
309
- ],
310
- "weight_type": "QUInt8"
311
- },
312
- "decoder_model_merged": {
313
- "op_types": [
314
- "Add",
315
- "Cast",
316
- "Concat",
317
- "Constant",
318
- "ConstantOfShape",
319
- "Div",
320
- "Equal",
321
- "Erf",
322
- "Expand",
323
- "Gather",
324
- "If",
325
- "Less",
326
- "MatMul",
327
- "Mul",
328
- "Pow",
329
- "Range",
330
- "ReduceMean",
331
- "Reshape",
332
- "Shape",
333
- "Slice",
334
- "Softmax",
335
- "Sqrt",
336
- "Squeeze",
337
- "Sub",
338
- "Transpose",
339
- "Unsqueeze",
340
- "Where"
341
- ],
342
- "weight_type": "QUInt8"
343
- }
344
  },
345
- "per_channel": false,
346
- "reduce_range": false
347
- },
348
- "q4": {
349
- "block_size": 32,
350
- "is_symmetric": true,
351
- "accuracy_level": null
352
- },
353
- "bnb4": {
354
- "block_size": 64,
355
- "quant_type": 1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
356
  }
357
  }
 
1
  {
2
+ "per_channel": false,
3
+ "reduce_range": false,
4
+ "per_model_config": {
5
+ "encoder_model": {
6
+ "op_types": [
7
+ "Constant",
8
+ "Softmax",
9
+ "Mul",
10
+ "Sqrt",
11
+ "Erf",
12
+ "ReduceMean",
13
+ "Gather",
14
+ "Concat",
15
+ "MatMul",
16
+ "Reshape",
17
+ "Sub",
18
+ "Shape",
19
+ "Div",
20
+ "Pow",
21
+ "Transpose",
22
+ "Add",
23
+ "Unsqueeze",
24
+ "Conv"
25
+ ],
26
+ "weight_type": "QUInt8"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  },
28
+ "decoder_model": {
29
+ "op_types": [
30
+ "Softmax",
31
+ "Range",
32
+ "ConstantOfShape",
33
+ "Transpose",
34
+ "Gather",
35
+ "ReduceMean",
36
+ "Sub",
37
+ "Shape",
38
+ "Equal",
39
+ "Expand",
40
+ "Where",
41
+ "Sqrt",
42
+ "Erf",
43
+ "Concat",
44
+ "Reshape",
45
+ "Squeeze",
46
+ "MatMul",
47
+ "Constant",
48
+ "Slice",
49
+ "Mul",
50
+ "Cast",
51
+ "Div",
52
+ "Pow",
53
+ "Add",
54
+ "Unsqueeze",
55
+ "Less"
56
+ ],
57
+ "weight_type": "QInt8"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  },
59
+ "decoder_with_past_model": {
60
+ "op_types": [
61
+ "Constant",
62
+ "Slice",
63
+ "Mul",
64
+ "Gather",
65
+ "Sqrt",
66
+ "ReduceMean",
67
+ "Softmax",
68
+ "Concat",
69
+ "Erf",
70
+ "MatMul",
71
+ "Reshape",
72
+ "Sub",
73
+ "Shape",
74
+ "Div",
75
+ "Pow",
76
+ "Transpose",
77
+ "Add",
78
+ "Unsqueeze"
79
+ ],
80
+ "weight_type": "QInt8"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  },
82
+ "decoder_model_merged": {
83
+ "op_types": [
84
+ "Softmax",
85
+ "Range",
86
+ "ConstantOfShape",
87
+ "Transpose",
88
+ "Gather",
89
+ "ReduceMean",
90
+ "Sub",
91
+ "Shape",
92
+ "Equal",
93
+ "Expand",
94
+ "Where",
95
+ "Sqrt",
96
+ "Erf",
97
+ "Concat",
98
+ "If",
99
+ "Reshape",
100
+ "Squeeze",
101
+ "MatMul",
102
+ "Constant",
103
+ "Slice",
104
+ "Mul",
105
+ "Cast",
106
+ "Div",
107
+ "Pow",
108
+ "Add",
109
+ "Unsqueeze",
110
+ "Less"
111
+ ],
112
+ "weight_type": "QInt8"
113
+ }
114
  }
115
  }
tokenizer.json CHANGED
@@ -114850,4 +114850,4 @@
114850
  "åľ º"
114851
  ]
114852
  }
114853
- }
 
114850
  "åľ º"
114851
  ]
114852
  }
114853
+ }
tokenizer_config.json CHANGED
@@ -12976,14 +12976,43 @@
12976
  "<|nocaptions|>",
12977
  "<|notimestamps|>"
12978
  ],
12979
- "bos_token": "<|endoftext|>",
 
 
 
 
 
 
 
12980
  "clean_up_tokenization_spaces": true,
12981
- "eos_token": "<|endoftext|>",
 
 
 
 
 
 
 
12982
  "errors": "replace",
12983
  "model_max_length": 1024,
12984
- "pad_token": "<|endoftext|>",
 
 
 
 
 
 
 
12985
  "processor_class": "WhisperProcessor",
12986
  "return_attention_mask": false,
12987
  "tokenizer_class": "WhisperTokenizer",
12988
- "unk_token": "<|endoftext|>"
 
 
 
 
 
 
 
 
12989
  }
 
12976
  "<|nocaptions|>",
12977
  "<|notimestamps|>"
12978
  ],
12979
+ "bos_token": {
12980
+ "__type": "AddedToken",
12981
+ "content": "<|endoftext|>",
12982
+ "lstrip": false,
12983
+ "normalized": true,
12984
+ "rstrip": false,
12985
+ "single_word": false
12986
+ },
12987
  "clean_up_tokenization_spaces": true,
12988
+ "eos_token": {
12989
+ "__type": "AddedToken",
12990
+ "content": "<|endoftext|>",
12991
+ "lstrip": false,
12992
+ "normalized": true,
12993
+ "rstrip": false,
12994
+ "single_word": false
12995
+ },
12996
  "errors": "replace",
12997
  "model_max_length": 1024,
12998
+ "pad_token": {
12999
+ "__type": "AddedToken",
13000
+ "content": "<|endoftext|>",
13001
+ "lstrip": false,
13002
+ "normalized": true,
13003
+ "rstrip": false,
13004
+ "single_word": false
13005
+ },
13006
  "processor_class": "WhisperProcessor",
13007
  "return_attention_mask": false,
13008
  "tokenizer_class": "WhisperTokenizer",
13009
+ "trust_remote_code": false,
13010
+ "unk_token": {
13011
+ "__type": "AddedToken",
13012
+ "content": "<|endoftext|>",
13013
+ "lstrip": false,
13014
+ "normalized": true,
13015
+ "rstrip": false,
13016
+ "single_word": false
13017
+ }
13018
  }
vocab.json CHANGED
The diff for this file is too large to render. See raw diff