huuquyet committed on
Commit
7623a80
1 Parent(s): b34636d

feat: update converted model with config

Browse files
config.json CHANGED
@@ -146,8 +146,7 @@
146
  50361,
147
  50362
148
  ],
149
- "torch_dtype": "float32",
150
- "transformers_version": "4.38.2",
151
  "use_cache": true,
152
  "use_weighted_layer_sum": false,
153
  "vocab_size": 51865
 
146
  50361,
147
  50362
148
  ],
149
+ "transformers_version": "4.33.2",
 
150
  "use_cache": true,
151
  "use_weighted_layer_sum": false,
152
  "vocab_size": 51865
generation_config.json CHANGED
@@ -252,5 +252,6 @@
252
  "transcribe": 50359,
253
  "translate": 50358
254
  },
255
- "transformers_version": "4.38.2"
 
256
  }
 
252
  "transcribe": 50359,
253
  "translate": 50358
254
  },
255
+ "transformers_version": "4.33.2",
256
+ "trust_remote_code": false
257
  }
onnx/decoder_model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41476bfbb62f72bd4ac699a5b50845e486b0de673ae5f48e45a4e41fed183c03
3
+ size 208271694
onnx/decoder_model_merged.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96ecf3b6999f507430becfb964d7cdc9f60a116d3fccc2a990585ffb9b735ae0
3
+ size 208491287
onnx/decoder_model_merged_quantized.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7aab16e3dce2b8a64a9f2f551766146c0ac8183d5218e4738d92b4aca5f5345a
3
+ size 53617059
onnx/decoder_model_quantized.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58a024c59f3c16266b01c8c8337234b7dbf3e9d315fa3ccb552a1b6e00fb4972
3
+ size 53267560
onnx/decoder_with_past_model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ef88353f43a44b98b7b759bf31e240756c6f9d8808fd17e46e976920f61db44
3
+ size 195635283
onnx/decoder_with_past_model_quantized.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7d132ebf02d5facbaaf40b7ad29caf5162550fcb8163bb7ae449fde56ee1999
3
+ size 50041642
onnx/encoder_model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42a2fb35d4d31cf0f1040df4c04415a297de1718931f8f40804eb3b1de83e8a5
3
+ size 82461622
onnx/encoder_model_quantized.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6679e3deb48907533a51da9a9c58ae0feab36c074fe826a45ab4ecaff755c44
3
+ size 23183302
preprocessor_config.json CHANGED
The diff for this file is too large to render. See raw diff
 
quantize_config.json CHANGED
@@ -1,357 +1,115 @@
1
  {
2
- "fp16": {},
3
- "q8": {
4
- "per_model_config": {
5
- "encoder_model": {
6
- "op_types": [
7
- "Add",
8
- "Concat",
9
- "Constant",
10
- "Conv",
11
- "Div",
12
- "Erf",
13
- "Gather",
14
- "MatMul",
15
- "Mul",
16
- "Pow",
17
- "ReduceMean",
18
- "Reshape",
19
- "Shape",
20
- "Softmax",
21
- "Sqrt",
22
- "Sub",
23
- "Transpose",
24
- "Unsqueeze"
25
- ],
26
- "weight_type": "QUInt8"
27
- },
28
- "decoder_model": {
29
- "op_types": [
30
- "Add",
31
- "Cast",
32
- "Concat",
33
- "Constant",
34
- "ConstantOfShape",
35
- "Div",
36
- "Equal",
37
- "Erf",
38
- "Expand",
39
- "Gather",
40
- "Less",
41
- "MatMul",
42
- "Mul",
43
- "Pow",
44
- "Range",
45
- "ReduceMean",
46
- "Reshape",
47
- "Shape",
48
- "Slice",
49
- "Softmax",
50
- "Sqrt",
51
- "Squeeze",
52
- "Sub",
53
- "Transpose",
54
- "Unsqueeze",
55
- "Where"
56
- ],
57
- "weight_type": "QInt8"
58
- },
59
- "decoder_with_past_model": {
60
- "op_types": [
61
- "Add",
62
- "Concat",
63
- "Constant",
64
- "Div",
65
- "Erf",
66
- "Gather",
67
- "MatMul",
68
- "Mul",
69
- "Pow",
70
- "ReduceMean",
71
- "Reshape",
72
- "Shape",
73
- "Slice",
74
- "Softmax",
75
- "Sqrt",
76
- "Sub",
77
- "Transpose",
78
- "Unsqueeze"
79
- ],
80
- "weight_type": "QInt8"
81
- },
82
- "decoder_model_merged": {
83
- "op_types": [
84
- "Add",
85
- "Cast",
86
- "Concat",
87
- "Constant",
88
- "ConstantOfShape",
89
- "Div",
90
- "Equal",
91
- "Erf",
92
- "Expand",
93
- "Gather",
94
- "If",
95
- "Less",
96
- "MatMul",
97
- "Mul",
98
- "Pow",
99
- "Range",
100
- "ReduceMean",
101
- "Reshape",
102
- "Shape",
103
- "Slice",
104
- "Softmax",
105
- "Sqrt",
106
- "Squeeze",
107
- "Sub",
108
- "Transpose",
109
- "Unsqueeze",
110
- "Where"
111
- ],
112
- "weight_type": "QInt8"
113
- }
114
  },
115
- "per_channel": false,
116
- "reduce_range": false
117
- },
118
- "int8": {
119
- "per_model_config": {
120
- "encoder_model": {
121
- "op_types": [
122
- "Add",
123
- "Concat",
124
- "Constant",
125
- "Conv",
126
- "Div",
127
- "Erf",
128
- "Gather",
129
- "MatMul",
130
- "Mul",
131
- "Pow",
132
- "ReduceMean",
133
- "Reshape",
134
- "Shape",
135
- "Softmax",
136
- "Sqrt",
137
- "Sub",
138
- "Transpose",
139
- "Unsqueeze"
140
- ],
141
- "weight_type": "QInt8"
142
- },
143
- "decoder_model": {
144
- "op_types": [
145
- "Add",
146
- "Cast",
147
- "Concat",
148
- "Constant",
149
- "ConstantOfShape",
150
- "Div",
151
- "Equal",
152
- "Erf",
153
- "Expand",
154
- "Gather",
155
- "Less",
156
- "MatMul",
157
- "Mul",
158
- "Pow",
159
- "Range",
160
- "ReduceMean",
161
- "Reshape",
162
- "Shape",
163
- "Slice",
164
- "Softmax",
165
- "Sqrt",
166
- "Squeeze",
167
- "Sub",
168
- "Transpose",
169
- "Unsqueeze",
170
- "Where"
171
- ],
172
- "weight_type": "QInt8"
173
- },
174
- "decoder_with_past_model": {
175
- "op_types": [
176
- "Add",
177
- "Concat",
178
- "Constant",
179
- "Div",
180
- "Erf",
181
- "Gather",
182
- "MatMul",
183
- "Mul",
184
- "Pow",
185
- "ReduceMean",
186
- "Reshape",
187
- "Shape",
188
- "Slice",
189
- "Softmax",
190
- "Sqrt",
191
- "Sub",
192
- "Transpose",
193
- "Unsqueeze"
194
- ],
195
- "weight_type": "QInt8"
196
- },
197
- "decoder_model_merged": {
198
- "op_types": [
199
- "Add",
200
- "Cast",
201
- "Concat",
202
- "Constant",
203
- "ConstantOfShape",
204
- "Div",
205
- "Equal",
206
- "Erf",
207
- "Expand",
208
- "Gather",
209
- "If",
210
- "Less",
211
- "MatMul",
212
- "Mul",
213
- "Pow",
214
- "Range",
215
- "ReduceMean",
216
- "Reshape",
217
- "Shape",
218
- "Slice",
219
- "Softmax",
220
- "Sqrt",
221
- "Squeeze",
222
- "Sub",
223
- "Transpose",
224
- "Unsqueeze",
225
- "Where"
226
- ],
227
- "weight_type": "QInt8"
228
- }
229
  },
230
- "per_channel": false,
231
- "reduce_range": false
232
- },
233
- "uint8": {
234
- "per_model_config": {
235
- "encoder_model": {
236
- "op_types": [
237
- "Add",
238
- "Concat",
239
- "Constant",
240
- "Conv",
241
- "Div",
242
- "Erf",
243
- "Gather",
244
- "MatMul",
245
- "Mul",
246
- "Pow",
247
- "ReduceMean",
248
- "Reshape",
249
- "Shape",
250
- "Softmax",
251
- "Sqrt",
252
- "Sub",
253
- "Transpose",
254
- "Unsqueeze"
255
- ],
256
- "weight_type": "QUInt8"
257
- },
258
- "decoder_model": {
259
- "op_types": [
260
- "Add",
261
- "Cast",
262
- "Concat",
263
- "Constant",
264
- "ConstantOfShape",
265
- "Div",
266
- "Equal",
267
- "Erf",
268
- "Expand",
269
- "Gather",
270
- "Less",
271
- "MatMul",
272
- "Mul",
273
- "Pow",
274
- "Range",
275
- "ReduceMean",
276
- "Reshape",
277
- "Shape",
278
- "Slice",
279
- "Softmax",
280
- "Sqrt",
281
- "Squeeze",
282
- "Sub",
283
- "Transpose",
284
- "Unsqueeze",
285
- "Where"
286
- ],
287
- "weight_type": "QUInt8"
288
- },
289
- "decoder_with_past_model": {
290
- "op_types": [
291
- "Add",
292
- "Concat",
293
- "Constant",
294
- "Div",
295
- "Erf",
296
- "Gather",
297
- "MatMul",
298
- "Mul",
299
- "Pow",
300
- "ReduceMean",
301
- "Reshape",
302
- "Shape",
303
- "Slice",
304
- "Softmax",
305
- "Sqrt",
306
- "Sub",
307
- "Transpose",
308
- "Unsqueeze"
309
- ],
310
- "weight_type": "QUInt8"
311
- },
312
- "decoder_model_merged": {
313
- "op_types": [
314
- "Add",
315
- "Cast",
316
- "Concat",
317
- "Constant",
318
- "ConstantOfShape",
319
- "Div",
320
- "Equal",
321
- "Erf",
322
- "Expand",
323
- "Gather",
324
- "If",
325
- "Less",
326
- "MatMul",
327
- "Mul",
328
- "Pow",
329
- "Range",
330
- "ReduceMean",
331
- "Reshape",
332
- "Shape",
333
- "Slice",
334
- "Softmax",
335
- "Sqrt",
336
- "Squeeze",
337
- "Sub",
338
- "Transpose",
339
- "Unsqueeze",
340
- "Where"
341
- ],
342
- "weight_type": "QUInt8"
343
- }
344
  },
345
- "per_channel": false,
346
- "reduce_range": false
347
- },
348
- "q4": {
349
- "block_size": 32,
350
- "is_symmetric": true,
351
- "accuracy_level": null
352
- },
353
- "bnb4": {
354
- "block_size": 64,
355
- "quant_type": 1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
356
  }
357
  }
 
1
  {
2
+ "per_channel": false,
3
+ "reduce_range": false,
4
+ "per_model_config": {
5
+ "encoder_model": {
6
+ "op_types": [
7
+ "Reshape",
8
+ "Sqrt",
9
+ "Erf",
10
+ "Mul",
11
+ "ReduceMean",
12
+ "Gather",
13
+ "Softmax",
14
+ "Constant",
15
+ "Add",
16
+ "Pow",
17
+ "Unsqueeze",
18
+ "Shape",
19
+ "Concat",
20
+ "Div",
21
+ "MatMul",
22
+ "Sub",
23
+ "Transpose",
24
+ "Conv"
25
+ ],
26
+ "weight_type": "QUInt8"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  },
28
+ "decoder_model": {
29
+ "op_types": [
30
+ "Erf",
31
+ "Equal",
32
+ "Unsqueeze",
33
+ "Range",
34
+ "Less",
35
+ "Slice",
36
+ "Sqrt",
37
+ "Add",
38
+ "Pow",
39
+ "MatMul",
40
+ "Sub",
41
+ "Expand",
42
+ "Where",
43
+ "Transpose",
44
+ "Reshape",
45
+ "Cast",
46
+ "Mul",
47
+ "ReduceMean",
48
+ "Gather",
49
+ "Softmax",
50
+ "Constant",
51
+ "Concat",
52
+ "Squeeze",
53
+ "Shape",
54
+ "Div",
55
+ "ConstantOfShape"
56
+ ],
57
+ "weight_type": "QInt8"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  },
59
+ "decoder_with_past_model": {
60
+ "op_types": [
61
+ "Reshape",
62
+ "Sqrt",
63
+ "Erf",
64
+ "Mul",
65
+ "ReduceMean",
66
+ "Gather",
67
+ "Softmax",
68
+ "Constant",
69
+ "Concat",
70
+ "Unsqueeze",
71
+ "Add",
72
+ "Shape",
73
+ "Pow",
74
+ "Div",
75
+ "MatMul",
76
+ "Sub",
77
+ "Transpose",
78
+ "Slice"
79
+ ],
80
+ "weight_type": "QInt8"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  },
82
+ "decoder_model_merged": {
83
+ "op_types": [
84
+ "Erf",
85
+ "Equal",
86
+ "Unsqueeze",
87
+ "Range",
88
+ "Less",
89
+ "Slice",
90
+ "Sqrt",
91
+ "Add",
92
+ "Pow",
93
+ "MatMul",
94
+ "Sub",
95
+ "Expand",
96
+ "If",
97
+ "Where",
98
+ "Transpose",
99
+ "Reshape",
100
+ "Cast",
101
+ "Mul",
102
+ "ReduceMean",
103
+ "Gather",
104
+ "Softmax",
105
+ "Constant",
106
+ "Concat",
107
+ "Squeeze",
108
+ "Shape",
109
+ "Div",
110
+ "ConstantOfShape"
111
+ ],
112
+ "weight_type": "QInt8"
113
+ }
114
  }
115
  }
tokenizer.json CHANGED
@@ -114850,4 +114850,4 @@
114850
  "åľ º"
114851
  ]
114852
  }
114853
- }
 
114850
  "åľ º"
114851
  ]
114852
  }
114853
+ }
tokenizer_config.json CHANGED
@@ -12976,14 +12976,43 @@
12976
  "<|nocaptions|>",
12977
  "<|notimestamps|>"
12978
  ],
12979
- "bos_token": "<|endoftext|>",
 
 
 
 
 
 
 
12980
  "clean_up_tokenization_spaces": true,
12981
- "eos_token": "<|endoftext|>",
 
 
 
 
 
 
 
12982
  "errors": "replace",
12983
  "model_max_length": 1024,
12984
- "pad_token": "<|endoftext|>",
 
 
 
 
 
 
 
12985
  "processor_class": "WhisperProcessor",
12986
  "return_attention_mask": false,
12987
  "tokenizer_class": "WhisperTokenizer",
12988
- "unk_token": "<|endoftext|>"
 
 
 
 
 
 
 
 
12989
  }
 
12976
  "<|nocaptions|>",
12977
  "<|notimestamps|>"
12978
  ],
12979
+ "bos_token": {
12980
+ "__type": "AddedToken",
12981
+ "content": "<|endoftext|>",
12982
+ "lstrip": false,
12983
+ "normalized": true,
12984
+ "rstrip": false,
12985
+ "single_word": false
12986
+ },
12987
  "clean_up_tokenization_spaces": true,
12988
+ "eos_token": {
12989
+ "__type": "AddedToken",
12990
+ "content": "<|endoftext|>",
12991
+ "lstrip": false,
12992
+ "normalized": true,
12993
+ "rstrip": false,
12994
+ "single_word": false
12995
+ },
12996
  "errors": "replace",
12997
  "model_max_length": 1024,
12998
+ "pad_token": {
12999
+ "__type": "AddedToken",
13000
+ "content": "<|endoftext|>",
13001
+ "lstrip": false,
13002
+ "normalized": true,
13003
+ "rstrip": false,
13004
+ "single_word": false
13005
+ },
13006
  "processor_class": "WhisperProcessor",
13007
  "return_attention_mask": false,
13008
  "tokenizer_class": "WhisperTokenizer",
13009
+ "trust_remote_code": false,
13010
+ "unk_token": {
13011
+ "__type": "AddedToken",
13012
+ "content": "<|endoftext|>",
13013
+ "lstrip": false,
13014
+ "normalized": true,
13015
+ "rstrip": false,
13016
+ "single_word": false
13017
+ }
13018
  }
vocab.json CHANGED
The diff for this file is too large to render. See raw diff