Xenova HF staff committed on
Commit
e8ca71f
1 Parent(s): 0f4435e

Update quantize_config.json

Browse files
Files changed (1) hide show
  1. quantize_config.json +351 -109
quantize_config.json CHANGED
@@ -1,115 +1,357 @@
1
  {
2
- "per_channel": false,
3
- "reduce_range": false,
4
- "per_model_config": {
5
- "decoder_model": {
6
- "op_types": [
7
- "ReduceMean",
8
- "Concat",
9
- "Sqrt",
10
- "Less",
11
- "Unsqueeze",
12
- "Add",
13
- "Cast",
14
- "Div",
15
- "Equal",
16
- "Softmax",
17
- "Pow",
18
- "Squeeze",
19
- "Where",
20
- "Sub",
21
- "ConstantOfShape",
22
- "Erf",
23
- "Transpose",
24
- "Reshape",
25
- "MatMul",
26
- "Gather",
27
- "Shape",
28
- "Mul",
29
- "Range",
30
- "Constant",
31
- "Slice",
32
- "Expand"
33
- ],
34
- "weight_type": "QInt8"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  },
36
- "decoder_model_merged": {
37
- "op_types": [
38
- "ReduceMean",
39
- "Concat",
40
- "Sqrt",
41
- "Less",
42
- "If",
43
- "Unsqueeze",
44
- "Add",
45
- "Cast",
46
- "Div",
47
- "Equal",
48
- "Softmax",
49
- "Pow",
50
- "Squeeze",
51
- "Where",
52
- "Sub",
53
- "ConstantOfShape",
54
- "Erf",
55
- "Transpose",
56
- "Reshape",
57
- "MatMul",
58
- "Gather",
59
- "Shape",
60
- "Mul",
61
- "Range",
62
- "Constant",
63
- "Slice",
64
- "Expand"
65
- ],
66
- "weight_type": "QInt8"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  },
68
- "decoder_with_past_model": {
69
- "op_types": [
70
- "MatMul",
71
- "Gather",
72
- "ReduceMean",
73
- "Sub",
74
- "Div",
75
- "Concat",
76
- "Sqrt",
77
- "Shape",
78
- "Softmax",
79
- "Pow",
80
- "Erf",
81
- "Transpose",
82
- "Reshape",
83
- "Mul",
84
- "Constant",
85
- "Unsqueeze",
86
- "Add",
87
- "Slice"
88
- ],
89
- "weight_type": "QInt8"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  },
91
- "encoder_model": {
92
- "op_types": [
93
- "MatMul",
94
- "ReduceMean",
95
- "Conv",
96
- "Div",
97
- "Sub",
98
- "Sqrt",
99
- "Gather",
100
- "Shape",
101
- "Concat",
102
- "Softmax",
103
- "Pow",
104
- "Erf",
105
- "Transpose",
106
- "Reshape",
107
- "Mul",
108
- "Constant",
109
- "Unsqueeze",
110
- "Add"
111
- ],
112
- "weight_type": "QUInt8"
113
- }
114
  }
115
  }
 
1
  {
2
+ "fp16": {},
3
+ "q8": {
4
+ "per_model_config": {
5
+ "encoder_model": {
6
+ "op_types": [
7
+ "Add",
8
+ "Concat",
9
+ "Constant",
10
+ "Conv",
11
+ "Div",
12
+ "Erf",
13
+ "Gather",
14
+ "MatMul",
15
+ "Mul",
16
+ "Pow",
17
+ "ReduceMean",
18
+ "Reshape",
19
+ "Shape",
20
+ "Softmax",
21
+ "Sqrt",
22
+ "Sub",
23
+ "Transpose",
24
+ "Unsqueeze"
25
+ ],
26
+ "weight_type": "QUInt8"
27
+ },
28
+ "decoder_model_merged": {
29
+ "op_types": [
30
+ "Add",
31
+ "Cast",
32
+ "Concat",
33
+ "Constant",
34
+ "ConstantOfShape",
35
+ "Div",
36
+ "Equal",
37
+ "Erf",
38
+ "Expand",
39
+ "Gather",
40
+ "If",
41
+ "Less",
42
+ "MatMul",
43
+ "Mul",
44
+ "Pow",
45
+ "Range",
46
+ "ReduceMean",
47
+ "Reshape",
48
+ "Shape",
49
+ "Slice",
50
+ "Softmax",
51
+ "Sqrt",
52
+ "Squeeze",
53
+ "Sub",
54
+ "Transpose",
55
+ "Unsqueeze",
56
+ "Where"
57
+ ],
58
+ "weight_type": "QInt8"
59
+ },
60
+ "decoder_model": {
61
+ "op_types": [
62
+ "Add",
63
+ "Cast",
64
+ "Concat",
65
+ "Constant",
66
+ "ConstantOfShape",
67
+ "Div",
68
+ "Equal",
69
+ "Erf",
70
+ "Expand",
71
+ "Gather",
72
+ "Less",
73
+ "MatMul",
74
+ "Mul",
75
+ "Pow",
76
+ "Range",
77
+ "ReduceMean",
78
+ "Reshape",
79
+ "Shape",
80
+ "Slice",
81
+ "Softmax",
82
+ "Sqrt",
83
+ "Squeeze",
84
+ "Sub",
85
+ "Transpose",
86
+ "Unsqueeze",
87
+ "Where"
88
+ ],
89
+ "weight_type": "QInt8"
90
+ },
91
+ "decoder_with_past_model": {
92
+ "op_types": [
93
+ "Add",
94
+ "Concat",
95
+ "Constant",
96
+ "Div",
97
+ "Erf",
98
+ "Gather",
99
+ "MatMul",
100
+ "Mul",
101
+ "Pow",
102
+ "ReduceMean",
103
+ "Reshape",
104
+ "Shape",
105
+ "Slice",
106
+ "Softmax",
107
+ "Sqrt",
108
+ "Sub",
109
+ "Transpose",
110
+ "Unsqueeze"
111
+ ],
112
+ "weight_type": "QInt8"
113
+ }
114
  },
115
+ "per_channel": false,
116
+ "reduce_range": false
117
+ },
118
+ "int8": {
119
+ "per_model_config": {
120
+ "encoder_model": {
121
+ "op_types": [
122
+ "Add",
123
+ "Concat",
124
+ "Constant",
125
+ "Conv",
126
+ "Div",
127
+ "Erf",
128
+ "Gather",
129
+ "MatMul",
130
+ "Mul",
131
+ "Pow",
132
+ "ReduceMean",
133
+ "Reshape",
134
+ "Shape",
135
+ "Softmax",
136
+ "Sqrt",
137
+ "Sub",
138
+ "Transpose",
139
+ "Unsqueeze"
140
+ ],
141
+ "weight_type": "QInt8"
142
+ },
143
+ "decoder_model_merged": {
144
+ "op_types": [
145
+ "Add",
146
+ "Cast",
147
+ "Concat",
148
+ "Constant",
149
+ "ConstantOfShape",
150
+ "Div",
151
+ "Equal",
152
+ "Erf",
153
+ "Expand",
154
+ "Gather",
155
+ "If",
156
+ "Less",
157
+ "MatMul",
158
+ "Mul",
159
+ "Pow",
160
+ "Range",
161
+ "ReduceMean",
162
+ "Reshape",
163
+ "Shape",
164
+ "Slice",
165
+ "Softmax",
166
+ "Sqrt",
167
+ "Squeeze",
168
+ "Sub",
169
+ "Transpose",
170
+ "Unsqueeze",
171
+ "Where"
172
+ ],
173
+ "weight_type": "QInt8"
174
+ },
175
+ "decoder_model": {
176
+ "op_types": [
177
+ "Add",
178
+ "Cast",
179
+ "Concat",
180
+ "Constant",
181
+ "ConstantOfShape",
182
+ "Div",
183
+ "Equal",
184
+ "Erf",
185
+ "Expand",
186
+ "Gather",
187
+ "Less",
188
+ "MatMul",
189
+ "Mul",
190
+ "Pow",
191
+ "Range",
192
+ "ReduceMean",
193
+ "Reshape",
194
+ "Shape",
195
+ "Slice",
196
+ "Softmax",
197
+ "Sqrt",
198
+ "Squeeze",
199
+ "Sub",
200
+ "Transpose",
201
+ "Unsqueeze",
202
+ "Where"
203
+ ],
204
+ "weight_type": "QInt8"
205
+ },
206
+ "decoder_with_past_model": {
207
+ "op_types": [
208
+ "Add",
209
+ "Concat",
210
+ "Constant",
211
+ "Div",
212
+ "Erf",
213
+ "Gather",
214
+ "MatMul",
215
+ "Mul",
216
+ "Pow",
217
+ "ReduceMean",
218
+ "Reshape",
219
+ "Shape",
220
+ "Slice",
221
+ "Softmax",
222
+ "Sqrt",
223
+ "Sub",
224
+ "Transpose",
225
+ "Unsqueeze"
226
+ ],
227
+ "weight_type": "QInt8"
228
+ }
229
  },
230
+ "per_channel": false,
231
+ "reduce_range": false
232
+ },
233
+ "uint8": {
234
+ "per_model_config": {
235
+ "encoder_model": {
236
+ "op_types": [
237
+ "Add",
238
+ "Concat",
239
+ "Constant",
240
+ "Conv",
241
+ "Div",
242
+ "Erf",
243
+ "Gather",
244
+ "MatMul",
245
+ "Mul",
246
+ "Pow",
247
+ "ReduceMean",
248
+ "Reshape",
249
+ "Shape",
250
+ "Softmax",
251
+ "Sqrt",
252
+ "Sub",
253
+ "Transpose",
254
+ "Unsqueeze"
255
+ ],
256
+ "weight_type": "QUInt8"
257
+ },
258
+ "decoder_model_merged": {
259
+ "op_types": [
260
+ "Add",
261
+ "Cast",
262
+ "Concat",
263
+ "Constant",
264
+ "ConstantOfShape",
265
+ "Div",
266
+ "Equal",
267
+ "Erf",
268
+ "Expand",
269
+ "Gather",
270
+ "If",
271
+ "Less",
272
+ "MatMul",
273
+ "Mul",
274
+ "Pow",
275
+ "Range",
276
+ "ReduceMean",
277
+ "Reshape",
278
+ "Shape",
279
+ "Slice",
280
+ "Softmax",
281
+ "Sqrt",
282
+ "Squeeze",
283
+ "Sub",
284
+ "Transpose",
285
+ "Unsqueeze",
286
+ "Where"
287
+ ],
288
+ "weight_type": "QUInt8"
289
+ },
290
+ "decoder_model": {
291
+ "op_types": [
292
+ "Add",
293
+ "Cast",
294
+ "Concat",
295
+ "Constant",
296
+ "ConstantOfShape",
297
+ "Div",
298
+ "Equal",
299
+ "Erf",
300
+ "Expand",
301
+ "Gather",
302
+ "Less",
303
+ "MatMul",
304
+ "Mul",
305
+ "Pow",
306
+ "Range",
307
+ "ReduceMean",
308
+ "Reshape",
309
+ "Shape",
310
+ "Slice",
311
+ "Softmax",
312
+ "Sqrt",
313
+ "Squeeze",
314
+ "Sub",
315
+ "Transpose",
316
+ "Unsqueeze",
317
+ "Where"
318
+ ],
319
+ "weight_type": "QUInt8"
320
+ },
321
+ "decoder_with_past_model": {
322
+ "op_types": [
323
+ "Add",
324
+ "Concat",
325
+ "Constant",
326
+ "Div",
327
+ "Erf",
328
+ "Gather",
329
+ "MatMul",
330
+ "Mul",
331
+ "Pow",
332
+ "ReduceMean",
333
+ "Reshape",
334
+ "Shape",
335
+ "Slice",
336
+ "Softmax",
337
+ "Sqrt",
338
+ "Sub",
339
+ "Transpose",
340
+ "Unsqueeze"
341
+ ],
342
+ "weight_type": "QUInt8"
343
+ }
344
  },
345
+ "per_channel": false,
346
+ "reduce_range": false
347
+ },
348
+ "q4": {
349
+ "block_size": 32,
350
+ "is_symmetric": true,
351
+ "accuracy_level": null
352
+ },
353
+ "bnb4": {
354
+ "block_size": 64,
355
+ "quant_type": 1
 
 
 
 
 
 
 
 
 
 
 
 
356
  }
357
  }