abdoeid committed on
Commit
1043975
1 Parent(s): 10907c4

another trial

Browse files
onnx/decoder_model_merged.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e1e9e7daff11e87eff1a471ffd4a2c4f3407f539764124ed23b54a99763b255
3
  size 208595505
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ee17b9dadd58b44026b661bb2441c1df38d34a814ed4a006bb7296c39a3ccb1
3
  size 208595505
onnx/decoder_model_merged_bnb4.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f06f97d32e334c8654e92d14cd20659a599f46ab0bdc2455d6d4b341a5d95b5e
3
  size 122104442
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:278b3811d97b52edc9ef75e5c735169c86d0d4149e7a1d2d2db50182dbb44d81
3
  size 122104442
onnx/decoder_model_merged_fp16.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:118d746f5482726b103e5f99331a6a9660fb9a12b54477571c3fec2f0b860a70
3
  size 104596830
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d51123a7790e4978c8a1066ac7d8ee8ef66e00917d7212a4e2edc8e4c5ca2e2e
3
  size 104596830
onnx/decoder_model_merged_int8.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b186dd467c85b8028c98a5812a6f61d060c38ebdcc3cac03f87df49cdfd08003
3
  size 159786980
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bca9dde26d885471f512df9817c48b99a138e0b91bc1c00235588ac3c9839ac1
3
  size 159786980
onnx/decoder_model_merged_q4.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:63701b872e3ec8bfbc24f8a90fec7f085fa5fb9cac6922a86558e8d3949503c1
3
  size 123676396
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:097d939d6f5d7e02dfbe1a4581bcd7febe789884d9cd4e640f4805292d1b8a95
3
  size 123676396
onnx/decoder_model_merged_quantized.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b186dd467c85b8028c98a5812a6f61d060c38ebdcc3cac03f87df49cdfd08003
3
  size 159786980
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bca9dde26d885471f512df9817c48b99a138e0b91bc1c00235588ac3c9839ac1
3
  size 159786980
onnx/decoder_model_merged_uint8.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3667353df7963828f1327caeb133641a1614059607bb278aaf2bfc0f5e44a4da
3
  size 159786956
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66e57abacadd56baa82b8ec1523a6e865ab9fcef72aabdd8ef5d126722834865
3
  size 159786956
quantize_config.json CHANGED
@@ -2,18 +2,16 @@
2
  "fp16": {},
3
  "q8": {
4
  "per_model_config": {
5
- "decoder_model_merged": {
6
  "op_types": [
7
  "Add",
8
  "Concat",
9
- "Constant",
10
  "ConstantOfShape",
11
  "Div",
12
  "Equal",
13
  "Erf",
14
  "Expand",
15
  "Gather",
16
- "If",
17
  "Less",
18
  "MatMul",
19
  "Mul",
@@ -33,54 +31,56 @@
33
  ],
34
  "weight_type": "QInt8"
35
  },
36
- "decoder_model": {
37
  "op_types": [
38
  "Add",
39
  "Concat",
40
- "ConstantOfShape",
41
  "Div",
42
- "Equal",
43
  "Erf",
44
- "Expand",
45
  "Gather",
46
- "Less",
47
  "MatMul",
48
  "Mul",
49
  "Pow",
50
- "Range",
51
  "ReduceMean",
52
  "Reshape",
53
  "Shape",
54
  "Slice",
55
  "Softmax",
56
  "Sqrt",
57
- "Squeeze",
58
  "Sub",
59
  "Transpose",
60
- "Unsqueeze",
61
- "Where"
62
  ],
63
  "weight_type": "QInt8"
64
  },
65
- "decoder_with_past_model": {
66
  "op_types": [
67
  "Add",
68
  "Concat",
 
 
69
  "Div",
 
70
  "Erf",
 
71
  "Gather",
 
 
72
  "MatMul",
73
  "Mul",
74
  "Pow",
 
75
  "ReduceMean",
76
  "Reshape",
77
  "Shape",
78
  "Slice",
79
  "Softmax",
80
  "Sqrt",
 
81
  "Sub",
82
  "Transpose",
83
- "Unsqueeze"
 
84
  ],
85
  "weight_type": "QInt8"
86
  },
@@ -108,18 +108,16 @@
108
  },
109
  "int8": {
110
  "per_model_config": {
111
- "decoder_model_merged": {
112
  "op_types": [
113
  "Add",
114
  "Concat",
115
- "Constant",
116
  "ConstantOfShape",
117
  "Div",
118
  "Equal",
119
  "Erf",
120
  "Expand",
121
  "Gather",
122
- "If",
123
  "Less",
124
  "MatMul",
125
  "Mul",
@@ -139,54 +137,56 @@
139
  ],
140
  "weight_type": "QInt8"
141
  },
142
- "decoder_model": {
143
  "op_types": [
144
  "Add",
145
  "Concat",
146
- "ConstantOfShape",
147
  "Div",
148
- "Equal",
149
  "Erf",
150
- "Expand",
151
  "Gather",
152
- "Less",
153
  "MatMul",
154
  "Mul",
155
  "Pow",
156
- "Range",
157
  "ReduceMean",
158
  "Reshape",
159
  "Shape",
160
  "Slice",
161
  "Softmax",
162
  "Sqrt",
163
- "Squeeze",
164
  "Sub",
165
  "Transpose",
166
- "Unsqueeze",
167
- "Where"
168
  ],
169
  "weight_type": "QInt8"
170
  },
171
- "decoder_with_past_model": {
172
  "op_types": [
173
  "Add",
174
  "Concat",
 
 
175
  "Div",
 
176
  "Erf",
 
177
  "Gather",
 
 
178
  "MatMul",
179
  "Mul",
180
  "Pow",
 
181
  "ReduceMean",
182
  "Reshape",
183
  "Shape",
184
  "Slice",
185
  "Softmax",
186
  "Sqrt",
 
187
  "Sub",
188
  "Transpose",
189
- "Unsqueeze"
 
190
  ],
191
  "weight_type": "QInt8"
192
  },
@@ -214,18 +214,16 @@
214
  },
215
  "uint8": {
216
  "per_model_config": {
217
- "decoder_model_merged": {
218
  "op_types": [
219
  "Add",
220
  "Concat",
221
- "Constant",
222
  "ConstantOfShape",
223
  "Div",
224
  "Equal",
225
  "Erf",
226
  "Expand",
227
  "Gather",
228
- "If",
229
  "Less",
230
  "MatMul",
231
  "Mul",
@@ -245,54 +243,56 @@
245
  ],
246
  "weight_type": "QUInt8"
247
  },
248
- "decoder_model": {
249
  "op_types": [
250
  "Add",
251
  "Concat",
252
- "ConstantOfShape",
253
  "Div",
254
- "Equal",
255
  "Erf",
256
- "Expand",
257
  "Gather",
258
- "Less",
259
  "MatMul",
260
  "Mul",
261
  "Pow",
262
- "Range",
263
  "ReduceMean",
264
  "Reshape",
265
  "Shape",
266
  "Slice",
267
  "Softmax",
268
  "Sqrt",
269
- "Squeeze",
270
  "Sub",
271
  "Transpose",
272
- "Unsqueeze",
273
- "Where"
274
  ],
275
  "weight_type": "QUInt8"
276
  },
277
- "decoder_with_past_model": {
278
  "op_types": [
279
  "Add",
280
  "Concat",
 
 
281
  "Div",
 
282
  "Erf",
 
283
  "Gather",
 
 
284
  "MatMul",
285
  "Mul",
286
  "Pow",
 
287
  "ReduceMean",
288
  "Reshape",
289
  "Shape",
290
  "Slice",
291
  "Softmax",
292
  "Sqrt",
 
293
  "Sub",
294
  "Transpose",
295
- "Unsqueeze"
 
296
  ],
297
  "weight_type": "QUInt8"
298
  },
 
2
  "fp16": {},
3
  "q8": {
4
  "per_model_config": {
5
+ "decoder_model": {
6
  "op_types": [
7
  "Add",
8
  "Concat",
 
9
  "ConstantOfShape",
10
  "Div",
11
  "Equal",
12
  "Erf",
13
  "Expand",
14
  "Gather",
 
15
  "Less",
16
  "MatMul",
17
  "Mul",
 
31
  ],
32
  "weight_type": "QInt8"
33
  },
34
+ "decoder_with_past_model": {
35
  "op_types": [
36
  "Add",
37
  "Concat",
 
38
  "Div",
 
39
  "Erf",
 
40
  "Gather",
 
41
  "MatMul",
42
  "Mul",
43
  "Pow",
 
44
  "ReduceMean",
45
  "Reshape",
46
  "Shape",
47
  "Slice",
48
  "Softmax",
49
  "Sqrt",
 
50
  "Sub",
51
  "Transpose",
52
+ "Unsqueeze"
 
53
  ],
54
  "weight_type": "QInt8"
55
  },
56
+ "decoder_model_merged": {
57
  "op_types": [
58
  "Add",
59
  "Concat",
60
+ "Constant",
61
+ "ConstantOfShape",
62
  "Div",
63
+ "Equal",
64
  "Erf",
65
+ "Expand",
66
  "Gather",
67
+ "If",
68
+ "Less",
69
  "MatMul",
70
  "Mul",
71
  "Pow",
72
+ "Range",
73
  "ReduceMean",
74
  "Reshape",
75
  "Shape",
76
  "Slice",
77
  "Softmax",
78
  "Sqrt",
79
+ "Squeeze",
80
  "Sub",
81
  "Transpose",
82
+ "Unsqueeze",
83
+ "Where"
84
  ],
85
  "weight_type": "QInt8"
86
  },
 
108
  },
109
  "int8": {
110
  "per_model_config": {
111
+ "decoder_model": {
112
  "op_types": [
113
  "Add",
114
  "Concat",
 
115
  "ConstantOfShape",
116
  "Div",
117
  "Equal",
118
  "Erf",
119
  "Expand",
120
  "Gather",
 
121
  "Less",
122
  "MatMul",
123
  "Mul",
 
137
  ],
138
  "weight_type": "QInt8"
139
  },
140
+ "decoder_with_past_model": {
141
  "op_types": [
142
  "Add",
143
  "Concat",
 
144
  "Div",
 
145
  "Erf",
 
146
  "Gather",
 
147
  "MatMul",
148
  "Mul",
149
  "Pow",
 
150
  "ReduceMean",
151
  "Reshape",
152
  "Shape",
153
  "Slice",
154
  "Softmax",
155
  "Sqrt",
 
156
  "Sub",
157
  "Transpose",
158
+ "Unsqueeze"
 
159
  ],
160
  "weight_type": "QInt8"
161
  },
162
+ "decoder_model_merged": {
163
  "op_types": [
164
  "Add",
165
  "Concat",
166
+ "Constant",
167
+ "ConstantOfShape",
168
  "Div",
169
+ "Equal",
170
  "Erf",
171
+ "Expand",
172
  "Gather",
173
+ "If",
174
+ "Less",
175
  "MatMul",
176
  "Mul",
177
  "Pow",
178
+ "Range",
179
  "ReduceMean",
180
  "Reshape",
181
  "Shape",
182
  "Slice",
183
  "Softmax",
184
  "Sqrt",
185
+ "Squeeze",
186
  "Sub",
187
  "Transpose",
188
+ "Unsqueeze",
189
+ "Where"
190
  ],
191
  "weight_type": "QInt8"
192
  },
 
214
  },
215
  "uint8": {
216
  "per_model_config": {
217
+ "decoder_model": {
218
  "op_types": [
219
  "Add",
220
  "Concat",
 
221
  "ConstantOfShape",
222
  "Div",
223
  "Equal",
224
  "Erf",
225
  "Expand",
226
  "Gather",
 
227
  "Less",
228
  "MatMul",
229
  "Mul",
 
243
  ],
244
  "weight_type": "QUInt8"
245
  },
246
+ "decoder_with_past_model": {
247
  "op_types": [
248
  "Add",
249
  "Concat",
 
250
  "Div",
 
251
  "Erf",
 
252
  "Gather",
 
253
  "MatMul",
254
  "Mul",
255
  "Pow",
 
256
  "ReduceMean",
257
  "Reshape",
258
  "Shape",
259
  "Slice",
260
  "Softmax",
261
  "Sqrt",
 
262
  "Sub",
263
  "Transpose",
264
+ "Unsqueeze"
 
265
  ],
266
  "weight_type": "QUInt8"
267
  },
268
+ "decoder_model_merged": {
269
  "op_types": [
270
  "Add",
271
  "Concat",
272
+ "Constant",
273
+ "ConstantOfShape",
274
  "Div",
275
+ "Equal",
276
  "Erf",
277
+ "Expand",
278
  "Gather",
279
+ "If",
280
+ "Less",
281
  "MatMul",
282
  "Mul",
283
  "Pow",
284
+ "Range",
285
  "ReduceMean",
286
  "Reshape",
287
  "Shape",
288
  "Slice",
289
  "Softmax",
290
  "Sqrt",
291
+ "Squeeze",
292
  "Sub",
293
  "Transpose",
294
+ "Unsqueeze",
295
+ "Where"
296
  ],
297
  "weight_type": "QUInt8"
298
  },