abdoeid committed on
Commit
fb85236
1 Parent(s): b8431d5
config.json CHANGED
@@ -44,7 +44,7 @@
44
  "pad_token_id": 50257,
45
  "scale_embedding": false,
46
  "transformers_version": "4.38.2",
47
- "use_cache": false,
48
  "use_weighted_layer_sum": false,
49
  "vocab_size": 51865
50
  }
 
44
  "pad_token_id": 50257,
45
  "scale_embedding": false,
46
  "transformers_version": "4.38.2",
47
+ "use_cache": true,
48
  "use_weighted_layer_sum": false,
49
  "vocab_size": 51865
50
  }
onnx/decoder_model.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:70b33f914b345a7f1980ddba1872c516c960d8049cd6dcb6be8196cd7fc9cf81
3
- size 208282931
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07f3527bf1b6e757ce8fba20d7f01e121935b4826965aa4de76702bc39220aeb
3
+ size 208282631
onnx/decoder_model_bnb4.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e235bdae6806210f51d5ae040421d75d0a46440c2d8c2f694b20929d89e1c9a5
3
- size 121784838
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0dc5b459651ece621b120147003eaf61f6cfab61cd45c53293a9dad8f646ce22
3
+ size 121784538
onnx/decoder_model_fp16.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b883d046cf7bb401dbaf290e8a1eaddb00a7a20e196271dbf2eac792e0e46166
3
- size 104276420
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf86e42647a98c19ea747778a0b9ba76602262a0b2c7837ab52cbab90d9071b7
3
+ size 104280035
onnx/decoder_model_int8.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a8c9529dddf6c870639b3faaf3ea103f8d395f843324c3ca93a7804f574383b
3
- size 159413729
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0a916c1ff71df2bbcbeed5e54f6ce60fed68dc37c64e22ca2912b52cb4f9ff1
3
+ size 159413429
onnx/decoder_model_merged.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8eaec3ff9265177a159887e1d081c0bca768e56831d049c4cffb24211a268f7a
3
+ size 208595505
onnx/decoder_model_merged_bnb4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27e7618ba9cbf73710e3d094c5843a91306e9a242a068b9f32872c62b8a09dda
3
+ size 122104442
onnx/decoder_model_merged_fp16.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfc685d105b76ef2ef7c66c03129b0aa25938981a9f048a99ce2a1bb54236b51
3
+ size 104596830
onnx/decoder_model_merged_int8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1caba27cdfee30aebfd6cd310f15440905a3351667d28b0a7206090167f00df5
3
+ size 159786980
onnx/decoder_model_merged_q4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c06674d6934ea209bbedb2a40d16409755b0931087f6d635de615901cefa5bd
3
+ size 123676396
onnx/decoder_model_merged_quantized.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1caba27cdfee30aebfd6cd310f15440905a3351667d28b0a7206090167f00df5
3
+ size 159786980
onnx/decoder_model_merged_uint8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4484081ee6ebb967258d391e0bae6d40f16ad0f9101801549e74f3a375421bb5
3
+ size 159786956
onnx/decoder_model_q4.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d1661564f270a907e591798f05c4361305b7ac9e7fb815bec2a7de8f9e26804c
3
- size 123357222
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e68a1557b1360f3fe86e69d3936df84403c204c8d7018d3371d1f5cdec606fa6
3
+ size 123356922
onnx/decoder_model_quantized.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a8c9529dddf6c870639b3faaf3ea103f8d395f843324c3ca93a7804f574383b
3
- size 159413729
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0a916c1ff71df2bbcbeed5e54f6ce60fed68dc37c64e22ca2912b52cb4f9ff1
3
+ size 159413429
onnx/decoder_model_uint8.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4981d7fe33971a61ca8bfbb7edb5ac8047f9e604bc874084837bc247171638d7
3
- size 159413759
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a09fa99095ecdeb4c0493297c895f774586aaa5d5f90cc5729d0dafa2ae49634
3
+ size 159413459
onnx/decoder_with_past_model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f6fead1892d7b172486713df8b8cdd2f7fce82094da75516f250add3da74d48
3
+ size 195677181
onnx/decoder_with_past_model_bnb4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f54219d2012ad553fbfc4bc75e70abf73f77b9e222fdbfb3e825370e632f920
3
+ size 119990644
onnx/decoder_with_past_model_fp16.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb2faf06bbeace2f2ed86cf796d272c4f2a7a12819aa4ea36eef71019b77f041
3
+ size 97974264
onnx/decoder_with_past_model_int8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64ec935f9c9aee49775ec0c86f2c5867fc9b1234b9cfe71e8e6a462b7961eb9f
3
+ size 156233280
onnx/decoder_with_past_model_q4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dad28c9000b148c431b5294b5f6ba247f2c00c56700041ab061449877999b40b
3
+ size 121366516
onnx/decoder_with_past_model_quantized.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64ec935f9c9aee49775ec0c86f2c5867fc9b1234b9cfe71e8e6a462b7961eb9f
3
+ size 156233280
onnx/decoder_with_past_model_uint8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f582712c5fa2f52b96e623a7b24dc84ed2dae316dcb6f31a00d4723f79b96c07
3
+ size 156233304
quantize_config.json CHANGED
@@ -2,23 +2,36 @@
2
  "fp16": {},
3
  "q8": {
4
  "per_model_config": {
5
- "encoder_model": {
6
  "op_types": [
7
  "Add",
8
- "Conv",
 
 
9
  "Div",
 
10
  "Erf",
 
 
 
 
11
  "MatMul",
12
  "Mul",
13
  "Pow",
 
14
  "ReduceMean",
15
  "Reshape",
 
 
16
  "Softmax",
17
  "Sqrt",
 
18
  "Sub",
19
- "Transpose"
 
 
20
  ],
21
- "weight_type": "QUInt8"
22
  },
23
  "decoder_model": {
24
  "op_types": [
@@ -48,6 +61,46 @@
48
  "Where"
49
  ],
50
  "weight_type": "QInt8"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  }
52
  },
53
  "per_channel": false,
@@ -55,21 +108,34 @@
55
  },
56
  "int8": {
57
  "per_model_config": {
58
- "encoder_model": {
59
  "op_types": [
60
  "Add",
61
- "Conv",
 
 
62
  "Div",
 
63
  "Erf",
 
 
 
 
64
  "MatMul",
65
  "Mul",
66
  "Pow",
 
67
  "ReduceMean",
68
  "Reshape",
 
 
69
  "Softmax",
70
  "Sqrt",
 
71
  "Sub",
72
- "Transpose"
 
 
73
  ],
74
  "weight_type": "QInt8"
75
  },
@@ -101,6 +167,46 @@
101
  "Where"
102
  ],
103
  "weight_type": "QInt8"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  }
105
  },
106
  "per_channel": false,
@@ -108,21 +214,34 @@
108
  },
109
  "uint8": {
110
  "per_model_config": {
111
- "encoder_model": {
112
  "op_types": [
113
  "Add",
114
- "Conv",
 
 
115
  "Div",
 
116
  "Erf",
 
 
 
 
117
  "MatMul",
118
  "Mul",
119
  "Pow",
 
120
  "ReduceMean",
121
  "Reshape",
 
 
122
  "Softmax",
123
  "Sqrt",
 
124
  "Sub",
125
- "Transpose"
 
 
126
  ],
127
  "weight_type": "QUInt8"
128
  },
@@ -154,6 +273,46 @@
154
  "Where"
155
  ],
156
  "weight_type": "QUInt8"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
  }
158
  },
159
  "per_channel": false,
 
2
  "fp16": {},
3
  "q8": {
4
  "per_model_config": {
5
+ "decoder_model_merged": {
6
  "op_types": [
7
  "Add",
8
+ "Concat",
9
+ "Constant",
10
+ "ConstantOfShape",
11
  "Div",
12
+ "Equal",
13
  "Erf",
14
+ "Expand",
15
+ "Gather",
16
+ "If",
17
+ "Less",
18
  "MatMul",
19
  "Mul",
20
  "Pow",
21
+ "Range",
22
  "ReduceMean",
23
  "Reshape",
24
+ "Shape",
25
+ "Slice",
26
  "Softmax",
27
  "Sqrt",
28
+ "Squeeze",
29
  "Sub",
30
+ "Transpose",
31
+ "Unsqueeze",
32
+ "Where"
33
  ],
34
+ "weight_type": "QInt8"
35
  },
36
  "decoder_model": {
37
  "op_types": [
 
61
  "Where"
62
  ],
63
  "weight_type": "QInt8"
64
+ },
65
+ "decoder_with_past_model": {
66
+ "op_types": [
67
+ "Add",
68
+ "Concat",
69
+ "Div",
70
+ "Erf",
71
+ "Gather",
72
+ "MatMul",
73
+ "Mul",
74
+ "Pow",
75
+ "ReduceMean",
76
+ "Reshape",
77
+ "Shape",
78
+ "Slice",
79
+ "Softmax",
80
+ "Sqrt",
81
+ "Sub",
82
+ "Transpose",
83
+ "Unsqueeze"
84
+ ],
85
+ "weight_type": "QInt8"
86
+ },
87
+ "encoder_model": {
88
+ "op_types": [
89
+ "Add",
90
+ "Conv",
91
+ "Div",
92
+ "Erf",
93
+ "MatMul",
94
+ "Mul",
95
+ "Pow",
96
+ "ReduceMean",
97
+ "Reshape",
98
+ "Softmax",
99
+ "Sqrt",
100
+ "Sub",
101
+ "Transpose"
102
+ ],
103
+ "weight_type": "QUInt8"
104
  }
105
  },
106
  "per_channel": false,
 
108
  },
109
  "int8": {
110
  "per_model_config": {
111
+ "decoder_model_merged": {
112
  "op_types": [
113
  "Add",
114
+ "Concat",
115
+ "Constant",
116
+ "ConstantOfShape",
117
  "Div",
118
+ "Equal",
119
  "Erf",
120
+ "Expand",
121
+ "Gather",
122
+ "If",
123
+ "Less",
124
  "MatMul",
125
  "Mul",
126
  "Pow",
127
+ "Range",
128
  "ReduceMean",
129
  "Reshape",
130
+ "Shape",
131
+ "Slice",
132
  "Softmax",
133
  "Sqrt",
134
+ "Squeeze",
135
  "Sub",
136
+ "Transpose",
137
+ "Unsqueeze",
138
+ "Where"
139
  ],
140
  "weight_type": "QInt8"
141
  },
 
167
  "Where"
168
  ],
169
  "weight_type": "QInt8"
170
+ },
171
+ "decoder_with_past_model": {
172
+ "op_types": [
173
+ "Add",
174
+ "Concat",
175
+ "Div",
176
+ "Erf",
177
+ "Gather",
178
+ "MatMul",
179
+ "Mul",
180
+ "Pow",
181
+ "ReduceMean",
182
+ "Reshape",
183
+ "Shape",
184
+ "Slice",
185
+ "Softmax",
186
+ "Sqrt",
187
+ "Sub",
188
+ "Transpose",
189
+ "Unsqueeze"
190
+ ],
191
+ "weight_type": "QInt8"
192
+ },
193
+ "encoder_model": {
194
+ "op_types": [
195
+ "Add",
196
+ "Conv",
197
+ "Div",
198
+ "Erf",
199
+ "MatMul",
200
+ "Mul",
201
+ "Pow",
202
+ "ReduceMean",
203
+ "Reshape",
204
+ "Softmax",
205
+ "Sqrt",
206
+ "Sub",
207
+ "Transpose"
208
+ ],
209
+ "weight_type": "QInt8"
210
  }
211
  },
212
  "per_channel": false,
 
214
  },
215
  "uint8": {
216
  "per_model_config": {
217
+ "decoder_model_merged": {
218
  "op_types": [
219
  "Add",
220
+ "Concat",
221
+ "Constant",
222
+ "ConstantOfShape",
223
  "Div",
224
+ "Equal",
225
  "Erf",
226
+ "Expand",
227
+ "Gather",
228
+ "If",
229
+ "Less",
230
  "MatMul",
231
  "Mul",
232
  "Pow",
233
+ "Range",
234
  "ReduceMean",
235
  "Reshape",
236
+ "Shape",
237
+ "Slice",
238
  "Softmax",
239
  "Sqrt",
240
+ "Squeeze",
241
  "Sub",
242
+ "Transpose",
243
+ "Unsqueeze",
244
+ "Where"
245
  ],
246
  "weight_type": "QUInt8"
247
  },
 
273
  "Where"
274
  ],
275
  "weight_type": "QUInt8"
276
+ },
277
+ "decoder_with_past_model": {
278
+ "op_types": [
279
+ "Add",
280
+ "Concat",
281
+ "Div",
282
+ "Erf",
283
+ "Gather",
284
+ "MatMul",
285
+ "Mul",
286
+ "Pow",
287
+ "ReduceMean",
288
+ "Reshape",
289
+ "Shape",
290
+ "Slice",
291
+ "Softmax",
292
+ "Sqrt",
293
+ "Sub",
294
+ "Transpose",
295
+ "Unsqueeze"
296
+ ],
297
+ "weight_type": "QUInt8"
298
+ },
299
+ "encoder_model": {
300
+ "op_types": [
301
+ "Add",
302
+ "Conv",
303
+ "Div",
304
+ "Erf",
305
+ "MatMul",
306
+ "Mul",
307
+ "Pow",
308
+ "ReduceMean",
309
+ "Reshape",
310
+ "Softmax",
311
+ "Sqrt",
312
+ "Sub",
313
+ "Transpose"
314
+ ],
315
+ "weight_type": "QUInt8"
316
  }
317
  },
318
  "per_channel": false,