Upload folder using huggingface_hub

#2
by Xenova HF staff - opened
onnx/decoder_model.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:38f0b91131d1f1c8b2b5f1d1096f61e44ded18be49313c80abc5c11daf03bd28
3
- size 118405120
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14f1d425a4821feeba77cf93eeeaf812ca816f2e3fec382b4f0fa93d29de710e
3
+ size 118395947
onnx/decoder_model_bnb4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc19e2b24afd0d338a7da0d3b5cd04f869247db5c8bac05f6186a76174e13548
3
+ size 85961846
onnx/decoder_model_fp16.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:750818a7dab16956c93e3a414c492c705a1e4276182db2a1b863a571ca9a3c83
3
+ size 59369281
onnx/decoder_model_int8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2adcd415dd1ddfdd7a5a55d303a0925612869f7b0f7b810eada6069837128d1d
3
+ size 30460688
onnx/decoder_model_merged.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:950978b1dbcbf250335358c1236053ba19a7f7849b33dc777f4421b72b7626fa
3
- size 118578063
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5ed2c958def66b0f8c388068d533df57b6cad1cd0872ecf6875af5f59841966
3
+ size 118552291
onnx/decoder_model_merged_bnb4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:add3cdb488ec3987c02622724fdc120fb64c9e3aecf40d5b7156720925970517
3
+ size 86122878
onnx/decoder_model_merged_fp16.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6260d80026e23bfa91b47917e534edf73f11fbf03d733836424e3e5236991a38
3
- size 59599490
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c875d5a65958a636a6be56a9b96ed47c0cdce0e60708b51dd255ccf89c585090
3
+ size 59589240
onnx/decoder_model_merged_int8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b3aa804202c04d4ecda9ae9551fb7a5fc0321d008fc4247467c9249eb9b48fe
3
+ size 30718858
onnx/decoder_model_merged_q4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1256c9ae823b52f7e8e694a9cab87b5c3c0dd3cef52808d99093cea75e9148f6
3
+ size 86712166
onnx/decoder_model_merged_quantized.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dbb2e063b7fbc41d9803b9698f93ecb035c50cbb3fb87b56cb131e4a5eb99059
3
- size 30727382
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b3aa804202c04d4ecda9ae9551fb7a5fc0321d008fc4247467c9249eb9b48fe
3
+ size 30718858
onnx/decoder_model_merged_uint8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7dcca01c7f016c4b6643683db3f6408bf33065b7e416264dd809a7295f66a624
3
+ size 30718880
onnx/decoder_model_q4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc11b8f04b4b13dce602f632f2ddc4608561713e75083df8d04db59608180a46
3
+ size 86551422
onnx/decoder_model_quantized.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:62b1761a6c23961cdabbf7caa25495525d78528a64baacf7e4283c83cb2941f1
3
- size 30459718
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2adcd415dd1ddfdd7a5a55d303a0925612869f7b0f7b810eada6069837128d1d
3
+ size 30460688
onnx/decoder_model_uint8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:700a17c4a3673e60b16f78f6bb46c59639f64969a090369f24b5212ef7e8dbca
3
+ size 30460710
onnx/decoder_with_past_model.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:52b796cccc711df396afa098ae7cb6271d7fad42e4e7188564448cfa99aed830
3
- size 113653015
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ed76a8f8b9448c9eb74ad07549b65285d29dd36f4e42911fc65d67becbe9458
3
+ size 113637462
onnx/decoder_with_past_model_bnb4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbb2149fb3765f30c51e5d18b198e4debc9eebfad0790cb99dddca44bda60ec6
3
+ size 85257161
onnx/decoder_with_past_model_fp16.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1de4bf3c8044cad24dec4017f3e2fb2e28f7ec00d68ddced06164769dbaddd3f
3
+ size 56961209
onnx/decoder_with_past_model_int8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b49f0d4ec759b60ec752c74feb80e69c5dd49727f0c8a2a043583b8c109ca39a
3
+ size 29220093
onnx/decoder_with_past_model_q4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7320a1909e5775df1e9fe6e7869a4a1508e0509ef154eb5ced0149744ce69be7
3
+ size 85773057
onnx/decoder_with_past_model_quantized.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2c7390ddd38c567a51a73f175c6655afcbcd5cf1d5dc6be5badc89cba3f1d59c
3
- size 29228736
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b49f0d4ec759b60ec752c74feb80e69c5dd49727f0c8a2a043583b8c109ca39a
3
+ size 29220093
onnx/decoder_with_past_model_uint8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2f26f2e006f4fd2ebd04ebd065d22b5204be94e1bc962a3165599c95b78a6b6
3
+ size 29220111
onnx/encoder_model.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a048dcf0cde98db805f46be32b75d778cf824aad20b51a02e5b9cff457426238
3
- size 32909539
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c361b9430a5ef6619ee64b7fe06c725df19f36d508cc8b847064b34a888a3fe
3
+ size 32904992
onnx/encoder_model_bnb4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c8ef1607bc8b62b01ecba2ac185e644f80c40fa26d78a1ded3d87a1682894ca
3
+ size 8578451
onnx/encoder_model_fp16.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9642671531f37f756defd070993a815f94dd9c6a716a799b01fee61d73d3f506
3
- size 16519776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a89d421642a8652afd3456661060de9ee4d58b5435cf765cda0395919b11aeaf
3
+ size 16519192
onnx/encoder_model_int8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21712ecbe2d1078eaa206b41218a6dff945eb9ac0854b55fd584e8bc88b20368
3
+ size 10124977
onnx/encoder_model_q4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb73f790e63906c9e9d02c4e3abf55817dd16fd7ef7c7f4754c1395202191b29
3
+ size 9020667
onnx/encoder_model_quantized.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8cc3c6f8563d1b3fbd2c5af9f64c2bed8b020bc593c402d1ef53b9f08fbf1b90
3
- size 10124913
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e93ec822f16a8fd264e7de972ad17d615ea7334b75a52d54c50c2e18dd503a25
3
+ size 10124993
onnx/encoder_model_uint8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e93ec822f16a8fd264e7de972ad17d615ea7334b75a52d54c50c2e18dd503a25
3
+ size 10124993
quantize_config.json CHANGED
@@ -1,115 +1,357 @@
1
  {
2
- "per_channel": false,
3
- "reduce_range": false,
4
- "per_model_config": {
5
- "decoder_model": {
6
- "op_types": [
7
- "ReduceMean",
8
- "Concat",
9
- "Sqrt",
10
- "Less",
11
- "Unsqueeze",
12
- "Add",
13
- "Cast",
14
- "Div",
15
- "Equal",
16
- "Softmax",
17
- "Pow",
18
- "Squeeze",
19
- "Where",
20
- "Sub",
21
- "ConstantOfShape",
22
- "Erf",
23
- "Transpose",
24
- "Reshape",
25
- "MatMul",
26
- "Gather",
27
- "Shape",
28
- "Mul",
29
- "Range",
30
- "Constant",
31
- "Slice",
32
- "Expand"
33
- ],
34
- "weight_type": "QInt8"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  },
36
- "decoder_model_merged": {
37
- "op_types": [
38
- "ReduceMean",
39
- "Concat",
40
- "Sqrt",
41
- "Less",
42
- "If",
43
- "Unsqueeze",
44
- "Add",
45
- "Cast",
46
- "Div",
47
- "Equal",
48
- "Softmax",
49
- "Pow",
50
- "Squeeze",
51
- "Where",
52
- "Sub",
53
- "ConstantOfShape",
54
- "Erf",
55
- "Transpose",
56
- "Reshape",
57
- "MatMul",
58
- "Gather",
59
- "Shape",
60
- "Mul",
61
- "Range",
62
- "Constant",
63
- "Slice",
64
- "Expand"
65
- ],
66
- "weight_type": "QInt8"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  },
68
- "decoder_with_past_model": {
69
- "op_types": [
70
- "MatMul",
71
- "Gather",
72
- "ReduceMean",
73
- "Sub",
74
- "Div",
75
- "Concat",
76
- "Sqrt",
77
- "Shape",
78
- "Softmax",
79
- "Pow",
80
- "Erf",
81
- "Transpose",
82
- "Reshape",
83
- "Mul",
84
- "Constant",
85
- "Unsqueeze",
86
- "Add",
87
- "Slice"
88
- ],
89
- "weight_type": "QInt8"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  },
91
- "encoder_model": {
92
- "op_types": [
93
- "MatMul",
94
- "ReduceMean",
95
- "Conv",
96
- "Div",
97
- "Sub",
98
- "Sqrt",
99
- "Gather",
100
- "Shape",
101
- "Concat",
102
- "Softmax",
103
- "Pow",
104
- "Erf",
105
- "Transpose",
106
- "Reshape",
107
- "Mul",
108
- "Constant",
109
- "Unsqueeze",
110
- "Add"
111
- ],
112
- "weight_type": "QUInt8"
113
- }
114
  }
115
  }
 
1
  {
2
+ "fp16": {},
3
+ "q8": {
4
+ "per_model_config": {
5
+ "encoder_model": {
6
+ "op_types": [
7
+ "Add",
8
+ "Concat",
9
+ "Constant",
10
+ "Conv",
11
+ "Div",
12
+ "Erf",
13
+ "Gather",
14
+ "MatMul",
15
+ "Mul",
16
+ "Pow",
17
+ "ReduceMean",
18
+ "Reshape",
19
+ "Shape",
20
+ "Softmax",
21
+ "Sqrt",
22
+ "Sub",
23
+ "Transpose",
24
+ "Unsqueeze"
25
+ ],
26
+ "weight_type": "QUInt8"
27
+ },
28
+ "decoder_model_merged": {
29
+ "op_types": [
30
+ "Add",
31
+ "Cast",
32
+ "Concat",
33
+ "Constant",
34
+ "ConstantOfShape",
35
+ "Div",
36
+ "Equal",
37
+ "Erf",
38
+ "Expand",
39
+ "Gather",
40
+ "If",
41
+ "Less",
42
+ "MatMul",
43
+ "Mul",
44
+ "Pow",
45
+ "Range",
46
+ "ReduceMean",
47
+ "Reshape",
48
+ "Shape",
49
+ "Slice",
50
+ "Softmax",
51
+ "Sqrt",
52
+ "Squeeze",
53
+ "Sub",
54
+ "Transpose",
55
+ "Unsqueeze",
56
+ "Where"
57
+ ],
58
+ "weight_type": "QInt8"
59
+ },
60
+ "decoder_model": {
61
+ "op_types": [
62
+ "Add",
63
+ "Cast",
64
+ "Concat",
65
+ "Constant",
66
+ "ConstantOfShape",
67
+ "Div",
68
+ "Equal",
69
+ "Erf",
70
+ "Expand",
71
+ "Gather",
72
+ "Less",
73
+ "MatMul",
74
+ "Mul",
75
+ "Pow",
76
+ "Range",
77
+ "ReduceMean",
78
+ "Reshape",
79
+ "Shape",
80
+ "Slice",
81
+ "Softmax",
82
+ "Sqrt",
83
+ "Squeeze",
84
+ "Sub",
85
+ "Transpose",
86
+ "Unsqueeze",
87
+ "Where"
88
+ ],
89
+ "weight_type": "QInt8"
90
+ },
91
+ "decoder_with_past_model": {
92
+ "op_types": [
93
+ "Add",
94
+ "Concat",
95
+ "Constant",
96
+ "Div",
97
+ "Erf",
98
+ "Gather",
99
+ "MatMul",
100
+ "Mul",
101
+ "Pow",
102
+ "ReduceMean",
103
+ "Reshape",
104
+ "Shape",
105
+ "Slice",
106
+ "Softmax",
107
+ "Sqrt",
108
+ "Sub",
109
+ "Transpose",
110
+ "Unsqueeze"
111
+ ],
112
+ "weight_type": "QInt8"
113
+ }
114
  },
115
+ "per_channel": false,
116
+ "reduce_range": false
117
+ },
118
+ "int8": {
119
+ "per_model_config": {
120
+ "encoder_model": {
121
+ "op_types": [
122
+ "Add",
123
+ "Concat",
124
+ "Constant",
125
+ "Conv",
126
+ "Div",
127
+ "Erf",
128
+ "Gather",
129
+ "MatMul",
130
+ "Mul",
131
+ "Pow",
132
+ "ReduceMean",
133
+ "Reshape",
134
+ "Shape",
135
+ "Softmax",
136
+ "Sqrt",
137
+ "Sub",
138
+ "Transpose",
139
+ "Unsqueeze"
140
+ ],
141
+ "weight_type": "QInt8"
142
+ },
143
+ "decoder_model_merged": {
144
+ "op_types": [
145
+ "Add",
146
+ "Cast",
147
+ "Concat",
148
+ "Constant",
149
+ "ConstantOfShape",
150
+ "Div",
151
+ "Equal",
152
+ "Erf",
153
+ "Expand",
154
+ "Gather",
155
+ "If",
156
+ "Less",
157
+ "MatMul",
158
+ "Mul",
159
+ "Pow",
160
+ "Range",
161
+ "ReduceMean",
162
+ "Reshape",
163
+ "Shape",
164
+ "Slice",
165
+ "Softmax",
166
+ "Sqrt",
167
+ "Squeeze",
168
+ "Sub",
169
+ "Transpose",
170
+ "Unsqueeze",
171
+ "Where"
172
+ ],
173
+ "weight_type": "QInt8"
174
+ },
175
+ "decoder_model": {
176
+ "op_types": [
177
+ "Add",
178
+ "Cast",
179
+ "Concat",
180
+ "Constant",
181
+ "ConstantOfShape",
182
+ "Div",
183
+ "Equal",
184
+ "Erf",
185
+ "Expand",
186
+ "Gather",
187
+ "Less",
188
+ "MatMul",
189
+ "Mul",
190
+ "Pow",
191
+ "Range",
192
+ "ReduceMean",
193
+ "Reshape",
194
+ "Shape",
195
+ "Slice",
196
+ "Softmax",
197
+ "Sqrt",
198
+ "Squeeze",
199
+ "Sub",
200
+ "Transpose",
201
+ "Unsqueeze",
202
+ "Where"
203
+ ],
204
+ "weight_type": "QInt8"
205
+ },
206
+ "decoder_with_past_model": {
207
+ "op_types": [
208
+ "Add",
209
+ "Concat",
210
+ "Constant",
211
+ "Div",
212
+ "Erf",
213
+ "Gather",
214
+ "MatMul",
215
+ "Mul",
216
+ "Pow",
217
+ "ReduceMean",
218
+ "Reshape",
219
+ "Shape",
220
+ "Slice",
221
+ "Softmax",
222
+ "Sqrt",
223
+ "Sub",
224
+ "Transpose",
225
+ "Unsqueeze"
226
+ ],
227
+ "weight_type": "QInt8"
228
+ }
229
  },
230
+ "per_channel": false,
231
+ "reduce_range": false
232
+ },
233
+ "uint8": {
234
+ "per_model_config": {
235
+ "encoder_model": {
236
+ "op_types": [
237
+ "Add",
238
+ "Concat",
239
+ "Constant",
240
+ "Conv",
241
+ "Div",
242
+ "Erf",
243
+ "Gather",
244
+ "MatMul",
245
+ "Mul",
246
+ "Pow",
247
+ "ReduceMean",
248
+ "Reshape",
249
+ "Shape",
250
+ "Softmax",
251
+ "Sqrt",
252
+ "Sub",
253
+ "Transpose",
254
+ "Unsqueeze"
255
+ ],
256
+ "weight_type": "QUInt8"
257
+ },
258
+ "decoder_model_merged": {
259
+ "op_types": [
260
+ "Add",
261
+ "Cast",
262
+ "Concat",
263
+ "Constant",
264
+ "ConstantOfShape",
265
+ "Div",
266
+ "Equal",
267
+ "Erf",
268
+ "Expand",
269
+ "Gather",
270
+ "If",
271
+ "Less",
272
+ "MatMul",
273
+ "Mul",
274
+ "Pow",
275
+ "Range",
276
+ "ReduceMean",
277
+ "Reshape",
278
+ "Shape",
279
+ "Slice",
280
+ "Softmax",
281
+ "Sqrt",
282
+ "Squeeze",
283
+ "Sub",
284
+ "Transpose",
285
+ "Unsqueeze",
286
+ "Where"
287
+ ],
288
+ "weight_type": "QUInt8"
289
+ },
290
+ "decoder_model": {
291
+ "op_types": [
292
+ "Add",
293
+ "Cast",
294
+ "Concat",
295
+ "Constant",
296
+ "ConstantOfShape",
297
+ "Div",
298
+ "Equal",
299
+ "Erf",
300
+ "Expand",
301
+ "Gather",
302
+ "Less",
303
+ "MatMul",
304
+ "Mul",
305
+ "Pow",
306
+ "Range",
307
+ "ReduceMean",
308
+ "Reshape",
309
+ "Shape",
310
+ "Slice",
311
+ "Softmax",
312
+ "Sqrt",
313
+ "Squeeze",
314
+ "Sub",
315
+ "Transpose",
316
+ "Unsqueeze",
317
+ "Where"
318
+ ],
319
+ "weight_type": "QUInt8"
320
+ },
321
+ "decoder_with_past_model": {
322
+ "op_types": [
323
+ "Add",
324
+ "Concat",
325
+ "Constant",
326
+ "Div",
327
+ "Erf",
328
+ "Gather",
329
+ "MatMul",
330
+ "Mul",
331
+ "Pow",
332
+ "ReduceMean",
333
+ "Reshape",
334
+ "Shape",
335
+ "Slice",
336
+ "Softmax",
337
+ "Sqrt",
338
+ "Sub",
339
+ "Transpose",
340
+ "Unsqueeze"
341
+ ],
342
+ "weight_type": "QUInt8"
343
+ }
344
  },
345
+ "per_channel": false,
346
+ "reduce_range": false
347
+ },
348
+ "q4": {
349
+ "block_size": 32,
350
+ "is_symmetric": true,
351
+ "accuracy_level": null
352
+ },
353
+ "bnb4": {
354
+ "block_size": 64,
355
+ "quant_type": 1
 
 
 
 
 
 
 
 
 
 
 
 
356
  }
357
  }