Xenova HF staff committed on
Commit
e3528f6
1 Parent(s): a197abc

Upload ONNX weights (#1)

Browse files

- Upload ONNX weights (b4f1eb96e0127e87f319b2dca126f2bd41a6ca40)
- Update quantize_config.json (1dfaec240135f05f1b92cd4407b41638d9a562ef)

onnx/model.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e5cd70d65de2a04153a474773f0102857f4dd4d0d17d84a2da107d667674061b
3
- size 1262486260
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a80d8866149b1208c4a2bbf8eb19c1ed637d766e74a7514a421449ecfd3b674
3
+ size 1262486261
onnx/model_bnb4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad2c4b9ae92ff349221fcff688aaf0e366ff617a42dfe10a2717ee2cfa8475b0
3
+ size 222500558
onnx/model_fp16.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f84e73595ceda7a5b1009c755e371922254f3381d841f76c1eb75c18cb86d07
3
- size 631741971
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1549e6fadcea0b7d958d9f7f98695e52c807309171d47475fb915b061da0b59
3
+ size 631741972
onnx/model_int8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2b8cc658baee8361d529e85101bcb8084d6287a287383393b683f5d32e71482
3
+ size 317490640
onnx/model_q4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fb7a8c93f64fa08080b3c42e67a08d563a317aa0cc0698663572534d4a9fd38
3
+ size 241408614
onnx/model_quantized.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5deb06fe70137f24bac4ed9a8987ed525a6945fb53121f07ea2f0a6adb803e1f
3
- size 318602084
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86b7d6b8603fd4dec80d92edae4bc71ebab57ef4dd671883ffe20c13eb95444c
3
+ size 317490718
onnx/model_uint8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86b7d6b8603fd4dec80d92edae4bc71ebab57ef4dd671883ffe20c13eb95444c
3
+ size 317490718
quantize_config.json CHANGED
@@ -1,30 +1,102 @@
1
  {
2
- "per_channel": true,
3
- "reduce_range": true,
4
- "per_model_config": {
5
- "model": {
6
- "op_types": [
7
- "Conv",
8
- "Sub",
9
- "ReduceMean",
10
- "Div",
11
- "Gather",
12
- "Mul",
13
- "Shape",
14
- "Pow",
15
- "Sqrt",
16
- "MatMul",
17
- "Concat",
18
- "Unsqueeze",
19
- "Add",
20
- "Slice",
21
- "Transpose",
22
- "Constant",
23
- "Softmax",
24
- "Reshape",
25
- "Erf"
26
- ],
27
- "weight_type": "QUInt8"
28
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  }
30
  }
 
1
  {
2
+ "fp16": {},
3
+ "q8": {
4
+ "per_model_config": {
5
+ "model": {
6
+ "op_types": [
7
+ "Add",
8
+ "Concat",
9
+ "Constant",
10
+ "Conv",
11
+ "Div",
12
+ "Erf",
13
+ "Gather",
14
+ "MatMul",
15
+ "Mul",
16
+ "Pow",
17
+ "ReduceMean",
18
+ "Reshape",
19
+ "Shape",
20
+ "Slice",
21
+ "Softmax",
22
+ "Sqrt",
23
+ "Sub",
24
+ "Transpose",
25
+ "Unsqueeze"
26
+ ],
27
+ "weight_type": "QUInt8"
28
+ }
29
+ },
30
+ "per_channel": false,
31
+ "reduce_range": false
32
+ },
33
+ "int8": {
34
+ "per_model_config": {
35
+ "model": {
36
+ "op_types": [
37
+ "Add",
38
+ "Concat",
39
+ "Constant",
40
+ "Conv",
41
+ "Div",
42
+ "Erf",
43
+ "Gather",
44
+ "MatMul",
45
+ "Mul",
46
+ "Pow",
47
+ "ReduceMean",
48
+ "Reshape",
49
+ "Shape",
50
+ "Slice",
51
+ "Softmax",
52
+ "Sqrt",
53
+ "Sub",
54
+ "Transpose",
55
+ "Unsqueeze"
56
+ ],
57
+ "weight_type": "QInt8"
58
+ }
59
+ },
60
+ "per_channel": false,
61
+ "reduce_range": false
62
+ },
63
+ "uint8": {
64
+ "per_model_config": {
65
+ "model": {
66
+ "op_types": [
67
+ "Add",
68
+ "Concat",
69
+ "Constant",
70
+ "Conv",
71
+ "Div",
72
+ "Erf",
73
+ "Gather",
74
+ "MatMul",
75
+ "Mul",
76
+ "Pow",
77
+ "ReduceMean",
78
+ "Reshape",
79
+ "Shape",
80
+ "Slice",
81
+ "Softmax",
82
+ "Sqrt",
83
+ "Sub",
84
+ "Transpose",
85
+ "Unsqueeze"
86
+ ],
87
+ "weight_type": "QUInt8"
88
+ }
89
+ },
90
+ "per_channel": false,
91
+ "reduce_range": false
92
+ },
93
+ "q4": {
94
+ "block_size": 32,
95
+ "is_symmetric": true,
96
+ "accuracy_level": null
97
+ },
98
+ "bnb4": {
99
+ "block_size": 64,
100
+ "quant_type": 1
101
  }
102
  }