Xenova HF staff committed on
Commit
e77a003
1 Parent(s): f9a6efe
onnx/model_bnb4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbb834378afbc7d0f00fda65861e06cc1af85730f68b596a66f2aa068d4b1124
3
+ size 17963874
onnx/model_int8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a40ba6ecc73f01ecf139d871ffe936f5aa2af3a3316765a3dea6608c7a5764e0
3
+ size 4715295
onnx/model_q4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbb834378afbc7d0f00fda65861e06cc1af85730f68b596a66f2aa068d4b1124
3
+ size 17963874
onnx/model_quantized.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb5007aef59fd63101e45ffe5bed76ca7e1f2d1fe638b5de29b50235f629f323
3
+ size 4715295
onnx/model_uint8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb5007aef59fd63101e45ffe5bed76ca7e1f2d1fe638b5de29b50235f629f323
3
+ size 4715295
quantize_config.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "fp16": {},
3
+ "q8": {
4
+ "per_model_config": {
5
+ "model": {
6
+ "op_types": [
7
+ "Add",
8
+ "Cast",
9
+ "Concat",
10
+ "Constant",
11
+ "Conv",
12
+ "Div",
13
+ "Gather",
14
+ "LeakyRelu",
15
+ "Mul",
16
+ "Reshape",
17
+ "Resize",
18
+ "Shape",
19
+ "Transpose",
20
+ "Unsqueeze"
21
+ ],
22
+ "weight_type": "QUInt8"
23
+ }
24
+ },
25
+ "per_channel": true,
26
+ "reduce_range": true
27
+ },
28
+ "int8": {
29
+ "per_model_config": {
30
+ "model": {
31
+ "op_types": [
32
+ "Add",
33
+ "Cast",
34
+ "Concat",
35
+ "Constant",
36
+ "Conv",
37
+ "Div",
38
+ "Gather",
39
+ "LeakyRelu",
40
+ "Mul",
41
+ "Reshape",
42
+ "Resize",
43
+ "Shape",
44
+ "Transpose",
45
+ "Unsqueeze"
46
+ ],
47
+ "weight_type": "QInt8"
48
+ }
49
+ },
50
+ "per_channel": true,
51
+ "reduce_range": true
52
+ },
53
+ "uint8": {
54
+ "per_model_config": {
55
+ "model": {
56
+ "op_types": [
57
+ "Add",
58
+ "Cast",
59
+ "Concat",
60
+ "Constant",
61
+ "Conv",
62
+ "Div",
63
+ "Gather",
64
+ "LeakyRelu",
65
+ "Mul",
66
+ "Reshape",
67
+ "Resize",
68
+ "Shape",
69
+ "Transpose",
70
+ "Unsqueeze"
71
+ ],
72
+ "weight_type": "QUInt8"
73
+ }
74
+ },
75
+ "per_channel": true,
76
+ "reduce_range": true
77
+ },
78
+ "q4": {
79
+ "block_size": 32,
80
+ "is_symmetric": true,
81
+ "accuracy_level": null
82
+ },
83
+ "bnb4": {
84
+ "block_size": 64,
85
+ "quant_type": 1
86
+ }
87
+ }