Xenova HF staff committed on
Commit
0b54d2c
1 Parent(s): b91a329

Upload folder using huggingface_hub

Browse files
onnx/model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dcfd5ab88829855a3a5964dbb03ae6a5bf89c4c68604c5a295c19c0b9df2992e
3
+ size 46434169
onnx/model_bnb4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89be01958da262ae0b131377dde126d29301cf1386fc7b9138618b6ca1fa4599
3
+ size 39225852
onnx/model_fp16.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d221711133a3e43b8ffce46ea67a000327a092dabb97c9894e5a20efe39e1e2
3
+ size 23324980
onnx/model_int8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b7d10236df94549d01fb111239a91902a8703dff785391c4d2f4f7ff5ed4c2d
3
+ size 12057342
onnx/model_q4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff449dfdd1f70c699d64c8919559d3b95a678f2a1b2f63db93d134cc34d9202e
3
+ size 39356894
onnx/model_quantized.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:203ae59f5c9c1f864e22e8c846d16669ca86ded5475f90d4918d02998d247e98
3
+ size 12057392
onnx/model_uint8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:203ae59f5c9c1f864e22e8c846d16669ca86ded5475f90d4918d02998d247e98
3
+ size 12057392
quantize_config.json ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "fp16": {},
3
+ "q8": {
4
+ "per_model_config": {
5
+ "model": {
6
+ "op_types": [
7
+ "Add",
8
+ "BatchNormalization",
9
+ "Cast",
10
+ "Concat",
11
+ "Constant",
12
+ "Conv",
13
+ "Div",
14
+ "Erf",
15
+ "Flatten",
16
+ "Gather",
17
+ "Gemm",
18
+ "GlobalAveragePool",
19
+ "Identity",
20
+ "MatMul",
21
+ "Mul",
22
+ "ReduceMean",
23
+ "Relu",
24
+ "Reshape",
25
+ "Shape",
26
+ "Sigmoid",
27
+ "Slice",
28
+ "Softmax",
29
+ "Split",
30
+ "Sqrt",
31
+ "Squeeze",
32
+ "Sub",
33
+ "Transpose",
34
+ "Unsqueeze"
35
+ ],
36
+ "weight_type": "QUInt8"
37
+ }
38
+ },
39
+ "per_channel": false,
40
+ "reduce_range": false
41
+ },
42
+ "int8": {
43
+ "per_model_config": {
44
+ "model": {
45
+ "op_types": [
46
+ "Add",
47
+ "BatchNormalization",
48
+ "Cast",
49
+ "Concat",
50
+ "Constant",
51
+ "Conv",
52
+ "Div",
53
+ "Erf",
54
+ "Flatten",
55
+ "Gather",
56
+ "Gemm",
57
+ "GlobalAveragePool",
58
+ "Identity",
59
+ "MatMul",
60
+ "Mul",
61
+ "ReduceMean",
62
+ "Relu",
63
+ "Reshape",
64
+ "Shape",
65
+ "Sigmoid",
66
+ "Slice",
67
+ "Softmax",
68
+ "Split",
69
+ "Sqrt",
70
+ "Squeeze",
71
+ "Sub",
72
+ "Transpose",
73
+ "Unsqueeze"
74
+ ],
75
+ "weight_type": "QInt8"
76
+ }
77
+ },
78
+ "per_channel": false,
79
+ "reduce_range": false
80
+ },
81
+ "uint8": {
82
+ "per_model_config": {
83
+ "model": {
84
+ "op_types": [
85
+ "Add",
86
+ "BatchNormalization",
87
+ "Cast",
88
+ "Concat",
89
+ "Constant",
90
+ "Conv",
91
+ "Div",
92
+ "Erf",
93
+ "Flatten",
94
+ "Gather",
95
+ "Gemm",
96
+ "GlobalAveragePool",
97
+ "Identity",
98
+ "MatMul",
99
+ "Mul",
100
+ "ReduceMean",
101
+ "Relu",
102
+ "Reshape",
103
+ "Shape",
104
+ "Sigmoid",
105
+ "Slice",
106
+ "Softmax",
107
+ "Split",
108
+ "Sqrt",
109
+ "Squeeze",
110
+ "Sub",
111
+ "Transpose",
112
+ "Unsqueeze"
113
+ ],
114
+ "weight_type": "QUInt8"
115
+ }
116
+ },
117
+ "per_channel": false,
118
+ "reduce_range": false
119
+ },
120
+ "q4": {
121
+ "block_size": 32,
122
+ "is_symmetric": true,
123
+ "accuracy_level": null
124
+ },
125
+ "bnb4": {
126
+ "block_size": 64,
127
+ "quant_type": 1
128
+ }
129
+ }