Upload ONNX weights (#1)

- Upload ONNX weights (b4f1eb96e0127e87f319b2dca126f2bd41a6ca40)
- Update quantize_config.json (1dfaec240135f05f1b92cd4407b41638d9a562ef)

Files changed (8)

onnx/model.onnx +2 -2
onnx/model_bnb4.onnx +3 -0
onnx/model_fp16.onnx +2 -2
onnx/model_int8.onnx +3 -0
onnx/model_q4.onnx +3 -0
onnx/model_quantized.onnx +2 -2
onnx/model_uint8.onnx +3 -0
quantize_config.json +99 -27

onnx/model.onnx CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e5cd70d65de2a04153a474773f0102857f4dd4d0d17d84a2da107d667674061b
-size 1262486260
+oid sha256:1a80d8866149b1208c4a2bbf8eb19c1ed637d766e74a7514a421449ecfd3b674
+size 1262486261

onnx/model_bnb4.onnx ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ad2c4b9ae92ff349221fcff688aaf0e366ff617a42dfe10a2717ee2cfa8475b0
+size 222500558

onnx/model_fp16.onnx CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3f84e73595ceda7a5b1009c755e371922254f3381d841f76c1eb75c18cb86d07
-size 631741971
+oid sha256:d1549e6fadcea0b7d958d9f7f98695e52c807309171d47475fb915b061da0b59
+size 631741972

onnx/model_int8.onnx ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f2b8cc658baee8361d529e85101bcb8084d6287a287383393b683f5d32e71482
+size 317490640

onnx/model_q4.onnx ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6fb7a8c93f64fa08080b3c42e67a08d563a317aa0cc0698663572534d4a9fd38
+size 241408614

onnx/model_quantized.onnx CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5deb06fe70137f24bac4ed9a8987ed525a6945fb53121f07ea2f0a6adb803e1f
-size 318602084
+oid sha256:86b7d6b8603fd4dec80d92edae4bc71ebab57ef4dd671883ffe20c13eb95444c
+size 317490718

onnx/model_uint8.onnx ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:86b7d6b8603fd4dec80d92edae4bc71ebab57ef4dd671883ffe20c13eb95444c
+size 317490718

quantize_config.json CHANGED

@@ -1,30 +1,102 @@
 {
-    "per_channel": true,
-    "reduce_range": true,
-    "per_model_config": {
-        "model": {
-            "op_types": [
-                "Conv",
-                "Sub",
-                "ReduceMean",
-                "Div",
-                "Gather",
-                "Mul",
-                "Shape",
-                "Pow",
-                "Sqrt",
-                "MatMul",
-                "Concat",
-                "Unsqueeze",
-                "Add",
-                "Slice",
-                "Transpose",
-                "Constant",
-                "Softmax",
-                "Reshape",
-                "Erf"
-            ],
-            "weight_type": "QUInt8"
-        }
+    "fp16": {},
+    "q8": {
+        "per_model_config": {
+            "model": {
+                "op_types": [
+                    "Add",
+                    "Concat",
+                    "Constant",
+                    "Conv",
+                    "Div",
+                    "Erf",
+                    "Gather",
+                    "MatMul",
+                    "Mul",
+                    "Pow",
+                    "ReduceMean",
+                    "Reshape",
+                    "Shape",
+                    "Slice",
+                    "Softmax",
+                    "Sqrt",
+                    "Sub",
+                    "Transpose",
+                    "Unsqueeze"
+                ],
+                "weight_type": "QUInt8"
+            }
+        },
+        "per_channel": false,
+        "reduce_range": false
+    },
+    "int8": {
+        "per_model_config": {
+            "model": {
+                "op_types": [
+                    "Add",
+                    "Concat",
+                    "Constant",
+                    "Conv",
+                    "Div",
+                    "Erf",
+                    "Gather",
+                    "MatMul",
+                    "Mul",
+                    "Pow",
+                    "ReduceMean",
+                    "Reshape",
+                    "Shape",
+                    "Slice",
+                    "Softmax",
+                    "Sqrt",
+                    "Sub",
+                    "Transpose",
+                    "Unsqueeze"
+                ],
+                "weight_type": "QInt8"
+            }
+        },
+        "per_channel": false,
+        "reduce_range": false
+    },
+    "uint8": {
+        "per_model_config": {
+            "model": {
+                "op_types": [
+                    "Add",
+                    "Concat",
+                    "Constant",
+                    "Conv",
+                    "Div",
+                    "Erf",
+                    "Gather",
+                    "MatMul",
+                    "Mul",
+                    "Pow",
+                    "ReduceMean",
+                    "Reshape",
+                    "Shape",
+                    "Slice",
+                    "Softmax",
+                    "Sqrt",
+                    "Sub",
+                    "Transpose",
+                    "Unsqueeze"
+                ],
+                "weight_type": "QUInt8"
+            }
+        },
+        "per_channel": false,
+        "reduce_range": false
+    },
+    "q4": {
+        "block_size": 32,
+        "is_symmetric": true,
+        "accuracy_level": null
+    },
+    "bnb4": {
+        "block_size": 64,
+        "quant_type": 1
     }
 }