Update

Browse files

Files changed (9) hide show

config.json +2 -1
onnx/model.onnx +1 -1
onnx/model_bnb4.onnx +3 -0
onnx/model_fp16.onnx +3 -0
onnx/model_int8.onnx +3 -0
onnx/model_q4.onnx +3 -0
onnx/model_quantized.onnx +2 -2
onnx/model_uint8.onnx +3 -0
quantize_config.json +99 -27

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "avsolatorio/GIST-small-Embedding-v0",
   "architectures": [
     "BertModel"
   ],
@@ -21,6 +21,7 @@
   "model_type": "bert",
   "num_attention_heads": 12,
   "num_hidden_layers": 12,
   "pad_token_id": 0,
   "position_embedding_type": "absolute",
   "transformers_version": "4.38.1",

 {
+  "_name_or_path": "avsolatorio/NoInstruct-small-Embedding-v0",
   "architectures": [
     "BertModel"
   ],
   "model_type": "bert",
   "num_attention_heads": 12,
   "num_hidden_layers": 12,
+  "output_hidden_states": true,
   "pad_token_id": 0,
   "position_embedding_type": "absolute",
   "transformers_version": "4.38.1",

onnx/model.onnx CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8c95502dc23d4ffb50a81d3838f721abde71509982d720d355ce52c0cdfe2dfe
 size 133093492

 version https://git-lfs.github.com/spec/v1
+oid sha256:37a0a5eb063d3479ad8f0d764cb99cc71fd51b12b0c0d2b85b58c5423f821d3d
 size 133093492

onnx/model_bnb4.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b169590fdbc586696c2bf29346a51b8dffa2e1196f878aa4b115eb2e486a6790
+size 60113951

onnx/model_fp16.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:195acee1af16e6f3ee605d2a71b4c900e2cee6fac8eb58fa36bd25b1cf228387
+size 66749214

onnx/model_int8.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:43f4bed7552ac961cb329c5fbbc47dbddb2135fe7ae5c5f67d30f91acc608c6f
+size 34015111

onnx/model_q4.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ffd7cce04f628df93f7f6fc4c12e7eef2ff3e78459f80b77e62e02fde5d8524b
+size 61440599

onnx/model_quantized.onnx CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:45b972073570f3af92b90b708ec580d63cca87654e2d5e7ccb2cf560aa881bf3
-size 34014427

 version https://git-lfs.github.com/spec/v1
+oid sha256:43f4bed7552ac961cb329c5fbbc47dbddb2135fe7ae5c5f67d30f91acc608c6f
+size 34015111

onnx/model_uint8.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0b4e77809f071f798de78dd9358164415043afa2ce72c4c95f51ca2203444612
+size 34015108

quantize_config.json CHANGED Viewed

@@ -1,30 +1,102 @@
 {
-    "per_channel": true,
-    "reduce_range": true,
-    "per_model_config": {
-        "model": {
-            "op_types": [
-                "Transpose",
-                "Add",
-                "Concat",
-                "Shape",
-                "Mul",
-                "Reshape",
-                "Gather",
-                "Div",
-                "Constant",
-                "Sub",
-                "Pow",
-                "Slice",
-                "Sqrt",
-                "Erf",
-                "Cast",
-                "Softmax",
-                "Unsqueeze",
-                "ReduceMean",
-                "MatMul"
-            ],
-            "weight_type": "QInt8"
-        }
     }
 }

 {
+    "fp16": {},
+    "q8": {
+        "per_model_config": {
+            "model": {
+                "op_types": [
+                    "Add",
+                    "Cast",
+                    "Concat",
+                    "Constant",
+                    "Div",
+                    "Erf",
+                    "Gather",
+                    "MatMul",
+                    "Mul",
+                    "Pow",
+                    "ReduceMean",
+                    "Reshape",
+                    "Shape",
+                    "Slice",
+                    "Softmax",
+                    "Sqrt",
+                    "Sub",
+                    "Transpose",
+                    "Unsqueeze"
+                ],
+                "weight_type": "QInt8"
+            }
+        },
+        "per_channel": true,
+        "reduce_range": true
+    },
+    "int8": {
+        "per_model_config": {
+            "model": {
+                "op_types": [
+                    "Add",
+                    "Cast",
+                    "Concat",
+                    "Constant",
+                    "Div",
+                    "Erf",
+                    "Gather",
+                    "MatMul",
+                    "Mul",
+                    "Pow",
+                    "ReduceMean",
+                    "Reshape",
+                    "Shape",
+                    "Slice",
+                    "Softmax",
+                    "Sqrt",
+                    "Sub",
+                    "Transpose",
+                    "Unsqueeze"
+                ],
+                "weight_type": "QInt8"
+            }
+        },
+        "per_channel": true,
+        "reduce_range": true
+    },
+    "uint8": {
+        "per_model_config": {
+            "model": {
+                "op_types": [
+                    "Add",
+                    "Cast",
+                    "Concat",
+                    "Constant",
+                    "Div",
+                    "Erf",
+                    "Gather",
+                    "MatMul",
+                    "Mul",
+                    "Pow",
+                    "ReduceMean",
+                    "Reshape",
+                    "Shape",
+                    "Slice",
+                    "Softmax",
+                    "Sqrt",
+                    "Sub",
+                    "Transpose",
+                    "Unsqueeze"
+                ],
+                "weight_type": "QUInt8"
+            }
+        },
+        "per_channel": true,
+        "reduce_range": true
+    },
+    "q4": {
+        "block_size": 32,
+        "is_symmetric": true,
+        "accuracy_level": null
+    },
+    "bnb4": {
+        "block_size": 64,
+        "quant_type": 1
     }
 }