Feature Extraction
sentence-transformers
ONNX
English
bert
sentence-similarity
Inference Endpoints
shuttie committed on
Commit
1e6ba95
1 Parent(s): c4c8efb

add quantized version

Browse files
Files changed (2) hide show
  1. model_quantized.onnx +3 -0
  2. quantize_config.json +30 -0
model_quantized.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:740b0e562df8a054267cf1ff250f70c307959030880eaa556f5e1b38a71d5c1f
3
+ size 22972869
quantize_config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "per_channel": true,
3
+ "reduce_range": true,
4
+ "per_model_config": {
5
+ "model": {
6
+ "op_types": [
7
+ "Erf",
8
+ "Transpose",
9
+ "Slice",
10
+ "Reshape",
11
+ "Unsqueeze",
12
+ "Sub",
13
+ "Constant",
14
+ "Sqrt",
15
+ "MatMul",
16
+ "Cast",
17
+ "Mul",
18
+ "Div",
19
+ "Add",
20
+ "Softmax",
21
+ "Concat",
22
+ "Gather",
23
+ "Shape",
24
+ "ReduceMean",
25
+ "Pow"
26
+ ],
27
+ "weight_type": "QInt8"
28
+ }
29
+ }
30
+ }