csukuangfj committed on
Commit
e93da4c
1 Parent(s): 9234e07

add int8 models

Browse files
Files changed (2) hide show
  1. model.int8.onnx +3 -0
  2. quantize-model.py +23 -0
model.int8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ada9127ca5b82320385ac12340eb8b05dee64fd45cf8cf593ec693826ec2fd7
3
+ size 223385835
quantize-model.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env python3
"""Dynamically quantize an ONNX model's MatMul weights to 8-bit integers.

Loads a float32 ONNX model, excludes every graph node whose name contains
"output" from quantization, and writes the int8-quantized model next to it.
"""

import onnx
from onnxruntime.quantization import QuantType, quantize_dynamic


def main(
    model_input: str = "model.onnx",
    model_output: str = "model.int8.onnx",
) -> None:
    """Quantize *model_input* and write the int8 result to *model_output*.

    Args:
      model_input: Path of the float32 ONNX model to quantize.
      model_output: Path where the quantized int8 model is written.
    """
    onnx_model = onnx.load(model_input)
    # NOTE(review): nodes with "output" in their name are kept in float32 —
    # presumably the final projection layers, where quantization tends to
    # hurt accuracy the most; confirm against the model's naming scheme.
    nodes_to_exclude = [
        node.name for node in onnx_model.graph.node if "output" in node.name
    ]
    print(nodes_to_exclude)
    quantize_dynamic(
        model_input=model_input,
        model_output=model_output,
        op_types_to_quantize=["MatMul"],
        per_channel=True,
        weight_type=QuantType.QUInt8,
        nodes_to_exclude=nodes_to_exclude,
    )


if __name__ == "__main__":
    main()