csukuangfj
/

paraformer-onnxruntime-python-example

Model card Files Files and versions Community

csukuangfj committed on Mar 31, 2023

Commit

e93da4c

•

1 Parent(s): 9234e07

add int8 models

Files changed (2) hide show

model.int8.onnx +3 -0
quantize-model.py +23 -0

model.int8.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9ada9127ca5b82320385ac12340eb8b05dee64fd45cf8cf593ec693826ec2fd7
+size 223385835

quantize-model.py ADDED Viewed

	@@ -0,0 +1,23 @@

+#!/usr/bin/env python3
+import onnx
+from onnxruntime.quantization import QuantType, quantize_dynamic
+def main():
+    onnx_model = onnx.load("model.onnx")
+    nodes = [n.name for n in onnx_model.graph.node]
+    nodes_to_exclude = [m for m in nodes if "output" in m]
+    print(nodes_to_exclude)
+    quantize_dynamic(
+        model_input="model.onnx",
+        model_output="model.int8.onnx",
+        op_types_to_quantize=["MatMul"],
+        per_channel=True,
+        weight_type=QuantType.QUInt8,
+        nodes_to_exclude=nodes_to_exclude,
+    )
+# Run the quantization only when this file is executed as a script,
+# not when it is imported as a module.
+if __name__ == "__main__":
+    main()