"""Dynamically quantize an ONNX decoder model's weights to INT8.

Reads `decoder_model.onnx` from the working directory and writes the
quantized model to `decoder_quantized.onnx`. Dynamic quantization
converts weights to INT8 ahead of time and quantizes activations at
runtime, shrinking the model and typically speeding up CPU inference.
"""

import onnx
from onnxruntime.quantization import quantize_dynamic, QuantType

# Paths: original float model in, quantized model out.
model_path = "decoder_model.onnx"
quantized_model_path = "decoder_quantized.onnx"

# Perform dynamic (weight-only) quantization; QInt8 stores weights as
# signed 8-bit integers.
quantize_dynamic(model_path, quantized_model_path, weight_type=QuantType.QInt8)

print(f"Quantized model saved to {quantized_model_path}")