"""Optimize a fixed list of ONNX model files with ONNX Runtime.

Each path in ``models`` is passed to ``optimizer.optimize_model`` and the
result is saved in the same directory, with ``_optimized`` inserted just
before the file extension (``foo.onnx`` -> ``foo_optimized.onnx``).
"""
import os

# ``optimize_model`` is provided by the ``onnxruntime.transformers``
# subpackage; the top-level ``onnxruntime`` package has no ``optimizer``
# module, so importing it from there fails.
from onnxruntime.transformers import optimizer

# Specify the models you want to optimize
models = [
    'onnx/decoder_model.onnx',
    'onnx/decoder_model_merged.onnx',
    'onnx/decoder_model_merged_quantized.onnx',
    'onnx/decoder_model_quantized.onnx',
    'onnx/decoder_with_past_model.onnx',
    'onnx/decoder_with_past_model_quantized.onnx',
    'onnx/encoder_model.onnx',
    'onnx/encoder_model_quantized.onnx',
]

for model_path in models:
    # Load and optimize the model.
    # NOTE(review): optimize_model is called with its default settings;
    # confirm the default model type suits these encoder/decoder graphs.
    optimized_model = optimizer.optimize_model(model_path)

    # Split off the extension so the suffix is inserted only at the end of
    # the file name; a plain str.replace('.onnx', ...) would also rewrite
    # any earlier '.onnx' occurrence in the path.
    root, ext = os.path.splitext(model_path)
    output_path = root + '_optimized' + ext

    # Save the optimized model
    optimized_model.save_model_to_file(output_path)