from fastT5 import generate_onnx_representation,quantize
import os

# Hugging Face checkpoint whose ONNX export is managed by this script.
chkpt = "mrm8488/t5-base-finetuned-question-generation-ap"
# Directory that holds the exported ONNX files.
model_path = 'onnx_t5'

# # Step 1. Convert the Hugging Face T5 model to ONNX.
# onnx_model_paths = generate_onnx_representation(chkpt,output_path=model_path)
#
# # Step 2. (recommended) Quantize the converted model for fast inference and to reduce model size.
# quant_model_paths = quantize(model_path)


def remove_unquantized_models(model_dir, checkpoint):
    """Delete the non-quantized encoder/decoder/init-decoder ONNX files.

    Each file is removed independently, so one missing file does not stop
    the others from being cleaned up.

    Args:
        model_dir: Directory containing the exported ``.onnx`` files.
        checkpoint: Checkpoint identifier such as ``"org/model-name"``; the
            text after the last ``/`` is used as the file-name prefix.

    Returns:
        The list of paths that were actually removed.
    """
    # Use the last path component so identifiers without a slash (or with
    # more than one) do not raise or pick the wrong segment.
    model_name = checkpoint.rsplit("/", 1)[-1]
    removed = []
    for part in ("encoder", "decoder", "init-decoder"):
        onnx_file = os.path.join(model_dir, f"{model_name}-{part}.onnx")
        try:
            os.remove(onnx_file)
        except FileNotFoundError:
            # Already deleted (or never generated): nothing to clean up.
            continue
        except OSError as err:
            # Cleanup is best-effort; report the problem instead of hiding it.
            print(f"Could not remove {onnx_file}: {err}")
            continue
        removed.append(onnx_file)
    return removed


# Delete the non-quantized models to save space.
remove_unquantized_models(model_path, chkpt)


#