"""Remove the non-quantized ONNX files of a converted T5 checkpoint.

The conversion and quantization steps (fastT5) are kept below as
commented-out reference code; only the cleanup runs when this script is
executed.  The cleanup is best-effort: a file that is already gone is
skipped, and any other OS error is reported without stopping the script.
"""
import os

from fastT5 import generate_onnx_representation, quantize

chkpt = "mrm8488/t5-base-finetuned-question-generation-ap"
model_path = 'onnx_t5'

# # Step 1. convert huggingfaces t5 model to onnx
# onnx_model_paths = generate_onnx_representation(chkpt,output_path=model_path)
#
# # Step 2. (recommended) quantize the converted model for fast inference and to reduce model size.
# quant_model_paths = quantize(model_path)

# Delete non-quantized models to save space.
# The file names use the part of the checkpoint id after the first "/"
# (assumes an "org/name" style id, as with the value above).
model_name = chkpt.split("/")[1]

for part in ("encoder", "decoder", "init-decoder"):
    onnx_file = f'{model_path}/{model_name}-{part}.onnx'
    # One try per file: a missing file must not prevent the remaining
    # files from being removed.
    try:
        os.remove(onnx_file)
    except FileNotFoundError:
        # Already deleted (or never generated) - nothing to clean up.
        pass
    except OSError as err:
        # Keep the cleanup best-effort, but do not hide real problems
        # such as permission errors.
        print(f"Could not remove {onnx_file}: {err}")