# Load a token-classification checkpoint through Optimum's ONNX Runtime
# wrapper, then quantize it and save the result to a local directory.
#
# NOTE: the statements below run at import time; the script is kept flat so
# that `onnx_model`, `quantizer`, `dqconfig` and `model_quantized_path`
# remain available as module-level names.
from optimum.onnxruntime import ORTModelForTokenClassification, ORTQuantizer
from optimum.onnxruntime.configuration import AutoQuantizationConfig

# `export=True` asks Optimum to convert the checkpoint to ONNX while loading
# (see the Optimum `from_pretrained` documentation).
onnx_model = ORTModelForTokenClassification.from_pretrained(
    "nickprock/bert-italian-finetuned-ner",
    export=True,
)

# Build the quantizer directly from the in-memory ONNX model.
quantizer = ORTQuantizer.from_pretrained(onnx_model)

# Quantization settings from the AVX512-VNNI preset. `is_static=False`
# requests the non-static (dynamic) mode, so no calibration dataset is passed
# to `quantize()` below; `per_channel=False` turns per-channel quantization
# off.
dqconfig = AutoQuantizationConfig.avx512_vnni(
    is_static=False,
    per_channel=False,
)

# Run quantization and write the output under `save_dir`; the value returned
# by `quantize()` is kept for later use.
model_quantized_path = quantizer.quantize(
    save_dir="bert-italian-ner-onnx-quantized-avx512",
    quantization_config=dqconfig,
)