# z-uo's picture
# Upload 2 files
# a0b2bc1 verified
from optimum.onnxruntime import ORTModelForTokenClassification, ORTQuantizer
from optimum.onnxruntime.configuration import AutoQuantizationConfig
# Load the "nickprock/bert-italian-finetuned-ner" checkpoint as an ONNX Runtime
# token-classification model. NOTE(review): export=True is expected to convert
# the checkpoint to ONNX while loading -- confirm against the optimum docs.
onnx_model = ORTModelForTokenClassification.from_pretrained(
    "nickprock/bert-italian-finetuned-ner", export=True
)

# Build the quantizer directly from the in-memory ONNX model.
quantizer = ORTQuantizer.from_pretrained(onnx_model)

# AVX512-VNNI quantization preset with is_static=False (i.e. not static
# quantization) and per_channel=False.
dqconfig = AutoQuantizationConfig.avx512_vnni(per_channel=False, is_static=False)

# Run quantization with the config above; the call is given the output
# directory via save_dir and its return value is kept for later use.
model_quantized_path = quantizer.quantize(
    quantization_config=dqconfig,
    save_dir="bert-italian-ner-onnx-quantized-avx512",
)