"""Export a Hugging Face sentiment classifier to ONNX and quantize it.

The script loads the ``distilbert-base-uncased-finetuned-sst-2-english``
checkpoint from the Hugging Face hub, wraps it in a text-classification
pipeline, exports that pipeline to an ONNX graph and finally writes a second
ONNX file whose weights are dynamically quantized to unsigned 8-bit integers.

Alternative: the ONNX model can also be created from the command line with

    python3 -m transformers.onnx --model=distilbert-base-uncased-finetuned-sst-2-english sentiment_onnx/
"""
from pathlib import Path

import onnx
import onnxruntime as ort
import transformers
import transformers.convert_graph_to_onnx as onnx_convert
from onnxruntime.quantization import QuantType, quantize_dynamic
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Output locations; both files live in the same directory.
OUTPUT_DIR = Path("sent_clf_onnx")
ONNX_MODEL_PATH = OUTPUT_DIR / "sentiment_classifier_onnx.onnx"
QUANTIZED_MODEL_PATH = OUTPUT_DIR / "sentiment_classifier_onnx_int8.onnx"

# Download the model and tokenizer directly from the hub.
chkpt = 'distilbert-base-uncased-finetuned-sst-2-english'
model = AutoModelForSequenceClassification.from_pretrained(chkpt)
tokenizer = AutoTokenizer.from_pretrained(chkpt)

pipeline = transformers.pipeline(
    "text-classification",
    model=model,
    tokenizer=tokenizer,
)

# convert_pytorch() is called directly here, so the output directory has to be
# created up front; otherwise writing the exported file fails on a fresh
# checkout where the directory does not exist yet.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Convert the pipeline to an ONNX graph.
# NOTE(review): transformers.convert_graph_to_onnx is flagged as deprecated in
# recent transformers releases -- confirm against the pinned version.
onnx_convert.convert_pytorch(
    pipeline,
    opset=11,
    output=ONNX_MODEL_PATH,
    use_external_format=False,
)

# Convert the ONNX graph to a second ONNX graph with int8-quantized weights.
# Paths are passed as POSIX strings, matching the original string arguments.
quantize_dynamic(
    ONNX_MODEL_PATH.as_posix(),
    QUANTIZED_MODEL_PATH.as_posix(),
    weight_type=QuantType.QUInt8,
)

# Report the library versions used for the export.
print(ort.__version__)
print(onnx.__version__)