from typing import Any, Dict, List, Union

from optimum.onnxruntime import ORTModelForQuestionAnswering
from transformers import AutoTokenizer, pipeline


class EndpointHandler:
    def __init__(self, path: str = ""):
        # load the optimized and quantized ONNX model exported with Optimum
        self.model = ORTModelForQuestionAnswering.from_pretrained(path, file_name="model_optimized_quantized.onnx")
        self.tokenizer = AutoTokenizer.from_pretrained(path)
        # create a question-answering pipeline backed by the ONNX Runtime model
        self.pipeline = pipeline("question-answering", model=self.model, tokenizer=self.tokenizer)
    def __call__(self, data: Dict[str, Any]) -> Union[Dict[str, Any], List[Dict[str, Any]]]:
        """
        Args:
            data (:obj:`dict`):
                The request payload, containing the inference inputs and any parameters.
        Return:
            A :obj:`dict` (or a :obj:`list` of dicts for batched inputs) containing the
            predicted answer, its score, and the character span within the context.
        """
        inputs = data.get("inputs", data)
        # run the question-answering pipeline on the question/context pair(s)
        prediction = self.pipeline(**inputs)
        # return the prediction
        return prediction
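

# Example usage (illustrative sketch, not part of the original handler): instantiate
# the handler against a local model directory and run a single question-answering
# request. The path "./onnx-qa-model" is a placeholder and assumes the directory
# contains the tokenizer files plus "model_optimized_quantized.onnx".
if __name__ == "__main__":
    handler = EndpointHandler(path="./onnx-qa-model")
    payload = {
        "inputs": {
            "question": "How is the model optimized?",
            "context": "The model is optimized and quantized with ONNX Runtime via Optimum.",
        }
    }
    # expected shape of the result: {"score": float, "start": int, "end": int, "answer": str}
    print(handler(payload))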