from typing import Any, Dict

from optimum.onnxruntime import ORTModelForQuestionAnswering
from transformers import AutoTokenizer, pipeline


class EndpointHandler:
    def __init__(self, path=""):
        # load the optimized and quantized ONNX model
        self.model = ORTModelForQuestionAnswering.from_pretrained(
            path, file_name="model_optimized_quantized.onnx"
        )
        self.tokenizer = AutoTokenizer.from_pretrained(path)
        # create the question-answering pipeline
        self.pipeline = pipeline("question-answering", model=self.model, tokenizer=self.tokenizer)

    def __call__(self, data: Any) -> Dict[str, Any]:
        """
        Args:
            data (:obj:`dict`):
                includes the input data (a question/context pair) and the parameters for the inference.
        Return:
            A :obj:`dict` containing the answer, its score, and the start/end positions of the answer in the context.
        """
        inputs = data.get("inputs", data)
        # run the model
        prediction = self.pipeline(**inputs)
        # return prediction
        return prediction
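

# -----------------------------------------------------------------------------
# Local smoke test (illustrative sketch, not part of the deployed handler):
# it shows the payload shape the handler expects, i.e. a dict with an "inputs"
# key holding a question/context pair. "./onnx-qa-model" is a placeholder path
# for a directory containing model_optimized_quantized.onnx and the tokenizer
# files; swap in your own model directory.
if __name__ == "__main__":
    handler = EndpointHandler(path="./onnx-qa-model")

    payload = {
        "inputs": {
            "question": "Which runtime serves the model?",
            "context": "The model is exported to ONNX and served with ONNX Runtime via Optimum.",
        }
    }

    prediction = handler(payload)
    # e.g. {"score": ..., "start": ..., "end": ..., "answer": "ONNX Runtime"}
    print(prediction)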