import base64
from io import BytesIO
from typing import Any, Dict, List

from PIL import Image
from transformers import pipeline


class EndpointHandler:
    """Callable request handler that captions an image with an image-to-text pipeline.

    The handler builds a ``transformers`` ``image-to-text`` pipeline once at
    construction time and reuses it for every call.
    """

    def __init__(self, path=""):
        """Load the image-to-text pipeline.

        Args:
            path: Model identifier or directory forwarded as ``model=`` to
                ``transformers.pipeline``.
        """
        self.pipeline = pipeline("image-to-text", model=path)

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Caption a single base64-encoded image.

        Args:
            data: Request payload. If it has an ``"inputs"`` key, that value is
                used (and removed from ``data``, since it is popped); otherwise
                ``data`` itself is treated as the inputs. The inputs are either
                a mapping with an ``"image"`` key holding the base64-encoded
                image, or the base64-encoded image itself (``str``/``bytes``).

        Returns:
            The pipeline's result for the one submitted image, i.e. the first
            element of the batch output. NOTE(review): per the transformers
            documentation this is presumably a list of
            ``{"generated_text": ...}`` dicts -- confirm against the deployed
            model.

        Raises:
            KeyError: If the inputs are a mapping without an ``"image"`` key.
        """
        inputs = data.pop("inputs", data)

        # Accept both {"image": <base64>} and a bare base64 payload; the
        # mapping form keeps its original behaviour (KeyError when missing).
        encoded = inputs["image"] if isinstance(inputs, dict) else inputs

        # Decode the base64 payload into a PIL image.
        image = Image.open(BytesIO(base64.b64decode(encoded)))

        # The pipeline is called with a one-element batch, so the result for
        # our image is the first entry of what it returns.
        prediction = self.pipeline(images=[image])
        return prediction[0]