from typing import Dict, List, Any
from PIL import Image
from io import BytesIO
from transformers import pipeline
import base64


class EndpointHandler:
    """Request handler that captions a base64-encoded image.

    Wraps a ``transformers`` ``image-to-text`` pipeline loaded from the model
    directory given at construction time.
    """

    def __init__(self, path=""):
        """Load the image-to-text pipeline.

        Args:
            path: Model identifier or directory passed to ``pipeline`` as
                ``model``.
        """
        self.pipeline = pipeline("image-to-text", model=path)

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Generate a caption for the image contained in the request.

        Args:
            data: Request payload. Either ``{"inputs": {"image": <base64>}}``
                or, when there is no ``"inputs"`` key, ``{"image": <base64>}``
                directly. The payload is not modified.

        Returns:
            The pipeline's result for the single submitted image. For the
            image-to-text pipeline this is expected to be a list of dicts
            carrying a ``generated_text`` entry -- confirm against the
            installed ``transformers`` version.

        Raises:
            KeyError: If the payload has no ``"image"`` field.
        """
        # Read without pop() so the caller's request dict is left untouched.
        inputs = data.get("inputs", data)

        # Look the field up before doing any decoding work so that a malformed
        # request fails early with a descriptive message.
        try:
            encoded_image = inputs["image"]
        except KeyError:
            raise KeyError(
                "request payload must contain a base64-encoded 'image' field"
            ) from None

        # Decode the base64 payload into a PIL image.
        image = Image.open(BytesIO(base64.b64decode(encoded_image)))

        # The pipeline is given a one-element batch; return that image's result.
        prediction = self.pipeline(images=[image])
        return prediction[0]