djomo's picture
Upload 2 files
e7bc726 verified
raw
history blame
1.4 kB
from typing import Dict, List, Any
import base64
from PIL import Image
from io import BytesIO
import numpy as np
from paddleocr import PaddleOCR, draw_ocr
class EndpointHandler():
    """Endpoint handler that runs PaddleOCR on a base64-encoded image.

    The OCR pipeline is built once in ``__init__``; each call to the instance
    decodes one image, runs OCR on it and returns the recognised text.
    """

    def __init__(self, path=""):
        """Build the PaddleOCR pipeline.

        Args:
            path: Accepted for compatibility with the handler interface;
                it is not used by this handler.
        """
        self.pipeline = PaddleOCR(lang="en", ocr_version="PP-OCRv4", show_log=False, use_gpu=True)

    def __call__(self, data: Any) -> str:
        """Run OCR on the image carried by ``data`` and return its text.

        Args:
            data: Request payload. Its ``"inputs"`` entry (or, when that key
                is absent, ``data`` itself) must be a base64-encoded image.
                The payload is not modified.

        Returns:
            All recognised text fragments concatenated without a separator,
            or an empty string when no text was recognised.
        """
        inputs = data.get("inputs", data)
        raw_image = base64.b64decode(inputs)
        # Close the PIL image as soon as the pixel data has been copied out.
        with Image.open(BytesIO(raw_image)) as receipt_image:
            receipt_image_array = np.array(receipt_image.convert("RGB"))
        # NOTE(review): cls=True asks for angle classification, but the
        # pipeline above is built without `use_angle_cls` -- confirm that
        # the classifier is actually applied.
        result = self.pipeline.ocr(receipt_image_array, cls=True)
        return self._join_text(result)

    @staticmethod
    def _join_text(result: Any) -> str:
        """Concatenate the recognised text of the first image in ``result``.

        ``result`` holds one entry per input image. The entry is ``None``
        when nothing was detected; otherwise it is a sequence of lines whose
        second element starts with the recognised text.
        """
        # Guard against "no text found": iterating a None entry would raise.
        if not result or result[0] is None:
            return ""
        return "".join(line[1][0] for line in result[0])