from paddleocr import PaddleOCR
from PIL import Image


def normalize_bbox(bbox, width, height) -> list[int]:
    """Rescale a pixel-space box to integer coordinates on a 0-1000 grid.

    Args:
        bbox: Sequence of four numbers; elements 0 and 2 are divided by
            ``width`` and elements 1 and 3 by ``height``.
        width: Image width in pixels (must be non-zero).
        height: Image height in pixels (must be non-zero).

    Returns:
        A list of four ints, each truncated toward zero by ``int()``.
    """
    return [
        int(1000 * (bbox[0] / width)),
        int(1000 * (bbox[1] / height)),
        int(1000 * (bbox[2] / width)),
        int(1000 * (bbox[3] / height)),
    ]


def unnormalize_box(bbox, width, height) -> list[float]:
    """Map a box on the 0-1000 grid back to pixel coordinates.

    Inverse scaling of :func:`normalize_bbox`; the result is not rounded,
    so the values are floats.

    Args:
        bbox: Sequence of four numbers on the 0-1000 grid.
        width: Image width in pixels.
        height: Image height in pixels.

    Returns:
        A list of four floats in pixel units.
    """
    return [
        width * (bbox[0] / 1000),
        height * (bbox[1] / 1000),
        width * (bbox[2] / 1000),
        height * (bbox[3] / 1000),
    ]


def OCR(image_path: str):
    """Run PaddleOCR on an image and collect normalized boxes and texts.

    Args:
        image_path: Path of the image file to recognize.

    Returns:
        A ``(bboxes, words)`` tuple of two parallel lists. Each box is built
        from the first and third corner points of the detected quadrilateral
        and rescaled with :func:`normalize_bbox`; each word is the first
        element of the line's recognition result.
    """
    # NOTE(review): a new engine is constructed on every call; if this is
    # called in a loop, consider creating the engine once in the caller.
    ocr = PaddleOCR(use_angle_cls=True)

    # Only the dimensions are needed, so release the file handle right away
    # instead of keeping the image open for the lifetime of the call.
    with Image.open(image_path) as image:
        width, height = image.size

    result = ocr.ocr(image_path, cls=True)

    bboxes = []
    words = []
    for page in result or []:
        # A page with no detected text may be reported as None (or an empty
        # list); iterating it directly would raise TypeError.
        if not page:
            continue
        for line in page:
            quad, recognition = line[0], line[1]
            # Unpack rather than `+` so the corners are concatenated even if
            # they are array-like instead of plain lists.
            corners = [*quad[0], *quad[2]]
            bboxes.append(normalize_bbox(corners, width, height))
            words.append(recognition[0])
    return bboxes, words