# Copyright (C) 2021-2024, Mindee. # This program is licensed under the Apache License 2.0. # See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details. from typing import Any, Dict, Optional import numpy as np from anyascii import anyascii from PIL import Image, ImageDraw from .fonts import get_font __all__ = ["synthesize_page", "synthesize_kie_page"] def synthesize_page( page: Dict[str, Any], draw_proba: bool = False, font_family: Optional[str] = None, ) -> np.ndarray: """Draw a the content of the element page (OCR response) on a blank page. Args: ---- page: exported Page object to represent draw_proba: if True, draw words in colors to represent confidence. Blue: p=1, red: p=0 font_size: size of the font, default font = 13 font_family: family of the font Returns: ------- the synthesized page """ # Draw template h, w = page["dimensions"] response = 255 * np.ones((h, w, 3), dtype=np.int32) # Draw each word for block in page["blocks"]: for line in block["lines"]: for word in line["words"]: # Get absolute word geometry (xmin, ymin), (xmax, ymax) = word["geometry"] xmin, xmax = int(round(w * xmin)), int(round(w * xmax)) ymin, ymax = int(round(h * ymin)), int(round(h * ymax)) # White drawing context adapted to font size, 0.75 factor to convert pts --> pix font = get_font(font_family, int(0.75 * (ymax - ymin))) img = Image.new("RGB", (xmax - xmin, ymax - ymin), color=(255, 255, 255)) d = ImageDraw.Draw(img) # Draw in black the value of the word try: d.text((0, 0), word["value"], font=font, fill=(0, 0, 0)) except UnicodeEncodeError: # When character cannot be encoded, use its anyascii version d.text((0, 0), anyascii(word["value"]), font=font, fill=(0, 0, 0)) # Colorize if draw_proba if draw_proba: p = int(255 * word["confidence"]) mask = np.where(np.array(img) == 0, 1, 0) proba: np.ndarray = np.array([255 - p, 0, p]) color = mask * proba[np.newaxis, np.newaxis, :] white_mask = 255 * (1 - mask) img = color + white_mask # Write to response page response[ymin:ymax, xmin:xmax, :] = np.array(img) return response def synthesize_kie_page( page: Dict[str, Any], draw_proba: bool = False, font_family: Optional[str] = None, ) -> np.ndarray: """Draw a the content of the element page (OCR response) on a blank page. Args: ---- page: exported Page object to represent draw_proba: if True, draw words in colors to represent confidence. Blue: p=1, red: p=0 font_size: size of the font, default font = 13 font_family: family of the font Returns: ------- the synthesized page """ # Draw template h, w = page["dimensions"] response = 255 * np.ones((h, w, 3), dtype=np.int32) # Draw each word for predictions in page["predictions"].values(): for prediction in predictions: # Get aboslute word geometry (xmin, ymin), (xmax, ymax) = prediction["geometry"] xmin, xmax = int(round(w * xmin)), int(round(w * xmax)) ymin, ymax = int(round(h * ymin)), int(round(h * ymax)) # White drawing context adapted to font size, 0.75 factor to convert pts --> pix font = get_font(font_family, int(0.75 * (ymax - ymin))) img = Image.new("RGB", (xmax - xmin, ymax - ymin), color=(255, 255, 255)) d = ImageDraw.Draw(img) # Draw in black the value of the word try: d.text((0, 0), prediction["value"], font=font, fill=(0, 0, 0)) except UnicodeEncodeError: # When character cannot be encoded, use its anyascii version d.text((0, 0), anyascii(prediction["value"]), font=font, fill=(0, 0, 0)) # Colorize if draw_proba if draw_proba: p = int(255 * prediction["confidence"]) mask = np.where(np.array(img) == 0, 1, 0) proba: np.ndarray = np.array([255 - p, 0, p]) color = mask * proba[np.newaxis, np.newaxis, :] white_mask = 255 * (1 - mask) img = color + white_mask # Write to response page response[ymin:ymax, xmin:xmax, :] = np.array(img) return response