Spaces:
Sleeping
Sleeping
import io | |
import os | |
import textwrap | |
from typing import Dict, Optional, Tuple | |
from huggingface_hub import hf_hub_download | |
from PIL import Image, ImageDraw, ImageFont | |
# Hub repository id passed to `hf_hub_download` when fetching the fallback font.
DEFAULT_FONT_PATH = "ybelkada/fonts"


def download_default_font():
    """
    Download `Arial.TTF` from the `DEFAULT_FONT_PATH` repository.

    Returns:
        The value returned by `hf_hub_download` for the requested font file.
    """
    return hf_hub_download(DEFAULT_FONT_PATH, "Arial.TTF")
def render_text(
    text: str,
    text_size: int = 36,
    text_color: str = "black",
    background_color: str = "white",
    left_padding: int = 5,
    right_padding: int = 5,
    top_padding: int = 5,
    bottom_padding: int = 5,
    font_bytes: Optional[bytes] = None,
    font_path: Optional[str] = None,
) -> Image.Image:
    """
    Render text onto a new RGB image sized to fit the text plus padding.

    This script is entirely adapted from the original script that can be found here:
    https://github.com/google-research/pix2struct/blob/main/pix2struct/preprocessing/preprocessing_utils.py

    Args:
        text (`str`):
            Text to render. It is wrapped so that no line exceeds 80 characters.
        text_size (`int`, *optional*, defaults to 36):
            Size of the text.
        text_color (`str`, *optional*, defaults to `"black"`):
            Color of the text.
        background_color (`str`, *optional*, defaults to `"white"`):
            Color of the background.
        left_padding (`int`, *optional*, defaults to 5):
            Padding on the left.
        right_padding (`int`, *optional*, defaults to 5):
            Padding on the right.
        top_padding (`int`, *optional*, defaults to 5):
            Padding on the top.
        bottom_padding (`int`, *optional*, defaults to 5):
            Padding on the bottom.
        font_bytes (`bytes`, *optional*):
            Bytes of the font to use. Ignored when `font_path` is also given.
        font_path (`str`, *optional*):
            Path to the font to use. Takes precedence over `font_bytes`.
            If both are `None`, the default font is downloaded and used.

    Returns:
        `Image.Image`: The image containing the rendered, wrapped text.
    """
    # Add new lines so that each line is no more than 80 characters.
    wrapper = textwrap.TextWrapper(width=80)
    wrapped_text = "\n".join(wrapper.wrap(text=text))

    # Resolve the font source: an explicit path wins, then raw bytes, and
    # finally the default font (as the documented contract promises) instead
    # of raising after the download has already happened.
    if font_path is not None:
        font_source = font_path
    elif font_bytes is not None:
        font_source = io.BytesIO(font_bytes)
    else:
        font_source = download_default_font()
    font = ImageFont.truetype(font_source, encoding="UTF-8", size=text_size)

    # Use a temporary canvas to determine the width and height in pixels when
    # rendering the text.
    temp_draw = ImageDraw.Draw(Image.new("RGB", (1, 1), background_color))
    _, _, text_width, text_height = temp_draw.textbbox((0, 0), wrapped_text, font)

    # Create the actual image with a bit of padding around the text.
    image_width = text_width + left_padding + right_padding
    image_height = text_height + top_padding + bottom_padding
    image = Image.new("RGB", (image_width, image_height), background_color)

    draw = ImageDraw.Draw(image)
    draw.text(
        xy=(left_padding, top_padding), text=wrapped_text, fill=text_color, font=font
    )
    return image
# Adapted from https://github.com/google-research/pix2struct/blob/0e1779af0f4db4b652c1d92b3bbd2550a7399123/pix2struct/preprocessing/preprocessing_utils.py#L87 | |
def render_header(
    image: Image.Image, header: str, bbox: Dict[str, float], font_path: str, **kwargs
) -> Tuple[Image.Image, Dict[str, float], Dict[str, int]]:
    """
    Renders the input text as a header on the input image and updates the bounding box.

    The header and the image are both resized to a common width (the larger of
    the two), then stacked vertically with the header on top.

    Args:
        image (Image.Image):
            The image to render the header on.
        header (str):
            The header text.
        bbox (Dict[str, float]):
            The bounding box in relative position (0-1), with keys
            "xmin", "ymin", "xmax" and "ymax".
        font_path (str):
            Path to the font file used to render the header. Must exist.
        **kwargs:
            Extra keyword arguments forwarded to `render_text`.

    Returns:
        Tuple[Image.Image, Dict[str, float], Dict[str, int]]:
            The image with the header rendered, the updated bounding box
            (same keys as `bbox`), and a dict of pixel sizes with keys
            "width", "height" (resized input image), "header_height" and
            "total_height".

    Raises:
        FileNotFoundError: If `font_path` does not exist.
    """
    # Explicit check instead of `assert`, which is removed when running with -O.
    if not os.path.exists(font_path):
        raise FileNotFoundError(f"Font path {font_path} does not exist.")

    header_image = render_text(text=header, font_path=font_path, **kwargs)

    # Scale both parts to the same width, preserving each one's aspect ratio.
    new_width = max(header_image.width, image.width)
    new_height = int(image.height * (new_width / image.width))
    new_header_height = int(header_image.height * (new_width / header_image.width))

    new_image = Image.new("RGB", (new_width, new_height + new_header_height), "white")
    new_image.paste(header_image.resize((new_width, new_header_height)), (0, 0))
    new_image.paste(image.resize((new_width, new_height)), (0, new_header_height))

    new_total_height = new_image.height
    # The image spans the full width of the new canvas, so relative x
    # coordinates are unchanged; only the y coordinates move.
    new_bbox = {
        "xmin": bbox["xmin"],
        # shift y_min down by the header's relative height
        "ymin": ((bbox["ymin"] * new_height) + new_header_height) / new_total_height,
        "xmax": bbox["xmax"],
        # shift y_max down by the header's relative height
        "ymax": ((bbox["ymax"] * new_height) + new_header_height) / new_total_height,
    }

    return (
        new_image,
        new_bbox,
        {
            "width": new_width,
            "height": new_height,
            "header_height": new_header_height,
            "total_height": new_total_height,
        },
    )