Spaces:
Sleeping
Sleeping
File size: 5,585 Bytes
da59cbe |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 |
import io
import os
import textwrap
from typing import Dict, Optional, Tuple
from huggingface_hub import hf_hub_download
from PIL import Image, ImageDraw, ImageFont
# Identifier handed to `hf_hub_download` as its first argument when fetching the
# fallback font file "Arial.TTF". Despite the name, this is a Hugging Face Hub
# repository id, not a local filesystem path.
DEFAULT_FONT_PATH = "ybelkada/fonts"
def download_default_font() -> str:
    """
    Fetch the default font file ("Arial.TTF") from the `DEFAULT_FONT_PATH` Hub repository.

    Returns:
        `str`: The value returned by `hf_hub_download`, i.e. the local path of the
        downloaded font file.
    """
    return hf_hub_download(DEFAULT_FONT_PATH, "Arial.TTF")
def render_text(
    text: str,
    text_size: int = 36,
    text_color: str = "black",
    background_color: str = "white",
    left_padding: int = 5,
    right_padding: int = 5,
    top_padding: int = 5,
    bottom_padding: int = 5,
    font_bytes: Optional[bytes] = None,
    font_path: Optional[str] = None,
) -> Image.Image:
    """
    Render text onto a new RGB image. This script is entirely adapted from the original script that can be found
    here: https://github.com/google-research/pix2struct/blob/main/pix2struct/preprocessing/preprocessing_utils.py

    Args:
        text (`str`):
            Text to render. It is wrapped so that each line is no more than 80 characters.
        text_size (`int`, *optional*, defaults to 36):
            Size of the text.
        text_color (`str`, *optional*, defaults to `"black"`):
            Color of the text.
        background_color (`str`, *optional*, defaults to `"white"`):
            Color of the background.
        left_padding (`int`, *optional*, defaults to 5):
            Padding on the left.
        right_padding (`int`, *optional*, defaults to 5):
            Padding on the right.
        top_padding (`int`, *optional*, defaults to 5):
            Padding on the top.
        bottom_padding (`int`, *optional*, defaults to 5):
            Padding on the bottom.
        font_bytes (`bytes`, *optional*):
            Bytes of the font to use. Only used when `font_path` is `None`.
        font_path (`str`, *optional*):
            Path to the font to use. Takes precedence over `font_bytes`. If both `font_path` and `font_bytes`
            are `None`, the default font is downloaded and used, and a warning is emitted.

    Returns:
        `PIL.Image.Image`: An RGB image containing the wrapped text surrounded by the requested padding.
    """
    # Function-scope import keeps this block self-contained (the module does not import `warnings`).
    import warnings

    # Add new lines so that each line is no more than 80 characters.
    wrapped_text = "\n".join(textwrap.wrap(text, width=80))

    # Font resolution order: explicit path, then in-memory bytes, then the default font.
    if font_path is not None:
        font_source = font_path
    elif font_bytes is not None:
        font_source = io.BytesIO(font_bytes)
    else:
        # The original code raised here right after downloading the font, which made the
        # documented default-font fallback unreachable. Fall back and tell the caller instead.
        font_source = download_default_font()
        warnings.warn(
            "Neither font_bytes nor font_path was provided. "
            f"Using default font {font_source}.",
            stacklevel=2,
        )
    font = ImageFont.truetype(font_source, encoding="UTF-8", size=text_size)

    # Use a temporary canvas to determine the width and height in pixels when
    # rendering the text.
    temp_draw = ImageDraw.Draw(Image.new("RGB", (1, 1), background_color))
    # With the anchor at (0, 0), the right/bottom edges of the bounding box are the
    # extent that the text occupies measured from the drawing origin.
    _, _, text_width, text_height = temp_draw.textbbox((0, 0), wrapped_text, font)

    # Create the actual image with a bit of padding around the text.
    image_width = text_width + left_padding + right_padding
    image_height = text_height + top_padding + bottom_padding
    image = Image.new("RGB", (image_width, image_height), background_color)
    draw = ImageDraw.Draw(image)
    draw.text(
        xy=(left_padding, top_padding), text=wrapped_text, fill=text_color, font=font
    )
    return image
# Adapted from https://github.com/google-research/pix2struct/blob/0e1779af0f4db4b652c1d92b3bbd2550a7399123/pix2struct/preprocessing/preprocessing_utils.py#L87
def render_header(
    image: Image.Image, header: str, bbox: Dict[str, float], font_path: str, **kwargs
) -> Tuple[Image.Image, Dict[str, float], Dict[str, int]]:
    """
    Renders the input text as a header on the input image and updates the bounding box.

    The header is rendered with `render_text`, then the header and the image are both resized
    to a common width (the larger of the two) and stacked vertically, header on top, on a
    white RGB canvas.

    Args:
        image (Image.Image):
            The image to render the header on.
        header (str):
            The header text.
        bbox (Dict[str, float]):
            The bounding box in relative position (0-1) with respect to `image`, with keys
            `"xmin"`, `"ymin"`, `"xmax"` and `"ymax"`.
        font_path (str):
            Path to the font file used to render the header. Must exist on disk.
        **kwargs:
            Extra keyword arguments forwarded to `render_text` (e.g. `text_size`, paddings).

    Returns:
        Tuple[Image.Image, Dict[str, float], Dict[str, int]]:
            - The image with the header rendered on top.
            - The updated bounding box, relative to the new image (same keys as `bbox`).
            - The pixel sizes used: `"width"`, `"height"` (resized input image),
              `"header_height"` and `"total_height"`.

    Raises:
        FileNotFoundError: If `font_path` does not exist.
    """
    # Explicit check instead of `assert`, which is stripped when Python runs with -O.
    if not os.path.exists(font_path):
        raise FileNotFoundError(f"Font path {font_path} does not exist.")

    header_image = render_text(text=header, font_path=font_path, **kwargs)

    # Bring both pieces to the same width while preserving their aspect ratios.
    new_width = max(header_image.width, image.width)
    new_height = int(image.height * (new_width / image.width))
    new_header_height = int(header_image.height * (new_width / header_image.width))
    new_total_height = new_height + new_header_height

    new_image = Image.new("RGB", (new_width, new_total_height), "white")
    new_image.paste(header_image.resize((new_width, new_header_height)), (0, 0))
    new_image.paste(image.resize((new_width, new_height)), (0, new_header_height))

    # Horizontal coordinates are unchanged because the image is scaled uniformly to the
    # full canvas width. Vertical coordinates are converted to pixels in the resized image,
    # shifted down by the header height, then re-normalised by the new total height.
    new_bbox = {
        "xmin": bbox["xmin"],
        "ymin": ((bbox["ymin"] * new_height) + new_header_height)
        / new_total_height,  # shift y_min down by the header's relative height
        "xmax": bbox["xmax"],
        "ymax": ((bbox["ymax"] * new_height) + new_header_height)
        / new_total_height,  # shift y_max down by the header's relative height
    }
    return (
        new_image,
        new_bbox,
        {
            "width": new_width,
            "height": new_height,
            "header_height": new_header_height,
            "total_height": new_total_height,
        },
    )
|