Spaces:
Running
Running
import os | |
from typing import List, Tuple | |
import requests | |
from PIL import Image, ImageDraw, ImageFont | |
from surya.postprocessing.fonts import get_font_path | |
from surya.schema import TextLine | |
from surya.settings import settings | |
from surya.postprocessing.math.latex import is_latex | |
def sort_text_lines(lines: List[TextLine] | List[dict], tolerance=1.25):
    """Sort text lines into an approximate reading order.

    Lines are bucketed into rows by snapping the second bbox coordinate
    (``bbox[1]``) to the nearest multiple of ``tolerance``.  Rows are emitted
    in ascending order of that snapped value and, within a row, lines are
    ordered by their first bbox coordinate (``bbox[0]``).

    Not 100% accurate, this should only be used as a starting point for more
    advanced sorting.

    Args:
        lines: ``TextLine`` objects (read through ``.bbox``) or dicts (read
            through ``["bbox"]``).
        tolerance: Size of a row bucket, in the same units as the bbox
            coordinates.

    Returns:
        A new list holding the same line objects in sorted order.
    """
    def _bbox(line):
        # Both supported input shapes expose the same bbox sequence.
        return line.bbox if isinstance(line, TextLine) else line["bbox"]

    vertical_groups = {}
    for line in lines:
        # Resolve the coordinate first, then divide.  Written inline as
        # ``a if cond else b / tolerance`` the division binds only to the
        # ``else`` branch, so TextLine inputs were never scaled by tolerance.
        group_key = round(_bbox(line)[1] / tolerance) * tolerance
        vertical_groups.setdefault(group_key, []).append(line)

    # Sort each group horizontally and flatten the groups into a single list
    sorted_lines = []
    for group_key in sorted(vertical_groups):
        sorted_lines.extend(sorted(vertical_groups[group_key], key=lambda item: _bbox(item)[0]))
    return sorted_lines
def truncate_repetitions(text: str, min_len=15):
    """Cut a repeating tail off ``text`` (adapted from nougat).

    A candidate length ``k`` in ``range(min_len, len(text) // 2)`` matches
    when the last ``k`` characters equal the ``k`` characters right before
    them.  The largest matching ``k`` defines the repeated unit, and every
    trailing copy of that unit is stripped from the end of the text.

    Args:
        text: String to inspect.
        min_len: Smallest tail length considered a repetition.  Texts shorter
            than ``2 * min_len`` are returned unchanged.

    Returns:
        ``text`` unchanged when no repetition is found, otherwise the prefix
        left after removing all trailing copies of the repeated unit.
    """
    total = len(text)
    if total < 2 * min_len:
        return text

    # Every candidate is checked; a later (longer) match overrides an
    # earlier one, so the longest matching length wins.
    tail_len = None
    for candidate in range(min_len, total // 2):
        previous_window = text[total - 2 * candidate:total - candidate]
        last_window = text[total - candidate:]
        if previous_window == last_window:
            tail_len = candidate

    if tail_len is None:
        return text

    unit = text[-tail_len:]

    # Peel the unit off the end for as long as it is still there.
    remainder = text
    while remainder.endswith(unit):
        remainder = remainder[:-tail_len]
    return remainder
def get_text_size(text, font):
    """Measure ``text`` as rendered with ``font``.

    Returns:
        A ``(width, height)`` tuple taken from the last two values of the box
        that ``ImageDraw.textbbox`` reports for the text anchored at (0, 0).
    """
    # A zero-sized palette image is enough here: nothing is drawn, the
    # drawing context is only needed for its measuring method.
    scratch = ImageDraw.Draw(Image.new(mode="P", size=(0, 0)))
    _left, _top, right, bottom = scratch.textbbox((0, 0), text=text, font=font)
    return right, bottom
def render_text(draw, text, s_bbox, bbox_width, bbox_height, font_path, box_font_size):
    """Draw ``text`` in black inside a bounding box, shrinking the font to fit.

    Starting from ``box_font_size``, the font size is reduced by one per step
    until the measured text fits within ``bbox_width`` x ``bbox_height`` or
    the size reaches 6.  The text is then drawn left-aligned at ``s_bbox[0]``
    and vertically centered within ``bbox_height`` below ``s_bbox[1]``.

    Args:
        draw: Drawing context whose ``text`` method is used for output.
        text: String to render.
        s_bbox: Bounding box; only indices 0 and 1 are read as the origin.
        bbox_width: Available width for the text.
        bbox_height: Available height for the text.
        font_path: Font file passed to ``ImageFont.truetype``.
        box_font_size: Initial font size to try.
    """
    font = ImageFont.truetype(font_path, box_font_size)
    text_width, text_height = get_text_size(text, font)

    # Shrink until the text fits; sizes at or below 6 are never reduced further.
    while (text_width > bbox_width or text_height > bbox_height) and box_font_size > 6:
        box_font_size -= 1
        font = ImageFont.truetype(font_path, box_font_size)
        text_width, text_height = get_text_size(text, font)

    # text_height already belongs to the final font, so no re-measuring is
    # needed.  Only the vertical axis is centered; x stays at the left edge.
    x = s_bbox[0]
    y = s_bbox[1] + (bbox_height - text_height) / 2
    draw.text((x, y), text, fill="black", font=font)
def render_math(image, draw, text, s_bbox, bbox_width, bbox_height, font_path):
    """Render ``text`` as LaTeX onto ``image``, falling back to plain text.

    The LaTeX path calls ``latex_to_pil`` with a font size of 20% of the box
    height clamped to [10, 24], shrinks the result with ``thumbnail`` to at
    most the box size and pastes it at the box origin.  If anything in that
    path raises, the error is printed and the raw string is drawn with
    ``render_text`` using 75% of the box height clamped to [10, 24].

    Args:
        image: Target image that receives the pasted rendering.
        draw: Drawing context used by the plain-text fallback.
        text: LaTeX source to render.
        s_bbox: Bounding box; indices 0 and 1 are used as the paste origin.
        bbox_width: Box width handed to the renderer and to ``thumbnail``.
        bbox_height: Box height handed to the renderer and to ``thumbnail``.
        font_path: Font file used by the plain-text fallback.
    """
    try:
        # Imported lazily inside the try so that an import failure is handled
        # exactly like a rendering failure.
        from surya.postprocessing.math.render import latex_to_pil

        box_font_size = min(24, max(10, int(0.2 * bbox_height)))
        rendered = latex_to_pil(text, bbox_width, bbox_height, fontsize=box_font_size)
        rendered.thumbnail((bbox_width, bbox_height))
        origin = (s_bbox[0], s_bbox[1])
        image.paste(rendered, origin)
    except Exception as e:
        print(f"Failed to render math: {e}")
        box_font_size = min(24, max(10, int(0.75 * bbox_height)))
        render_text(draw, text, s_bbox, bbox_width, bbox_height, font_path, box_font_size)
def draw_text_on_image(bboxes, texts, image_size: Tuple[int, int], langs: List[str], font_path=None, max_font_size=60, res_upscale=2, has_math=False):
    """Render recognized text onto a blank white canvas at its bbox positions.

    Args:
        bboxes: One box per text; indices 0/1 are the origin and indices 2/3
            the opposite corner (width is ``[2] - [0]``, height ``[3] - [1]``).
        texts: Strings to draw, paired with ``bboxes`` by position.  Pairing
            uses ``zip``, so surplus entries in the longer input are ignored.
        image_size: Size of the source image; both entries are multiplied by
            ``res_upscale`` to obtain the canvas size.
        langs: Languages passed to ``get_font_path`` when ``font_path`` is
            not given.
        font_path: Font file to use; looked up from ``langs`` when ``None``.
        max_font_size: Upper bound for the initial plain-text font size.
        res_upscale: Factor applied to the canvas size and to every bbox
            coordinate.
        has_math: When true, texts for which ``is_latex`` returns true are
            drawn with ``render_math`` instead of ``render_text``.

    Returns:
        The new RGB image with all texts drawn.
    """
    if font_path is None:
        font_path = get_font_path(langs)

    # Pillow requires integer canvas dimensions.  Casting here mirrors the
    # int() applied to the bbox coordinates below and keeps a fractional
    # res_upscale from producing a float size.
    new_image_size = (int(image_size[0] * res_upscale), int(image_size[1] * res_upscale))
    image = Image.new('RGB', new_image_size, color='white')
    draw = ImageDraw.Draw(image)

    for bbox, text in zip(bboxes, texts):
        s_bbox = [int(coord * res_upscale) for coord in bbox]
        bbox_width = s_bbox[2] - s_bbox[0]
        bbox_height = s_bbox[3] - s_bbox[1]

        if has_math and is_latex(text):
            render_math(image, draw, text, s_bbox, bbox_width, bbox_height, font_path)
        else:
            # Start at 75% of the box height (clamped to [6, max_font_size]);
            # render_text shrinks the text further if it does not fit.
            box_font_size = max(6, min(int(.75 * bbox_height), max_font_size))
            render_text(draw, text, s_bbox, bbox_width, bbox_height, font_path, box_font_size)

    return image