# Captured from a Hugging Face Spaces file view (Space status: Runtime error).
# File size: 6,244 bytes; revision 2f22cf7.
# (The capture's line-number gutter, 1-175, has been removed.)
import gradio as gr
from inference import OneDMInference
import os
from PIL import Image
import cv2
import numpy as np
import torch
import torch.nn.functional as F
# Build the One-DM inference object once, at import time; the request handler
# in this module uses this single module-level instance.
model = OneDMInference(
    cfg_path='configs/finetuned.yml',
    model_path='one_dm_finetuned.pt',
)
# 3x3 Laplacian stencil stored as a (1, 1, 3, 3) float32 tensor so it can be
# passed directly as the weight of F.conv2d. It is created on torch's default
# device; move it explicitly if the input tensor lives elsewhere.
laplace = torch.tensor(
    [
        [0.0, 1.0, 0.0],
        [1.0, -4.0, 1.0],
        [0.0, 1.0, 0.0],
    ],
    dtype=torch.float32,
).reshape(1, 1, 3, 3)
def generate_laplace_image(image_path, target_size=(64, 64)):
    """
    Build a binarised Laplacian edge map for an image and save it as a PNG.

    The image is read with OpenCV, converted to grayscale, resized to
    ``target_size``, scaled to [0, 1] and convolved with the module-level
    ``laplace`` kernel. The response is multiplied by 255 and clipped to
    [0, 255] (negative filter responses therefore become 0), then binarised
    with Otsu's threshold.

    Args:
        image_path: Path of the source image on disk.
        target_size: Size handed to ``cv2.resize``, which interprets it as
            ``(width, height)``. Defaults to ``(64, 64)``.

    Returns:
        The path of the written file: ``image_path`` with its extension
        replaced by ``_laplace.png``.

    Raises:
        ValueError: If ``image_path`` is empty/None or cannot be decoded.
        OSError: If the resulting image cannot be written.
    """
    # Reject a missing path up front so the caller gets the same ValueError
    # as for an unreadable file instead of an OpenCV argument error.
    if not image_path:
        raise ValueError("No image path was provided")

    image = cv2.imread(image_path)
    if image is None:
        raise ValueError(f"Could not read image at {image_path}")

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.resize(gray, target_size)

    # Add batch and channel axes -> (1, 1, H, W), and normalise to [0, 1].
    x = torch.from_numpy(gray).unsqueeze(0).unsqueeze(0).float() / 255.0

    # padding=1 with a 3x3 kernel keeps the spatial dimensions intact.
    y = F.conv2d(x, laplace, stride=1, padding=1)

    # Index the single batch/channel entry rather than squeeze(), so a
    # height or width of 1 is not silently dropped as well.
    edges = y[0, 0].numpy()
    edges = np.clip(edges * 255.0, 0, 255).astype(np.uint8)

    _, threshold = cv2.threshold(edges, 0, 255, cv2.THRESH_OTSU)

    laplace_path = os.path.splitext(image_path)[0] + "_laplace.png"
    # cv2.imwrite reports failure through its return value; do not hand back
    # a path to a file that was never written.
    if not cv2.imwrite(laplace_path, threshold):
        raise OSError(f"Could not write Laplace image to {laplace_path}")
    return laplace_path
def _read_image_or_raise(path, flags=None):
    """Read an image with OpenCV, raising ValueError if it is missing or unreadable."""
    if not path:
        raise ValueError("No image path was provided")
    img = cv2.imread(path) if flags is None else cv2.imread(path, flags)
    if img is None:
        raise ValueError(f"Could not read image at {path}")
    return img


def _group_into_lines(images, word_gap, max_words_per_line, max_line_width):
    """Greedily pack word images into ``(images, width, height)`` line tuples."""
    lines = []
    current, width, height = [], 0, 0
    for img in images:
        img_w, img_h = img.size
        line_is_full = (
            len(current) >= max_words_per_line
            or width + img_w > max_line_width
        )
        # Only wrap when the current line already holds a word. Otherwise a
        # single word wider than max_line_width would emit an empty line
        # with a negative width; it gets a line of its own instead.
        if current and line_is_full:
            lines.append((current, width - word_gap, height))
            current, width, height = [], 0, 0
        current.append(img)
        width += img_w + word_gap
        height = max(height, img_h)
    if current:
        # The running width carries one trailing gap; drop it.
        lines.append((current, width - word_gap, height))
    return lines


def generate_handwriting(text, style_image, laplace_image=None):
    """
    Render ``text`` word by word with the model and merge the words into one image.

    Each whitespace-separated word is passed to ``model.generate`` together
    with a 64x64 copy of the style image and a Laplace image. The per-word
    images are laid out left-aligned and bottom-aligned on a white canvas, at
    most five words per line, wrapping at a 500 px line width.

    Side effects: creates ``./generated``, writes ``<style>_resized.png`` next
    to the style image, may write ``<laplace>_resized.png`` or
    ``<style>_laplace.png``, and writes ``./generated/merged_output.png``.

    Args:
        text: Text to render; split on whitespace into words.
        style_image: File path of the style reference image.
        laplace_image: Optional file path of a Laplace image. When ``None``,
            one is derived from ``style_image`` via ``generate_laplace_image``.

    Returns:
        A tuple ``(merged_image_path, update)`` where ``update`` is
        ``gr.update(value=<laplace image path>)`` for the second output.

    Raises:
        ValueError: If ``text`` contains no words or an input image is
            missing or cannot be decoded.
        OSError: If a resized intermediate image cannot be written.
    """
    # Validate the text first: with no words there is nothing to lay out and
    # the size computation below would fail on an empty sequence.
    words = text.split() if text else []
    if not words:
        raise ValueError("No text was provided to generate")

    # Read the style image before anything else so a missing or unreadable
    # upload fails with a clear message.
    style_img = _read_image_or_raise(style_image)

    output_dir = "./generated"
    os.makedirs(output_dir, exist_ok=True)

    # Assumes the model expects 64x64 inputs - TODO confirm against the config.
    target_size = (64, 64)

    if laplace_image is None:
        laplace_image = generate_laplace_image(style_image, target_size)
    else:
        laplace_img = _read_image_or_raise(laplace_image, cv2.IMREAD_GRAYSCALE)
        # ndarray.shape is (height, width) whereas target_size follows
        # cv2.resize's (width, height) order, so compare against the reverse.
        if laplace_img.shape != target_size[::-1]:
            laplace_img = cv2.resize(laplace_img, target_size)
            laplace_image = os.path.splitext(laplace_image)[0] + "_resized.png"
            if not cv2.imwrite(laplace_image, laplace_img):
                raise OSError(f"Could not write image to {laplace_image}")

    # Resize the style image to the same size the Laplace image uses.
    style_img_resized = cv2.resize(style_img, target_size)
    style_image_resized = os.path.splitext(style_image)[0] + "_resized.png"
    if not cv2.imwrite(style_image_resized, style_img_resized):
        raise OSError(f"Could not write image to {style_image_resized}")

    # Generate one image per word.
    generated_image_paths = []
    for word in words:
        output_paths = model.generate(
            text=word,
            style_path=style_image_resized,
            laplace_path=laplace_image,
            output_dir=output_dir,
        )
        # NOTE(review): presumably a non-empty sequence of file paths; only
        # the first entry is used - confirm against OneDMInference.generate.
        generated_image_paths.append(output_paths[0])

    # Copy each image into memory so the underlying file is closed right away.
    images = []
    for img_path in generated_image_paths:
        with Image.open(img_path) as opened:
            images.append(opened.copy())

    # Layout constants (pixels, except the word count).
    word_gap = 5
    line_gap = 20
    max_words_per_line = 5
    max_line_width = 500
    top_margin = 10
    left_margin = 10

    lines = _group_into_lines(images, word_gap, max_words_per_line, max_line_width)

    # Canvas size: widest line plus a margin on both sides; stacked line
    # heights plus the gaps between them and a margin above and below, so the
    # last line does not touch the bottom edge.
    total_width = max(line_width for _, line_width, _ in lines) + 2 * left_margin
    total_height = (
        sum(line_height for _, _, line_height in lines)
        + (len(lines) - 1) * line_gap
        + 2 * top_margin
    )

    merged_image = Image.new('RGB', (total_width, total_height), color=(255, 255, 255))

    y_offset = top_margin
    for line_images, _line_width, line_height in lines:
        x_offset = left_margin  # left-aligned, not centred
        for img in line_images:
            # Push shorter words down so every word sits on the line's bottom edge.
            word_y_offset = y_offset + (line_height - img.size[1])
            merged_image.paste(img, (x_offset, word_y_offset))
            x_offset += img.size[0] + word_gap
        y_offset += line_height + line_gap

    merged_image_path = os.path.join(output_dir, "merged_output.png")
    merged_image.save(merged_image_path)
    return merged_image_path, gr.update(value=laplace_image)
# Gradio UI definition: three inputs (text, style image path, optional Laplace
# image path) feed generate_handwriting, whose two return values fill the two
# image outputs.
_text_input = gr.Textbox(label="Text to generate")
_style_input = gr.Image(label="Style Image", type="filepath")
_laplace_input = gr.Image(label="Laplace Image (Optional)", type="filepath")

_handwriting_output = gr.Image(label="Generated Handwriting")
_laplace_output = gr.Image(label="Laplace Image (Optional)")

iface = gr.Interface(
    fn=generate_handwriting,
    inputs=[_text_input, _style_input, _laplace_input],
    outputs=[_handwriting_output, _laplace_output],
    title="Handwriting Generation",
    description="Generate handwritten text using One-DM model. If no Laplace image is provided, it will be generated from the style image.",
    examples=[
        [
            "Hello World",
            "English_data/Dataset/test/169/c04-134-05-08.png",
            "English_data/Dataset_laplace/test/169/c04-134-00-00.png",
        ],
    ],
)
# Start the Gradio app only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    # share=True requests a public share link in addition to the local server
    # (see the Gradio launch() documentation for hosting-specific behaviour).
    iface.launch(share=True)