Spaces:

Kishan11
/

handwriting_generation_v2

Runtime error

App Files Files Community

handwriting_generation_v2 / app.py

Kishan11

Update app.py

2f22cf7 verified 2 months ago

raw

history blame contribute delete

6.24 kB

	import gradio as gr
	from inference import OneDMInference
	import os
	from PIL import Image
	import cv2
	import numpy as np
	import torch
	import torch.nn.functional as F

	# Build the One-DM inference wrapper once at import time; generate_handwriting()
	# calls model.generate() on this shared instance for every word.
	model = OneDMInference(
	    cfg_path='configs/finetuned.yml',
	    model_path='one_dm_finetuned.pt',
	)

	# 3x3 Laplacian kernel, already shaped (1, 1, 3, 3) so it can be passed
	# directly as the weight of F.conv2d. No device is specified here; move it
	# explicitly if the input tensors live on another device.
	laplace = torch.tensor(
	    [[[[0.0, 1.0, 0.0],
	       [1.0, -4.0, 1.0],
	       [0.0, 1.0, 0.0]]]],
	    dtype=torch.float32,
	)

	def generate_laplace_image(image_path, target_size=(64, 64)):
	    """
	    Build a binarised Laplacian edge map for the image at ``image_path``.

	    The image is read with OpenCV, converted to grayscale, resized to
	    ``target_size`` (handed to ``cv2.resize`` as its size argument), scaled
	    to [0, 1], convolved with the module-level ``laplace`` kernel and finally
	    thresholded with Otsu's method.

	    The result is written next to the input as ``<stem>_laplace.png`` and
	    that path is returned.

	    Raises:
	        ValueError: if OpenCV cannot read ``image_path``.
	    """
	    bgr = cv2.imread(image_path)
	    if bgr is None:
	        raise ValueError(f"Could not read image at {image_path}")

	    # Grayscale first, then resize to the size the model is fed.
	    gray = cv2.resize(cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY), target_size)

	    # Add batch and channel axes and scale pixel values to [0, 1].
	    pixels = torch.from_numpy(gray)[None, None].float() / 255.0

	    # padding=1 with a 3x3 kernel keeps the spatial size unchanged.
	    response = F.conv2d(pixels, laplace, stride=1, padding=1)

	    # Back to 8-bit; negative filter responses are clipped to 0.
	    edges = np.clip(response.squeeze().numpy() * 255.0, 0, 255).astype(np.uint8)

	    # Otsu picks the threshold itself, so the 0 passed here is ignored.
	    _, binary = cv2.threshold(edges, 0, 255, cv2.THRESH_OTSU)

	    stem, _ext = os.path.splitext(image_path)
	    laplace_path = stem + "_laplace.png"
	    cv2.imwrite(laplace_path, binary)
	    return laplace_path
	def _group_into_lines(images, word_gap, max_words_per_line, max_line_width):
	    """Greedily pack word images into lines of (images, width, height) tuples."""
	    lines = []
	    current, width, height = [], 0, 0
	    for img in images:
	        img_w, img_h = img.size
	        line_is_full = (
	            len(current) >= max_words_per_line
	            or width + img_w > max_line_width
	        )
	        # Only break when the current line already holds a word; otherwise a
	        # single image wider than max_line_width would emit an empty line
	        # (negative width, zero height) and add a spurious line gap.
	        if current and line_is_full:
	            lines.append((current, width - word_gap, height))
	            current, width, height = [], 0, 0

	        current.append(img)
	        width += img_w + word_gap
	        height = max(height, img_h)

	    if current:
	        # The running width includes one trailing gap; drop it.
	        lines.append((current, width - word_gap, height))
	    return lines

	def _render_page(lines, word_gap, line_gap, top_margin, left_margin):
	    """Paste laid-out word images onto a white RGB canvas and return it."""
	    total_width = max(width for _, width, _ in lines) + 2 * left_margin
	    # Only a top margin is added vertically (no bottom margin).
	    total_height = (
	        sum(height for _, _, height in lines)
	        + (len(lines) - 1) * line_gap
	        + top_margin
	    )
	    page = Image.new('RGB', (total_width, total_height), color=(255, 255, 255))

	    y_offset = top_margin
	    for line_images, _, line_height in lines:
	        x_offset = left_margin  # left-aligned, not centred
	        for img in line_images:
	            # Bottom-align every word within its line.
	            page.paste(img, (x_offset, y_offset + line_height - img.size[1]))
	            x_offset += img.size[0] + word_gap
	        y_offset += line_height + line_gap
	    return page

	def generate_handwriting(text, style_image, laplace_image=None):
	    """
	    Generate handwriting for ``text`` in the style of ``style_image``.

	    Each whitespace-separated word is generated separately with
	    ``model.generate`` and the resulting images are merged into one
	    left-aligned, multi-line page.

	    Args:
	        text: Text to render; must contain at least one word.
	        style_image: File path of the style reference image.
	        laplace_image: Optional file path of a Laplace image. When ``None``
	            it is derived from ``style_image`` via ``generate_laplace_image``.

	    Returns:
	        A tuple ``(merged_image_path, gr.update(value=laplace_path))``.

	    Raises:
	        ValueError: if ``text`` is blank, ``style_image`` is missing, or an
	            input image cannot be read.

	    Side effects:
	        Writes ``<style>_resized.png`` next to the style image, possibly
	        ``<laplace>_resized.png`` next to the Laplace image, and
	        ``./generated/merged_output.png``.
	    """
	    # Validate up front so nothing is written for a request that cannot succeed.
	    if not text or not text.strip():
	        raise ValueError("No text provided: enter at least one word to generate")
	    if not style_image:
	        raise ValueError("A style image is required")

	    output_dir = "./generated"
	    os.makedirs(output_dir, exist_ok=True)

	    # Assume model expects 64x64 inputs (adjust if config specifies otherwise).
	    target_size = (64, 64)
	    # cv2.resize takes (width, height) while ndarray.shape is (height, width).
	    expected_shape = (target_size[1], target_size[0])

	    if laplace_image is None:
	        laplace_image = generate_laplace_image(style_image, target_size)
	    else:
	        laplace_img = cv2.imread(laplace_image, cv2.IMREAD_GRAYSCALE)
	        if laplace_img is None:
	            raise ValueError(f"Could not read image at {laplace_image}")
	        if laplace_img.shape != expected_shape:
	            laplace_img = cv2.resize(laplace_img, target_size)
	            laplace_image = os.path.splitext(laplace_image)[0] + "_resized.png"
	            cv2.imwrite(laplace_image, laplace_img)

	    # Resize the style image to match model expectations.
	    style_img = cv2.imread(style_image)
	    if style_img is None:
	        raise ValueError(f"Could not read image at {style_image}")
	    style_image_resized = os.path.splitext(style_image)[0] + "_resized.png"
	    cv2.imwrite(style_image_resized, cv2.resize(style_img, target_size))

	    # Generate one image per word; only the first returned path is used.
	    generated_image_paths = []
	    for word in text.split():
	        output_paths = model.generate(
	            text=word,
	            style_path=style_image_resized,
	            laplace_path=laplace_image,
	            output_dir=output_dir,
	        )
	        generated_image_paths.append(output_paths[0])

	    images = [Image.open(img_path) for img_path in generated_image_paths]

	    # Layout constants (pixels).
	    word_gap = 5
	    line_gap = 20
	    max_words_per_line = 5
	    max_line_width = 500
	    top_margin = 10
	    left_margin = 10

	    lines = _group_into_lines(images, word_gap, max_words_per_line, max_line_width)
	    merged_image = _render_page(lines, word_gap, line_gap, top_margin, left_margin)

	    merged_image_path = os.path.join(output_dir, "merged_output.png")
	    merged_image.save(merged_image_path)

	    return merged_image_path, gr.update(value=laplace_image)


	# Gradio UI wiring: three inputs (text, style image path, optional Laplace
	# image path) map positionally onto generate_handwriting's parameters, and its
	# two return values map onto the two output image components.
	iface = gr.Interface(
	    title="Handwriting Generation",
	    description="Generate handwritten text using One-DM model. If no Laplace image is provided, it will be generated from the style image.",
	    fn=generate_handwriting,
	    inputs=[
	        gr.Textbox(label="Text to generate"),
	        gr.Image(label="Style Image", type="filepath"),
	        gr.Image(label="Laplace Image (Optional)", type="filepath"),
	    ],
	    outputs=[
	        gr.Image(label="Generated Handwriting"),
	        gr.Image(label="Laplace Image (Optional)"),
	    ],
	    # One example row: values are given in the same order as `inputs`.
	    examples=[
	        [
	            "Hello World",
	            "English_data/Dataset/test/169/c04-134-05-08.png",
	            "English_data/Dataset_laplace/test/169/c04-134-00-00.png",
	        ],
	    ],
	)

	# Start the app only when this file is executed as a script.
	if __name__ == "__main__":
	    iface.launch(share=True)