Spaces:

Derendering
/

Model-Output-Playground

Running

Charlie Li

supplementary

9604304 about 1 year ago

5.18 kB

	import gradio as gr
	import os
	import random
	import datetime
	from utils import *

	# Remote location of the supplementary archive and the local file name it is
	# saved under.
	file_url = "https://storage.googleapis.com/derendering_model/derendering_supp.zip"
	filename = "derendering_supp.zip"

	# Runs at import time, before the UI is built: fetch the archive, then unpack it.
	# NOTE(review): presumably this extracts into ./derendering_supp/ (the paths
	# read below and inside demo() rely on that directory) — confirm against the
	# download_file / unzip_file helpers in utils.
	download_file(file_url, filename)
	unzip_file(filename)
	print("Downloaded and unzipped the file.")

	# SVG assets interpolated into the page header further down.
	# NOTE(review): get_svg_content presumably returns the SVG markup as text —
	# confirm in utils.
	diagram = get_svg_content("derendering_supp/derender_diagram.svg")
	org = get_svg_content("org/cor.svg")

	# HTML snippet handed to gr.HTML: the `org` SVG surrounded by newlines.
	org_content = f"""
	{org}
	"""


	# Directory-name prefix of the pre-computed InkML outputs for each model variant.
	_MODEL_DIR_PREFIX = {
	    "Small-i": "small-i",
	    "Small-p": "small-p",
	    "Large-i": "large-i",
	}


	def _render_ink_image(ink, img):
	    """Plot ``ink`` with ``img`` as input image and return the PNG-encoded figure opened via ``Image.open``."""
	    fig, ax = plt.subplots()
	    try:
	        ax.axis("off")
	        plot_ink(ink, ax, input_image=img)
	        buf = BytesIO()
	        fig.savefig(buf, format="png", bbox_inches="tight")
	    finally:
	        # Always release the figure, even if plotting or saving fails.
	        plt.close(fig)
	    buf.seek(0)
	    return Image.open(buf)


	def demo(Dataset, Model, Output_Format):
	    """Pick one random sample image and collect its outputs for all inference modes.

	    For the chosen dataset/model pair, one file is drawn at random from
	    ``./derendering_supp/{Dataset}/images_sample``. For each of the three
	    query modes ("d+t", "r+d", "vanilla") the matching ``.inkml`` file is
	    read, its ``textField`` annotation is turned into a caption, and the ink
	    is rendered to an image (and to an ``.mp4`` when videos are requested).

	    Args:
	        Dataset: Dataset name used to build the sample and InkML directories.
	        Model: Model variant; one of "Small-i", "Small-p", "Large-i".
	        Output_Format: "Image+Video" also renders a video per mode; any other
	            value yields ``None`` in the video slots.

	    Returns:
	        A 10-tuple: the input image followed by ``(text, image, video)`` for
	        "d+t", then "r+d", then "vanilla".

	    Raises:
	        ValueError: If ``Model`` is not a known model variant.
	        FileNotFoundError: If the sample directory is missing or empty.
	    """
	    # Validate up front so an unknown variant fails with a clear message
	    # instead of an unbound-variable error further down.
	    try:
	        model_prefix = _MODEL_DIR_PREFIX[Model]
	    except KeyError:
	        raise ValueError(f"Unknown model variant: {Model!r}") from None
	    inkml_path = f"./derendering_supp/{model_prefix}_{Dataset}_inkml"

	    path = f"./derendering_supp/{Dataset}/images_sample"
	    samples = os.listdir(path)
	    if not samples:
	        raise FileNotFoundError(f"No sample images found in {path}")
	    # Randomly pick a sample
	    name = random.choice(samples)
	    # Drop only the file extension. str.strip(".png") would remove any of the
	    # characters '.', 'p', 'n', 'g' from both ends of the name.
	    example_id = os.path.splitext(name)[0]

	    query_modes = ["d+t", "r+d", "vanilla"]
	    plot_title = {"r+d": "Recognized: ", "d+t": "OCR Input: ", "vanilla": ""}

	    timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
	    print(
	        timestamp,
	        "Taking sample from dataset:",
	        Dataset,
	        "and model:",
	        Model,
	        "with output format:",
	        Output_Format,
	    )

	    img = load_and_pad_img_dir(os.path.join(path, name))

	    # Flat result list in the order expected by the caller:
	    # input image, then (text, image, video) per mode.
	    results = [img]
	    for mode in query_modes:
	        inkml_file = os.path.join(inkml_path, mode, example_id + ".inkml")
	        text_field = parse_inkml_annotations(inkml_file)["textField"]
	        # Text output for three modes
	        # d+t: OCR recognition input to the model
	        # r+d: Recognition from the model
	        # vanilla: None
	        output_text = f"{plot_title[mode]}{text_field}"
	        ink = inkml_to_ink(inkml_file)

	        if Output_Format == "Image+Video":
	            # NOTE(review): the file name depends only on the mode, so
	            # concurrent requests write to the same files.
	            video_filename = mode + ".mp4"
	            plot_ink_to_video(ink, video_filename, input_image=img)
	        else:
	            video_filename = None

	        results.extend((output_text, _render_ink_image(ink, img), video_filename))

	    return tuple(results)


	# Build the Gradio UI: header, three selectors, the input image, one row each
	# of rendered images / captions / videos for the three inference modes, and a
	# "Sample" button wired to demo().
	with gr.Blocks() as app:
	# Page header: organisation SVG, then title, diagram and usage notes.
	gr.HTML(org_content)
	gr.Markdown(
	f"""
	# InkSight: Offline-to-Online Handwriting Conversion by Learning to Read and Write<br>
	<div>{diagram}</div>
	🔔 This demo showcases the outputs of <b>Small-i</b>, <b>Small-p</b>, and <b>Large-i</b> on three public datasets (100 samples each).<br>
	ℹ️ Choose a model variant and dataset, then click 'Sample' to see an input with its corresponding outputs for all three inference types.<br>
	📝 Choose the output format: Image or Image+Video. While showing only images are faster, videos can demonstrate the writing process of the inks.<br>
	"""
	)
	# Selectors; their values are passed to demo() as (Dataset, Model, Output_Format).
	with gr.Row():
	dataset = gr.Dropdown(
	["IMGUR5K", "IAM", "HierText"], label="Dataset", value="HierText"
	)
	model = gr.Dropdown(
	["Small-i", "Large-i", "Small-p"],
	label="InkSight Model Variant",
	value="Small-i",
	)
	output_format = gr.Dropdown(
	["Image", "Image+Video"], label="Output Format", value="Image"
	)
	# Shows the sampled input image (first element returned by demo()).
	im = gr.Image(label="Input Image")

	# Rendered ink image per inference mode.
	with gr.Row():
	d_t_img = gr.Image(label="Derender with Text")
	r_d_img = gr.Image(label="Recognize and Derender")
	vanilla_img = gr.Image(label="Vanilla")

	# Read-only caption per inference mode.
	with gr.Row():
	d_t_text = gr.Textbox(
	label="OCR recognition input to the model", interactive=False
	)
	r_d_text = gr.Textbox(label="Recognition from the model", interactive=False)
	vanilla_text = gr.Textbox(label="Vanilla", interactive=False)

	# Video per inference mode; demo() returns None for these unless the
	# output format is "Image+Video".
	with gr.Row():
	d_t_vid = gr.Video(label="Derender with Text", autoplay=True)
	r_d_vid = gr.Video(label="Recognize and Derender", autoplay=True)
	vanilla_vid = gr.Video(label="Vanilla", autoplay=True)

	with gr.Row():
	btn_sub = gr.Button("Sample")

	# The order of `outputs` must match the tuple returned by demo():
	# input image, then (text, image, video) for d+t, r+d and vanilla.
	btn_sub.click(
	fn=demo,
	inputs=[dataset, model, output_format],
	outputs=[
	im,
	d_t_text,
	d_t_img,
	d_t_vid,
	r_d_text,
	r_d_img,
	r_d_vid,
	vanilla_text,
	vanilla_img,
	vanilla_vid,
	],
	)

	# Start the Gradio server for the app.
	app.launch()