import gradio as gr
import os
import random
from utils import *
# Module-level setup: runs once when this file is imported/executed.
# `download_file` and `unzip_file` are not defined in this file (presumably
# they come from the `utils` star import); the print below reports that both
# steps have finished.
file_url = "https://storage.googleapis.com/derendering_model/derendering_supp.zip"
filename = "derendering_supp.zip"
download_file(file_url, filename)
unzip_file(filename)
print("Downloaded and unzipped the file.")
# SVG assets loaded through `get_svg_content` (presumably returns the file's
# markup as text -- confirm in utils). The first path lives under
# "derendering_supp/", which is presumably created by the unzip step above.
diagram = get_svg_content("derendering_supp/derender_diagram.svg")
org = get_svg_content("org/cor.svg")
# `org` surrounded by newlines; this string is passed to `gr.HTML` when the
# Blocks UI is built further down.
org_content = f"""
{org}
"""
def _render_ink_image(ink, background):
    """Plot ``ink`` on a fresh matplotlib axes and return it as an image.

    ``plot_ink`` is called with ``background`` as its ``input_image``; the
    figure is saved as a tightly-cropped PNG into an in-memory buffer and
    reopened with ``Image.open``.
    """
    fig, ax = plt.subplots()
    try:
        ax.axis("off")
        plot_ink(ink, ax, input_image=background)
        buf = BytesIO()
        fig.savefig(buf, format="png", bbox_inches="tight")
    finally:
        # Always release the figure, even when plotting/saving fails, so
        # figures do not pile up across calls.
        plt.close(fig)
    buf.seek(0)
    return Image.open(buf)


def demo(Dataset, Model, Output_Format):
    """Pick a random sample image and collect its outputs for every query mode.

    Args:
        Dataset: Dataset name; interpolated into the sample-image directory
            ``./derendering_supp/{Dataset}/images_sample`` and into the
            inkml directory name.
        Model: One of ``"Small-i"``, ``"Small-p"`` or ``"Large-i"``.
        Output_Format: When equal to ``"Image+Video"`` a video is produced
            per mode via ``plot_ink_to_video``; for any other value the
            video slots are ``None``.

    Returns:
        A 10-tuple: the loaded input image, followed by
        ``(text, image, video)`` for each of the modes ``"d+t"``, ``"r+d"``
        and ``"vanilla"``, in that order.

    Raises:
        ValueError: If ``Model`` is not one of the supported names.
        FileNotFoundError: If the sample directory contains no entries
            (``os.listdir`` itself raises when the directory is missing).
    """
    supported_models = ("Small-i", "Small-p", "Large-i")
    if Model not in supported_models:
        # Previously an unknown model left ``inkml_path`` unbound and the
        # function failed later with an unrelated-looking error.
        raise ValueError(
            f"Unsupported model {Model!r}; expected one of {supported_models}."
        )
    # Directory names use the lower-cased model name ("Small-i" -> "small-i").
    inkml_path = f"./derendering_supp/{Model.lower()}_{Dataset}_inkml"

    path = f"./derendering_supp/{Dataset}/images_sample"
    samples = os.listdir(path)
    if not samples:
        # Without a sample there is no image to return; fail explicitly
        # instead of hitting an unbound ``img`` at the return statement.
        raise FileNotFoundError(f"No sample images found in {path!r}.")
    # Randomly pick a sample
    name = random.choice(samples)

    query_modes = ["d+t", "r+d", "vanilla"]
    plot_title = {"r+d": "Recognized: ", "d+t": "OCR Input: ", "vanilla": ""}
    text_outputs = []
    img_outputs = []
    video_outputs = []

    print("Output format:", Output_Format)

    img = load_and_pad_img_dir(os.path.join(path, name))
    # Drop the file extension. ``str.strip(".png")`` would remove any of the
    # characters '.', 'p', 'n', 'g' from BOTH ends (e.g. "page1.png" ->
    # "age1"), so split the extension off instead.
    example_id = os.path.splitext(name)[0]

    for mode in query_modes:
        inkml_file = os.path.join(inkml_path, mode, example_id + ".inkml")
        text_field = parse_inkml_annotations(inkml_file)["textField"]
        # Text output for three modes
        # d+t: OCR recognition input to the model
        # r+d: Recognition from the model
        # vanilla: None
        text_outputs.append(f"{plot_title[mode]}{text_field}")

        ink = inkml_to_ink(inkml_file)
        if Output_Format == "Image+Video":
            # NOTE(review): the video path is a fixed per-mode name relative
            # to the working directory, so overlapping calls would presumably
            # overwrite each other's files -- confirm whether that matters.
            video_filename = mode + ".mp4"
            plot_ink_to_video(ink, video_filename, input_image=img)
            video_outputs.append(video_filename)
        else:
            video_outputs.append(None)

        img_outputs.append(_render_ink_image(ink, img))

    # Flatten to (img, text0, image0, video0, text1, image1, video1, ...).
    results = [img]
    for per_mode in zip(text_outputs, img_outputs, video_outputs):
        results.extend(per_mode)
    return tuple(results)
with gr.Blocks() as app:
gr.HTML(org_content)
gr.Markdown(
f"""
# InkSight: Offline-to-Online Handwriting Conversion by Learning to Read and Write