"""Gradio playground that samples pre-computed InkSight derendering outputs."""

import gradio as gr
from utils import *

# NOTE(review): `os` and `random` are used below but not imported explicitly;
# presumably they are re-exported by the `utils` star import -- confirm.

# Supplementary assets (sample images and pre-computed .inkml outputs) are
# downloaded and unpacked once, at import time.
file_url = "https://storage.googleapis.com/derendering_model/derendering_supp.zip"
filename = "derendering_supp.zip"

download_file(file_url, filename)
unzip_file(filename)
print("Downloaded and unzipped the file.")

diagram = get_svg_content("derendering_supp/derender_diagram.svg")
org = get_svg_content("org/cor.svg")

org_content = f""" {org} """

# Model variant shown in the UI -> prefix of its output directory.
_MODEL_DIR_PREFIX = {
    "Small-i": "small-i",
    "Small-p": "small-p",
    "Large-i": "large-i",
}

# Inference modes, in the order their outputs are rendered.
_QUERY_MODES = ("d+t", "r+d", "vanilla")

# Prefix prepended to the text field of each inference mode.
_PLOT_TITLE = {"r+d": "Recognized: ", "d+t": "OCR Input: ", "vanilla": ""}


def demo(Dataset, Model):
    """Pick one random sample image and render every inference mode for it.

    Args:
        Dataset: Dataset name; used verbatim to build the sample and inkml
            directory paths under ``./derendering_supp``.
        Model: Model variant, one of ``"Small-i"``, ``"Small-p"``,
            ``"Large-i"``.

    Returns:
        A 7-tuple ``(image, d+t text, "d+t.mp4", r+d text, "r+d.mp4",
        vanilla text, "vanilla.mp4")`` matching the order of the output
        components wired to the "Sample" button.

    Raises:
        gr.Error: If ``Model`` is not a known variant or the sample
            directory for ``Dataset`` contains no files.
    """
    # Fail loudly on an unknown variant instead of hitting an unbound
    # ``inkml_path`` further down.
    if Model not in _MODEL_DIR_PREFIX:
        raise gr.Error(f"Unknown model variant: {Model!r}")
    inkml_path = f"./derendering_supp/{_MODEL_DIR_PREFIX[Model]}_{Dataset}_inkml"

    path = f"./derendering_supp/{Dataset}/images_sample"
    samples = os.listdir(path)
    if not samples:
        # Without this guard an empty directory would surface later as an
        # unrelated unbound-variable / index error.
        raise gr.Error(f"No sample images found in {path}")

    # Randomly pick a sample
    name = random.choice(samples)
    img = load_and_pad_img_dir(os.path.join(path, name))

    # Drop the file extension only. ``str.strip(".png")`` would also eat
    # leading/trailing 'p', 'n', 'g' and '.' characters of the stem itself.
    example_id = os.path.splitext(name)[0]

    text_outputs = {}
    video_files = {}
    for mode in _QUERY_MODES:
        inkml_file = os.path.join(inkml_path, mode, example_id + ".inkml")
        text_field = parse_inkml_annotations(inkml_file)["textField"]
        text_outputs[mode] = f"{_PLOT_TITLE[mode]}{text_field}"

        ink = inkml_to_ink(inkml_file)
        # NOTE(review): the video name is fixed per mode and relative to the
        # working directory, so concurrent sessions would presumably
        # overwrite each other's files -- confirm whether that matters.
        video_files[mode] = mode + ".mp4"
        plot_ink_to_video(ink, video_files[mode], input_image=img)

    return (
        img,
        text_outputs["d+t"],
        video_files["d+t"],
        text_outputs["r+d"],
        video_files["r+d"],
        text_outputs["vanilla"],
        video_files["vanilla"],
    )


# NOTE(review): the nesting below was reconstructed from a source whose
# indentation had been lost; in particular, confirm that the input image is
# meant to sit outside the first row.
with gr.Blocks() as app:
    gr.HTML(org_content)
    gr.Markdown(
        f"""
# InkSight: Offline-to-Online Handwriting Conversion by Learning to Read and Write
{diagram}
🔔 This demo showcases the outputs of Small-i, Small-p, and Large-i on three public datasets (100 samples each).
ℹ️ Choose a model variant and dataset, then click 'Sample' to see an input with its corresponding outputs for all three inference types.
"""
    )
    with gr.Row():
        dataset = gr.Dropdown(
            ["IMGUR5K", "IAM", "HierText"], label="Dataset", value="HierText"
        )
        model = gr.Dropdown(
            ["Small-i", "Large-i", "Small-p"],
            label="InkSight Model Variant",
            value="Small-i",
        )
    im = gr.Image(label="Input Image")
    with gr.Row():
        d_t_text = gr.Textbox(
            label="OCR recognition input to the model", interactive=False
        )
        r_d_text = gr.Textbox(label="Recognition from the model", interactive=False)
        vanilla_text = gr.Textbox(label="Vanilla", interactive=False)
    with gr.Row():
        d_t = gr.Video(label="Derender with Text", autoplay=True)
        r_d = gr.Video(label="Recognize and Derender", autoplay=True)
        vanilla = gr.Video(label="Vanilla", autoplay=True)
    with gr.Row():
        btn_sub = gr.Button("Sample")

    # The output order must match the tuple returned by ``demo``.
    btn_sub.click(
        fn=demo,
        inputs=[dataset, model],
        outputs=[im, d_t_text, d_t, r_d_text, r_d, vanilla_text, vanilla],
    )

app.launch()