|
import gradio as gr |
|
from utils import * |
|
|
|
# Supplementary assets (per-model InkML outputs + sample images) hosted on GCS.
file_url = "https://storage.googleapis.com/derendering_model/derendering_supp.zip"

filename = "derendering_supp.zip"

# Fetch and unpack the archive at import time; both helpers come from utils.
# NOTE(review): runs on every app start — presumably the helpers skip
# re-downloading if the file already exists; confirm in utils.
download_file(file_url, filename)

unzip_file(filename)

print("Downloaded and unzipped the file.")

# Inline SVG markup embedded directly into the page HTML below.
diagram = get_svg_content("derendering_supp/derender_diagram.svg")

# Organization/affiliation banner SVG.
org = get_svg_content("org/cor.svg")

# Banner wrapped in newlines so it renders as its own HTML block.
org_content = f"""

{org}

"""
|
|
|
|
|
def demo(Dataset, Model):
    """Sample one image from *Dataset* and render the outputs of *Model*.

    Picks a random sample image, then for each of the three inference modes
    ("d+t", "r+d", "vanilla") reads the corresponding InkML file, renders the
    ink to a video file in the working directory, and collects the text field.

    Args:
        Dataset: Dataset name, e.g. "IMGUR5K", "IAM", or "HierText".
        Model: Model variant — "Small-i", "Small-p", or "Large-i".

    Returns:
        Tuple of (input image, d+t text, d+t video path, r+d text,
        r+d video path, vanilla text, vanilla video path) matching the
        Gradio output components.

    Raises:
        ValueError: If *Model* is unknown or the sample directory is empty.
    """
    # Dict lookup instead of an if/elif chain; an unknown model previously
    # left inkml_path unbound and crashed with an opaque NameError.
    model_dirs = {"Small-i": "small-i", "Small-p": "small-p", "Large-i": "large-i"}
    if Model not in model_dirs:
        raise ValueError(f"Unknown model variant: {Model}")
    inkml_path = f"./derendering_supp/{model_dirs[Model]}_{Dataset}_inkml"

    path = f"./derendering_supp/{Dataset}/images_sample"
    samples = os.listdir(path)
    if not samples:
        raise ValueError(f"No sample images found in {path}")

    # One random sample per button click.
    picked_samples = random.sample(samples, min(1, len(samples)))

    query_modes = ["d+t", "r+d", "vanilla"]
    plot_title = {"r+d": "Recognized: ", "d+t": "OCR Input: ", "vanilla": ""}
    text_outputs = []

    for name in picked_samples:
        img_path = os.path.join(path, name)
        img = load_and_pad_img_dir(img_path)

        for mode in query_modes:
            # splitext, not strip(".png"): strip removes ANY of the chars
            # '.', 'p', 'n', 'g' from both ends (e.g. "nag.png" -> "a").
            example_id = os.path.splitext(name)[0]
            inkml_file = os.path.join(inkml_path, mode, example_id + ".inkml")
            text_field = parse_inkml_annotations(inkml_file)["textField"]
            text_outputs.append(f"{plot_title[mode]}{text_field}")
            ink = inkml_to_ink(inkml_file)
            # NOTE(review): videos are written to fixed names in the CWD, so
            # concurrent users overwrite each other's files — TODO confirm.
            plot_ink_to_video(ink, mode + ".mp4", input_image=img)

    # Order mirrors query_modes: d+t, r+d, vanilla.
    return (
        img,
        text_outputs[0],
        "d+t.mp4",
        text_outputs[1],
        "r+d.mp4",
        text_outputs[2],
        "vanilla.mp4",
    )
|
|
|
|
|
# Build the Gradio UI: header, dataset/model pickers, one image input
# preview, and a text+video pair per inference mode.
with gr.Blocks() as app:
    # Organization banner (inline SVG).
    gr.HTML(org_content)
    gr.Markdown(
        # Fixed: mojibake emoji (UTF-8 bytes mis-decoded as Thai) and a
        # doubled period in the instruction line.
        f"""
# InkSight: Offline-to-Online Handwriting Conversion by Learning to Read and Write<br>
<div>{diagram}</div>
🔎 This demo showcases the outputs of <b>Small-i</b>, <b>Small-p</b>, and <b>Large-i</b> on three public datasets (100 samples each).<br>
ℹ️ Choose a model variant and dataset, then click 'Sample' to see an input with its corresponding outputs for all three inference types.<br>
"""
    )
    with gr.Row():
        dataset = gr.Dropdown(
            ["IMGUR5K", "IAM", "HierText"], label="Dataset", value="HierText"
        )
        model = gr.Dropdown(
            ["Small-i", "Large-i", "Small-p"],
            label="InkSight Model Variant",
            value="Small-i",
        )
        im = gr.Image(label="Input Image")

    # One textbox per inference mode, aligned with the videos below.
    with gr.Row():
        d_t_text = gr.Textbox(
            label="OCR recognition input to the model", interactive=False
        )
        r_d_text = gr.Textbox(label="Recognition from the model", interactive=False)
        vanilla_text = gr.Textbox(label="Vanilla", interactive=False)

    with gr.Row():
        d_t = gr.Video(label="Derender with Text", autoplay=True)
        r_d = gr.Video(label="Recognize and Derender", autoplay=True)
        vanilla = gr.Video(label="Vanilla", autoplay=True)

    with gr.Row():
        btn_sub = gr.Button("Sample")

    # Output order must match demo()'s return tuple.
    btn_sub.click(
        fn=demo,
        inputs=[dataset, model],
        outputs=[im, d_t_text, d_t, r_d_text, r_d, vanilla_text, vanilla],
    )

app.launch()
|
|