Spaces:

Derendering
/

Model-Output-Playground

Running

App Files Files Community

Charlie Li committed on Feb 14

Commit

c5cb9ba

•

1 Parent(s): 4697797

add an option to show image only to make it faster.

Browse files

Files changed (1) hide show

app.py +58 -13

app.py CHANGED Viewed

@@ -1,4 +1,6 @@
 import gradio as gr
 from utils import *
 file_url = "https://storage.googleapis.com/derendering_model/derendering_supp.zip"
@@ -16,7 +18,7 @@ org_content = f"""
 """
-def demo(Dataset, Model):
     if Model == "Small-i":
         inkml_path = f"./derendering_supp/small-i_{Dataset}_inkml"
     elif Model == "Small-p":
@@ -32,6 +34,9 @@ def demo(Dataset, Model):
     query_modes = ["d+t", "r+d", "vanilla"]
     plot_title = {"r+d": "Recognized: ", "d+t": "OCR Input: ", "vanilla": ""}
     text_outputs = []
     for name in picked_samples:
         img_path = os.path.join(path, name)
@@ -42,18 +47,40 @@ def demo(Dataset, Model):
             inkml_file = os.path.join(inkml_path, mode, example_id + ".inkml")
             text_field = parse_inkml_annotations(inkml_file)["textField"]
             output_text = f"{plot_title[mode]}{text_field}"
-            text_outputs.append(output_text)  # Append text output for the current mode
             ink = inkml_to_ink(inkml_file)
-            plot_ink_to_video(ink, mode + ".mp4", input_image=img)
     return (
         img,
         text_outputs[0],
-        "d+t.mp4",
         text_outputs[1],
-        "r+d.mp4",
         text_outputs[2],
-        "vanilla.mp4",
     )
@@ -64,7 +91,8 @@ with gr.Blocks() as app:
         # InkSight: Offline-to-Online Handwriting Conversion by Learning to Read and Write<br>
         <div>{diagram}</div>
         🔔 This demo showcases the outputs of <b>Small-i</b>, <b>Small-p</b>, and <b>Large-i</b> on three public datasets (100 samples each).<br>
-        ℹ️ Choose a model variant and dataset, then click 'Sample' to see an input with its corresponding outputs for all three inference types..<br>
         """
     )
     with gr.Row():
@@ -76,6 +104,9 @@ with gr.Blocks() as app:
             label="InkSight Model Variant",
             value="Small-i",
         )
         im = gr.Image(label="Input Image")
     with gr.Row():
         d_t_text = gr.Textbox(
@@ -83,19 +114,33 @@ with gr.Blocks() as app:
         )
         r_d_text = gr.Textbox(label="Recognition from the model", interactive=False)
         vanilla_text = gr.Textbox(label="Vanilla", interactive=False)
     with gr.Row():
-        d_t = gr.Video(label="Derender with Text", autoplay=True)
-        r_d = gr.Video(label="Recognize and Derender", autoplay=True)
-        vanilla = gr.Video(label="Vanilla", autoplay=True)
     with gr.Row():
         btn_sub = gr.Button("Sample")
     btn_sub.click(
         fn=demo,
-        inputs=[dataset, model],
-        outputs=[im, d_t_text, d_t, r_d_text, r_d, vanilla_text, vanilla],
     )
 app.launch()

 import gradio as gr
+import os
+import random
 from utils import *
 file_url = "https://storage.googleapis.com/derendering_model/derendering_supp.zip"
 """
+def demo(Dataset, Model, Output_Format):
     if Model == "Small-i":
         inkml_path = f"./derendering_supp/small-i_{Dataset}_inkml"
     elif Model == "Small-p":
     query_modes = ["d+t", "r+d", "vanilla"]
     plot_title = {"r+d": "Recognized: ", "d+t": "OCR Input: ", "vanilla": ""}
     text_outputs = []
+    img_outputs = []
+    video_outputs = []
+    print("Output format:", Output_Format)
     for name in picked_samples:
         img_path = os.path.join(path, name)
             inkml_file = os.path.join(inkml_path, mode, example_id + ".inkml")
             text_field = parse_inkml_annotations(inkml_file)["textField"]
             output_text = f"{plot_title[mode]}{text_field}"
+            # Text output for three modes
+            # d+t: OCR recognition input to the model
+            # r+d: Recognition from the model
+            # vanilla: None
+            text_outputs.append(output_text)
             ink = inkml_to_ink(inkml_file)
+            if Output_Format == "Image+Video":
+                video_filename = mode + ".mp4"
+                plot_ink_to_video(ink, video_filename, input_image=img)
+                video_outputs.append(video_filename)
+            else:
+                video_outputs.append(None)
+            fig, ax = plt.subplots()
+            ax.axis("off")
+            plot_ink(ink, ax, input_image=img)
+            buf = BytesIO()
+            fig.savefig(buf, format="png", bbox_inches="tight")
+            plt.close(fig)
+            buf.seek(0)
+            res = Image.open(buf)
+            img_outputs.append(res)
     return (
         img,
         text_outputs[0],
+        img_outputs[0],
+        video_outputs[0],
         text_outputs[1],
+        img_outputs[1],
+        video_outputs[1],
         text_outputs[2],
+        img_outputs[2],
+        video_outputs[2],
     )
         # InkSight: Offline-to-Online Handwriting Conversion by Learning to Read and Write<br>
         <div>{diagram}</div>
         🔔 This demo showcases the outputs of <b>Small-i</b>, <b>Small-p</b>, and <b>Large-i</b> on three public datasets (100 samples each).<br>
+        ℹ️ Choose a model variant and dataset, then click 'Sample' to see an input with its corresponding outputs for all three inference types.<br>
+        📝 Choose the output format: Image or Image+Video. While showing only images are faster, videos can demonstrate the writing process of the inks.<br>
         """
     )
     with gr.Row():
             label="InkSight Model Variant",
             value="Small-i",
         )
+        output_format = gr.Dropdown(
+            ["Image", "Image+Video"], label="Output Format", value="Image"
+        )
         im = gr.Image(label="Input Image")
     with gr.Row():
         d_t_text = gr.Textbox(
         )
         r_d_text = gr.Textbox(label="Recognition from the model", interactive=False)
         vanilla_text = gr.Textbox(label="Vanilla", interactive=False)
     with gr.Row():
+        d_t_img = gr.Image(label="Derender with Text")
+        r_d_img = gr.Image(label="Recognize and Derender")
+        vanilla_img = gr.Image(label="Vanilla")
+    with gr.Row():
+        d_t_vid = gr.Video(label="Derender with Text", autoplay=True)
+        r_d_vid = gr.Video(label="Recognize and Derender", autoplay=True)
+        vanilla_vid = gr.Video(label="Vanilla", autoplay=True)
     with gr.Row():
         btn_sub = gr.Button("Sample")
     btn_sub.click(
         fn=demo,
+        inputs=[dataset, model, output_format],
+        outputs=[
+            im,
+            d_t_text,
+            d_t_img,
+            d_t_vid,
+            r_d_text,
+            r_d_img,
+            r_d_vid,
+            vanilla_text,
+            vanilla_img,
+            vanilla_vid,
+        ],
     )
 app.launch()