Charlie Li committed on
Commit
c5cb9ba
•
1 Parent(s): 4697797

add an option to show image only to make it faster.

Browse files
Files changed (1) hide show
  1. app.py +58 -13
app.py CHANGED
@@ -1,4 +1,6 @@
1
  import gradio as gr
 
 
2
  from utils import *
3
 
4
  file_url = "https://storage.googleapis.com/derendering_model/derendering_supp.zip"
@@ -16,7 +18,7 @@ org_content = f"""
16
  """
17
 
18
 
19
- def demo(Dataset, Model):
20
  if Model == "Small-i":
21
  inkml_path = f"./derendering_supp/small-i_{Dataset}_inkml"
22
  elif Model == "Small-p":
@@ -32,6 +34,9 @@ def demo(Dataset, Model):
32
  query_modes = ["d+t", "r+d", "vanilla"]
33
  plot_title = {"r+d": "Recognized: ", "d+t": "OCR Input: ", "vanilla": ""}
34
  text_outputs = []
 
 
 
35
 
36
  for name in picked_samples:
37
  img_path = os.path.join(path, name)
@@ -42,18 +47,40 @@ def demo(Dataset, Model):
42
  inkml_file = os.path.join(inkml_path, mode, example_id + ".inkml")
43
  text_field = parse_inkml_annotations(inkml_file)["textField"]
44
  output_text = f"{plot_title[mode]}{text_field}"
45
- text_outputs.append(output_text) # Append text output for the current mode
 
 
 
 
46
  ink = inkml_to_ink(inkml_file)
47
- plot_ink_to_video(ink, mode + ".mp4", input_image=img)
48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  return (
50
  img,
51
  text_outputs[0],
52
- "d+t.mp4",
 
53
  text_outputs[1],
54
- "r+d.mp4",
 
55
  text_outputs[2],
56
- "vanilla.mp4",
 
57
  )
58
 
59
 
@@ -64,7 +91,8 @@ with gr.Blocks() as app:
64
  # InkSight: Offline-to-Online Handwriting Conversion by Learning to Read and Write<br>
65
  <div>{diagram}</div>
66
  🔔 This demo showcases the outputs of <b>Small-i</b>, <b>Small-p</b>, and <b>Large-i</b> on three public datasets (100 samples each).<br>
67
- ℹ️ Choose a model variant and dataset, then click 'Sample' to see an input with its corresponding outputs for all three inference types..<br>
 
68
  """
69
  )
70
  with gr.Row():
@@ -76,6 +104,9 @@ with gr.Blocks() as app:
76
  label="InkSight Model Variant",
77
  value="Small-i",
78
  )
 
 
 
79
  im = gr.Image(label="Input Image")
80
  with gr.Row():
81
  d_t_text = gr.Textbox(
@@ -83,19 +114,33 @@ with gr.Blocks() as app:
83
  )
84
  r_d_text = gr.Textbox(label="Recognition from the model", interactive=False)
85
  vanilla_text = gr.Textbox(label="Vanilla", interactive=False)
86
-
87
  with gr.Row():
88
- d_t = gr.Video(label="Derender with Text", autoplay=True)
89
- r_d = gr.Video(label="Recognize and Derender", autoplay=True)
90
- vanilla = gr.Video(label="Vanilla", autoplay=True)
 
 
 
 
91
 
92
  with gr.Row():
93
  btn_sub = gr.Button("Sample")
94
 
95
  btn_sub.click(
96
  fn=demo,
97
- inputs=[dataset, model],
98
- outputs=[im, d_t_text, d_t, r_d_text, r_d, vanilla_text, vanilla],
 
 
 
 
 
 
 
 
 
 
 
99
  )
100
 
101
  app.launch()
 
1
  import gradio as gr
2
+ import os
3
+ import random
4
  from utils import *
5
 
6
  file_url = "https://storage.googleapis.com/derendering_model/derendering_supp.zip"
 
18
  """
19
 
20
 
21
+ def demo(Dataset, Model, Output_Format):
22
  if Model == "Small-i":
23
  inkml_path = f"./derendering_supp/small-i_{Dataset}_inkml"
24
  elif Model == "Small-p":
 
34
  query_modes = ["d+t", "r+d", "vanilla"]
35
  plot_title = {"r+d": "Recognized: ", "d+t": "OCR Input: ", "vanilla": ""}
36
  text_outputs = []
37
+ img_outputs = []
38
+ video_outputs = []
39
+ print("Output format:", Output_Format)
40
 
41
  for name in picked_samples:
42
  img_path = os.path.join(path, name)
 
47
  inkml_file = os.path.join(inkml_path, mode, example_id + ".inkml")
48
  text_field = parse_inkml_annotations(inkml_file)["textField"]
49
  output_text = f"{plot_title[mode]}{text_field}"
50
+ # Text output for three modes
51
+ # d+t: OCR recognition input to the model
52
+ # r+d: Recognition from the model
53
+ # vanilla: None
54
+ text_outputs.append(output_text)
55
  ink = inkml_to_ink(inkml_file)
 
56
 
57
+ if Output_Format == "Image+Video":
58
+ video_filename = mode + ".mp4"
59
+ plot_ink_to_video(ink, video_filename, input_image=img)
60
+ video_outputs.append(video_filename)
61
+ else:
62
+ video_outputs.append(None)
63
+
64
+ fig, ax = plt.subplots()
65
+ ax.axis("off")
66
+ plot_ink(ink, ax, input_image=img)
67
+ buf = BytesIO()
68
+ fig.savefig(buf, format="png", bbox_inches="tight")
69
+ plt.close(fig)
70
+ buf.seek(0)
71
+ res = Image.open(buf)
72
+ img_outputs.append(res)
73
  return (
74
  img,
75
  text_outputs[0],
76
+ img_outputs[0],
77
+ video_outputs[0],
78
  text_outputs[1],
79
+ img_outputs[1],
80
+ video_outputs[1],
81
  text_outputs[2],
82
+ img_outputs[2],
83
+ video_outputs[2],
84
  )
85
 
86
 
 
91
  # InkSight: Offline-to-Online Handwriting Conversion by Learning to Read and Write<br>
92
  <div>{diagram}</div>
93
  🔔 This demo showcases the outputs of <b>Small-i</b>, <b>Small-p</b>, and <b>Large-i</b> on three public datasets (100 samples each).<br>
94
+ ℹ️ Choose a model variant and dataset, then click 'Sample' to see an input with its corresponding outputs for all three inference types.<br>
95
+ πŸ“ Choose the output format: Image or Image+Video. While showing only images are faster, videos can demonstrate the writing process of the inks.<br>
96
  """
97
  )
98
  with gr.Row():
 
104
  label="InkSight Model Variant",
105
  value="Small-i",
106
  )
107
+ output_format = gr.Dropdown(
108
+ ["Image", "Image+Video"], label="Output Format", value="Image"
109
+ )
110
  im = gr.Image(label="Input Image")
111
  with gr.Row():
112
  d_t_text = gr.Textbox(
 
114
  )
115
  r_d_text = gr.Textbox(label="Recognition from the model", interactive=False)
116
  vanilla_text = gr.Textbox(label="Vanilla", interactive=False)
 
117
  with gr.Row():
118
+ d_t_img = gr.Image(label="Derender with Text")
119
+ r_d_img = gr.Image(label="Recognize and Derender")
120
+ vanilla_img = gr.Image(label="Vanilla")
121
+ with gr.Row():
122
+ d_t_vid = gr.Video(label="Derender with Text", autoplay=True)
123
+ r_d_vid = gr.Video(label="Recognize and Derender", autoplay=True)
124
+ vanilla_vid = gr.Video(label="Vanilla", autoplay=True)
125
 
126
  with gr.Row():
127
  btn_sub = gr.Button("Sample")
128
 
129
  btn_sub.click(
130
  fn=demo,
131
+ inputs=[dataset, model, output_format],
132
+ outputs=[
133
+ im,
134
+ d_t_text,
135
+ d_t_img,
136
+ d_t_vid,
137
+ r_d_text,
138
+ r_d_img,
139
+ r_d_vid,
140
+ vanilla_text,
141
+ vanilla_img,
142
+ vanilla_vid,
143
+ ],
144
  )
145
 
146
  app.launch()