Charlie Li committed on
Commit
988d509
•
1 Parent(s): 015a301

pregenerate samples

Browse files
Files changed (3) hide show
  1. app.py +36 -65
  2. requirements.txt +1 -0
  3. utils.py +30 -0
app.py CHANGED
@@ -4,6 +4,9 @@ import random
4
  import datetime
5
  from utils import *
6
  from pathlib import Path
 
 
 
7
 
8
  file_url = "https://storage.googleapis.com/derendering_model/derendering_supp.zip"
9
  filename = "derendering_supp.zip"
@@ -14,7 +17,7 @@ video_cache_dir.mkdir(exist_ok=True)
14
 
15
  download_file(file_url, filename)
16
  unzip_file(filename)
17
- print("Downloaded and unzipped the file.")
18
 
19
  diagram = get_svg_content("derendering_supp/derender_diagram.svg")
20
  org = get_svg_content("org/cor.svg")
@@ -51,43 +54,23 @@ sketches_base64_strings = {
51
  name: get_base64_encoded_gif(f"sketches/{name}") for name in sketches
52
  }
53
 
54
- datasets = ["IAM", "IMGUR5K", "HierText"]
55
- models = ["Small-i", "Large-i", "Small-p"]
56
- query_modes = ["d+t", "r+d", "vanilla"]
57
-
58
-
59
- def pregenerate_videos():
60
- for Dataset in datasets:
61
- for Model in models:
62
- inkml_path_base = f"./derendering_supp/{Model.lower()}_{Dataset}_inkml"
63
- for mode in query_modes:
64
- path = f"./derendering_supp/{Dataset}/images_sample"
65
- if not os.path.exists(path):
66
- continue
67
- samples = os.listdir(path)
68
- selected_samples = random.sample(samples, len(samples) // 3)
69
- for name in tqdm(
70
- selected_samples, desc=f"Generating {Model}-{Dataset}-{mode} videos"
71
- ):
72
- example_id = name.strip(".png")
73
- inkml_file = os.path.join(
74
- inkml_path_base, mode, f"{example_id}.inkml"
75
- )
76
- if not os.path.exists(inkml_file):
77
- continue
78
- video_filename = f"{Model}_{Dataset}_{mode}_{example_id}.mp4"
79
- video_filepath = video_cache_dir / video_filename
80
- if not video_filepath.exists():
81
- img_path = os.path.join(path, name)
82
- img = load_and_pad_img_dir(img_path)
83
- ink = inkml_to_ink(inkml_file)
84
- plot_ink_to_video(ink, str(video_filepath), input_image=img)
85
-
86
-
87
- pregenerate_videos()
88
-
89
-
90
- def demo(Dataset, Model, Output_Format):
91
  if Model == "Small-i":
92
  inkml_path = f"./derendering_supp/small-i_{Dataset}_inkml"
93
  elif Model == "Small-p":
@@ -104,8 +87,6 @@ def demo(Dataset, Model, Output_Format):
104
  Dataset,
105
  "and model:",
106
  Model,
107
- "with output format:",
108
- Output_Format,
109
  )
110
  path = f"./derendering_supp/{Dataset}/images_sample"
111
  samples = os.listdir(path)
@@ -132,13 +113,10 @@ def demo(Dataset, Model, Output_Format):
132
  video_filename = f"{Model}_{Dataset}_{mode}_{example_id}.mp4"
133
  video_filepath = video_cache_dir / video_filename
134
 
135
- if Output_Format == "Image+Video":
136
- if not video_filepath.exists():
137
- plot_ink_to_video(ink, str(video_filepath), input_image=img)
138
- print("Cached video at:", video_filepath)
139
- video_outputs.append("./" + str(video_filepath))
140
- else:
141
- video_outputs.append(None)
142
 
143
  fig, ax = plt.subplots()
144
  ax.axis("off")
@@ -152,13 +130,13 @@ def demo(Dataset, Model, Output_Format):
152
  return (
153
  img,
154
  text_outputs[0],
155
- img_outputs[0],
156
  video_outputs[0],
157
  text_outputs[1],
158
- img_outputs[1],
159
  video_outputs[1],
160
  text_outputs[2],
161
- img_outputs[2],
162
  video_outputs[2],
163
  )
164
 
@@ -182,7 +160,6 @@ with gr.Blocks() as app:
182
  """
183
  🚀 This demo highlights the capabilities of Small-i, Small-p, and Large-i across three public datasets (word-level, with 100 random samples each).<br>
184
  🎲 Select a model variant and dataset (IAM, IMGUR5K, HierText), then hit 'Sample' to view a randomly selected input alongside its corresponding outputs for all three types of inference.<br>
185
- πŸ–ΌοΈ Output options: Image or Image+Video. Opting for images yields quicker results, adding videos offers a dynamic view of the digital ink writing process.<br>
186
  """
187
  )
188
  with gr.Row():
@@ -194,15 +171,12 @@ with gr.Blocks() as app:
194
  label="InkSight Model Variant",
195
  value="Small-i",
196
  )
197
- output_format = gr.Dropdown(
198
- ["Image", "Image+Video"], label="Output Format", value="Image"
199
- )
200
  im = gr.Image(label="Input Image")
201
 
202
- with gr.Row():
203
- d_t_img = gr.Image(label="Derender with Text")
204
- r_d_img = gr.Image(label="Recognize and Derender")
205
- vanilla_img = gr.Image(label="Vanilla")
206
 
207
  with gr.Row():
208
  d_t_text = gr.Textbox(
@@ -210,9 +184,6 @@ with gr.Blocks() as app:
210
  )
211
  r_d_text = gr.Textbox(label="Recognition from the model", interactive=False)
212
  vanilla_text = gr.Textbox(label="Vanilla", interactive=False)
213
- gr.Markdown(
214
- "To visualize the writing process in video, select *Output format* as **Image+Video**."
215
- )
216
  with gr.Row():
217
  d_t_vid = gr.Video(
218
  label="Derender with Text (Click to stop/play)", autoplay=True
@@ -227,17 +198,17 @@ with gr.Blocks() as app:
227
 
228
  btn_sub.click(
229
  fn=demo,
230
- inputs=[dataset, model, output_format],
231
  outputs=[
232
  im,
233
  d_t_text,
234
- d_t_img,
235
  d_t_vid,
236
  r_d_text,
237
- r_d_img,
238
  r_d_vid,
239
  vanilla_text,
240
- vanilla_img,
241
  vanilla_vid,
242
  ],
243
  )
 
4
  import datetime
5
  from utils import *
6
  from pathlib import Path
7
+ import gdown
8
+
9
+ pre_generate = False
10
 
11
  file_url = "https://storage.googleapis.com/derendering_model/derendering_supp.zip"
12
  filename = "derendering_supp.zip"
 
17
 
18
  download_file(file_url, filename)
19
  unzip_file(filename)
20
+ print("Downloaded and unzipped the inks.")
21
 
22
  diagram = get_svg_content("derendering_supp/derender_diagram.svg")
23
  org = get_svg_content("org/cor.svg")
 
54
  name: get_base64_encoded_gif(f"sketches/{name}") for name in sketches
55
  }
56
 
57
+ if not pre_generate:
58
+ print("Downloading pre-generated videos from google drive.")
59
+ # Download from gdown 1oT6zw1EbWg3lavBMXsL28piULGNmqJzA
60
+ gdown.download(
61
+ "https://drive.google.com/uc?id=1oT6zw1EbWg3lavBMXsL28piULGNmqJzA",
62
+ str(video_cache_dir / "gdrive_file.zip"),
63
+ quiet=False,
64
+ )
65
+
66
+ # Unzip the file to video_cache_dir
67
+ unzip_file(str(video_cache_dir / "gdrive_file.zip"))
68
+ else:
69
+ pregenerate_videos(video_cache_dir=video_cache_dir)
70
+ print("Videos cached.")
71
+
72
+
73
+ def demo(Dataset, Model):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  if Model == "Small-i":
75
  inkml_path = f"./derendering_supp/small-i_{Dataset}_inkml"
76
  elif Model == "Small-p":
 
87
  Dataset,
88
  "and model:",
89
  Model,
 
 
90
  )
91
  path = f"./derendering_supp/{Dataset}/images_sample"
92
  samples = os.listdir(path)
 
113
  video_filename = f"{Model}_{Dataset}_{mode}_{example_id}.mp4"
114
  video_filepath = video_cache_dir / video_filename
115
 
116
+ if not video_filepath.exists():
117
+ plot_ink_to_video(ink, str(video_filepath), input_image=img)
118
+ print("Cached video at:", video_filepath)
119
+ video_outputs.append("./" + str(video_filepath))
 
 
 
120
 
121
  fig, ax = plt.subplots()
122
  ax.axis("off")
 
130
  return (
131
  img,
132
  text_outputs[0],
133
+ # img_outputs[0],
134
  video_outputs[0],
135
  text_outputs[1],
136
+ # img_outputs[1],
137
  video_outputs[1],
138
  text_outputs[2],
139
+ # img_outputs[2],
140
  video_outputs[2],
141
  )
142
 
 
160
  """
161
  🚀 This demo highlights the capabilities of Small-i, Small-p, and Large-i across three public datasets (word-level, with 100 random samples each).<br>
162
  🎲 Select a model variant and dataset (IAM, IMGUR5K, HierText), then hit 'Sample' to view a randomly selected input alongside its corresponding outputs for all three types of inference.<br>
 
163
  """
164
  )
165
  with gr.Row():
 
171
  label="InkSight Model Variant",
172
  value="Small-i",
173
  )
 
 
 
174
  im = gr.Image(label="Input Image")
175
 
176
+ # with gr.Row():
177
+ # d_t_img = gr.Image(label="Derender with Text")
178
+ # r_d_img = gr.Image(label="Recognize and Derender")
179
+ # vanilla_img = gr.Image(label="Vanilla")
180
 
181
  with gr.Row():
182
  d_t_text = gr.Textbox(
 
184
  )
185
  r_d_text = gr.Textbox(label="Recognition from the model", interactive=False)
186
  vanilla_text = gr.Textbox(label="Vanilla", interactive=False)
 
 
 
187
  with gr.Row():
188
  d_t_vid = gr.Video(
189
  label="Derender with Text (Click to stop/play)", autoplay=True
 
198
 
199
  btn_sub.click(
200
  fn=demo,
201
+ inputs=[dataset, model],
202
  outputs=[
203
  im,
204
  d_t_text,
205
+ # d_t_img,
206
  d_t_vid,
207
  r_d_text,
208
+ # r_d_img,
209
  r_d_vid,
210
  vanilla_text,
211
+ # vanilla_img,
212
  vanilla_vid,
213
  ],
214
  )
requirements.txt CHANGED
@@ -3,3 +3,4 @@ numpy
3
  matplotlib
4
  Pillow
5
  numpy
 
 
3
  matplotlib
4
  Pillow
5
  numpy
6
+ gdown
utils.py CHANGED
@@ -240,3 +240,33 @@ def parse_inkml_annotations(inkml_file):
240
  annotation_dict[annotation_type] = annotation_text
241
 
242
  return annotation_dict
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
240
  annotation_dict[annotation_type] = annotation_text
241
 
242
  return annotation_dict
243
+
244
+
245
def _strip_png_suffix(name):
    """Return *name* without a trailing ``.png`` extension, if it has one.

    ``str.strip(".png")`` must not be used for this: it removes any of the
    characters ``.``, ``p``, ``n``, ``g`` from BOTH ends of the string, so a
    file such as ``page1.png`` would become ``age1``.
    """
    suffix = ".png"
    return name[: -len(suffix)] if name.endswith(suffix) else name


def pregenerate_videos(video_cache_dir):
    """Render and cache a writing video for every dataset/model/mode sample.

    For each dataset whose ``./derendering_supp/<Dataset>/images_sample``
    directory exists, every listed sample is paired with the matching
    ``<example_id>.inkml`` file under
    ``./derendering_supp/<model>_<Dataset>_inkml/<mode>/``. Samples without
    an inkml file are skipped, as are videos already present in the cache.

    Args:
        video_cache_dir: Directory (a ``pathlib.Path``-like object supporting
            the ``/`` operator and ``.exists()``) in which the ``.mp4`` files
            are stored as ``<Model>_<Dataset>_<mode>_<example_id>.mp4``.

    Returns:
        None. The function is used for its side effect of writing videos.
    """
    datasets = ["IAM", "IMGUR5K", "HierText"]
    models = ["Small-i", "Large-i", "Small-p"]
    query_modes = ["d+t", "r+d", "vanilla"]
    for Dataset in datasets:
        # The sample directory depends only on the dataset, so check and list
        # it once instead of once per (model, mode) pair.
        path = f"./derendering_supp/{Dataset}/images_sample"
        if not os.path.exists(path):
            continue
        samples = os.listdir(path)
        for Model in models:
            inkml_path_base = f"./derendering_supp/{Model.lower()}_{Dataset}_inkml"
            for mode in query_modes:
                for name in tqdm(
                    samples, desc=f"Generating {Model}-{Dataset}-{mode} videos"
                ):
                    # NOTE(review): any caller that rebuilds this filename
                    # (e.g. the demo handler) must derive example_id the same
                    # way, or it will miss the cached video -- confirm.
                    example_id = _strip_png_suffix(name)
                    inkml_file = os.path.join(
                        inkml_path_base, mode, f"{example_id}.inkml"
                    )
                    if not os.path.exists(inkml_file):
                        continue
                    video_filename = f"{Model}_{Dataset}_{mode}_{example_id}.mp4"
                    video_filepath = video_cache_dir / video_filename
                    if video_filepath.exists():
                        # Already rendered on a previous run; keep the cache.
                        continue
                    img_path = os.path.join(path, name)
                    img = load_and_pad_img_dir(img_path)
                    ink = inkml_to_ink(inkml_file)
                    plot_ink_to_video(ink, str(video_filepath), input_image=img)