mertunsall committed on
Commit
fc19570
·
1 Parent(s): 8dc7996
Files changed (1) hide show
  1. app.py +456 -36
app.py CHANGED
@@ -1,12 +1,23 @@
1
  from functools import lru_cache
2
  import traceback
 
3
 
4
  import gradio as gr
5
- from huggingface_hub import HfApi
6
  from huggingface_hub.utils import HfHubHTTPError
7
 
8
 
9
  DEFAULT_REPO_ID = "mlfoundations-cua-dev/human_eval"
 
 
 
 
 
 
 
 
 
 
10
 
11
 
12
  api = HfApi()
@@ -40,6 +51,50 @@ def _get_subdirectories(repo_id: str, directory: str) -> list[str]:
40
  return child_dirs
41
 
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  def refresh_repo(repo_id: str):
44
  try:
45
  top_dirs, top_files = _extract_top_level(repo_id)
@@ -49,6 +104,9 @@ def refresh_repo(repo_id: str):
49
  return (
50
  gr.update(choices=[], value=None, interactive=False),
51
  gr.update(choices=[], value=None, interactive=False),
 
 
 
52
  gr.update(value=f"❌ Unable to load repo `{repo_id}`: {error}"),
53
  )
54
  except Exception as error: # pragma: no cover - network and auth edge cases
@@ -57,6 +115,9 @@ def refresh_repo(repo_id: str):
57
  return (
58
  gr.update(choices=[], value=None, interactive=False),
59
  gr.update(choices=[], value=None, interactive=False),
 
 
 
60
  gr.update(value=f"❌ Unexpected error loading `{repo_id}`: {error}"),
61
  )
62
 
@@ -70,51 +131,359 @@ def refresh_repo(repo_id: str):
70
  if not top_dirs:
71
  status_lines.append("• No sub-folders found at root.")
72
 
73
- dropdown_value = top_dirs[0] if top_dirs else None
74
-
75
- # Get subdirectories for the first top-level folder
76
- subdirs = _get_subdirectories(repo_id, dropdown_value) if dropdown_value else []
77
- subdir_value = subdirs[0] if subdirs else None
78
-
79
- first_dropdown_update = gr.update(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  choices=top_dirs,
81
- value=dropdown_value,
82
- interactive=bool(top_dirs),
83
  label="Top-level folders",
84
- info="Choose a folder to explore"
 
85
  )
86
-
87
- second_dropdown_update = gr.update(
88
- choices=subdirs,
89
- value=subdir_value,
90
- interactive=bool(subdirs),
91
- label="Subdirectories",
92
- info="Choose a subdirectory"
 
 
 
 
 
 
 
 
93
  )
94
 
95
- return first_dropdown_update, second_dropdown_update, gr.update(value="\n".join(status_lines))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
 
97
 
98
  def update_second_dropdown(repo_id: str, top_level_dir: str):
99
- """Update the second dropdown when the first dropdown selection changes."""
100
  try:
101
  if not top_level_dir:
102
- return gr.update(choices=[], value=None, interactive=False)
103
-
104
- subdirs = _get_subdirectories(repo_id, top_level_dir)
105
- subdir_value = subdirs[0] if subdirs else None
106
-
107
- return gr.update(
108
- choices=subdirs,
109
- value=subdir_value,
110
- interactive=bool(subdirs),
111
- label="Subdirectories",
112
- info="Choose a subdirectory"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
  )
114
  except Exception as error:
115
  print(f"[update_second_dropdown] Error for {repo_id}/{top_level_dir}: {error}", flush=True)
116
  print(traceback.format_exc(), flush=True)
117
- return gr.update(choices=[], value=None, interactive=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
 
119
 
120
  with gr.Blocks(title="HF Dataset Explorer") as demo:
@@ -135,24 +504,75 @@ Provide a dataset repository ID (e.g. `org/dataset`) to list its top-level folde
135
 
136
  status_display = gr.Markdown()
137
  folder_dropdown = gr.Dropdown(label="Top-level folders", interactive=False)
138
- subfolder_dropdown = gr.Dropdown(label="Subdirectories", interactive=False)
 
 
 
139
 
140
  reload_button.click(
141
  refresh_repo,
142
  inputs=repo_id_input,
143
- outputs=[folder_dropdown, subfolder_dropdown, status_display],
 
 
 
 
 
 
 
144
  )
145
 
146
  folder_dropdown.change(
147
  update_second_dropdown,
148
  inputs=[repo_id_input, folder_dropdown],
149
- outputs=subfolder_dropdown,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  )
151
 
152
  demo.load(
153
  refresh_repo,
154
  inputs=repo_id_input,
155
- outputs=[folder_dropdown, subfolder_dropdown, status_display],
 
 
 
 
 
 
 
156
  )
157
 
158
  if __name__ == "__main__":
 
1
  from functools import lru_cache
2
  import traceback
3
+ from typing import Optional
4
 
5
  import gradio as gr
6
+ from huggingface_hub import HfApi, hf_hub_url
7
  from huggingface_hub.utils import HfHubHTTPError
8
 
9
 
10
  DEFAULT_REPO_ID = "mlfoundations-cua-dev/human_eval"
11
# Lower-case filename suffixes that count as images. Kept as a tuple so it can
# be handed directly to ``str.endswith``.
IMAGE_EXTENSIONS = (
    ".jpg", ".jpeg", ".png", ".bmp",
    ".gif", ".webp", ".tif", ".tiff",
)
21
 
22
 
23
  api = HfApi()
 
51
  return child_dirs
52
 
53
 
54
+ def _build_path(*parts) -> str:
55
+ """Join path parts while skipping empty values."""
56
+ return "/".join(part for part in parts if part)
57
+
58
+
59
def _get_image_urls(repo_id: str, directory: str) -> list[str]:
    """List download URLs for the images sitting directly in ``directory``.

    Only files whose path is ``directory`` plus a single further component are
    considered; anything nested deeper is ignored. A file counts as an image
    when its lower-cased path ends with one of ``IMAGE_EXTENSIONS``. URLs are
    built with ``hf_hub_url`` for a dataset repository and returned in sorted
    path order. An empty ``directory`` yields an empty list.
    """
    if not directory:
        return []

    prefix = f"{directory}/"
    direct_images = []
    for candidate in _list_repo_files(repo_id):
        if not candidate.startswith(prefix):
            continue
        if "/" in candidate[len(prefix):]:
            # Lives in a sub-folder of ``directory``, not directly inside it.
            continue
        if candidate.lower().endswith(IMAGE_EXTENSIONS):
            direct_images.append(candidate)

    direct_images.sort()
    urls = []
    for image_path in direct_images:
        urls.append(
            hf_hub_url(repo_id=repo_id, filename=image_path, repo_type="dataset")
        )
    return urls
78
+
79
+
80
def _dropdown_update(
    *,
    choices: list[str],
    value: Optional[str],
    label: str,
    filled_info: str,
    empty_info: str,
):
    """Build the ``gr.update`` payload for one folder dropdown.

    With at least one choice the dropdown is interactive, shows ``value`` as
    selected and uses ``filled_info`` as its hint. With no choices it is
    disabled, its value is cleared and ``empty_info`` is shown instead.
    """
    if choices:
        selected, hint, enabled = value, filled_info, True
    else:
        selected, hint, enabled = None, empty_info, False

    return gr.update(
        choices=choices,
        value=selected,
        interactive=enabled,
        label=label,
        info=hint,
    )
96
+
97
+
98
  def refresh_repo(repo_id: str):
99
  try:
100
  top_dirs, top_files = _extract_top_level(repo_id)
 
104
  return (
105
  gr.update(choices=[], value=None, interactive=False),
106
  gr.update(choices=[], value=None, interactive=False),
107
+ gr.update(choices=[], value=None, interactive=False),
108
+ gr.update(choices=[], value=None, interactive=False),
109
+ gr.update(value=[]),
110
  gr.update(value=f"❌ Unable to load repo `{repo_id}`: {error}"),
111
  )
112
  except Exception as error: # pragma: no cover - network and auth edge cases
 
115
  return (
116
  gr.update(choices=[], value=None, interactive=False),
117
  gr.update(choices=[], value=None, interactive=False),
118
+ gr.update(choices=[], value=None, interactive=False),
119
+ gr.update(choices=[], value=None, interactive=False),
120
+ gr.update(value=[]),
121
  gr.update(value=f"❌ Unexpected error loading `{repo_id}`: {error}"),
122
  )
123
 
 
131
  if not top_dirs:
132
  status_lines.append("• No sub-folders found at root.")
133
 
134
+ top_value = top_dirs[0] if top_dirs else None
135
+
136
+ second_dirs = _get_subdirectories(repo_id, top_value) if top_value else []
137
+ second_value = second_dirs[0] if second_dirs else None
138
+
139
+ third_dirs = (
140
+ _get_subdirectories(repo_id, _build_path(top_value, second_value))
141
+ if second_value
142
+ else []
143
+ )
144
+ third_value = third_dirs[0] if third_dirs else None
145
+
146
+ fourth_dirs = (
147
+ _get_subdirectories(repo_id, _build_path(top_value, second_value, third_value))
148
+ if third_value
149
+ else []
150
+ )
151
+ fourth_value = fourth_dirs[0] if fourth_dirs else None
152
+
153
+ image_urls = (
154
+ _get_image_urls(
155
+ repo_id, _build_path(top_value, second_value, third_value, fourth_value)
156
+ )
157
+ if fourth_value
158
+ else []
159
+ )
160
+
161
+ first_dropdown_update = _dropdown_update(
162
  choices=top_dirs,
163
+ value=top_value,
 
164
  label="Top-level folders",
165
+ filled_info="Choose a folder to explore",
166
+ empty_info="No folders found at the repository root",
167
  )
168
+
169
+ second_dropdown_update = _dropdown_update(
170
+ choices=second_dirs,
171
+ value=second_value,
172
+ label="Second-level folders",
173
+ filled_info="Choose a second-level folder",
174
+ empty_info="No subdirectories under the selected folder",
175
+ )
176
+
177
+ third_dropdown_update = _dropdown_update(
178
+ choices=third_dirs,
179
+ value=third_value,
180
+ label="Third-level folders",
181
+ filled_info="Choose a third-level folder",
182
+ empty_info="No third-level folders under the selection",
183
  )
184
 
185
+ fourth_dropdown_update = _dropdown_update(
186
+ choices=fourth_dirs,
187
+ value=fourth_value,
188
+ label="Fourth-level folders",
189
+ filled_info="Choose a fourth-level folder",
190
+ empty_info="No fourth-level folders under the selection",
191
+ )
192
+
193
+ gallery_update = gr.update(value=image_urls)
194
+
195
+ return (
196
+ first_dropdown_update,
197
+ second_dropdown_update,
198
+ third_dropdown_update,
199
+ fourth_dropdown_update,
200
+ gallery_update,
201
+ gr.update(value="\n".join(status_lines)),
202
+ )
203
 
204
 
205
  def update_second_dropdown(repo_id: str, top_level_dir: str):
206
+ """Update downstream dropdowns when the top-level selection changes."""
207
  try:
208
  if not top_level_dir:
209
+ empty_second = _dropdown_update(
210
+ choices=[],
211
+ value=None,
212
+ label="Second-level folders",
213
+ filled_info="Choose a second-level folder",
214
+ empty_info="Select a top-level folder first",
215
+ )
216
+ empty_third = _dropdown_update(
217
+ choices=[],
218
+ value=None,
219
+ label="Third-level folders",
220
+ filled_info="Choose a third-level folder",
221
+ empty_info="Select a higher-level folder first",
222
+ )
223
+ empty_fourth = _dropdown_update(
224
+ choices=[],
225
+ value=None,
226
+ label="Fourth-level folders",
227
+ filled_info="Choose a fourth-level folder",
228
+ empty_info="Select a higher-level folder first",
229
+ )
230
+ return empty_second, empty_third, empty_fourth, gr.update(value=[])
231
+
232
+ second_dirs = _get_subdirectories(repo_id, top_level_dir)
233
+ second_value = second_dirs[0] if second_dirs else None
234
+
235
+ third_dirs = (
236
+ _get_subdirectories(repo_id, _build_path(top_level_dir, second_value))
237
+ if second_value
238
+ else []
239
+ )
240
+ third_value = third_dirs[0] if third_dirs else None
241
+
242
+ fourth_dirs = (
243
+ _get_subdirectories(repo_id, _build_path(top_level_dir, second_value, third_value))
244
+ if third_value
245
+ else []
246
+ )
247
+ fourth_value = fourth_dirs[0] if fourth_dirs else None
248
+
249
+ image_urls = (
250
+ _get_image_urls(
251
+ repo_id,
252
+ _build_path(top_level_dir, second_value, third_value, fourth_value),
253
+ )
254
+ if fourth_value
255
+ else []
256
+ )
257
+
258
+ return (
259
+ _dropdown_update(
260
+ choices=second_dirs,
261
+ value=second_value,
262
+ label="Second-level folders",
263
+ filled_info="Choose a second-level folder",
264
+ empty_info="No subdirectories under the selected folder",
265
+ ),
266
+ _dropdown_update(
267
+ choices=third_dirs,
268
+ value=third_value,
269
+ label="Third-level folders",
270
+ filled_info="Choose a third-level folder",
271
+ empty_info="No third-level folders under the selection",
272
+ ),
273
+ _dropdown_update(
274
+ choices=fourth_dirs,
275
+ value=fourth_value,
276
+ label="Fourth-level folders",
277
+ filled_info="Choose a fourth-level folder",
278
+ empty_info="No fourth-level folders under the selection",
279
+ ),
280
+ gr.update(value=image_urls),
281
  )
282
  except Exception as error:
283
  print(f"[update_second_dropdown] Error for {repo_id}/{top_level_dir}: {error}", flush=True)
284
  print(traceback.format_exc(), flush=True)
285
+ empty_second = _dropdown_update(
286
+ choices=[],
287
+ value=None,
288
+ label="Second-level folders",
289
+ filled_info="Choose a second-level folder",
290
+ empty_info="Unable to load subdirectories",
291
+ )
292
+ empty_third = _dropdown_update(
293
+ choices=[],
294
+ value=None,
295
+ label="Third-level folders",
296
+ filled_info="Choose a third-level folder",
297
+ empty_info="Unable to load subdirectories",
298
+ )
299
+ empty_fourth = _dropdown_update(
300
+ choices=[],
301
+ value=None,
302
+ label="Fourth-level folders",
303
+ filled_info="Choose a fourth-level folder",
304
+ empty_info="Unable to load subdirectories",
305
+ )
306
+ return empty_second, empty_third, empty_fourth, gr.update(value=[])
307
+
308
+
309
def update_third_dropdown(repo_id: str, top_level_dir: str, second_level_dir: str):
    """Rebuild the third and fourth dropdowns plus the gallery.

    Used when the second-level selection changes. The first third-level folder
    and then the first fourth-level folder are pre-selected, and the gallery is
    filled from that fourth-level folder when one exists.

    Returns a 3-tuple of updates: (third dropdown, fourth dropdown, gallery).
    Any exception is printed with its traceback and answered with cleared
    controls instead of being propagated.
    """

    def _cleared(reason):
        # Both dropdowns disabled with the same hint, and the gallery emptied.
        third_blank = _dropdown_update(
            choices=[],
            value=None,
            label="Third-level folders",
            filled_info="Choose a third-level folder",
            empty_info=reason,
        )
        fourth_blank = _dropdown_update(
            choices=[],
            value=None,
            label="Fourth-level folders",
            filled_info="Choose a fourth-level folder",
            empty_info=reason,
        )
        return third_blank, fourth_blank, gr.update(value=[])

    try:
        if not (top_level_dir and second_level_dir):
            return _cleared("Select higher-level folders first")

        third_dirs = _get_subdirectories(
            repo_id, _build_path(top_level_dir, second_level_dir)
        )
        third_value = third_dirs[0] if third_dirs else None

        fourth_dirs = []
        if third_value:
            fourth_dirs = _get_subdirectories(
                repo_id, _build_path(top_level_dir, second_level_dir, third_value)
            )
        fourth_value = fourth_dirs[0] if fourth_dirs else None

        # Images are only shown once a fourth-level folder is selected.
        image_urls = []
        if fourth_value:
            image_urls = _get_image_urls(
                repo_id,
                _build_path(top_level_dir, second_level_dir, third_value, fourth_value),
            )

        third_update = _dropdown_update(
            choices=third_dirs,
            value=third_value,
            label="Third-level folders",
            filled_info="Choose a third-level folder",
            empty_info="No third-level folders under the selection",
        )
        fourth_update = _dropdown_update(
            choices=fourth_dirs,
            value=fourth_value,
            label="Fourth-level folders",
            filled_info="Choose a fourth-level folder",
            empty_info="No fourth-level folders under the selection",
        )
        return third_update, fourth_update, gr.update(value=image_urls)
    except Exception as error:
        print(
            f"[update_third_dropdown] Error for {repo_id}/{top_level_dir}/{second_level_dir}: {error}",
            flush=True,
        )
        print(traceback.format_exc(), flush=True)
        return _cleared("Unable to load subdirectories")
390
+
391
+
392
def update_fourth_dropdown(
    repo_id: str,
    top_level_dir: str,
    second_level_dir: str,
    third_level_dir: str,
):
    """Rebuild the fourth dropdown and the gallery.

    Used when the third-level selection changes. The first fourth-level folder
    is pre-selected and the gallery is filled from it when one exists.

    Returns a 2-tuple of updates: (fourth dropdown, gallery). Any exception is
    printed with its traceback and answered with cleared controls instead of
    being propagated.
    """

    def _cleared(reason):
        # Disabled fourth dropdown carrying ``reason`` as hint, empty gallery.
        blank = _dropdown_update(
            choices=[],
            value=None,
            label="Fourth-level folders",
            filled_info="Choose a fourth-level folder",
            empty_info=reason,
        )
        return blank, gr.update(value=[])

    try:
        if not (top_level_dir and second_level_dir and third_level_dir):
            return _cleared("Select higher-level folders first")

        parent_path = _build_path(top_level_dir, second_level_dir, third_level_dir)
        fourth_dirs = _get_subdirectories(repo_id, parent_path)
        fourth_value = fourth_dirs[0] if fourth_dirs else None

        # Images are only shown once a fourth-level folder is selected.
        image_urls = []
        if fourth_value:
            image_urls = _get_image_urls(
                repo_id,
                _build_path(
                    top_level_dir,
                    second_level_dir,
                    third_level_dir,
                    fourth_value,
                ),
            )

        fourth_update = _dropdown_update(
            choices=fourth_dirs,
            value=fourth_value,
            label="Fourth-level folders",
            filled_info="Choose a fourth-level folder",
            empty_info="No fourth-level folders under the selection",
        )
        return fourth_update, gr.update(value=image_urls)
    except Exception as error:
        print(
            "[update_fourth_dropdown] Error for "
            f"{repo_id}/{top_level_dir}/{second_level_dir}/{third_level_dir}: {error}",
            flush=True,
        )
        print(traceback.format_exc(), flush=True)
        return _cleared("Unable to load subdirectories")
455
+
456
+
457
def update_gallery(
    repo_id: str,
    top_level_dir: str,
    second_level_dir: str,
    third_level_dir: str,
    fourth_level_dir: str,
):
    """Refresh the image gallery for the selected fourth-level folder.

    Returns a gallery update holding the image URLs found directly inside the
    folder named by the four selections. The gallery is emptied when any level
    is unselected, or when the lookup raises (the error and its traceback are
    printed).
    """
    try:
        levels = (top_level_dir, second_level_dir, third_level_dir, fourth_level_dir)
        if not all(levels):
            return gr.update(value=[])

        folder_path = _build_path(*levels)
        return gr.update(value=_get_image_urls(repo_id, folder_path))
    except Exception as error:
        print(
            "[update_gallery] Error for "
            f"{repo_id}/{top_level_dir}/{second_level_dir}/{third_level_dir}/{fourth_level_dir}: {error}",
            flush=True,
        )
        print(traceback.format_exc(), flush=True)
        return gr.update(value=[])
487
 
488
 
489
  with gr.Blocks(title="HF Dataset Explorer") as demo:
 
504
 
505
  status_display = gr.Markdown()
506
  folder_dropdown = gr.Dropdown(label="Top-level folders", interactive=False)
507
+ second_level_dropdown = gr.Dropdown(label="Second-level folders", interactive=False)
508
+ third_level_dropdown = gr.Dropdown(label="Third-level folders", interactive=False)
509
+ fourth_level_dropdown = gr.Dropdown(label="Fourth-level folders", interactive=False)
510
+ image_gallery = gr.Gallery(label="Images", columns=4)
511
 
512
  reload_button.click(
513
  refresh_repo,
514
  inputs=repo_id_input,
515
+ outputs=[
516
+ folder_dropdown,
517
+ second_level_dropdown,
518
+ third_level_dropdown,
519
+ fourth_level_dropdown,
520
+ image_gallery,
521
+ status_display,
522
+ ],
523
  )
524
 
525
  folder_dropdown.change(
526
  update_second_dropdown,
527
  inputs=[repo_id_input, folder_dropdown],
528
+ outputs=[
529
+ second_level_dropdown,
530
+ third_level_dropdown,
531
+ fourth_level_dropdown,
532
+ image_gallery,
533
+ ],
534
+ )
535
+
536
+ second_level_dropdown.change(
537
+ update_third_dropdown,
538
+ inputs=[repo_id_input, folder_dropdown, second_level_dropdown],
539
+ outputs=[third_level_dropdown, fourth_level_dropdown, image_gallery],
540
+ )
541
+
542
+ third_level_dropdown.change(
543
+ update_fourth_dropdown,
544
+ inputs=[
545
+ repo_id_input,
546
+ folder_dropdown,
547
+ second_level_dropdown,
548
+ third_level_dropdown,
549
+ ],
550
+ outputs=[fourth_level_dropdown, image_gallery],
551
+ )
552
+
553
+ fourth_level_dropdown.change(
554
+ update_gallery,
555
+ inputs=[
556
+ repo_id_input,
557
+ folder_dropdown,
558
+ second_level_dropdown,
559
+ third_level_dropdown,
560
+ fourth_level_dropdown,
561
+ ],
562
+ outputs=[image_gallery],
563
  )
564
 
565
  demo.load(
566
  refresh_repo,
567
  inputs=repo_id_input,
568
+ outputs=[
569
+ folder_dropdown,
570
+ second_level_dropdown,
571
+ third_level_dropdown,
572
+ fourth_level_dropdown,
573
+ image_gallery,
574
+ status_display,
575
+ ],
576
  )
577
 
578
  if __name__ == "__main__":