inoki-giskard committed on
Commit
01942d8
1 Parent(s): d6b3b9f

Add dataset probing and validation

Browse files
Files changed (1) hide show
  1. app.py +67 -2
app.py CHANGED
@@ -1,10 +1,50 @@
1
  import gradio as gr
 
2
 
3
 
4
  theme = gr.themes.Soft(
5
  primary_hue="green",
6
  )
7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  with gr.Blocks(theme=theme) as iface:
9
  with gr.Row():
10
  with gr.Column():
@@ -28,13 +68,38 @@ with gr.Blocks(theme=theme) as iface:
28
  placeholder="tweet_eval",
29
  )
30
 
31
- gr.Dropdown(
32
  label="Hugging Face dataset subset",
 
 
 
 
 
33
  )
34
 
35
- gr.Dropdown(
36
  label="Hugging Face dataset split",
 
 
 
 
 
37
  )
38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  iface.queue(max_size=20)
40
  iface.launch()
 
1
  import gradio as gr
2
+ import datasets
3
 
4
 
5
  theme = gr.themes.Soft(
6
  primary_hue="green",
7
  )
8
 
9
+
10
def check_dataset(dataset_id, dataset_config="default", dataset_split="test"):
    """Probe a Hugging Face dataset and report which selections are valid.

    Returns a ``(dataset_id, config, split)`` tuple whose members encode the
    outcome of the probe:

    * ``(None, dataset_config, dataset_split)`` -- the config names could not
      be listed for ``dataset_id``.
    * ``(dataset_id, [configs...], dataset_split)`` -- ``dataset_config`` is
      not one of the dataset's configs; the list holds the available ones.
    * ``(dataset_id, None, [splits...])`` -- the loaded object is a
      ``DatasetDict`` that has no ``dataset_split`` key; the list holds the
      available split names.
    * ``(dataset_id, None, None)`` -- the loaded object is neither a
      ``DatasetDict`` nor a ``Dataset``.
    * ``(dataset_id, dataset_config, dataset_split)`` -- everything checked
      out.
    """
    try:
        available_configs = datasets.get_dataset_config_names(dataset_id)
    except Exception:
        # Any failure to list configs is treated as "dataset may not exist".
        return None, dataset_config, dataset_split

    if dataset_config not in available_configs:
        # The caller has to pick one of the real subsets (configs).
        return dataset_id, available_configs, dataset_split

    # NOTE(review): errors raised here are not caught and propagate to the
    # caller; this call presumably fetches the dataset itself -- confirm
    # whether a lighter split-name lookup would be sufficient.
    loaded = datasets.load_dataset(dataset_id, dataset_config)

    if isinstance(loaded, datasets.DatasetDict):
        if dataset_split in loaded:
            return dataset_id, dataset_config, dataset_split
        # The caller has to pick one of the real splits.
        return dataset_id, None, list(loaded)

    if isinstance(loaded, datasets.Dataset):
        return dataset_id, dataset_config, dataset_split

    # Unknown return type from load_dataset.
    return dataset_id, None, None
31
+
32
+
33
def try_submit(dataset_id, dataset_config, dataset_split):
    """Validate the dataset selection and compute new dropdown values.

    Runs ``check_dataset`` on the user's input and raises a ``gr.Warning``
    for each problem it reports.

    Args:
        dataset_id: Dataset identifier entered by the user.
        dataset_config: Currently selected dataset subset (config).
        dataset_split: Currently selected dataset split.

    Returns:
        A ``(config, split)`` pair for the config and split dropdowns. Each
        member is either a plain value or a ``gr.Dropdown.update(...)`` that
        replaces the dropdown's choices with the valid ones.
    """
    # Validate dataset
    d_id, config, split = check_dataset(
        dataset_id=dataset_id,
        dataset_config=dataset_config,
        dataset_split=dataset_split,
    )

    if d_id is None:
        gr.Warning(f'Dataset "{dataset_id}" is not accessible. Please set your HF_TOKEN if it is a private dataset.')

    if isinstance(config, list):
        gr.Warning(f'Dataset "{dataset_id}" does not have "{dataset_config}" config. Please choose a valid config.')
        # Guard the preselection so an empty list does not raise IndexError.
        config = gr.Dropdown.update(choices=config, value=config[0] if config else None)
    elif config is None:
        # check_dataset returns None for the config when the config itself
        # was accepted but a later check failed; keep the user's selection
        # rather than sending None to the config dropdown.
        config = dataset_config

    if isinstance(split, list):
        gr.Warning(f'Dataset "{dataset_id}" does not have "{dataset_split}" split. Please choose a valid split.')
        split = gr.Dropdown.update(choices=split, value=split[0] if split else None)

    return config, split
47
+
48
  with gr.Blocks(theme=theme) as iface:
49
  with gr.Row():
50
  with gr.Column():
 
68
  placeholder="tweet_eval",
69
  )
70
 
71
+ dataset_config_input = gr.Dropdown(
72
  label="Hugging Face dataset subset",
73
+ choices=[
74
+ "default",
75
+ ],
76
+ allow_custom_value=True,
77
+ value="default",
78
  )
79
 
80
+ dataset_split_input = gr.Dropdown(
81
  label="Hugging Face dataset split",
82
+ choices=[
83
+ "test",
84
+ ],
85
+ allow_custom_value=True,
86
+ value="test",
87
  )
88
 
89
+ with gr.Row():
90
+ run_btn = gr.Button("Validate and submit", variant="primary")
91
+ run_btn.click(
92
+ try_submit,
93
+ inputs=[
94
+ dataset_id_input,
95
+ dataset_config_input,
96
+ dataset_split_input
97
+ ],
98
+ outputs=[
99
+ dataset_config_input,
100
+ dataset_split_input
101
+ ],
102
+ )
103
+
104
  iface.queue(max_size=20)
105
  iface.launch()