inoki-giskard ZeroCommand committed
Commit 9e4233f
1 Parent(s): e84a2e8

restructure and improve user interface with dropdown (#14)


- change structure and improve ui (1b0a56bc85f9a75dfb26b429a583738482a69162)
- clean up and change run btn (4434857b3c7422d8e0b9532200df04e683af5fd5)


Co-authored-by: zcy <ZeroCommand@users.noreply.huggingface.co>

Files changed (9):
  1. app.py +8 -367
  2. app_leaderboard.py +0 -0
  3. app_legacy.py +373 -0
  4. app_text_classification.py +232 -0
  5. cicd +0 -1
  6. config.yaml +3 -6
  7. text_classification.py +133 -38
  8. utils.py +23 -3
  9. wordings.py +17 -0
app.py CHANGED
@@ -1,374 +1,15 @@
- import gradio as gr
- import datasets
- import huggingface_hub
- import os
- import time
- import subprocess
- import logging
-
- import json
-
- from transformers.pipelines import TextClassificationPipeline
-
- from text_classification import check_column_mapping_keys_validity, text_classification_fix_column_mapping
- from utils import read_scanners, write_scanners, read_inference_type, write_inference_type, convert_column_mapping_to_json
-
- HF_REPO_ID = 'HF_REPO_ID'
- HF_SPACE_ID = 'SPACE_ID'
- HF_WRITE_TOKEN = 'HF_WRITE_TOKEN'
-
- theme = gr.themes.Soft(
-     primary_hue="green",
- )
-
- def check_model(model_id):
-     try:
-         task = huggingface_hub.model_info(model_id).pipeline_tag
-     except Exception:
-         return None, None
-
-     try:
-         from transformers import pipeline
-         ppl = pipeline(task=task, model=model_id)
-
-         return model_id, ppl
-     except Exception as e:
-         return model_id, e
-
-
- def check_dataset(dataset_id, dataset_config="default", dataset_split="test"):
-     try:
-         configs = datasets.get_dataset_config_names(dataset_id)
-     except Exception:
-         # Dataset may not exist
-         return None, dataset_config, dataset_split
-
-     if dataset_config not in configs:
-         # Need to choose dataset subset (config)
-         return dataset_id, configs, dataset_split
-
-     ds = datasets.load_dataset(dataset_id, dataset_config)
-
-     if isinstance(ds, datasets.DatasetDict):
-         # Need to choose dataset split
-         if dataset_split not in ds.keys():
-             return dataset_id, None, list(ds.keys())
-     elif not isinstance(ds, datasets.Dataset):
-         # Unknown type
-         return dataset_id, None, None
-     return dataset_id, dataset_config, dataset_split
-
- def try_validate(m_id, ppl, dataset_id, dataset_config, dataset_split, column_mapping='{}'):
-     # Validate model
-     if m_id is None:
-         gr.Warning('Model is not accessible. Please set your HF_TOKEN if it is a private model.')
-         return (
-             gr.update(interactive=False),  # Submit button
-             gr.update(visible=True),  # Loading row
-             gr.update(visible=False),  # Preview row
-             gr.update(visible=False),  # Model prediction input
-             gr.update(visible=False),  # Model prediction preview
-             gr.update(visible=False),  # Label mapping preview
-             gr.update(visible=False),  # feature mapping preview
-         )
-     if isinstance(ppl, Exception):
-         gr.Warning(f'Failed to load model": {ppl}')
-         return (
-             gr.update(interactive=False),  # Submit button
-             gr.update(visible=True),  # Loading row
-             gr.update(visible=False),  # Preview row
-             gr.update(visible=False),  # Model prediction input
-             gr.update(visible=False),  # Model prediction preview
-             gr.update(visible=False),  # Label mapping preview
-             gr.update(visible=False),  # feature mapping preview
-         )
-
-     # Validate dataset
-     d_id, config, split = check_dataset(dataset_id=dataset_id, dataset_config=dataset_config, dataset_split=dataset_split)
-
-     dataset_ok = False
-     if d_id is None:
-         gr.Warning(f'Dataset "{dataset_id}" is not accessible. Please set your HF_TOKEN if it is a private dataset.')
-     elif isinstance(config, list):
-         gr.Warning(f'Dataset "{dataset_id}" does not have "{dataset_config}" config. Please choose a valid config.')
-         config = gr.update(choices=config, value=config[0])
-     elif isinstance(split, list):
-         gr.Warning(f'Dataset "{dataset_id}" does not have "{dataset_split}" split. Please choose a valid split.')
-         split = gr.update(choices=split, value=split[0])
-     else:
-         dataset_ok = True
-
-     if not dataset_ok:
-         return (
-             gr.update(interactive=False),  # Submit button
-             gr.update(visible=True),  # Loading row
-             gr.update(visible=False),  # Preview row
-             gr.update(visible=False),  # Model prediction input
-             gr.update(visible=False),  # Model prediction preview
-             gr.update(visible=False),  # Label mapping preview
-             gr.update(visible=False),  # feature mapping preview
-         )
-
-     # TODO: Validate column mapping by running once
-     prediction_result = None
-     id2label_df = None
-     if isinstance(ppl, TextClassificationPipeline):
-         try:
-             print('validating phase, ', column_mapping)
-             column_mapping = json.loads(column_mapping)
-         except Exception:
-             column_mapping = {}
-
-         column_mapping, prediction_input, prediction_result, id2label_df, feature_df = \
-             text_classification_fix_column_mapping(column_mapping, ppl, d_id, config, split)
-
-         column_mapping = json.dumps(column_mapping, indent=2)
 
-     if prediction_result is None and id2label_df is not None:
-         gr.Warning('The model failed to predict with the first row in the dataset. Please provide column mappings in "Advance" settings.')
-         return (
-             gr.update(interactive=False),  # Submit button
-             gr.update(visible=False),  # Loading row
-             gr.update(visible=True),  # Preview row
-             gr.update(value=f'**Sample Input**: {prediction_input}', visible=True),  # Model prediction input
-             gr.update(visible=False),  # Model prediction preview
-             gr.update(value=id2label_df, visible=True, interactive=True),  # Label mapping preview
-             gr.update(value=feature_df, visible=True, interactive=True),  # feature mapping preview
-         )
-     elif id2label_df is None:
-         gr.Warning('The prediction result does not conform the labels in the dataset. Please provide label mappings in "Advance" settings.')
-         return (
-             gr.update(interactive=False),  # Submit button
-             gr.update(visible=False),  # Loading row
-             gr.update(visible=True),  # Preview row
-             gr.update(value=f'**Sample Input**: {prediction_input}', visible=True),  # Model prediction input
-             gr.update(value=prediction_result, visible=True),  # Model prediction preview
-             gr.update(visible=True, interactive=True),  # Label mapping preview
-             gr.update(visible=True, interactive=True),  # feature mapping preview
-         )
+ # Start apps
+ # from pathlib import Path
 
-     gr.Info("Model and dataset validations passed. Your can submit the evaluation task.")
-
-     return (
-         gr.update(interactive=True),  # Submit button
-         gr.update(visible=False),  # Loading row
-         gr.update(visible=True),  # Preview row
-         gr.update(value=f'**Sample Input**: {prediction_input}', visible=True),  # Model prediction input
-         gr.update(value=prediction_result, visible=True),  # Model prediction preview
-         gr.update(value=id2label_df, visible=True, interactive=True),  # Label mapping preview
-         gr.update(value=feature_df, visible=True, interactive=True),  # feature mapping preview
-     )
-
-
- def try_submit(m_id, d_id, config, split, id2label_mapping_dataframe, feature_mapping_dataframe, local):
-     label_mapping = {}
-     for i, label in id2label_mapping_dataframe["Model Prediction Labels"].items():
-         label_mapping.update({str(i): label})
-
-     feature_mapping = {}
-     for i, feature in feature_mapping_dataframe["Dataset Features"].items():
-         feature_mapping.update({feature_mapping_dataframe["Model Input Features"][i]: feature})
-
-     # TODO: Set column mapping for some dataset such as `amazon_polarity`
-
-     if local:
-         command = [
-             "python",
-             "cli.py",
-             "--loader", "huggingface",
-             "--model", m_id,
-             "--dataset", d_id,
-             "--dataset_config", config,
-             "--dataset_split", split,
-             "--hf_token", os.environ.get(HF_WRITE_TOKEN),
-             "--discussion_repo", os.environ.get(HF_REPO_ID) or os.environ.get(HF_SPACE_ID),
-             "--output_format", "markdown",
-             "--output_portal", "huggingface",
-             "--feature_mapping", json.dumps(feature_mapping),
-             "--label_mapping", json.dumps(label_mapping),
-             "--scan_config", "../config.yaml",
-         ]
-
-         eval_str = f"[{m_id}]<{d_id}({config}, {split} set)>"
-         start = time.time()
-         logging.info(f"Start local evaluation on {eval_str}")
-
-         evaluator = subprocess.Popen(
-             command,
-             cwd=os.path.join(os.path.dirname(os.path.realpath(__file__)), "cicd"),
-             stderr=subprocess.STDOUT,
-         )
-         result = evaluator.wait()
-
-         logging.info(f"Finished local evaluation exit code {result} on {eval_str}: {time.time() - start:.2f}s")
+ import gradio as gr
 
-         gr.Info(f"Finished local evaluation exit code {result} on {eval_str}: {time.time() - start:.2f}s")
-     else:
-         gr.Info("TODO: Submit task to an endpoint")
-
-     return gr.update(interactive=True)  # Submit button
+ from app_text_classification import get_demo as get_demo_text_classification
 
 
- with gr.Blocks(theme=theme) as iface:
+ with gr.Blocks(theme=gr.themes.Soft(primary_hue="green")) as demo:
      with gr.Tab("Text Classification"):
-         def check_dataset_and_get_config(dataset_id):
-             try:
-                 configs = datasets.get_dataset_config_names(dataset_id)
-                 return gr.Dropdown(configs, value=configs[0], visible=True)
-             except Exception:
-                 # Dataset may not exist
-                 pass
-
-         def check_dataset_and_get_split(dataset_config, dataset_id):
-             try:
-                 splits = list(datasets.load_dataset(dataset_id, dataset_config).keys())
-                 return gr.Dropdown(splits, value=splits[0], visible=True)
-             except Exception as e:
-                 # Dataset may not exist
-                 gr.Warning(f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}")
-                 pass
-
-         def gate_validate_btn(model_id, dataset_id, dataset_config, dataset_split, id2label_mapping_dataframe=None, feature_mapping_dataframe=None):
-             column_mapping = '{}'
-             _, ppl = check_model(model_id=model_id)
-
-             if id2label_mapping_dataframe is not None:
-                 labels = convert_column_mapping_to_json(id2label_mapping_dataframe.value, label="data")
-                 features = convert_column_mapping_to_json(feature_mapping_dataframe.value, label="text")
-                 column_mapping = json.dumps({**labels, **features}, indent=2)
-
-             if check_column_mapping_keys_validity(column_mapping, ppl) is False:
-                 gr.Warning('Label mapping table has invalid contents. Please check again.')
-                 return (gr.update(interactive=False),
-                         gr.update(),
-                         gr.update(),
-                         gr.update(),
-                         gr.update(),
-                         gr.update(),
-                         gr.update())
-             else:
-                 if model_id and dataset_id and dataset_config and dataset_split:
-                     return try_validate(model_id, ppl, dataset_id, dataset_config, dataset_split, column_mapping)
-                 else:
-                     return (gr.update(interactive=False),
-                             gr.update(visible=True),
-                             gr.update(visible=False),
-                             gr.update(visible=False),
-                             gr.update(visible=False),
-                             gr.update(visible=False),
-                             gr.update(visible=False))
-         with gr.Row():
-             gr.Markdown('''
-             <h1 style="text-align: center;">
-             Giskard Evaluator
-             </h1>
-             Welcome to Giskard Evaluator Space! Get your report immediately by simply input your model id and dataset id below. Follow our leads and improve your model in no time.
-             ''')
-         with gr.Row():
-             run_local = gr.Checkbox(value=True, label="Run in this Space")
-             use_inference = read_inference_type('./config.yaml') == 'hf_inference_api'
-             run_inference = gr.Checkbox(value=use_inference, label="Run with Inference API")
-
-         with gr.Row() as advanced_row:
-             selected = read_scanners('./config.yaml')
-             scan_config = selected + ['data_leakage']
-             scanners = gr.CheckboxGroup(choices=scan_config, value=selected, label='Scan Settings', visible=True)
-
-         with gr.Row():
-             model_id_input = gr.Textbox(
-                 label="Hugging Face model id",
-                 placeholder="cardiffnlp/twitter-roberta-base-sentiment-latest",
-             )
-
-             dataset_id_input = gr.Textbox(
-                 label="Hugging Face Dataset id",
-                 placeholder="tweet_eval",
-             )
-         with gr.Row():
-             dataset_config_input = gr.Dropdown(['default'], value='default', label='Dataset Config', visible=False)
-             dataset_split_input = gr.Dropdown(['default'], value='default', label='Dataset Split', visible=False)
-
-         dataset_id_input.blur(check_dataset_and_get_config, dataset_id_input, dataset_config_input)
-         dataset_id_input.submit(check_dataset_and_get_config, dataset_id_input, dataset_config_input)
-
-         dataset_config_input.change(
-             check_dataset_and_get_split,
-             inputs=[dataset_config_input, dataset_id_input],
-             outputs=[dataset_split_input])
-
-         with gr.Row(visible=True) as loading_row:
-             gr.Markdown('''
-             <p style="text-align: center;">
-             🚀🐢Please validate your model and dataset first...
-             </p>
-             ''')
-
-         with gr.Row(visible=False) as preview_row:
-             gr.Markdown('''
-             <h1 style="text-align: center;">
-             Confirm Pre-processing Details
-             </h1>
-             Base on your model and dataset, we inferred this label mapping and feature mapping. <b>If the mapping is incorrect, please modify it in the table below.</b>
-             ''')
-
-         with gr.Row():
-             id2label_mapping_dataframe = gr.DataFrame(label="Preview of label mapping", interactive=True, visible=False)
-             feature_mapping_dataframe = gr.DataFrame(label="Preview of feature mapping", interactive=True, visible=False)
-         with gr.Row():
-             example_input = gr.Markdown('Sample Input: ', visible=False)
-
-         with gr.Row():
-             example_labels = gr.Label(label='Model Prediction Sample', visible=False)
-
-         run_btn = gr.Button(
-             "Get Evaluation Result",
-             variant="primary",
-             interactive=False,
-             size="lg",
-         )
-
-         model_id_input.blur(gate_validate_btn,
-                             inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input],
-                             outputs=[run_btn, loading_row, preview_row, example_input, example_labels, id2label_mapping_dataframe, feature_mapping_dataframe])
-         dataset_id_input.blur(gate_validate_btn,
-                               inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input],
-                               outputs=[run_btn, loading_row, preview_row, example_input, example_labels, id2label_mapping_dataframe, feature_mapping_dataframe])
-         dataset_config_input.change(gate_validate_btn,
-                                     inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input],
-                                     outputs=[run_btn, loading_row, preview_row, example_input, example_labels, id2label_mapping_dataframe, feature_mapping_dataframe])
-         dataset_split_input.change(gate_validate_btn,
-                                    inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input],
-                                    outputs=[run_btn, loading_row, preview_row, example_input, example_labels, id2label_mapping_dataframe, feature_mapping_dataframe])
-         id2label_mapping_dataframe.input(gate_validate_btn,
-                                          inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input, id2label_mapping_dataframe, feature_mapping_dataframe],
-                                          outputs=[run_btn, loading_row, preview_row, example_input, example_labels, id2label_mapping_dataframe, feature_mapping_dataframe])
-         feature_mapping_dataframe.input(gate_validate_btn,
-                                         inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input, id2label_mapping_dataframe, feature_mapping_dataframe],
-                                         outputs=[run_btn, loading_row, preview_row, example_input, example_labels, id2label_mapping_dataframe, feature_mapping_dataframe])
-         scanners.change(write_scanners, inputs=scanners)
-         run_inference.change(
-             write_inference_type,
-             inputs=[run_inference]
-         )
-
-         run_btn.click(
-             try_submit,
-             inputs=[
-                 model_id_input,
-                 dataset_id_input,
-                 dataset_config_input,
-                 dataset_split_input,
-                 id2label_mapping_dataframe,
-                 feature_mapping_dataframe,
-                 run_local,
-             ],
-             outputs=[
-                 run_btn,
-             ],
-         )
-
-     with gr.Tab("More"):
+         get_demo_text_classification()
+     with gr.Tab("Leaderboard - Text Classification"):
          pass
-
- if __name__ == "__main__":
-     iface.queue(max_size=20).launch()
+ demo.launch()
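The refactor turns app.py into a thin launcher: each tab delegates to a `get_demo()` factory defined in its own module. A minimal sketch of the contract a future tab module would satisfy under this pattern (the module and component names below are hypothetical, not part of the commit):

```python
# hypothetical app_new_tab.py -- sketch of the tab-module contract.
# get_demo() is called inside `with gr.Tab(...):` in app.py, so every
# component created here is attached to that tab by Gradio's context manager.
import gradio as gr

def get_demo():
    with gr.Row():
        name = gr.Textbox(label="Name")
        greeting = gr.Markdown()
    # events can be wired here too; they stay scoped to this tab's components
    name.change(lambda n: f"Hello, {n}!", inputs=name, outputs=greeting)
```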
app_leaderboard.py ADDED
File without changes
app_legacy.py ADDED
@@ -0,0 +1,373 @@
+ import gradio as gr
+ import datasets
+ import huggingface_hub
+ import os
+ import time
+ import subprocess
+ import logging
+
+ import json
+
+ from transformers.pipelines import TextClassificationPipeline
+
+ from text_classification import check_column_mapping_keys_validity, text_classification_fix_column_mapping
+ from utils import read_scanners, write_scanners, read_inference_type, write_inference_type, convert_column_mapping_to_json
+ from wordings import CONFIRM_MAPPING_DETAILS_MD, CONFIRM_MAPPING_DETAILS_FAIL_MD
+
+ HF_REPO_ID = 'HF_REPO_ID'
+ HF_SPACE_ID = 'SPACE_ID'
+ HF_WRITE_TOKEN = 'HF_WRITE_TOKEN'
+
+ def check_model(model_id):
+     try:
+         task = huggingface_hub.model_info(model_id).pipeline_tag
+     except Exception:
+         return None, None
+
+     try:
+         from transformers import pipeline
+         ppl = pipeline(task=task, model=model_id)
+
+         return model_id, ppl
+     except Exception as e:
+         return model_id, e
+
+
+ def check_dataset(dataset_id, dataset_config="default", dataset_split="test"):
+     try:
+         configs = datasets.get_dataset_config_names(dataset_id)
+     except Exception:
+         # Dataset may not exist
+         return None, dataset_config, dataset_split
+
+     if dataset_config not in configs:
+         # Need to choose dataset subset (config)
+         return dataset_id, configs, dataset_split
+
+     ds = datasets.load_dataset(dataset_id, dataset_config)
+
+     if isinstance(ds, datasets.DatasetDict):
+         # Need to choose dataset split
+         if dataset_split not in ds.keys():
+             return dataset_id, None, list(ds.keys())
+     elif not isinstance(ds, datasets.Dataset):
+         # Unknown type
+         return dataset_id, None, None
+     return dataset_id, dataset_config, dataset_split
+
+ def try_validate(m_id, ppl, dataset_id, dataset_config, dataset_split, column_mapping='{}'):
+     # Validate model
+     if m_id is None:
+         gr.Warning('Model is not accessible. Please set your HF_TOKEN if it is a private model.')
+         return (
+             gr.update(interactive=False),  # Submit button
+             gr.update(visible=True),  # Loading row
+             gr.update(visible=False),  # Preview row
+             gr.update(visible=False),  # Model prediction input
+             gr.update(visible=False),  # Model prediction preview
+             gr.update(visible=False),  # Label mapping preview
+             gr.update(visible=False),  # feature mapping preview
+         )
+     if isinstance(ppl, Exception):
+         gr.Warning(f'Failed to load model": {ppl}')
+         return (
+             gr.update(interactive=False),  # Submit button
+             gr.update(visible=True),  # Loading row
+             gr.update(visible=False),  # Preview row
+             gr.update(visible=False),  # Model prediction input
+             gr.update(visible=False),  # Model prediction preview
+             gr.update(visible=False),  # Label mapping preview
+             gr.update(visible=False),  # feature mapping preview
+         )
+
+     # Validate dataset
+     d_id, config, split = check_dataset(dataset_id=dataset_id, dataset_config=dataset_config, dataset_split=dataset_split)
+
+     dataset_ok = False
+     if d_id is None:
+         gr.Warning(f'Dataset "{dataset_id}" is not accessible. Please set your HF_TOKEN if it is a private dataset.')
+     elif isinstance(config, list):
+         gr.Warning(f'Dataset "{dataset_id}" does not have "{dataset_config}" config. Please choose a valid config.')
+         config = gr.update(choices=config, value=config[0])
+     elif isinstance(split, list):
+         gr.Warning(f'Dataset "{dataset_id}" does not have "{dataset_split}" split. Please choose a valid split.')
+         split = gr.update(choices=split, value=split[0])
+     else:
+         dataset_ok = True
+
+     if not dataset_ok:
+         return (
+             gr.update(interactive=False),  # Submit button
+             gr.update(visible=True),  # Loading row
+             gr.update(visible=False),  # Preview row
+             gr.update(visible=False),  # Model prediction input
+             gr.update(visible=False),  # Model prediction preview
+             gr.update(visible=False),  # Label mapping preview
+             gr.update(visible=False),  # feature mapping preview
+         )
+
+     # TODO: Validate column mapping by running once
+     prediction_result = None
+     id2label_df = None
+     if isinstance(ppl, TextClassificationPipeline):
+         try:
+             column_mapping = json.loads(column_mapping)
+         except Exception:
+             column_mapping = {}
+
+         column_mapping, prediction_input, prediction_result, id2label_df, feature_df = \
+             text_classification_fix_column_mapping(column_mapping, ppl, d_id, config, split)
+
+         column_mapping = json.dumps(column_mapping, indent=2)
+
+     if prediction_result is None and id2label_df is not None:
+         gr.Warning('The model failed to predict with the first row in the dataset. Please provide feature mappings in "Advance" settings.')
+         return (
+             gr.update(interactive=False),  # Submit button
+             gr.update(visible=False),  # Loading row
+             gr.update(CONFIRM_MAPPING_DETAILS_MD, visible=True),  # Preview row
+             gr.update(value=f'**Sample Input**: {prediction_input}', visible=True),  # Model prediction input
+             gr.update(visible=False),  # Model prediction preview
+             gr.update(value=id2label_df, visible=True, interactive=True),  # Label mapping preview
+             gr.update(value=feature_df, visible=True, interactive=True),  # feature mapping preview
+         )
+     elif id2label_df is None:
+         gr.Warning('The prediction result does not conform the labels in the dataset. Please provide label mappings in "Advance" settings.')
+         return (
+             gr.update(interactive=False),  # Submit button
+             gr.update(visible=False),  # Loading row
+             gr.update(CONFIRM_MAPPING_DETAILS_MD, visible=True),  # Preview row
+             gr.update(value=f'**Sample Input**: {prediction_input}', visible=True),  # Model prediction input
+             gr.update(value=prediction_result, visible=True),  # Model prediction preview
+             gr.update(visible=True, interactive=True),  # Label mapping preview
+             gr.update(visible=True, interactive=True),  # feature mapping preview
+         )
+
+     gr.Info("Model and dataset validations passed. Your can submit the evaluation task.")
+
+     return (
+         gr.update(interactive=True),  # Submit button
+         gr.update(visible=False),  # Loading row
+         gr.update(CONFIRM_MAPPING_DETAILS_MD, visible=True),  # Preview row
+         gr.update(value=f'**Sample Input**: {prediction_input}', visible=True),  # Model prediction input
+         gr.update(value=prediction_result, visible=True),  # Model prediction preview
+         gr.update(value=id2label_df, visible=True, interactive=True),  # Label mapping preview
+         gr.update(value=feature_df, visible=True, interactive=True),  # feature mapping preview
+     )
+
+
+ def try_submit(m_id, d_id, config, split, id2label_mapping_dataframe, feature_mapping_dataframe, local):
+     label_mapping = {}
+     for i, label in id2label_mapping_dataframe["Model Prediction Labels"].items():
+         label_mapping.update({str(i): label})
+
+     feature_mapping = {}
+     for i, feature in feature_mapping_dataframe["Dataset Features"].items():
+         feature_mapping.update({feature_mapping_dataframe["Model Input Features"][i]: feature})
+
+     # TODO: Set column mapping for some dataset such as `amazon_polarity`
+
+     if local:
+         command = [
+             "python",
+             "cli.py",
+             "--loader", "huggingface",
+             "--model", m_id,
+             "--dataset", d_id,
+             "--dataset_config", config,
+             "--dataset_split", split,
+             "--hf_token", os.environ.get(HF_WRITE_TOKEN),
+             "--discussion_repo", os.environ.get(HF_REPO_ID) or os.environ.get(HF_SPACE_ID),
+             "--output_format", "markdown",
+             "--output_portal", "huggingface",
+             "--feature_mapping", json.dumps(feature_mapping),
+             "--label_mapping", json.dumps(label_mapping),
+             "--scan_config", "../config.yaml",
+         ]
+
+         eval_str = f"[{m_id}]<{d_id}({config}, {split} set)>"
+         start = time.time()
+         logging.info(f"Start local evaluation on {eval_str}")
+
+         evaluator = subprocess.Popen(
+             command,
+             cwd=os.path.join(os.path.dirname(os.path.realpath(__file__)), "cicd"),
+             stderr=subprocess.STDOUT,
+         )
+         result = evaluator.wait()
+
+         logging.info(f"Finished local evaluation exit code {result} on {eval_str}: {time.time() - start:.2f}s")
+
+         gr.Info(f"Finished local evaluation exit code {result} on {eval_str}: {time.time() - start:.2f}s")
+     else:
+         gr.Info("TODO: Submit task to an endpoint")
+
+     return gr.update(interactive=True)  # Submit button
+
+
+ def get_demo():
+     # gr.themes.Soft(
+     #     primary_hue="green",
+     # )
+
+     def check_dataset_and_get_config(dataset_id):
+         try:
+             configs = datasets.get_dataset_config_names(dataset_id)
+             return gr.Dropdown(configs, value=configs[0], visible=True)
+         except Exception:
+             # Dataset may not exist
+             pass
+
+     def check_dataset_and_get_split(dataset_config, dataset_id):
+         try:
+             splits = list(datasets.load_dataset(dataset_id, dataset_config).keys())
+             return gr.Dropdown(splits, value=splits[0], visible=True)
+         except Exception as e:
+             # Dataset may not exist
+             gr.Warning(f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}")
+             pass
+
+     def clear_column_mapping_tables():
+         return [
+             gr.update(CONFIRM_MAPPING_DETAILS_FAIL_MD, visible=True),
+             gr.update(value=[], visible=False, interactive=True),
+             gr.update(value=[], visible=False, interactive=True),
+         ]
+
+     def gate_validate_btn(model_id, dataset_id, dataset_config, dataset_split, id2label_mapping_dataframe=None, feature_mapping_dataframe=None):
+         column_mapping = '{}'
+         _, ppl = check_model(model_id=model_id)
+
+         if id2label_mapping_dataframe is not None:
+             labels = convert_column_mapping_to_json(id2label_mapping_dataframe.value, label="data")
+             features = convert_column_mapping_to_json(feature_mapping_dataframe.value, label="text")
+             column_mapping = json.dumps({**labels, **features}, indent=2)
+
+         if check_column_mapping_keys_validity(column_mapping, ppl) is False:
+             gr.Warning('Label mapping table has invalid contents. Please check again.')
+             return (gr.update(interactive=False),
+                     gr.update(CONFIRM_MAPPING_DETAILS_FAIL_MD, visible=True),
+                     gr.update(),
+                     gr.update(),
+                     gr.update(),
+                     gr.update(),
+                     gr.update())
+         else:
+             if model_id and dataset_id and dataset_config and dataset_split:
+                 return try_validate(model_id, ppl, dataset_id, dataset_config, dataset_split, column_mapping)
+             else:
+                 return (gr.update(interactive=False),
+                         gr.update(visible=True),
+                         gr.update(visible=False),
+                         gr.update(visible=False),
+                         gr.update(visible=False),
+                         gr.update(visible=False),
+                         gr.update(visible=False))
+     with gr.Row():
+         gr.Markdown(CONFIRM_MAPPING_DETAILS_MD)
+     with gr.Row():
+         run_local = gr.Checkbox(value=True, label="Run in this Space")
+         use_inference = read_inference_type('./config.yaml') == 'hf_inference_api'
+         run_inference = gr.Checkbox(value=use_inference, label="Run with Inference API")
+
+     with gr.Row():
+         selected = read_scanners('./config.yaml')
+         scan_config = selected + ['data_leakage']
+         scanners = gr.CheckboxGroup(choices=scan_config, value=selected, label='Scan Settings', visible=True)
+
+     with gr.Row():
+         model_id_input = gr.Textbox(
+             label="Hugging Face model id",
+             placeholder="cardiffnlp/twitter-roberta-base-sentiment-latest",
+         )
+
+         dataset_id_input = gr.Textbox(
+             label="Hugging Face Dataset id",
+             placeholder="tweet_eval",
+         )
+     with gr.Row():
+         dataset_config_input = gr.Dropdown(label='Dataset Config', visible=False)
+         dataset_split_input = gr.Dropdown(label='Dataset Split', visible=False)
+
+     with gr.Row(visible=True) as loading_row:
+         gr.Markdown('''
+         <p style="text-align: center;">
+         🚀🐢Please validate your model and dataset first...
+         </p>
+         ''')
+
+     with gr.Row(visible=False) as preview_row:
+         gr.Markdown('''
+         <h1 style="text-align: center;">
+         Confirm Pre-processing Details
+         </h1>
+         Base on your model and dataset, we inferred this label mapping and feature mapping. <b>If the mapping is incorrect, please modify it in the table below.</b>
+         ''')
+
+     with gr.Row():
+         id2label_mapping_dataframe = gr.DataFrame(label="Preview of label mapping", interactive=True, visible=False)
+         feature_mapping_dataframe = gr.DataFrame(label="Preview of feature mapping", interactive=True, visible=False)
+     with gr.Row():
+         example_input = gr.Markdown('Sample Input: ', visible=False)
+
+     with gr.Row():
+         example_labels = gr.Label(label='Model Prediction Sample', visible=False)
+
+     run_btn = gr.Button(
+         "Get Evaluation Result",
+         variant="primary",
+         interactive=False,
+         size="lg",
+     )
+
+     model_id_input.blur(clear_column_mapping_tables, outputs=[id2label_mapping_dataframe, feature_mapping_dataframe])
+
+
+     dataset_id_input.blur(check_dataset_and_get_config, dataset_id_input, dataset_config_input)
+     dataset_id_input.submit(check_dataset_and_get_config, dataset_id_input, dataset_config_input)
+
+     dataset_config_input.change(
+         check_dataset_and_get_split,
+         inputs=[dataset_config_input, dataset_id_input],
+         outputs=[dataset_split_input])
+
+     dataset_id_input.blur(clear_column_mapping_tables, outputs=[id2label_mapping_dataframe, feature_mapping_dataframe])
+     # model_id_input.blur(gate_validate_btn,
+     #                     inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input],
+     #                     outputs=[run_btn, loading_row, preview_row, example_input, example_labels, id2label_mapping_dataframe, feature_mapping_dataframe])
+     # dataset_id_input.blur(gate_validate_btn,
+     #                     inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input],
+     #                     outputs=[run_btn, loading_row, preview_row, example_input, example_labels, id2label_mapping_dataframe, feature_mapping_dataframe])
+     dataset_config_input.change(gate_validate_btn,
+                                 inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input],
+                                 outputs=[run_btn, loading_row, preview_row, example_input, example_labels, id2label_mapping_dataframe, feature_mapping_dataframe])
+     dataset_split_input.change(gate_validate_btn,
+                                inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input],
+                                outputs=[run_btn, loading_row, preview_row, example_input, example_labels, id2label_mapping_dataframe, feature_mapping_dataframe])
+     id2label_mapping_dataframe.input(gate_validate_btn,
+                                      inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input, id2label_mapping_dataframe, feature_mapping_dataframe],
+                                      outputs=[run_btn, loading_row, preview_row, example_input, example_labels, id2label_mapping_dataframe, feature_mapping_dataframe])
+     feature_mapping_dataframe.input(gate_validate_btn,
+                                     inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input, id2label_mapping_dataframe, feature_mapping_dataframe],
+                                     outputs=[run_btn, loading_row, preview_row, example_input, example_labels, id2label_mapping_dataframe, feature_mapping_dataframe])
+     scanners.change(write_scanners, inputs=scanners)
+     run_inference.change(
+         write_inference_type,
+         inputs=[run_inference]
+     )
+
+     run_btn.click(
+         try_submit,
+         inputs=[
+             model_id_input,
+             dataset_id_input,
+             dataset_config_input,
+             dataset_split_input,
+             id2label_mapping_dataframe,
+             feature_mapping_dataframe,
+             run_local,
+         ],
+         outputs=[
+             run_btn,
+         ],
+     )
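Both `try_validate` and `gate_validate_btn` follow one convention worth noting: every return value is a tuple with exactly one `gr.update` per declared output, in output order. A stripped-down sketch of that convention (the handler and component names here are invented for illustration):

```python
import gradio as gr

def gate(ready: bool):
    # one update per output, in the same order as outputs=[...] below
    return (
        gr.update(interactive=ready),  # run button
        gr.update(visible=not ready),  # loading row
        gr.update(visible=ready),      # preview row
    )

with gr.Blocks() as demo:
    flag = gr.Checkbox(label="Ready")
    btn = gr.Button("Run", interactive=False)
    with gr.Row(visible=True) as loading:
        gr.Markdown("Validating...")
    with gr.Row(visible=False) as preview:
        gr.Markdown("Preview")
    flag.change(gate, inputs=flag, outputs=[btn, loading, preview])
```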
app_text_classification.py ADDED
@@ -0,0 +1,232 @@
+ import gradio as gr
+ import datasets
+ import os
+ import time
+ import subprocess
+ import logging
+
+ import json
+
+ from transformers.pipelines import TextClassificationPipeline
+
+ from text_classification import get_labels_and_features_from_dataset, check_model, get_example_prediction, check_column_mapping_keys_validity, text_classification_fix_column_mapping
+ from utils import read_scanners, write_scanners, read_inference_type, read_column_mapping, write_column_mapping, write_inference_type, convert_column_mapping_to_json
+ from wordings import CONFIRM_MAPPING_DETAILS_MD, CONFIRM_MAPPING_DETAILS_FAIL_MD, CONFIRM_MAPPING_DETAILS_FAIL_RAW
+
+ HF_REPO_ID = 'HF_REPO_ID'
+ HF_SPACE_ID = 'SPACE_ID'
+ HF_WRITE_TOKEN = 'HF_WRITE_TOKEN'
+
+ MAX_LABELS = 20
+ MAX_FEATURES = 20
+
+ EXAMPLE_MODEL_ID = 'cardiffnlp/twitter-roberta-base-sentiment-latest'
+ EXAMPLE_DATA_ID = 'tweet_eval'
+ CONFIG_PATH = './config.yaml'
+
+ def try_submit(m_id, d_id, config, split, local):
+     all_mappings = read_column_mapping(CONFIG_PATH)
+
+     if "labels" not in all_mappings.keys():
+         gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
+         return gr.update(interactive=True)
+     label_mapping = all_mappings["labels"]
+
+     if "features" not in all_mappings.keys():
+         gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
+         return gr.update(interactive=True)
+     feature_mapping = all_mappings["features"]
+
+     # TODO: Set column mapping for some dataset such as `amazon_polarity`
+     if local:
+         command = [
+             "python",
+             "cli.py",
+             "--loader", "huggingface",
+             "--model", m_id,
+             "--dataset", d_id,
+             "--dataset_config", config,
+             "--dataset_split", split,
+             "--hf_token", os.environ.get(HF_WRITE_TOKEN),
+             "--discussion_repo", os.environ.get(HF_REPO_ID) or os.environ.get(HF_SPACE_ID),
+             "--output_format", "markdown",
+             "--output_portal", "huggingface",
+             "--feature_mapping", json.dumps(feature_mapping),
+             "--label_mapping", json.dumps(label_mapping),
+             "--scan_config", "../config.yaml",
+         ]
+
+         eval_str = f"[{m_id}]<{d_id}({config}, {split} set)>"
+         start = time.time()
+         logging.info(f"Start local evaluation on {eval_str}")
+
+         evaluator = subprocess.Popen(
+             command,
+             cwd=os.path.join(os.path.dirname(os.path.realpath(__file__)), "cicd"),
+             stderr=subprocess.STDOUT,
+         )
+         result = evaluator.wait()
+
+         logging.info(f"Finished local evaluation exit code {result} on {eval_str}: {time.time() - start:.2f}s")
+
+         gr.Info(f"Finished local evaluation exit code {result} on {eval_str}: {time.time() - start:.2f}s")
+     else:
+         gr.Info("TODO: Submit task to an endpoint")
+
+     return gr.update(interactive=True)  # Submit button
+
+
+ def check_dataset_and_get_config(dataset_id):
+     try:
+         configs = datasets.get_dataset_config_names(dataset_id)
+         return gr.Dropdown(configs, value=configs[0], visible=True)
+     except Exception:
+         # Dataset may not exist
+         pass
+
+ def check_dataset_and_get_split(dataset_id, dataset_config):
+     try:
+         splits = list(datasets.load_dataset(dataset_id, dataset_config).keys())
+         return gr.Dropdown(splits, value=splits[0], visible=True)
+     except Exception:
+         # Dataset may not exist
+         # gr.Warning(f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}")
+         pass
+
+ def get_demo():
+     with gr.Row():
+         gr.Markdown(CONFIRM_MAPPING_DETAILS_MD)
+     with gr.Row():
+         model_id_input = gr.Textbox(
+             label="Hugging Face model id",
+             placeholder=EXAMPLE_MODEL_ID + " (press enter to confirm)",
+         )
+
+         dataset_id_input = gr.Textbox(
+             label="Hugging Face Dataset id",
+             placeholder=EXAMPLE_DATA_ID + " (press enter to confirm)",
+         )
+
+     with gr.Row():
+         dataset_config_input = gr.Dropdown(label='Dataset Config', visible=False)
+         dataset_split_input = gr.Dropdown(label='Dataset Split', visible=False)
+
+     with gr.Row():
+         example_input = gr.Markdown('Example Input', visible=False)
+     with gr.Row():
+         example_prediction = gr.Label(label='Model Prediction Sample', visible=False)
+
+     with gr.Row():
+         column_mappings = []
+         with gr.Column():
+             for _ in range(MAX_LABELS):
+                 column_mappings.append(gr.Dropdown(visible=False))
+         with gr.Column():
+             for _ in range(MAX_LABELS, MAX_LABELS + MAX_FEATURES):
+                 column_mappings.append(gr.Dropdown(visible=False))
+
+     with gr.Accordion(label='Model Wrap Advance Config (optional)', open=False):
+         run_local = gr.Checkbox(value=True, label="Run in this Space")
+         use_inference = read_inference_type('./config.yaml') == 'hf_inference_api'
+         run_inference = gr.Checkbox(value=use_inference, label="Run with Inference API")
+
+     with gr.Accordion(label='Scanner Advance Config (optional)', open=False):
+         selected = read_scanners('./config.yaml')
+         scan_config = selected + ['data_leakage']
+         scanners = gr.CheckboxGroup(choices=scan_config, value=selected, label='Scan Settings', visible=True)
+
+     with gr.Row():
+         run_btn = gr.Button(
+             "Get Evaluation Result",
+             variant="primary",
+             interactive=True,
+             size="lg",
+         )
+
+     @gr.on(triggers=[label.change for label in column_mappings],
+            inputs=[dataset_id_input, dataset_config_input, dataset_split_input, *column_mappings])
+     def write_column_mapping_to_config(dataset_id, dataset_config, dataset_split, *labels):
+         ds_labels, ds_features = get_labels_and_features_from_dataset(dataset_id, dataset_config, dataset_split)
+         if labels is None:
+             return
+         labels = [*labels]
+         all_mappings = read_column_mapping(CONFIG_PATH)
+
+         if "labels" not in all_mappings.keys():
+             all_mappings["labels"] = dict()
+         for i, label in enumerate(labels[:MAX_LABELS]):
+             if label:
+                 all_mappings["labels"][label] = ds_labels[i]
+
+         if "features" not in all_mappings.keys():
+             all_mappings["features"] = dict()
+         for i, feat in enumerate(labels[MAX_LABELS:(MAX_LABELS + MAX_FEATURES)]):
+             if feat:
+                 all_mappings["features"][feat] = ds_features[i]
+         write_column_mapping(all_mappings)
+
+     def list_labels_and_features_from_dataset(dataset_id, dataset_config, dataset_split, model_id2label, model_features):
+         ds_labels, ds_features = get_labels_and_features_from_dataset(dataset_id, dataset_config, dataset_split)
+         if ds_labels is None or ds_features is None:
+             return [gr.Dropdown(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)]
+         model_labels = list(model_id2label.values())
+         lables = [gr.Dropdown(label=f"{label}", choices=model_labels, value=model_id2label[i], interactive=True, visible=True) for i, label in enumerate(ds_labels[:MAX_LABELS])]
+         lables += [gr.Dropdown(visible=False) for _ in range(MAX_LABELS - len(lables))]
+         features = [gr.Dropdown(label=f"{feature}", choices=ds_features, value=ds_features[0], interactive=True, visible=True) for feature in model_features]
+         features += [gr.Dropdown(visible=False) for _ in range(MAX_FEATURES - len(features))]
+         return lables + features
+
+     @gr.on(triggers=[model_id_input.change, dataset_config_input.change])
+     def clear_column_mapping_config():
+         write_column_mapping(None)
+
+     @gr.on(triggers=[model_id_input.change, dataset_config_input.change, dataset_split_input.change],
+            inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input],
+            outputs=[example_input, example_prediction, *column_mappings])
+     def check_model_and_show_prediction(model_id, dataset_id, dataset_config, dataset_split):
+         ppl = check_model(model_id)
+         if ppl is None or not isinstance(ppl, TextClassificationPipeline):
+             gr.Warning("Please check your model.")
+             return (
+                 gr.update(visible=False),
+                 gr.update(visible=False),
+                 *[gr.update(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)]
+             )
+         model_id2label = ppl.model.config.id2label
+         model_features = ['text']
+         column_mappings = list_labels_and_features_from_dataset(
+             dataset_id,
+             dataset_config,
+             dataset_split,
+             model_id2label,
+             model_features
+         )
+
+         if ppl is None:
+             gr.Warning("Model not found")
+             return (
+                 gr.update(visible=False),
+                 gr.update(visible=False),
+                 *column_mappings
+             )
+         prediction_input, prediction_output = get_example_prediction(ppl, dataset_id, dataset_config, dataset_split)
+         return (
+             gr.update(value=prediction_input, visible=True),
+             gr.update(value=prediction_output, visible=True),
+             *column_mappings
+         )
+
+     dataset_id_input.blur(check_dataset_and_get_config, dataset_id_input, dataset_config_input)
+
+     dataset_config_input.change(
+         check_dataset_and_get_split,
+         inputs=[dataset_id_input, dataset_config_input],
+         outputs=[dataset_split_input])
+
+     gr.on(
+         triggers=[
+             run_btn.click,
+         ],
+         fn=try_submit,
+         inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input, run_local],
+         outputs=[run_btn])
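The dropdown UI above relies on a fixed pool of `MAX_LABELS + MAX_FEATURES` hidden components: Gradio cannot add components to a Blocks graph after it is built, so the pool is created up front and handlers only toggle visibility, labels, and choices. A self-contained sketch of that pattern (`POOL_SIZE` and the textbox are illustrative stand-ins, not names from the commit):

```python
import gradio as gr

POOL_SIZE = 4  # stands in for MAX_LABELS / MAX_FEATURES

with gr.Blocks() as demo:
    items = gr.Textbox(label="Comma-separated labels")
    # pre-allocate the full pool of hidden dropdowns at build time
    pool = [gr.Dropdown(visible=False) for _ in range(POOL_SIZE)]

    def reveal(text):
        names = [n.strip() for n in text.split(",") if n.strip()]
        # show one dropdown per name, hide the unused remainder of the pool
        shown = [gr.update(label=n, choices=names, value=n, visible=True) for n in names[:POOL_SIZE]]
        hidden = [gr.update(visible=False)] * (POOL_SIZE - len(shown))
        return shown + hidden

    items.change(reveal, inputs=items, outputs=pool)
```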
cicd DELETED
@@ -1 +0,0 @@
- Subproject commit 96913a4f713372d3325002e0ec97320bae55d323
config.yaml CHANGED
@@ -1,3 +1,6 @@
+ configuration:
+   ethical_bias:
+     threshold: 0.01
  detectors:
    - ethical_bias
    - text_perturbation
@@ -6,10 +9,4 @@ detectors:
    - underconfidence
    - overconfidence
    - spurious_correlation
-
- configuration:
-   ethical_bias:
-     threshold:
-       0.01
-
  inference_type: hf_pipeline
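After this change the threshold lives under a top-level `configuration` key instead of a trailing block. A quick sketch of reading the reorganized file back, matching how utils.py loads it (path and key names are taken from the diff above):

```python
import yaml

with open("./config.yaml", "r") as f:
    config = yaml.load(f, Loader=yaml.FullLoader)

threshold = config["configuration"]["ethical_bias"]["threshold"]  # 0.01
detectors = config["detectors"]            # list of scanner names
inference_type = config["inference_type"]  # "hf_pipeline"
```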
text_classification.py CHANGED
@@ -2,7 +2,33 @@ import datasets
  import logging
  import json
  import pandas as pd
+ import huggingface_hub
+ from transformers import pipeline
 
+ def get_labels_and_features_from_dataset(dataset_id, dataset_config, split):
+     try:
+         ds = datasets.load_dataset(dataset_id, dataset_config)[split]
+         dataset_features = ds.features
+         labels = dataset_features["label"].names
+         features = [f for f in dataset_features.keys() if f != "label"]
+         return labels, features
+     except Exception as e:
+         logging.warning(f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}")
+         return None, None
+
+ def check_model(model_id):
+     try:
+         task = huggingface_hub.model_info(model_id).pipeline_tag
+     except Exception:
+         return None
+
+     try:
+         ppl = pipeline(task=task, model=model_id)
+
+         return ppl
+     except Exception:
+         return None
+
 
  def text_classificaiton_match_label_case_unsensative(id2label_mapping, label):
      for model_label in id2label_mapping.keys():
@@ -60,10 +86,20 @@ def check_column_mapping_keys_validity(column_mapping, ppl):
 
      return user_labels == model_labels == original_labels
 
+ '''
+ params:
+     column_mapping: dict
+     dataset_features: dict
+     example: {
+         'text': Value(dtype='string', id=None),
+         'label': ClassLabel(names=['negative', 'neutral', 'positive'], id=None)
+     }
+ '''
  def infer_text_input_column(column_mapping, dataset_features):
      # Check whether we need to infer the text input column
      infer_text_input_column = True
      feature_map_df = None
+
      if "text" in column_mapping.keys():
          dataset_text_column = column_mapping["text"]
          if dataset_text_column in dataset_features.keys():
@@ -82,33 +118,21 @@ def infer_text_input_column(column_mapping, dataset_features):
          logging.debug(f"Candidates are {candidates}")
          column_mapping["text"] = candidates[0]
 
-     return column_mapping, feature_map_df
-
- def text_classification_fix_column_mapping(column_mapping, ppl, d_id, config, split):
-     # We assume dataset is ok here
-     ds = datasets.load_dataset(d_id, config)[split]
-     try:
-         dataset_features = ds.features
-     except AttributeError:
-         # Dataset does not have features, need to provide everything
-         return None, None, None, None, None
-
-     column_mapping, feature_map_df = infer_text_input_column(column_mapping, dataset_features)
-
-     # Load dataset as DataFrame
-     df = ds.to_pandas()
-
-     # Retrieve all labels
-     id2label_mapping = {}
-     id2label = ppl.model.config.id2label
-     label2id = {v: k for k, v in id2label.items()}
-
-     # Infer labels
-     id2label_mapping, dataset_labels = text_classification_map_model_and_dataset_labels(id2label, dataset_features)
-     id2label_mapping_dataset_model = {
-         v: k for k, v in id2label_mapping.items()
-     }
-
+     return column_mapping, feature_map_df
+
+ '''
+ params:
+     column_mapping: dict
+     id2label_mapping: dict
+     example:
+         id2label_mapping: {
+             'negative': 'negative',
+             'neutral': 'neutral',
+             'positive': 'positive'
+         }
+ '''
+ def infer_output_label_column(column_mapping, id2label_mapping, id2label, dataset_labels):
+     # Check whether we need to infer the output label column
      if "data" in column_mapping.keys():
          if isinstance(column_mapping["data"], list):
              # Use the column mapping passed by user
@@ -118,13 +142,63 @@ def text_classification_fix_column_mapping(column_mapping, ppl, d_id, config, sp
          column_mapping["label"] = {
              i: None for i in id2label.keys()
          }
-         return column_mapping, None, None, None, feature_map_df
+         return column_mapping, None
+
+     if "data" not in column_mapping.keys():
+         # Column mapping should contain original model labels
+         column_mapping["label"] = {
+             str(i): id2label_mapping[label] for i, label in zip(id2label.keys(), dataset_labels)
+         }
+     # print('>>>>> column_mapping >>>>>', column_mapping)
 
      id2label_df = pd.DataFrame({
          "Dataset Labels": dataset_labels,
-         "Model Prediction Labels": [id2label_mapping_dataset_model[label] for label in dataset_labels],
+         "Model Prediction Labels": [id2label_mapping[label] for label in dataset_labels],
      })
+
+     return column_mapping, id2label_df
+
+ def check_dataset_features_validity(d_id, config, split):
+     # We assume dataset is ok here
+     ds = datasets.load_dataset(d_id, config)[split]
+     try:
+         dataset_features = ds.features
+     except AttributeError:
+         # Dataset does not have features, need to provide everything
+         return None, None
+     # Load dataset as DataFrame
+     df = ds.to_pandas()
+
+     return df, dataset_features
+
+ def get_example_prediction(ppl, dataset_id, dataset_config, dataset_split):
+     # get a sample prediction from the model on the dataset
+     prediction_input = None
+     prediction_result = None
+     try:
+         # Use the first item to test prediction
+         ds = datasets.load_dataset(dataset_id, dataset_config)[dataset_split]
+         if "text" not in ds.features.keys():
+             # Dataset does not have text column
+             prediction_input = ds[0][ds.features.keys()[0]]
+         else:
+             prediction_input = ds[0]["text"]
+
+         print('prediction_input', prediction_input)
+         results = ppl(prediction_input, top_k=None)
+         # Display results in original label and mapped label
+         prediction_result = {
+             f'{result["label"]}': result["score"] for result in results
+         }
+     except Exception:
+         # Pipeline prediction failed, need to provide labels
+         return prediction_input, None
+
+     return prediction_input, prediction_result
+
+
+ def get_sample_prediction(ppl, df, column_mapping, id2label_mapping):
      # get a sample prediction from the model on the dataset
      prediction_input = None
      prediction_result = None
@@ -133,21 +207,42 @@ def text_classification_fix_column_mapping(column_mapping, ppl, d_id, config, sp
          prediction_input = df.head(1).at[0, column_mapping["text"]]
          results = ppl({"text": prediction_input}, top_k=None)
          prediction_result = {
-             f'{result["label"]}({label2id[result["label"]]})': result["score"] for result in results
+             f'{result["label"]}': result["score"] for result in results
          }
-     except Exception as e:
+     except Exception:
          # Pipeline prediction failed, need to provide labels
-         print(e, '>>>> error')
-         return column_mapping, prediction_input, None, id2label_df, feature_map_df
+         return prediction_input, None
 
+     # Display results in original label and mapped label
      prediction_result = {
-         f'[{label2id[result["label"]]}]{result["label"]}(original) - {id2label_mapping[result["label"]]}(mapped)': result["score"] for result in results
+         f'{result["label"]}(original) - {id2label_mapping[result["label"]]}(mapped)': result["score"] for result in results
      }
+     return prediction_input, prediction_result
 
-     if "data" not in column_mapping.keys():
-         # Column mapping should contain original model labels
-         column_mapping["label"] = {
-             str(i): id2label_mapping_dataset_model[label] for i, label in zip(id2label.keys(), dataset_labels)
-         }
-
+ def text_classification_fix_column_mapping(column_mapping, ppl, d_id, config, split):
+     # load dataset as pd DataFrame
+     # get features column from dataset
+     df, dataset_features = check_dataset_features_validity(d_id, config, split)
+
+     column_mapping, feature_map_df = infer_text_input_column(column_mapping, dataset_features)
+     if feature_map_df is None:
+         # dataset does not have any features
+         return None, None, None, None, None
+
+     # Retrieve all labels
+     id2label = ppl.model.config.id2label
+
+     # Infer labels
+     id2label_mapping, dataset_labels = text_classification_map_model_and_dataset_labels(id2label, dataset_features)
+     column_mapping, id2label_df = infer_output_label_column(column_mapping, id2label_mapping, id2label, dataset_labels)
+     if id2label_df is None:
+         # does not able to infer output label column
+         return column_mapping, None, None, None, feature_map_df
+
+     # Get a sample prediction
+     prediction_input, prediction_result = get_sample_prediction(ppl, df, column_mapping, id2label_mapping)
+     if prediction_result is None:
+         # does not able to get a sample prediction
+         return column_mapping, prediction_input, None, id2label_df, feature_map_df
+
      return column_mapping, prediction_input, prediction_result, id2label_df, feature_map_df
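The monolithic `text_classification_fix_column_mapping` is thus split into small helpers that the new UI calls directly. A sketch of the standalone flow, assuming the modules are importable from the Space root; the `sentiment` config name is an assumption for illustration, not something the diff pins down:

```python
from text_classification import check_model, get_labels_and_features_from_dataset, get_example_prediction

ppl = check_model("cardiffnlp/twitter-roberta-base-sentiment-latest")  # None on failure
labels, features = get_labels_and_features_from_dataset("tweet_eval", "sentiment", "test")
sample_input, sample_scores = get_example_prediction(ppl, "tweet_eval", "sentiment", "test")
print(sample_input, sample_scores)  # first test row and its label->score dict
```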
utils.py CHANGED
@@ -12,7 +12,7 @@ def read_scanners(path):
      scanners = []
      with open(path, "r") as f:
          config = yaml.load(f, Loader=yaml.FullLoader)
-         scanners = config.get("detectors", None)
+         scanners = config.get("detectors", [])
      return scanners
 
  # convert a list of scanners to yaml file
@@ -30,7 +30,7 @@ def read_inference_type(path):
      inference_type = ""
      with open(path, "r") as f:
          config = yaml.load(f, Loader=yaml.FullLoader)
-         inference_type = config.get("inference_type", None)
+         inference_type = config.get("inference_type", "")
      return inference_type
 
  # write model_type to yaml file
@@ -45,10 +45,30 @@ def write_inference_type(use_inference):
          # save inference_type to inference_type in yaml
          yaml.dump(config, f, Dumper=Dumper)
 
+ # read column mapping from yaml file
+ def read_column_mapping(path):
+     column_mapping = {}
+     with open(path, "r") as f:
+         config = yaml.load(f, Loader=yaml.FullLoader)
+         column_mapping = config.get("column_mapping", dict())
+     return column_mapping
+
+ # write column mapping to yaml file
+ def write_column_mapping(mapping):
+     with open(YAML_PATH, "r") as f:
+         config = yaml.load(f, Loader=yaml.FullLoader)
+     if mapping is None:
+         del config["column_mapping"]
+     else:
+         config["column_mapping"] = mapping
+     with open(YAML_PATH, "w") as f:
+         # save column_mapping to column_mapping in yaml
+         yaml.dump(config, f, Dumper=Dumper)
+
  # convert column mapping dataframe to json
  def convert_column_mapping_to_json(df, label=""):
      column_mapping = {}
      column_mapping[label] = []
      for _, row in df.iterrows():
          column_mapping[label].append(row.tolist())
-     return column_mapping
+     return column_mapping
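The new pair of helpers persists the UI's dropdown choices in config.yaml under a `column_mapping` key. A round-trip sketch; note that `write_column_mapping` writes to a module-level `YAML_PATH` defined outside the hunk shown, so the assumption here is that it points at the same ./config.yaml:

```python
from utils import read_column_mapping, write_column_mapping

write_column_mapping({
    "labels": {"positive": "positive", "negative": "negative"},
    "features": {"text": "text"},
})
mapping = read_column_mapping("./config.yaml")
assert set(mapping) == {"labels", "features"}
```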
wordings.py ADDED
@@ -0,0 +1,17 @@
+ CONFIRM_MAPPING_DETAILS_MD = '''
+ <h1 style="text-align: center;">
+     Giskard Evaluator
+ </h1>
+ Welcome to Giskard Evaluator Space! Get your report immediately by simply input your model id and dataset id below. Follow our leads and improve your model in no time.
+ '''
+
+ CONFIRM_MAPPING_DETAILS_FAIL_MD = '''
+ <h1 style="text-align: center;">
+     Confirm Pre-processing Details
+ </h1>
+ Sorry, we cannot align the input/output of your dataset with the model. <b>Pleaser double check your model and dataset.</b>
+ '''
+
+ CONFIRM_MAPPING_DETAILS_FAIL_RAW = '''
+ Sorry, we cannot align the input/output of your dataset with the model. Pleaser double check your model and dataset.
+ '''