inoki-giskard committed on
Commit
01c4e21
1 Parent(s): 583defc

Output label mapping in column mapping

Browse files
Files changed (1) hide show
  1. app.py +56 -5
app.py CHANGED
@@ -6,6 +6,8 @@ import os
6
  import time
7
  from pathlib import Path
8
 
 
 
9
  import pandas as pd
10
 
11
  from transformers.pipelines import TextClassificationPipeline
@@ -84,15 +86,27 @@ def text_classification_map_model_and_dataset_labels(id2label, dataset_features)
84
  return id2label_mapping
85
 
86
 
87
- def try_validate(model_id, dataset_id, dataset_config, dataset_split):
88
  # Validate model
89
  m_id, ppl = check_model(model_id=model_id)
90
  if m_id is None:
91
  gr.Warning(f'Model "{model_id}" is not accessible. Please set your HF_TOKEN if it is a private model.')
92
- return dataset_config, dataset_split, gr.update(interactive=False), gr.update(visible=False), gr.update(visible=False)
 
 
 
 
 
 
93
  if isinstance(ppl, Exception):
94
  gr.Warning(f'Failed to load "{model_id} model": {ppl}')
95
- return dataset_config, dataset_split, gr.update(interactive=False), gr.update(visible=False), gr.update(visible=False)
 
 
 
 
 
 
96
 
97
  # Validate dataset
98
  d_id, config, split = check_dataset(dataset_id=dataset_id, dataset_config=dataset_config, dataset_split=dataset_split)
@@ -110,12 +124,23 @@ def try_validate(model_id, dataset_id, dataset_config, dataset_split):
110
  dataset_ok = True
111
 
112
  if not dataset_ok:
113
- return config, split, gr.update(interactive=False), gr.update(visible=False), gr.update(visible=False)
 
 
 
 
 
 
114
 
115
  # TODO: Validate column mapping by running once
116
  prediction_result = {}
117
  id2label_df = None
118
  if isinstance(ppl, TextClassificationPipeline):
 
 
 
 
 
119
  # Retrieve all labels
120
  id2label_mapping = {}
121
  try:
@@ -137,15 +162,27 @@ def try_validate(model_id, dataset_id, dataset_config, dataset_split):
137
  "Model labels": [id2label[label] for label in id2label.keys()],
138
  "Dataset labels": [id2label_mapping[id2label[label]] for label in id2label.keys()],
139
  })
 
 
 
 
140
  except AttributeError:
141
  # Dataset does not have features
142
  pass
143
 
 
 
144
  del ppl
145
 
146
  gr.Info("Model and dataset validations passed. Your can submit the evaluation task.")
147
 
148
- return config, split, gr.update(interactive=True), gr.update(value=prediction_result, visible=True), gr.update(value=id2label_df, visible=True)
 
 
 
 
 
 
149
 
150
 
151
  def try_submit(m_id, d_id, config, split, local):
@@ -240,6 +277,18 @@ with gr.Blocks(theme=theme) as iface:
240
 
241
  id2label_mapping_dataframe = gr.DataFrame(visible=False)
242
 
 
 
 
 
 
 
 
 
 
 
 
 
243
  with gr.Row():
244
  validate_btn = gr.Button("Validate model and dataset", variant="primary")
245
  run_btn = gr.Button(
@@ -254,6 +303,7 @@ with gr.Blocks(theme=theme) as iface:
254
  dataset_id_input,
255
  dataset_config_input,
256
  dataset_split_input,
 
257
  ],
258
  outputs=[
259
  dataset_config_input,
@@ -261,6 +311,7 @@ with gr.Blocks(theme=theme) as iface:
261
  run_btn,
262
  example_labels,
263
  id2label_mapping_dataframe,
 
264
  ],
265
  )
266
  run_btn.click(
 
6
  import time
7
  from pathlib import Path
8
 
9
+ import json
10
+
11
  import pandas as pd
12
 
13
  from transformers.pipelines import TextClassificationPipeline
 
86
  return id2label_mapping
87
 
88
 
89
+ def try_validate(model_id, dataset_id, dataset_config, dataset_split, column_mapping):
90
  # Validate model
91
  m_id, ppl = check_model(model_id=model_id)
92
  if m_id is None:
93
  gr.Warning(f'Model "{model_id}" is not accessible. Please set your HF_TOKEN if it is a private model.')
94
+ return (
95
+ dataset_config, dataset_split,
96
+ gr.update(interactive=False), # Submit button
97
+ gr.update(visible=False), # Model prediction preview
98
+ gr.update(visible=False), # Label mapping preview
99
+ gr.update(visible=True), # Column mapping
100
+ )
101
  if isinstance(ppl, Exception):
102
  gr.Warning(f'Failed to load "{model_id} model": {ppl}')
103
+ return (
104
+ dataset_config, dataset_split,
105
+ gr.update(interactive=False), # Submit button
106
+ gr.update(visible=False), # Model prediction preview
107
+ gr.update(visible=False), # Label mapping preview
108
+ gr.update(visible=True), # Column mapping
109
+ )
110
 
111
  # Validate dataset
112
  d_id, config, split = check_dataset(dataset_id=dataset_id, dataset_config=dataset_config, dataset_split=dataset_split)
 
124
  dataset_ok = True
125
 
126
  if not dataset_ok:
127
+ return (
128
+ config, split,
129
+ gr.update(interactive=False), # Submit button
130
+ gr.update(visible=False), # Model prediction preview
131
+ gr.update(visible=False), # Label mapping preview
132
+ gr.update(visible=True), # Column mapping
133
+ )
134
 
135
  # TODO: Validate column mapping by running once
136
  prediction_result = {}
137
  id2label_df = None
138
  if isinstance(ppl, TextClassificationPipeline):
139
+ try:
140
+ column_mapping = json.loads(column_mapping)
141
+ except Exception:
142
+ column_mapping = {}
143
+
144
  # Retrieve all labels
145
  id2label_mapping = {}
146
  try:
 
162
  "Model labels": [id2label[label] for label in id2label.keys()],
163
  "Dataset labels": [id2label_mapping[id2label[label]] for label in id2label.keys()],
164
  })
165
+ if "label" not in column_mapping.keys():
166
+ column_mapping["label"] = {
167
+ i: id2label_mapping[id2label[i]] for i in id2label.keys()
168
+ }
169
  except AttributeError:
170
  # Dataset does not have features
171
  pass
172
 
173
+ column_mapping = json.dumps(column_mapping, indent=2)
174
+
175
  del ppl
176
 
177
  gr.Info("Model and dataset validations passed. Your can submit the evaluation task.")
178
 
179
+ return (
180
+ config, split,
181
+ gr.update(interactive=True), # Submit button
182
+ gr.update(value=prediction_result, visible=True), # Model prediction preview
183
+ gr.update(value=id2label_df, visible=True), # Label mapping preview
184
+ gr.update(value=column_mapping, visible=True, interactive=True), # Column mapping
185
+ )
186
 
187
 
188
  def try_submit(m_id, d_id, config, split, local):
 
277
 
278
  id2label_mapping_dataframe = gr.DataFrame(visible=False)
279
 
280
+ with gr.Row():
281
+ column_mapping_input = gr.Textbox(
282
+ value="",
283
+ lines=5,
284
+ label="Column mapping",
285
+ placeholder="Description of mapping of columns in model to dataset, in json format, e.g.:\n"
286
+ '{\n'
287
+ ' "text": "context",\n'
288
+ ' "label": {0: "Positive", 1: "Negative"}\n'
289
+ '}',
290
+ )
291
+
292
  with gr.Row():
293
  validate_btn = gr.Button("Validate model and dataset", variant="primary")
294
  run_btn = gr.Button(
 
303
  dataset_id_input,
304
  dataset_config_input,
305
  dataset_split_input,
306
+ column_mapping_input,
307
  ],
308
  outputs=[
309
  dataset_config_input,
 
311
  run_btn,
312
  example_labels,
313
  id2label_mapping_dataframe,
314
+ column_mapping_input,
315
  ],
316
  )
317
  run_btn.click(