giskard-evaluator

Runtime error

App Files Files Community

199

inoki-giskard

ZeroCommand committed on Dec 14, 2023

Commit

0c7d7d0

•

1 Parent(s): 1aa43b4

GSK-2362-improve-uiux-for-hfspace (#7)

Browse files

- updated version of ui (9e212ded38fae09473a8aca5e1a861e261e4ebb3)
- Add welcome message at the top (9037bf70daec0ec9985ce71e5a5938ed49c5f85d)
- add pre-check for column mapping values (536b2a2019f4c1fa278a04c10199e04acb58c3aa)
- polish up and add more information (ac0eaffe10ec54745aa4dc0899522e2dcdf91b4c)

Co-authored-by: zcy <ZeroCommand@users.noreply.huggingface.co>

Files changed (2) hide show

app.py +99 -93
text_classification.py +29 -18

app.py CHANGED Viewed

@@ -10,7 +10,7 @@ import json
 from transformers.pipelines import TextClassificationPipeline
-from text_classification import text_classification_fix_column_mapping
 HF_REPO_ID = 'HF_REPO_ID'
@@ -59,26 +59,27 @@ def check_dataset(dataset_id, dataset_config="default", dataset_split="test"):
         return dataset_id, None, None
     return dataset_id, dataset_config, dataset_split
-def try_validate(model_id, dataset_id, dataset_config, dataset_split, column_mapping):
     # Validate model
-    m_id, ppl = check_model(model_id=model_id)
     if m_id is None:
-        gr.Warning(f'Model "{model_id}" is not accessible. Please set your HF_TOKEN if it is a private model.')
         return (
-            dataset_config, dataset_split,
             gr.update(interactive=False),   # Submit button
             gr.update(visible=False),       # Model prediction preview
             gr.update(visible=False),       # Label mapping preview
-            gr.update(visible=True),        # Column mapping
         )
     if isinstance(ppl, Exception):
-        gr.Warning(f'Failed to load "{model_id} model": {ppl}')
         return (
-            dataset_config, dataset_split,
             gr.update(interactive=False),   # Submit button
             gr.update(visible=False),       # Model prediction preview
             gr.update(visible=False),       # Label mapping preview
-            gr.update(visible=True),        # Column mapping
         )
     # Validate dataset
@@ -98,11 +99,13 @@ def try_validate(model_id, dataset_id, dataset_config, dataset_split, column_map
     if not dataset_ok:
         return (
-            config, split,
             gr.update(interactive=False),   # Submit button
             gr.update(visible=False),       # Model prediction preview
             gr.update(visible=False),       # Label mapping preview
-            gr.update(visible=True),        # Column mapping
         )
     # TODO: Validate column mapping by running once
@@ -110,45 +113,48 @@ def try_validate(model_id, dataset_id, dataset_config, dataset_split, column_map
     id2label_df = None
     if isinstance(ppl, TextClassificationPipeline):
         try:
             column_mapping = json.loads(column_mapping)
         except Exception:
             column_mapping = {}
-        column_mapping, prediction_result, id2label_df = \
             text_classification_fix_column_mapping(column_mapping, ppl, d_id, config, split)
         column_mapping = json.dumps(column_mapping, indent=2)
-    del ppl
     if prediction_result is None:
         gr.Warning('The model failed to predict with the first row in the dataset. Please provide column mappings in "Advance" settings.')
         return (
-            config, split,
             gr.update(interactive=False),   # Submit button
             gr.update(visible=False),   # Model prediction preview
             gr.update(visible=False),   # Label mapping preview
-            gr.update(value=column_mapping, visible=True, interactive=True),    # Column mapping
         )
     elif id2label_df is None:
         gr.Warning('The prediction result does not conform the labels in the dataset. Please provide label mappings in "Advance" settings.')
         return (
-            config, split,
             gr.update(interactive=False),   # Submit button
             gr.update(value=prediction_result, visible=True),   # Model prediction preview
             gr.update(visible=False),   # Label mapping preview
-            gr.update(value=column_mapping, visible=True, interactive=True),    # Column mapping
         )
     gr.Info("Model and dataset validations passed. Your can submit the evaluation task.")
     return (
         gr.update(visible=False),       # Loading row
         gr.update(visible=True),        # Preview row
-        gr.update(interactive=True),    # Submit button
         gr.update(value=prediction_result, visible=True),   # Model prediction preview
-        gr.update(value=id2label_df, visible=True), # Label mapping preview
-        gr.update(value=column_mapping, visible=True, interactive=True),    # Column mapping
     )
@@ -200,36 +206,56 @@ def try_submit(m_id, d_id, config, split, column_mappings, local):
 with gr.Blocks(theme=theme) as iface:
     with gr.Tab("Text Classification"):
-        global_ds_id = gr.State('ds')
         def check_dataset_and_get_config(dataset_id):
-            global_ds_id.value = dataset_id
             try:
                 configs = datasets.get_dataset_config_names(dataset_id)
-                print(configs)
                 return gr.Dropdown(configs, value=configs[0], visible=True)
             except Exception:
                 # Dataset may not exist
                 pass
-        def check_dataset_and_get_split(choice):
-            print('choice: ',choice, global_ds_id.value)
             try:
-                splits = list(datasets.load_dataset(global_ds_id.value, choice).keys())
-                print('splits: ',splits)
                 return gr.Dropdown(splits, value=splits[0], visible=True)
             except Exception as e:
                 # Dataset may not exist
-                print(e)
                 pass
-        def gate_validate_btn(model_id, dataset_id, dataset_config, dataset_split):
-            print('model_id: ',model_id)
-            if model_id and dataset_id and dataset_config and dataset_split:
-                return gr.update(interactive=True)
             else:
-                return gr.update(interactive=False)
         with gr.Row():
             model_id_input = gr.Textbox(
                 label="Hugging Face model id",
@@ -245,22 +271,10 @@ with gr.Blocks(theme=theme) as iface:
             dataset_split_input = gr.Dropdown(['default'], value=['default'], label='Dataset Split', visible=False)
             dataset_id_input.change(check_dataset_and_get_config, dataset_id_input, dataset_config_input)
-            dataset_config_input.change(check_dataset_and_get_split, dataset_config_input, dataset_split_input)
-        with gr.Row():
-            validate_btn = gr.Button("Validate Model and Dataset", variant="primary", interactive=False)
-            model_id_input.change(gate_validate_btn,
-                                  inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input],
-                                  outputs=[validate_btn])
-            dataset_id_input.change(gate_validate_btn,
-                                  inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input],
-                                  outputs=[validate_btn])
-            dataset_config_input.change(gate_validate_btn,
-                                    inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input],
-                                    outputs=[validate_btn])
-            dataset_split_input.change(gate_validate_btn,
-                                    inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input],
-                                    outputs=[validate_btn])
         with gr.Row(visible=True) as loading_row:
             gr.Markdown('''
@@ -270,51 +284,45 @@ with gr.Blocks(theme=theme) as iface:
                         ''')
         with gr.Row(visible=False) as preview_row:
-            with gr.Column():
-                id2label_mapping_dataframe = gr.DataFrame(label="Preview of label mapping")
-                gr.Markdown('''
-                            <span style="background-color:#5fc269; color:white">Does this look right? If not, Check and update your feature mapping -></span>
-                            ''')
-                example_labels = gr.Label(label='Model Prediction Sample')
-            with gr.Accordion("Advance", open=False):
-                run_local = gr.Checkbox(value=True, label="Run in this Space")
-                column_mapping_input = gr.Textbox(
-                    value="",
-                    lines=6,
-                    label="Column mapping",
-                    placeholder="Description of mapping of columns in model to dataset, in json format, e.g.:\n"
-                                '{\n'
-                                '   "text": "context",\n'
-                                '   "label": {0: "Positive", 1: "Negative"}\n'
-                                '}',
-                )
         run_btn = gr.Button(
             "Get Evaluation Result",
             variant="primary",
             interactive=False,
         )
-        validate_btn.click(
-            try_validate,
-            inputs=[
-                model_id_input,
-                dataset_id_input,
-                dataset_config_input,
-                dataset_split_input,
-            ],
-            outputs=[
-                loading_row,
-                preview_row,
-                run_btn,
-                example_labels,
-                id2label_mapping_dataframe,
-                column_mapping_input,
-            ],
-        )
         run_btn.click(
             try_submit,
@@ -323,8 +331,6 @@ with gr.Blocks(theme=theme) as iface:
                 dataset_id_input,
                 dataset_config_input,
                 dataset_split_input,
-                column_mapping_input,
-                run_local,
             ],
             outputs=[
                 run_btn,

 from transformers.pipelines import TextClassificationPipeline
+from text_classification import check_column_mapping_keys_validity, text_classification_fix_column_mapping
 HF_REPO_ID = 'HF_REPO_ID'
         return dataset_id, None, None
     return dataset_id, dataset_config, dataset_split
+def try_validate(m_id, ppl, dataset_id, dataset_config, dataset_split, column_mapping='{}'):
     # Validate model
     if m_id is None:
+        gr.Warning('Model is not accessible. Please set your HF_TOKEN if it is a private model.')
         return (
             gr.update(interactive=False),   # Submit button
+            gr.update(visible=True),       # Loading row
+            gr.update(visible=False),        # Preview row
+            gr.update(visible=False),       # Model prediction input
             gr.update(visible=False),       # Model prediction preview
             gr.update(visible=False),       # Label mapping preview
         )
     if isinstance(ppl, Exception):
+        gr.Warning(f'Failed to load model": {ppl}')
         return (
             gr.update(interactive=False),   # Submit button
+            gr.update(visible=True),       # Loading row
+            gr.update(visible=False),        # Preview row
+            gr.update(visible=False),       # Model prediction input
             gr.update(visible=False),       # Model prediction preview
             gr.update(visible=False),       # Label mapping preview
         )
     # Validate dataset
     if not dataset_ok:
         return (
             gr.update(interactive=False),   # Submit button
+            gr.update(visible=True),       # Loading row
+            gr.update(visible=False),        # Preview row
+            gr.update(visible=False),       # Model prediction input
             gr.update(visible=False),       # Model prediction preview
             gr.update(visible=False),       # Label mapping preview
+            # gr.update(visible=True),        # Column mapping
         )
     # TODO: Validate column mapping by running once
     id2label_df = None
     if isinstance(ppl, TextClassificationPipeline):
         try:
+            print('validating phase, ', column_mapping)
             column_mapping = json.loads(column_mapping)
         except Exception:
             column_mapping = {}
+        column_mapping, prediction_input, prediction_result, id2label_df = \
             text_classification_fix_column_mapping(column_mapping, ppl, d_id, config, split)
         column_mapping = json.dumps(column_mapping, indent=2)
     if prediction_result is None:
         gr.Warning('The model failed to predict with the first row in the dataset. Please provide column mappings in "Advance" settings.')
         return (
             gr.update(interactive=False),   # Submit button
+            gr.update(visible=True),       # Loading row
+            gr.update(visible=False),        # Preview row
+            gr.update(visible=False),       # Model prediction input
             gr.update(visible=False),   # Model prediction preview
             gr.update(visible=False),   # Label mapping preview
+            # gr.update(value=column_mapping, visible=True, interactive=True),    # Column mapping
         )
     elif id2label_df is None:
         gr.Warning('The prediction result does not conform the labels in the dataset. Please provide label mappings in "Advance" settings.')
         return (
             gr.update(interactive=False),   # Submit button
+            gr.update(visible=False),       # Loading row
+            gr.update(visible=True),        # Preview row
+            gr.update(value=f'**Sample Input**: {prediction_input}', visible=True),       # Model prediction input
             gr.update(value=prediction_result, visible=True),   # Model prediction preview
             gr.update(visible=False),   # Label mapping preview
+            # gr.update(value=column_mapping, visible=True, interactive=True),    # Column mapping
         )
     gr.Info("Model and dataset validations passed. Your can submit the evaluation task.")
     return (
+        gr.update(interactive=True),    # Submit button
         gr.update(visible=False),       # Loading row
         gr.update(visible=True),        # Preview row
+        gr.update(value=f'**Sample Input**: {prediction_input}', visible=True),       # Model prediction input
         gr.update(value=prediction_result, visible=True),   # Model prediction preview
+        gr.update(value=id2label_df, visible=True, interactive=True), # Label mapping preview
     )
 with gr.Blocks(theme=theme) as iface:
     with gr.Tab("Text Classification"):
         def check_dataset_and_get_config(dataset_id):
             try:
                 configs = datasets.get_dataset_config_names(dataset_id)
                 return gr.Dropdown(configs, value=configs[0], visible=True)
             except Exception:
                 # Dataset may not exist
                 pass
+        def check_dataset_and_get_split(dataset_config, dataset_id):
             try:
+                splits = list(datasets.load_dataset(dataset_id, dataset_config).keys())
                 return gr.Dropdown(splits, value=splits[0], visible=True)
             except Exception as e:
                 # Dataset may not exist
+                gr.Warning(f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}")
                 pass
+        def gate_validate_btn(model_id, dataset_id, dataset_config, dataset_split, id2label_mapping_dataframe=None):
+            column_mapping = '{}'
+            m_id, ppl = check_model(model_id=model_id)
+            if id2label_mapping_dataframe is not None:
+                column_mapping = id2label_mapping_dataframe.to_json(orient="split")
+            if check_column_mapping_keys_validity(column_mapping, ppl) is False:
+                gr.Warning('Label mapping table has invalid contents. Please check again.')
+                return (gr.update(interactive=False),
+                        gr.update(),
+                        gr.update(),
+                        gr.update(),
+                        gr.update(),
+                        gr.update())
             else:
+                if model_id and dataset_id and dataset_config and dataset_split:
+                    return try_validate(m_id, ppl, dataset_id, dataset_config, dataset_split, column_mapping)
+                else:
+                    del ppl
+                    return (gr.update(interactive=False),
+                            gr.update(visible=True),
+                            gr.update(visible=False),
+                            gr.update(visible=False),
+                            gr.update(visible=False),
+                            gr.update(visible=False))
+        with gr.Row():
+            gr.Markdown('''
+                <h1 style="text-align: center;">
+                Giskard Evaluator
+                </h1>
+                Welcome to Giskard Evaluator Space! Get your report immediately by simply input your model id and dataset id below. Follow our leads and improve your model in no time.
+                ''')
         with gr.Row():
             model_id_input = gr.Textbox(
                 label="Hugging Face model id",
             dataset_split_input = gr.Dropdown(['default'], value=['default'], label='Dataset Split', visible=False)
             dataset_id_input.change(check_dataset_and_get_config, dataset_id_input, dataset_config_input)
+            dataset_config_input.change(
+                check_dataset_and_get_split,
+                inputs=[dataset_config_input, dataset_id_input],
+                outputs=[dataset_split_input])
         with gr.Row(visible=True) as loading_row:
             gr.Markdown('''
                         ''')
         with gr.Row(visible=False) as preview_row:
+            gr.Markdown('''
+                <h1 style="text-align: center;">
+                Confirm Label Details
+                </h1>
+                Base on your model and dataset, we inferred this label mapping. **If the mapping is incorrect, please modify it in the table below.**
+                ''')
+        with gr.Row():
+            id2label_mapping_dataframe = gr.DataFrame(label="Preview of label mapping", interactive=True, visible=False)
+        with gr.Row():
+            example_input = gr.Markdown('Sample Input: ', visible=False)
+        with gr.Row():
+            example_labels = gr.Label(label='Model Prediction Sample', visible=False)
         run_btn = gr.Button(
             "Get Evaluation Result",
             variant="primary",
             interactive=False,
+            size="lg",
         )
+        model_id_input.change(gate_validate_btn,
+                                inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input],
+                                outputs=[run_btn, loading_row, preview_row, example_input, example_labels, id2label_mapping_dataframe])
+        dataset_id_input.change(gate_validate_btn,
+                                inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input],
+                                outputs=[run_btn, loading_row, preview_row, example_input,  example_labels, id2label_mapping_dataframe])
+        dataset_config_input.change(gate_validate_btn,
+                                inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input],
+                                outputs=[run_btn, loading_row, preview_row, example_input, example_labels, id2label_mapping_dataframe])
+        dataset_split_input.change(gate_validate_btn,
+                                inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input],
+                                outputs=[run_btn, loading_row, preview_row, example_input, example_labels, id2label_mapping_dataframe])
+        id2label_mapping_dataframe.input(gate_validate_btn,
+                                inputs=[model_id_input, dataset_id_input, dataset_config_input, dataset_split_input, id2label_mapping_dataframe],
+                                outputs=[run_btn, loading_row, preview_row, example_input, example_labels, id2label_mapping_dataframe])
         run_btn.click(
             try_submit,
                 dataset_id_input,
                 dataset_config_input,
                 dataset_split_input,
             ],
             outputs=[
                 run_btn,

text_classification.py CHANGED Viewed

@@ -1,7 +1,6 @@
 import datasets
 import logging
 import pandas as pd
@@ -36,6 +35,20 @@ def text_classification_map_model_and_dataset_labels(id2label, dataset_features)
     return id2label_mapping, dataset_labels
 def text_classification_fix_column_mapping(column_mapping, ppl, d_id, config, split):
     # We assume dataset is ok here
     ds = datasets.load_dataset(d_id, config)[split]
@@ -72,10 +85,12 @@ def text_classification_fix_column_mapping(column_mapping, ppl, d_id, config, sp
     id2label_mapping = {}
     id2label = ppl.model.config.id2label
     label2id = {v: k for k, v in id2label.items()}
     prediction_result = None
     try:
         # Use the first item to test prediction
-        results = ppl({"text": df.head(1).at[0, column_mapping["text"]]}, top_k=None)
         prediction_result = {
             f'{result["label"]}({label2id[result["label"]]})': result["score"] for result in results
         }
@@ -85,33 +100,29 @@ def text_classification_fix_column_mapping(column_mapping, ppl, d_id, config, sp
     # Infer labels
     id2label_mapping, dataset_labels = text_classification_map_model_and_dataset_labels(id2label, dataset_features)
-    if "label" in column_mapping.keys():
-        if not isinstance(column_mapping["label"], dict) or set(column_mapping["label"].values()) != set(dataset_labels):
-            logging.warning(f'Provided {column_mapping["label"]} does not match labels in Dataset')
-            return column_mapping, prediction_result, None
-        if isinstance(column_mapping["label"], dict):
             # Use the column mapping passed by user
-            for i, model_label in column_mapping["label"].items():
-                id2label_mapping[model_label] = dataset_labels[int(i)]
     elif None in id2label_mapping.values():
         column_mapping["label"] = {
             i: None for i in id2label.keys()
         }
         return column_mapping, prediction_result, None
-    id2label_mapping = {
-        v: k for k, v in id2label_mapping.items()
     }
     id2label_df = pd.DataFrame({
-        "ID": list(range(len(dataset_labels))),
-        "Labels": dataset_labels,
-        "Labels in original model": [f"{id2label_mapping[label]}({label2id[id2label_mapping[label]]})" for label in dataset_labels],
     })
-    if "label" not in column_mapping.keys():
         # Column mapping should contain original model labels
         column_mapping["label"] = {
             str(i): id2label_mapping[label] for i, label in zip(id2label.keys(), dataset_labels)
         }
-    return column_mapping, prediction_result, id2label_df

 import datasets
 import logging
+import json
 import pandas as pd
     return id2label_mapping, dataset_labels
+def check_column_mapping_keys_validity(column_mapping, ppl):
+    # get the element in all the list elements
+    column_mapping = json.loads(column_mapping)
+    if "data" not in column_mapping.keys():
+        return True
+    user_labels = set([pair[0] for pair in column_mapping["data"]])
+    model_labels = set([pair[1] for pair in column_mapping["data"]])
+    id2label = ppl.model.config.id2label
+    original_labels = set(id2label.values())
+    return user_labels == model_labels == original_labels
 def text_classification_fix_column_mapping(column_mapping, ppl, d_id, config, split):
     # We assume dataset is ok here
     ds = datasets.load_dataset(d_id, config)[split]
     id2label_mapping = {}
     id2label = ppl.model.config.id2label
     label2id = {v: k for k, v in id2label.items()}
+    prediction_input = None
     prediction_result = None
     try:
         # Use the first item to test prediction
+        prediction_input = df.head(1).at[0, column_mapping["text"]]
+        results = ppl({"text": prediction_input}, top_k=None)
         prediction_result = {
             f'{result["label"]}({label2id[result["label"]]})': result["score"] for result in results
         }
     # Infer labels
     id2label_mapping, dataset_labels = text_classification_map_model_and_dataset_labels(id2label, dataset_features)
+    if "data" in column_mapping.keys():
+        if isinstance(column_mapping["data"], list):
             # Use the column mapping passed by user
+            for user_label, model_label in column_mapping["data"]:
+                id2label_mapping[model_label] = user_label
     elif None in id2label_mapping.values():
         column_mapping["label"] = {
             i: None for i in id2label.keys()
         }
         return column_mapping, prediction_result, None
+    prediction_result = {
+        f'[{label2id[result["label"]]}]{result["label"]}(original) - {id2label_mapping[result["label"]]}(mapped)': result["score"] for result in results
     }
     id2label_df = pd.DataFrame({
+        "Dataset Labels": dataset_labels,
+        "Model Prediction Labels": [id2label_mapping[label] for label in dataset_labels],
     })
+    if "data" not in column_mapping.keys():
         # Column mapping should contain original model labels
         column_mapping["label"] = {
             str(i): id2label_mapping[label] for i, label in zip(id2label.keys(), dataset_labels)
         }
+    return column_mapping, prediction_input, prediction_result, id2label_df