giskard-evaluator

Running

App Files Files Community

200

inoki-giskard committed on Mar 14

Commit

d1e5b15

•

1 Parent(s): 029ed97

Add an HTML widget to show the error info for model id validation

Browse files

Files changed (3) hide show

app_text_classification.py +58 -39
text_classification_ui_helpers.py +138 -64
wordings.py +8 -1

app_text_classification.py CHANGED Viewed

@@ -18,20 +18,21 @@ from text_classification_ui_helpers import (
 import logging
 from wordings import (
-  CONFIRM_MAPPING_DETAILS_MD,
-  INTRODUCTION_MD,
-  USE_INFERENCE_API_TIP,
-  CHECK_LOG_SECTION_RAW,
-  HF_TOKEN_INVALID_STYLED
 )
 MAX_LABELS = 40
 MAX_FEATURES = 20
-EXAMPLE_MODEL_ID = "cardiffnlp/twitter-roberta-base-sentiment-latest"
 CONFIG_PATH = "./config.yaml"
 logger = logging.getLogger(__name__)
 def get_demo():
     with gr.Row():
         gr.Markdown(INTRODUCTION_MD)
@@ -39,10 +40,14 @@ def get_demo():
             label="Evaluation ID:", value=uuid.uuid4, visible=False, interactive=False
         )
     with gr.Row():
-        model_id_input = gr.Textbox(
-            label="Hugging Face Model id",
-            placeholder=EXAMPLE_MODEL_ID + " (press enter to confirm)",
-        )
         with gr.Column():
             dataset_id_input = gr.Dropdown(
@@ -53,8 +58,12 @@ def get_demo():
             )
     with gr.Row():
-        dataset_config_input = gr.Dropdown(label="Dataset Config", visible=False, allow_custom_value=True)
-        dataset_split_input = gr.Dropdown(label="Dataset Split", visible=False, allow_custom_value=True)
     with gr.Row():
         first_line_ds = gr.DataFrame(label="Dataset Preview", visible=False)
@@ -72,7 +81,9 @@ def get_demo():
     with gr.Row():
         validation_result = gr.HTML(visible=False)
     with gr.Row():
-        example_input = gr.Textbox(label="Example Input", visible=False, interactive=False)
         example_prediction = gr.Label(label="Model Sample Prediction", visible=False)
     with gr.Row():
@@ -119,15 +130,15 @@ def get_demo():
             # Reason: data_leakage barely raises any issues and takes too many requests
             # when using inference API, causing rate limit error
             scan_config = [
-                "ethical_bias",
-                "text_perturbation",
                 "robustness",
                 "performance",
                 "underconfidence",
                 "overconfidence",
                 "spurious_correlation",
                 "data_leakage",
-                ]
             return gr.update(
                 choices=scan_config, value=selected, label="Scan Settings", visible=True
             )
@@ -148,7 +159,6 @@ def get_demo():
             every=0.5,
         )
     scanners.change(write_scanners, inputs=[scanners, uid_label])
     gr.on(
@@ -161,20 +171,28 @@ def get_demo():
         inputs=[dataset_id_input],
         outputs=[dataset_config_input, dataset_split_input, loading_dataset_info],
     )
     gr.on(
         triggers=[dataset_id_input.input, dataset_id_input.select],
         fn=check_dataset,
         inputs=[dataset_id_input],
-        outputs=[dataset_config_input, dataset_split_input, loading_dataset_info]
     )
-    dataset_config_input.change(fn=get_dataset_splits, inputs=[dataset_id_input, dataset_config_input], outputs=[dataset_split_input])
     gr.on(
-        triggers=[model_id_input.change, dataset_id_input.change, dataset_config_input.change],
         fn=empty_column_mapping,
-        inputs=[uid_label]
     )
     gr.on(
@@ -199,7 +217,6 @@ def get_demo():
     gr.on(
         triggers=[
             model_id_input.change,
-            model_id_input.input,
             dataset_id_input.change,
             dataset_config_input.change,
             dataset_split_input.change,
@@ -212,12 +229,14 @@ def get_demo():
             dataset_split_input,
         ],
         outputs=[
-            example_btn,
             first_line_ds,
             validation_result,
             example_input,
             example_prediction,
-            column_mapping_accordion,],
     )
     gr.on(
@@ -258,14 +277,14 @@ def get_demo():
             uid_label,
         ],
         outputs=[
-            run_btn,
-            logs,
-            uid_label,
             validation_result,
             example_input,
             example_prediction,
             column_mapping_accordion,
-          ],
     )
     gr.on(
@@ -276,11 +295,11 @@ def get_demo():
         fn=enable_run_btn,
         inputs=[
             uid_label,
-            inference_token,
-            model_id_input,
-            dataset_id_input,
-            dataset_config_input,
-            dataset_split_input
         ],
         outputs=[run_btn],
     )
@@ -290,11 +309,11 @@ def get_demo():
         fn=enable_run_btn,
         inputs=[
             uid_label,
-            inference_token,
-            model_id_input,
-            dataset_id_input,
-            dataset_config_input,
-            dataset_split_input
         ],  # FIXME
         outputs=[run_btn],
     )

 import logging
 from wordings import (
+    EXAMPLE_MODEL_ID,
+    CONFIRM_MAPPING_DETAILS_MD,
+    INTRODUCTION_MD,
+    USE_INFERENCE_API_TIP,
+    CHECK_LOG_SECTION_RAW,
+    HF_TOKEN_INVALID_STYLED,
 )
 MAX_LABELS = 40
 MAX_FEATURES = 20
 CONFIG_PATH = "./config.yaml"
 logger = logging.getLogger(__name__)
 def get_demo():
     with gr.Row():
         gr.Markdown(INTRODUCTION_MD)
             label="Evaluation ID:", value=uuid.uuid4, visible=False, interactive=False
         )
     with gr.Row():
+        with gr.Column():
+            with gr.Row():
+                model_id_input = gr.Textbox(
+                    label="Hugging Face Model id",
+                    placeholder=f"e.g. {EXAMPLE_MODEL_ID}",
+                )
+            with gr.Row():
+                model_id_error_info = gr.HTML(visible=False)
         with gr.Column():
             dataset_id_input = gr.Dropdown(
             )
     with gr.Row():
+        dataset_config_input = gr.Dropdown(
+            label="Dataset Config", visible=False, allow_custom_value=True
+        )
+        dataset_split_input = gr.Dropdown(
+            label="Dataset Split", visible=False, allow_custom_value=True
+        )
     with gr.Row():
         first_line_ds = gr.DataFrame(label="Dataset Preview", visible=False)
     with gr.Row():
         validation_result = gr.HTML(visible=False)
     with gr.Row():
+        example_input = gr.Textbox(
+            label="Example Input", visible=False, interactive=False
+        )
         example_prediction = gr.Label(label="Model Sample Prediction", visible=False)
     with gr.Row():
             # Reason: data_leakage barely raises any issues and takes too many requests
             # when using inference API, causing rate limit error
             scan_config = [
+                "ethical_bias",
+                "text_perturbation",
                 "robustness",
                 "performance",
                 "underconfidence",
                 "overconfidence",
                 "spurious_correlation",
                 "data_leakage",
+            ]
             return gr.update(
                 choices=scan_config, value=selected, label="Scan Settings", visible=True
             )
             every=0.5,
         )
     scanners.change(write_scanners, inputs=[scanners, uid_label])
     gr.on(
         inputs=[dataset_id_input],
         outputs=[dataset_config_input, dataset_split_input, loading_dataset_info],
     )
     gr.on(
         triggers=[dataset_id_input.input, dataset_id_input.select],
         fn=check_dataset,
         inputs=[dataset_id_input],
+        outputs=[dataset_config_input, dataset_split_input, loading_dataset_info],
     )
+    dataset_config_input.change(
+        fn=get_dataset_splits,
+        inputs=[dataset_id_input, dataset_config_input],
+        outputs=[dataset_split_input],
+    )
     gr.on(
+        triggers=[
+            model_id_input.change,
+            dataset_id_input.change,
+            dataset_config_input.change,
+        ],
         fn=empty_column_mapping,
+        inputs=[uid_label],
     )
     gr.on(
     gr.on(
         triggers=[
             model_id_input.change,
             dataset_id_input.change,
             dataset_config_input.change,
             dataset_split_input.change,
             dataset_split_input,
         ],
         outputs=[
+            example_btn,
             first_line_ds,
             validation_result,
             example_input,
             example_prediction,
+            column_mapping_accordion,
+            model_id_error_info,
+        ],
     )
     gr.on(
             uid_label,
         ],
         outputs=[
+            run_btn,
+            logs,
+            uid_label,
             validation_result,
             example_input,
             example_prediction,
             column_mapping_accordion,
+        ],
     )
     gr.on(
         fn=enable_run_btn,
         inputs=[
             uid_label,
+            inference_token,
+            model_id_input,
+            dataset_id_input,
+            dataset_config_input,
+            dataset_split_input,
         ],
         outputs=[run_btn],
     )
         fn=enable_run_btn,
         inputs=[
             uid_label,
+            inference_token,
+            model_id_input,
+            dataset_id_input,
+            dataset_config_input,
+            dataset_split_input,
         ],  # FIXME
         outputs=[run_btn],
     )

text_classification_ui_helpers.py CHANGED Viewed

@@ -9,10 +9,10 @@ import pandas as pd
 import leaderboard
 from io_utils import (
-  read_column_mapping,
-  write_column_mapping,
-  read_scanners,
-  write_scanners,
 )
 from run_jobs import save_job_to_pipe
 from text_classification import (
@@ -24,9 +24,11 @@ from text_classification import (
     HuggingFaceInferenceAPIResponse,
 )
 from wordings import (
     CHECK_CONFIG_OR_SPLIT_RAW,
     CONFIRM_MAPPING_DETAILS_FAIL_RAW,
     MAPPING_STYLED_ERROR_WARNING,
     NOT_TEXT_CLASSIFICATION_MODEL_RAW,
     UNMATCHED_MODEL_DATASET_STYLED_ERROR,
     CHECK_LOG_SECTION_RAW,
@@ -42,6 +44,7 @@ MAX_FEATURES = 20
 ds_dict = None
 ds_config = None
 def get_related_datasets_from_leaderboard(model_id, dataset_id_input):
     records = leaderboard.records
     model_records = records[records["model_id"] == model_id]
@@ -49,54 +52,56 @@ def get_related_datasets_from_leaderboard(model_id, dataset_id_input):
     if len(datasets_unique) == 0:
         return gr.update(choices=[])
     if dataset_id_input in datasets_unique:
         return gr.update(choices=datasets_unique)
     return gr.update(choices=datasets_unique, value="")
 logger = logging.getLogger(__file__)
 def get_dataset_splits(dataset_id, dataset_config):
     try:
-        splits = datasets.get_dataset_split_names(dataset_id, dataset_config, trust_remote_code=True)
         return gr.update(choices=splits, value=splits[0], visible=True)
     except Exception as e:
-        logger.warning(f"Check your dataset {dataset_id} and config {dataset_config}: {e}")
         return gr.update(visible=False)
 def check_dataset(dataset_id):
     logger.info(f"Loading {dataset_id}")
     try:
         configs = datasets.get_dataset_config_names(dataset_id, trust_remote_code=True)
         if len(configs) == 0:
-            return (
-                gr.update(visible=False),
-                gr.update(visible=False),
-                ""
-            )
-        splits = datasets.get_dataset_split_names(dataset_id, configs[0], trust_remote_code=True)
         return (
             gr.update(choices=configs, value=configs[0], visible=True),
             gr.update(choices=splits, value=splits[0], visible=True),
-            ""
         )
     except Exception as e:
         logger.warning(f"Check your dataset {dataset_id}: {e}")
         if "doesn't exist" in str(e):
             gr.Warning(get_dataset_fetch_error_raw(e))
-        if "forbidden" in str(e).lower(): # GSK-2770
             gr.Warning(get_dataset_fetch_error_raw(e))
-        return (
-            gr.update(visible=False),
-            gr.update(visible=False),
-            ""
-        )
 def empty_column_mapping(uid):
     """Overwrite the column mapping stored for ``uid`` with ``None``.

     Delegates to ``write_column_mapping``; nothing is returned.
     """
     write_column_mapping(None, uid)
 def write_column_mapping_to_config(uid, *labels):
     # TODO: Substitute 'text' with more features for zero-shot
     # we are not using ds features because we only support "text" for now
@@ -114,13 +119,14 @@ def write_column_mapping_to_config(uid, *labels):
     write_column_mapping(all_mappings, uid)
 def export_mappings(all_mappings, key, subkeys, values):
     if key not in all_mappings.keys():
         all_mappings[key] = dict()
     if subkeys is None:
         subkeys = list(all_mappings[key].keys())
-    if not subkeys:
         logging.debug(f"subkeys is empty for {key}")
         return all_mappings
@@ -139,7 +145,9 @@ def list_labels_and_features_from_dataset(ds_labels, ds_features, model_labels,
         ds_labels = list(shared_labels)
     if len(ds_labels) > MAX_LABELS:
         ds_labels = ds_labels[:MAX_LABELS]
-        gr.Warning(f"Too many labels to display for this spcae. We do not support more than {MAX_LABELS} in this space. You can use cli tool at https://github.com/Giskard-AI/cicd.")
     # sort labels to make sure the order is consistent
     # prediction gives the order based on probability
@@ -183,11 +191,47 @@ def precheck_model_ds_enable_example_btn(
     model_id, dataset_id, dataset_config, dataset_split
 ):
     model_task = check_model_task(model_id)
     preload_hf_inference_api(model_id)
     if dataset_config is None or dataset_split is None or len(dataset_config) == 0:
         return (
-            gr.update(interactive=False),
             gr.update(visible=False),
             gr.update(visible=False),
             gr.update(visible=False),
@@ -198,41 +242,36 @@ def precheck_model_ds_enable_example_btn(
     try:
         ds = datasets.load_dataset(dataset_id, dataset_config, trust_remote_code=True)
         df: pd.DataFrame = ds[dataset_split].to_pandas().head(5)
-        ds_labels, ds_features, _ = get_labels_and_features_from_dataset(ds[dataset_split])
-        if model_task is None or model_task != "text-classification":
-          gr.Warning(NOT_TEXT_CLASSIFICATION_MODEL_RAW)
-          return (
-              gr.update(interactive=False),
-              gr.update(value=df, visible=True),
-              gr.update(visible=False),
-              gr.update(visible=False),
-              gr.update(visible=False),
-              gr.update(visible=False),
-          )
         if not isinstance(ds_labels, list) or not isinstance(ds_features, list):
             gr.Warning(CHECK_CONFIG_OR_SPLIT_RAW)
             return (
-                gr.update(interactive=False),
                 gr.update(value=df, visible=True),
                 gr.update(visible=False),
                 gr.update(visible=False),
                 gr.update(visible=False),
                 gr.update(visible=False),
             )
         return (
-                gr.update(interactive=True),
-                gr.update(value=df, visible=True),
-                gr.update(visible=False),
-                gr.update(visible=False),
-                gr.update(visible=False),
-                gr.update(visible=False),
-            )
     except Exception as e:
         # Config or split wrong
-        logger.warning(f"Check your dataset {dataset_id} and config {dataset_config} on split {dataset_split}: {e}")
         return (
             gr.update(interactive=False),
             gr.update(visible=False),
@@ -240,6 +279,7 @@ def precheck_model_ds_enable_example_btn(
             gr.update(visible=False),
             gr.update(visible=False),
             gr.update(visible=False),
         )
@@ -266,7 +306,7 @@ def align_columns_and_show_prediction(
     dropdown_placement = [
         gr.Dropdown(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)
     ]
     hf_token = os.environ.get(HF_WRITE_TOKEN, default="")
     prediction_input, prediction_response = get_example_prediction(
@@ -296,8 +336,10 @@ def align_columns_and_show_prediction(
         )
     model_labels = list(prediction_response.keys())
-    ds = datasets.load_dataset(dataset_id, dataset_config, split=dataset_split, trust_remote_code=True)
     ds_labels, ds_features, _ = get_labels_and_features_from_dataset(ds)
     # when dataset does not have labels or features
@@ -312,7 +354,7 @@ def align_columns_and_show_prediction(
             "",
             *dropdown_placement,
         )
     if len(ds_labels) != len(model_labels):
         return (
             gr.update(value=UNMATCHED_MODEL_DATASET_STYLED_ERROR, visible=True),
@@ -339,7 +381,11 @@ def align_columns_and_show_prediction(
     ):
         return (
             gr.update(value=MAPPING_STYLED_ERROR_WARNING, visible=True),
-            gr.update(value=prediction_input, lines=min(len(prediction_input)//225 + 1, 5), visible=True),
             gr.update(value=prediction_response, visible=True),
             gr.update(visible=True, open=True),
             gr.update(interactive=(inference_token != "")),
@@ -349,7 +395,11 @@ def align_columns_and_show_prediction(
     return (
         gr.update(value=VALIDATED_MODEL_DATASET_STYLED, visible=True),
-        gr.update(value=prediction_input, lines=min(len(prediction_input)//225 + 1, 5), visible=True),
         gr.update(value=prediction_response, visible=True),
         gr.update(visible=True, open=False),
         gr.update(interactive=(inference_token != "")),
@@ -370,14 +420,22 @@ def check_column_mapping_keys_validity(all_mappings):
     return True
-def enable_run_btn(uid, inference_token, model_id, dataset_id, dataset_config, dataset_split):
     if inference_token == "":
         logger.warning("Inference API is not enabled")
         return gr.update(interactive=False)
-    if model_id == "" or dataset_id == "" or dataset_config == "" or dataset_split == "":
         logger.warning("Model id or dataset id is not selected")
         return gr.update(interactive=False)
     all_mappings = read_column_mapping(uid)
     if not check_column_mapping_keys_validity(all_mappings):
         logger.warning("Column mapping is not valid")
@@ -388,17 +446,24 @@ def enable_run_btn(uid, inference_token, model_id, dataset_id, dataset_config, d
         return gr.update(interactive=False)
     return gr.update(interactive=True)
-def construct_label_and_feature_mapping(all_mappings, ds_labels, ds_features, label_keys=None):
     label_mapping = {}
     if len(all_mappings["labels"].keys()) != len(ds_labels):
-        logger.warning(f"""Label mapping corrupted: {CONFIRM_MAPPING_DETAILS_FAIL_RAW}.
-                    \nall_mappings: {all_mappings}\nds_labels: {ds_labels}""")
     if len(all_mappings["features"].keys()) != len(ds_features):
-        logger.warning(f"""Feature mapping corrupted: {CONFIRM_MAPPING_DETAILS_FAIL_RAW}.
-                    \nall_mappings: {all_mappings}\nds_features: {ds_features}""")
-    for i, label in zip(range(len(ds_labels)),  ds_labels):
         # align the saved labels with dataset labels order
         label_mapping.update({str(i): all_mappings["labels"][label]})
@@ -408,15 +473,17 @@ def construct_label_and_feature_mapping(all_mappings, ds_labels, ds_features, la
     feature_mapping = all_mappings["features"]
     if len(label_keys) > 0:
-      feature_mapping.update({"label": label_keys[0]})
     return label_mapping, feature_mapping
 def show_hf_token_info(token):
     """Return a Gradio update whose visibility mirrors token invalidity.

     The update is visible when ``check_hf_token_validity`` rejects ``token``
     and hidden when the token is accepted.
     """
     token_rejected = not check_hf_token_validity(token)
     return gr.update(visible=token_rejected)
 def try_submit(m_id, d_id, config, split, inference_token, uid):
     all_mappings = read_column_mapping(uid)
     if not check_column_mapping_keys_validity(all_mappings):
@@ -425,7 +492,9 @@ def try_submit(m_id, d_id, config, split, inference_token, uid):
     # get ds labels and features again for alignment
     ds = datasets.load_dataset(d_id, config, split=split, trust_remote_code=True)
     ds_labels, ds_features, label_keys = get_labels_and_features_from_dataset(ds)
-    label_mapping, feature_mapping = construct_label_and_feature_mapping(all_mappings, ds_labels, ds_features, label_keys)
     eval_str = f"[{m_id}]<{d_id}({config}, {split} set)>"
     save_job_to_pipe(
@@ -451,7 +520,12 @@ def try_submit(m_id, d_id, config, split, inference_token, uid):
     return (
         gr.update(interactive=False),  # Submit button
-        gr.update(value=f"{CHECK_LOG_SECTION_RAW}Your job id is: {uid}. ", lines=5, visible=True, interactive=False),
         new_uid,  # Allocate a new uuid
         gr.update(visible=False),
         gr.update(visible=False),

 import leaderboard
 from io_utils import (
+    read_column_mapping,
+    write_column_mapping,
+    read_scanners,
+    write_scanners,
 )
 from run_jobs import save_job_to_pipe
 from text_classification import (
     HuggingFaceInferenceAPIResponse,
 )
 from wordings import (
+    EXAMPLE_MODEL_ID,
     CHECK_CONFIG_OR_SPLIT_RAW,
     CONFIRM_MAPPING_DETAILS_FAIL_RAW,
     MAPPING_STYLED_ERROR_WARNING,
+    NOT_FOUND_MODEL_RAW,
     NOT_TEXT_CLASSIFICATION_MODEL_RAW,
     UNMATCHED_MODEL_DATASET_STYLED_ERROR,
     CHECK_LOG_SECTION_RAW,
 ds_dict = None
 ds_config = None
 def get_related_datasets_from_leaderboard(model_id, dataset_id_input):
     records = leaderboard.records
     model_records = records[records["model_id"] == model_id]
     if len(datasets_unique) == 0:
         return gr.update(choices=[])
     if dataset_id_input in datasets_unique:
         return gr.update(choices=datasets_unique)
     return gr.update(choices=datasets_unique, value="")
 logger = logging.getLogger(__file__)
 def get_dataset_splits(dataset_id, dataset_config):
     try:
+        splits = datasets.get_dataset_split_names(
+            dataset_id, dataset_config, trust_remote_code=True
+        )
         return gr.update(choices=splits, value=splits[0], visible=True)
     except Exception as e:
+        logger.warning(
+            f"Check your dataset {dataset_id} and config {dataset_config}: {e}"
+        )
         return gr.update(visible=False)
 def check_dataset(dataset_id):
     logger.info(f"Loading {dataset_id}")
     try:
         configs = datasets.get_dataset_config_names(dataset_id, trust_remote_code=True)
         if len(configs) == 0:
+            return (gr.update(visible=False), gr.update(visible=False), "")
+        splits = datasets.get_dataset_split_names(
+            dataset_id, configs[0], trust_remote_code=True
+        )
         return (
             gr.update(choices=configs, value=configs[0], visible=True),
             gr.update(choices=splits, value=splits[0], visible=True),
+            "",
         )
     except Exception as e:
         logger.warning(f"Check your dataset {dataset_id}: {e}")
         if "doesn't exist" in str(e):
             gr.Warning(get_dataset_fetch_error_raw(e))
+        if "forbidden" in str(e).lower():  # GSK-2770
             gr.Warning(get_dataset_fetch_error_raw(e))
+        return (gr.update(visible=False), gr.update(visible=False), "")
 def empty_column_mapping(uid):
     """Overwrite the column mapping stored for ``uid`` with ``None``.

     Delegates to ``write_column_mapping``; nothing is returned.
     """
     write_column_mapping(None, uid)
 def write_column_mapping_to_config(uid, *labels):
     # TODO: Substitute 'text' with more features for zero-shot
     # we are not using ds features because we only support "text" for now
     write_column_mapping(all_mappings, uid)
 def export_mappings(all_mappings, key, subkeys, values):
     if key not in all_mappings.keys():
         all_mappings[key] = dict()
     if subkeys is None:
         subkeys = list(all_mappings[key].keys())
+    if not subkeys:
         logging.debug(f"subkeys is empty for {key}")
         return all_mappings
         ds_labels = list(shared_labels)
     if len(ds_labels) > MAX_LABELS:
         ds_labels = ds_labels[:MAX_LABELS]
+        gr.Warning(
+            f"Too many labels to display for this spcae. We do not support more than {MAX_LABELS} in this space. You can use cli tool at https://github.com/Giskard-AI/cicd."
+        )
     # sort labels to make sure the order is consistent
     # prediction gives the order based on probability
     model_id, dataset_id, dataset_config, dataset_split
 ):
     model_task = check_model_task(model_id)
+    if not model_task:
+        # Model might be not found
+        error_msg_html = f"<p style='color: red;'>{NOT_FOUND_MODEL_RAW}</p>"
+        if model_id.startswith("http://") or model_id.startswith("https://"):
+            error_msg = f"Please input your model id, such as {EXAMPLE_MODEL_ID}, instead of URL"
+            gr.Warning(error_msg)
+            error_msg_html = f"<p style='color: red;'>{error_msg}</p>"
+        else:
+            gr.Warning(NOT_FOUND_MODEL_RAW)
+        return (
+            gr.update(interactive=False),
+            gr.update(visible=False),
+            gr.update(visible=False),
+            gr.update(visible=False),
+            gr.update(visible=False),
+            gr.update(visible=False),
+            gr.update(value=error_msg_html, visible=True),
+        )
+    if model_task != "text-classification":
+        gr.Warning(NOT_TEXT_CLASSIFICATION_MODEL_RAW)
+        return (
+            gr.update(interactive=False),
+            gr.update(value=df, visible=True),
+            gr.update(visible=False),
+            gr.update(visible=False),
+            gr.update(visible=False),
+            gr.update(visible=False),
+            gr.update(
+                value=f"<p style='color: red;'>{NOT_TEXT_CLASSIFICATION_MODEL_RAW}",
+                visible=True,
+            ),
+        )
     preload_hf_inference_api(model_id)
     if dataset_config is None or dataset_split is None or len(dataset_config) == 0:
         return (
+            gr.update(interactive=False),
+            gr.update(visible=False),
             gr.update(visible=False),
             gr.update(visible=False),
             gr.update(visible=False),
     try:
         ds = datasets.load_dataset(dataset_id, dataset_config, trust_remote_code=True)
         df: pd.DataFrame = ds[dataset_split].to_pandas().head(5)
+        ds_labels, ds_features, _ = get_labels_and_features_from_dataset(
+            ds[dataset_split]
+        )
         if not isinstance(ds_labels, list) or not isinstance(ds_features, list):
             gr.Warning(CHECK_CONFIG_OR_SPLIT_RAW)
             return (
+                gr.update(interactive=False),
                 gr.update(value=df, visible=True),
                 gr.update(visible=False),
                 gr.update(visible=False),
                 gr.update(visible=False),
                 gr.update(visible=False),
+                gr.update(visible=False),
             )
         return (
+            gr.update(interactive=True),
+            gr.update(value=df, visible=True),
+            gr.update(visible=False),
+            gr.update(visible=False),
+            gr.update(visible=False),
+            gr.update(visible=False),
+            gr.update(visible=False),
+        )
     except Exception as e:
         # Config or split wrong
+        logger.warning(
+            f"Check your dataset {dataset_id} and config {dataset_config} on split {dataset_split}: {e}"
+        )
         return (
             gr.update(interactive=False),
             gr.update(visible=False),
             gr.update(visible=False),
             gr.update(visible=False),
             gr.update(visible=False),
+            gr.update(visible=False),
         )
     dropdown_placement = [
         gr.Dropdown(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)
     ]
     hf_token = os.environ.get(HF_WRITE_TOKEN, default="")
     prediction_input, prediction_response = get_example_prediction(
         )
     model_labels = list(prediction_response.keys())
+    ds = datasets.load_dataset(
+        dataset_id, dataset_config, split=dataset_split, trust_remote_code=True
+    )
     ds_labels, ds_features, _ = get_labels_and_features_from_dataset(ds)
     # when dataset does not have labels or features
             "",
             *dropdown_placement,
         )
     if len(ds_labels) != len(model_labels):
         return (
             gr.update(value=UNMATCHED_MODEL_DATASET_STYLED_ERROR, visible=True),
     ):
         return (
             gr.update(value=MAPPING_STYLED_ERROR_WARNING, visible=True),
+            gr.update(
+                value=prediction_input,
+                lines=min(len(prediction_input) // 225 + 1, 5),
+                visible=True,
+            ),
             gr.update(value=prediction_response, visible=True),
             gr.update(visible=True, open=True),
             gr.update(interactive=(inference_token != "")),
     return (
         gr.update(value=VALIDATED_MODEL_DATASET_STYLED, visible=True),
+        gr.update(
+            value=prediction_input,
+            lines=min(len(prediction_input) // 225 + 1, 5),
+            visible=True,
+        ),
         gr.update(value=prediction_response, visible=True),
         gr.update(visible=True, open=False),
         gr.update(interactive=(inference_token != "")),
     return True
+def enable_run_btn(
+    uid, inference_token, model_id, dataset_id, dataset_config, dataset_split
+):
     if inference_token == "":
         logger.warning("Inference API is not enabled")
         return gr.update(interactive=False)
+    if (
+        model_id == ""
+        or dataset_id == ""
+        or dataset_config == ""
+        or dataset_split == ""
+    ):
         logger.warning("Model id or dataset id is not selected")
         return gr.update(interactive=False)
     all_mappings = read_column_mapping(uid)
     if not check_column_mapping_keys_validity(all_mappings):
         logger.warning("Column mapping is not valid")
         return gr.update(interactive=False)
     return gr.update(interactive=True)
+def construct_label_and_feature_mapping(
+    all_mappings, ds_labels, ds_features, label_keys=None
+):
     label_mapping = {}
     if len(all_mappings["labels"].keys()) != len(ds_labels):
+        logger.warning(
+            f"""Label mapping corrupted: {CONFIRM_MAPPING_DETAILS_FAIL_RAW}.
+                    \nall_mappings: {all_mappings}\nds_labels: {ds_labels}"""
+        )
     if len(all_mappings["features"].keys()) != len(ds_features):
+        logger.warning(
+            f"""Feature mapping corrupted: {CONFIRM_MAPPING_DETAILS_FAIL_RAW}.
+                    \nall_mappings: {all_mappings}\nds_features: {ds_features}"""
+        )
+    for i, label in zip(range(len(ds_labels)), ds_labels):
         # align the saved labels with dataset labels order
         label_mapping.update({str(i): all_mappings["labels"][label]})
     feature_mapping = all_mappings["features"]
     if len(label_keys) > 0:
+        feature_mapping.update({"label": label_keys[0]})
     return label_mapping, feature_mapping
 def show_hf_token_info(token):
     """Return a Gradio update whose visibility mirrors token invalidity.

     The update is visible when ``check_hf_token_validity`` rejects ``token``
     and hidden when the token is accepted.
     """
     token_rejected = not check_hf_token_validity(token)
     return gr.update(visible=token_rejected)
 def try_submit(m_id, d_id, config, split, inference_token, uid):
     all_mappings = read_column_mapping(uid)
     if not check_column_mapping_keys_validity(all_mappings):
     # get ds labels and features again for alignment
     ds = datasets.load_dataset(d_id, config, split=split, trust_remote_code=True)
     ds_labels, ds_features, label_keys = get_labels_and_features_from_dataset(ds)
+    label_mapping, feature_mapping = construct_label_and_feature_mapping(
+        all_mappings, ds_labels, ds_features, label_keys
+    )
     eval_str = f"[{m_id}]<{d_id}({config}, {split} set)>"
     save_job_to_pipe(
     return (
         gr.update(interactive=False),  # Submit button
+        gr.update(
+            value=f"{CHECK_LOG_SECTION_RAW}Your job id is: {uid}. ",
+            lines=5,
+            visible=True,
+            interactive=False,
+        ),
         new_uid,  # Allocate a new uuid
         gr.update(visible=False),
         gr.update(visible=False),

wordings.py CHANGED Viewed

@@ -1,3 +1,5 @@
 INTRODUCTION_MD = """
                 <div style="display: flex; justify-content: center;">
                 <h1 style="text-align: center;">
@@ -49,6 +51,10 @@ UNMATCHED_MODEL_DATASET_STYLED_ERROR = """
                         </h3>
                         """
 NOT_TEXT_CLASSIFICATION_MODEL_RAW = """
                         Your model does not fall under the category of text classification. This page is specifically designated for the evaluation of text classification models.
                       """
@@ -61,7 +67,7 @@ USE_INFERENCE_API_TIP = """
                 . Please input your <a href="https://huggingface.co/docs/hub/security-tokens#user-access-tokens">Hugging Face token</a> to do so. You can find it <a href="https://huggingface.co/settings/tokens">here</a>.
             """
-HF_TOKEN_INVALID_STYLED= """
                 <p style="text-align: left;color: red; ">
                 Your Hugging Face token is invalid. Please double check your token.
                 </p>
@@ -72,5 +78,6 @@ VALIDATED_MODEL_DATASET_STYLED = """
             Your model and dataset have been validated!
             </h3>"""
 def get_dataset_fetch_error_raw(error):
     """Build the plain-text message explaining why a dataset cannot be used.

     ``error`` is interpolated with default formatting, so an exception
     instance contributes its ``str()`` text.
     """
     reason = f"{error}"
     return (
         "Sorry you cannot use this dataset because "
         + reason
         + ". Contact HF team to support this dataset."
     )

+EXAMPLE_MODEL_ID = "cardiffnlp/twitter-roberta-base-sentiment-latest"
 INTRODUCTION_MD = """
                 <div style="display: flex; justify-content: center;">
                 <h1 style="text-align: center;">
                         </h3>
                         """
+NOT_FOUND_MODEL_RAW = """
+    We cannot find your model on Hugging Face. Please ensure that the model is accessible.
+"""
 NOT_TEXT_CLASSIFICATION_MODEL_RAW = """
                         Your model does not fall under the category of text classification. This page is specifically designated for the evaluation of text classification models.
                       """
                 . Please input your <a href="https://huggingface.co/docs/hub/security-tokens#user-access-tokens">Hugging Face token</a> to do so. You can find it <a href="https://huggingface.co/settings/tokens">here</a>.
             """
+HF_TOKEN_INVALID_STYLED = """
                 <p style="text-align: left;color: red; ">
                 Your Hugging Face token is invalid. Please double check your token.
                 </p>
             Your model and dataset have been validated!
             </h3>"""
 def get_dataset_fetch_error_raw(error):
     """Build the plain-text message explaining why a dataset cannot be used.

     ``error`` is interpolated with default formatting, so an exception
     instance contributes its ``str()`` text.
     """
     reason = f"{error}"
     return (
         "Sorry you cannot use this dataset because "
         + reason
         + ". Contact HF team to support this dataset."
     )