import collections
import json
import logging
import os
import threading
import uuid

import leaderboard
import datasets
import gradio as gr
import pandas as pd
from io_utils import (
    get_yaml_path,
    read_column_mapping,
    save_job_to_pipe,
    write_column_mapping,
    write_log_to_user_file,
)
from text_classification import (
    check_model_task,
    get_example_prediction,
    get_labels_and_features_from_dataset,
)
from wordings import (
    CHECK_CONFIG_OR_SPLIT_RAW,
    CONFIRM_MAPPING_DETAILS_FAIL_RAW,
    MAPPING_STYLED_ERROR_WARNING,
    get_styled_input,
)

# Number of dropdown slots reserved for label / feature mapping. The helpers
# below always return exactly MAX_LABELS + MAX_FEATURES dropdown components.
MAX_LABELS = 40
MAX_FEATURES = 20

# Names of the environment variables read by try_submit().
HF_REPO_ID = "HF_REPO_ID"
HF_SPACE_ID = "SPACE_ID"
HF_WRITE_TOKEN = "HF_WRITE_TOKEN"
HF_GSK_HUB_URL = "GSK_HUB_URL"
HF_GSK_HUB_PROJECT_KEY = "GSK_HUB_PROJECT_KEY"
HF_GSK_HUB_KEY = "GSK_API_KEY"
HF_GSK_HUB_HF_TOKEN = "GSK_HF_TOKEN"
HF_GSK_HUB_UNLOCK_TOKEN = "GSK_HUB_UNLOCK_TOKEN"

LEADERBOARD = "giskard-bot/evaluator-leaderboard"

# Module-level placeholders (a `global` statement at module scope is a no-op,
# so plain assignments are sufficient).
ds_dict = None
ds_config = None

logger = logging.getLogger(__file__)

# One lock shared by every submission. A lock created per call can never be
# contended and therefore serialises nothing.
# NOTE(review): assumes save_job_to_pipe() only holds the lock briefly while
# enqueueing the job -- confirm against io_utils.
_JOB_PIPE_LOCK = threading.Lock()


def get_related_datasets_from_leaderboard(model_id):
    """Return a dropdown update listing datasets recorded for ``model_id``.

    If the leaderboard has no record for the model, every dataset known to
    the leaderboard is offered and the selected value is left empty.
    """
    records = leaderboard.records
    model_records = records[records["model_id"] == model_id]
    datasets_unique = list(model_records["dataset_id"].unique())

    if not datasets_unique:
        all_unique_datasets = list(records["dataset_id"].unique())
        return gr.update(choices=all_unique_datasets, value="")

    return gr.update(choices=datasets_unique, value=datasets_unique[0])


def check_dataset(dataset_id):
    """Look up the configs and splits of ``dataset_id``.

    Returns a 3-tuple ``(config_update, split_update, "")``. When the dataset
    has no config, or anything fails while loading it, both updates are empty
    ``gr.update()`` objects so the components are left untouched.
    """
    logger.info(f"Loading {dataset_id}")
    try:
        configs = datasets.get_dataset_config_names(dataset_id)
        if not configs:
            return (gr.update(), gr.update(), "")

        splits = list(datasets.load_dataset(dataset_id, configs[0]).keys())
        return (
            gr.update(choices=configs, value=configs[0], visible=True),
            gr.update(choices=splits, value=splits[0], visible=True),
            "",
        )
    except Exception as e:
        # UI boundary: report the problem in the log and keep the UI usable.
        logger.warning(f"Check your dataset {dataset_id}: {e}")
        return (gr.update(), gr.update(), "")


def write_column_mapping_to_config(uid, *labels):
    """Persist the dropdown selections as the column mapping for ``uid``.

    ``labels`` holds the dropdown values: the first ``MAX_LABELS`` entries are
    label selections, the next ``MAX_FEATURES`` entries feature selections.
    Nothing is read or written when no value is supplied.
    """
    # `*labels` is always a tuple, never None, so test for emptiness.
    if not labels:
        return

    # TODO: Substitute 'text' with more features for zero-shot
    # we are not using ds features because we only support "text" for now
    all_mappings = read_column_mapping(uid)
    all_mappings = export_mappings(all_mappings, "labels", None, labels[:MAX_LABELS])
    all_mappings = export_mappings(
        all_mappings,
        "features",
        ["text"],
        labels[MAX_LABELS : (MAX_LABELS + MAX_FEATURES)],
    )
    write_column_mapping(all_mappings, uid)


def export_mappings(all_mappings, key, subkeys, values):
    """Assign ``values`` to ``subkeys`` under ``all_mappings[key]``.

    Args:
        all_mappings: The mapping to update in place; ``None`` is treated as
            an empty mapping.
        key: Top-level section to update (this file uses ``"labels"`` and
            ``"features"``).
        subkeys: Keys to assign inside the section. ``None`` means "reuse the
            keys already present in the section".
        values: Values to assign; they are cycled when there are fewer values
            than subkeys.

    Returns:
        The updated mapping. It is returned unchanged (apart from the section
        being created) when there is nothing to assign.
    """
    if all_mappings is None:
        all_mappings = {}
    if all_mappings.get(key) is None:
        all_mappings[key] = dict()

    section = all_mappings[key]
    if subkeys is None:
        subkeys = list(section.keys())

    if not subkeys:
        logger.debug(f"subkeys is empty for {key}")
        return all_mappings
    if not values:
        # Cycling over an empty sequence would divide by zero.
        logger.debug(f"values is empty for {key}")
        return all_mappings

    value_count = len(values)
    for i, subkey in enumerate(subkeys):
        if subkey:
            section[subkey] = values[i % value_count]
    return all_mappings


def list_labels_and_features_from_dataset(ds_labels, ds_features, model_labels, uid):
    """Build the mapping dropdowns and store the default mapping for ``uid``.

    Returns a list of exactly ``MAX_LABELS + MAX_FEATURES`` ``gr.Dropdown``
    components: one visible dropdown per dataset label (padded with hidden
    ones), followed by one visible dropdown for the ``"text"`` feature (also
    padded). The caller's lists are not modified.
    """
    all_mappings = read_column_mapping(uid)

    # For flattened raw datasets with no labels
    # check if there are shared labels between model and dataset
    shared_labels = set(model_labels).intersection(ds_labels)
    if shared_labels:
        ds_labels = shared_labels

    # Sort copies (the inputs belong to the caller), and sort before
    # truncating so the retained subset is deterministic.
    ds_labels = sorted(ds_labels)
    model_labels = sorted(model_labels)
    if len(ds_labels) > MAX_LABELS:
        ds_labels = ds_labels[:MAX_LABELS]
        gr.Warning(f"The number of labels is truncated to length {MAX_LABELS}")

    model_label_count = len(model_labels)
    lables = [
        gr.Dropdown(
            label=f"{label}",
            choices=model_labels,
            # No preselection is possible when the model exposes no labels.
            value=model_labels[i % model_label_count] if model_label_count else None,
            interactive=True,
            visible=True,
        )
        for i, label in enumerate(ds_labels)
    ]
    lables += [gr.Dropdown(visible=False) for _ in range(MAX_LABELS - len(lables))]
    all_mappings = export_mappings(all_mappings, "labels", ds_labels, model_labels)

    # TODO: Substitute 'text' with more features for zero-shot
    default_feature = ds_features[0] if ds_features else None
    features = [
        gr.Dropdown(
            label=f"{feature}",
            choices=ds_features,
            value=default_feature,
            interactive=True,
            visible=True,
        )
        for feature in ["text"]
    ]
    features += [
        gr.Dropdown(visible=False) for _ in range(MAX_FEATURES - len(features))
    ]
    all_mappings = export_mappings(all_mappings, "features", ["text"], ds_features)

    write_column_mapping(all_mappings, uid)
    return lables + features


def precheck_model_ds_enable_example_btn(
    model_id, dataset_id, dataset_config, dataset_split
):
    """Validate the model/dataset selection and preview the dataset.

    Always returns a 3-tuple ``(button_update, preview_update, "")``. The
    button is made interactive only when the model task is
    ``"text-classification"`` and labels and features could be extracted from
    the selected split. The preview shows the first five rows of the split.
    """
    model_task = check_model_task(model_id)
    if model_task is None or model_task != "text-classification":
        gr.Warning("Please check your model.")
        # Three values, like every other branch of this function.
        return (gr.update(interactive=False), gr.update(), "")

    if dataset_config is None or dataset_split is None or len(dataset_config) == 0:
        return (gr.update(), gr.update(), "")

    try:
        ds = datasets.load_dataset(dataset_id, dataset_config)
        df: pd.DataFrame = ds[dataset_split].to_pandas().head(5)
        ds_labels, ds_features = get_labels_and_features_from_dataset(ds[dataset_split])

        if not isinstance(ds_labels, list) or not isinstance(ds_features, list):
            gr.Warning(CHECK_CONFIG_OR_SPLIT_RAW)
            return (gr.update(interactive=False), gr.update(value=df, visible=True), "")

        return (gr.update(interactive=True), gr.update(value=df, visible=True), "")
    except Exception as e:
        # Config or split wrong
        gr.Warning(f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}")
        return (
            gr.update(interactive=False),
            gr.update(value=pd.DataFrame(), visible=False),
            "",
        )


def _hidden_prediction_outputs(dropdowns):
    """Return the output tuple that hides the example and disables the button."""
    return (
        gr.update(visible=False),
        gr.update(visible=False),
        gr.update(visible=False, open=False),
        gr.update(interactive=False),
        "",
        *dropdowns,
    )


def align_columns_and_show_prediction(
    model_id, dataset_id, dataset_config, dataset_split, uid, run_inference, inference_token
):
    """Run an example prediction and prepare the column-mapping dropdowns.

    Returns a tuple of five leading values followed by
    ``MAX_LABELS + MAX_FEATURES`` dropdown entries. When the model labels and
    dataset labels differ, or the first dataset feature is not ``"text"``, a
    warning is shown and the mapping section is opened for manual mapping;
    otherwise the example input and prediction are shown.
    """
    model_task = check_model_task(model_id)
    if model_task is None or model_task != "text-classification":
        gr.Warning("Please check your model.")
        return _hidden_prediction_outputs(
            [gr.update(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)]
        )

    dropdown_placement = [
        gr.Dropdown(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)
    ]

    prediction_input, prediction_output = get_example_prediction(
        model_id, dataset_id, dataset_config, dataset_split
    )
    model_labels = list(prediction_output.keys())

    ds = datasets.load_dataset(dataset_id, dataset_config)[dataset_split]
    ds_labels, ds_features = get_labels_and_features_from_dataset(ds)

    # when dataset does not have labels or features
    if not isinstance(ds_labels, list) or not isinstance(ds_features, list):
        gr.Warning(CHECK_CONFIG_OR_SPLIT_RAW)
        return _hidden_prediction_outputs(dropdown_placement)

    column_mappings = list_labels_and_features_from_dataset(
        ds_labels,
        ds_features,
        model_labels,
        uid,
    )
    submit_enabled = run_inference and inference_token != ""

    # when labels or features are not aligned
    # show manually column mapping
    labels_aligned = collections.Counter(model_labels) == collections.Counter(ds_labels)
    # An empty feature list counts as "not aligned" instead of raising.
    features_aligned = bool(ds_features) and ds_features[0] == "text"
    if not labels_aligned or not features_aligned:
        return (
            gr.update(value=MAPPING_STYLED_ERROR_WARNING, visible=True),
            gr.update(visible=False),
            gr.update(visible=True, open=True),
            gr.update(interactive=submit_enabled),
            "",
            *column_mappings,
        )

    return (
        gr.update(value=get_styled_input(prediction_input), visible=True),
        gr.update(value=prediction_output, visible=True),
        gr.update(visible=True, open=False),
        gr.update(interactive=submit_enabled),
        "",
        *column_mappings,
    )


def check_column_mapping_keys_validity(all_mappings):
    """Warn when the stored mapping is missing or has no ``"labels"`` section.

    Returns ``None`` when the mapping passes the check; otherwise shows a
    warning and returns a 2-tuple of Gradio updates.
    """
    if all_mappings is None or "labels" not in all_mappings.keys():
        gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
        return (gr.update(interactive=True), gr.update(visible=False))
    return None


def construct_label_and_feature_mapping(all_mappings):
    """Build ``(label_mapping, feature_mapping)`` from the stored mapping.

    ``label_mapping`` maps the string index of each label key (``"0"``,
    ``"1"``, ...) to that key, in the stored order. ``feature_mapping`` is the
    stored ``"features"`` section. When the ``"features"`` section is missing,
    a warning is shown and a 2-tuple of Gradio updates is returned instead, so
    callers should make sure the section exists before unpacking the result.
    """
    if "features" not in all_mappings.keys():
        gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
        return (gr.update(interactive=True), gr.update(visible=False))

    label_mapping = {
        str(i): label for i, label in enumerate(all_mappings["labels"].keys())
    }
    feature_mapping = all_mappings["features"]
    return label_mapping, feature_mapping


def _submission_rejected(uid):
    """Return try_submit's output tuple for a rejected submission.

    The tuple has the same length as the success tuple and keeps ``uid``
    because no job was queued for it.
    """
    return (gr.update(interactive=True), gr.update(visible=False), uid)


def _append_env_option(command, flag, env_name):
    """Append ``flag`` and the value of ``env_name`` when the variable is set."""
    value = os.environ.get(env_name)
    if value:
        command.extend([flag, value])


def try_submit(m_id, d_id, config, split, inference, inference_token, uid):
    """Queue an evaluation job for the selected model and dataset.

    Builds the ``giskard_scanner`` command from the arguments, the stored
    column mapping and the environment, hands it to ``save_job_to_pipe`` and
    writes a start message to the user's log file.

    Returns a 3-tuple ``(button_update, log_update, uid)``. On success the
    third element is a freshly allocated uuid; when the stored mapping is
    missing or incomplete, a warning is shown, nothing is queued and the
    original ``uid`` is returned.
    """
    all_mappings = read_column_mapping(uid)
    if check_column_mapping_keys_validity(all_mappings) is not None:
        return _submission_rejected(uid)
    if "features" not in all_mappings:
        gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
        return _submission_rejected(uid)

    label_mapping, feature_mapping = construct_label_and_feature_mapping(all_mappings)

    leaderboard_dataset = None
    if os.environ.get(HF_SPACE_ID) == "giskardai/giskard-evaluator":
        leaderboard_dataset = LEADERBOARD

    # TODO: Set column mapping for some dataset such as `amazon_polarity`
    command = [
        "giskard_scanner",
        "--loader",
        "huggingface",
        "--model",
        m_id,
        "--dataset",
        d_id,
        "--dataset_config",
        config,
        "--dataset_split",
        split,
        "--output_format",
        "markdown",
        "--output_portal",
        "huggingface",
        "--feature_mapping",
        json.dumps(feature_mapping),
        "--label_mapping",
        json.dumps(label_mapping),
        "--scan_config",
        get_yaml_path(uid),
    ]

    # The inference options are only meaningful when inference is enabled;
    # they are omitted otherwise rather than referencing an unset value.
    # NOTE(review): presumably the scanner falls back to its own default
    # inference type when these flags are absent -- confirm.
    if inference:
        command += [
            "--inference_type",
            "hf_inference_api",
            "--inference_api_token",
            inference_token,
        ]

    # The token to publish post
    _append_env_option(command, "--hf_token", HF_WRITE_TOKEN)

    # The repo to publish post
    # TODO: Replace by the model id
    discussion_repo = os.environ.get(HF_REPO_ID) or os.environ.get(HF_SPACE_ID)
    if discussion_repo:
        command += ["--discussion_repo", discussion_repo]

    # The repo to publish for ranking
    if leaderboard_dataset:
        command += ["--leaderboard_dataset", leaderboard_dataset]

    # The info to upload to Giskard hub
    _append_env_option(command, "--giskard_hub_api_key", HF_GSK_HUB_KEY)
    _append_env_option(command, "--giskard_hub_url", HF_GSK_HUB_URL)
    _append_env_option(command, "--giskard_hub_project_key", HF_GSK_HUB_PROJECT_KEY)
    _append_env_option(command, "--giskard_hub_hf_token", HF_GSK_HUB_HF_TOKEN)
    _append_env_option(command, "--giskard_hub_unlock_token", HF_GSK_HUB_UNLOCK_TOKEN)

    eval_str = f"[{m_id}]<{d_id}({config}, {split} set)>"
    logger.info(f"Start local evaluation on {eval_str}")
    save_job_to_pipe(uid, command, eval_str, _JOB_PIPE_LOCK)

    write_log_to_user_file(
        uid,
        f"Start local evaluation on {eval_str}. Please wait for your job to start...\n",
    )
    gr.Info(f"Start local evaluation on {eval_str}")

    return (
        gr.update(interactive=False),  # Submit button
        gr.update(lines=5, visible=True, interactive=False),
        uuid.uuid4(),  # Allocate a new uuid
    )