ZeroCommand committed on
Commit
ae8fa3a
·
1 Parent(s): 9b20db6

add login button

Browse files
app.py CHANGED
@@ -5,7 +5,7 @@ import gradio as gr
5
  from app_debug import get_demo as get_demo_debug
6
  from app_leaderboard import get_demo as get_demo_leaderboard
7
  from app_text_classification import get_demo as get_demo_text_classification
8
- from utils.run_jobs import start_process_run_job, stop_thread
9
 
10
  try:
11
  with gr.Blocks(theme=gr.themes.Soft(primary_hue="green")) as demo:
 
5
  from app_debug import get_demo as get_demo_debug
6
  from app_leaderboard import get_demo as get_demo_leaderboard
7
  from app_text_classification import get_demo as get_demo_text_classification
8
+ from run_jobs import start_process_run_job, stop_thread
9
 
10
  try:
11
  with gr.Blocks(theme=gr.themes.Soft(primary_hue="green")) as demo:
app_leaderboard.py CHANGED
@@ -5,10 +5,10 @@ import gradio as gr
5
  import pandas as pd
6
  import datetime
7
 
8
- from utils.fetch_utils import (check_dataset_and_get_config,
9
  check_dataset_and_get_split)
10
 
11
- import utils.leaderboard as leaderboard
12
  logger = logging.getLogger(__name__)
13
  global update_time
14
  update_time = datetime.datetime.fromtimestamp(0)
 
5
  import pandas as pd
6
  import datetime
7
 
8
+ from fetch_utils import (check_dataset_and_get_config,
9
  check_dataset_and_get_split)
10
 
11
+ import leaderboard
12
  logger = logging.getLogger(__name__)
13
  global update_time
14
  update_time = datetime.datetime.fromtimestamp(0)
app_text_classification.py CHANGED
@@ -2,13 +2,12 @@ import uuid
2
 
3
  import gradio as gr
4
 
5
- from utils.io_utils import read_scanners, write_scanners
6
- from utils.ui_helpers import (
7
  get_related_datasets_from_leaderboard,
8
  align_columns_and_show_prediction,
9
  get_dataset_splits,
10
  check_dataset,
11
- show_hf_token_info,
12
  precheck_model_ds_enable_example_btn,
13
  try_submit,
14
  empty_column_mapping,
@@ -161,12 +160,6 @@ def get_demo():
161
  inputs=[uid_label]
162
  )
163
 
164
- gr.on(
165
- triggers=[model_id_input.change, dataset_id_input.change, dataset_config_input.change],
166
- fn=empty_column_mapping,
167
- inputs=[uid_label]
168
- )
169
-
170
  gr.on(
171
  triggers=[label.change for label in column_mappings],
172
  fn=write_column_mapping_to_config,
 
2
 
3
  import gradio as gr
4
 
5
+ from io_utils import read_scanners, write_scanners
6
+ from text_classification_ui_helpers import (
7
  get_related_datasets_from_leaderboard,
8
  align_columns_and_show_prediction,
9
  get_dataset_splits,
10
  check_dataset,
 
11
  precheck_model_ds_enable_example_btn,
12
  try_submit,
13
  empty_column_mapping,
 
160
  inputs=[uid_label]
161
  )
162
 
 
 
 
 
 
 
163
  gr.on(
164
  triggers=[label.change for label in column_mappings],
165
  fn=write_column_mapping_to_config,
fetch_utils.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+
3
+ import datasets
4
+
5
+
6
def check_dataset_and_get_config(dataset_id):
    """Return the config names available for ``dataset_id``.

    Args:
        dataset_id: Identifier forwarded to
            ``datasets.get_dataset_config_names``.

    Returns:
        The list of config names, or ``None`` when the lookup raises
        (for example because the dataset does not exist).
    """
    try:
        return datasets.get_dataset_config_names(dataset_id, trust_remote_code=True)
    except Exception as e:
        # Dataset may not exist. Log the reason instead of dropping it so the
        # failure is diagnosable, matching check_dataset_and_get_split.
        logging.warning(f"Failed to get configs of dataset {dataset_id}: {e}")
        return None
13
+
14
+
15
def check_dataset_and_get_split(dataset_id, dataset_config):
    """Load a dataset config and return the names of its splits.

    Args:
        dataset_id: Identifier forwarded to ``datasets.load_dataset``.
        dataset_config: Config name forwarded to ``datasets.load_dataset``.

    Returns:
        A list with the keys of the loaded dataset, or ``None`` when either
        loading the dataset or listing its keys raises.
    """
    try:
        loaded = datasets.load_dataset(dataset_id, dataset_config, trust_remote_code=True)
    except Exception as e:
        # Dataset may not exist
        logging.warning(
            f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}"
        )
        return None

    try:
        split_names = list(loaded.keys())
    except Exception as e:
        # Dataset has no splits
        logging.warning(
            f"Dataset {dataset_id} with config {dataset_config} has no splits: {e}"
        )
        return None
    return split_names
io_utils.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import logging
3
+ import yaml
4
+
5
+ YAML_PATH = "./cicd/configs"
6
+ LOG_FILE = "temp_log"
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
class Dumper(yaml.Dumper):
    """``yaml.Dumper`` variant that always calls the parent with ``indentless=False``."""

    def increase_indent(self, flow=False, *args, **kwargs):
        # Extra positional/keyword arguments are accepted for signature
        # compatibility but are intentionally not forwarded.
        return super(Dumper, self).increase_indent(flow=flow, indentless=False)
13
+
14
def get_submitted_yaml_path(uid):
    """Return the path of the submitted config for ``uid``, creating it if missing.

    The ``submitted`` directory is created on demand. If the per-user config
    ``{YAML_PATH}/{uid}_config.yaml`` is missing, an error is logged and it is
    recreated from ``config.yaml``. The per-user config is then copied into
    the ``submitted`` directory unless a submitted copy already exists.

    Args:
        uid: Identifier used to build the config file names.

    Returns:
        The path ``{YAML_PATH}/submitted/{uid}_config.yaml``.
    """
    import shutil  # local import: only needed for the file copies below

    user_config = f"{YAML_PATH}/{uid}_config.yaml"
    submitted_config = f"{YAML_PATH}/submitted/{uid}_config.yaml"

    def copy_best_effort(src, dst):
        # Copy without going through a shell (uid is interpolated into the
        # path). Failures are logged rather than raised, mirroring the
        # previous behavior where a failing ``cp`` did not abort the call.
        try:
            shutil.copyfile(src, dst)
        except OSError as e:
            logger.error(f"Failed to copy {src} to {dst}: {e}")

    # exist_ok avoids the check-then-create race between concurrent callers.
    os.makedirs(f"{YAML_PATH}/submitted", exist_ok=True)
    if not os.path.exists(user_config):
        logger.error(f"config.yaml does not exist for {uid}")
        copy_best_effort("config.yaml", user_config)
    if not os.path.exists(submitted_config):
        copy_best_effort(user_config, submitted_config)
    return submitted_config
23
+
24
def get_yaml_path(uid):
    """Return the path of the per-user config for ``uid``, creating it if missing.

    ``YAML_PATH`` is created on demand, and a missing per-user config is
    initialized as a copy of ``config.yaml`` from the working directory.

    Args:
        uid: Identifier used to build the config file name.

    Returns:
        The path ``{YAML_PATH}/{uid}_config.yaml``.
    """
    import shutil  # local import: only needed for the file copy below

    user_config = f"{YAML_PATH}/{uid}_config.yaml"
    # exist_ok avoids the check-then-create race between concurrent callers.
    os.makedirs(YAML_PATH, exist_ok=True)
    if not os.path.exists(user_config):
        # Copy without a shell (uid is interpolated into the path). A failed
        # copy is logged, not raised, like the ``cp`` call it replaces.
        try:
            shutil.copyfile("config.yaml", user_config)
        except OSError as e:
            logger.error(f"Failed to copy config.yaml to {user_config}: {e}")
    return user_config
30
+
31
+
32
# read scanners from yaml file
# return a list of scanners
def read_scanners(uid):
    """Read the ``detectors`` entry from the config file of ``uid``.

    Args:
        uid: Identifier passed to ``get_yaml_path`` to locate the config.

    Returns:
        The value stored under ``detectors``, or an empty list when the key
        is absent or the config file is empty.
    """
    with open(get_yaml_path(uid), "r") as f:
        config = yaml.load(f, Loader=yaml.FullLoader)
    # An empty YAML document loads as None; treat it as "no scanners"
    # instead of failing on ``None.get``.
    if not config:
        return []
    return config.get("detectors", [])
40
+
41
+
42
# convert a list of scanners to yaml file
def write_scanners(scanners, uid):
    """Store ``scanners`` under ``detectors`` in the config file of ``uid``.

    The config is loaded, updated only when it is non-empty, and then dumped
    back to the same path with the custom ``Dumper``.

    Args:
        scanners: Value to store under the ``detectors`` key.
        uid: Identifier passed to ``get_yaml_path`` to locate the config.
    """
    with open(get_yaml_path(uid), "r") as source:
        loaded = yaml.load(source, Loader=yaml.FullLoader)

    if loaded:
        loaded["detectors"] = scanners

    # save scanners to detectors in yaml
    with open(get_yaml_path(uid), "w") as target:
        yaml.dump(loaded, target, Dumper=Dumper)
51
+
52
+
53
# read model_type from yaml file
def read_inference_type(uid):
    """Read the ``inference_type`` entry from the config file of ``uid``.

    Args:
        uid: Identifier passed to ``get_yaml_path`` to locate the config.

    Returns:
        The stored inference type, or ``""`` when the key is absent or the
        config file is empty.
    """
    with open(get_yaml_path(uid), "r") as f:
        config = yaml.load(f, Loader=yaml.FullLoader)
    # An empty YAML document loads as None; fall back to the default
    # instead of failing on ``None.get``.
    if not config:
        return ""
    return config.get("inference_type", "")
60
+
61
+
62
# write model_type to yaml file
def write_inference_type(use_inference, inference_token, uid):
    """Persist the inference type (and token) in the config file of ``uid``.

    Args:
        use_inference: When truthy, ``hf_inference_api`` is stored together
            with ``inference_token``; otherwise ``hf_pipeline`` is stored
            with an empty token.
        inference_token: Token stored when ``use_inference`` is truthy.
        uid: Identifier passed to ``get_yaml_path`` to locate the config.
    """
    with open(get_yaml_path(uid), "r") as f:
        # An empty YAML document loads as None; start from an empty mapping
        # so the assignments below do not raise TypeError.
        config = yaml.load(f, Loader=yaml.FullLoader) or {}
    if use_inference:
        config["inference_type"] = "hf_inference_api"
        config["inference_token"] = inference_token
    else:
        config["inference_type"] = "hf_pipeline"
        # FIXME: A quick and temp fix for missing token
        config["inference_token"] = ""
    # save inference_type to inference_type in yaml
    with open(get_yaml_path(uid), "w") as f:
        yaml.dump(config, f, Dumper=Dumper)
76
+
77
+
78
# read column mapping from yaml file
def read_column_mapping(uid):
    """Read the ``column_mapping`` entry from the config file of ``uid``.

    Args:
        uid: Identifier passed to ``get_yaml_path`` to locate the config.

    Returns:
        The stored mapping, or an empty dict when the config is empty, the
        key is absent, or the stored value is ``None``.
    """
    with open(get_yaml_path(uid), "r") as handle:
        loaded = yaml.load(handle, Loader=yaml.FullLoader)

    if not loaded:
        return {}

    stored = loaded.get("column_mapping", dict())
    return {} if stored is None else stored
88
+
89
+
90
# write column mapping to yaml file
def write_column_mapping(mapping, uid):
    """Store or clear ``column_mapping`` in the config file of ``uid``.

    Args:
        mapping: Mapping to store under ``column_mapping``. ``None`` removes
            the key instead.
        uid: Identifier passed to ``get_yaml_path`` to locate the config.

    Returns:
        None. Nothing is written when the config file is empty.
    """
    with open(get_yaml_path(uid), "r") as f:
        config = yaml.load(f, Loader=yaml.FullLoader)

    if config is None:
        return
    if mapping is None:
        # Clearing must never store ``column_mapping: null``; previously that
        # happened whenever the key was not already present.
        config.pop("column_mapping", None)
    else:
        config["column_mapping"] = mapping
    with open(get_yaml_path(uid), "w") as f:
        # yaml Dumper will by default sort the keys
        yaml.dump(config, f, Dumper=Dumper, sort_keys=False)
104
+
105
+
106
# convert column mapping dataframe to json
def convert_column_mapping_to_json(df, label=""):
    """Convert the rows of ``df`` into a single-key dict of row lists.

    Args:
        df: DataFrame whose rows are exported in iteration order.
        label: Key under which the list of rows is stored.

    Returns:
        ``{label: [row_as_list, ...]}`` with one inner list per row.
    """
    rows = [row.tolist() for _, row in df.iterrows()]
    return {label: rows}
113
+
114
+
115
def get_log_file_with_uid(uid):
    """Return the contents of ``./tmp/{uid}.log``.

    Args:
        uid: Identifier used to build the log file name.

    Returns:
        The log text, or ``"Log file does not exist"`` when the file cannot
        be opened or read.
    """
    try:
        print(f"Loading {uid}.log")
        # Open for reading: append mode cannot be read from (so the fallback
        # was always returned) and it created empty log files as a side effect.
        with open(f"./tmp/{uid}.log", "r") as file:
            return file.read()
    except Exception:
        return "Log file does not exist"
122
+
123
+
124
def get_logs_file():
    """Return the contents of ``LOG_FILE``.

    Returns:
        The file text, or ``"Log file does not exist"`` when the file cannot
        be opened or read.
    """
    fallback = "Log file does not exist"
    try:
        with open(LOG_FILE, "r") as log_handle:
            contents = log_handle.read()
    except Exception:
        return fallback
    return contents
130
+
131
+
132
def write_log_to_user_file(task_id, log):
    """Append ``log`` to ``./tmp/{task_id}.log``.

    Args:
        task_id: Identifier used to build the log file name.
        log: Text appended to the file as-is.
    """
    # Make sure the log directory exists so the first write on a fresh
    # checkout does not fail with FileNotFoundError.
    os.makedirs("./tmp", exist_ok=True)
    with open(f"./tmp/{task_id}.log", "a") as f:
        f.write(log)
isolated_env.py CHANGED
@@ -1,7 +1,7 @@
1
  import os
2
  import subprocess
3
 
4
- from utils.io_utils import write_log_to_user_file
5
 
6
 
7
  def prepare_venv(execution_id, deps):
 
1
  import os
2
  import subprocess
3
 
4
+ from io_utils import write_log_to_user_file
5
 
6
 
7
  def prepare_venv(execution_id, deps):
leaderboard.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ import pandas as pd
2
+
3
+ records = pd.DataFrame()
4
+
5
+ LEADERBOARD = "giskard-bot/evaluator-leaderboard"
pipe.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+
2
+ jobs = list()
3
+ current = None
run_jobs.py ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import logging
3
+ import os
4
+ import subprocess
5
+ import threading
6
+ import time
7
+ from pathlib import Path
8
+
9
+ import pipe
10
+ from app_env import (
11
+ HF_GSK_HUB_HF_TOKEN,
12
+ HF_GSK_HUB_KEY,
13
+ HF_GSK_HUB_PROJECT_KEY,
14
+ HF_GSK_HUB_UNLOCK_TOKEN,
15
+ HF_GSK_HUB_URL,
16
+ HF_REPO_ID,
17
+ HF_SPACE_ID,
18
+ HF_WRITE_TOKEN,
19
+ )
20
+ from io_utils import LOG_FILE, get_submitted_yaml_path, write_log_to_user_file
21
+ from isolated_env import prepare_venv
22
+ from leaderboard import LEADERBOARD
23
+
24
+ is_running = False
25
+
26
+ logger = logging.getLogger(__file__)
27
+
28
+
29
def start_process_run_job():
    """Start ``run_job`` in a daemon thread and mark the worker as running.

    The thread object is stored in the module-level ``thread`` and
    ``is_running`` is set to ``True`` before the thread starts. Any exception
    raised while creating or starting the thread is printed, not propagated.
    """
    global thread, is_running
    try:
        logging.debug("Running jobs in thread")
        worker = threading.Thread(target=run_job)
        thread = worker
        worker.daemon = True
        is_running = True
        worker.start()
    except Exception as e:
        print("Failed to start thread: ", e)
40
+
41
+
42
def stop_thread():
    """Ask the background worker to stop by clearing ``is_running``.

    ``run_job`` checks the flag at the top of each loop iteration.
    """
    global is_running
    logging.debug("Stop thread")
    is_running = False
46
+
47
+
48
def prepare_env_and_get_command(
    m_id,
    d_id,
    config,
    split,
    inference_token,
    uid,
    label_mapping,
    feature_mapping,
):
    """Prepare the execution environment and build the scanner command line.

    Tries to create an isolated environment through ``prepare_venv`` from the
    current ``requirements.txt`` and falls back to the ``giskard_scanner``
    executable of the current environment when that fails. Optional flags are
    appended only when the corresponding environment variable is set. A
    "start" message is appended to the user log before returning.

    Args:
        m_id: Model id passed as ``--model``.
        d_id: Dataset id passed as ``--dataset``.
        config: Dataset config passed as ``--dataset_config``.
        split: Dataset split passed as ``--dataset_split``.
        inference_token: Token passed as ``--inference_api_token``.
        uid: Job id; used for the venv, the scan config and the user log.
        label_mapping: JSON-serialisable mapping passed as ``--label_mapping``.
        feature_mapping: JSON-serialisable mapping passed as
            ``--feature_mapping``.

    Returns:
        The command as a list of arguments.
    """
    leaderboard_dataset = None
    if os.environ.get("SPACE_ID") == "giskardai/giskard-evaluator":
        leaderboard_dataset = LEADERBOARD

    executable = "giskard_scanner"
    try:
        # Copy the current requirements (might be changed)
        with open("requirements.txt", "r") as f:
            # NOTE(review): readlines() keeps the trailing newlines, so the
            # joined text has blank lines between requirements. Left as-is
            # because prepare_venv's expectations are not visible here.
            executable = prepare_venv(
                uid,
                "\n".join(f.readlines()),
            )
        logger.info(f"Using {executable} as executable")
    except Exception as e:
        # Logger.warn is a deprecated alias; use warning().
        logger.warning(f"Create env failed due to {e}, using the current env as fallback.")
        executable = "giskard_scanner"

    command = [
        executable,
        "--loader",
        "huggingface",
        "--model",
        m_id,
        "--dataset",
        d_id,
        "--dataset_config",
        config,
        "--dataset_split",
        split,
        "--output_format",
        "markdown",
        "--output_portal",
        "huggingface",
        "--feature_mapping",
        json.dumps(feature_mapping),
        "--label_mapping",
        json.dumps(label_mapping),
        "--scan_config",
        get_submitted_yaml_path(uid),
        "--inference_type",
        "hf_inference_api",
        "--inference_api_token",
        inference_token,
    ]

    # The token to publish post
    hf_write_token = os.environ.get(HF_WRITE_TOKEN)
    if hf_write_token:
        command += ["--hf_token", hf_write_token]

    # The repo to publish post
    # TODO: Replace by the model id
    discussion_repo = os.environ.get(HF_REPO_ID) or os.environ.get(HF_SPACE_ID)
    if discussion_repo:
        command += ["--discussion_repo", discussion_repo]

    # The repo to publish for ranking
    if leaderboard_dataset:
        command += ["--leaderboard_dataset", leaderboard_dataset]

    # The info to upload to Giskard hub: (flag, name of the env variable)
    hub_options = (
        ("--giskard_hub_api_key", HF_GSK_HUB_KEY),
        ("--giskard_hub_url", HF_GSK_HUB_URL),
        ("--giskard_hub_project_key", HF_GSK_HUB_PROJECT_KEY),
        ("--giskard_hub_hf_token", HF_GSK_HUB_HF_TOKEN),
        ("--giskard_hub_unlock_token", HF_GSK_HUB_UNLOCK_TOKEN),
    )
    for flag, env_name in hub_options:
        value = os.environ.get(env_name)
        if value:
            command += [flag, value]

    eval_str = f"[{m_id}]<{d_id}({config}, {split} set)>"

    write_log_to_user_file(
        uid,
        f"Start local evaluation on {eval_str}. Please wait for your job to start...\n",
    )

    return command
143
+
144
+
145
def save_job_to_pipe(task_id, job, description, lock):
    """Queue a job on ``pipe.jobs`` while holding ``lock``.

    Args:
        task_id: Identifier of the job.
        job: Job payload stored as the second tuple element.
        description: Description stored as the third tuple element.
        lock: Context manager acquired around the append.
    """
    entry = (task_id, job, description)
    with lock:
        pipe.jobs.append(entry)
148
+
149
+
150
def pop_job_from_pipe():
    """Take one queued job from ``pipe.jobs`` and run it to completion.

    Does nothing when the queue is empty. Otherwise ``LOG_FILE`` is re-pointed
    (as a symlink) at the job's log in ``./tmp``, the command is built with
    ``prepare_env_and_get_command`` and executed as a subprocess whose stdout
    and stderr are appended to that log. ``pipe.current`` holds the job
    description while the job runs and is reset to ``None`` afterwards, even
    when the job fails to start.
    """
    if len(pipe.jobs) == 0:
        return
    # NOTE(review): list.pop() takes the most recently queued job (LIFO).
    # Confirm whether FIFO ordering was intended.
    job_info = pipe.jobs.pop()
    pipe.current = job_info[2]
    task_id = job_info[0]

    try:
        # Link to LOG_FILE
        log_file_path = Path(LOG_FILE)
        # exists() follows symlinks and is False for a dangling link, which
        # would make os.symlink fail with FileExistsError; check is_symlink()
        # as well so a stale link is always removed first.
        if log_file_path.is_symlink() or log_file_path.exists():
            log_file_path.unlink()
        os.symlink(f"./tmp/{task_id}.log", LOG_FILE)

        write_log_to_user_file(task_id, f"Running job id {task_id}\n")
        command = prepare_env_and_get_command(*job_info[1])

        with open(f"./tmp/{task_id}.log", "a") as log_file:
            p = subprocess.Popen(command, stdout=log_file, stderr=subprocess.STDOUT)
            p.wait()
    finally:
        # Never leave a stale "current job" behind if something above raised.
        pipe.current = None
170
+
171
+
172
def run_job():
    """Worker loop: run queued jobs until ``is_running`` is cleared.

    Each iteration runs at most one job via ``pop_job_from_pipe`` and then
    sleeps for 10 seconds. A failing job is logged and the loop continues, so
    one bad submission cannot silently kill the worker thread. A
    ``KeyboardInterrupt`` clears ``is_running`` and ends the loop.
    """
    global is_running
    while is_running:
        try:
            try:
                pop_job_from_pipe()
            except Exception:
                # Keep the worker alive; the traceback goes to the log.
                logger.exception("Job failed; continuing with the next queued job")
            time.sleep(10)
        except KeyboardInterrupt:
            logging.debug("KeyboardInterrupt stop background thread")
            is_running = False
            break
text_classification_ui_helpers.py ADDED
@@ -0,0 +1,460 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import collections
2
+ import logging
3
+ import threading
4
+ import uuid
5
+
6
+ import datasets
7
+ import gradio as gr
8
+ import pandas as pd
9
+
10
+ import leaderboard
11
+ from io_utils import (
12
+ read_column_mapping,
13
+ write_column_mapping,
14
+ read_scanners,
15
+ write_scanners,
16
+ )
17
+ from run_jobs import save_job_to_pipe
18
+ from text_classification import (
19
+ strip_model_id_from_url,
20
+ check_model_task,
21
+ preload_hf_inference_api,
22
+ get_example_prediction,
23
+ get_labels_and_features_from_dataset,
24
+ check_hf_token_validity,
25
+ HuggingFaceInferenceAPIResponse,
26
+ )
27
+ from wordings import (
28
+ CHECK_CONFIG_OR_SPLIT_RAW,
29
+ CONFIRM_MAPPING_DETAILS_FAIL_RAW,
30
+ MAPPING_STYLED_ERROR_WARNING,
31
+ NOT_TEXT_CLASSIFICATION_MODEL_RAW,
32
+ UNMATCHED_MODEL_DATASET_STYLED_ERROR,
33
+ CHECK_LOG_SECTION_RAW,
34
+ VALIDATED_MODEL_DATASET_STYLED,
35
+ get_dataset_fetch_error_raw,
36
+ )
37
+ import os
38
+ from app_env import HF_WRITE_TOKEN
39
+
40
+ MAX_LABELS = 40
41
+ MAX_FEATURES = 20
42
+
43
+ ds_dict = None
44
+ ds_config = None
45
+
46
def get_related_datasets_from_leaderboard(model_id):
    """Return a dropdown update listing datasets recorded for ``model_id``.

    Args:
        model_id: Model id or URL; normalized with ``strip_model_id_from_url``.

    Returns:
        ``gr.update(choices=...)`` with the unique ``dataset_id`` values of
        the leaderboard rows whose ``model_id`` matches (possibly empty).
    """
    records = leaderboard.records
    model_id = strip_model_id_from_url(model_id)

    # leaderboard.records starts as an empty, column-less DataFrame; indexing
    # its columns before it has been populated would raise KeyError.
    if "model_id" not in records.columns or "dataset_id" not in records.columns:
        return gr.update(choices=[])

    model_records = records[records["model_id"] == model_id]
    datasets_unique = list(model_records["dataset_id"].unique())
    return gr.update(choices=datasets_unique)
56
+
57
+
58
+ logger = logging.getLogger(__file__)
59
+
60
def get_dataset_splits(dataset_id, dataset_config):
    """Return a dropdown update with the splits of a dataset config.

    Args:
        dataset_id: Identifier forwarded to
            ``datasets.get_dataset_split_names``.
        dataset_config: Config name forwarded to the same call.

    Returns:
        A visible update whose choices are the split names (first one
        selected), or ``gr.update(visible=False)`` when the lookup fails or
        yields no split.
    """
    try:
        splits = datasets.get_dataset_split_names(dataset_id, dataset_config, trust_remote_code=True)
        return gr.update(choices=splits, value=splits[0], visible=True)
    except Exception as e:
        # Logger.warn is a deprecated alias; use warning().
        logger.warning(f"Check your dataset {dataset_id} and config {dataset_config}: {e}")
        return gr.update(visible=False)
67
+
68
def check_dataset(dataset_id):
    """Look up configs and splits of ``dataset_id`` for the two dropdowns.

    Args:
        dataset_id: Identifier forwarded to the ``datasets`` lookup helpers.

    Returns:
        A 3-tuple ``(config_update, split_update, "")``. Both updates are
        visible and pre-select their first choice on success; both are
        ``gr.update(visible=False)`` when the dataset has no configs or the
        lookup raises.
    """
    logger.info(f"Loading {dataset_id}")
    try:
        configs = datasets.get_dataset_config_names(dataset_id, trust_remote_code=True)
        if len(configs) == 0:
            return (
                gr.update(visible=False),
                gr.update(visible=False),
                "",
            )
        splits = datasets.get_dataset_split_names(dataset_id, configs[0], trust_remote_code=True)
        return (
            gr.update(choices=configs, value=configs[0], visible=True),
            gr.update(choices=splits, value=splits[0], visible=True),
            "",
        )
    except Exception as e:
        # Logger.warn is a deprecated alias; use warning().
        logger.warning(f"Check your dataset {dataset_id}: {e}")
        message = str(e)
        # "forbidden" is matched case-insensitively (GSK-2770). The two checks
        # are combined so the same warning is never shown twice.
        if "doesn't exist" in message or "forbidden" in message.lower():
            gr.Warning(get_dataset_fetch_error_raw(e))
        return (
            gr.update(visible=False),
            gr.update(visible=False),
            "",
        )
95
+
96
def empty_column_mapping(uid):
    """Clear the stored column mapping of ``uid``.

    Delegates to ``write_column_mapping`` with ``None`` as the mapping.
    """
    cleared_mapping = None
    write_column_mapping(cleared_mapping, uid)
98
+
99
def write_column_mapping_to_config(uid, *labels):
    """Persist the dropdown selections as the column mapping of ``uid``.

    The first ``MAX_LABELS`` values are applied to the stored label keys and
    the following ``MAX_FEATURES`` values to the ``text`` feature.

    Args:
        uid: Identifier of the config to update.
        *labels: Current dropdown values, labels first, then features.
    """
    # ``*labels`` is always a tuple (never None). An empty call has nothing
    # to write and would otherwise reach a modulo-by-zero in export_mappings.
    if not labels:
        return

    # TODO: Substitute 'text' with more features for zero-shot
    # we are not using ds features because we only support "text" for now
    all_mappings = read_column_mapping(uid)

    all_mappings = export_mappings(all_mappings, "labels", None, labels[:MAX_LABELS])
    all_mappings = export_mappings(
        all_mappings,
        "features",
        ["text"],
        labels[MAX_LABELS : (MAX_LABELS + MAX_FEATURES)],
    )

    write_column_mapping(all_mappings, uid)
115
+
116
def export_mappings(all_mappings, key, subkeys, values):
    """Assign ``values`` to ``subkeys`` inside ``all_mappings[key]``.

    Args:
        all_mappings: Dict updated in place; ``all_mappings[key]`` is created
            as an empty dict when missing.
        key: Section to update (for example ``"labels"``).
        subkeys: Keys to assign. ``None`` means "the keys already stored in
            the section". Falsy subkeys are skipped.
        values: Values assigned by position; they wrap around when there are
            fewer values than subkeys.

    Returns:
        ``all_mappings`` (the same object). It is returned unchanged, apart
        from the created section, when ``subkeys`` or ``values`` is empty.
    """
    if key not in all_mappings:
        all_mappings[key] = dict()
    if subkeys is None:
        subkeys = list(all_mappings[key].keys())

    if not subkeys:
        logging.debug(f"subkeys is empty for {key}")
        return all_mappings

    # With no values there is nothing to assign, and ``i % len(values)``
    # below would raise ZeroDivisionError.
    if len(values) == 0:
        logging.debug(f"values is empty for {key}")
        return all_mappings

    for i, subkey in enumerate(subkeys):
        if subkey:
            all_mappings[key][subkey] = values[i % len(values)]
    return all_mappings
130
+
131
+
132
def list_labels_and_features_from_dataset(ds_labels, ds_features, model_labels, uid):
    """Build the label/feature dropdowns and store the default mapping.

    Args:
        ds_labels: Labels found in the dataset.
        ds_features: Feature (column) names found in the dataset.
        model_labels: Labels produced by the model.
        uid: Identifier of the config the default mapping is written to.

    Returns:
        A list of ``MAX_LABELS + MAX_FEATURES`` ``gr.Dropdown`` objects:
        visible ones for each dataset label and for the ``text`` feature,
        padded with hidden dropdowns.
    """
    all_mappings = read_column_mapping(uid)
    # For flattened raw datasets with no labels
    # check if there are shared labels between model and dataset
    shared_labels = set(model_labels).intersection(set(ds_labels))
    if shared_labels:
        ds_labels = list(shared_labels)
    if len(ds_labels) > MAX_LABELS:
        ds_labels = ds_labels[:MAX_LABELS]
        gr.Warning(f"Too many labels to display for this spcae. We do not support more than {MAX_LABELS} in this space. You can use cli tool at https://github.com/Giskard-AI/cicd.")

    # sort labels to make sure the order is consistent
    # prediction gives the order based on probability
    # sorted() works on copies so the caller's lists are not reordered.
    ds_labels = sorted(ds_labels)
    model_labels = sorted(model_labels)

    label_dropdowns = [
        gr.Dropdown(
            label=f"{label}",
            choices=model_labels,
            value=model_labels[i % len(model_labels)],
            interactive=True,
            visible=True,
        )
        for i, label in enumerate(ds_labels)
    ]
    label_dropdowns += [
        gr.Dropdown(visible=False) for _ in range(MAX_LABELS - len(label_dropdowns))
    ]
    all_mappings = export_mappings(all_mappings, "labels", ds_labels, model_labels)

    # TODO: Substitute 'text' with more features for zero-shot
    features = [
        gr.Dropdown(
            label=f"{feature}",
            choices=ds_features,
            value=ds_features[0],
            interactive=True,
            visible=True,
        )
        for feature in ["text"]
    ]
    features += [
        gr.Dropdown(visible=False) for _ in range(MAX_FEATURES - len(features))
    ]
    all_mappings = export_mappings(all_mappings, "features", ["text"], ds_features)
    write_column_mapping(all_mappings, uid)

    return label_dropdowns + features
179
+
180
+
181
def precheck_model_ds_enable_example_btn(
    model_id, dataset_id, dataset_config, dataset_split
):
    """Validate the model/dataset selection and preview the dataset.

    Args:
        model_id: Model id or URL; normalized with ``strip_model_id_from_url``.
        dataset_id: Identifier forwarded to ``datasets.load_dataset``.
        dataset_config: Selected dataset config.
        dataset_split: Selected dataset split.

    Returns:
        A 6-tuple of updates. The first toggles ``interactive`` (True only
        when the model is a text-classification model and the dataset exposes
        label and feature lists). The second shows the first five rows of the
        split when the dataset could be loaded, and is hidden otherwise. The
        remaining four are always ``gr.update(visible=False)``.
    """

    def outputs(interactive, preview):
        # Every outcome shares the same shape; only the first two updates vary.
        return (
            gr.update(interactive=interactive),
            preview,
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=False),
        )

    model_id = strip_model_id_from_url(model_id)
    model_task = check_model_task(model_id)
    preload_hf_inference_api(model_id)

    if dataset_config is None or dataset_split is None or len(dataset_config) == 0:
        return outputs(False, gr.update(visible=False))

    try:
        ds = datasets.load_dataset(dataset_id, dataset_config, trust_remote_code=True)
        df: pd.DataFrame = ds[dataset_split].to_pandas().head(5)
        ds_labels, ds_features, _ = get_labels_and_features_from_dataset(ds[dataset_split])

        if model_task is None or model_task != "text-classification":
            gr.Warning(NOT_TEXT_CLASSIFICATION_MODEL_RAW)
            return outputs(False, gr.update(value=df, visible=True))

        if not isinstance(ds_labels, list) or not isinstance(ds_features, list):
            gr.Warning(CHECK_CONFIG_OR_SPLIT_RAW)
            return outputs(False, gr.update(value=df, visible=True))

        return outputs(True, gr.update(value=df, visible=True))
    except Exception as e:
        # Config or split wrong
        # Logger.warn is a deprecated alias; use warning().
        logger.warning(f"Check your dataset {dataset_id} and config {dataset_config} on split {dataset_split}: {e}")
        return outputs(False, gr.update(visible=False))
244
+
245
+
246
def align_columns_and_show_prediction(
    model_id,
    dataset_id,
    dataset_config,
    dataset_split,
    uid,
    profile: gr.OAuthProfile | None,
    oauth_token: gr.OAuthToken | None,
):
    """Run an example prediction and align model labels with dataset labels.

    Args:
        model_id: Model id or URL; normalized with ``strip_model_id_from_url``.
        dataset_id: Dataset identifier.
        dataset_config: Selected dataset config.
        dataset_split: Selected dataset split.
        uid: Identifier of the config the default mapping is written to.
        profile: OAuth profile of the user, ``None`` when not logged in.
        oauth_token: OAuth token of the user, ``None`` when not logged in.

    Returns:
        A tuple of six leading outputs (status banner, example input, example
        prediction, mapping accordion, run button, message string) followed
        by ``MAX_LABELS + MAX_FEATURES`` dropdown updates. Every return path
        has the same length. The run button is only made interactive when
        both ``profile`` and ``oauth_token`` are set.
        NOTE(review): the roles of the leading outputs are inferred from the
        values assigned here; confirm against the event wiring.
    """

    def hidden_outputs(status, message, dropdowns):
        # Shared shape of every "cannot proceed" result.
        return (
            status,
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=False, open=False),
            gr.update(interactive=False),
            message,
            *dropdowns,
        )

    model_id = strip_model_id_from_url(model_id)
    model_task = check_model_task(model_id)
    if model_task is None or model_task != "text-classification":
        gr.Warning(NOT_TEXT_CLASSIFICATION_MODEL_RAW)
        # This path used to return one leading output fewer than all others,
        # which does not match the number of outputs of the other paths.
        return hidden_outputs(
            gr.update(visible=False),
            "",
            [gr.update(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)],
        )

    dropdown_placement = [
        gr.Dropdown(visible=False) for _ in range(MAX_LABELS + MAX_FEATURES)
    ]

    hf_token = os.environ.get(HF_WRITE_TOKEN, default="")

    prediction_input, prediction_response = get_example_prediction(
        model_id, dataset_id, dataset_config, dataset_split, hf_token
    )

    if prediction_input is None or prediction_response is None:
        return hidden_outputs(gr.update(visible=False), "", dropdown_placement)

    if isinstance(prediction_response, HuggingFaceInferenceAPIResponse):
        return hidden_outputs(
            gr.update(visible=False),
            f"Hugging Face Inference API is loading your model. {prediction_response.message}",
            dropdown_placement,
        )

    model_labels = list(prediction_response.keys())

    ds = datasets.load_dataset(dataset_id, dataset_config, split=dataset_split, trust_remote_code=True)
    ds_labels, ds_features, _ = get_labels_and_features_from_dataset(ds)

    # when dataset does not have labels or features
    if not isinstance(ds_labels, list) or not isinstance(ds_features, list):
        gr.Warning(CHECK_CONFIG_OR_SPLIT_RAW)
        return hidden_outputs(gr.update(visible=False), "", dropdown_placement)

    if len(ds_labels) != len(model_labels):
        return hidden_outputs(
            gr.update(value=UNMATCHED_MODEL_DATASET_STYLED_ERROR, visible=True),
            "",
            dropdown_placement,
        )

    column_mappings = list_labels_and_features_from_dataset(
        ds_labels,
        ds_features,
        model_labels,
        uid,
    )

    # when labels or features are not aligned
    # show manually column mapping
    needs_manual_mapping = (
        collections.Counter(model_labels) != collections.Counter(ds_labels)
        or ds_features[0] != "text"
    )
    status = (
        MAPPING_STYLED_ERROR_WARNING
        if needs_manual_mapping
        else VALIDATED_MODEL_DATASET_STYLED
    )

    return (
        gr.update(value=status, visible=True),
        gr.update(value=prediction_input, lines=min(len(prediction_input)//225 + 1, 5), visible=True),
        gr.update(value=prediction_response, visible=True),
        # The mapping section is expanded only when the user has to fix it.
        gr.update(visible=True, open=needs_manual_mapping),
        gr.update(interactive=(profile is not None and oauth_token is not None)),
        "",
        *column_mappings,
    )
361
+
362
+
363
def check_column_mapping_keys_validity(all_mappings):
    """Tell whether ``all_mappings`` is usable for a submission.

    Args:
        all_mappings: Column mapping dict, or ``None``.

    Returns:
        ``True`` when ``all_mappings`` contains a ``"labels"`` key. ``False``
        otherwise; a ``None`` mapping additionally raises a UI warning.
    """
    if all_mappings is None:
        logger.warning("all_mapping is None")
        gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
        return False

    has_labels = "labels" in all_mappings.keys()
    if not has_labels:
        logger.warning(f"Label mapping is not valid, all_mappings: {all_mappings}")
    return has_labels
374
+
375
def enable_run_btn(uid, model_id, dataset_id, dataset_config, dataset_split, profile: gr.OAuthProfile | None, oath_token: gr.OAuthToken | None):
    """Decide whether the run button may be clicked.

    Args:
        uid: Identifier of the config holding the column mapping.
        model_id: Selected model id.
        dataset_id: Selected dataset id.
        dataset_config: Selected dataset config.
        dataset_split: Selected dataset split.
        profile: OAuth profile of the user, ``None`` when not logged in.
        oath_token: OAuth token of the user, ``None`` when not logged in.

    Returns:
        ``gr.update(interactive=True)`` when the user is logged in, all four
        selections are non-empty and the stored column mapping is valid;
        ``gr.update(interactive=False)`` otherwise.
    """
    if profile is None or oath_token is None:
        return gr.update(interactive=False)
    if model_id == "" or dataset_id == "" or dataset_config == "" or dataset_split == "":
        # Logger.warn is a deprecated alias; use warning().
        logger.warning("Model id or dataset id is not selected")
        return gr.update(interactive=False)

    all_mappings = read_column_mapping(uid)
    if not check_column_mapping_keys_validity(all_mappings):
        logger.warning("Column mapping is not valid")
        return gr.update(interactive=False)

    # All checks passed. Previously the function fell off the end here and
    # returned None, so the button was never explicitly enabled.
    return gr.update(interactive=True)
388
+
389
def construct_label_and_feature_mapping(all_mappings, ds_labels, ds_features, label_keys=None):
    """Build the label and feature mappings passed to the scanner.

    Args:
        all_mappings: Stored column mapping; must contain ``"labels"``.
        ds_labels: Dataset labels, in dataset order.
        ds_features: Dataset feature names (used for a consistency check).
        label_keys: Optional sequence whose first element is stored as the
            ``"label"`` feature.

    Returns:
        ``(label_mapping, feature_mapping)``. ``label_mapping`` maps the
        stringified position of each dataset label to its stored mapping.
        ``feature_mapping`` is the stored ``"features"`` section (updated in
        place with ``"label"`` when ``label_keys`` is non-empty).

    Raises:
        KeyError: If a dataset label has no entry in ``all_mappings["labels"]``.
    """
    label_mapping = {}
    if len(all_mappings["labels"].keys()) != len(ds_labels):
        # Logger.warn is a deprecated alias; use warning().
        logger.warning(
            f"Label mapping corrupted: {CONFIRM_MAPPING_DETAILS_FAIL_RAW}."
            f"\n\nall_mappings: {all_mappings}\nds_labels: {ds_labels}"
        )

    # Check for the section before indexing it; the previous order raised
    # KeyError before the warning could ever be shown.
    if "features" not in all_mappings.keys():
        logger.warning("features not in all_mappings")
        gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
        # NOTE(review): falling back to an empty feature mapping keeps the
        # call alive after the warning; confirm this is preferable to aborting.
        feature_mapping = {}
    else:
        feature_mapping = all_mappings["features"]
        if len(feature_mapping.keys()) != len(ds_features):
            logger.warning(
                f"Feature mapping corrupted: {CONFIRM_MAPPING_DETAILS_FAIL_RAW}."
                f"\n\nall_mappings: {all_mappings}\nds_features: {ds_features}"
            )

    for i, label in enumerate(ds_labels):
        # align the saved labels with dataset labels order
        label_mapping[str(i)] = all_mappings["labels"][label]

    # label_keys defaults to None, so guard before taking its length.
    if label_keys is not None and len(label_keys) > 0:
        feature_mapping.update({"label": label_keys[0]})
    return label_mapping, feature_mapping
411
+
412
def show_hf_token_info(token):
    """Show the token hint when ``token`` fails validation.

    Args:
        token: Token checked with ``check_hf_token_validity``.

    Returns:
        ``gr.update(visible=True)`` for an invalid token and
        ``gr.update(visible=False)`` for a valid one.
    """
    token_is_valid = check_hf_token_validity(token)
    return gr.update(visible=not token_is_valid)
417
+
418
def try_submit(m_id, d_id, config, split, uid, profile: gr.OAuthProfile | None, oath_token: gr.OAuthToken | None):
    """Queue an evaluation job for the selected model and dataset.

    Args:
        m_id: Model id.
        d_id: Dataset id.
        config: Dataset config.
        split: Dataset split.
        uid: Identifier of the current session/config; becomes the job id.
        profile: OAuth profile of the user, ``None`` when not logged in.
        oath_token: OAuth token of the user; its ``token`` is passed to the
            job as the inference token.

    Returns:
        A 7-tuple of outputs. On success: disabled submit button, a visible
        confirmation text containing the job id, a freshly allocated uid, and
        four hidden updates. When the submission is rejected (no login or
        invalid column mapping): re-enabled submit button, hidden text, the
        unchanged ``uid`` and four no-op updates.
    """

    def rejected():
        # Same arity as the success result so the number of outputs is
        # identical on every path.
        # NOTE(review): no-op updates are assumed to be the right "leave as is"
        # value for the last four outputs; confirm against the event wiring.
        return (
            gr.update(interactive=True),
            gr.update(visible=False),
            uid,
            gr.update(),
            gr.update(),
            gr.update(),
            gr.update(),
        )

    # The OAuth token is a secret and must not be printed to stdout/logs.
    if profile is None or oath_token is None:
        logger.warning("Cannot submit an evaluation without a logged-in user")
        return rejected()

    all_mappings = read_column_mapping(uid)
    if not check_column_mapping_keys_validity(all_mappings):
        return rejected()

    # get ds labels and features again for alignment
    ds = datasets.load_dataset(d_id, config, split=split, trust_remote_code=True)
    ds_labels, ds_features, label_keys = get_labels_and_features_from_dataset(ds)
    label_mapping, feature_mapping = construct_label_and_feature_mapping(all_mappings, ds_labels, ds_features, label_keys)

    eval_str = f"[{m_id}]<{d_id}({config}, {split} set)>"
    save_job_to_pipe(
        uid,
        (
            m_id,
            d_id,
            config,
            split,
            oath_token.token,
            uid,
            label_mapping,
            feature_mapping,
        ),
        eval_str,
        # NOTE(review): a lock created per call is never shared with any
        # other caller, so it does not serialize access to the job queue.
        threading.Lock(),
    )
    gr.Info("Your evaluation has been submitted")

    # Carry the selected scanners over to the config of the next session.
    new_uid = uuid.uuid4()
    scanners = read_scanners(uid)
    write_scanners(scanners, new_uid)

    return (
        gr.update(interactive=False),  # Submit button
        gr.update(value=f"{CHECK_LOG_SECTION_RAW}Your job id is: {uid}. ", lines=5, visible=True, interactive=False),
        new_uid,  # Allocate a new uuid
        gr.update(visible=False),
        gr.update(visible=False),
        gr.update(visible=False),
        gr.update(visible=False),
    )
utils.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+
3
+ import yaml
4
+
5
+
6
# read scanners from yaml file
# return a list of scanners
def read_scanners(path):
    """Read the ``detectors`` entry from the YAML file at ``path``.

    Args:
        path: Path of the YAML file to read.

    Returns:
        The value stored under ``detectors``, or ``None`` when the key is
        absent or the file is empty.
    """
    with open(path, "r") as f:
        config = yaml.load(f, Loader=yaml.FullLoader)
    # An empty YAML document loads as None; report "no detectors" the same
    # way as a missing key instead of failing on ``None.get``.
    if not config:
        return None
    return config.get("detectors", None)
14
+
15
+
16
# convert a list of scanners to yaml file
def write_scanners(scanners):
    """Write ``scanners`` as the ``detectors`` entry of ``./scan_config.yaml``.

    The file is overwritten with a document that contains only that key.

    Args:
        scanners: Value stored under ``detectors``.
    """
    document = {"detectors": scanners}
    # save scanners to detectors in yaml
    with open("./scan_config.yaml", "w") as target:
        yaml.dump(document, target)
21
+
22
+
23
# convert column mapping dataframe to json
def convert_column_mapping_to_json(df, label=""):
    """Export the rows of ``df`` as lists under a single key.

    Args:
        df: DataFrame whose rows are exported in iteration order.
        label: Key of the resulting dict.

    Returns:
        ``{label: [row_as_list, ...]}`` with one inner list per row.
    """
    exported_rows = []
    for _, record in df.iterrows():
        exported_rows.append(record.tolist())
    return {label: exported_rows}
wordings.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# User-facing wordings for the evaluator UI.
# Naming: *_MD values are Markdown/HTML snippets, *_RAW values are plain
# text, and *_STYLED values are HTML fragments carrying inline styles.

INTRODUCTION_MD = """
<h1 style="text-align: center;">
🐢Giskard Evaluator - Text Classification
</h1>
Welcome to the Giskard Evaluator Space! Get a model vulnerability report immediately by simply sharing your model and dataset id below.
You can also check out our library documentation <a href="https://docs.giskard.ai/en/latest/getting_started/quickstart/index.html">here</a>.
"""
CONFIRM_MAPPING_DETAILS_MD = """
<h1 style="text-align: center;">
Confirm Pre-processing Details
</h1>
Make sure the output variable's labels and the input variable's name are accurately mapped across both the dataset and the model. You can select the output variable's labels from the dropdowns below.
"""
CONFIRM_MAPPING_DETAILS_FAIL_MD = """
<h1 style="text-align: center;">
Confirm Pre-processing Details
</h1>
We're unable to automatically map the input variable's name and output variable's labels of your dataset with the model's. Please manually check the mapping below.
"""

CONFIRM_MAPPING_DETAILS_FAIL_RAW = """
We're unable to automatically map the input variable's name and output variable's labels of your dataset with the model's. Please manually check the mapping below.
"""

CHECK_CONFIG_OR_SPLIT_RAW = """
Please check your dataset config or split.
"""

# Shown after a successful submission; grammar fixed ("You have", and the
# sentence about where the report is posted).
CHECK_LOG_SECTION_RAW = """
You have successfully submitted a Giskard evaluation. Further details are available in the Logs tab. Your report will be posted to your model's community discussion.
"""

PREDICTION_SAMPLE_MD = """
<h1 style="text-align: center;">
Model Prediction Sample
</h1>
Here's a sample of your model's prediction on an example from the dataset.
"""

MAPPING_STYLED_ERROR_WARNING = """
<h3 style="text-align: center;color: orange; background-color: #fff0f3; border-radius: 8px; padding: 10px; ">
⚠️ We're unable to automatically map the input variable's name and output variable's labels of your dataset with the model's. Please manually check the mapping below.
</h3>
"""

UNMATCHED_MODEL_DATASET_STYLED_ERROR = """
<h3 style="text-align: center;color: #fa5f5f; background-color: #fbe2e2; border-radius: 8px; padding: 10px; ">
❌ Your model and dataset have different numbers of labels. Please double check your model and dataset.
</h3>
"""

NOT_TEXT_CLASSIFICATION_MODEL_RAW = """
Your model does not fall under the category of text classification. This page is specifically designated for the evaluation of text classification models.
"""

USE_INFERENCE_API_TIP = """
To speed up the evaluation, we recommend using the
<a href="https://huggingface.co/docs/api-inference/detailed_parameters#text-classification-task">
Hugging Face Inference API
</a>
. Please input your <a href="https://huggingface.co/settings/tokens">Hugging Face token</a> to do so. You can find it <a href="https://huggingface.co/settings/tokens">here</a>.
"""

LOG_IN_TIPS = """
To use the Hugging Face Inference API, you need to log in to your Hugging Face account.
"""

HF_TOKEN_INVALID_STYLED = """
<p style="text-align: left;color: red; ">
Your Hugging Face token is invalid. Please double check your token.
</p>
"""

VALIDATED_MODEL_DATASET_STYLED = """
<h3 style="text-align: center;color: #4ca154; background-color: #e2fbe8; border-radius: 8px; padding: 10px; ">
Your model and dataset have been validated!
</h3>"""
+
79
def get_dataset_fetch_error_raw(error):
    """Build the plain-text message shown when a dataset cannot be fetched.

    Args:
        error: The failure reason; it is formatted into the sentence as-is.

    Returns:
        The complete user-facing error sentence.
    """
    template = (
        "Sorry you cannot use this dataset because {}. "
        "Contact HF team to support this dataset."
    )
    return template.format(error)