giskard-evaluator

Running

App Files Files Community

200

inoki-giskard committed on Jan 9, 2024

Commit

58c39e0

1 Parent(s): e84a865

Format code and imports

Browse files

Files changed (12) hide show

app.py +2 -1
app_debug.py +17 -5
app_leaderboard.py +8 -4
app_text_classification.py +44 -40
io_utils.py +1 -3
pipe.py +1 -2
run_jobs.py +2 -1
text_classification.py +3 -3
text_classification_ui_helpers.py +17 -20
utils.py +7 -2
validate_queue.py +0 -1
wordings.py +1 -1

app.py CHANGED Viewed

@@ -1,9 +1,10 @@
 import atexit
 import gradio as gr
 from app_leaderboard import get_demo as get_demo_leaderboard
 from app_text_classification import get_demo as get_demo_text_classification
-from app_debug import get_demo as get_demo_debug
 from run_jobs import start_process_run_job, stop_thread
 try:

 import atexit
 import gradio as gr
+from app_debug import get_demo as get_demo_debug
 from app_leaderboard import get_demo as get_demo_leaderboard
 from app_text_classification import get_demo as get_demo_text_classification
 from run_jobs import start_process_run_job, stop_thread
 try:

app_debug.py CHANGED Viewed

@@ -1,10 +1,14 @@
-import gradio as gr
-import pipe
 from os import listdir
 from os.path import isfile, join
 LOG_PATH = "./tmp"
 CONFIG_PATH = "./cicd/configs"
 def get_accordions_of_files(path, files):
     components = []
     for file in files:
@@ -15,14 +19,23 @@ def get_accordions_of_files(path, files):
                         gr.Markdown(f.read())
     return components
 def get_accordions_of_log_files():
-    log_files = [f for f in listdir(LOG_PATH) if isfile(join(LOG_PATH, f)) and f.endswith("_log")]
     return get_accordions_of_files(LOG_PATH, log_files)
 def get_accordions_of_config_files():
-    config_files = [f for f in listdir(CONFIG_PATH) if isfile(join(CONFIG_PATH, f)) and f.endswith(".yaml")]
     return get_accordions_of_files(CONFIG_PATH, config_files)
 def get_demo(demo):
     with gr.Row():
         # check if jobs is an attribute of pipe
@@ -34,4 +47,3 @@ def get_demo(demo):
     with gr.Accordion(label="Log Files", open=False):
         log_accordions = get_accordions_of_log_files()
         demo.load(get_accordions_of_log_files, outputs=log_accordions, every=1)

 from os import listdir
 from os.path import isfile, join
+import gradio as gr
+import pipe
 LOG_PATH = "./tmp"
 CONFIG_PATH = "./cicd/configs"
 def get_accordions_of_files(path, files):
     components = []
     for file in files:
                         gr.Markdown(f.read())
     return components
 def get_accordions_of_log_files():
+    log_files = [
+        f for f in listdir(LOG_PATH) if isfile(join(LOG_PATH, f)) and f.endswith("_log")
+    ]
     return get_accordions_of_files(LOG_PATH, log_files)
 def get_accordions_of_config_files():
+    config_files = [
+        f
+        for f in listdir(CONFIG_PATH)
+        if isfile(join(CONFIG_PATH, f)) and f.endswith(".yaml")
+    ]
     return get_accordions_of_files(CONFIG_PATH, config_files)
 def get_demo(demo):
     with gr.Row():
         # check if jobs is an attribute of pipe
     with gr.Accordion(label="Log Files", open=False):
         log_accordions = get_accordions_of_log_files()
         demo.load(get_accordions_of_log_files, outputs=log_accordions, every=1)

app_leaderboard.py CHANGED Viewed

@@ -3,7 +3,8 @@ import logging
 import datasets
 import gradio as gr
-from fetch_utils import check_dataset_and_get_config, check_dataset_and_get_split
 def get_records_from_dataset_repo(dataset_id):
@@ -94,7 +95,10 @@ def get_demo():
             label="Model id", choices=model_ids, value=model_ids[0], interactive=True
         )
         dataset_select = gr.Dropdown(
-            label="Dataset id", choices=dataset_ids, value=dataset_ids[0], interactive=True
         )
     with gr.Row():
@@ -123,9 +127,9 @@ def get_demo():
         df = records[(records["task"] == task)]
         # filter the table based on the model_id and dataset_id
         if model_id and model_id != "Any":
-            df = df[(df['model_id'] == model_id)]
         if dataset_id and dataset_id != "Any":
-            df = df[(df['dataset_id'] == dataset_id)]
         # filter the table based on the columns
         df = df[columns]

 import datasets
 import gradio as gr
+from fetch_utils import (check_dataset_and_get_config,
+                         check_dataset_and_get_split)
 def get_records_from_dataset_repo(dataset_id):
             label="Model id", choices=model_ids, value=model_ids[0], interactive=True
         )
         dataset_select = gr.Dropdown(
+            label="Dataset id",
+            choices=dataset_ids,
+            value=dataset_ids[0],
+            interactive=True,
         )
     with gr.Row():
         df = records[(records["task"] == task)]
         # filter the table based on the model_id and dataset_id
         if model_id and model_id != "Any":
+            df = df[(df["model_id"] == model_id)]
         if dataset_id and dataset_id != "Any":
+            df = df[(df["dataset_id"] == dataset_id)]
         # filter the table based on the columns
         df = df[columns]

app_text_classification.py CHANGED Viewed

@@ -1,22 +1,16 @@
-import gradio as gr
 import uuid
-from io_utils import (
-    read_scanners,
-    write_scanners,
-    read_inference_type,
-    get_logs_file,
-    write_inference_type,
-)
-from wordings import INTRODUCTION_MD, CONFIRM_MAPPING_DETAILS_MD
-from text_classification_ui_helpers import (
-    try_submit,
-    check_dataset_and_get_config,
-    check_dataset_and_get_split,
-    check_model_and_show_prediction,
-    write_column_mapping_to_config,
-    select_run_mode,
-    deselect_run_inference,
-)
 MAX_LABELS = 20
 MAX_FEATURES = 20
@@ -70,17 +64,24 @@ def get_demo(demo):
     with gr.Accordion(label="Model Wrap Advance Config (optional)", open=False):
         run_local = gr.Checkbox(value=True, label="Run in this Space")
         run_inference = gr.Checkbox(value="False", label="Run with Inference API")
         @gr.on(triggers=[uid_label.change], inputs=[uid_label], outputs=[run_inference])
         def get_run_mode(uid):
-            return (
-                gr.update(value=read_inference_type(uid) == "hf_inference_api" and not run_local.value)
             )
-        inference_token = gr.Textbox(value="", label="HF Token for Inference API", visible=False, interactive=True)
-    with gr.Accordion(label="Scanner Advance Config (optional)", open=False):
-        scanners = gr.CheckboxGroup(
-            label="Scan Settings", visible=True
         )
         @gr.on(triggers=[uid_label.change], inputs=[uid_label], outputs=[scanners])
         def get_scanners(uid):
             selected = read_scanners(uid)
@@ -88,9 +89,9 @@ def get_demo(demo):
             # Reason: data_leakage barely raises any issues and takes too many requests
             # when using inference API, causing rate limit error
             scan_config = selected + ["data_leakage"]
-            return (gr.update(
-                    choices=scan_config, value=selected, label="Scan Settings", visible=True
-                ))
     with gr.Row():
         run_btn = gr.Button(
@@ -105,8 +106,9 @@ def get_demo(demo):
         demo.load(get_logs_file, uid_label, logs, every=0.5)
     dataset_id_input.change(
-        check_dataset_and_get_config,
-        inputs=[dataset_id_input], outputs=[dataset_config_input]
     )
     dataset_config_input.change(
@@ -118,19 +120,21 @@ def get_demo(demo):
     scanners.change(write_scanners, inputs=[scanners, uid_label])
     run_inference.change(
-        select_run_mode,
-        inputs=[run_inference, inference_token, uid_label],
-        outputs=[inference_token, run_local])
     run_local.change(
-        deselect_run_inference,
-        inputs=[run_local],
-        outputs=[inference_token, run_inference])
     inference_token.change(
-        write_inference_type,
-        inputs=[run_inference, inference_token, uid_label])
     gr.on(
         triggers=[label.change for label in column_mappings],
         fn=write_column_mapping_to_config,

 import uuid
+import gradio as gr
+from io_utils import (get_logs_file, read_inference_type, read_scanners,
+                      write_inference_type, write_scanners)
+from text_classification_ui_helpers import (check_dataset_and_get_config,
+                                            check_dataset_and_get_split,
+                                            check_model_and_show_prediction,
+                                            deselect_run_inference,
+                                            select_run_mode, try_submit,
+                                            write_column_mapping_to_config)
+from wordings import CONFIRM_MAPPING_DETAILS_MD, INTRODUCTION_MD
 MAX_LABELS = 20
 MAX_FEATURES = 20
     with gr.Accordion(label="Model Wrap Advance Config (optional)", open=False):
         run_local = gr.Checkbox(value=True, label="Run in this Space")
         run_inference = gr.Checkbox(value="False", label="Run with Inference API")
         @gr.on(triggers=[uid_label.change], inputs=[uid_label], outputs=[run_inference])
         def get_run_mode(uid):
+            return gr.update(
+                value=read_inference_type(uid) == "hf_inference_api"
+                and not run_local.value
             )
+        inference_token = gr.Textbox(
+            value="",
+            label="HF Token for Inference API",
+            visible=False,
+            interactive=True,
         )
+    with gr.Accordion(label="Scanner Advance Config (optional)", open=False):
+        scanners = gr.CheckboxGroup(label="Scan Settings", visible=True)
         @gr.on(triggers=[uid_label.change], inputs=[uid_label], outputs=[scanners])
         def get_scanners(uid):
             selected = read_scanners(uid)
             # Reason: data_leakage barely raises any issues and takes too many requests
             # when using inference API, causing rate limit error
             scan_config = selected + ["data_leakage"]
+            return gr.update(
+                choices=scan_config, value=selected, label="Scan Settings", visible=True
+            )
     with gr.Row():
         run_btn = gr.Button(
         demo.load(get_logs_file, uid_label, logs, every=0.5)
     dataset_id_input.change(
+        check_dataset_and_get_config,
+        inputs=[dataset_id_input],
+        outputs=[dataset_config_input],
     )
     dataset_config_input.change(
     scanners.change(write_scanners, inputs=[scanners, uid_label])
     run_inference.change(
+        select_run_mode,
+        inputs=[run_inference, inference_token, uid_label],
+        outputs=[inference_token, run_local],
+    )
     run_local.change(
+        deselect_run_inference,
+        inputs=[run_local],
+        outputs=[inference_token, run_inference],
+    )
     inference_token.change(
+        write_inference_type, inputs=[run_inference, inference_token, uid_label]
+    )
     gr.on(
         triggers=[label.change for label in column_mappings],
         fn=write_column_mapping_to_config,

io_utils.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import os
 import subprocess
 import yaml
 import pipe
@@ -44,7 +45,6 @@ def write_scanners(scanners, uid):
     f.close()
 # read model_type from yaml file
 def read_inference_type(uid):
     inference_type = ""
@@ -73,7 +73,6 @@ def write_inference_type(use_inference, inference_token, uid):
     f.close()
 # read column mapping from yaml file
 def read_column_mapping(uid):
     column_mapping = {}
@@ -103,7 +102,6 @@ def write_column_mapping(mapping, uid):
     f.close()
 # convert column mapping dataframe to json
 def convert_column_mapping_to_json(df, label=""):
     column_mapping = {}

 import os
 import subprocess
 import yaml
 import pipe
     f.close()
 # read model_type from yaml file
 def read_inference_type(uid):
     inference_type = ""
     f.close()
 # read column mapping from yaml file
 def read_column_mapping(uid):
     column_mapping = {}
     f.close()
 # convert column mapping dataframe to json
 def convert_column_mapping_to_json(df, label=""):
     column_mapping = {}

pipe.py CHANGED Viewed

@@ -1,4 +1,3 @@
 def init():
     global jobs
-    jobs = list()

 def init():
     global jobs
+    jobs = list()

run_jobs.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import threading
 import time
-import logging
 import pipe
 from io_utils import pop_job_from_pipe

+import logging
 import threading
 import time
 import pipe
 from io_utils import pop_job_from_pipe

text_classification.py CHANGED Viewed

@@ -9,9 +9,9 @@ from transformers import pipeline
 def get_labels_and_features_from_dataset(dataset_id, dataset_config, split):
     if not dataset_config:
-        dataset_config = 'default'
     if not split:
-        split = 'train'
     try:
         ds = datasets.load_dataset(dataset_id, dataset_config)[split]
         dataset_features = ds.features
@@ -54,7 +54,7 @@ def text_classification_map_model_and_dataset_labels(id2label, dataset_features)
             continue
         if len(feature.names) != len(id2label_mapping.keys()):
             continue
         dataset_labels = feature.names
         # Try to match labels
         for label in feature.names:

 def get_labels_and_features_from_dataset(dataset_id, dataset_config, split):
     if not dataset_config:
+        dataset_config = "default"
     if not split:
+        split = "train"
     try:
         ds = datasets.load_dataset(dataset_id, dataset_config)[split]
         dataset_features = ds.features
             continue
         if len(feature.names) != len(id2label_mapping.keys()):
             continue
         dataset_labels = feature.names
         # Try to match labels
         for label in feature.names:

text_classification_ui_helpers.py CHANGED Viewed

@@ -9,11 +9,13 @@ import gradio as gr
 from transformers.pipelines import TextClassificationPipeline
 from io_utils import (get_yaml_path, read_column_mapping, save_job_to_pipe,
-                      write_column_mapping, write_log_to_user_file,
-                      write_inference_type)
 from text_classification import (check_model, get_example_prediction,
                                  get_labels_and_features_from_dataset)
-from wordings import CONFIRM_MAPPING_DETAILS_FAIL_RAW, MAPPING_STYLED_ERROR_WARNING, CHECK_CONFIG_OR_SPLIT_RAW
 MAX_LABELS = 20
 MAX_FEATURES = 20
@@ -42,30 +44,23 @@ def check_dataset_and_get_split(dataset_id, dataset_config):
         # gr.Warning(f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}")
         pass
 def select_run_mode(run_inf, inf_token, uid):
     if run_inf:
         if len(inf_token) > 0:
             write_inference_type(run_inf, inf_token, uid)
-        return (
-            gr.update(visible=True),
-            gr.update(value=False))
     else:
-        return (
-            gr.update(visible=False),
-            gr.update(value=True))
 def deselect_run_inference(run_local):
     if run_local:
-        return (
-            gr.update(visible=False),
-            gr.update(value=False)
-        )
     else:
-        return (
-            gr.update(visible=True),
-            gr.update(value=True)
-        )
 def write_column_mapping_to_config(
     dataset_id, dataset_config, dataset_split, uid, *labels
 ):
@@ -83,7 +78,7 @@ def write_column_mapping_to_config(
         all_mappings["labels"] = dict()
     for i, label in enumerate(labels[:MAX_LABELS]):
         if label:
-            all_mappings["labels"][label] = ds_labels[i%len(ds_labels)]
     if "features" not in all_mappings.keys():
         all_mappings["features"] = dict()
     for _, feat in enumerate(labels[MAX_LABELS : (MAX_LABELS + MAX_FEATURES)]):
@@ -204,7 +199,9 @@ def try_submit(m_id, d_id, config, split, local, uid):
         gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
         return (gr.update(interactive=True), gr.update(visible=False))
     label_mapping = {}
-    for i, label in zip(range(len(all_mappings["labels"].keys())), all_mappings["labels"].keys()):
         label_mapping.update({str(i): label})
     if "features" not in all_mappings.keys():

 from transformers.pipelines import TextClassificationPipeline
 from io_utils import (get_yaml_path, read_column_mapping, save_job_to_pipe,
+                      write_column_mapping, write_inference_type,
+                      write_log_to_user_file)
 from text_classification import (check_model, get_example_prediction,
                                  get_labels_and_features_from_dataset)
+from wordings import (CHECK_CONFIG_OR_SPLIT_RAW,
+                      CONFIRM_MAPPING_DETAILS_FAIL_RAW,
+                      MAPPING_STYLED_ERROR_WARNING)
 MAX_LABELS = 20
 MAX_FEATURES = 20
         # gr.Warning(f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}")
         pass
 def select_run_mode(run_inf, inf_token, uid):
     if run_inf:
         if len(inf_token) > 0:
             write_inference_type(run_inf, inf_token, uid)
+        return (gr.update(visible=True), gr.update(value=False))
     else:
+        return (gr.update(visible=False), gr.update(value=True))
 def deselect_run_inference(run_local):
     if run_local:
+        return (gr.update(visible=False), gr.update(value=False))
     else:
+        return (gr.update(visible=True), gr.update(value=True))
 def write_column_mapping_to_config(
     dataset_id, dataset_config, dataset_split, uid, *labels
 ):
         all_mappings["labels"] = dict()
     for i, label in enumerate(labels[:MAX_LABELS]):
         if label:
+            all_mappings["labels"][label] = ds_labels[i % len(ds_labels)]
     if "features" not in all_mappings.keys():
         all_mappings["features"] = dict()
     for _, feat in enumerate(labels[MAX_LABELS : (MAX_LABELS + MAX_FEATURES)]):
         gr.Warning(CONFIRM_MAPPING_DETAILS_FAIL_RAW)
         return (gr.update(interactive=True), gr.update(visible=False))
     label_mapping = {}
+    for i, label in zip(
+        range(len(all_mappings["labels"].keys())), all_mappings["labels"].keys()
+    ):
         label_mapping.update({str(i): label})
     if "features" not in all_mappings.keys():

utils.py CHANGED Viewed

@@ -1,5 +1,8 @@
-import yaml
 import sys
 # read scanners from yaml file
 # return a list of scanners
 def read_scanners(path):
@@ -9,16 +12,18 @@ def read_scanners(path):
         scanners = config.get("detectors", None)
     return scanners
 # convert a list of scanners to yaml file
 def write_scanners(scanners):
     with open("./scan_config.yaml", "w") as f:
         # save scanners to detectors in yaml
         yaml.dump({"detectors": scanners}, f)
 # convert column mapping dataframe to json
 def convert_column_mapping_to_json(df, label=""):
     column_mapping = {}
     column_mapping[label] = []
     for _, row in df.iterrows():
         column_mapping[label].append(row.tolist())
-    return column_mapping

 import sys
+import yaml
 # read scanners from yaml file
 # return a list of scanners
 def read_scanners(path):
         scanners = config.get("detectors", None)
     return scanners
 # convert a list of scanners to yaml file
 def write_scanners(scanners):
     with open("./scan_config.yaml", "w") as f:
         # save scanners to detectors in yaml
         yaml.dump({"detectors": scanners}, f)
 # convert column mapping dataframe to json
 def convert_column_mapping_to_json(df, label=""):
     column_mapping = {}
     column_mapping[label] = []
     for _, row in df.iterrows():
         column_mapping[label].append(row.tolist())
+    return column_mapping

validate_queue.py CHANGED Viewed

@@ -13,7 +13,6 @@ def sleep_a_while():
     return str(seconds)
 with gr.Blocks() as iface:
     text = gr.Textbox(label="Slept second")

     return str(seconds)
 with gr.Blocks() as iface:
     text = gr.Textbox(label="Slept second")

wordings.py CHANGED Viewed

@@ -36,4 +36,4 @@ MAPPING_STYLED_ERROR_WARNING = """
                         <h3 style="text-align: center;color: coral; background-color: #fff0f3; border-radius: 8px; padding: 10px; ">
                         Sorry, we cannot auto-align the labels/features of your dataset and model. Please double check.
                         </h3>
-                        """

                         <h3 style="text-align: center;color: coral; background-color: #fff0f3; border-radius: 8px; padding: 10px; ">
                         Sorry, we cannot auto-align the labels/features of your dataset and model. Please double check.
                         </h3>
+                        """