Create local setup (#1)
Commit: 1629b01ca419d717ce2fcbfea04fa634bfd1a66b
Co-authored-by: Eleftheria Stein-Kousathana <eleftherias@users.noreply.huggingface.co>
- README.md +25 -0
- app.py +10 -20
- src/populate.py +9 -8
- src/submission/submit.py +16 -12
- utils/check_local.py +4 -0
README.md CHANGED
@@ -11,6 +11,31 @@ license: apache-2.0
 short_description: Benchmark the ability of LLMs to produce secure code.
 ---
 
+# Running locally
+
+Ensure [cmake](https://cmake.org/cmake/help/latest/) is installed on your system.
+
+Ensure you're running with Python version **3.10**.
+
+### (Optional) Create a virtual environment
+
+```bash
+python -m venv venv
+source venv/bin/activate
+```
+
+### Install the required packages
+
+```bash
+pip install -r requirements.txt
+```
+
+### Run the application
+
+```bash
+python app.py
+```
+
 # Start the configuration
 
 Most of the variables to change for a default leaderboard are in `src/env.py` (replace the path for your leaderboard) and `src/about.py` (for tasks).
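Note: the README pins Python **3.10**, but nothing in the commit enforces it at runtime. A hypothetical guard (not part of this commit) that `app.py` could use to fail fast on a mismatched interpreter:

```python
# Hypothetical guard, not in this commit: abort early if the interpreter
# is not the Python 3.10 the README calls for.
import sys

if sys.version_info[:2] != (3, 10):
    raise SystemExit(f"Python 3.10 required, found {sys.version.split()[0]}")
```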
app.py CHANGED
@@ -102,10 +102,6 @@ def init_leaderboard(dataframe):
         interactive=False,
     )
 
-def get_evaluation_queue_df(path, cols):
-    # Implementation to retrieve DataFrames
-    pass
-
 def start_evaluation(row):
     logger.info(f"Starting evaluation for row ID {row.get('id')}")
     # Implementation to start evaluation
@@ -134,10 +130,6 @@ def process_evaluation_queue():
         finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
 
         # Assign statuses to each DataFrame
-        finished_eval_queue_df = finished_eval_queue_df.copy()
-        running_eval_queue_df = running_eval_queue_df.copy()
-        pending_eval_queue_df = pending_eval_queue_df.copy()
-
         finished_eval_queue_df['status'] = 'FINISHED'
         running_eval_queue_df['status'] = 'RUNNING'
         pending_eval_queue_df['status'] = 'PENDING'
@@ -177,6 +169,7 @@ def process_evaluation_queue():
                 logger.warning(f"Unknown status '{status}' for row ID {row.get('id')}")
 
         logger.info("Completed processing of evaluation queue")
+        return finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df
 
     except Exception as e:
         logger.error(f"Error processing evaluation queue: {e}", exc_info=True)
@@ -200,7 +193,7 @@ with demo:
 
             with gr.Column():
                 with gr.Accordion(
-                    f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
+                    f"✅ Finished Evaluations",
                     open=False,
                 ):
                     with gr.Row():
@@ -211,8 +204,8 @@ with demo:
                             row_count=5,
                         )
                 with gr.Accordion(
-                    f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
-                    open=False,
+                    f"🔄 Running Evaluation Queue",
+                    open=False,
                 ):
                     with gr.Row():
                         running_eval_table = gr.components.Dataframe(
@@ -223,7 +216,7 @@ with demo:
                         )
 
                 with gr.Accordion(
-                    f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
+                    f"⏳ Pending Evaluation Queue",
                     open=False,
                 ):
                     with gr.Row():
@@ -233,6 +226,11 @@ with demo:
                             datatype=EVAL_TYPES,
                             row_count=5,
                         )
+
+    # Process the evaluation queue every 2 minutes
+    timer = gr.Timer(120, active=True)
+    timer.tick(process_evaluation_queue, inputs=[], outputs=[finished_eval_table, running_eval_table, pending_eval_table])
+
     with gr.Row():
         gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
 
@@ -290,12 +288,4 @@ with demo:
                 show_copy_button=True,
             )
 
-# Schedule the job with enhanced settings
-scheduler.add_job(
-    process_evaluation_queue,
-    trigger="interval",
-    seconds=30,
-    next_run_time=None,  # Prevents the job from running immediately upon scheduler start
-    id='process_evaluation_queue_job'
-)
 demo.queue(default_concurrency_limit=40).launch()
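The scheduler-to-timer change is the core of this diff: the APScheduler job ran `process_evaluation_queue` in the background and discarded its result, whereas `gr.Timer` runs it as a Gradio event whose return values re-render the three queue tables, which is why the function now ends with a `return`. A minimal, self-contained sketch of the same pattern (assumes Gradio >= 4.40, where `gr.Timer` and its `.tick` event are available; `refresh_queue` is an illustrative stand-in for `process_evaluation_queue`):

```python
# Minimal sketch of the gr.Timer refresh pattern used in app.py.
import gradio as gr
import pandas as pd

def refresh_queue():
    # In app.py this is process_evaluation_queue, which returns the three
    # refreshed queue DataFrames; one table is enough to show the shape.
    return pd.DataFrame({"model": ["org/model"], "status": ["PENDING"]})

with gr.Blocks() as demo:
    pending_table = gr.Dataframe(row_count=5)
    timer = gr.Timer(120, active=True)  # fires every 120 seconds
    # Each tick's return value is routed into the table's outputs.
    timer.tick(refresh_queue, inputs=[], outputs=[pending_table])

demo.launch()
```

Returning the DataFrames from the tick handler also explains why the accordion titles lose their `len(...)` counts: the tables no longer exist as module-level data at build time, so the counts can't be computed when the UI is constructed.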
src/populate.py CHANGED
@@ -39,15 +39,16 @@ def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
             all_evals.append(data)
         elif ".md" not in entry:
             # this is a folder
-            sub_entries = [e for e in os.listdir(f"{save_path}/{entry}") if os.path.isfile(e) and not e.startswith(".")]
+            sub_entries = [e for e in os.listdir(f"{save_path}/{entry}") if os.path.isfile(os.path.join(save_path, entry, e)) and not e.startswith(".")]
             for sub_entry in sub_entries:
-                file_path = os.path.join(save_path, entry, sub_entry)
-                with open(file_path) as fp:
-                    data = json.load(fp)
-
-                data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
-                data[EvalQueueColumn.revision.name] = data.get("revision", "main")
-                all_evals.append(data)
+                if ".json" in sub_entry:
+                    file_path = os.path.join(save_path, entry, sub_entry)
+                    with open(file_path) as fp:
+                        data = json.load(fp)
+
+                    data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
+                    data[EvalQueueColumn.revision.name] = data.get("revision", "main")
+                    all_evals.append(data)
 
     pending_list = [e for e in all_evals if e["status"] in ["PENDING", "RERUN"]]
     running_list = [e for e in all_evals if e["status"] == "RUNNING"]
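Two fixes here. First, `os.listdir` returns bare file names, so the old `os.path.isfile(e)` resolved against the current working directory and effectively filtered out every entry; the name must be re-joined with its directory before the check. Second, the new `".json"` guard skips stray non-JSON files in request folders. An illustrative standalone helper (not from the repo) showing the corrected pattern:

```python
# Illustrative helper, not part of the commit: os.listdir yields bare
# names, so stat-style checks must join them back onto the directory.
import os

def list_json_files(directory: str) -> list[str]:
    return [
        name
        for name in os.listdir(directory)
        # os.path.isfile(name) alone would resolve against the CWD and
        # silently drop every entry; join with the directory first.
        if os.path.isfile(os.path.join(directory, name))
        and not name.startswith(".")
        and name.endswith(".json")
    ]
```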
src/submission/submit.py CHANGED
@@ -11,6 +11,7 @@ from src.submission.check_validity import (
     get_model_size,
     is_model_on_hub,
 )
+from utils.check_local import is_running_on_huggingface
 
 REQUESTED_MODELS = None
 USERS_TO_SUBMISSION_DATES = None
@@ -109,18 +110,21 @@ def add_new_eval(
     with open(out_path, "w") as f:
         f.write(json.dumps(eval_entry))
 
-    logger.debug("Uploading eval file")
-    API.upload_file(
-        path_or_fileobj=out_path,
-        path_in_repo=out_path.split("eval-queue/")[1],
-        repo_id=QUEUE_REPO,
-        repo_type="dataset",
-        commit_message=f"Add {model} to eval queue",
-    )
-    logger.debug("Eval file uploaded")
-    logger.debug("Removing local eval file")
-    # Remove the local file
-    os.remove(out_path)
+    if is_running_on_huggingface():
+        logger.debug("Uploading eval file")
+        API.upload_file(
+            path_or_fileobj=out_path,
+            path_in_repo=out_path.split("eval-queue/")[1],
+            repo_id=QUEUE_REPO,
+            repo_type="dataset",
+            commit_message=f"Add {model} to eval queue",
+        )
+        logger.debug("Eval file uploaded")
+        logger.debug("Removing local eval file")
+        # Remove the local file
+        os.remove(out_path)
+    else:
+        logger.info("Running locally. Skipping file upload.")
 
     return styled_message(
         "Your request has been submitted to the evaluation queue!\nPlease wait for up to an hour for the model to show in the PENDING list."
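Gating the upload on `is_running_on_huggingface()` means local runs never need Hub write credentials, and the request JSON stays on disk instead of being deleted after upload. Note that `path_in_repo` is derived by splitting the local path on `"eval-queue/"`, which assumes the request file is written inside a local `eval-queue` checkout of the queue dataset. With illustrative values (the real file name pattern may differ):

```python
# Illustrative values only; the real out_path comes from EVAL_REQUESTS_PATH.
out_path = "eval-queue/some-user/some-model_eval_request.json"
path_in_repo = out_path.split("eval-queue/")[1]
print(path_in_repo)  # -> "some-user/some-model_eval_request.json"
```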
utils/check_local.py ADDED
@@ -0,0 +1,4 @@
+import os
+
+def is_running_on_huggingface():
+    return "SPACE_ID" in os.environ  # Hugging Face Spaces set this environment variable