Create local setup (#1)
Commit: 1629b01ca419d717ce2fcbfea04fa634bfd1a66b
Co-authored-by: Eleftheria Stein-Kousathana <eleftherias@users.noreply.huggingface.co>
- README.md +25 -0
- app.py +10 -20
- src/populate.py +9 -8
- src/submission/submit.py +16 -12
- utils/check_local.py +4 -0
README.md CHANGED
@@ -11,6 +11,31 @@ license: apache-2.0
 short_description: Benchmark the ability of LLMs to produce secure code.
 ---
 
+# Running locally
+
+Ensure [cmake](https://cmake.org/cmake/help/latest/) is installed on your system.
+
+Ensure you're running with Python version **3.10**.
+
+### (Optional) Create a virtual environment
+
+```bash
+python -m venv venv
+source venv/bin/activate
+```
+
+### Install the required packages
+
+```bash
+pip install -r requirements.txt
+```
+
+### Run the application
+
+```bash
+python app.py
+```
+
 # Start the configuration
 
 Most of the variables to change for a default leaderboard are in `src/env.py` (replace the path for your leaderboard) and `src/about.py` (for tasks).
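Note: the README pins Python **3.10**, but nothing in the commit enforces it at runtime. A hypothetical guard (not part of this commit) that `app.py` could use to fail fast on a mismatched interpreter:

```python
# Hypothetical guard, not in this commit: abort early if the interpreter
# is not the Python 3.10 the README calls for.
import sys

if sys.version_info[:2] != (3, 10):
    raise SystemExit(f"Python 3.10 required, found {sys.version.split()[0]}")
```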
app.py CHANGED
@@ -102,10 +102,6 @@ def init_leaderboard(dataframe):
         interactive=False,
     )
 
-def get_evaluation_queue_df(path, cols):
-    # Implementation to retrieve DataFrames
-    pass
-
 def start_evaluation(row):
     logger.info(f"Starting evaluation for row ID {row.get('id')}")
     # Implementation to start evaluation
@@ -134,10 +130,6 @@ def process_evaluation_queue():
         finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
 
         # Assign statuses to each DataFrame
-        finished_eval_queue_df = finished_eval_queue_df.copy()
-        running_eval_queue_df = running_eval_queue_df.copy()
-        pending_eval_queue_df = pending_eval_queue_df.copy()
-
         finished_eval_queue_df['status'] = 'FINISHED'
         running_eval_queue_df['status'] = 'RUNNING'
         pending_eval_queue_df['status'] = 'PENDING'
@@ -177,6 +169,7 @@ def process_evaluation_queue():
                 logger.warning(f"Unknown status '{status}' for row ID {row.get('id')}")
 
         logger.info("Completed processing of evaluation queue")
+        return finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df
 
     except Exception as e:
         logger.error(f"Error processing evaluation queue: {e}", exc_info=True)
@@ -200,7 +193,7 @@ with demo:
 
             with gr.Column():
                 with gr.Accordion(
-                    f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
+                    f"✅ Finished Evaluations",
                     open=False,
                 ):
                     with gr.Row():
@@ -211,8 +204,8 @@ with demo:
                             row_count=5,
                         )
                 with gr.Accordion(
-                    f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
-                    open=False,
+                    f"🔄 Running Evaluation Queue",
+                    open=False,
                 ):
                     with gr.Row():
                         running_eval_table = gr.components.Dataframe(
@@ -223,7 +216,7 @@ with demo:
                         )
 
                 with gr.Accordion(
-                    f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
+                    f"⏳ Pending Evaluation Queue",
                     open=False,
                 ):
                     with gr.Row():
@@ -233,6 +226,11 @@ with demo:
                             datatype=EVAL_TYPES,
                             row_count=5,
                         )
+
+    # Process the evaluation queue every 2 minutes
+    timer = gr.Timer(120, active=True)
+    timer.tick(process_evaluation_queue, inputs=[], outputs=[finished_eval_table, running_eval_table, pending_eval_table])
+
     with gr.Row():
         gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
 
@@ -290,12 +288,4 @@ with demo:
                 show_copy_button=True,
             )
 
-# Schedule the job with enhanced settings
-scheduler.add_job(
-    process_evaluation_queue,
-    trigger="interval",
-    seconds=30,
-    next_run_time=None,  # Prevents the job from running immediately upon scheduler start
-    id='process_evaluation_queue_job'
-)
 demo.queue(default_concurrency_limit=40).launch()
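The scheduler-to-timer change is the core of this diff: the APScheduler job ran `process_evaluation_queue` in the background and discarded its result, whereas `gr.Timer` runs it as a Gradio event whose return values re-render the three queue tables, which is why the function now ends with a `return`. A minimal, self-contained sketch of the same pattern (assumes Gradio >= 4.40, where `gr.Timer` and its `.tick` event are available; `refresh_queue` is an illustrative stand-in for `process_evaluation_queue`):

```python
# Minimal sketch of the gr.Timer refresh pattern used in app.py.
import gradio as gr
import pandas as pd

def refresh_queue():
    # In app.py this is process_evaluation_queue, which returns the three
    # refreshed queue DataFrames; one table is enough to show the shape.
    return pd.DataFrame({"model": ["org/model"], "status": ["PENDING"]})

with gr.Blocks() as demo:
    pending_table = gr.Dataframe(row_count=5)
    timer = gr.Timer(120, active=True)  # fires every 120 seconds
    # Each tick's return value is routed into the table's outputs.
    timer.tick(refresh_queue, inputs=[], outputs=[pending_table])

demo.launch()
```

Returning the DataFrames from the tick handler also explains why the accordion titles lose their `len(...)` counts: the tables no longer exist as module-level data at build time, so the counts can't be computed when the UI is constructed.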
src/populate.py CHANGED
@@ -39,15 +39,16 @@ def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
             all_evals.append(data)
         elif ".md" not in entry:
             # this is a folder
-            sub_entries = [e for e in os.listdir(f"{save_path}/{entry}") if os.path.isfile(e) and not e.startswith(".")]
+            sub_entries = [e for e in os.listdir(f"{save_path}/{entry}") if os.path.isfile(os.path.join(save_path, entry, e)) and not e.startswith(".")]
             for sub_entry in sub_entries:
-                file_path = os.path.join(save_path, entry, sub_entry)
-                with open(file_path) as fp:
-                    data = json.load(fp)
-
-                data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
-                data[EvalQueueColumn.revision.name] = data.get("revision", "main")
-                all_evals.append(data)
+                if ".json" in sub_entry:
+                    file_path = os.path.join(save_path, entry, sub_entry)
+                    with open(file_path) as fp:
+                        data = json.load(fp)
+
+                    data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
+                    data[EvalQueueColumn.revision.name] = data.get("revision", "main")
+                    all_evals.append(data)
 
     pending_list = [e for e in all_evals if e["status"] in ["PENDING", "RERUN"]]
     running_list = [e for e in all_evals if e["status"] == "RUNNING"]
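Two fixes here. First, `os.listdir` returns bare file names, so the old `os.path.isfile(e)` resolved against the current working directory and effectively filtered out every entry; the name must be re-joined with its directory before the check. Second, the new `".json"` guard skips stray non-JSON files in request folders. An illustrative standalone helper (not from the repo) showing the corrected pattern:

```python
# Illustrative helper, not part of the commit: os.listdir yields bare
# names, so stat-style checks must join them back onto the directory.
import os

def list_json_files(directory: str) -> list[str]:
    return [
        name
        for name in os.listdir(directory)
        # os.path.isfile(name) alone would resolve against the CWD and
        # silently drop every entry; join with the directory first.
        if os.path.isfile(os.path.join(directory, name))
        and not name.startswith(".")
        and name.endswith(".json")
    ]
```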
src/submission/submit.py CHANGED
@@ -11,6 +11,7 @@ from src.submission.check_validity import (
     get_model_size,
     is_model_on_hub,
 )
+from utils.check_local import is_running_on_huggingface
 
 REQUESTED_MODELS = None
 USERS_TO_SUBMISSION_DATES = None
@@ -109,18 +110,21 @@ def add_new_eval(
     with open(out_path, "w") as f:
         f.write(json.dumps(eval_entry))
 
-    logger.debug("Uploading eval file")
-    API.upload_file(
-        path_or_fileobj=out_path,
-        path_in_repo=out_path.split("eval-queue/")[1],
-        repo_id=QUEUE_REPO,
-        repo_type="dataset",
-        commit_message=f"Add {model} to eval queue",
-    )
-    logger.debug("Eval file uploaded")
-    logger.debug("Removing local eval file")
-    # Remove the local file
-    os.remove(out_path)
+    if is_running_on_huggingface():
+        logger.debug("Uploading eval file")
+        API.upload_file(
+            path_or_fileobj=out_path,
+            path_in_repo=out_path.split("eval-queue/")[1],
+            repo_id=QUEUE_REPO,
+            repo_type="dataset",
+            commit_message=f"Add {model} to eval queue",
+        )
+        logger.debug("Eval file uploaded")
+        logger.debug("Removing local eval file")
+        # Remove the local file
+        os.remove(out_path)
+    else:
+        logger.info("Running locally. Skipping file upload.")
 
     return styled_message(
         "Your request has been submitted to the evaluation queue!\nPlease wait for up to an hour for the model to show in the PENDING list."
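Gating the upload on `is_running_on_huggingface()` means local runs never need Hub write credentials, and the request JSON stays on disk instead of being deleted after upload. Note that `path_in_repo` is derived by splitting the local path on `"eval-queue/"`, which assumes the request file is written inside a local `eval-queue` checkout of the queue dataset. With illustrative values (the real file name pattern may differ):

```python
# Illustrative values only; the real out_path comes from EVAL_REQUESTS_PATH.
out_path = "eval-queue/some-user/some-model_eval_request.json"
path_in_repo = out_path.split("eval-queue/")[1]
print(path_in_repo)  # -> "some-user/some-model_eval_request.json"
```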
utils/check_local.py ADDED
@@ -0,0 +1,4 @@
+import os
+
+def is_running_on_huggingface():
+    return "SPACE_ID" in os.environ  # Hugging Face Spaces set this environment variable