kantord eleftherias committed
Commit 431363c · verified · 1 Parent(s): f0763c9

Create local setup (#1)


- Create local setup (1629b01ca419d717ce2fcbfea04fa634bfd1a66b)


Co-authored-by: Eleftheria Stein-Kousathana <eleftherias@users.noreply.huggingface.co>

Files changed (5)
  1. README.md +25 -0
  2. app.py +10 -20
  3. src/populate.py +9 -8
  4. src/submission/submit.py +16 -12
  5. utils/check_local.py +4 -0
README.md CHANGED
@@ -11,6 +11,31 @@ license: apache-2.0
 short_description: Benchmark the ability of LLMs to produce secure code.
 ---
 
+# Running locally
+
+Ensure [cmake](https://cmake.org/cmake/help/latest/) is installed on your system.
+
+Ensure you're running with Python version **3.10**.
+
+### (Optional) Create a virtual environment
+
+```bash
+python -m venv venv
+source venv/bin/activate
+```
+
+### Install the required packages
+
+```bash
+pip install -r requirements.txt
+```
+
+### Run the application
+
+```bash
+python app.py
+```
+
 # Start the configuration
 
 Most of the variables to change for a default leaderboard are in `src/env.py` (replace the path for your leaderboard) and `src/about.py` (for tasks).
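The `src/env.py` configuration mentioned in that context line is not part of this diff. For orientation only, in the stock leaderboard template it boils down to a handful of module-level constants roughly like the sketch below, which is presumably where `QUEUE_REPO`, `API`, and `EVAL_REQUESTS_PATH` used in the diffs further down come from. Every name, repo id, and path here is an assumption for illustration, not something confirmed by this commit.

```python
# Hypothetical sketch of the settings src/env.py typically holds in the
# leaderboard template; every name and value below is an assumption.
import os

from huggingface_hub import HfApi

OWNER = "your-org-or-username"      # namespace that owns the Space and datasets
QUEUE_REPO = f"{OWNER}/requests"    # dataset repo holding evaluation requests
RESULTS_REPO = f"{OWNER}/results"   # dataset repo holding evaluation results

TOKEN = os.environ.get("HF_TOKEN")  # write token, only needed when uploads happen
API = HfApi(token=TOKEN)

CACHE_PATH = os.getenv("HF_HOME", ".")
EVAL_REQUESTS_PATH = os.path.join(CACHE_PATH, "eval-queue")
EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "eval-results")
```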
app.py CHANGED
@@ -102,10 +102,6 @@ def init_leaderboard(dataframe):
         interactive=False,
     )
 
-def get_evaluation_queue_df(path, cols):
-    # Implementation to retrieve DataFrames
-    pass
-
 def start_evaluation(row):
     logger.info(f"Starting evaluation for row ID {row.get('id')}")
     # Implementation to start evaluation
@@ -134,10 +130,6 @@ def process_evaluation_queue():
         finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
 
         # Assign statuses to each DataFrame
-        finished_eval_queue_df = finished_eval_queue_df.copy()
-        running_eval_queue_df = running_eval_queue_df.copy()
-        pending_eval_queue_df = pending_eval_queue_df.copy()
-
         finished_eval_queue_df['status'] = 'FINISHED'
         running_eval_queue_df['status'] = 'RUNNING'
         pending_eval_queue_df['status'] = 'PENDING'
@@ -177,6 +169,7 @@ def process_evaluation_queue():
                 logger.warning(f"Unknown status '{status}' for row ID {row.get('id')}")
 
         logger.info("Completed processing of evaluation queue")
+        return finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df
 
     except Exception as e:
         logger.error(f"Error processing evaluation queue: {e}", exc_info=True)
@@ -200,7 +193,7 @@ with demo:
 
     with gr.Column():
         with gr.Accordion(
-            f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
+            f"✅ Finished Evaluations",
             open=False,
         ):
            with gr.Row():
@@ -211,8 +204,8 @@ with demo:
                    row_count=5,
                )
        with gr.Accordion(
-            f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
-            open=False,
+            f"🔄 Running Evaluation Queue",
+            open=False,
        ):
            with gr.Row():
                running_eval_table = gr.components.Dataframe(
@@ -223,7 +216,7 @@ with demo:
                )
 
        with gr.Accordion(
-            f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
+            f"⏳ Pending Evaluation Queue",
            open=False,
        ):
            with gr.Row():
@@ -233,6 +226,11 @@ with demo:
                    datatype=EVAL_TYPES,
                    row_count=5,
                )
+
+    # Process the evaluation queue every 2 minutes
+    timer = gr.Timer(120, active=True)
+    timer.tick(process_evaluation_queue, inputs=[], outputs=[finished_eval_table, running_eval_table, pending_eval_table])
+
    with gr.Row():
        gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
 
@@ -290,12 +288,4 @@ with demo:
            show_copy_button=True,
        )
 
-    # Schedule the job with enhanced settings
-    scheduler.add_job(
-        process_evaluation_queue,
-        trigger="interval",
-        seconds=30,
-        next_run_time=None,  # Prevents the job from running immediately upon scheduler start
-        id='process_evaluation_queue_job'
-    )
 demo.queue(default_concurrency_limit=40).launch()
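The app.py hunks replace the APScheduler background job with Gradio's own timer event, so the queue tables refresh from inside the UI and the accordion labels no longer depend on dataframes computed at import time. Below is a minimal, self-contained sketch of that polling pattern, assuming a Gradio release that ships `gr.Timer`; the `refresh` function is a stand-in for the app's real `process_evaluation_queue`, not a copy of it.

```python
import datetime

import gradio as gr


def refresh():
    # Stand-in for process_evaluation_queue: whatever this returns is what
    # the bound output components display on each tick.
    return datetime.datetime.now().isoformat(timespec="seconds")


with gr.Blocks() as demo:
    status = gr.Textbox(label="Last refresh")
    # Poll every 120 seconds, mirroring the interval wired up in app.py.
    timer = gr.Timer(120, active=True)
    timer.tick(refresh, inputs=[], outputs=[status])

if __name__ == "__main__":
    demo.launch()
```

Because `tick` routes the callback's return value to its `outputs`, this is also why `process_evaluation_queue` now ends by returning the three dataframes: they become the new contents of the finished, running, and pending tables.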
src/populate.py CHANGED
@@ -39,15 +39,16 @@ def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
             all_evals.append(data)
         elif ".md" not in entry:
             # this is a folder
-            sub_entries = [e for e in os.listdir(f"{save_path}/{entry}") if os.path.isfile(e) and not e.startswith(".")]
+            sub_entries = [e for e in os.listdir(f"{save_path}/{entry}") if os.path.isfile(os.path.join(save_path, entry, e)) and not e.startswith(".")]
             for sub_entry in sub_entries:
-                file_path = os.path.join(save_path, entry, sub_entry)
-                with open(file_path) as fp:
-                    data = json.load(fp)
-
-                data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
-                data[EvalQueueColumn.revision.name] = data.get("revision", "main")
-                all_evals.append(data)
+                if ".json" in sub_entry:
+                    file_path = os.path.join(save_path, entry, sub_entry)
+                    with open(file_path) as fp:
+                        data = json.load(fp)
+
+                    data[EvalQueueColumn.model.name] = make_clickable_model(data["model"])
+                    data[EvalQueueColumn.revision.name] = data.get("revision", "main")
+                    all_evals.append(data)
 
     pending_list = [e for e in all_evals if e["status"] in ["PENDING", "RERUN"]]
     running_list = [e for e in all_evals if e["status"] == "RUNNING"]
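The one-line change to `sub_entries` fixes a path-resolution bug: `os.path.isfile(e)` tests a bare filename against the current working directory, so request files inside `save_path/entry` were silently filtered out. A small sketch that reproduces the difference; the temporary layout and file names below are made up purely for the demonstration.

```python
import os
import tempfile

# Hypothetical layout mirroring save_path/<org folder>/<request json>.
save_path = tempfile.mkdtemp()
entry = "ExampleOrg"
os.makedirs(os.path.join(save_path, entry))
with open(os.path.join(save_path, entry, "request.json"), "w") as f:
    f.write("{}")

for e in os.listdir(os.path.join(save_path, entry)):
    # Bare filename: resolved against the current working directory,
    # so this is False (assuming no "request.json" happens to live there).
    print(os.path.isfile(e))
    # Joined path: resolved against the listed folder, so this is True.
    print(os.path.isfile(os.path.join(save_path, entry, e)))
```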
src/submission/submit.py CHANGED
@@ -11,6 +11,7 @@ from src.submission.check_validity import (
     get_model_size,
     is_model_on_hub,
 )
+from utils.check_local import is_running_on_huggingface
 
 REQUESTED_MODELS = None
 USERS_TO_SUBMISSION_DATES = None
@@ -109,18 +110,21 @@ def add_new_eval(
     with open(out_path, "w") as f:
         f.write(json.dumps(eval_entry))
 
-    logger.debug("Uploading eval file")
-    API.upload_file(
-        path_or_fileobj=out_path,
-        path_in_repo=out_path.split("eval-queue/")[1],
-        repo_id=QUEUE_REPO,
-        repo_type="dataset",
-        commit_message=f"Add {model} to eval queue",
-    )
-    logger.debug("Eval file uploaded")
-    logger.debug("Removing local eval file")
-    # Remove the local file
-    os.remove(out_path)
+    if is_running_on_huggingface():
+        logger.debug("Uploading eval file")
+        API.upload_file(
+            path_or_fileobj=out_path,
+            path_in_repo=out_path.split("eval-queue/")[1],
+            repo_id=QUEUE_REPO,
+            repo_type="dataset",
+            commit_message=f"Add {model} to eval queue",
+        )
+        logger.debug("Eval file uploaded")
+        logger.debug("Removing local eval file")
+        # Remove the local file
+        os.remove(out_path)
+    else:
+        logger.info("Running locally. Skipping file upload.")
 
     return styled_message(
         "Your request has been submitted to the evaluation queue!\nPlease wait for up to an hour for the model to show in the PENDING list."
utils/check_local.py ADDED
@@ -0,0 +1,4 @@
+import os
+
+def is_running_on_huggingface():
+    return "SPACE_ID" in os.environ  # Hugging Face Spaces set this environment variable