pminervini committed
Commit a88d51c
Parent: 9bfc5f2
backend-cli.py CHANGED
@@ -3,6 +3,7 @@
 import os
 import json
 
+import random
 from datetime import datetime
 
 from huggingface_hub import snapshot_download
@@ -99,7 +100,6 @@ def process_finished_requests() -> bool:
     # Sort the evals by priority (first submitted first run)
     eval_requests: list[EvalRequest] = sort_models_by_priority(api=API, models=eval_requests)
 
-    import random
     random.shuffle(eval_requests)
 
     from src.leaderboard.read_evals import get_raw_eval_results
@@ -115,8 +115,11 @@ def process_finished_requests() -> bool:
         from typing import Optional
         eval_result: Optional[EvalResult] = result_name_to_result[result_name] if result_name in result_name_to_result else None
 
+        task_lst = TASKS_HARNESS.copy()
+        random.shuffle(task_lst)
+
         # Iterate over tasks and, if we do not have results for a task, run the relevant evaluations
-        for task in TASKS_HARNESS:
+        for task in task_lst:
             task_name = task.benchmark
 
             if eval_result is None or task_name not in eval_result.results:
@@ -145,7 +148,6 @@ def process_pending_requests() -> bool:
     # Sort the evals by priority (first submitted first run)
     eval_requests = sort_models_by_priority(api=API, models=eval_requests)
 
-    import random
     random.shuffle(eval_requests)
 
     print(f"Found {len(eval_requests)} {','.join(current_pending_status)} eval requests")
src/leaderboard/read_evals.py CHANGED
@@ -133,7 +133,7 @@ class EvalResult:
             self.num_params = request.get("params", 0)
             self.date = request.get("submitted_time", "")
         except Exception:
-            print(f"Could not find request file for {self.org}/{self.model}")
+            print(f"Could not find request file for {self.org}/{self.model} -- path: {requests_path}")
 
     def is_complete(self) -> bool:
         for task in Tasks:
@@ -169,7 +169,7 @@ class EvalResult:
 
 
 def get_request_file_for_model(requests_path, model_name, precision):
-    """Selects the correct request file for a given model. Only keeps runs tagged as FINISHED"""
+    """Selects the correct request file for a given model. Only keeps runs tagged as FINISHED and RUNNING"""
     request_files = os.path.join(
         requests_path,
         f"{model_name}_eval_request_*.json",
@@ -179,11 +179,12 @@ def get_request_file_for_model(requests_path, model_name, precision):
     # Select correct request file (precision)
     request_file = ""
     request_files = sorted(request_files, reverse=True)
+    # print('XXX', request_files)
     for tmp_request_file in request_files:
         with open(tmp_request_file, "r") as f:
             req_content = json.load(f)
             if (
-                req_content["status"] in ["FINISHED"]
+                req_content["status"] in ["FINISHED", "RUNNING"]
                 and req_content["precision"] == precision.split(".")[-1]
             ):
                 request_file = tmp_request_file
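
Note on the read_evals.py changes: the request-file filter is widened so that requests tagged RUNNING qualify alongside FINISHED ones, and the error message now includes the requests path for easier debugging. A minimal sketch of the updated predicate, assuming payloads shaped like the *_eval_request_*.json files (the sample dicts below are hypothetical):

def is_selectable(req_content: dict, precision: str) -> bool:
    # Mirrors the updated condition: RUNNING requests now qualify too.
    return (
        req_content["status"] in ["FINISHED", "RUNNING"]
        and req_content["precision"] == precision.split(".")[-1]
    )

# Hypothetical request payloads, shaped like the *_eval_request_*.json files.
requests = [
    {"status": "RUNNING", "precision": "float16"},
    {"status": "FINISHED", "precision": "bfloat16"},
    {"status": "PENDING", "precision": "float16"},
]

# "Precision.float16".split(".")[-1] == "float16", so only the first
# (RUNNING, float16) entry passes the filter.
print([r for r in requests if is_selectable(r, "Precision.float16")])

One behavioral detail worth noting from the diff: the loop in `get_request_file_for_model` overwrites `request_file` on every match without breaking, so with the reverse-sorted file list the last matching file in that order is the one kept.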
submit-cli.py CHANGED
@@ -118,7 +118,7 @@ def main():
 
     filtered_model_lst = sorted([m for m in model_lst if custom_filter(m)], key=lambda m: m.downloads, reverse=True)
 
-    for i in range(min(50, len(filtered_model_lst))):
+    for i in range(min(200, len(filtered_model_lst))):
         model = filtered_model_lst[i]
 
         print(f'Considering {model.id} ..')
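
Note on the submit-cli.py change: it only raises the cap on candidate models from the top 50 to the top 200 by downloads. For illustration, the same cap can be expressed with a slice, which stops at the end of a shorter list automatically; this sketch uses hypothetical model stubs rather than the Hub API:

# Hypothetical stand-ins for the Hub models returned by the filtered listing.
class Model:
    def __init__(self, id: str, downloads: int):
        self.id, self.downloads = id, downloads

model_lst = [Model("org/a", 900), Model("org/b", 1200), Model("org/c", 50)]
filtered_model_lst = sorted(model_lst, key=lambda m: m.downloads, reverse=True)

MAX_MODELS = 200  # the new cap introduced by the commit

# Equivalent to range(min(MAX_MODELS, len(filtered_model_lst))).
for model in filtered_model_lst[:MAX_MODELS]:
    print(f'Considering {model.id} ..')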