pminervini committed on
Commit
c5558c5
1 Parent(s): a654acb
Files changed (1) hide show
  1. backend-cli.py +77 -14
backend-cli.py CHANGED
@@ -3,6 +3,7 @@
3
  import os
4
  import json
5
 
 
6
  import random
7
  from datetime import datetime
8
 
@@ -17,6 +18,10 @@ from src.leaderboard.read_evals import EvalResult
17
  from src.envs import QUEUE_REPO, RESULTS_REPO, API
18
  from src.utils import my_snapshot_download
19
 
 
 
 
 
20
  import time
21
 
22
  import logging
@@ -124,15 +129,11 @@ def process_finished_requests(thr: int) -> bool:
124
 
125
  # Get all eval request that are FINISHED, if you want to run other evals, change this parameter
126
  eval_requests: list[EvalRequest] = get_eval_requests(job_status=current_finished_status, hf_repo=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH_BACKEND)
127
- # Sort the evals by priority (first submitted first run)
128
  eval_requests: list[EvalRequest] = sort_models_by_priority(api=API, models=eval_requests)
129
 
130
- # XXX
131
- # eval_requests = [r for r in eval_requests if 'bloom-560m' in r.model]
132
-
133
  random.shuffle(eval_requests)
134
 
135
- from src.leaderboard.read_evals import get_raw_eval_results
136
  eval_results: list[EvalResult] = get_raw_eval_results(EVAL_RESULTS_PATH_BACKEND, EVAL_REQUESTS_PATH_BACKEND, True)
137
 
138
  result_name_to_request = {request_to_result_name(r): r for r in eval_requests}
@@ -143,9 +144,10 @@ def process_finished_requests(thr: int) -> bool:
143
  result_name: str = request_to_result_name(eval_request)
144
 
145
  # Check the corresponding result
146
- from typing import Optional
147
  eval_result: Optional[EvalResult] = result_name_to_result[result_name] if result_name in result_name_to_result else None
148
 
 
 
149
  task_lst = TASKS_HARNESS.copy()
150
  random.shuffle(task_lst)
151
 
@@ -169,6 +171,58 @@ def process_finished_requests(thr: int) -> bool:
169
  return False
170
 
171
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
  def process_pending_requests() -> bool:
173
  sanity_checks()
174
 
@@ -176,7 +230,7 @@ def process_pending_requests() -> bool:
176
 
177
  # Get all eval request that are PENDING, if you want to run other evals, change this parameter
178
  eval_requests = get_eval_requests(job_status=current_pending_status, hf_repo=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH_BACKEND)
179
- # Sort the evals by priority (first submitted first run)
180
  eval_requests = sort_models_by_priority(api=API, models=eval_requests)
181
 
182
  random.shuffle(eval_requests)
@@ -207,19 +261,28 @@ def process_pending_requests() -> bool:
207
  if __name__ == "__main__":
208
  wait = True
209
 
210
- import socket
211
- if socket.gethostname() in {'hamburg'} or os.path.isdir("/home/pminervi"):
212
  wait = False
213
 
214
  if wait:
215
  time.sleep(60 * random.randint(5, 10))
216
- pass
217
 
218
- # res = False
219
- res = process_pending_requests()
 
 
 
220
 
221
  if res is False:
222
- res = process_finished_requests(100)
 
 
 
 
 
223
 
224
  if res is False:
225
- res = process_finished_requests(0)
 
 
 
 
3
  import os
4
  import json
5
 
6
+ import socket
7
  import random
8
  from datetime import datetime
9
 
 
18
  from src.envs import QUEUE_REPO, RESULTS_REPO, API
19
  from src.utils import my_snapshot_download
20
 
21
+ from src.leaderboard.read_evals import get_raw_eval_results
22
+
23
+ from typing import Optional
24
+
25
  import time
26
 
27
  import logging
 
129
 
130
  # Get all eval request that are FINISHED, if you want to run other evals, change this parameter
131
  eval_requests: list[EvalRequest] = get_eval_requests(job_status=current_finished_status, hf_repo=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH_BACKEND)
132
+ # Sort the evals by priority (first submitted, first run)
133
  eval_requests: list[EvalRequest] = sort_models_by_priority(api=API, models=eval_requests)
134
 
 
 
 
135
  random.shuffle(eval_requests)
136
 
 
137
  eval_results: list[EvalResult] = get_raw_eval_results(EVAL_RESULTS_PATH_BACKEND, EVAL_REQUESTS_PATH_BACKEND, True)
138
 
139
  result_name_to_request = {request_to_result_name(r): r for r in eval_requests}
 
144
  result_name: str = request_to_result_name(eval_request)
145
 
146
  # Check the corresponding result
 
147
  eval_result: Optional[EvalResult] = result_name_to_result[result_name] if result_name in result_name_to_result else None
148
 
149
+ breakpoint()
150
+
151
  task_lst = TASKS_HARNESS.copy()
152
  random.shuffle(task_lst)
153
 
 
171
  return False
172
 
173
 
174
def maybe_refresh_results(thr: int) -> bool:
    """Run (or re-run) a single harness task for one eligible eval request.

    Requests whose status is PENDING, FINISHED or FAILED are fetched and visited
    in random order. For each request with at least ``thr`` likes, the harness
    tasks are visited in random order, and the first task that qualifies is
    evaluated. A task qualifies when the request has no stored result at all,
    when the task's benchmark is missing from the stored results, or when the
    benchmark name contains one of ``nq``, ``trivia``, ``tqa`` or ``self``
    (such tasks are always re-run).

    Args:
        thr: Minimum value of ``likes`` a request needs in order to be considered.

    Returns:
        True as soon as one evaluation has been run, False if nothing qualified.
    """
    sanity_checks()

    # Requests in any of these states are candidates for a refresh.
    refreshable_statuses = [PENDING_STATUS, FINISHED_STATUS, FAILED_STATUS]
    # Benchmark-name fragments that force a re-run even when a result already exists.
    always_rerun_markers = ('nq', 'trivia', 'tqa', 'self')

    eval_requests: list[EvalRequest] = get_eval_requests(job_status=refreshable_statuses, hf_repo=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH_BACKEND)
    # NOTE(review): the shuffle right after the sort discards the priority order;
    # both calls are kept so the existing behaviour is unchanged.
    eval_requests = sort_models_by_priority(api=API, models=eval_requests)
    random.shuffle(eval_requests)

    eval_results: list[EvalResult] = get_raw_eval_results(EVAL_RESULTS_PATH_BACKEND, EVAL_REQUESTS_PATH_BACKEND, True)

    result_name_to_request = {request_to_result_name(r): r for r in eval_requests}
    result_name_to_result = {r.eval_name: r for r in eval_results}

    for eval_request in eval_requests:
        if eval_request.likes < thr:
            continue

        result_name: str = request_to_result_name(eval_request)

        # Stored result for this request, or None when it has never been evaluated.
        eval_result: Optional[EvalResult] = result_name_to_result.get(result_name)

        task_lst = TASKS_HARNESS.copy()
        random.shuffle(task_lst)

        # Iterate over tasks and run the first one that is missing or must always be refreshed.
        for task in task_lst:
            task_name = task.benchmark

            needs_run = (
                eval_result is None
                or task_name not in eval_result.results
                or any(marker in task_name for marker in always_rerun_markers)
            )
            if not needs_run:
                continue

            # Looked up by result name (as before) but bound to a new name so the
            # loop variable is not overwritten while iterating.
            request_to_run: EvalRequest = result_name_to_request[result_name]

            # Refresh the local copy of the queue before each status change.
            my_snapshot_download(repo_id=QUEUE_REPO, revision="main", local_dir=EVAL_REQUESTS_PATH_BACKEND, repo_type="dataset", max_workers=60)
            my_set_eval_request(api=API, eval_request=request_to_run, set_to_status=RUNNING_STATUS, hf_repo=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH_BACKEND)

            # The return value is not used here.
            process_evaluation(task, request_to_run)

            my_snapshot_download(repo_id=QUEUE_REPO, revision="main", local_dir=EVAL_REQUESTS_PATH_BACKEND, repo_type="dataset", max_workers=60)
            my_set_eval_request(api=API, eval_request=request_to_run, set_to_status=FINISHED_STATUS, hf_repo=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH_BACKEND)

            # One evaluation per invocation: stop after the first task that was run.
            return True

    return False
224
+
225
+
226
  def process_pending_requests() -> bool:
227
  sanity_checks()
228
 
 
230
 
231
  # Get all eval request that are PENDING, if you want to run other evals, change this parameter
232
  eval_requests = get_eval_requests(job_status=current_pending_status, hf_repo=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH_BACKEND)
233
+ # Sort the evals by priority (first submitted, first run)
234
  eval_requests = sort_models_by_priority(api=API, models=eval_requests)
235
 
236
  random.shuffle(eval_requests)
 
261
# Script entry point (new-side lines 261-288 of backend-cli.py in this rendered diff).
# Visible flow: `wait` starts True and is set to False when the hostname is 'hamburg' or
# 'neuromancer' or /home/pminervi exists; when waiting, sleep a random 5-10 minutes.
# Then a coin flip decides whether process_pending_requests() is tried. While `res` is
# False, a further coin flip picks maybe_refresh_results or process_finished_requests,
# first with a threshold argument of 100 and then with 0.
# NOTE(review): indentation was lost in this rendering, so the exact nesting of the two
# time.sleep(60) calls cannot be confirmed from here.
  if __name__ == "__main__":
262
  wait = True
263
 
264
+ if socket.gethostname() in {'hamburg', 'neuromancer'} or os.path.isdir("/home/pminervi"):
 
265
  wait = False
266
 
267
  if wait:
268
  time.sleep(60 * random.randint(5, 10))
 
269
 
270
+ res = False
271
+
272
+ if random.randint(0, 1) == 0:
273
+ res = process_pending_requests()
274
+ time.sleep(60)
275
 
276
  if res is False:
277
+ if random.randint(0, 1) == 0:
278
+ res = maybe_refresh_results(100)
279
+ else:
280
+ res = process_finished_requests(100)
281
+
282
+ time.sleep(60)
283
 
284
  if res is False:
285
+ if random.randint(0, 1) == 0:
286
+ res = maybe_refresh_results(0)
287
+ else:
288
+ res = process_finished_requests(0)