pminervini committed on
Commit
13218df
1 Parent(s): e034fec
Files changed (1) hide show
  1. backend-cli.py +17 -12
backend-cli.py CHANGED
@@ -122,7 +122,7 @@ def process_evaluation(task: Task, eval_request: EvalRequest) -> dict:
122
  return results
123
 
124
 
125
- def process_finished_requests(thr: int) -> bool:
126
  sanity_checks()
127
 
128
  current_finished_status = [FINISHED_STATUS, FAILED_STATUS]
@@ -155,7 +155,11 @@ def process_finished_requests(thr: int) -> bool:
155
  for task in task_lst:
156
  task_name = task.benchmark
157
 
158
- if eval_result is None or task_name not in eval_result.results:
 
 
 
 
159
  eval_request: EvalRequest = result_name_to_request[result_name]
160
 
161
  my_snapshot_download(repo_id=QUEUE_REPO, revision="main", local_dir=EVAL_REQUESTS_PATH_BACKEND, repo_type="dataset", max_workers=60)
@@ -171,7 +175,7 @@ def process_finished_requests(thr: int) -> bool:
171
  return False
172
 
173
 
174
- def maybe_refresh_results(thr: int) -> bool:
175
  sanity_checks()
176
 
177
  current_finished_status = [PENDING_STATUS, FINISHED_STATUS, FAILED_STATUS]
@@ -195,8 +199,6 @@ def maybe_refresh_results(thr: int) -> bool:
195
  # Check the corresponding result
196
  eval_result: Optional[EvalResult] = result_name_to_result[result_name] if result_name in result_name_to_result else None
197
 
198
- # breakpoint()
199
-
200
  task_lst = TASKS_HARNESS.copy()
201
  random.shuffle(task_lst)
202
 
@@ -204,11 +206,12 @@ def maybe_refresh_results(thr: int) -> bool:
204
  for task in task_lst:
205
  task_name = task.benchmark
206
 
 
 
 
207
 
208
- # task_lst = ['nq', 'trivia', 'tqa', 'self', 'xsum', 'cnn', 'memo']
209
  task_lst = ['nq', 'trivia', 'tqa', 'self']
210
- if (eval_result is None or
211
- task_name not in eval_result.results or
212
  any(ss in task_name for ss in task_lst)):
213
  eval_request: EvalRequest = result_name_to_request[result_name]
214
 
@@ -262,9 +265,11 @@ def process_pending_requests() -> bool:
262
 
263
  if __name__ == "__main__":
264
  wait = True
 
265
 
266
  if socket.gethostname() in {'hamburg', 'neuromancer'} or os.path.isdir("/home/pminervi"):
267
  wait = False
 
268
 
269
  if wait:
270
  time.sleep(60 * random.randint(5, 10))
@@ -277,14 +282,14 @@ if __name__ == "__main__":
277
 
278
  if res is False:
279
  if random.randint(0, 1) == 0:
280
- res = maybe_refresh_results(100)
281
  else:
282
- res = process_finished_requests(100)
283
 
284
  time.sleep(60)
285
 
286
  if res is False:
287
  if random.randint(0, 1) == 0:
288
- res = maybe_refresh_results(0)
289
  else:
290
- res = process_finished_requests(0)
 
122
  return results
123
 
124
 
125
+ def process_finished_requests(thr: int, hard_task_lst: Optional[list[str]] = None) -> bool:
126
  sanity_checks()
127
 
128
  current_finished_status = [FINISHED_STATUS, FAILED_STATUS]
 
155
  for task in task_lst:
156
  task_name = task.benchmark
157
 
158
+ do_run_task = False
159
+ if hard_task_lst is None or any(ss in task_name for ss in hard_task_lst):
160
+ do_run_task = True
161
+
162
+ if (eval_result is None or task_name not in eval_result.results) and do_run_task:
163
  eval_request: EvalRequest = result_name_to_request[result_name]
164
 
165
  my_snapshot_download(repo_id=QUEUE_REPO, revision="main", local_dir=EVAL_REQUESTS_PATH_BACKEND, repo_type="dataset", max_workers=60)
 
175
  return False
176
 
177
 
178
+ def maybe_refresh_results(thr: int, hard_task_lst: Optional[list[str]] = None) -> bool:
179
  sanity_checks()
180
 
181
  current_finished_status = [PENDING_STATUS, FINISHED_STATUS, FAILED_STATUS]
 
199
  # Check the corresponding result
200
  eval_result: Optional[EvalResult] = result_name_to_result[result_name] if result_name in result_name_to_result else None
201
 
 
 
202
  task_lst = TASKS_HARNESS.copy()
203
  random.shuffle(task_lst)
204
 
 
206
  for task in task_lst:
207
  task_name = task.benchmark
208
 
209
+ do_run_task = False
210
+ if hard_task_lst is None or any(ss in task_name for ss in hard_task_lst):
211
+ do_run_task = True
212
 
 
213
  task_lst = ['nq', 'trivia', 'tqa', 'self']
214
+ if (eval_result is None or do_run_task or task_name not in eval_result.results or
 
215
  any(ss in task_name for ss in task_lst)):
216
  eval_request: EvalRequest = result_name_to_request[result_name]
217
 
 
265
 
266
  if __name__ == "__main__":
267
  wait = True
268
+ hard_task_lst = None
269
 
270
  if socket.gethostname() in {'hamburg', 'neuromancer'} or os.path.isdir("/home/pminervi"):
271
  wait = False
272
+ hard_task_lst = ['nq', 'trivia', 'tqa']
273
 
274
  if wait:
275
  time.sleep(60 * random.randint(5, 10))
 
282
 
283
  if res is False:
284
  if random.randint(0, 1) == 0:
285
+ res = maybe_refresh_results(100, hard_task_lst=hard_task_lst)
286
  else:
287
+ res = process_finished_requests(100, hard_task_lst=hard_task_lst)
288
 
289
  time.sleep(60)
290
 
291
  if res is False:
292
  if random.randint(0, 1) == 0:
293
+ res = maybe_refresh_results(0, hard_task_lst=hard_task_lst)
294
  else:
295
+ res = process_finished_requests(0, hard_task_lst=hard_task_lst)