djstrong committed
Commit 44ddd16
1 Parent(s): 8445932
Files changed (2)
  1. app.py +3 -0
  2. src/leaderboard/read_evals.py +12 -10
app.py CHANGED
@@ -63,6 +63,8 @@ leaderboard_df = original_df.copy()
 leaderboard_df = leaderboard_df[leaderboard_df[AutoEvalColumn.still_on_hub.name] == True]
 # leaderboard_df = leaderboard_df[('speakleash' not in leaderboard_df['model_name_for_query']) | ('Bielik' in leaderboard_df['model_name_for_query'])]
 
+original_df.to_csv("output.csv")
+
 (
     finished_eval_queue_df,
     running_eval_queue_df,
@@ -400,6 +402,7 @@ with demo:
         elem_id="citation-button",
         show_copy_button=True,
     )
+    csv = gr.File(interactive=False, value="output.csv")
 
 scheduler = BackgroundScheduler()
 scheduler.add_job(restart_space, "interval", seconds=1800)
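Net effect of the app.py change: the unfiltered original_df is written to output.csv at startup, and a read-only gr.File component is appended to the Blocks layout so visitors can download that CSV. A minimal self-contained sketch of the same pattern, assuming only pandas and Gradio; the toy DataFrame below is illustrative, not the leaderboard's actual data:

import gradio as gr
import pandas as pd

# Toy stand-in for original_df in the commit.
df = pd.DataFrame({"model": ["org/model-1", "org/model-2"], "score": [0.71, 0.65]})

# Dump the frame to disk before building the UI, mirroring
# original_df.to_csv("output.csv") in app.py.
df.to_csv("output.csv")

with gr.Blocks() as demo:
    # interactive=False renders the file as a download, not an upload widget.
    csv = gr.File(interactive=False, value="output.csv")

demo.launch()

The file is written before the Blocks layout is constructed, presumably because gr.File resolves value="output.csv" against a path that must already exist, which matches the ordering in the diff.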
src/leaderboard/read_evals.py CHANGED
@@ -433,16 +433,7 @@ def get_raw_eval_results(results_path: str, requests_path: str, metadata) -> list
     for k,v in eval_results.items():
         v.results = {k: v for k, (v, start_date) in v.results.items()}
 
-    results = []
-    for v in eval_results.values():
-        try:
-            print(v)
-            v.to_dict() # we test if the dict version is complete
-            # if v.results:
-            results.append(v)
-        except KeyError: # not all eval values present
-            print(f"not all eval values present {v.eval_name} {v.full_model}")
-            continue
+
 
     all_models = []
     missing_results_for_task = {}
@@ -473,6 +464,17 @@
         missing_metadata.append(f"{v.full_model}")
     all_models.append((v.full_model, v.num_params, v.still_on_hub))
 
+    results = []
+    for v in eval_results.values():
+        try:
+            print(v)
+            v.to_dict() # we test if the dict version is complete
+            # if v.results:
+            results.append(v)
+        except KeyError: # not all eval values present
+            print(f"not all eval values present {v.eval_name} {v.full_model}")
+            continue
+
     print(f"Missing sbatch results:")
     for r in for_run:
         if r[0]==5 and r[1] in ['polish_eq_bench']: continue
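In read_evals.py the completeness filter is not deleted but relocated: it now runs after the all_models bookkeeping loop, so entries whose to_dict() raises KeyError are still counted in all_models and in the missing-results reporting before being excluded from results. A standalone sketch of that try/except filtering pattern, with a hypothetical EvalResult standing in for the project's class:

# EvalResult here is a hypothetical stand-in, not the project's actual class.
class EvalResult:
    def __init__(self, eval_name, full_model, results):
        self.eval_name = eval_name
        self.full_model = full_model
        self.results = results

    def to_dict(self):
        # Raises KeyError when a required metric is absent, which is
        # what the filtering loop uses to detect incomplete evals.
        return {"arc": self.results["arc"], "hellaswag": self.results["hellaswag"]}

eval_results = {
    "m1": EvalResult("m1", "org/model-1", {"arc": 0.6, "hellaswag": 0.8}),
    "m2": EvalResult("m2", "org/model-2", {"arc": 0.5}),  # missing hellaswag
}

results = []
for v in eval_results.values():
    try:
        v.to_dict()  # test whether the dict version is complete
        results.append(v)
    except KeyError:  # not all eval values present
        print(f"not all eval values present {v.eval_name} {v.full_model}")

# results now holds only org/model-1

Probing with to_dict() means any missing metric surfaces as a single KeyError in one place, rather than being checked field by field, which matches the "we test if the dict version is complete" comment in the original code.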