djstrong committed
Commit c4f06f0
1 Parent(s): 75a0c2a
Files changed (2):
  1. src/about.py +3 -3
  2. src/leaderboard/read_evals.py +7 -0
src/about.py CHANGED
@@ -37,9 +37,9 @@ class Tasks(Enum):
     task22 = Task("polish_polqa_open_book", "levenshtein,none", "polqa_open_book_g", "generate_until", 0.0) # generate_until
     task23 = Task("polish_polqa_closed_book", "levenshtein,none", "polqa_closed_book_g", "generate_until", 0.0) # generate_until
     task20 = Task("polish_poleval2018_task3_test_10k", "word_perplexity,none", "poleval2018_task3_test_10k", "other")
-    task24 = Task("polish_poquad_open_book", "levenshtein,none", "poquad_open_book", "other", 0.0)
-    task25 = Task("polish_eq_bench_first_turn", "first_eqbench,none", "eq_bench_first_turn", "other", 0.0)
-    task26 = Task("polish_eq_bench", "average_eqbench,none", "eq_bench", "other", 0.0)
+    # task24 = Task("polish_poquad_open_book", "levenshtein,none", "poquad_open_book", "other", 0.0)
+    # task25 = Task("polish_eq_bench_first_turn", "first_eqbench,none", "eq_bench_first_turn", "other", 0.0)
+    # task26 = Task("polish_eq_bench", "average_eqbench,none", "eq_bench", "other", 0.0)
     # task27 = Task("polish_eq_bench", "revised_eqbench,none", "eq_bench_revised", "other", 0.0)
 
     NUM_FEWSHOT = 0 # Change with your few shot
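
For context, the Task(...) entries in this hunk assume a small container along the lines of the sketch below. The benchmark / metric / col_name fields follow the Hugging Face leaderboard template; the names for the trailing task-type string and baseline float (task_type, baseline) are only inferred from the call sites above, not confirmed by this diff:

    from dataclasses import dataclass

    @dataclass
    class Task:
        benchmark: str             # lm-eval-harness task name, e.g. "polish_polqa_open_book"
        metric: str                # metric key in the results JSON, e.g. "levenshtein,none"
        col_name: str              # column title shown on the leaderboard
        task_type: str = "other"   # "generate_until" vs "other"; field name assumed
        baseline: float = 0.0      # assumed: score of a trivial baseline for this task
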
src/leaderboard/read_evals.py CHANGED
@@ -428,6 +428,7 @@ def get_raw_eval_results(results_path: str, requests_path: str, metadata) -> lis
             print(f"not all eval values present {v.eval_name} {v.full_model}")
             continue
 
+    all_models = []
     missing_results_for_task = {}
     missing_metadata = []
     for v in eval_results.values():
@@ -441,6 +442,7 @@ def get_raw_eval_results(results_path: str, requests_path: str, metadata) -> lis
                 missing_results_for_task[task_name] = [f"{v.full_model}|{v.org_and_model}"]
             if r[AutoEvalColumn.lang.name] is None or r[AutoEvalColumn.lang.name] == "?":
                 missing_metadata.append(f"{v.full_model}")
+        all_models.append((v.full_model, v.num_params))
 
     # print('missing_results_for_task', missing_results_for_task)
     for task, models in missing_results_for_task.items():
@@ -455,4 +457,9 @@ def get_raw_eval_results(results_path: str, requests_path: str, metadata) -> lis
         print(model)
         print()
 
+    print(f"All models:")
+    for model in all_models:
+        print(model)
+    print()
+
     return results
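
Taken on its own, the added logging collects one (full_model, num_params) tuple per parsed eval result and dumps them before returning. A minimal standalone sketch of that behavior, with hypothetical stand-in data in place of real EvalResult objects:

    # Stand-in for the (v.full_model, v.num_params) tuples collected above;
    # the model names and parameter counts here are hypothetical.
    all_models = [
        ("org/model-7b", 7.24),
        ("org/model-1b", 1.10),
    ]

    print(f"All models:")   # f-string kept to mirror the commit; it has no placeholders
    for model in all_models:
        print(model)        # each line is a tuple, e.g. ('org/model-7b', 7.24)
    print()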