pminervini committed on
Commit
9bfc5f2
1 Parent(s): 4c2b065
Files changed (1) hide show
  1. src/leaderboard/read_evals.py +12 -8
src/leaderboard/read_evals.py CHANGED
@@ -88,18 +88,22 @@ class EvalResult:
88
  # We average all scores of a given metric (mostly for mmlu)
89
 
90
  def post_process_results(results: dict) -> dict:
  # Pre-change version shown by this diff: the lines marked "-" are the ones the commit removes.
  # It treats `results` as one flat mapping. Iterating over a shallow copy (so `results` can
  # gain keys during the loop), it stores each value again under a key with "exact_match"
  # replaced by "em"; after re-copying, it stores each value whose key contains "," under the
  # text before the first comma. Existing keys are kept and the same `results` object is returned.
 
91
  res_copy = results.copy()
92
 
93
- for k, v in res_copy.items():
94
- if "exact_match" in k:
95
- results[k.replace("exact_match", "em")] = v
96
 
97
- res_copy = results.copy()
 
 
 
 
98
 
99
- for k, v in res_copy.items():
100
- if "," in k:
101
- tokens = k.split(",")
102
- results[tokens[0]] = v
103
 
104
  return results
105
 
 
88
  # We average all scores of a given metric (mostly for mmlu)
89
 
90
  def post_process_results(results: dict) -> dict:
91
+ # {'nq_open': {'em': 0.018005540166204988, 'em_stderr': 0.0022134216580395583}}
92
  res_copy = results.copy()
93
 
94
+ for task_name in res_copy.keys():
95
+ entry_copy = results[task_name].copy()
 
96
 
97
+ for k, v in entry_copy.items():
98
+ if "exact_match" in k:
99
+ results[task_name][k.replace("exact_match", "em")] = v
100
+
101
+ entry_copy = results[task_name].copy()
102
 
103
+ for k, v in entry_copy.items():
104
+ if "," in k:
105
+ tokens = k.split(",")
106
+ results[task_name][tokens[0]] = v
107
 
108
  return results
109