XufengDuan committed on
Commit
8548d58
1 Parent(s): 9c713c2

update scripts

Browse files
Files changed (1) hide show
  1. src/leaderboard/read_evals.py +22 -3
src/leaderboard/read_evals.py CHANGED
@@ -66,11 +66,30 @@ class EvalResult:
66
  results = {}
67
  for task in utils.Tasks:
68
  task = task.value
 
 
 
69
 
70
- # We average all scores of a given metric (not all metrics are present in all files)
71
- accs = np.array([v.get(task.metric, None) for k, v in data["results"].items() if task.benchmark == k])
72
 
73
- results[task.benchmark] = accs
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
  return self(
76
  eval_name=result_key,
 
66
  results = {}
67
  for task in utils.Tasks:
68
  task = task.value
69
+ if isinstance(task.metric, str):
70
+ accs = np.array([v.get(task.metric, None) for k, v in data["results"].items() if
71
+ task.benchmark == k and isinstance(v, dict)])
72
 
73
+ # 过滤掉 None 值,确保 accs 只包含有效的数值
74
+ accs = accs[accs != None]
75
 
76
+ results[task.benchmark] = accs
77
+
78
+ elif isinstance(task.metric, list):
79
+ accs = np.array([str(v.get(task.metric, None)) for k, v in data["results"].items() if
80
+ task.benchmark == k and isinstance(v, dict)])
81
+
82
+ accs = accs[accs != None]
83
+
84
+ results[task.benchmark] = accs
85
+
86
+ else:
87
+ print(f"Skipping task with unhandled metric type: {type(task.metric)}")
88
+
89
+ # # We average all scores of a given metric (not all metrics are present in all files)
90
+ # accs = np.array([v.get(task.metric, None) for k, v in data["results"].items() if task.benchmark == k])
91
+ #
92
+ # results[task.benchmark] = accs
93
 
94
  return self(
95
  eval_name=result_key,