Spaces:
Running
Running
XufengDuan
committed on
Commit
•
8548d58
1
Parent(s):
9c713c2
update scripts
Browse files
src/leaderboard/read_evals.py
CHANGED
@@ -66,11 +66,30 @@ class EvalResult:
|
|
66 |
results = {}
|
67 |
for task in utils.Tasks:
|
68 |
task = task.value
|
|
|
|
|
|
|
69 |
|
70 |
-
|
71 |
-
|
72 |
|
73 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
74 |
|
75 |
return self(
|
76 |
eval_name=result_key,
|
|
|
66 |
results = {}
|
67 |
for task in utils.Tasks:
|
68 |
task = task.value
|
69 |
+
if isinstance(task.metric, str):
|
70 |
+
accs = np.array([v.get(task.metric, None) for k, v in data["results"].items() if
|
71 |
+
task.benchmark == k and isinstance(v, dict)])
|
72 |
|
73 |
+
# Filter out None values so that accs contains only valid numeric values
|
74 |
+
accs = accs[accs != None]
|
75 |
|
76 |
+
results[task.benchmark] = accs
|
77 |
+
|
78 |
+
elif isinstance(task.metric, list):
|
79 |
+
accs = np.array([str(v.get(task.metric, None)) for k, v in data["results"].items() if
|
80 |
+
task.benchmark == k and isinstance(v, dict)])
|
81 |
+
|
82 |
+
accs = accs[accs != None]
|
83 |
+
|
84 |
+
results[task.benchmark] = accs
|
85 |
+
|
86 |
+
else:
|
87 |
+
print(f"Skipping task with unhandled metric type: {type(task.metric)}")
|
88 |
+
|
89 |
+
# # We average all scores of a given metric (not all metrics are present in all files)
|
90 |
+
# accs = np.array([v.get(task.metric, None) for k, v in data["results"].items() if task.benchmark == k])
|
91 |
+
#
|
92 |
+
# results[task.benchmark] = accs
|
93 |
|
94 |
return self(
|
95 |
eval_name=result_key,
|