Update src/leaderboard/read_evals.py
src/leaderboard/read_evals.py  CHANGED  +18 -18
@@ -68,27 +68,27 @@ class EvalResult:
         if architectures:
             architecture = ";".join(architectures)

-        # Extract results available in this file (some results are split in several files)
-        results = {}
-        for task in Tasks:
-            task = task.value
-
-            # We average all scores of a given metric (not all metrics are present in all files)
-            accs = np.array([v for k, v in data[task.benchmark].items() if task.metric == k])
-            if accs.size == 0 or any([acc is None for acc in accs]):
-                continue
-
-            mean_acc = np.mean(accs) * 100.0
-            results[task.benchmark] = mean_acc
-        # if "scores" not in data:
-        #     raise KeyError(f"'scores' key not found in JSON file: {json_filepath}")
-
-        # scores = data["scores"]
+        # # Extract results available in this file (some results are split in several files)
         # results = {}
         # for task in Tasks:
         #     task = task.value
-
-        #
+
+        #     # We average all scores of a given metric (not all metrics are present in all files)
+        #     accs = np.array([v for k, v in data[task.benchmark].items() if task.metric == k])
+        #     if accs.size == 0 or any([acc is None for acc in accs]):
+        #         continue
+
+        #     mean_acc = np.mean(accs) * 100.0
+        #     results[task.benchmark] = mean_acc
+        if "scores" not in data:
+            raise KeyError(f"'scores' key not found in JSON file: {json_filepath}")
+
+        scores = data["scores"]
+        results = {}
+        for task in Tasks:
+            task = task.value
+            if task.metric in scores:
+                results[task.benchmark] = Decimal(scores[task.metric])

         return self(
             eval_name=result_key,
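
For context, a minimal, runnable sketch of what the new extraction path expects from an eval JSON file. Only the "scores" check, the per-task metric lookup, and the Decimal conversion mirror the added lines; the Task/Tasks stand-ins and the sample payload below are illustrative assumptions, not taken from this repository.

import json
from dataclasses import dataclass
from decimal import Decimal
from enum import Enum


@dataclass(frozen=True)
class Task:
    benchmark: str  # key used in the results dict
    metric: str     # key looked up inside data["scores"]


class Tasks(Enum):
    # Hypothetical stand-in for the leaderboard's real Tasks enum.
    task0 = Task(benchmark="arc_challenge", metric="arc_acc_norm")
    task1 = Task(benchmark="hellaswag", metric="hellaswag_acc_norm")


# Stand-in payload; real files are written by the evaluation runner.
raw = '{"scores": {"arc_acc_norm": 61.25, "hellaswag_acc_norm": 83.5}}'
data = json.loads(raw)

# Mirrors the added lines: fail loudly if the pre-computed scores are missing,
# then copy each known metric as-is, wrapped in Decimal.
if "scores" not in data:
    raise KeyError("'scores' key not found in JSON file: <json_filepath>")

scores = data["scores"]
results = {}
for task in Tasks:
    task = task.value
    if task.metric in scores:
        results[task.benchmark] = Decimal(scores[task.metric])

print(results)  # {'arc_challenge': Decimal('61.25'), 'hellaswag': Decimal('83.5')}

Compared with the removed path, nothing is averaged or rescaled by 100 any more; the apparent intent (not stated in the diff itself) is that the JSON now carries already-aggregated, display-ready scores, and Decimal keeps them exactly as written for later formatting.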