sh1gechan committed on
Commit
5ceb136
·
verified ·
1 Parent(s): e97ba7c

Update src/leaderboard/read_evals.py

Browse files
Files changed (1) hide show
  1. src/leaderboard/read_evals.py +18 -18
src/leaderboard/read_evals.py CHANGED
@@ -68,27 +68,27 @@ class EvalResult:
68
  if architectures:
69
  architecture = ";".join(architectures)
70
 
71
- # Extract results available in this file (some results are split in several files)
72
- results = {}
73
- for task in Tasks:
74
- task = task.value
75
-
76
- # We average all scores of a given metric (not all metrics are present in all files)
77
- accs = np.array([v for k, v in data[task.benchmark].items() if task.metric == k])
78
- if accs.size == 0 or any([acc is None for acc in accs]):
79
- continue
80
-
81
- mean_acc = np.mean(accs) * 100.0
82
- results[task.benchmark] = mean_acc
83
- # if "scores" not in data:
84
- # raise KeyError(f"'scores' key not found in JSON file: {json_filepath}")
85
-
86
- # scores = data["scores"]
87
  # results = {}
88
  # for task in Tasks:
89
  # task = task.value
90
- # if task.metric in scores:
91
- # results[task.benchmark] = Decimal(scores[task.metric])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
 
93
  return self(
94
  eval_name=result_key,
 
68
  if architectures:
69
  architecture = ";".join(architectures)
70
 
71
+ # # Extract results available in this file (some results are split in several files)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  # results = {}
73
  # for task in Tasks:
74
  # task = task.value
75
+
76
+ # # We average all scores of a given metric (not all metrics are present in all files)
77
+ # accs = np.array([v for k, v in data[task.benchmark].items() if task.metric == k])
78
+ # if accs.size == 0 or any([acc is None for acc in accs]):
79
+ # continue
80
+
81
+ # mean_acc = np.mean(accs) * 100.0
82
+ # results[task.benchmark] = mean_acc
83
+ if "scores" not in data:
84
+ raise KeyError(f"'scores' key not found in JSON file: {json_filepath}")
85
+
86
+ scores = data["scores"]
87
+ results = {}
88
+ for task in Tasks:
89
+ task = task.value
90
+ if task.metric in scores:
91
+ results[task.benchmark] = Decimal(scores[task.metric])
92
 
93
  return self(
94
  eval_name=result_key,