Spaces:
Paused
Paused
edbeeching
committed on
Commit
•
59c748f
1
Parent(s):
f90ad24
finished this part of leaderboard refactor
Browse files
utils.py
CHANGED
@@ -44,6 +44,7 @@ def make_clickable_model(model_name):
|
|
44 |
|
45 |
@dataclass
|
46 |
class EvalResult:
|
|
|
47 |
org : str
|
48 |
model : str
|
49 |
is_8bit : bool
|
@@ -51,8 +52,9 @@ class EvalResult:
|
|
51 |
|
52 |
def to_dict(self):
|
53 |
data_dict = {}
|
|
|
54 |
data_dict["base_model"] = make_clickable_model(f"{self.org}/{self.model}")
|
55 |
-
data_dict["total ⬆️"] = sum([v for k,v in self.results.items()])
|
56 |
data_dict["# params"] = "unknown (todo)"
|
57 |
|
58 |
for benchmark in BENCHMARKS:
|
@@ -86,8 +88,8 @@ def parse_eval_result(json_filepath: str) -> Tuple[str, dict]:
|
|
86 |
for benchmark, metric in zip(BENCHMARKS, METRICS):
|
87 |
if benchmark in json_filepath:
|
88 |
accs = np.array([v[metric] for k, v in data["results"].items()])
|
89 |
-
mean_acc = np.mean(accs)
|
90 |
-
eval_result = EvalResult(org, model, is_8bit, {benchmark:mean_acc})
|
91 |
|
92 |
return result_key, eval_result
|
93 |
|
|
|
44 |
|
45 |
@dataclass
|
46 |
class EvalResult:
|
47 |
+
eval_name : str
|
48 |
org : str
|
49 |
model : str
|
50 |
is_8bit : bool
|
|
|
52 |
|
53 |
def to_dict(self):
|
54 |
data_dict = {}
|
55 |
+
data_dict["eval_name"] = self.eval_name
|
56 |
data_dict["base_model"] = make_clickable_model(f"{self.org}/{self.model}")
|
57 |
+
data_dict["total ⬆️"] = round(sum([v for k,v in self.results.items()]),3)
|
58 |
data_dict["# params"] = "unknown (todo)"
|
59 |
|
60 |
for benchmark in BENCHMARKS:
|
|
|
88 |
for benchmark, metric in zip(BENCHMARKS, METRICS):
|
89 |
if benchmark in json_filepath:
|
90 |
accs = np.array([v[metric] for k, v in data["results"].items()])
|
91 |
+
mean_acc = round(np.mean(accs),3)
|
92 |
+
eval_result = EvalResult(result_key, org, model, is_8bit, {benchmark:mean_acc})
|
93 |
|
94 |
return result_key, eval_result
|
95 |
|