edbeeching committed on
Commit
59c748f
1 Parent(s): f90ad24

finished this part of leaderboard refactor

Browse files
Files changed (1) hide show
  1. utils.py +5 -3
utils.py CHANGED
@@ -44,6 +44,7 @@ def make_clickable_model(model_name):
44
 
45
  @dataclass
46
  class EvalResult:
 
47
  org : str
48
  model : str
49
  is_8bit : bool
@@ -51,8 +52,9 @@ class EvalResult:
51
 
52
  def to_dict(self):
53
  data_dict = {}
 
54
  data_dict["base_model"] = make_clickable_model(f"{self.org}/{self.model}")
55
- data_dict["total ⬆️"] = sum([v for k,v in self.results.items()])
56
  data_dict["# params"] = "unknown (todo)"
57
 
58
  for benchmark in BENCHMARKS:
@@ -86,8 +88,8 @@ def parse_eval_result(json_filepath: str) -> Tuple[str, dict]:
86
  for benchmark, metric in zip(BENCHMARKS, METRICS):
87
  if benchmark in json_filepath:
88
  accs = np.array([v[metric] for k, v in data["results"].items()])
89
- mean_acc = np.mean(accs)
90
- eval_result = EvalResult(org, model, is_8bit, {benchmark:mean_acc})
91
 
92
  return result_key, eval_result
93
 
 
44
 
45
  @dataclass
46
  class EvalResult:
47
+ eval_name : str
48
  org : str
49
  model : str
50
  is_8bit : bool
 
52
 
53
  def to_dict(self):
54
  data_dict = {}
55
+ data_dict["eval_name"] = self.eval_name
56
  data_dict["base_model"] = make_clickable_model(f"{self.org}/{self.model}")
57
+ data_dict["total ⬆️"] = round(sum([v for k,v in self.results.items()]),3)
58
  data_dict["# params"] = "unknown (todo)"
59
 
60
  for benchmark in BENCHMARKS:
 
88
  for benchmark, metric in zip(BENCHMARKS, METRICS):
89
  if benchmark in json_filepath:
90
  accs = np.array([v[metric] for k, v in data["results"].items()])
91
+ mean_acc = round(np.mean(accs),3)
92
+ eval_result = EvalResult(result_key, org, model, is_8bit, {benchmark:mean_acc})
93
 
94
  return result_key, eval_result
95