meg-huggingface committed on
Commit 5bbe032
Parent: 7dd365e

Add handling for when some metrics aren't calculated

Files changed (1):
  1. src/leaderboard/read_evals.py  +13 -3
src/leaderboard/read_evals.py CHANGED
@@ -34,6 +34,8 @@ class EvalResult:
     @classmethod
     def init_from_json_file(self, json_filepath):
         """Inits the result from the specific model result file"""
+        print("Looking at json_filepath:")
+        print(json_filepath)
         with open(json_filepath) as fp:
             data = json.load(fp)
 
@@ -68,7 +70,12 @@ class EvalResult:
         # Extract results available in this file (some results are split in several files)
         results = {}
         for task in Tasks:
-            task = task.value
+            print("Looking at task:")
+            print(task)
+            try:
+                task = task.value
+            except Exception as e:
+                print(e)
 
             # We average all scores of a given metric (not all metrics are present in all files)
             accs = np.array([v.get(task.metric, None) for k, v in data["results"].items() if task.benchmark == k])
@@ -108,7 +115,7 @@ class EvalResult:
 
     def to_dict(self):
         """Converts the Eval Result to a dict compatible with our dataframe display"""
-        average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
+        average = sum([v for v in self.results.values()]) / len(self.results.values())
         data_dict = {
             "eval_name": self.eval_name, # not a column, just a save name,
             AutoEvalColumn.precision.name: self.precision.value.name,
@@ -127,7 +134,10 @@ class EvalResult:
         }
 
         for task in Tasks:
-            data_dict[task.value.col_name] = self.results[task.value.benchmark]
+            try:
+                data_dict[task.value.col_name] = self.results[task.value.benchmark]
+            except KeyError:
+                data_dict[task.value.col_name] = None
 
         return data_dict
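
Taken together, the two changes to to_dict mean the average is now computed over only the metrics that were actually calculated (rather than dividing by len(Tasks)), and benchmark columns with no stored result fall back to None instead of raising KeyError. Below is a minimal, self-contained sketch of that behavior; the benchmark names, column names, and scores are hypothetical and not taken from the repository.

# Minimal sketch (hypothetical names and scores, for illustration only) of the
# missing-metric handling this commit introduces in EvalResult.to_dict().

results = {"benchmark_a": 0.61, "benchmark_b": 0.47}   # "benchmark_c" was never calculated

# Average over the metrics that were actually computed; the previous code
# divided by the total number of tasks, counting benchmarks with no score.
average = sum(v for v in results.values()) / len(results.values())
print(average)   # ~0.54

# Benchmarks with no stored result now fall back to None instead of raising KeyError.
data_dict = {}
for benchmark, col_name in [("benchmark_a", "Benchmark A"),
                            ("benchmark_b", "Benchmark B"),
                            ("benchmark_c", "Benchmark C")]:
    try:
        data_dict[col_name] = results[benchmark]
    except KeyError:
        data_dict[col_name] = None

print(data_dict)   # {'Benchmark A': 0.61, 'Benchmark B': 0.47, 'Benchmark C': None}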