Aaron Mueller committed on
Commit
e996d92
·
1 Parent(s): fb22d4b

testing now

Browse files
Files changed (1) hide show
  1. src/leaderboard/read_evals.py +14 -9
src/leaderboard/read_evals.py CHANGED
@@ -16,7 +16,7 @@ from src.submission.check_validity import is_model_on_hub
16
  class EvalResult:
17
  """Represents one full evaluation. Built from a combination of the result and request file for a given run.
18
  """
19
- eval_name: str # org_model_precision (uid)
20
  full_model: str # org/model (path on hub)
21
  org: str
22
  model: str
@@ -33,6 +33,7 @@ class EvalResult:
33
 
34
  config = data.get("config")
35
  track = data.get("track")
 
36
 
37
  # Get model and org
38
  org_and_model = config.get("model_name", config.get("model_args", None))
@@ -45,7 +46,7 @@ class EvalResult:
45
  org = org_and_model[0]
46
  model = org_and_model[1]
47
  full_model = "/".join(org_and_model)
48
- eval_name = full_model + f" ({track})"
49
 
50
  still_on_hub, _, model_config = is_model_on_hub(
51
  full_model, config.get("model_sha", "main"), trust_remote_code=True, test_tokenizer=False
@@ -88,14 +89,14 @@ class EvalResult:
88
 
89
  def update_with_request_file(self, requests_path):
90
  """Finds the relevant request file for the current model and updates info with it"""
91
- request_file = get_request_file_for_model(requests_path, self.full_model, self.precision.value.name)
92
 
93
  try:
94
  with open(request_file, "r") as f:
95
  request = json.load(f)
96
  self.date = request.get("submitted_time", "")
97
  except Exception:
98
- print(f"Could not find request file for {self.org}/{self.model} with precision {self.precision.value.name}")
99
 
100
  def to_dict(self):
101
  """Converts the Eval Result to a dict compatible with our dataframe display"""
@@ -108,13 +109,17 @@ class EvalResult:
108
  AutoEvalColumn.still_on_hub.name: self.still_on_hub,
109
  }
110
 
111
- for task in Tasks:
 
 
 
 
112
  data_dict[task.value.col_name] = self.results[task.value.benchmark]
113
 
114
  return data_dict
115
 
116
 
117
- def get_request_file_for_model(requests_path, model_name, precision):
118
  """Selects the correct request file for a given model. Only keeps runs tagged as FINISHED"""
119
  request_files = os.path.join(
120
  requests_path,
@@ -122,7 +127,7 @@ def get_request_file_for_model(requests_path, model_name, precision):
122
  )
123
  request_files = glob.glob(request_files)
124
 
125
- # Select correct request file (precision)
126
  request_file = ""
127
  request_files = sorted(request_files, reverse=True)
128
  for tmp_request_file in request_files:
@@ -130,7 +135,7 @@ def get_request_file_for_model(requests_path, model_name, precision):
130
  req_content = json.load(f)
131
  if (
132
  req_content["status"] in ["FINISHED"]
133
- and req_content["precision"] == precision.split(".")[-1]
134
  ):
135
  request_file = tmp_request_file
136
  return request_file
@@ -175,4 +180,4 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResu
175
  except KeyError: # not all eval values present
176
  continue
177
 
178
- return results
 
16
  class EvalResult:
17
  """Represents one full evaluation. Built from a combination of the result and request file for a given run.
18
  """
19
+ eval_name: str # org_model_track (uid)
20
  full_model: str # org/model (path on hub)
21
  org: str
22
  model: str
 
33
 
34
  config = data.get("config")
35
  track = data.get("track")
36
+ self.track = track
37
 
38
  # Get model and org
39
  org_and_model = config.get("model_name", config.get("model_args", None))
 
46
  org = org_and_model[0]
47
  model = org_and_model[1]
48
  full_model = "/".join(org_and_model)
49
+ eval_name = "_".join(org_and_model) + f"_{track}"
50
 
51
  still_on_hub, _, model_config = is_model_on_hub(
52
  full_model, config.get("model_sha", "main"), trust_remote_code=True, test_tokenizer=False
 
89
 
90
  def update_with_request_file(self, requests_path):
91
  """Finds the relevant request file for the current model and updates info with it"""
92
+ request_file = get_request_file_for_model(requests_path, self.full_model, self.track)
93
 
94
  try:
95
  with open(request_file, "r") as f:
96
  request = json.load(f)
97
  self.date = request.get("submitted_time", "")
98
  except Exception:
99
+ print(f"Could not find request file for {self.org}/{self.model}")
100
 
101
  def to_dict(self):
102
  """Converts the Eval Result to a dict compatible with our dataframe display"""
 
109
  AutoEvalColumn.still_on_hub.name: self.still_on_hub,
110
  }
111
 
112
+ if self.track.lower() == "multimodal":
113
+ taskset = TasksMultimodal
114
+ else:
115
+ taskset = Tasks
116
+ for task in taskset:
117
  data_dict[task.value.col_name] = self.results[task.value.benchmark]
118
 
119
  return data_dict
120
 
121
 
122
+ def get_request_file_for_model(requests_path, model_name, track):
123
  """Selects the correct request file for a given model. Only keeps runs tagged as FINISHED"""
124
  request_files = os.path.join(
125
  requests_path,
 
127
  )
128
  request_files = glob.glob(request_files)
129
 
130
+ # Select correct request file (track)
131
  request_file = ""
132
  request_files = sorted(request_files, reverse=True)
133
  for tmp_request_file in request_files:
 
135
  req_content = json.load(f)
136
  if (
137
  req_content["status"] in ["FINISHED"]
138
+ and req_content["track"] == track
139
  ):
140
  request_file = tmp_request_file
141
  return request_file
 
180
  except KeyError: # not all eval values present
181
  continue
182
 
183
+ return results