Clémentine committed
Commit 3dfaf22
Parent: eaace79

add model architecture as column

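In short, this commit threads the evaluation-requests path through the result-loading pipeline and reads each model's architecture from its Hub config so it can be shown as a leaderboard column. A minimal sketch of the extraction logic introduced below, using the same fallback values as the diff (an illustration, not part of the commit):

    from transformers import AutoConfig

    def read_architecture(model_name: str, revision: str = "main") -> str:
        # Sketch: mirrors the logic added in read_evals.py below.
        try:
            model_config = AutoConfig.from_pretrained(model_name, revision=revision)
        except Exception:
            return "?"  # model or config not reachable on the Hub
        architectures = getattr(model_config, "architectures", None)
        return ";".join(architectures) if architectures else "?"
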
app.py CHANGED
@@ -54,7 +54,7 @@ except Exception:
     restart_space()
 
 
-raw_data, original_df = get_leaderboard_df(EVAL_RESULTS_PATH, COLS, BENCHMARK_COLS)
+raw_data, original_df = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
 update_collections(original_df.copy())
 leaderboard_df = original_df.copy()
 

src/display/utils.py CHANGED
@@ -34,6 +34,7 @@ class AutoEvalColumn: # Auto evals column
     gsm8k = ColumnContent("GSM8K", "number", True)
     drop = ColumnContent("DROP", "number", True)
     model_type = ColumnContent("Type", "str", False)
+    architecture = ColumnContent("Architecture", "str", False)
     weight_type = ColumnContent("Weight type", "str", False, True)
     precision = ColumnContent("Precision", "str", False)  # , True)
     license = ColumnContent("Hub License", "str", False)

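The `ColumnContent` helper itself is outside this diff; a plausible shape, inferred only from the call sites above (the field names are guesses, the real definition lives elsewhere in src/display/utils.py):

    from dataclasses import dataclass

    @dataclass
    class ColumnContent:
        # Inferred from calls such as ColumnContent("Weight type", "str", False, True).
        name: str                           # header shown in the leaderboard table
        type: str                           # "str" or "number"
        displayed_by_default: bool = False
        hidden: bool = False                # fourth positional argument where present

The new Architecture entry follows the same pattern as the Type column: a string column that is not displayed by default.
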
src/leaderboard/read_evals.py CHANGED
@@ -6,6 +6,7 @@ from dataclasses import dataclass
 
 import dateutil
 from datetime import datetime
+from transformers import AutoConfig
 import numpy as np
 
 from src.display.formatting import make_clickable_model
@@ -15,24 +16,26 @@ from src.submission.check_validity import is_model_on_hub
 
 @dataclass
 class EvalResult:
-    eval_name: str
-    full_model: str
-    org: str
+    # Also see src.display.utils.AutoEvalColumn for what will be displayed.
+    eval_name: str  # org_model_precision (uid)
+    full_model: str  # org/model (path on hub)
+    org: str
     model: str
-    revision: str
+    revision: str  # commit hash, "" if main
     results: dict
     precision: str = ""
-    model_type: ModelType = ModelType.Unknown
-    weight_type: str = "Original"
-    architecture: str = "Unknown"
+    model_type: ModelType = ModelType.Unknown  # Pretrained, fine tuned, ...
+    weight_type: str = "Original"  # Original or Adapter
+    architecture: str = "Unknown"  # From config file
     license: str = "?"
     likes: int = 0
     num_params: int = 0
-    date: str = ""
+    date: str = ""  # submission date of request file
    still_on_hub: bool = False
 
     @classmethod
     def init_from_json_file(self, json_filepath):
+        """Inits the result from the specific model result file"""
         with open(json_filepath) as fp:
             data = json.load(fp)
 
@@ -58,9 +61,14 @@ class EvalResult:
         result_key = f"{org}_{model}_{precision}"
         full_model = "/".join(org_and_model)
 
-        still_on_hub, error = is_model_on_hub(
+        still_on_hub, error, model_config = is_model_on_hub(
             full_model, config.get("model_sha", "main"), trust_remote_code=True
         )
+        architecture = "?"
+        if model_config is not None:
+            architectures = getattr(model_config, "architectures", None)
+            if architectures:
+                architecture = ";".join(architectures)
 
         # Extract results available in this file (some results are split in several files)
         results = {}
@@ -96,18 +104,21 @@ class EvalResult:
             org=org,
             model=model,
             results=results,
-            precision=precision,  # todo model_type=, weight_type=
-            revision=config.get("model_sha", ""),
+            precision=precision,
+            revision= config.get("model_sha", ""),
             still_on_hub=still_on_hub,
+            architecture=architecture
         )
 
-    def update_with_request_file(self):
-        request_file = get_request_file_for_model(self.full_model, self.precision)
+    def update_with_request_file(self, requests_path):
+        """Finds the relevant request file for the current model and updates info with it"""
+        request_file = get_request_file_for_model(requests_path, self.full_model, self.precision)
 
         try:
             with open(request_file, "r") as f:
                 request = json.load(f)
             self.model_type = ModelType.from_str(request.get("model_type", ""))
+            self.weight_type = request.get("weight_type", "?")
             self.license = request.get("license", "?")
             self.likes = request.get("likes", 0)
             self.num_params = request.get("params", 0)
@@ -116,6 +127,7 @@ class EvalResult:
             print(f"Could not find request file for {self.org}/{self.model}")
 
     def to_dict(self):
+        """Converts the Eval Result to a dict compatible with our dataframe display"""
         average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
         data_dict = {
             "eval_name": self.eval_name,  # not a column, just a save name,
@@ -123,6 +135,7 @@ class EvalResult:
             AutoEvalColumn.model_type.name: self.model_type.value.name,
             AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
             AutoEvalColumn.weight_type.name: self.weight_type,
+            AutoEvalColumn.architecture.name: self.architecture,
             AutoEvalColumn.model.name: make_clickable_model(self.full_model),
             AutoEvalColumn.dummy.name: self.full_model,
             AutoEvalColumn.revision.name: self.revision,
@@ -139,9 +152,10 @@
         return data_dict
 
 
-def get_request_file_for_model(model_name, precision):
+def get_request_file_for_model(requests_path, model_name, precision):
+    """Selects the correct request file for a given model. Only keeps runs tagged as FINISHED"""
     request_files = os.path.join(
-        "eval-queue",
+        requests_path,
         f"{model_name}_eval_request_*.json",
     )
     request_files = glob.glob(request_files)
@@ -160,8 +174,9 @@ def get_request_file_for_model(model_name, precision):
     return request_file
 
 
-def get_raw_eval_results(results_path: str) -> list[EvalResult]:
-    json_filepaths = []
+def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
+    """From the path of the results folder root, extract all needed info for results"""
+    model_result_filepaths = []
 
     for root, _, files in os.walk(results_path):
         # We should only have json files in model results
@@ -174,15 +189,14 @@ def get_raw_eval_results(results_path: str) -> list[EvalResult]:
         except dateutil.parser._parser.ParserError:
             files = [files[-1]]
 
-        # up_to_date = files[-1]
         for file in files:
-            json_filepaths.append(os.path.join(root, file))
+            model_result_filepaths.append(os.path.join(root, file))
 
     eval_results = {}
-    for json_filepath in json_filepaths:
+    for model_result_filepath in model_result_filepaths:
         # Creation of result
-        eval_result = EvalResult.init_from_json_file(json_filepath)
-        eval_result.update_with_request_file()
+        eval_result = EvalResult.init_from_json_file(model_result_filepath)
+        eval_result.update_with_request_file(requests_path)
 
         # Store results of same eval together
         eval_name = eval_result.eval_name

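Only part of `get_request_file_for_model` appears in the hunks above; the selection logic referenced by its new docstring is elided. A hedged sketch of how the lookup can work once `requests_path` is a parameter (the status and precision checks are assumptions based on the docstring, not taken from this diff):

    import glob
    import json
    import os

    def get_request_file_for_model(requests_path, model_name, precision):
        # Sketch: keep the last matching request file whose run is tagged FINISHED.
        pattern = os.path.join(requests_path, f"{model_name}_eval_request_*.json")
        request_file = ""
        for candidate in sorted(glob.glob(pattern)):
            with open(candidate, "r") as f:
                req = json.load(f)
            if req.get("status") == "FINISHED" and req.get("precision") == precision:
                request_file = candidate
        return request_file
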
src/populate.py CHANGED
@@ -9,8 +9,8 @@ from src.leaderboard.filter_models import filter_models
 from src.leaderboard.read_evals import get_raw_eval_results
 
 
-def get_leaderboard_df(results_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
-    raw_data = get_raw_eval_results(results_path)
+def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
+    raw_data = get_raw_eval_results(results_path, requests_path)
     all_data_json = [v.to_dict() for v in raw_data]
     all_data_json.append(baseline_row)
     filter_models(all_data_json)

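A usage sketch of the updated signature, mirroring the call in app.py above (the path values here are placeholders, not from the diff):

    raw_data, original_df = get_leaderboard_df(
        results_path="eval-results",   # placeholder local checkout of the results dataset
        requests_path="eval-queue",    # placeholder local checkout of the requests dataset
        cols=COLS,
        benchmark_cols=BENCHMARK_COLS,
    )
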
src/submission/check_validity.py CHANGED
@@ -38,17 +38,18 @@ def check_model_card(repo_id: str) -> tuple[bool, str]:
 
 def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_remote_code=False) -> tuple[bool, str]:
     try:
-        AutoConfig.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token)
-        return True, None
+        config = AutoConfig.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token)
+        return True, None, config
 
     except ValueError:
         return (
             False,
             "needs to be launched with `trust_remote_code=True`. For safety reason, we do not allow these models to be automatically submitted to the leaderboard.",
+            None
         )
 
     except Exception:
-        return False, "was not found on hub!"
+        return False, "was not found on hub!", None
 
 
 def get_model_size(model_info: ModelInfo, precision: str):

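Because `is_model_on_hub` now returns a third element (the loaded `AutoConfig`, or `None` on failure), every caller unpacks three values. A small usage sketch (the repo id is just an example):

    still_on_hub, error, model_config = is_model_on_hub(
        "org/some-model", "main", trust_remote_code=True
    )
    if still_on_hub and model_config is not None:
        print(getattr(model_config, "architectures", None))  # e.g. ["LlamaForCausalLM"]
    else:
        print(f"Model check failed: {error}")
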
src/submission/submit.py CHANGED
@@ -48,12 +48,12 @@ def add_new_eval(
 
     # Is the model on the hub?
     if weight_type in ["Delta", "Adapter"]:
-        base_model_on_hub, error = is_model_on_hub(base_model, revision, H4_TOKEN)
+        base_model_on_hub, error, _ = is_model_on_hub(base_model, revision, H4_TOKEN)
         if not base_model_on_hub:
             return styled_error(f'Base model "{base_model}" {error}')
 
     if not weight_type == "Adapter":
-        model_on_hub, error = is_model_on_hub(model, revision)
+        model_on_hub, error, _ = is_model_on_hub(model, revision)
         if not model_on_hub:
             return styled_error(f'Model "{model}" {error}')
 