Clémentine committed
Commit 9b2e755
Parent: 0c7ef71

simplified display, added an extra config repo to carry dynamic information
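The dynamic-info dataset repo referenced below (DYNAMIC_INFO_REPO) carries per-model metadata that changes over time, keyed by full model name. Its exact schema is not part of this commit; the sketch below is inferred from the fields read by update_with_dynamic_file_dict in src/leaderboard/read_evals.py, and the example model and values are hypothetical.

    # Hypothetical entry in the dynamic info file, as consumed by
    # update_with_dynamic_file_dict (schema inferred from the reader code):
    dynamic_data = {
        "some-org/some-moe-model": {   # keyed by eval_result.full_model
            "likes": 123,              # read with file_dict.get("likes", 0)
            "still_on_hub": True,      # required key
            "tags": ["moe"],           # drives the Merged / MoE / Flagged columns
        }
    }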

app.py CHANGED
@@ -30,6 +30,7 @@ from src.display.utils import (
 from src.envs import API, EVAL_REQUESTS_PATH, DYNAMIC_INFO_REPO, DYNAMIC_INFO_FILE_PATH, DYNAMIC_INFO_PATH, EVAL_RESULTS_PATH, H4_TOKEN, IS_PUBLIC, QUEUE_REPO, REPO_ID, RESULTS_REPO
 from src.populate import get_evaluation_queue_df, get_leaderboard_df
 from src.submission.submit import add_new_eval
+from src.scripts.update_all_request_files import update_dynamic_files
 from src.tools.collections import update_collections
 from src.tools.plots import (
     create_metric_plot_obj,
@@ -100,10 +101,11 @@ def update_table(
     size_query: list,
     show_deleted: bool,
     show_merges: bool,
+    show_moe: bool,
     show_flagged: bool,
     query: str,
 ):
-    filtered_df = filter_models(hidden_df, type_query, size_query, precision_query, show_deleted, show_merges, show_flagged)
+    filtered_df = filter_models(hidden_df, type_query, size_query, precision_query, show_deleted, show_merges, show_moe, show_flagged)
     filtered_df = filter_queries(query, filtered_df)
     df = select_columns(filtered_df, columns)
     return df
@@ -119,13 +121,13 @@ def search_table(df: pd.DataFrame, query: str) -> pd.DataFrame:
 
 
 def select_columns(df: pd.DataFrame, columns: list) -> pd.DataFrame:
-    always_here_cols = [
-        AutoEvalColumn.model_type_symbol.name,
-        AutoEvalColumn.model.name,
-    ]
+    always_here_cols = [c.name for c in fields(AutoEvalColumn) if c.never_hidden]
+    dummy_col = [AutoEvalColumn.dummy.name]
+    #AutoEvalColumn.model_type_symbol.name,
+    #AutoEvalColumn.model.name,
     # We use COLS to maintain sorting
     filtered_df = df[
-        always_here_cols + [c for c in COLS if c in df.columns and c in columns] + [AutoEvalColumn.dummy.name]
+        always_here_cols + [c for c in COLS if c in df.columns and c in columns] + dummy_col
     ]
     return filtered_df
 
@@ -151,7 +153,7 @@ def filter_queries(query: str, filtered_df: pd.DataFrame):
 
 
 def filter_models(
-    df: pd.DataFrame, type_query: list, size_query: list, precision_query: list, show_deleted: bool, show_merges: bool, show_flagged: bool
+    df: pd.DataFrame, type_query: list, size_query: list, precision_query: list, show_deleted: bool, show_merges: bool, show_moe: bool, show_flagged: bool
 ) -> pd.DataFrame:
     # Show all models
     if show_deleted:
@@ -162,6 +164,9 @@ def filter_models(
     if not show_merges:
         filtered_df = filtered_df[filtered_df[AutoEvalColumn.merged.name] == False]
 
+    if not show_moe:
+        filtered_df = filtered_df[filtered_df[AutoEvalColumn.moe.name] == False]
+
     if not show_flagged:
         filtered_df = filtered_df[filtered_df[AutoEvalColumn.flagged.name] == False]
 
@@ -176,7 +181,16 @@ def filter_models(
 
     return filtered_df
 
-leaderboard_df = filter_models(leaderboard_df, [t.to_str(" : ") for t in ModelType], list(NUMERIC_INTERVALS.keys()), [i.value.name for i in Precision], False, False, False)
+leaderboard_df = filter_models(
+    df=leaderboard_df,
+    type_query=[t.to_str(" : ") for t in ModelType],
+    size_query=list(NUMERIC_INTERVALS.keys()),
+    precision_query=[i.value.name for i in Precision],
+    show_deleted=False,
+    show_merges=False,
+    show_moe=True,
+    show_flagged=False
+)
 
 demo = gr.Blocks(css=custom_css)
 with demo:
@@ -216,6 +230,9 @@ with demo:
             merged_models_visibility = gr.Checkbox(
                 value=False, label="Show merges", interactive=True
             )
+            moe_models_visibility = gr.Checkbox(
+                value=True, label="Show MoE", interactive=True
+            )
             flagged_models_visibility = gr.Checkbox(
                 value=False, label="Show flagged models", interactive=True
             )
@@ -274,6 +291,7 @@ with demo:
                 filter_columns_size,
                 deleted_models_visibility,
                 merged_models_visibility,
+                moe_models_visibility,
                 flagged_models_visibility,
                 search_bar,
             ],
@@ -292,6 +310,7 @@ with demo:
                 filter_columns_size,
                 deleted_models_visibility,
                 merged_models_visibility,
+                moe_models_visibility,
                 flagged_models_visibility,
                 search_bar,
             ],
@@ -300,7 +319,7 @@ with demo:
     # Check query parameter once at startup and update search bar + hidden component
     demo.load(load_query, inputs=[], outputs=[search_bar, hidden_search_bar])
 
-    for selector in [shown_columns, filter_columns_type, filter_columns_precision, filter_columns_size, deleted_models_visibility, merged_models_visibility, flagged_models_visibility]:
+    for selector in [shown_columns, filter_columns_type, filter_columns_precision, filter_columns_size, deleted_models_visibility, merged_models_visibility, moe_models_visibility, flagged_models_visibility]:
         selector.change(
             update_table,
             [
@@ -311,6 +330,7 @@ with demo:
                 filter_columns_size,
                 deleted_models_visibility,
                 merged_models_visibility,
+                moe_models_visibility,
                 flagged_models_visibility,
                 search_bar,
             ],
@@ -439,6 +459,7 @@ with demo:
 
 scheduler = BackgroundScheduler()
 scheduler.add_job(restart_space, "interval", seconds=10800)
+scheduler.add_job(update_dynamic_files, "interval", seconds=10000)  # takes about 3 min
 scheduler.start()
 
 demo.queue(default_concurrency_limit=40).launch()
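For readers unfamiliar with the Gradio pattern used above: each filter widget is registered through selector.change, which re-runs update_table with the current values of every input whenever one widget changes. A minimal, self-contained sketch of that wiring (a hypothetical toy app, not the leaderboard code):

    import gradio as gr
    import pandas as pd

    df = pd.DataFrame({"model": ["a", "b"], "moe": [True, False]})

    def update_table(show_moe: bool) -> pd.DataFrame:
        # Same idea as filter_models: drop MoE rows when the box is unchecked.
        return df if show_moe else df[df["moe"] == False]

    with gr.Blocks() as demo:
        moe_box = gr.Checkbox(value=True, label="Show MoE", interactive=True)
        table = gr.Dataframe(value=df)
        # One .change registration per selector, as in app.py's for loop.
        moe_box.change(update_table, inputs=[moe_box], outputs=[table])

    demo.launch()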
src/display/utils.py CHANGED
@@ -50,9 +50,10 @@ auto_eval_column_dict.append(["merged", ColumnContent, ColumnContent("Merged", "
 auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False)])
 auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
 auto_eval_column_dict.append(["likes", ColumnContent, ColumnContent("Hub ❤️", "number", False)])
-auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
+auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False, hidden=True)])
 auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
-auto_eval_column_dict.append(["flagged", ColumnContent, ColumnContent("Flagged", "bool", False, False)])
+auto_eval_column_dict.append(["flagged", ColumnContent, ColumnContent("Flagged", "bool", False, hidden=True)])
+auto_eval_column_dict.append(["moe", ColumnContent, ColumnContent("MoE", "bool", False, hidden=True)])
 # Dummy column for the search bar (hidden by the custom CSS)
 auto_eval_column_dict.append(["dummy", ColumnContent, ColumnContent("model_name_for_query", "str", False, dummy=True)])
 
@@ -108,6 +109,7 @@ human_baseline_row = {
     AutoEvalColumn.gsm8k.name: 100,
     AutoEvalColumn.dummy.name: "human_baseline",
     AutoEvalColumn.model_type.name: "",
+    AutoEvalColumn.flagged.name: False,
 }
 
 @dataclass
@@ -168,10 +170,8 @@ class Precision(Enum):
 
 
 # Column selection
-COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
-TYPES = [c.type for c in fields(AutoEvalColumn) if not c.hidden]
-COLS_LITE = [c.name for c in fields(AutoEvalColumn) if c.displayed_by_default and not c.hidden]
-TYPES_LITE = [c.type for c in fields(AutoEvalColumn) if c.displayed_by_default and not c.hidden]
+COLS = [c.name for c in fields(AutoEvalColumn)]
+TYPES = [c.type for c in fields(AutoEvalColumn)]
 
 EVAL_COLS = [c.name for c in fields(EvalQueueColumn)]
 EVAL_TYPES = [c.type for c in fields(EvalQueueColumn)]
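These appends rely on support code outside this diff: ColumnContent carries per-column display flags, and fields() yields the ColumnContent instances of the generated AutoEvalColumn class. A sketch of the assumed shape (the flag names match those referenced in this commit; the rest is illustrative):

    from dataclasses import dataclass

    @dataclass
    class ColumnContent:
        name: str
        type: str
        displayed_by_default: bool
        hidden: bool = False        # kept in the dataframe for filtering, not rendered
        never_hidden: bool = False  # always rendered (see select_columns in app.py)
        dummy: bool = False         # search-only column, hidden via custom CSS

    class AutoEvalColumn:  # in the real code, generated from auto_eval_column_dict
        model = ColumnContent("Model", "markdown", True, never_hidden=True)
        moe = ColumnContent("MoE", "bool", False, hidden=True)

    def fields(raw_class):
        # Assumed helper: return the ColumnContent instances of the class.
        return [v for k, v in raw_class.__dict__.items() if not k.startswith("__")]

    COLS = [c.name for c in fields(AutoEvalColumn)]  # ['Model', 'MoE']

With COLS and TYPES no longer excluding hidden columns, flags like "MoE" and "Flagged" stay in the dataframe for filter_models to use, while select_columns keeps them out of the rendered table unless explicitly selected.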
src/leaderboard/read_evals.py CHANGED
@@ -30,7 +30,7 @@ class EvalResult:
     likes: int = 0
     num_params: int = 0
     date: str = ""  # submission date of request file
-    still_on_hub: bool = False
+    still_on_hub: bool = True
     is_merge: bool = False
     flagged: bool = False
     tags: list = None
@@ -106,12 +106,12 @@
         try:
             with open(request_file, "r") as f:
                 request = json.load(f)
-            self.model_type = ModelType.from_str(request.get("model_type", ""))
+            self.model_type = ModelType.from_str(request.get("model_type", "Unknown"))
             self.weight_type = WeightType[request.get("weight_type", "Original")]
             self.num_params = request.get("params", 0)
             self.date = request.get("submitted_time", "")
-            self.architecture = request["architectures"]
-        except Exception:
+            self.architecture = request.get("architectures", "Unknown")
+        except Exception as e:
             print(f"Could not find request file for {self.org}/{self.model}")
 
     def update_with_dynamic_file_dict(self, file_dict):
@@ -119,7 +119,6 @@
         self.likes = file_dict.get("likes", 0)
         self.still_on_hub = file_dict["still_on_hub"]
         self.flagged = any("flagged" in tag for tag in file_dict["tags"])
-        self.is_merge = "merge" in file_dict["tags"]
         self.tags = file_dict["tags"]
 
 
@@ -130,7 +129,6 @@
             "eval_name": self.eval_name,  # not a column, just a save name,
             AutoEvalColumn.precision.name: self.precision.value.name,
             AutoEvalColumn.model_type.name: self.model_type.value.name,
-            AutoEvalColumn.merged.name: self.is_merge,
             AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
             AutoEvalColumn.weight_type.name: self.weight_type.value.name,
             AutoEvalColumn.architecture.name: self.architecture,
@@ -142,6 +140,8 @@
             AutoEvalColumn.likes.name: self.likes,
             AutoEvalColumn.params.name: self.num_params,
             AutoEvalColumn.still_on_hub.name: self.still_on_hub,
+            AutoEvalColumn.merged.name: "merge" in self.tags if self.tags else False,
+            AutoEvalColumn.moe.name: ("moe" in self.tags if self.tags else False) or "moe" in self.full_model.lower(),
             AutoEvalColumn.flagged.name: self.flagged
         }
 
@@ -199,7 +199,8 @@ def get_raw_eval_results(results_path: str, requests_path: str, dynamic_path: st
         # Creation of result
         eval_result = EvalResult.init_from_json_file(model_result_filepath)
         eval_result.update_with_request_file(requests_path)
-        eval_result.update_with_dynamic_file_dict(dynamic_data[eval_result.full_model])
+        if eval_result.full_model in dynamic_data:
+            eval_result.update_with_dynamic_file_dict(dynamic_data[eval_result.full_model])
 
         # Store results of same eval together
         eval_name = eval_result.eval_name
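Two behavioral notes on the hunks above: a model missing from the dynamic file now keeps its dataclass defaults (notably still_on_hub=True) instead of raising a KeyError, and the Merged/MoE flags are derived from the dynamic tags at to_dict() time, with a name-based fallback for MoE. A small illustration of that tag logic (values hypothetical):

    def derive_flags(tags, full_model):
        # Mirrors the expressions used in EvalResult.to_dict above.
        merged = "merge" in tags if tags else False
        moe = ("moe" in tags if tags else False) or "moe" in full_model.lower()
        return merged, moe

    assert derive_flags(["merge"], "org/model") == (True, False)
    assert derive_flags(None, "org/some-moe-8x7b") == (False, True)  # name fallback
    assert derive_flags([], "org/model") == (False, False)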
src/scripts/update_all_request_files.py CHANGED
@@ -1,31 +1,10 @@
-from huggingface_hub import HfApi, ModelFilter, snapshot_download
+from huggingface_hub import ModelFilter, snapshot_download
 from huggingface_hub import ModelCard
 
 import json
-import os
 import time
-import shutil
-from src.submission.check_validity import is_model_on_hub, check_model_card, get_model_size
-from src.envs import DYNAMIC_INFO_REPO, DYNAMIC_INFO_FILE_PATH, API
-
-HF_TOKEN = os.environ.get("HF_TOKEN", None)
-
-TMP_FOLDER = "tmp_requests"
-snapshot_download(
-    repo_id=DYNAMIC_INFO_REPO, local_dir=TMP_FOLDER, repo_type="dataset", tqdm_class=None, etag_timeout=30
-)
-
-# Get models
-start = time.time()
-
-models = list(API.list_models(
-    filter=ModelFilter(task="text-generation"),
-    full=False,
-    cardData=True,
-    fetch_config=True,
-))
-
-print(f"Downloaded list of models in {time.time() - start:.2f} seconds")
+from src.submission.check_validity import is_model_on_hub, check_model_card
+from src.envs import DYNAMIC_INFO_REPO, DYNAMIC_INFO_PATH, DYNAMIC_INFO_FILE_PATH, API
 
 def update_models(file_path, models):
     """
@@ -80,18 +59,37 @@ def update_models(file_path, models):
     with open(file_path, 'w') as f:
         json.dump(model_infos, f, indent=2)
 
-start = time.time()
+def update_dynamic_files():
+    """This only updates metadata for models already linked in the repo; it does not add missing ones."""
+    snapshot_download(
+        repo_id=DYNAMIC_INFO_REPO, local_dir=DYNAMIC_INFO_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30
+    )
+
+    print("UPDATE_DYNAMIC: Loaded snapshot")
+    # Get models
+    start = time.time()
+
+    models = list(API.list_models(
+        filter=ModelFilter(task="text-generation"),
+        full=False,
+        cardData=True,
+        fetch_config=True,
+    ))
+
+    print(f"UPDATE_DYNAMIC: Downloaded list of models in {time.time() - start:.2f} seconds")
 
-updated_ids = update_models(DYNAMIC_INFO_FILE_PATH, models)
+    start = time.time()
 
-print(f"updated in {time.time() - start:.2f} seconds, updated ids: {len(updated_ids)}")
+    update_models(DYNAMIC_INFO_FILE_PATH, models)
 
-API.upload_file(
-    path_or_fileobj=DYNAMIC_INFO_FILE_PATH,
-    path_in_repo=DYNAMIC_INFO_FILE_PATH.split("/")[-1],
-    repo_id=DYNAMIC_INFO_REPO,
-    repo_type="dataset",
-    commit_message=f"Daily request file update.",
-)
+    print(f"UPDATE_DYNAMIC: updated in {time.time() - start:.2f} seconds")
 
-shutil.rmtree(TMP_FOLDER)
+    API.upload_file(
+        path_or_fileobj=DYNAMIC_INFO_FILE_PATH,
+        path_in_repo=DYNAMIC_INFO_FILE_PATH.split("/")[-1],
+        repo_id=DYNAMIC_INFO_REPO,
+        repo_type="dataset",
+        commit_message="Daily request file update.",
+    )
+    print("UPDATE_DYNAMIC: pushed to hub")
src/submission/check_validity.py CHANGED
@@ -52,7 +52,7 @@ def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_rem
             return (False, "'s tokenizer cannot be loaded. Is your tokenizer class in a stable transformers release, and correctly configured?", None)
         return True, None, config
 
-    except ValueError:
+    except ValueError as e:
         return (
             False,
             "needs to be launched with `trust_remote_code=True`. For safety reason, we do not allow these models to be automatically submitted to the leaderboard.",