t0-0 committed
Commit 0109b82
Parent: 5b66510

Display llm-jp-eval version and backend library

Files changed (3)
  1. app.py +36 -0
  2. src/display/utils.py +26 -0
  3. src/leaderboard/read_evals.py +10 -1
app.py CHANGED
@@ -26,9 +26,11 @@ from src.display.utils import (
     TYPES,
     AddSpecialTokens,
     AutoEvalColumn,
+    Backend,
     ModelType,
     NumFewShots,
     Precision,
+    Version,
     fields,
 )
 from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO
@@ -75,6 +77,8 @@ def filter_models(
     precision_query: list,
     add_special_tokens_query: list,
     num_few_shots_query: list,
+    version_query: list,
+    backend_query: list,
 ) -> pd.DataFrame:
     print(f"Initial df shape: {df.shape}")
     print(f"Initial df content:\n{df}")
@@ -110,6 +114,14 @@ def filter_models(
     ]
     print(f"After num_few_shots filter: {filtered_df.shape}")
 
+    # Version filtering
+    filtered_df = filtered_df[filtered_df["llm-jp-eval version"].isin(version_query)]
+    print(f"After version filter: {filtered_df.shape}")
+
+    # Backend filtering
+    filtered_df = filtered_df[filtered_df["Backend Library"].isin(backend_query)]
+    print(f"After backend filter: {filtered_df.shape}")
+
     print("Filtered dataframe head:")
     print(filtered_df.head())
     return filtered_df
@@ -177,6 +189,8 @@ def update_table(
     size_query: list,
     add_special_tokens_query: list,
     num_few_shots_query: list,
+    version_query: list,
+    backend_query: list,
     query: str,
 ):
     print(
@@ -191,6 +205,8 @@ def update_table(
         precision_query,
         add_special_tokens_query,
         num_few_shots_query,
+        version_query,
+        backend_query,
     )
     print(f"filtered_df shape after filter_models: {filtered_df.shape}")
 
@@ -236,6 +252,8 @@ leaderboard_df = filter_models(
     [i.value.name for i in Precision],
     [i.value.name for i in AddSpecialTokens],
     [i.value.name for i in NumFewShots],
+    [i.value.name for i in Version],
+    [i.value.name for i in Backend],
 )
 
 leaderboard_df_filtered = filter_models(
@@ -245,6 +263,8 @@ leaderboard_df_filtered = filter_models(
     [i.value.name for i in Precision],
     [i.value.name for i in AddSpecialTokens],
     [i.value.name for i in NumFewShots],
+    [i.value.name for i in Version],
+    [i.value.name for i in Backend],
 )
 
 # Only the DataFrame initialization part is modified
@@ -309,6 +329,18 @@ with gr.Blocks() as demo_leaderboard:
         value=[i.value.name for i in NumFewShots],
         elem_id="filter-columns-num-few-shots",
     )
+    filter_columns_version = gr.CheckboxGroup(
+        label="Version",
+        choices=[i.value.name for i in Version],
+        value=[i.value.name for i in Version],
+        elem_id="filter-columns-version",
+    )
+    filter_columns_backend = gr.CheckboxGroup(
+        label="Backend",
+        choices=[i.value.name for i in Backend],
+        value=[i.value.name for i in Backend],
+        elem_id="filter-columns-backend",
+    )
 
     # Initialize the DataFrame component
     leaderboard_table = gr.Dataframe(
@@ -340,6 +372,8 @@ with gr.Blocks() as demo_leaderboard:
         filter_columns_size.change,
         filter_columns_add_special_tokens.change,
         filter_columns_num_few_shots.change,
+        filter_columns_version.change,
+        filter_columns_backend.change,
         search_bar.submit,
     ],
     fn=update_table,
@@ -351,6 +385,8 @@ with gr.Blocks() as demo_leaderboard:
         filter_columns_size,
         filter_columns_add_special_tokens,
         filter_columns_num_few_shots,
+        filter_columns_version,
+        filter_columns_backend,
         search_bar,
     ],
     outputs=leaderboard_table,
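
The two new filters follow the same pattern as the existing ones: a row survives only if its column value is among the currently checked choices, so clearing either checkbox group empties the table. A minimal sketch of the isin() filtering added above, using a hypothetical two-row frame:

import pandas as pd

# Hypothetical leaderboard rows; column names match the new display columns.
df = pd.DataFrame(
    {
        "model": ["model-a", "model-b"],
        "llm-jp-eval version": ["v1.4.1", "?"],
        "Backend Library": ["vllm", "?"],
    }
)

version_query = ["v1.4.1"]     # e.g. only the v1.4.1 checkbox is ticked
backend_query = ["vllm", "?"]  # both backend checkboxes left ticked

filtered = df[df["llm-jp-eval version"].isin(version_query)]
filtered = filtered[filtered["Backend Library"].isin(backend_query)]
print(filtered)  # only model-a remains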
src/display/utils.py CHANGED
@@ -44,6 +44,10 @@ auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Avai
 auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
 auto_eval_column_dict.append(["num_few_shots", ColumnContent, ColumnContent("Few-shot", "str", False)])
 auto_eval_column_dict.append(["add_special_tokens", ColumnContent, ColumnContent("Add Special Tokens", "bool", False)])
+auto_eval_column_dict.append(
+    ["llm_jp_eval_version", ColumnContent, ColumnContent("llm-jp-eval version", "str", False)]
+)
+auto_eval_column_dict.append(["backend", ColumnContent, ColumnContent("Backend Library", "str", False)])
 auto_eval_column_dict.append(["dummy", ColumnContent, ColumnContent("model_name_for_query", "str", False, dummy=True)])
 
 # We use make dataclass to dynamically fill the scores from Tasks
@@ -130,6 +134,28 @@ class NumFewShots(Enum):
         return NumFewShots.Unknown
 
 
+class Version(Enum):
+    v1_4_1 = ModelDetails("v1.4.1")
+    Unknown = ModelDetails("?")
+
+    def from_str(version):
+        if version == "1.4.1":
+            return Version.v1_4_1
+        else:
+            return Version.Unknown
+
+
+class Backend(Enum):
+    vllm = ModelDetails("vllm")
+    Unknown = ModelDetails("?")
+
+    def from_str(backend):
+        if backend == "vllm":
+            return Backend.vllm
+        else:
+            return Backend.Unknown
+
+
 # Column selection
 COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
 TYPES = [c.type for c in fields(AutoEvalColumn)]
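
For reference, the two new enums map raw strings from the evaluation results to display names, and anything unrecognized collapses to the Unknown member. A small usage sketch, assuming (as elsewhere in this module) that ModelDetails exposes the display string as its name attribute:

from src.display.utils import Backend, Version

# Known values resolve to their dedicated members.
assert Version.from_str("1.4.1") is Version.v1_4_1
assert Backend.from_str("vllm") is Backend.vllm

# Everything else falls back to Unknown and is displayed as "?".
assert Version.from_str("1.3.0").value.name == "?"
assert Backend.from_str("transformers").value.name == "?"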
src/leaderboard/read_evals.py CHANGED
@@ -7,7 +7,7 @@ from decimal import Decimal
 import dateutil
 
 from src.display.formatting import make_clickable_model
-from src.display.utils import AutoEvalColumn, ModelType, Tasks, WeightType
+from src.display.utils import AutoEvalColumn, Backend, ModelType, Tasks, Version, WeightType
 from src.submission.check_validity import is_model_on_hub
 
 
@@ -34,6 +34,8 @@ class EvalResult:
     still_on_hub: bool = False
     num_few_shots: str = "0"
     add_special_tokens: str = ""
+    llm_jp_eval_version: str = ""
+    backend: str = ""
 
     @classmethod
     def init_from_json_file(self, json_filepath):
@@ -62,6 +64,9 @@ class EvalResult:
             config.get("pipeline_kwargs", {"add_special_tokens": "Unknown"}).get("add_special_tokens")
         )
 
+        version = Version.from_str(metainfo.get("version", "?")).value.name
+        backend = Backend.from_str(model_config.get("_target_", "?").split(".")[0]).value.name
+
         # Get model and org
         # org_and_model = config.get("model_name", config.get("offline_inference").get("model_name", None))
         org_and_model = config.get("model_name", config.get("offline_inference", {}).get("model_name", "Unknown"))
@@ -116,6 +121,8 @@ class EvalResult:
             architecture=architecture,
             num_few_shots=num_few_shots,
             add_special_tokens=add_special_tokens,
+            llm_jp_eval_version=version,
+            backend=backend,
         )
 
     def update_with_request_file(self, requests_path):
@@ -153,6 +160,8 @@ class EvalResult:
             AutoEvalColumn.still_on_hub.name: self.still_on_hub,
             AutoEvalColumn.num_few_shots.name: self.num_few_shots,
             AutoEvalColumn.add_special_tokens.name: self.add_special_tokens,
+            AutoEvalColumn.llm_jp_eval_version.name: self.llm_jp_eval_version,
+            AutoEvalColumn.backend.name: self.backend,
         }
 
         # for task in Tasks:
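
The version and backend strings are pulled from dictionaries (metainfo, model_config) that the surrounding parser builds from each result JSON; those lines sit outside this diff, so the key layout below is an assumption for illustration only. The backend is simply the module prefix of what appears to be a Hydra-style _target_ entry:

# Hypothetical result-file fragments; the real layout is defined elsewhere
# in init_from_json_file and is not shown in this commit.
metainfo = {"version": "1.4.1"}
model_config = {"_target_": "vllm.LLM"}

print(metainfo.get("version", "?"))                     # "1.4.1" -> displayed as "v1.4.1"
print(model_config.get("_target_", "?").split(".")[0])  # "vllm"  -> displayed as "vllm"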