lvkaokao committed
Commit b9cb207
1 Parent(s): ac138f8

add new search.

app.py CHANGED
@@ -25,6 +25,7 @@ from src.display.utils import (
     NUMERIC_INTERVALS,
     TYPES,
     AutoEvalColumn,
+    GroupDtype,
     ModelType,
     fields,
     WeightType,
@@ -105,17 +106,27 @@ def update_table(
     type_query: list,
     precision_query: str,
     size_query: list,
+    params_query: list,
     hide_models: list,
     query: str,
     compute_dtype: str,
     weight_dtype: str,
-    double_quant: str
+    double_quant: str,
+    group_dtype: str
 ):
 
     compute_dtype = [compute_dtype]
     weight_dtype = [weight_dtype]
+    if group_dtype == 'All':
+        group_dtype = [-1, 1024, 256, 128, 32]
+    else:
+        try:
+            group_dtype = [int(group_dtype)]
+        except ValueError:
+            group_dtype = [-1]
+
     double_quant = [str_to_bool(double_quant)]
-    filtered_df = filter_models(df=hidden_df, type_query=type_query, size_query=size_query, precision_query=precision_query, hide_models=hide_models, compute_dtype=compute_dtype, weight_dtype=weight_dtype, double_quant=double_quant)
+    filtered_df = filter_models(df=hidden_df, type_query=type_query, size_query=size_query, precision_query=precision_query, hide_models=hide_models, compute_dtype=compute_dtype, weight_dtype=weight_dtype, double_quant=double_quant, group_dtype=group_dtype, params_query=params_query)
     filtered_df = filter_queries(query, filtered_df)
     df = select_columns(filtered_df, columns)
     return df
@@ -161,8 +172,8 @@ def filter_queries(query: str, filtered_df: pd.DataFrame):
 
 
 def filter_models(
-    df: pd.DataFrame, type_query: list, size_query: list, precision_query: list, hide_models: list, compute_dtype: list, weight_dtype: list, double_quant: list
-) -> pd.DataFrame:
+    df: pd.DataFrame, type_query: list, size_query: list, params_query: list, precision_query: list, hide_models: list, compute_dtype: list, weight_dtype: list, double_quant: list, group_dtype: list,
+) -> pd.DataFrame:
     # Show all models
     if "Private or deleted" in hide_models:
         filtered_df = df[df[AutoEvalColumn.still_on_hub.name] == True]
@@ -185,24 +196,31 @@ def filter_models(
     filtered_df = filtered_df.loc[df[AutoEvalColumn.weight_dtype.name].isin(weight_dtype)]
     filtered_df = filtered_df.loc[df[AutoEvalColumn.compute_dtype.name].isin(compute_dtype)]
     filtered_df = filtered_df.loc[df[AutoEvalColumn.double_quant.name].isin(double_quant)]
+    filtered_df = filtered_df.loc[df[AutoEvalColumn.group_size.name].isin(group_dtype)]
 
     numeric_interval = pd.IntervalIndex(sorted([NUMERIC_INTERVALS[s] for s in size_query]))
     params_column = pd.to_numeric(df[AutoEvalColumn.params.name], errors="coerce")
     mask = params_column.apply(lambda x: any(numeric_interval.contains(x)))
     filtered_df = filtered_df.loc[mask]
 
+    numeric_interval_params = pd.IntervalIndex(sorted([NUMERIC_INTERVALS[s] for s in params_query]))
+    params_column_params = pd.to_numeric(df[AutoEvalColumn.params.name], errors="coerce")
+    mask_params = params_column_params.apply(lambda x: any(numeric_interval_params.contains(x)))
+    filtered_df = filtered_df.loc[mask_params]
+
     return filtered_df
 
 leaderboard_df = filter_models(
     df=leaderboard_df,
     type_query=[t.to_str(" : ") for t in QuantType],
     size_query=list(NUMERIC_INTERVALS.keys()),
+    params_query=list(NUMERIC_INTERVALS.keys()),
     precision_query=[i.value.name for i in Precision],
     hide_models=["Private or deleted", "Contains a merge/moerge", "Flagged"], # Deleted, merges, flagged, MoEs,
     compute_dtype=[i.value.name for i in ComputeDtype],
     weight_dtype=[i.value.name for i in WeightDtype],
-    double_quant=[True, False]
-
+    double_quant=[True, False],
+    group_dtype=[-1, 1024, 256, 128, 32]
 )
 
 demo = gr.Blocks(css=custom_css)
@@ -236,9 +254,18 @@ with demo:
                 elem_id="column-select",
                 interactive=True,
             )
+
+        with gr.Row():
+            filter_columns_parameters = gr.CheckboxGroup(
+                label="Model parameters (in billions of parameters)",
+                choices=list(NUMERIC_INTERVALS.keys()),
+                value=list(NUMERIC_INTERVALS.keys()),
+                interactive=True,
+                elem_id="filter-columns-size",
+            )
         with gr.Row():
             filter_columns_size = gr.CheckboxGroup(
-                label="Model sizes (in billions of parameters)",
+                label="Model sizes (GB, int4)",
                 choices=list(NUMERIC_INTERVALS.keys()),
                 value=list(NUMERIC_INTERVALS.keys()),
                 interactive=True,
@@ -266,8 +293,7 @@ with demo:
            filter_columns_computeDtype = gr.Dropdown(choices=[i.value.name for i in ComputeDtype], label="Compute Dtype", multiselect=False, value="float16", interactive=True,)
            filter_columns_weightDtype = gr.Dropdown(choices=[i.value.name for i in WeightDtype], label="Weight Dtype", multiselect=False, value="int4", interactive=True,)
            filter_columns_doubleQuant = gr.Dropdown(choices=["True", "False"], label="Double Quant", multiselect=False, value=False, interactive=True)
-           # with gr.Row():
-           #     gr.Checkbox(label="", info=""),
+           filter_columns_groupDtype = gr.Dropdown(choices=[i.value.name for i in GroupDtype], label="Group Size", multiselect=False, value="All", interactive=True,)
 
        leaderboard_table = gr.components.Dataframe(
            value=leaderboard_df[
@@ -308,11 +334,13 @@ with demo:
            filter_columns_type,
            filter_columns_precision,
            filter_columns_size,
+           filter_columns_parameters,
            hide_models,
            search_bar,
            filter_columns_computeDtype,
            filter_columns_weightDtype,
-           filter_columns_doubleQuant
+           filter_columns_doubleQuant,
+           filter_columns_groupDtype
        ],
        leaderboard_table,
    )
@@ -341,7 +369,7 @@ with demo:
    demo.load(load_query, inputs=[], outputs=[search_bar, hidden_search_bar])
 
    """
-   for selector in [shown_columns, filter_columns_type, filter_columns_precision, filter_columns_size, hide_models, filter_columns_computeDtype, filter_columns_weightDtype, filter_columns_doubleQuant]:
+   for selector in [shown_columns, filter_columns_type, filter_columns_precision, filter_columns_size, filter_columns_parameters, hide_models, filter_columns_computeDtype, filter_columns_weightDtype, filter_columns_doubleQuant, filter_columns_groupDtype]:
        selector.change(
            update_table,
            [
@@ -350,11 +378,13 @@ with demo:
                filter_columns_type,
                filter_columns_precision,
                filter_columns_size,
+               filter_columns_parameters,
                hide_models,
                search_bar,
                filter_columns_computeDtype,
                filter_columns_weightDtype,
-               filter_columns_doubleQuant
+               filter_columns_doubleQuant,
+               filter_columns_groupDtype
            ],
            leaderboard_table,
            queue=True,
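
For reference, a minimal self-contained sketch (not part of the commit) of how the new Group Size dropdown value is normalized and then applied, mirroring the update_table/filter_models logic above; the sample DataFrame and the "Group Size" column literal are illustrative assumptions:

import pandas as pd

def normalize_group_dtype(group_dtype: str) -> list:
    # "All" expands to every tracked group size; unparsable input falls back
    # to -1 (channel-wise quantization), as in update_table above.
    if group_dtype == "All":
        return [-1, 1024, 256, 128, 32]
    try:
        return [int(group_dtype)]
    except ValueError:
        return [-1]

# Illustrative frame standing in for the hidden leaderboard DataFrame.
df = pd.DataFrame({"Group Size": [-1, 32, 128, 1024],
                   "model_name_for_query": ["a", "b", "c", "d"]})
print(df.loc[df["Group Size"].isin(normalize_group_dtype("128"))])
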
src/display/about.py CHANGED
@@ -59,10 +59,10 @@ python main.py --model=hf-causal-experimental
 - ARC-C: 0-shot, *arc_challenge* (`acc`)
 - ARC-E: 0-shot, *arc_easy* (`acc`)
 - HellaSwag: 0-shot, *hellaswag* (`acc`)
-- TruthfulQA: 0-shot, *truthfulqa_mc2* (`acc`)
+- TruthfulQA (Truthfulqa_mc1): 0-shot, *truthfulqa_mc1* (`acc`)
 - MMLU: 0-shot, *hendrycksTest-abstract_algebra,hendrycksTest-anatomy,hendrycksTest-astronomy,hendrycksTest-business_ethics,hendrycksTest-clinical_knowledge,hendrycksTest-college_biology,hendrycksTest-college_chemistry,hendrycksTest-college_computer_science,hendrycksTest-college_mathematics,hendrycksTest-college_medicine,hendrycksTest-college_physics,hendrycksTest-computer_security,hendrycksTest-conceptual_physics,hendrycksTest-econometrics,hendrycksTest-electrical_engineering,hendrycksTest-elementary_mathematics,hendrycksTest-formal_logic,hendrycksTest-global_facts,hendrycksTest-high_school_biology,hendrycksTest-high_school_chemistry,hendrycksTest-high_school_computer_science,hendrycksTest-high_school_european_history,hendrycksTest-high_school_geography,hendrycksTest-high_school_government_and_politics,hendrycksTest-high_school_macroeconomics,hendrycksTest-high_school_mathematics,hendrycksTest-high_school_microeconomics,hendrycksTest-high_school_physics,hendrycksTest-high_school_psychology,hendrycksTest-high_school_statistics,hendrycksTest-high_school_us_history,hendrycksTest-high_school_world_history,hendrycksTest-human_aging,hendrycksTest-human_sexuality,hendrycksTest-international_law,hendrycksTest-jurisprudence,hendrycksTest-logical_fallacies,hendrycksTest-machine_learning,hendrycksTest-management,hendrycksTest-marketing,hendrycksTest-medical_genetics,hendrycksTest-miscellaneous,hendrycksTest-moral_disputes,hendrycksTest-moral_scenarios,hendrycksTest-nutrition,hendrycksTest-philosophy,hendrycksTest-prehistory,hendrycksTest-professional_accounting,hendrycksTest-professional_law,hendrycksTest-professional_medicine,hendrycksTest-professional_psychology,hendrycksTest-public_relations,hendrycksTest-security_studies,hendrycksTest-sociology,hendrycksTest-us_foreign_policy,hendrycksTest-virology,hendrycksTest-world_religions* (average of all the results `acc`)
 - Winogrande: 0-shot, *winogrande* (`acc`)
-- Lambada_Openai: 0-shot, *lambada_openai* (`acc`)
+- Lambada (Lambada_Openai): 0-shot, *lambada_openai* (`acc`)
 - PIQA: 0-shot, *piqa* (`acc`)
 - OpenBookQA: 0-shot, *openbookqa* (`acc`)
 - BoolQ: 0-shot, *boolq* (`acc`)
src/display/utils.py CHANGED
@@ -18,12 +18,12 @@ class Tasks(Enum):
     arc_easy = Task("arc:easy", "acc,none", "ARC-e")
     boolq = Task("boolq", "acc,none", "Boolq")
     hellaswag = Task("hellaswag", "acc,none", "HellaSwag")
-    lambada_openai = Task("lambada:openai", "acc,none", "Lambada_openai")
+    lambada_openai = Task("lambada:openai", "acc,none", "Lambada")
     mmlu = Task("mmlu", "acc,none", "MMLU")
     openbookqa = Task("openbookqa", "acc,none", "Openbookqa")
     piqa = Task("piqa", "acc,none", "Piqa")
     # truthfulqa:mc1 / truthfulqa:mc2 -- ?
-    truthfulqa_mc = Task("truthfulqa:mc1", "acc,none", "Truthfulqa_mc1")
+    truthfulqa_mc = Task("truthfulqa:mc1", "acc,none", "Truthfulqa")
     # arc:challenge ?
     # arc_challenge = Task("arc:challenge", "acc_norm,none", "Arc challenge")
     # truthfulqa = Task("truthfulqa:mc", "mc2", "TruthfulQA")
@@ -50,6 +50,8 @@ auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "ma
 auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
 for task in Tasks:
     auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
+auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", True)])
+auto_eval_column_dict.append(["model_size", ColumnContent, ColumnContent("#Size (G)", "number", True)])
 # Dummy column for the search bar (hidden by the custom CSS)
 auto_eval_column_dict.append(["dummy", ColumnContent, ColumnContent("model_name_for_query", "str", False, dummy=True)])
 # Model information
@@ -62,13 +64,14 @@ auto_eval_column_dict.append(["weight_dtype", ColumnContent, ColumnContent("Weig
 auto_eval_column_dict.append(["compute_dtype", ColumnContent, ColumnContent("Compute dtype", "str", False)])
 auto_eval_column_dict.append(["merged", ColumnContent, ColumnContent("Merged", "bool", False)])
 auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False)])
-auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
+# auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
 auto_eval_column_dict.append(["likes", ColumnContent, ColumnContent("Hub ❤️", "number", False)])
 auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False, hidden=True)])
 auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
 auto_eval_column_dict.append(["flagged", ColumnContent, ColumnContent("Flagged", "bool", False, hidden=True)])
 auto_eval_column_dict.append(["moe", ColumnContent, ColumnContent("MoE", "bool", False, hidden=True)])
 auto_eval_column_dict.append(["double_quant", ColumnContent, ColumnContent("Double Quant", "bool", False)])
+auto_eval_column_dict.append(["group_size", ColumnContent, ColumnContent("Group Size", "bool", False)])
 # We use make dataclass to dynamically fill the scores from Tasks
 # auto_eval_column_dict.sort(key=lambda x: x[0])
 sorted_columns = sorted(auto_eval_column_dict[3:], key=lambda x: x[0])
@@ -258,6 +261,28 @@ class ComputeDtype(Enum):
         if compute_dtype in ["float32"]:
             return ComputeDtype.fp32
         return ComputeDtype.Unknown
+
+class GroupDtype(Enum):
+    group_1 = ModelDetails("-1")
+    group_1024 = ModelDetails("1024")
+    group_256 = ModelDetails("256")
+    group_128 = ModelDetails("128")
+    group_32 = ModelDetails("32")
+
+    group_all = ModelDetails("All")
+
+    def from_str(compute_dtype):
+        if compute_dtype in ["-1"]:
+            return GroupDtype.group_1
+        if compute_dtype in ["1024"]:
+            return GroupDtype.group_1024
+        if compute_dtype in ["256"]:
+            return GroupDtype.group_256
+        if compute_dtype in ["128"]:
+            return GroupDtype.group_128
+        if compute_dtype in ["32"]:
+            return GroupDtype.group_32
+        return GroupDtype.group_all
 
 class Precision(Enum):
     # float16 = ModelDetails("float16")
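
As a usage note, a hedged, self-contained sketch of how the new GroupDtype enum feeds the "Group Size" dropdown in app.py above; ModelDetails here is a stand-in that only carries the name field, and from_str is rewritten as a loop rather than the commit's chained ifs:

from dataclasses import dataclass
from enum import Enum

@dataclass
class ModelDetails:
    name: str  # stand-in: only the name attribute is used for dropdown choices

class GroupDtype(Enum):
    group_1 = ModelDetails("-1")
    group_1024 = ModelDetails("1024")
    group_256 = ModelDetails("256")
    group_128 = ModelDetails("128")
    group_32 = ModelDetails("32")
    group_all = ModelDetails("All")

    @staticmethod
    def from_str(group_size):
        # Map the stored string back to a member; anything unknown maps to group_all.
        for member in GroupDtype:
            if member.value.name == group_size:
                return member
        return GroupDtype.group_all

print([i.value.name for i in GroupDtype])  # dropdown choices: ['-1', '1024', '256', '128', '32', 'All']
print(GroupDtype.from_str("128"))          # GroupDtype.group_128
print(GroupDtype.from_str("unknown"))      # GroupDtype.group_all
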
src/leaderboard/read_evals.py CHANGED
@@ -33,6 +33,8 @@ class EvalResult:
     license: str = "?"
     likes: int = 0
     num_params: int = 0
+    model_size: int = 0
+    group_size: int = -1
     date: str = "" # submission date of request file
     still_on_hub: bool = True
     is_merge: bool = False
@@ -57,6 +59,8 @@ class EvalResult:
         compute_dtype = ComputeDtype.from_str(data["task_info"].get("compute_dtype", "bfloat16"))
         double_quant = data["quantization_config"].get("bnb_4bit_use_double_quant", False)
         model_params = config["model_params"]
+        model_size = config["model_size"]
+        group_size = data["quantization_config"].get("group_size", -1)
 
         local = config.get("local", False)
         if not local:
@@ -109,6 +113,8 @@ class EvalResult:
             double_quant=double_quant,
             revision=config.get("model_sha", "main"),
             num_params=model_params,
+            model_size=model_size,
+            group_size=group_size
         )
 
     def update_with_request_file(self, requests_path):
@@ -160,6 +166,8 @@ class EvalResult:
             AutoEvalColumn.license.name: self.license,
             AutoEvalColumn.likes.name: self.likes,
             AutoEvalColumn.params.name: self.num_params,
+            AutoEvalColumn.model_size.name: self.model_size,
+            AutoEvalColumn.group_size.name: self.group_size,
             AutoEvalColumn.still_on_hub.name: self.still_on_hub,
             AutoEvalColumn.merged.name: "merge" in self.tags if self.tags else False,
             AutoEvalColumn.moe.name: ("moe" in self.tags if self.tags else False) or "moe" in self.full_model.lower(),
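
A small hedged example of the defaults read by the new fields above; the result-file fragment is made up, but the keys and fallbacks match the ones used in the diff:

# Illustrative result-file fragment; real files also carry task scores, model ids, etc.
data = {"quantization_config": {"bnb_4bit_use_double_quant": True}}
config = {"model_params": 7, "model_size": 3.9}

double_quant = data["quantization_config"].get("bnb_4bit_use_double_quant", False)
group_size = data["quantization_config"].get("group_size", -1)  # -1 when the key is absent
model_params = config["model_params"]
model_size = config["model_size"]
print(double_quant, group_size, model_params, model_size)  # True -1 7 3.9
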