app.py CHANGED
@@ -11,7 +11,6 @@ import time
 from apscheduler.schedulers.background import BackgroundScheduler
 
 from huggingface_hub import snapshot_download
-from pytz import utc
 
 from src.display.about import (
     CITATION_BUTTON_LABEL,
@@ -160,7 +159,6 @@ def filter_models(df: pd.DataFrame, type_query: list, size_query: list, precisio
     type_emoji = [t[0] for t in type_query]
     filtered_df = filtered_df.loc[df[AutoEvalColumn.model_type_symbol.name].isin(type_emoji)]
     filtered_df = filtered_df.loc[df[AutoEvalColumn.precision.name].isin(precision_query + ["None"])]
-    filtered_df = filtered_df.loc[df[AutoEvalColumn.inference_framework.name].isin(size_query)]
 
     # numeric_interval = pd.IntervalIndex(sorted([NUMERIC_INTERVALS[s] for s in size_query]))
     # params_column = pd.to_numeric(df[AutoEvalColumn.params.name], errors="coerce")
@@ -259,7 +257,7 @@ with demo:
                     for c in fields(AutoEvalColumn)
                     if c.displayed_by_default and not c.hidden and not c.never_hidden
                 ],
-                label="Tasks",
+                label="Select columns to show",
                 elem_id="column-select",
                 interactive=True,
             )
@@ -479,7 +477,7 @@ with demo:
         show_copy_button=True,
     )
 
-scheduler = BackgroundScheduler(timezone=utc)
+scheduler = BackgroundScheduler()
 
 scheduler.add_job(restart_space, "interval", hours=6)
 
backend-cli.py CHANGED
@@ -458,7 +458,6 @@ def get_args():
     parser.add_argument("--gpu-type", type=str, default="NVIDIA-A100-PCIe-80GB",
                         help="GPU type. NVIDIA-A100-PCIe-80GB; NVIDIA-RTX-A5000-24GB; NVIDIA-H100-PCIe-80GB")
     parser.add_argument("--debug_repo", action="store_true", help="Use debug repo")
-    parser.add_argument("--model_type", type=str, default="chat", help="Model type")
     return parser.parse_args()
 
 
@@ -489,8 +488,7 @@ if __name__ == "__main__":
             json_filepath="",
             precision=precision,  # Use precision from arguments
             inference_framework=args.inference_framework,  # Use inference framework from arguments
-            gpu_type=args.gpu_type,
-            model_type=args.model_type,
+            gpu_type=args.gpu_type
         )
         curr_gpu_type = get_gpu_details()
        if eval_request.gpu_type != curr_gpu_type:
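For context on the `gpu_type=args.gpu_type` line kept above: argparse converts dashes in long option names to underscores on the parsed namespace, so the `--gpu-type` flag defined in `get_args()` is read back as `args.gpu_type`. A standalone sketch:

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--gpu-type", type=str, default="NVIDIA-A100-PCIe-80GB")

# "--gpu-type" becomes the attribute "gpu_type" on the namespace.
args = parser.parse_args(["--gpu-type", "NVIDIA-H100-PCIe-80GB"])
print(args.gpu_type)  # NVIDIA-H100-PCIe-80GB
```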
src/display/about.py CHANGED
@@ -19,8 +19,8 @@ Columns and Metrics:
 - E2E(s): Average End to End generation time in seconds.
 - PRE(s): Prefilling Time of input prompt in seconds.
 - T/s: Tokens throughout per second.
-- S-MBU(%): Sparse Model Bandwidth Utilization.
-- S-MFU(%): Sparse Model FLOPs Utilization.
+- MBU(%): Model Bandwidth Utilization.
+- MFU(%): Model FLOPs Utilization.
 - Precision: The precison of used model.
 
 """
src/display/utils.py CHANGED
@@ -18,8 +18,8 @@ GPU_Power = 'Power(W)'
 GPU_Mem = 'Mem(G)'
 GPU_Name = "GPU"
 GPU_Util = 'Util(%)'
-MFU = 'S-MFU(%)'
-MBU = 'S-MBU(%)'
+MFU = 'MFU(%)'
+MBU = 'MBU(%)'
 BATCH_SIZE = 'bs'
 PRECISION = "Precision"
 system_metrics_to_name_map = {
@@ -106,7 +106,7 @@ auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "ma
 # # auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Avg", "number", True)])
 
 # Inference framework
-auto_eval_column_dict.append(["inference_framework", ColumnContent, ColumnContent(f"{InFrame}", "str", True, dummy=True)])
+auto_eval_column_dict.append(["inference_framework", ColumnContent, ColumnContent(f"{InFrame}", "str", True)])
 
 for task in Tasks:
     auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
@@ -126,15 +126,15 @@ for task in Tasks:
 
 
 # Model information
-auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False, dummy=True)])
-# auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
-# auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
-auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", True, dummy=True)])
-# auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False)])
-# auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
-# auto_eval_column_dict.append(["likes", ColumnContent, ColumnContent("Hub ❀️", "number", False)])
-# auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
-# auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
+auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
+auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
+auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
+auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", True)])
+auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False)])
+auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
+auto_eval_column_dict.append(["likes", ColumnContent, ColumnContent("Hub ❀️", "number", False)])
+auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
+auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
 # Dummy column for the search bar (hidden by the custom CSS)
 auto_eval_column_dict.append(["dummy", ColumnContent, ColumnContent("model_name_for_query", "str", False, dummy=True)])
 
@@ -160,10 +160,10 @@ class ModelDetails:
 
 
 class ModelType(Enum):
-    # PT = ModelDetails(name="pretrained", symbol="🟒")
-    # FT = ModelDetails(name="fine-tuned on domain-specific datasets", symbol="πŸ”Ά")
+    PT = ModelDetails(name="pretrained", symbol="🟒")
+    FT = ModelDetails(name="fine-tuned on domain-specific datasets", symbol="πŸ”Ά")
     chat = ModelDetails(name="chat models (RLHF, DPO, IFT, ...)", symbol="πŸ’¬")
-    # merges = ModelDetails(name="base merges and moerges", symbol="🀝")
+    merges = ModelDetails(name="base merges and moerges", symbol="🀝")
     Unknown = ModelDetails(name="", symbol="?")
 
     def to_str(self, separator=" "):
@@ -171,23 +171,22 @@ class ModelType(Enum):
 
     @staticmethod
     def from_str(type):
-        # if "fine-tuned" in type or "πŸ”Ά" in type:
-        #     return ModelType.FT
-        # if "pretrained" in type or "🟒" in type:
-        #     return ModelType.PT
+        if "fine-tuned" in type or "πŸ”Ά" in type:
+            return ModelType.FT
+        if "pretrained" in type or "🟒" in type:
+            return ModelType.PT
         if any([k in type for k in ["instruction-tuned", "RL-tuned", "chat", "🟦", "β­•", "πŸ’¬"]]):
             return ModelType.chat
-        # if "merge" in type or "🀝" in type:
-        #     return ModelType.merges
+        if "merge" in type or "🀝" in type:
+            return ModelType.merges
         return ModelType.Unknown
 
 
 class InferenceFramework(Enum):
     # "moe-infinity", hf-chat
-    # MoE_Infinity = ModelDetails("moe-infinity")
+    MoE_Infinity = ModelDetails("moe-infinity")
     HF_Chat = ModelDetails("hf-chat")
     VLLM = ModelDetails("vllm_moe")
-    TRTLLM = ModelDetails("tensorrt_llm")
     Unknown = ModelDetails("?")
 
     def to_str(self):
@@ -195,10 +194,8 @@ class InferenceFramework(Enum):
 
     @staticmethod
     def from_str(inference_framework: str):
-        # if inference_framework in ["moe-infinity"]:
-        #     return InferenceFramework.MoE_Infinity
-        if inference_framework in ["tensorrt_llm"]:
-            return InferenceFramework.TRTLLM
+        if inference_framework in ["moe-infinity"]:
+            return InferenceFramework.MoE_Infinity
         if inference_framework in ["hf-chat"]:
             return InferenceFramework.HF_Chat
         if inference_framework in ["vllm_moe"]:
@@ -228,28 +225,28 @@ class WeightType(Enum):
 
 
 class Precision(Enum):
-    # float32 = ModelDetails("float32")
-    # float16 = ModelDetails("float16")
+    float32 = ModelDetails("float32")
+    float16 = ModelDetails("float16")
     bfloat16 = ModelDetails("bfloat16")
     qt_8bit = ModelDetails("8bit")
     qt_4bit = ModelDetails("4bit")
-    # qt_GPTQ = ModelDetails("GPTQ")
+    qt_GPTQ = ModelDetails("GPTQ")
     Unknown = ModelDetails("?")
 
     @staticmethod
     def from_str(precision: str):
-        # if precision in ["torch.float32", "float32"]:
-        #     return Precision.float32
-        # if precision in ["torch.float16", "float16"]:
-        #     return Precision.float16
+        if precision in ["torch.float32", "float32"]:
+            return Precision.float32
+        if precision in ["torch.float16", "float16"]:
+            return Precision.float16
         if precision in ["torch.bfloat16", "bfloat16"]:
             return Precision.bfloat16
         if precision in ["8bit"]:
            return Precision.qt_8bit
        if precision in ["4bit"]:
            return Precision.qt_4bit
+        if precision in ["GPTQ", "None"]:
+            return Precision.qt_GPTQ
        return Precision.Unknown
 
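A quick sanity check of the restored `from_str` mappings above, assuming the package is importable as `src.display.utils` from the repo root:

```python
from src.display.utils import InferenceFramework, ModelType, Precision

# Round-trips enabled by the re-enabled branches in this diff.
assert Precision.from_str("torch.float16") is Precision.float16
assert Precision.from_str("GPTQ") is Precision.qt_GPTQ
assert ModelType.from_str("pretrained") is ModelType.PT
assert ModelType.from_str("🀝") is ModelType.merges
assert InferenceFramework.from_str("moe-infinity") is InferenceFramework.MoE_Infinity
```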
src/leaderboard/read_evals.py CHANGED
@@ -140,7 +140,6 @@ class EvalResult:
             revision=config.get("model_sha", ""),
             still_on_hub=still_on_hub,
             architecture=architecture,
-            model_type=ModelType.from_str(config.get("model_type", "")),
             inference_framework=inference_framework,
         )
 
@@ -175,22 +174,22 @@ class EvalResult:
 
         # breakpoint()
         # average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
-
+
         data_dict = {
             "eval_name": self.eval_name,  # not a column, just a save name,
             AutoEvalColumn.precision.name: self.precision.value.name,
-            # AutoEvalColumn.model_type.name: self.model_type.value.name,
+            AutoEvalColumn.model_type.name: self.model_type.value.name,
             AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
-            # AutoEvalColumn.weight_type.name: self.weight_type.value.name,
-            # AutoEvalColumn.architecture.name: self.architecture,
+            AutoEvalColumn.weight_type.name: self.weight_type.value.name,
+            AutoEvalColumn.architecture.name: self.architecture,
             AutoEvalColumn.model.name: make_clickable_model(self.full_model),
             AutoEvalColumn.dummy.name: self.full_model,
-            # AutoEvalColumn.revision.name: self.revision,
-            # # AutoEvalColumn.average.name: average,
-            # AutoEvalColumn.license.name: self.license,
-            # AutoEvalColumn.likes.name: self.likes,
-            # AutoEvalColumn.params.name: self.num_params,
-            # AutoEvalColumn.still_on_hub.name: self.still_on_hub,
+            AutoEvalColumn.revision.name: self.revision,
+            # AutoEvalColumn.average.name: average,
+            AutoEvalColumn.license.name: self.license,
+            AutoEvalColumn.likes.name: self.likes,
+            AutoEvalColumn.params.name: self.num_params,
+            AutoEvalColumn.still_on_hub.name: self.still_on_hub,
             AutoEvalColumn.inference_framework.name: self.inference_framework,
         }
 
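Downstream, each `data_dict` becomes one row of the leaderboard DataFrame that `filter_models` in app.py slices by precision. A hypothetical miniature of that flow; the column names and rows below are simplified stand-ins, not the real `AutoEvalColumn` names:

```python
import pandas as pd

# Each EvalResult.to_dict() contributes one row like these.
rows = [
    {"Model": "org/model-a", "Precision": "bfloat16", "Type": "chat models (RLHF, DPO, IFT, ...)"},
    {"Model": "org/model-b", "Precision": "4bit", "Type": "pretrained"},
]
df = pd.DataFrame(rows)

# Mirrors the precision filter in filter_models(): rows whose precision is
# selected (or recorded as "None") are kept.
precision_query = ["bfloat16"]
filtered = df.loc[df["Precision"].isin(precision_query + ["None"])]
print(filtered["Model"].tolist())  # ['org/model-a']
```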