sync from github
Browse files- app.py +4 -2
- backend-cli.py +3 -1
- src/display/about.py +2 -2
- src/display/utils.py +36 -33
- src/leaderboard/read_evals.py +11 -10
app.py
CHANGED
@@ -11,6 +11,7 @@ import time
|
|
11 |
from apscheduler.schedulers.background import BackgroundScheduler
|
12 |
|
13 |
from huggingface_hub import snapshot_download
|
|
|
14 |
|
15 |
from src.display.about import (
|
16 |
CITATION_BUTTON_LABEL,
|
@@ -159,6 +160,7 @@ def filter_models(df: pd.DataFrame, type_query: list, size_query: list, precisio
|
|
159 |
type_emoji = [t[0] for t in type_query]
|
160 |
filtered_df = filtered_df.loc[df[AutoEvalColumn.model_type_symbol.name].isin(type_emoji)]
|
161 |
filtered_df = filtered_df.loc[df[AutoEvalColumn.precision.name].isin(precision_query + ["None"])]
|
|
|
162 |
|
163 |
# numeric_interval = pd.IntervalIndex(sorted([NUMERIC_INTERVALS[s] for s in size_query]))
|
164 |
# params_column = pd.to_numeric(df[AutoEvalColumn.params.name], errors="coerce")
|
@@ -257,7 +259,7 @@ with demo:
|
|
257 |
for c in fields(AutoEvalColumn)
|
258 |
if c.displayed_by_default and not c.hidden and not c.never_hidden
|
259 |
],
|
260 |
-
label="
|
261 |
elem_id="column-select",
|
262 |
interactive=True,
|
263 |
)
|
@@ -477,7 +479,7 @@ with demo:
|
|
477 |
show_copy_button=True,
|
478 |
)
|
479 |
|
480 |
-
scheduler = BackgroundScheduler()
|
481 |
|
482 |
scheduler.add_job(restart_space, "interval", hours=6)
|
483 |
|
|
|
11 |
from apscheduler.schedulers.background import BackgroundScheduler
|
12 |
|
13 |
from huggingface_hub import snapshot_download
|
14 |
+
from pytz import utc
|
15 |
|
16 |
from src.display.about import (
|
17 |
CITATION_BUTTON_LABEL,
|
|
|
160 |
type_emoji = [t[0] for t in type_query]
|
161 |
filtered_df = filtered_df.loc[df[AutoEvalColumn.model_type_symbol.name].isin(type_emoji)]
|
162 |
filtered_df = filtered_df.loc[df[AutoEvalColumn.precision.name].isin(precision_query + ["None"])]
|
163 |
+
filtered_df = filtered_df.loc[df[AutoEvalColumn.inference_framework.name].isin(size_query)]
|
164 |
|
165 |
# numeric_interval = pd.IntervalIndex(sorted([NUMERIC_INTERVALS[s] for s in size_query]))
|
166 |
# params_column = pd.to_numeric(df[AutoEvalColumn.params.name], errors="coerce")
|
|
|
259 |
for c in fields(AutoEvalColumn)
|
260 |
if c.displayed_by_default and not c.hidden and not c.never_hidden
|
261 |
],
|
262 |
+
label="Tasks",
|
263 |
elem_id="column-select",
|
264 |
interactive=True,
|
265 |
)
|
|
|
479 |
show_copy_button=True,
|
480 |
)
|
481 |
|
482 |
+
scheduler = BackgroundScheduler(timezone=utc)
|
483 |
|
484 |
scheduler.add_job(restart_space, "interval", hours=6)
|
485 |
|
backend-cli.py
CHANGED
@@ -458,6 +458,7 @@ def get_args():
|
|
458 |
parser.add_argument("--gpu-type", type=str, default="NVIDIA-A100-PCIe-80GB",
|
459 |
help="GPU type. NVIDIA-A100-PCIe-80GB; NVIDIA-RTX-A5000-24GB; NVIDIA-H100-PCIe-80GB")
|
460 |
parser.add_argument("--debug_repo", action="store_true", help="Use debug repo")
|
|
|
461 |
return parser.parse_args()
|
462 |
|
463 |
|
@@ -488,7 +489,8 @@ if __name__ == "__main__":
|
|
488 |
json_filepath="",
|
489 |
precision=precision, # Use precision from arguments
|
490 |
inference_framework=args.inference_framework, # Use inference framework from arguments
|
491 |
-
gpu_type=args.gpu_type
|
|
|
492 |
)
|
493 |
curr_gpu_type = get_gpu_details()
|
494 |
if eval_request.gpu_type != curr_gpu_type:
|
|
|
458 |
parser.add_argument("--gpu-type", type=str, default="NVIDIA-A100-PCIe-80GB",
|
459 |
help="GPU type. NVIDIA-A100-PCIe-80GB; NVIDIA-RTX-A5000-24GB; NVIDIA-H100-PCIe-80GB")
|
460 |
parser.add_argument("--debug_repo", action="store_true", help="Use debug repo")
|
461 |
+
parser.add_argument("--model_type", type=str, default="chat", help="Model type")
|
462 |
return parser.parse_args()
|
463 |
|
464 |
|
|
|
489 |
json_filepath="",
|
490 |
precision=precision, # Use precision from arguments
|
491 |
inference_framework=args.inference_framework, # Use inference framework from arguments
|
492 |
+
gpu_type=args.gpu_type,
|
493 |
+
model_type=args.model_type,
|
494 |
)
|
495 |
curr_gpu_type = get_gpu_details()
|
496 |
if eval_request.gpu_type != curr_gpu_type:
|
src/display/about.py
CHANGED
@@ -19,8 +19,8 @@ Columns and Metrics:
|
|
19 |
- E2E(s): Average End to End generation time in seconds.
|
20 |
- PRE(s): Prefilling Time of input prompt in seconds.
|
21 |
- T/s: Token throughput per second.
|
22 |
-
- MBU(%): Model Bandwidth Utilization.
|
23 |
-
- MFU(%): Model FLOPs Utilization.
|
24 |
- Precision: The precision of the model used.
|
25 |
|
26 |
"""
|
|
|
19 |
- E2E(s): Average End to End generation time in seconds.
|
20 |
- PRE(s): Prefilling Time of input prompt in seconds.
|
21 |
- T/s: Token throughput per second.
|
22 |
+
- S-MBU(%): Sparse Model Bandwidth Utilization.
|
23 |
+
- S-MFU(%): Sparse Model FLOPs Utilization.
|
24 |
- Precision: The precision of the model used.
|
25 |
|
26 |
"""
|
src/display/utils.py
CHANGED
@@ -18,8 +18,8 @@ GPU_Power = 'Power(W)'
|
|
18 |
GPU_Mem = 'Mem(G)'
|
19 |
GPU_Name = "GPU"
|
20 |
GPU_Util = 'Util(%)'
|
21 |
-
MFU = 'MFU(%)'
|
22 |
-
MBU = 'MBU(%)'
|
23 |
BATCH_SIZE = 'bs'
|
24 |
PRECISION = "Precision"
|
25 |
system_metrics_to_name_map = {
|
@@ -106,7 +106,7 @@ auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "ma
|
|
106 |
# # auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Avg", "number", True)])
|
107 |
|
108 |
# Inference framework
|
109 |
-
auto_eval_column_dict.append(["inference_framework", ColumnContent, ColumnContent(f"{InFrame}", "str", True)])
|
110 |
|
111 |
for task in Tasks:
|
112 |
auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
|
@@ -126,15 +126,15 @@ for task in Tasks:
|
|
126 |
|
127 |
|
128 |
# Model information
|
129 |
-
auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
|
130 |
-
auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
|
131 |
-
auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
|
132 |
-
auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", True)])
|
133 |
-
auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False)])
|
134 |
-
auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
|
135 |
-
auto_eval_column_dict.append(["likes", ColumnContent, ColumnContent("Hub ❤️", "number", False)])
|
136 |
-
auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
|
137 |
-
auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
|
138 |
# Dummy column for the search bar (hidden by the custom CSS)
|
139 |
auto_eval_column_dict.append(["dummy", ColumnContent, ColumnContent("model_name_for_query", "str", False, dummy=True)])
|
140 |
|
@@ -160,10 +160,10 @@ class ModelDetails:
|
|
160 |
|
161 |
|
162 |
class ModelType(Enum):
|
163 |
-
PT = ModelDetails(name="pretrained", symbol="🟢")
|
164 |
-
FT = ModelDetails(name="fine-tuned on domain-specific datasets", symbol="🔶")
|
165 |
chat = ModelDetails(name="chat models (RLHF, DPO, IFT, ...)", symbol="💬")
|
166 |
-
merges = ModelDetails(name="base merges and moerges", symbol="🤝")
|
167 |
Unknown = ModelDetails(name="", symbol="?")
|
168 |
|
169 |
def to_str(self, separator=" "):
|
@@ -171,22 +171,23 @@ class ModelType(Enum):
|
|
171 |
|
172 |
@staticmethod
|
173 |
def from_str(type):
|
174 |
-
if "fine-tuned" in type or "🔶" in type:
|
175 |
-
|
176 |
-
if "pretrained" in type or "🟢" in type:
|
177 |
-
|
178 |
if any([k in type for k in ["instruction-tuned", "RL-tuned", "chat", "🟦", "⭕", "💬"]]):
|
179 |
return ModelType.chat
|
180 |
-
if "merge" in type or "🤝" in type:
|
181 |
-
|
182 |
return ModelType.Unknown
|
183 |
|
184 |
|
185 |
class InferenceFramework(Enum):
|
186 |
# "moe-infinity", hf-chat
|
187 |
-
MoE_Infinity = ModelDetails("moe-infinity")
|
188 |
HF_Chat = ModelDetails("hf-chat")
|
189 |
VLLM = ModelDetails("vllm_moe")
|
|
|
190 |
Unknown = ModelDetails("?")
|
191 |
|
192 |
def to_str(self):
|
@@ -194,8 +195,10 @@ class InferenceFramework(Enum):
|
|
194 |
|
195 |
@staticmethod
|
196 |
def from_str(inference_framework: str):
|
197 |
-
if inference_framework in ["moe-infinity"]:
|
198 |
-
|
|
|
|
|
199 |
if inference_framework in ["hf-chat"]:
|
200 |
return InferenceFramework.HF_Chat
|
201 |
if inference_framework in ["vllm_moe"]:
|
@@ -225,28 +228,28 @@ class WeightType(Enum):
|
|
225 |
|
226 |
|
227 |
class Precision(Enum):
|
228 |
-
float32 = ModelDetails("float32")
|
229 |
-
float16 = ModelDetails("float16")
|
230 |
bfloat16 = ModelDetails("bfloat16")
|
231 |
qt_8bit = ModelDetails("8bit")
|
232 |
qt_4bit = ModelDetails("4bit")
|
233 |
-
qt_GPTQ = ModelDetails("GPTQ")
|
234 |
Unknown = ModelDetails("?")
|
235 |
|
236 |
@staticmethod
|
237 |
def from_str(precision: str):
|
238 |
-
if precision in ["torch.float32", "float32"]:
|
239 |
-
|
240 |
-
if precision in ["torch.float16", "float16"]:
|
241 |
-
|
242 |
if precision in ["torch.bfloat16", "bfloat16"]:
|
243 |
return Precision.bfloat16
|
244 |
if precision in ["8bit"]:
|
245 |
return Precision.qt_8bit
|
246 |
if precision in ["4bit"]:
|
247 |
return Precision.qt_4bit
|
248 |
-
if precision in ["GPTQ", "None"]:
|
249 |
-
|
250 |
return Precision.Unknown
|
251 |
|
252 |
|
|
|
18 |
GPU_Mem = 'Mem(G)'
|
19 |
GPU_Name = "GPU"
|
20 |
GPU_Util = 'Util(%)'
|
21 |
+
MFU = 'S-MFU(%)'
|
22 |
+
MBU = 'S-MBU(%)'
|
23 |
BATCH_SIZE = 'bs'
|
24 |
PRECISION = "Precision"
|
25 |
system_metrics_to_name_map = {
|
|
|
106 |
# # auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Avg", "number", True)])
|
107 |
|
108 |
# Inference framework
|
109 |
+
auto_eval_column_dict.append(["inference_framework", ColumnContent, ColumnContent(f"{InFrame}", "str", True, dummy=True)])
|
110 |
|
111 |
for task in Tasks:
|
112 |
auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
|
|
|
126 |
|
127 |
|
128 |
# Model information
|
129 |
+
auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False, dummy=True)])
|
130 |
+
# auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
|
131 |
+
# auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
|
132 |
+
auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", True, dummy=True)])
|
133 |
+
# auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False)])
|
134 |
+
# auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
|
135 |
+
# auto_eval_column_dict.append(["likes", ColumnContent, ColumnContent("Hub ❤️", "number", False)])
|
136 |
+
# auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Available on the hub", "bool", False)])
|
137 |
+
# auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
|
138 |
# Dummy column for the search bar (hidden by the custom CSS)
|
139 |
auto_eval_column_dict.append(["dummy", ColumnContent, ColumnContent("model_name_for_query", "str", False, dummy=True)])
|
140 |
|
|
|
160 |
|
161 |
|
162 |
class ModelType(Enum):
|
163 |
+
# PT = ModelDetails(name="pretrained", symbol="🟢")
|
164 |
+
# FT = ModelDetails(name="fine-tuned on domain-specific datasets", symbol="🔶")
|
165 |
chat = ModelDetails(name="chat models (RLHF, DPO, IFT, ...)", symbol="💬")
|
166 |
+
# merges = ModelDetails(name="base merges and moerges", symbol="🤝")
|
167 |
Unknown = ModelDetails(name="", symbol="?")
|
168 |
|
169 |
def to_str(self, separator=" "):
|
|
|
171 |
|
172 |
@staticmethod
|
173 |
def from_str(type):
|
174 |
+
# if "fine-tuned" in type or "🔶" in type:
|
175 |
+
# return ModelType.FT
|
176 |
+
# if "pretrained" in type or "🟢" in type:
|
177 |
+
# return ModelType.PT
|
178 |
if any([k in type for k in ["instruction-tuned", "RL-tuned", "chat", "🟦", "⭕", "💬"]]):
|
179 |
return ModelType.chat
|
180 |
+
# if "merge" in type or "🤝" in type:
|
181 |
+
# return ModelType.merges
|
182 |
return ModelType.Unknown
|
183 |
|
184 |
|
185 |
class InferenceFramework(Enum):
|
186 |
# "moe-infinity", hf-chat
|
187 |
+
# MoE_Infinity = ModelDetails("moe-infinity")
|
188 |
HF_Chat = ModelDetails("hf-chat")
|
189 |
VLLM = ModelDetails("vllm_moe")
|
190 |
+
TRTLLM = ModelDetails("tensorrt_llm")
|
191 |
Unknown = ModelDetails("?")
|
192 |
|
193 |
def to_str(self):
|
|
|
195 |
|
196 |
@staticmethod
|
197 |
def from_str(inference_framework: str):
|
198 |
+
# if inference_framework in ["moe-infinity"]:
|
199 |
+
# return InferenceFramework.MoE_Infinity
|
200 |
+
if inference_framework in ["tensorrt_llm"]:
|
201 |
+
return InferenceFramework.TRTLLM
|
202 |
if inference_framework in ["hf-chat"]:
|
203 |
return InferenceFramework.HF_Chat
|
204 |
if inference_framework in ["vllm_moe"]:
|
|
|
228 |
|
229 |
|
230 |
class Precision(Enum):
|
231 |
+
# float32 = ModelDetails("float32")
|
232 |
+
# float16 = ModelDetails("float16")
|
233 |
bfloat16 = ModelDetails("bfloat16")
|
234 |
qt_8bit = ModelDetails("8bit")
|
235 |
qt_4bit = ModelDetails("4bit")
|
236 |
+
# qt_GPTQ = ModelDetails("GPTQ")
|
237 |
Unknown = ModelDetails("?")
|
238 |
|
239 |
@staticmethod
|
240 |
def from_str(precision: str):
|
241 |
+
# if precision in ["torch.float32", "float32"]:
|
242 |
+
# return Precision.float32
|
243 |
+
# if precision in ["torch.float16", "float16"]:
|
244 |
+
# return Precision.float16
|
245 |
if precision in ["torch.bfloat16", "bfloat16"]:
|
246 |
return Precision.bfloat16
|
247 |
if precision in ["8bit"]:
|
248 |
return Precision.qt_8bit
|
249 |
if precision in ["4bit"]:
|
250 |
return Precision.qt_4bit
|
251 |
+
# if precision in ["GPTQ", "None"]:
|
252 |
+
# return Precision.qt_GPTQ
|
253 |
return Precision.Unknown
|
254 |
|
255 |
|
src/leaderboard/read_evals.py
CHANGED
@@ -140,6 +140,7 @@ class EvalResult:
|
|
140 |
revision=config.get("model_sha", ""),
|
141 |
still_on_hub=still_on_hub,
|
142 |
architecture=architecture,
|
|
|
143 |
inference_framework=inference_framework,
|
144 |
)
|
145 |
|
@@ -174,22 +175,22 @@ class EvalResult:
|
|
174 |
|
175 |
# breakpoint()
|
176 |
# average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
|
177 |
-
|
178 |
data_dict = {
|
179 |
"eval_name": self.eval_name, # not a column, just a save name,
|
180 |
AutoEvalColumn.precision.name: self.precision.value.name,
|
181 |
-
AutoEvalColumn.model_type.name: self.model_type.value.name,
|
182 |
AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
|
183 |
-
AutoEvalColumn.weight_type.name: self.weight_type.value.name,
|
184 |
-
AutoEvalColumn.architecture.name: self.architecture,
|
185 |
AutoEvalColumn.model.name: make_clickable_model(self.full_model),
|
186 |
AutoEvalColumn.dummy.name: self.full_model,
|
187 |
-
AutoEvalColumn.revision.name: self.revision,
|
188 |
-
# AutoEvalColumn.average.name: average,
|
189 |
-
AutoEvalColumn.license.name: self.license,
|
190 |
-
AutoEvalColumn.likes.name: self.likes,
|
191 |
-
AutoEvalColumn.params.name: self.num_params,
|
192 |
-
AutoEvalColumn.still_on_hub.name: self.still_on_hub,
|
193 |
AutoEvalColumn.inference_framework.name: self.inference_framework,
|
194 |
}
|
195 |
|
|
|
140 |
revision=config.get("model_sha", ""),
|
141 |
still_on_hub=still_on_hub,
|
142 |
architecture=architecture,
|
143 |
+
model_type=ModelType.from_str(config.get("model_type", "")),
|
144 |
inference_framework=inference_framework,
|
145 |
)
|
146 |
|
|
|
175 |
|
176 |
# breakpoint()
|
177 |
# average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
|
178 |
+
|
179 |
data_dict = {
|
180 |
"eval_name": self.eval_name, # not a column, just a save name,
|
181 |
AutoEvalColumn.precision.name: self.precision.value.name,
|
182 |
+
# AutoEvalColumn.model_type.name: self.model_type.value.name,
|
183 |
AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
|
184 |
+
# AutoEvalColumn.weight_type.name: self.weight_type.value.name,
|
185 |
+
# AutoEvalColumn.architecture.name: self.architecture,
|
186 |
AutoEvalColumn.model.name: make_clickable_model(self.full_model),
|
187 |
AutoEvalColumn.dummy.name: self.full_model,
|
188 |
+
# AutoEvalColumn.revision.name: self.revision,
|
189 |
+
# # AutoEvalColumn.average.name: average,
|
190 |
+
# AutoEvalColumn.license.name: self.license,
|
191 |
+
# AutoEvalColumn.likes.name: self.likes,
|
192 |
+
# AutoEvalColumn.params.name: self.num_params,
|
193 |
+
# AutoEvalColumn.still_on_hub.name: self.still_on_hub,
|
194 |
AutoEvalColumn.inference_framework.name: self.inference_framework,
|
195 |
}
|
196 |
|