add benchmark_columns

- app.py +14 -2
- src/display/utils.py +10 -10
app.py CHANGED

@@ -96,6 +96,7 @@ def update_table(
 ):
     filtered_df = filter_models(hidden_df, type_query, size_query, precision_query)
     filtered_df = filter_queries(query, filtered_df)
+    columns.extend(add_benchmark_columns(columns))
     df = select_columns(filtered_df, columns)
     return df

@@ -151,6 +152,16 @@ def filter_models(df: pd.DataFrame, type_query: list, size_query: list, precisio
     # filtered_df = filtered_df.loc[mask]

     return filtered_df
+
+
+def add_benchmark_columns(shown_columns):
+    benchmark_columns = []
+    for benchmark in BENCHMARK_COLS:
+        if benchmark in shown_columns:
+            for c in COLS:
+                if benchmark in c and benchmark != c:
+                    benchmark_columns.append(c)
+    return benchmark_columns

 shown_columns = None
 dataset_df, original_df, finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = init_space()

@@ -270,18 +281,19 @@ with demo:
             # )

             # breakpoint()
-
+            benchmark_columns = add_benchmark_columns(shown_columns.value)
             leaderboard_table = gr.components.Dataframe(
                 value=(
                     leaderboard_df[
                         [c.name for c in fields(AutoEvalColumn) if c.never_hidden]
                         + shown_columns.value
+                        + benchmark_columns
                         + [AutoEvalColumn.dummy.name]
                     ]
                     if leaderboard_df.empty is False
                     else leaderboard_df
                 ),
-                headers=[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value,
+                headers=[c.name for c in fields(AutoEvalColumn) if c.never_hidden] + shown_columns.value + benchmark_columns,
                 datatype=TYPES,
                 elem_id="leaderboard-table",
                 interactive=False,
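As a sanity check on the new helper, here is a small self-contained sketch of what add_benchmark_columns returns. The column names below are made-up stand-ins; in the app the real BENCHMARK_COLS and COLS are imported from src/display/utils.py. For every benchmark column the user has ticked, the helper also collects every other column whose name contains that benchmark's display name, i.e. the per-benchmark system metrics that are now flagged hidden=True and so no longer appear in the column picker.

# Hypothetical stand-in values; the real BENCHMARK_COLS and COLS come from src/display/utils.py.
BENCHMARK_COLS = ["MMLU"]
COLS = ["Model", "MMLU", "MMLU End-to-end time (s)", "MMLU GPU Memory (GB)", "GSM8K"]

def add_benchmark_columns(shown_columns):
    # For each selected benchmark, also collect every column whose name
    # contains that benchmark's display name (its per-benchmark metrics).
    benchmark_columns = []
    for benchmark in BENCHMARK_COLS:
        if benchmark in shown_columns:
            for c in COLS:
                if benchmark in c and benchmark != c:
                    benchmark_columns.append(c)
    return benchmark_columns

print(add_benchmark_columns(["Model", "MMLU"]))
# -> ['MMLU End-to-end time (s)', 'MMLU GPU Memory (GB)']

Note that the match is a plain substring check, so the helper relies on metric columns being named "<benchmark> <metric>" and on benchmark display names not being substrings of one another.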
src/display/utils.py CHANGED

@@ -104,16 +104,16 @@ auto_eval_column_dict.append(["inference_framework", ColumnContent, ColumnConten
 for task in Tasks:
     auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])
     # System performance metrics
-    auto_eval_column_dict.append([f"{task.name}_end_to_end_time", ColumnContent, ColumnContent(f"{task.value.col_name} {E2Es}", "number", True)])
-    auto_eval_column_dict.append([f"{task.name}_batch_size", ColumnContent, ColumnContent(f"{task.value.col_name} {BATCH_SIZE}", "number", True)])
-    # auto_eval_column_dict.append([f"{task.name}_precision", ColumnContent, ColumnContent(f"{task.value.col_name} {PRECISION}", "str", True)])
-    auto_eval_column_dict.append([f"{task.name}_gpu_mem", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_Mem}", "number", True)])
-    auto_eval_column_dict.append([f"{task.name}_gpu", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_Name}", "str", True)])
-    auto_eval_column_dict.append([f"{task.name}_gpu_util", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_Util}", "number", True)])
+    auto_eval_column_dict.append([f"{task.name}_end_to_end_time", ColumnContent, ColumnContent(f"{task.value.col_name} {E2Es}", "number", True, hidden=True)])
+    auto_eval_column_dict.append([f"{task.name}_batch_size", ColumnContent, ColumnContent(f"{task.value.col_name} {BATCH_SIZE}", "number", True, hidden=True)])
+    # auto_eval_column_dict.append([f"{task.name}_precision", ColumnContent, ColumnContent(f"{task.value.col_name} {PRECISION}", "str", True, hidden=True)])
+    auto_eval_column_dict.append([f"{task.name}_gpu_mem", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_Mem}", "number", True, hidden=True)])
+    auto_eval_column_dict.append([f"{task.name}_gpu", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_Name}", "str", True, hidden=True)])
+    auto_eval_column_dict.append([f"{task.name}_gpu_util", ColumnContent, ColumnContent(f"{task.value.col_name} {GPU_Util}", "number", True, hidden=True)])
     if task.value.benchmark in MULTIPLE_CHOICEs:
         continue
-    # auto_eval_column_dict.append([f"{task.name}_prefilling_time", ColumnContent, ColumnContent(f"{task.value.col_name} {PREs}", "number", False)])
-    auto_eval_column_dict.append([f"{task.name}_decoding_throughput", ColumnContent, ColumnContent(f"{task.value.col_name} {TS}", "number", True)])
+    # auto_eval_column_dict.append([f"{task.name}_prefilling_time", ColumnContent, ColumnContent(f"{task.value.col_name} {PREs}", "number", False, hidden=True)])
+    auto_eval_column_dict.append([f"{task.name}_decoding_throughput", ColumnContent, ColumnContent(f"{task.value.col_name} {TS}", "number", True, hidden=True)])


 # Model information

@@ -242,8 +242,8 @@ class Precision(Enum):


 # Column selection
-COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
-TYPES = [c.type for c in fields(AutoEvalColumn) if not c.hidden]
+COLS = [c.name for c in fields(AutoEvalColumn)]
+TYPES = [c.type for c in fields(AutoEvalColumn)]
 COLS_LITE = [c.name for c in fields(AutoEvalColumn) if c.displayed_by_default and not c.hidden]
 TYPES_LITE = [c.type for c in fields(AutoEvalColumn) if c.displayed_by_default and not c.hidden]
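The two changes here work together: the per-benchmark system metric columns are flagged hidden=True so they drop out of any list filtered on "not c.hidden", while COLS/TYPES stop filtering on hidden so that add_benchmark_columns and select_columns in app.py can still find those columns and show them. A minimal sketch of that interaction follows; the ColumnContent fields are assumed to mirror the ones in src/display/utils.py, and the AutoEvalColumn dataclass machinery is replaced by a plain list for brevity.

from dataclasses import dataclass

# Assumed shape of ColumnContent, reduced to the fields used here.
@dataclass(frozen=True)
class ColumnContent:
    name: str
    type: str
    displayed_by_default: bool = True
    hidden: bool = False

columns = [
    ColumnContent("MMLU", "number"),
    ColumnContent("MMLU End-to-end time (s)", "number", True, hidden=True),  # now hidden
    ColumnContent("MMLU GPU Memory (GB)", "number", True, hidden=True),      # now hidden
]

# After this commit, COLS keeps every column, hidden ones included ...
COLS = [c.name for c in columns]
# ... while the _LITE lists keep only columns meant to be shown by default.
COLS_LITE = [c.name for c in columns if c.displayed_by_default and not c.hidden]

print(COLS)       # ['MMLU', 'MMLU End-to-end time (s)', 'MMLU GPU Memory (GB)']
print(COLS_LITE)  # ['MMLU']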