Spaces:
Running
Running
BenchmarkBot
committed on
Commit
•
d19e350
1
Parent(s):
4b40065
fix
Browse files
app.py
CHANGED
@@ -134,7 +134,7 @@ def get_benchmark_table(bench_df):
|
|
134 |
return copy_df
|
135 |
|
136 |
|
137 |
-
def
|
138 |
# filter latency bigger than 150s
|
139 |
bench_df = bench_df[bench_df["generate.latency(s)"] <= 150]
|
140 |
|
@@ -155,7 +155,7 @@ def get_benchmark_plot(bench_df):
|
|
155 |
"xanchor": "center",
|
156 |
"yanchor": "top",
|
157 |
},
|
158 |
-
xaxis_title="
|
159 |
yaxis_title="Open LLM Score (%)",
|
160 |
legend_title="LLM Type",
|
161 |
width=1200,
|
@@ -213,8 +213,8 @@ def filter_query(
|
|
213 |
& (raw_df["forward.peak_memory(MB)"] <= memory)
|
214 |
]
|
215 |
filtered_table = get_benchmark_table(filtered_df)
|
216 |
-
|
217 |
-
return filtered_table,
|
218 |
|
219 |
|
220 |
# Demo interface
|
@@ -226,13 +226,13 @@ with demo:
|
|
226 |
gr.Markdown(INTRODUCTION_TEXT, elem_classes="descriptive-text")
|
227 |
|
228 |
with gr.Tabs(elem_classes="leaderboard-tabs"):
|
|
|
229 |
hardware_plots = {}
|
230 |
-
hardware_learboards = {}
|
231 |
####################### HARDWARE TABS #######################
|
232 |
for hardware in ["A100-80GB", "RTX4090-24GB"]:
|
233 |
hardware_df = get_benchmark_df(benchmark=f"Succeeded-1x{hardware}")
|
234 |
-
|
235 |
-
|
236 |
del hardware_df
|
237 |
with gr.TabItem(f"{hardware} 🖥️", id=hardware):
|
238 |
with gr.Tabs(elem_classes="hardware-tabs"):
|
@@ -242,8 +242,8 @@ with demo:
|
|
242 |
elem_id="descriptive-text",
|
243 |
)
|
244 |
# Original leaderboard table
|
245 |
-
|
246 |
-
value=
|
247 |
headers=list(ALL_COLUMNS_MAPPING.values()),
|
248 |
datatype=ALL_COLUMNS_DATATYPES,
|
249 |
elem_id="hardware-leaderboard",
|
@@ -255,8 +255,8 @@ with demo:
|
|
255 |
elem_id="descriptive-text",
|
256 |
)
|
257 |
# Original leaderboard plot
|
258 |
-
|
259 |
-
value=
|
260 |
elem_id="hardware-plot",
|
261 |
show_label=False,
|
262 |
)
|
@@ -342,4 +342,33 @@ with demo:
|
|
342 |
score_slider,
|
343 |
memory_slider,
|
344 |
],
|
345 |
-
[
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
134 |
return copy_df
|
135 |
|
136 |
|
137 |
+
def get_benchmark_chart(bench_df):
|
138 |
# filter latency bigger than 150s
|
139 |
bench_df = bench_df[bench_df["generate.latency(s)"] <= 150]
|
140 |
|
|
|
155 |
"xanchor": "center",
|
156 |
"yanchor": "top",
|
157 |
},
|
158 |
+
xaxis_title="Per 1000 tokens Latency (s)",
|
159 |
yaxis_title="Open LLM Score (%)",
|
160 |
legend_title="LLM Type",
|
161 |
width=1200,
|
|
|
213 |
& (raw_df["forward.peak_memory(MB)"] <= memory)
|
214 |
]
|
215 |
filtered_table = get_benchmark_table(filtered_df)
|
216 |
+
filtered_chart = get_benchmark_chart(filtered_df)
|
217 |
+
return filtered_table, filtered_chart
|
218 |
|
219 |
|
220 |
# Demo interface
|
|
|
226 |
gr.Markdown(INTRODUCTION_TEXT, elem_classes="descriptive-text")
|
227 |
|
228 |
with gr.Tabs(elem_classes="leaderboard-tabs"):
|
229 |
+
hardware_dataframes = {}
|
230 |
hardware_plots = {}
|
|
|
231 |
####################### HARDWARE TABS #######################
|
232 |
for hardware in ["A100-80GB", "RTX4090-24GB"]:
|
233 |
hardware_df = get_benchmark_df(benchmark=f"Succeeded-1x{hardware}")
|
234 |
+
hardware_table = get_benchmark_table(hardware_df)
|
235 |
+
hardware_chart = get_benchmark_chart(hardware_df)
|
236 |
del hardware_df
|
237 |
with gr.TabItem(f"{hardware} 🖥️", id=hardware):
|
238 |
with gr.Tabs(elem_classes="hardware-tabs"):
|
|
|
242 |
elem_id="descriptive-text",
|
243 |
)
|
244 |
# Original leaderboard table
|
245 |
+
hardware_dataframes[hardware] = gr.components.Dataframe(
|
246 |
+
value=hardware_table,
|
247 |
headers=list(ALL_COLUMNS_MAPPING.values()),
|
248 |
datatype=ALL_COLUMNS_DATATYPES,
|
249 |
elem_id="hardware-leaderboard",
|
|
|
255 |
elem_id="descriptive-text",
|
256 |
)
|
257 |
# Original leaderboard plot
|
258 |
+
hardware_plots[hardware] = gr.components.Plot(
|
259 |
+
value=hardware_chart,
|
260 |
elem_id="hardware-plot",
|
261 |
show_label=False,
|
262 |
)
|
|
|
342 |
score_slider,
|
343 |
memory_slider,
|
344 |
],
|
345 |
+
[hardware_dataframes[hardware], hardware_plots[hardware]],
|
346 |
+
)
|
347 |
+
|
348 |
+
####################### ABOUT TAB #######################
|
349 |
+
with gr.TabItem("About π", id=3):
|
350 |
+
gr.HTML(ABOUT_TEXT, elem_classes="descriptive-text")
|
351 |
+
gr.Markdown(EXAMPLE_CONFIG_TEXT, elem_classes="descriptive-text")
|
352 |
+
|
353 |
+
####################### CITATION #######################
|
354 |
+
with gr.Row():
|
355 |
+
with gr.Accordion("π Citation", open=False):
|
356 |
+
citation_button = gr.Textbox(
|
357 |
+
value=CITATION_BUTTON_TEXT,
|
358 |
+
label=CITATION_BUTTON_LABEL,
|
359 |
+
elem_id="citation-button",
|
360 |
+
).style(show_copy_button=True)
|
361 |
+
|
362 |
+
|
363 |
+
# Restart space every hour
|
364 |
+
scheduler = BackgroundScheduler()
|
365 |
+
scheduler.add_job(
|
366 |
+
restart_space,
|
367 |
+
"interval",
|
368 |
+
seconds=3600,
|
369 |
+
args=[LLM_PERF_LEADERBOARD_REPO, OPTIMUM_TOKEN],
|
370 |
+
)
|
371 |
+
scheduler.start()
|
372 |
+
|
373 |
+
# Launch demo
|
374 |
+
demo.queue(concurrency_count=10).launch()
|