Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -10,7 +10,7 @@ import pandas as pd
|
|
10 |
|
11 |
|
12 |
basic_component_values = [None] * 6
|
13 |
-
leader_component_values = [None]
|
14 |
|
15 |
def make_default_md(arena_df, elo_results):
|
16 |
total_votes = sum(arena_df["num_battles"]) // 2
|
@@ -70,12 +70,7 @@ def update_elo_components(max_num_files, elo_results_file):
|
|
70 |
if elo_results_file is None: # Do live update
|
71 |
battles = clean_battle_data(log_files)
|
72 |
elo_results = report_elo_analysis_results(battles)
|
73 |
-
|
74 |
leader_component_values[0] = make_leaderboard_md_live(elo_results)
|
75 |
-
leader_component_values[1] = elo_results["win_fraction_heatmap"]
|
76 |
-
leader_component_values[2] = elo_results["battle_count_heatmap"]
|
77 |
-
leader_component_values[3] = elo_results["bootstrap_elo_rating"]
|
78 |
-
leader_component_values[4] = elo_results["average_win_rate_bar"]
|
79 |
|
80 |
# Basic stats
|
81 |
basic_stats = report_basic_stats(log_files)
|
@@ -282,10 +277,10 @@ cat_name_to_explanation = {
|
|
282 |
"Overall": "Overall Questions",
|
283 |
}
|
284 |
|
285 |
-
def build_leaderboard_tab(
|
286 |
arena_dfs = {}
|
287 |
category_elo_results = {}
|
288 |
-
if
|
289 |
default_md = "Loading ..."
|
290 |
else:
|
291 |
with open(elo_results_file, "rb") as fin:
|
@@ -309,9 +304,49 @@ def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=Fa
|
|
309 |
with gr.Tabs() as tabs:
|
310 |
# arena table
|
311 |
arena_table_vals = get_arena_table(arena_df, model_table_df)
|
312 |
-
|
313 |
-
|
314 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
315 |
# with gr.Row():
|
316 |
# with gr.Column(scale=2):
|
317 |
# category_dropdown = gr.Dropdown(choices=list(arena_dfs.keys()), label="Category", value="Overall")
|
@@ -319,63 +354,30 @@ def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=Fa
|
|
319 |
# with gr.Column(scale=4, variant="panel"):
|
320 |
# category_deets = gr.Markdown(default_category_details, elem_id="category_deets")
|
321 |
|
322 |
-
#
|
|
|
323 |
# headers=[
|
324 |
-
# "Rank",
|
325 |
# "🤖 Model",
|
326 |
-
# "⭐
|
|
|
|
|
327 |
# "Organization",
|
328 |
# "License",
|
329 |
# ],
|
330 |
-
# datatype=[
|
331 |
-
#
|
332 |
-
#
|
333 |
-
#
|
334 |
-
# "str",
|
335 |
-
# "str",
|
336 |
-
# ],
|
337 |
-
# value=arena_table_vals,
|
338 |
-
# elem_id="arena_leaderboard_dataframe",
|
339 |
# height=700,
|
340 |
-
# column_widths=[70, 190, 110, 160, 150, 140],
|
341 |
# wrap=True,
|
342 |
# )
|
343 |
-
|
344 |
# gr.Markdown(
|
345 |
# f"""Note: .
|
346 |
-
#
|
347 |
# elem_id="leaderboard_markdown"
|
348 |
# )
|
349 |
|
350 |
# leader_component_values[:] = [default_md]
|
351 |
-
|
352 |
-
with gr.Tab("Full Leaderboard", id=0):
|
353 |
-
md = make_full_leaderboard_md(elo_results)
|
354 |
-
gr.Markdown(md, elem_id="leaderboard_markdown")
|
355 |
-
with gr.Row():
|
356 |
-
with gr.Column(scale=2):
|
357 |
-
category_dropdown = gr.Dropdown(choices=list(arena_dfs.keys()), label="Category", value="Overall")
|
358 |
-
default_category_details = make_category_arena_leaderboard_md(arena_df, arena_df, name="Overall")
|
359 |
-
with gr.Column(scale=4, variant="panel"):
|
360 |
-
category_deets = gr.Markdown(default_category_details, elem_id="category_deets")
|
361 |
-
|
362 |
-
full_table_vals = get_full_table(model_table_df)
|
363 |
-
display_df = gr.Dataframe(
|
364 |
-
headers=[
|
365 |
-
"🤖 Model",
|
366 |
-
"⭐ Task 1",
|
367 |
-
"📈 Task 2",
|
368 |
-
"📚 Task 3",
|
369 |
-
"Organization",
|
370 |
-
"License",
|
371 |
-
],
|
372 |
-
datatype=["markdown", "number", "number", "number", "str", "str"],
|
373 |
-
value=full_table_vals,
|
374 |
-
elem_id="full_leaderboard_dataframe",
|
375 |
-
column_widths=[200, 100, 100, 100, 150, 150],
|
376 |
-
height=700,
|
377 |
-
wrap=True,
|
378 |
-
)
|
379 |
if not show_plot:
|
380 |
gr.Markdown(
|
381 |
""" ## Submit your model [here]().
|
|
|
10 |
|
11 |
|
12 |
basic_component_values = [None] * 6
|
13 |
+
leader_component_values = [None]
|
14 |
|
15 |
def make_default_md(arena_df, elo_results):
|
16 |
total_votes = sum(arena_df["num_battles"]) // 2
|
|
|
70 |
if elo_results_file is None: # Do live update
|
71 |
battles = clean_battle_data(log_files)
|
72 |
elo_results = report_elo_analysis_results(battles)
|
|
|
73 |
leader_component_values[0] = make_leaderboard_md_live(elo_results)
|
|
|
|
|
|
|
|
|
74 |
|
75 |
# Basic stats
|
76 |
basic_stats = report_basic_stats(log_files)
|
|
|
277 |
"Overall": "Overall Questions",
|
278 |
}
|
279 |
|
280 |
+
def build_leaderboard_tab(results_file, leaderboard_table_file, show_plot=False):
|
281 |
arena_dfs = {}
|
282 |
category_elo_results = {}
|
283 |
+
if results_file is None: # Do live update
|
284 |
default_md = "Loading ..."
|
285 |
else:
|
286 |
with open(elo_results_file, "rb") as fin:
|
|
|
304 |
with gr.Tabs() as tabs:
|
305 |
# arena table
|
306 |
arena_table_vals = get_arena_table(arena_df, model_table_df)
|
307 |
+
with gr.Tab("Arena Elo", id=0):
|
308 |
+
md = make_arena_leaderboard_md(arena_df)
|
309 |
+
leaderboard_markdown = gr.Markdown(md, elem_id="leaderboard_markdown")
|
310 |
+
with gr.Row():
|
311 |
+
with gr.Column(scale=2):
|
312 |
+
category_dropdown = gr.Dropdown(choices=list(arena_dfs.keys()), label="Category", value="Overall")
|
313 |
+
default_category_details = make_category_arena_leaderboard_md(arena_df, arena_df, name="Overall")
|
314 |
+
with gr.Column(scale=4, variant="panel"):
|
315 |
+
category_deets = gr.Markdown(default_category_details, elem_id="category_deets")
|
316 |
+
|
317 |
+
elo_display_df = gr.Dataframe(
|
318 |
+
headers=[
|
319 |
+
"Rank",
|
320 |
+
"🤖 Model",
|
321 |
+
"⭐ Arena Elo",
|
322 |
+
"Organization",
|
323 |
+
"License",
|
324 |
+
],
|
325 |
+
datatype=[
|
326 |
+
"number",
|
327 |
+
"markdown",
|
328 |
+
"number",
|
329 |
+
"str",
|
330 |
+
"str",
|
331 |
+
],
|
332 |
+
value=arena_table_vals,
|
333 |
+
elem_id="arena_leaderboard_dataframe",
|
334 |
+
height=700,
|
335 |
+
column_widths=[70, 190, 110, 160, 150, 140],
|
336 |
+
wrap=True,
|
337 |
+
)
|
338 |
+
|
339 |
+
gr.Markdown(
|
340 |
+
f"""Note: .
|
341 |
+
""",
|
342 |
+
elem_id="leaderboard_markdown"
|
343 |
+
)
|
344 |
+
|
345 |
+
leader_component_values[:] = [default_md]
|
346 |
+
|
347 |
+
# with gr.Tab("Full Leaderboard", id=0):
|
348 |
+
# md = make_full_leaderboard_md(elo_results)
|
349 |
+
# gr.Markdown(md, elem_id="leaderboard_markdown")
|
350 |
# with gr.Row():
|
351 |
# with gr.Column(scale=2):
|
352 |
# category_dropdown = gr.Dropdown(choices=list(arena_dfs.keys()), label="Category", value="Overall")
|
|
|
354 |
# with gr.Column(scale=4, variant="panel"):
|
355 |
# category_deets = gr.Markdown(default_category_details, elem_id="category_deets")
|
356 |
|
357 |
+
# full_table_vals = get_full_table(model_table_df)
|
358 |
+
# display_df = gr.Dataframe(
|
359 |
# headers=[
|
|
|
360 |
# "🤖 Model",
|
361 |
+
# "⭐ Task 1",
|
362 |
+
# "📈 Task 2",
|
363 |
+
# "📚 Task 3",
|
364 |
# "Organization",
|
365 |
# "License",
|
366 |
# ],
|
367 |
+
# datatype=["markdown", "number", "number", "number", "str", "str"],
|
368 |
+
# value=full_table_vals,
|
369 |
+
# elem_id="full_leaderboard_dataframe",
|
370 |
+
# column_widths=[200, 100, 100, 100, 150, 150],
|
|
|
|
|
|
|
|
|
|
|
371 |
# height=700,
|
|
|
372 |
# wrap=True,
|
373 |
# )
|
|
|
374 |
# gr.Markdown(
|
375 |
# f"""Note: .
|
376 |
+
# """,
|
377 |
# elem_id="leaderboard_markdown"
|
378 |
# )
|
379 |
|
380 |
# leader_component_values[:] = [default_md]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
381 |
if not show_plot:
|
382 |
gr.Markdown(
|
383 |
""" ## Submit your model [here]().
|