Spaces:
Running
Running
Amber Tanaka
committed on
Plot Adjustments (#19)
Browse files
- leaderboard_transformer.py +56 -29
- ui_components.py +8 -7
leaderboard_transformer.py
CHANGED
|
@@ -234,7 +234,7 @@ class DataTransformer:
|
|
| 234 |
# The 'Submitter' column is no longer needed
|
| 235 |
df_view = df_view.drop(columns=['Submitter'])
|
| 236 |
|
| 237 |
-
# 4. Build the List of Columns to Display
|
| 238 |
base_cols = ["id","Agent","LLM Base", "agent_for_hover"]
|
| 239 |
new_cols = ["Openness", "Agent Tooling"]
|
| 240 |
ending_cols = ["Logs"]
|
|
@@ -295,7 +295,8 @@ class DataTransformer:
|
|
| 295 |
data=df_view,
|
| 296 |
x=primary_cost_col,
|
| 297 |
y=primary_score_col,
|
| 298 |
-
agent_col="agent_for_hover"
|
|
|
|
| 299 |
)
|
| 300 |
# Use a consistent key for easy retrieval later
|
| 301 |
plots['scatter_plot'] = fig
|
|
@@ -315,7 +316,8 @@ def _plot_scatter_plotly(
|
|
| 315 |
data: pd.DataFrame,
|
| 316 |
x: Optional[str],
|
| 317 |
y: str,
|
| 318 |
-
agent_col: str = 'agent_for_hover'
|
|
|
|
| 319 |
) -> go.Figure:
|
| 320 |
|
| 321 |
# --- Section 1: Define Mappings ---
|
|
@@ -326,7 +328,6 @@ def _plot_scatter_plotly(
|
|
| 326 |
"Open Source + Open Weights": "blue"
|
| 327 |
}
|
| 328 |
category_order = list(color_map.keys())
|
| 329 |
-
|
| 330 |
shape_map = {
|
| 331 |
"Standard": "star",
|
| 332 |
"Custom with Standard Search": "diamond",
|
|
@@ -337,6 +338,7 @@ def _plot_scatter_plotly(
|
|
| 337 |
x_col_to_use = x
|
| 338 |
y_col_to_use = y
|
| 339 |
|
|
|
|
| 340 |
required_cols = [y_col_to_use, agent_col, "Openness", "Agent Tooling"]
|
| 341 |
if not all(col in data.columns for col in required_cols):
|
| 342 |
logger.error(f"Missing one or more required columns for plotting: {required_cols}")
|
|
@@ -345,21 +347,39 @@ def _plot_scatter_plotly(
|
|
| 345 |
data_plot = data.copy()
|
| 346 |
data_plot[y_col_to_use] = pd.to_numeric(data_plot[y_col_to_use], errors='coerce')
|
| 347 |
|
| 348 |
-
x_axis_label = f"
|
| 349 |
-
|
|
|
|
|
|
|
| 350 |
if x and x in data_plot.columns:
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
|
| 358 |
-
|
| 359 |
-
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 363 |
|
| 364 |
# Clean data based on all necessary columns
|
| 365 |
data_plot.dropna(subset=[y_col_to_use, x_col_to_use, "Openness", "Agent Tooling"], inplace=True)
|
|
@@ -370,8 +390,8 @@ def _plot_scatter_plotly(
|
|
| 370 |
logger.warning(f"No valid data to plot after cleaning.")
|
| 371 |
return fig
|
| 372 |
|
| 373 |
-
# --- Section 4: Calculate and Draw Pareto Frontier
|
| 374 |
-
if
|
| 375 |
sorted_data = data_plot.sort_values(by=[x_col_to_use, y_col_to_use], ascending=[True, False])
|
| 376 |
frontier_points = []
|
| 377 |
max_score_so_far = float('-inf')
|
|
@@ -451,20 +471,27 @@ def _plot_scatter_plotly(
|
|
| 451 |
))
|
| 452 |
|
| 453 |
# --- Section 8: Configure Layout (Restored from your original code) ---
|
| 454 |
-
xaxis_config = dict(title=x_axis_label)
|
| 455 |
-
if
|
| 456 |
-
|
| 457 |
-
|
| 458 |
-
|
| 459 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 460 |
|
| 461 |
logo_data_uri = svg_to_data_uri("assets/just-icon.svg")
|
| 462 |
|
| 463 |
fig.update_layout(
|
| 464 |
template="plotly_white",
|
| 465 |
-
title=f"{
|
| 466 |
-
xaxis=xaxis_config,
|
| 467 |
-
yaxis=dict(title=
|
| 468 |
legend=dict(
|
| 469 |
bgcolor='#FAF2E9',
|
| 470 |
)
|
|
|
|
| 234 |
# The 'Submitter' column is no longer needed
|
| 235 |
df_view = df_view.drop(columns=['Submitter'])
|
| 236 |
|
| 237 |
+
# 4. Build the List of Columns to Display
|
| 238 |
base_cols = ["id","Agent","LLM Base", "agent_for_hover"]
|
| 239 |
new_cols = ["Openness", "Agent Tooling"]
|
| 240 |
ending_cols = ["Logs"]
|
|
|
|
| 295 |
data=df_view,
|
| 296 |
x=primary_cost_col,
|
| 297 |
y=primary_score_col,
|
| 298 |
+
agent_col="agent_for_hover",
|
| 299 |
+
name=primary_metric
|
| 300 |
)
|
| 301 |
# Use a consistent key for easy retrieval later
|
| 302 |
plots['scatter_plot'] = fig
|
|
|
|
| 316 |
data: pd.DataFrame,
|
| 317 |
x: Optional[str],
|
| 318 |
y: str,
|
| 319 |
+
agent_col: str = 'agent_for_hover',
|
| 320 |
+
name: Optional[str] = None
|
| 321 |
) -> go.Figure:
|
| 322 |
|
| 323 |
# --- Section 1: Define Mappings ---
|
|
|
|
| 328 |
"Open Source + Open Weights": "blue"
|
| 329 |
}
|
| 330 |
category_order = list(color_map.keys())
|
|
|
|
| 331 |
shape_map = {
|
| 332 |
"Standard": "star",
|
| 333 |
"Custom with Standard Search": "diamond",
|
|
|
|
| 338 |
x_col_to_use = x
|
| 339 |
y_col_to_use = y
|
| 340 |
|
| 341 |
+
# --- Section 2: Data Preparation---
|
| 342 |
required_cols = [y_col_to_use, agent_col, "Openness", "Agent Tooling"]
|
| 343 |
if not all(col in data.columns for col in required_cols):
|
| 344 |
logger.error(f"Missing one or more required columns for plotting: {required_cols}")
|
|
|
|
| 347 |
data_plot = data.copy()
|
| 348 |
data_plot[y_col_to_use] = pd.to_numeric(data_plot[y_col_to_use], errors='coerce')
|
| 349 |
|
| 350 |
+
x_axis_label = f"Cost per problem (USD)" if x else "Cost (Data N/A)"
|
| 351 |
+
max_reported_cost = 0
|
| 352 |
+
divider_line_x = 0
|
| 353 |
+
|
| 354 |
if x and x in data_plot.columns:
|
| 355 |
+
data_plot[x_col_to_use] = pd.to_numeric(data_plot[x_col_to_use], errors='coerce')
|
| 356 |
+
|
| 357 |
+
# --- Separate data into two groups ---
|
| 358 |
+
valid_cost_data = data_plot[data_plot[x_col_to_use].notna()].copy()
|
| 359 |
+
missing_cost_data = data_plot[data_plot[x_col_to_use].isna()].copy()
|
| 360 |
+
|
| 361 |
+
if not valid_cost_data.empty:
|
| 362 |
+
max_reported_cost = valid_cost_data[x_col_to_use].max()
|
| 363 |
+
# ---Calculate where to place the missing data and the divider line ---
|
| 364 |
+
divider_line_x = max_reported_cost + (max_reported_cost/10)
|
| 365 |
+
new_x_for_missing = max_reported_cost + (max_reported_cost/5)
|
| 366 |
+
|
| 367 |
+
if not missing_cost_data.empty:
|
| 368 |
+
missing_cost_data[x_col_to_use] = new_x_for_missing
|
| 369 |
+
# --- Combine the two groups back together ---
|
| 370 |
+
data_plot = pd.concat([valid_cost_data, missing_cost_data])
|
| 371 |
+
else:
|
| 372 |
+
data_plot = valid_cost_data # No missing data, just use the valid set
|
| 373 |
+
else:
|
| 374 |
+
# ---Handle the case where ALL costs are missing ---
|
| 375 |
+
if not missing_cost_data.empty:
|
| 376 |
+
missing_cost_data[x_col_to_use] = 0
|
| 377 |
+
data_plot = missing_cost_data
|
| 378 |
+
else:
|
| 379 |
+
data_plot = pd.DataFrame()
|
| 380 |
+
else:
|
| 381 |
+
# Handle case where x column is not provided at all
|
| 382 |
+
data_plot[x_col_to_use] = 0
|
| 383 |
|
| 384 |
# Clean data based on all necessary columns
|
| 385 |
data_plot.dropna(subset=[y_col_to_use, x_col_to_use, "Openness", "Agent Tooling"], inplace=True)
|
|
|
|
| 390 |
logger.warning(f"No valid data to plot after cleaning.")
|
| 391 |
return fig
|
| 392 |
|
| 393 |
+
# --- Section 4: Calculate and Draw Pareto Frontier ---
|
| 394 |
+
if x_col_to_use and y_col_to_use:
|
| 395 |
sorted_data = data_plot.sort_values(by=[x_col_to_use, y_col_to_use], ascending=[True, False])
|
| 396 |
frontier_points = []
|
| 397 |
max_score_so_far = float('-inf')
|
|
|
|
| 471 |
))
|
| 472 |
|
| 473 |
# --- Section 8: Configure Layout (Restored from your original code) ---
|
| 474 |
+
xaxis_config = dict(title=x_axis_label, rangemode="tozero")
|
| 475 |
+
if divider_line_x > 0:
|
| 476 |
+
fig.add_vline(
|
| 477 |
+
x=divider_line_x,
|
| 478 |
+
line_width=2,
|
| 479 |
+
line_dash="dash",
|
| 480 |
+
line_color="grey",
|
| 481 |
+
annotation_text="Missing Cost Data",
|
| 482 |
+
annotation_position="top right"
|
| 483 |
+
)
|
| 484 |
+
|
| 485 |
+
# ---Adjust x-axis range to make room for the new points ---
|
| 486 |
+
xaxis_config['range'] = [0, (max_reported_cost + (max_reported_cost / 4))]
|
| 487 |
|
| 488 |
logo_data_uri = svg_to_data_uri("assets/just-icon.svg")
|
| 489 |
|
| 490 |
fig.update_layout(
|
| 491 |
template="plotly_white",
|
| 492 |
+
title=f"Astabench {name} Leaderboard",
|
| 493 |
+
xaxis=xaxis_config, # Use the updated config
|
| 494 |
+
yaxis=dict(title="Score", rangemode="tozero"),
|
| 495 |
legend=dict(
|
| 496 |
bgcolor='#FAF2E9',
|
| 497 |
)
|
ui_components.py
CHANGED
|
@@ -109,7 +109,7 @@ def create_svg_html(value, svg_map):
|
|
| 109 |
|
| 110 |
# Global variables
|
| 111 |
OPENNESS_SVG_MAP = {
|
| 112 |
-
"
|
| 113 |
}
|
| 114 |
TOOLING_SVG_MAP = {
|
| 115 |
"Standard": {"light": "assets/star-light.svg", "dark": "assets/star-dark.svg"},
|
|
@@ -164,7 +164,7 @@ legend_markdown = f"""
|
|
| 164 |
<b>Pareto</b><span class="tooltip-icon" data-tooltip="
|
| 165 |
•Pareto: Indicates if agent is on the Pareto frontier
|
| 166 |
">ⓘ</span>
|
| 167 |
-
<div style="padding-top: 4px;"><span
|
| 168 |
</div>
|
| 169 |
|
| 170 |
<div> <!-- Container for the Openness section -->
|
|
@@ -283,7 +283,7 @@ def create_leaderboard_display(
|
|
| 283 |
else:
|
| 284 |
pareto_agent_names = []
|
| 285 |
df_view['Pareto'] = df_view.apply(
|
| 286 |
-
lambda row: '
|
| 287 |
axis=1
|
| 288 |
)
|
| 289 |
# Create mapping for Openness / tooling
|
|
@@ -338,7 +338,7 @@ def create_leaderboard_display(
|
|
| 338 |
gr.HTML(SCATTER_DISCLAIMER, elem_id="scatter-disclaimer")
|
| 339 |
|
| 340 |
# Put table and key into an accordion
|
| 341 |
-
with gr.Accordion("
|
| 342 |
gr.HTML(value=legend_markdown, elem_id="legend-markdown")
|
| 343 |
dataframe_component = gr.DataFrame(
|
| 344 |
headers=df_headers,
|
|
@@ -404,7 +404,7 @@ def create_benchmark_details_display(
|
|
| 404 |
else:
|
| 405 |
pareto_agent_names = []
|
| 406 |
benchmark_table_df['Pareto'] = benchmark_table_df.apply(
|
| 407 |
-
lambda row: '
|
| 408 |
axis=1
|
| 409 |
)
|
| 410 |
|
|
@@ -480,12 +480,13 @@ def create_benchmark_details_display(
|
|
| 480 |
data=full_df,
|
| 481 |
x=benchmark_cost_col,
|
| 482 |
y=benchmark_score_col,
|
| 483 |
-
agent_col="Agent"
|
|
|
|
| 484 |
)
|
| 485 |
gr.Plot(value=benchmark_plot, show_label=False)
|
| 486 |
gr.HTML(SCATTER_DISCLAIMER, elem_id="scatter-disclaimer")
|
| 487 |
# Put table and key into an accordion
|
| 488 |
-
with gr.Accordion("
|
| 489 |
gr.HTML(value=legend_markdown, elem_id="legend-markdown")
|
| 490 |
gr.DataFrame(
|
| 491 |
headers=df_headers,
|
|
|
|
| 109 |
|
| 110 |
# Global variables
|
| 111 |
OPENNESS_SVG_MAP = {
|
| 112 |
+
"Open Source + Open Weights": "assets/open-weights.svg", "Open Source": "assets/open-source.svg", "API Available": "assets/api.svg", "Closed": "assets/ui.svg"
|
| 113 |
}
|
| 114 |
TOOLING_SVG_MAP = {
|
| 115 |
"Standard": {"light": "assets/star-light.svg", "dark": "assets/star-dark.svg"},
|
|
|
|
| 164 |
<b>Pareto</b><span class="tooltip-icon" data-tooltip="
|
| 165 |
•Pareto: Indicates if agent is on the Pareto frontier
|
| 166 |
">ⓘ</span>
|
| 167 |
+
<div style="padding-top: 4px;"><span>🏆 On frontier</span></div>
|
| 168 |
</div>
|
| 169 |
|
| 170 |
<div> <!-- Container for the Openness section -->
|
|
|
|
| 283 |
else:
|
| 284 |
pareto_agent_names = []
|
| 285 |
df_view['Pareto'] = df_view.apply(
|
| 286 |
+
lambda row: '🏆' if row['id'] in pareto_agent_names else '',
|
| 287 |
axis=1
|
| 288 |
)
|
| 289 |
# Create mapping for Openness / tooling
|
|
|
|
| 338 |
gr.HTML(SCATTER_DISCLAIMER, elem_id="scatter-disclaimer")
|
| 339 |
|
| 340 |
# Put table and key into an accordion
|
| 341 |
+
with gr.Accordion("Show / Hide Table View", open=True, elem_id="leaderboard-accordion"):
|
| 342 |
gr.HTML(value=legend_markdown, elem_id="legend-markdown")
|
| 343 |
dataframe_component = gr.DataFrame(
|
| 344 |
headers=df_headers,
|
|
|
|
| 404 |
else:
|
| 405 |
pareto_agent_names = []
|
| 406 |
benchmark_table_df['Pareto'] = benchmark_table_df.apply(
|
| 407 |
+
lambda row: ' 🏆' if row['id'] in pareto_agent_names else '',
|
| 408 |
axis=1
|
| 409 |
)
|
| 410 |
|
|
|
|
| 480 |
data=full_df,
|
| 481 |
x=benchmark_cost_col,
|
| 482 |
y=benchmark_score_col,
|
| 483 |
+
agent_col="Agent",
|
| 484 |
+
name=benchmark_name
|
| 485 |
)
|
| 486 |
gr.Plot(value=benchmark_plot, show_label=False)
|
| 487 |
gr.HTML(SCATTER_DISCLAIMER, elem_id="scatter-disclaimer")
|
| 488 |
# Put table and key into an accordion
|
| 489 |
+
with gr.Accordion("Show / Hide Table View", open=True, elem_id="leaderboard-accordion"):
|
| 490 |
gr.HTML(value=legend_markdown, elem_id="legend-markdown")
|
| 491 |
gr.DataFrame(
|
| 492 |
headers=df_headers,
|