Amber Tanaka committed on
Commit
aca1950
·
unverified ·
1 Parent(s): 11de2f8

Plot Adjustments (#19)

Browse files
Files changed (2) hide show
  1. leaderboard_transformer.py +56 -29
  2. ui_components.py +8 -7
leaderboard_transformer.py CHANGED
@@ -234,7 +234,7 @@ class DataTransformer:
234
  # The 'Submitter' column is no longer needed
235
  df_view = df_view.drop(columns=['Submitter'])
236
 
237
- # 4. Build the List of Columns to Display (now simplified)
238
  base_cols = ["id","Agent","LLM Base", "agent_for_hover"]
239
  new_cols = ["Openness", "Agent Tooling"]
240
  ending_cols = ["Logs"]
@@ -295,7 +295,8 @@ class DataTransformer:
295
  data=df_view,
296
  x=primary_cost_col,
297
  y=primary_score_col,
298
- agent_col="agent_for_hover"
 
299
  )
300
  # Use a consistent key for easy retrieval later
301
  plots['scatter_plot'] = fig
@@ -315,7 +316,8 @@ def _plot_scatter_plotly(
315
  data: pd.DataFrame,
316
  x: Optional[str],
317
  y: str,
318
- agent_col: str = 'agent_for_hover'
 
319
  ) -> go.Figure:
320
 
321
  # --- Section 1: Define Mappings ---
@@ -326,7 +328,6 @@ def _plot_scatter_plotly(
326
  "Open Source + Open Weights": "blue"
327
  }
328
  category_order = list(color_map.keys())
329
-
330
  shape_map = {
331
  "Standard": "star",
332
  "Custom with Standard Search": "diamond",
@@ -337,6 +338,7 @@ def _plot_scatter_plotly(
337
  x_col_to_use = x
338
  y_col_to_use = y
339
 
 
340
  required_cols = [y_col_to_use, agent_col, "Openness", "Agent Tooling"]
341
  if not all(col in data.columns for col in required_cols):
342
  logger.error(f"Missing one or more required columns for plotting: {required_cols}")
@@ -345,21 +347,39 @@ def _plot_scatter_plotly(
345
  data_plot = data.copy()
346
  data_plot[y_col_to_use] = pd.to_numeric(data_plot[y_col_to_use], errors='coerce')
347
 
348
- x_axis_label = f"{x} per task (USD)" if x else "Cost (Data N/A)"
349
- x_data_is_valid = False
 
 
350
  if x and x in data_plot.columns:
351
- try:
352
- data_plot[x_col_to_use] = pd.to_numeric(data_plot[x_col_to_use], errors='coerce')
353
- if data_plot[x_col_to_use].notna().any():
354
- x_data_is_valid = True
355
- except Exception as e:
356
- logger.warning(f"Error converting x-column '{x_col_to_use}' to numeric: {e}")
357
-
358
- if not x_data_is_valid:
359
- dummy_x_col_name = "__dummy_x_for_plotting__"
360
- data_plot[dummy_x_col_name] = DUMMY_X_VALUE_FOR_MISSING_COSTS
361
- x_col_to_use = dummy_x_col_name
362
- logger.info("Using dummy x-values for plotting.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
363
 
364
  # Clean data based on all necessary columns
365
  data_plot.dropna(subset=[y_col_to_use, x_col_to_use, "Openness", "Agent Tooling"], inplace=True)
@@ -370,8 +390,8 @@ def _plot_scatter_plotly(
370
  logger.warning(f"No valid data to plot after cleaning.")
371
  return fig
372
 
373
- # --- Section 4: Calculate and Draw Pareto Frontier (Restored from your original code) ---
374
- if x_data_is_valid:
375
  sorted_data = data_plot.sort_values(by=[x_col_to_use, y_col_to_use], ascending=[True, False])
376
  frontier_points = []
377
  max_score_so_far = float('-inf')
@@ -451,20 +471,27 @@ def _plot_scatter_plotly(
451
  ))
452
 
453
  # --- Section 8: Configure Layout (Restored from your original code) ---
454
- xaxis_config = dict(title=x_axis_label)
455
- if not x_data_is_valid:
456
- xaxis_config['range'] = [DUMMY_X_VALUE_FOR_MISSING_COSTS - 1, DUMMY_X_VALUE_FOR_MISSING_COSTS + 1]
457
- xaxis_config['tickvals'] = [DUMMY_X_VALUE_FOR_MISSING_COSTS]
458
- else:
459
- xaxis_config['rangemode'] = "tozero"
 
 
 
 
 
 
 
460
 
461
  logo_data_uri = svg_to_data_uri("assets/just-icon.svg")
462
 
463
  fig.update_layout(
464
  template="plotly_white",
465
- title=f"{y_col_to_use} vs. {x_axis_label}",
466
- xaxis=xaxis_config,
467
- yaxis=dict(title=y_col_to_use, rangemode="tozero"),
468
  legend=dict(
469
  bgcolor='#FAF2E9',
470
  )
 
234
  # The 'Submitter' column is no longer needed
235
  df_view = df_view.drop(columns=['Submitter'])
236
 
237
+ # 4. Build the List of Columns to Display
238
  base_cols = ["id","Agent","LLM Base", "agent_for_hover"]
239
  new_cols = ["Openness", "Agent Tooling"]
240
  ending_cols = ["Logs"]
 
295
  data=df_view,
296
  x=primary_cost_col,
297
  y=primary_score_col,
298
+ agent_col="agent_for_hover",
299
+ name=primary_metric
300
  )
301
  # Use a consistent key for easy retrieval later
302
  plots['scatter_plot'] = fig
 
316
  data: pd.DataFrame,
317
  x: Optional[str],
318
  y: str,
319
+ agent_col: str = 'agent_for_hover',
320
+ name: Optional[str] = None
321
  ) -> go.Figure:
322
 
323
  # --- Section 1: Define Mappings ---
 
328
  "Open Source + Open Weights": "blue"
329
  }
330
  category_order = list(color_map.keys())
 
331
  shape_map = {
332
  "Standard": "star",
333
  "Custom with Standard Search": "diamond",
 
338
  x_col_to_use = x
339
  y_col_to_use = y
340
 
341
+ # --- Section 2: Data Preparation---
342
  required_cols = [y_col_to_use, agent_col, "Openness", "Agent Tooling"]
343
  if not all(col in data.columns for col in required_cols):
344
  logger.error(f"Missing one or more required columns for plotting: {required_cols}")
 
347
  data_plot = data.copy()
348
  data_plot[y_col_to_use] = pd.to_numeric(data_plot[y_col_to_use], errors='coerce')
349
 
350
+ x_axis_label = f"Cost per problem (USD)" if x else "Cost (Data N/A)"
351
+ max_reported_cost = 0
352
+ divider_line_x = 0
353
+
354
  if x and x in data_plot.columns:
355
+ data_plot[x_col_to_use] = pd.to_numeric(data_plot[x_col_to_use], errors='coerce')
356
+
357
+ # --- Separate data into two groups ---
358
+ valid_cost_data = data_plot[data_plot[x_col_to_use].notna()].copy()
359
+ missing_cost_data = data_plot[data_plot[x_col_to_use].isna()].copy()
360
+
361
+ if not valid_cost_data.empty:
362
+ max_reported_cost = valid_cost_data[x_col_to_use].max()
363
+ # ---Calculate where to place the missing data and the divider line ---
364
+ divider_line_x = max_reported_cost + (max_reported_cost/10)
365
+ new_x_for_missing = max_reported_cost + (max_reported_cost/5)
366
+
367
+ if not missing_cost_data.empty:
368
+ missing_cost_data[x_col_to_use] = new_x_for_missing
369
+ # --- Combine the two groups back together ---
370
+ data_plot = pd.concat([valid_cost_data, missing_cost_data])
371
+ else:
372
+ data_plot = valid_cost_data # No missing data, just use the valid set
373
+ else:
374
+ # ---Handle the case where ALL costs are missing ---
375
+ if not missing_cost_data.empty:
376
+ missing_cost_data[x_col_to_use] = 0
377
+ data_plot = missing_cost_data
378
+ else:
379
+ data_plot = pd.DataFrame()
380
+ else:
381
+ # Handle case where x column is not provided at all
382
+ data_plot[x_col_to_use] = 0
383
 
384
  # Clean data based on all necessary columns
385
  data_plot.dropna(subset=[y_col_to_use, x_col_to_use, "Openness", "Agent Tooling"], inplace=True)
 
390
  logger.warning(f"No valid data to plot after cleaning.")
391
  return fig
392
 
393
+ # --- Section 4: Calculate and Draw Pareto Frontier ---
394
+ if x_col_to_use and y_col_to_use:
395
  sorted_data = data_plot.sort_values(by=[x_col_to_use, y_col_to_use], ascending=[True, False])
396
  frontier_points = []
397
  max_score_so_far = float('-inf')
 
471
  ))
472
 
473
  # --- Section 8: Configure Layout (Restored from your original code) ---
474
+ xaxis_config = dict(title=x_axis_label, rangemode="tozero")
475
+ if divider_line_x > 0:
476
+ fig.add_vline(
477
+ x=divider_line_x,
478
+ line_width=2,
479
+ line_dash="dash",
480
+ line_color="grey",
481
+ annotation_text="Missing Cost Data",
482
+ annotation_position="top right"
483
+ )
484
+
485
+ # ---Adjust x-axis range to make room for the new points ---
486
+ xaxis_config['range'] = [0, (max_reported_cost + (max_reported_cost / 4))]
487
 
488
  logo_data_uri = svg_to_data_uri("assets/just-icon.svg")
489
 
490
  fig.update_layout(
491
  template="plotly_white",
492
+ title=f"Astabench {name} Leaderboard",
493
+ xaxis=xaxis_config, # Use the updated config
494
+ yaxis=dict(title="Score", rangemode="tozero"),
495
  legend=dict(
496
  bgcolor='#FAF2E9',
497
  )
ui_components.py CHANGED
@@ -109,7 +109,7 @@ def create_svg_html(value, svg_map):
109
 
110
  # Global variables
111
  OPENNESS_SVG_MAP = {
112
- "Closed": "assets/ui.svg", "API Available": "assets/api.svg", "Open Source": "assets/open-source.svg", "Open Source + Open Weights": "assets/open-weights.svg"
113
  }
114
  TOOLING_SVG_MAP = {
115
  "Standard": {"light": "assets/star-light.svg", "dark": "assets/star-dark.svg"},
@@ -164,7 +164,7 @@ legend_markdown = f"""
164
  <b>Pareto</b><span class="tooltip-icon" data-tooltip="
165
  •Pareto: Indicates if agent is on the Pareto frontier
166
  ">ⓘ</span>
167
- <div style="padding-top: 4px;"><span>📈 On frontier</span></div>
168
  </div>
169
 
170
  <div> <!-- Container for the Openness section -->
@@ -283,7 +283,7 @@ def create_leaderboard_display(
283
  else:
284
  pareto_agent_names = []
285
  df_view['Pareto'] = df_view.apply(
286
- lambda row: '📈' if row['id'] in pareto_agent_names else '',
287
  axis=1
288
  )
289
  # Create mapping for Openness / tooling
@@ -338,7 +338,7 @@ def create_leaderboard_display(
338
  gr.HTML(SCATTER_DISCLAIMER, elem_id="scatter-disclaimer")
339
 
340
  # Put table and key into an accordion
341
- with gr.Accordion("Details", open=True, elem_id="leaderboard-accordion"):
342
  gr.HTML(value=legend_markdown, elem_id="legend-markdown")
343
  dataframe_component = gr.DataFrame(
344
  headers=df_headers,
@@ -404,7 +404,7 @@ def create_benchmark_details_display(
404
  else:
405
  pareto_agent_names = []
406
  benchmark_table_df['Pareto'] = benchmark_table_df.apply(
407
- lambda row: '📈' if row['id'] in pareto_agent_names else '',
408
  axis=1
409
  )
410
 
@@ -480,12 +480,13 @@ def create_benchmark_details_display(
480
  data=full_df,
481
  x=benchmark_cost_col,
482
  y=benchmark_score_col,
483
- agent_col="Agent"
 
484
  )
485
  gr.Plot(value=benchmark_plot, show_label=False)
486
  gr.HTML(SCATTER_DISCLAIMER, elem_id="scatter-disclaimer")
487
  # Put table and key into an accordion
488
- with gr.Accordion("Details", open=True, elem_id="leaderboard-accordion"):
489
  gr.HTML(value=legend_markdown, elem_id="legend-markdown")
490
  gr.DataFrame(
491
  headers=df_headers,
 
109
 
110
  # Global variables
111
  OPENNESS_SVG_MAP = {
112
+ "Open Source + Open Weights": "assets/open-weights.svg", "Open Source": "assets/open-source.svg", "API Available": "assets/api.svg", "Closed": "assets/ui.svg"
113
  }
114
  TOOLING_SVG_MAP = {
115
  "Standard": {"light": "assets/star-light.svg", "dark": "assets/star-dark.svg"},
 
164
  <b>Pareto</b><span class="tooltip-icon" data-tooltip="
165
  •Pareto: Indicates if agent is on the Pareto frontier
166
  ">ⓘ</span>
167
+ <div style="padding-top: 4px;"><span>🏆 On frontier</span></div>
168
  </div>
169
 
170
  <div> <!-- Container for the Openness section -->
 
283
  else:
284
  pareto_agent_names = []
285
  df_view['Pareto'] = df_view.apply(
286
+ lambda row: '🏆' if row['id'] in pareto_agent_names else '',
287
  axis=1
288
  )
289
  # Create mapping for Openness / tooling
 
338
  gr.HTML(SCATTER_DISCLAIMER, elem_id="scatter-disclaimer")
339
 
340
  # Put table and key into an accordion
341
+ with gr.Accordion("Show / Hide Table View", open=True, elem_id="leaderboard-accordion"):
342
  gr.HTML(value=legend_markdown, elem_id="legend-markdown")
343
  dataframe_component = gr.DataFrame(
344
  headers=df_headers,
 
404
  else:
405
  pareto_agent_names = []
406
  benchmark_table_df['Pareto'] = benchmark_table_df.apply(
407
+ lambda row: ' 🏆' if row['id'] in pareto_agent_names else '',
408
  axis=1
409
  )
410
 
 
480
  data=full_df,
481
  x=benchmark_cost_col,
482
  y=benchmark_score_col,
483
+ agent_col="Agent",
484
+ name=benchmark_name
485
  )
486
  gr.Plot(value=benchmark_plot, show_label=False)
487
  gr.HTML(SCATTER_DISCLAIMER, elem_id="scatter-disclaimer")
488
  # Put table and key into an accordion
489
+ with gr.Accordion("Show / Hide Table View", open=True, elem_id="leaderboard-accordion"):
490
  gr.HTML(value=legend_markdown, elem_id="legend-markdown")
491
  gr.DataFrame(
492
  headers=df_headers,