asta-bench-leaderboard

Running

App Files Community

Amber Tanaka committed on Jul 31

Commit

aca1950

unverified ·

1 Parent(s): 11de2f8

Plot Adjustments (#19)

Browse files

Files changed (2) hide show

leaderboard_transformer.py +56 -29
ui_components.py +8 -7

leaderboard_transformer.py CHANGED Viewed

@@ -234,7 +234,7 @@ class DataTransformer:
             # The 'Submitter' column is no longer needed
             df_view = df_view.drop(columns=['Submitter'])
-        # 4. Build the List of Columns to Display (now simplified)
         base_cols = ["id","Agent","LLM Base", "agent_for_hover"]
         new_cols = ["Openness", "Agent Tooling"]
         ending_cols = ["Logs"]
@@ -295,7 +295,8 @@ class DataTransformer:
                     data=df_view,
                     x=primary_cost_col,
                     y=primary_score_col,
-                    agent_col="agent_for_hover"
                 )
                 # Use a consistent key for easy retrieval later
                 plots['scatter_plot'] = fig
@@ -315,7 +316,8 @@ def _plot_scatter_plotly(
         data: pd.DataFrame,
         x: Optional[str],
         y: str,
-        agent_col: str = 'agent_for_hover'
 ) -> go.Figure:
     # --- Section 1: Define Mappings ---
@@ -326,7 +328,6 @@ def _plot_scatter_plotly(
         "Open Source + Open Weights": "blue"
     }
     category_order = list(color_map.keys())
     shape_map = {
         "Standard": "star",
         "Custom with Standard Search": "diamond",
@@ -337,6 +338,7 @@ def _plot_scatter_plotly(
     x_col_to_use = x
     y_col_to_use = y
     required_cols = [y_col_to_use, agent_col, "Openness", "Agent Tooling"]
     if not all(col in data.columns for col in required_cols):
         logger.error(f"Missing one or more required columns for plotting: {required_cols}")
@@ -345,21 +347,39 @@ def _plot_scatter_plotly(
     data_plot = data.copy()
     data_plot[y_col_to_use] = pd.to_numeric(data_plot[y_col_to_use], errors='coerce')
-    x_axis_label = f"{x} per task (USD)" if x else "Cost (Data N/A)"
-    x_data_is_valid = False
     if x and x in data_plot.columns:
-        try:
-            data_plot[x_col_to_use] = pd.to_numeric(data_plot[x_col_to_use], errors='coerce')
-            if data_plot[x_col_to_use].notna().any():
-                x_data_is_valid = True
-        except Exception as e:
-            logger.warning(f"Error converting x-column '{x_col_to_use}' to numeric: {e}")
-    if not x_data_is_valid:
-        dummy_x_col_name = "__dummy_x_for_plotting__"
-        data_plot[dummy_x_col_name] = DUMMY_X_VALUE_FOR_MISSING_COSTS
-        x_col_to_use = dummy_x_col_name
-        logger.info("Using dummy x-values for plotting.")
     # Clean data based on all necessary columns
     data_plot.dropna(subset=[y_col_to_use, x_col_to_use, "Openness", "Agent Tooling"], inplace=True)
@@ -370,8 +390,8 @@ def _plot_scatter_plotly(
         logger.warning(f"No valid data to plot after cleaning.")
         return fig
-    # --- Section 4: Calculate and Draw Pareto Frontier (Restored from your original code) ---
-    if x_data_is_valid:
         sorted_data = data_plot.sort_values(by=[x_col_to_use, y_col_to_use], ascending=[True, False])
         frontier_points = []
         max_score_so_far = float('-inf')
@@ -451,20 +471,27 @@ def _plot_scatter_plotly(
         ))
     # --- Section 8: Configure Layout (Restored from your original code) ---
-    xaxis_config = dict(title=x_axis_label)
-    if not x_data_is_valid:
-        xaxis_config['range'] = [DUMMY_X_VALUE_FOR_MISSING_COSTS - 1, DUMMY_X_VALUE_FOR_MISSING_COSTS + 1]
-        xaxis_config['tickvals'] = [DUMMY_X_VALUE_FOR_MISSING_COSTS]
-    else:
-        xaxis_config['rangemode'] = "tozero"
     logo_data_uri = svg_to_data_uri("assets/just-icon.svg")
     fig.update_layout(
         template="plotly_white",
-        title=f"{y_col_to_use} vs. {x_axis_label}",
-        xaxis=xaxis_config,
-        yaxis=dict(title=y_col_to_use, rangemode="tozero"),
         legend=dict(
             bgcolor='#FAF2E9',
         )

             # The 'Submitter' column is no longer needed
             df_view = df_view.drop(columns=['Submitter'])
+        # 4. Build the List of Columns to Display
         base_cols = ["id","Agent","LLM Base", "agent_for_hover"]
         new_cols = ["Openness", "Agent Tooling"]
         ending_cols = ["Logs"]
                     data=df_view,
                     x=primary_cost_col,
                     y=primary_score_col,
+                    agent_col="agent_for_hover",
+                    name=primary_metric
                 )
                 # Use a consistent key for easy retrieval later
                 plots['scatter_plot'] = fig
         data: pd.DataFrame,
         x: Optional[str],
         y: str,
+        agent_col: str = 'agent_for_hover',
+        name: Optional[str] = None
 ) -> go.Figure:
     # --- Section 1: Define Mappings ---
         "Open Source + Open Weights": "blue"
     }
     category_order = list(color_map.keys())
     shape_map = {
         "Standard": "star",
         "Custom with Standard Search": "diamond",
     x_col_to_use = x
     y_col_to_use = y
+    # --- Section 2: Data Preparation---
     required_cols = [y_col_to_use, agent_col, "Openness", "Agent Tooling"]
     if not all(col in data.columns for col in required_cols):
         logger.error(f"Missing one or more required columns for plotting: {required_cols}")
     data_plot = data.copy()
     data_plot[y_col_to_use] = pd.to_numeric(data_plot[y_col_to_use], errors='coerce')
+    x_axis_label = f"Cost per problem (USD)" if x else "Cost (Data N/A)"
+    max_reported_cost = 0
+    divider_line_x = 0
     if x and x in data_plot.columns:
+        data_plot[x_col_to_use] = pd.to_numeric(data_plot[x_col_to_use], errors='coerce')
+        # --- Separate data into two groups ---
+        valid_cost_data = data_plot[data_plot[x_col_to_use].notna()].copy()
+        missing_cost_data = data_plot[data_plot[x_col_to_use].isna()].copy()
+        if not valid_cost_data.empty:
+            max_reported_cost = valid_cost_data[x_col_to_use].max()
+            # ---Calculate where to place the missing data and the divider line ---
+            divider_line_x = max_reported_cost + (max_reported_cost/10)
+            new_x_for_missing = max_reported_cost + (max_reported_cost/5)
+            if not missing_cost_data.empty:
+                missing_cost_data[x_col_to_use] = new_x_for_missing
+                # --- Combine the two groups back together ---
+                data_plot = pd.concat([valid_cost_data, missing_cost_data])
+            else:
+                data_plot = valid_cost_data # No missing data, just use the valid set
+        else:
+            # ---Handle the case where ALL costs are missing ---
+            if not missing_cost_data.empty:
+                missing_cost_data[x_col_to_use] = 0
+                data_plot = missing_cost_data
+            else:
+                data_plot = pd.DataFrame()
+    else:
+        # Handle case where x column is not provided at all
+        data_plot[x_col_to_use] = 0
     # Clean data based on all necessary columns
     data_plot.dropna(subset=[y_col_to_use, x_col_to_use, "Openness", "Agent Tooling"], inplace=True)
         logger.warning(f"No valid data to plot after cleaning.")
         return fig
+    # --- Section 4: Calculate and Draw Pareto Frontier ---
+    if x_col_to_use and y_col_to_use:
         sorted_data = data_plot.sort_values(by=[x_col_to_use, y_col_to_use], ascending=[True, False])
         frontier_points = []
         max_score_so_far = float('-inf')
         ))
     # --- Section 8: Configure Layout (Restored from your original code) ---
+    xaxis_config = dict(title=x_axis_label, rangemode="tozero")
+    if divider_line_x > 0:
+        fig.add_vline(
+            x=divider_line_x,
+            line_width=2,
+            line_dash="dash",
+            line_color="grey",
+            annotation_text="Missing Cost Data",
+            annotation_position="top right"
+        )
+        # ---Adjust x-axis range to make room for the new points ---
+        xaxis_config['range'] = [0, (max_reported_cost + (max_reported_cost / 4))]
     logo_data_uri = svg_to_data_uri("assets/just-icon.svg")
     fig.update_layout(
         template="plotly_white",
+        title=f"Astabench {name} Leaderboard",
+        xaxis=xaxis_config, # Use the updated config
+        yaxis=dict(title="Score", rangemode="tozero"),
         legend=dict(
             bgcolor='#FAF2E9',
         )

ui_components.py CHANGED Viewed

@@ -109,7 +109,7 @@ def create_svg_html(value, svg_map):
 # Global variables
 OPENNESS_SVG_MAP = {
-    "Closed": "assets/ui.svg", "API Available": "assets/api.svg", "Open Source": "assets/open-source.svg", "Open Source + Open Weights": "assets/open-weights.svg"
 }
 TOOLING_SVG_MAP = {
     "Standard": {"light": "assets/star-light.svg", "dark": "assets/star-dark.svg"},
@@ -164,7 +164,7 @@ legend_markdown = f"""
         <b>Pareto</b><span class="tooltip-icon" data-tooltip="
         •Pareto: Indicates if agent is on the Pareto frontier
         ">ⓘ</span>
-        <div style="padding-top: 4px;"><span>📈 On frontier</span></div>
     </div>
     <div> <!-- Container for the Openness section -->
@@ -283,7 +283,7 @@ def create_leaderboard_display(
     else:
         pareto_agent_names = []
     df_view['Pareto'] = df_view.apply(
-        lambda row: '📈' if row['id'] in pareto_agent_names else '',
         axis=1
     )
     # Create mapping for Openness / tooling
@@ -338,7 +338,7 @@ def create_leaderboard_display(
     gr.HTML(SCATTER_DISCLAIMER, elem_id="scatter-disclaimer")
     # Put table and key into an accordion
-    with gr.Accordion("Details", open=True, elem_id="leaderboard-accordion"):
         gr.HTML(value=legend_markdown, elem_id="legend-markdown")
         dataframe_component = gr.DataFrame(
             headers=df_headers,
@@ -404,7 +404,7 @@ def create_benchmark_details_display(
         else:
             pareto_agent_names = []
         benchmark_table_df['Pareto'] = benchmark_table_df.apply(
-            lambda row: '📈' if row['id'] in pareto_agent_names else '',
             axis=1
         )
@@ -480,12 +480,13 @@ def create_benchmark_details_display(
             data=full_df,
             x=benchmark_cost_col,
             y=benchmark_score_col,
-            agent_col="Agent"
         )
         gr.Plot(value=benchmark_plot, show_label=False)
         gr.HTML(SCATTER_DISCLAIMER, elem_id="scatter-disclaimer")
         # Put table and key into an accordion
-        with gr.Accordion("Details", open=True, elem_id="leaderboard-accordion"):
             gr.HTML(value=legend_markdown, elem_id="legend-markdown")
             gr.DataFrame(
                 headers=df_headers,

 # Global variables
 OPENNESS_SVG_MAP = {
+    "Open Source + Open Weights": "assets/open-weights.svg", "Open Source": "assets/open-source.svg", "API Available": "assets/api.svg", "Closed": "assets/ui.svg"
 }
 TOOLING_SVG_MAP = {
     "Standard": {"light": "assets/star-light.svg", "dark": "assets/star-dark.svg"},
         <b>Pareto</b><span class="tooltip-icon" data-tooltip="
         •Pareto: Indicates if agent is on the Pareto frontier
         ">ⓘ</span>
+        <div style="padding-top: 4px;"><span>🏆 On frontier</span></div>
     </div>
     <div> <!-- Container for the Openness section -->
     else:
         pareto_agent_names = []
     df_view['Pareto'] = df_view.apply(
+        lambda row: '🏆' if row['id'] in pareto_agent_names else '',
         axis=1
     )
     # Create mapping for Openness / tooling
     gr.HTML(SCATTER_DISCLAIMER, elem_id="scatter-disclaimer")
     # Put table and key into an accordion
+    with gr.Accordion("Show / Hide Table View", open=True, elem_id="leaderboard-accordion"):
         gr.HTML(value=legend_markdown, elem_id="legend-markdown")
         dataframe_component = gr.DataFrame(
             headers=df_headers,
         else:
             pareto_agent_names = []
         benchmark_table_df['Pareto'] = benchmark_table_df.apply(
+            lambda row: ' 🏆' if row['id'] in pareto_agent_names else '',
             axis=1
         )
             data=full_df,
             x=benchmark_cost_col,
             y=benchmark_score_col,
+            agent_col="Agent",
+            name=benchmark_name
         )
         gr.Plot(value=benchmark_plot, show_label=False)
         gr.HTML(SCATTER_DISCLAIMER, elem_id="scatter-disclaimer")
         # Put table and key into an accordion
+        with gr.Accordion("Show / Hide Table View", open=True, elem_id="leaderboard-accordion"):
             gr.HTML(value=legend_markdown, elem_id="legend-markdown")
             gr.DataFrame(
                 headers=df_headers,