Spaces:

OpenHands
/

openhands-index

Running

App Files Files Community

juan-all-hands committed 5 days ago

Commit

3a62e57

verified ·

1 Parent(s): a8f1aa1

Fix scatter plot: zoom and show_all_labels for all labels not just frontier

Browse files

- Use data coordinates (xref='x', yref='y') for logos so they zoom/pan with chart
- Add if show_all_labels block to show all labels when enabled

Files changed (1) hide show

leaderboard_transformer.py +78 -73

leaderboard_transformer.py CHANGED Viewed

@@ -971,7 +971,7 @@ def _plot_scatter_plotly(
         name: Optional[str] = None,
         plot_type: str = 'cost',  # 'cost' or 'runtime'
         mark_by: Optional[str] = None,  # 'Company', 'Openness', or 'Country'
-        show_all_labels: bool = False
 ) -> go.Figure:
     from constants import MARK_BY_DEFAULT
     if mark_by is None:
@@ -1268,93 +1268,107 @@ def _plot_scatter_plotly(
         domain_x = max(0, min(1, domain_x))
         domain_y = max(0, min(1, domain_y))
         if harness_uri is not None:
-            # Composite: stack model on top, harness on bottom, clamping
-            # each half to the plot area so markers near the edges don't
-            # drift off-canvas.
-            model_y = min(1, domain_y + STACKED_Y_OFFSET)
-            harness_y = max(0, domain_y - STACKED_Y_OFFSET)
             layout_images.append(dict(
                 source=model_logo_uri,
-                xref="x domain", yref="y domain",
-                x=domain_x, y=model_y,
-                sizex=STACKED_SIZE_X, sizey=STACKED_SIZE_Y,
                 xanchor="center", yanchor="middle",
                 layer="above",
             ))
             layout_images.append(dict(
                 source=harness_uri,
-                xref="x domain", yref="y domain",
-                x=domain_x, y=harness_y,
-                sizex=STACKED_SIZE_X, sizey=STACKED_SIZE_Y,
                 xanchor="center", yanchor="middle",
                 layer="above",
             ))
         else:
-            # Single marker (canonical OpenHands pages, or Alternative Agents
-            # rows with an unknown harness name — the latter shouldn't happen
-            # in practice since HARNESS_LOGO_PATHS covers every agent_name the
-            # push-to-index script emits).
             layout_images.append(dict(
                 source=model_logo_uri,
-                xref="x domain", yref="y domain",
-                x=domain_x, y=domain_y,
-                sizex=SINGLE_SIZE_X, sizey=SINGLE_SIZE_Y,
                 xanchor="center", yanchor="middle",
                 layer="above",
             ))
-    # --- Section 7: Add Model Name Labels to Frontier Points ---
-    if frontier_rows:
-        frontier_labels_data = []
-        for row in frontier_rows:
             x_val = row[x_col_to_use]
             y_val = row[y_col_to_use]
-            # Get the model name for the label
             model_name = row.get('Language Model', '')
             if isinstance(model_name, list):
                 model_name = model_name[0] if model_name else ''
-            # Clean the model name (remove path prefixes)
             model_name = str(model_name).split('/')[-1]
-            # Truncate long names
             if len(model_name) > 25:
                 model_name = model_name[:22] + '...'
-            frontier_labels_data.append({
-                'x': x_val,
-                'y': y_val,
-                'label': model_name
-            })
-        # Add annotations for each frontier label
-        # For log scale x-axis, annotations need log10(x) coordinates (Plotly issue #2580)
-        for item in frontier_labels_data:
-            x_val = item['x']
-            y_val = item['y']
-            label = item['label']
-            # Transform x to log10 for annotation positioning on log scale
-            if x_val > 0:
-                x_log = np.log10(x_val)
-            else:
-                x_log = x_min_log
-            fig.add_annotation(
-                x=x_log,
-                y=y_val,
-                text=label,
-                showarrow=False,
-                yshift=25,  # Move label higher above the icon
-                font=dict(
-                    size=10,
-                    color='#0D0D0F',  # neutral-950
-                    family=FONT_FAMILY_SHORT
-                ),
-                xanchor='center',
-                yanchor='bottom'
-            )
     # --- Section 8: Configure Layout  ---
     # Use the same axis ranges as calculated for domain coordinates
@@ -1473,47 +1487,38 @@ def format_score_column(df: pd.DataFrame, score_col_name: str) -> pd.DataFrame:
     return df.assign(**{score_col_name: df[score_col_name].apply(apply_formatting)})
-def _hidden_runtime_sort_key(runtime_value: float | int | None, score_value: float | int | None) -> str:
-    """Build a hidden prefix so Gradio's string-based runtime sorting behaves numerically."""
-    if pd.notna(runtime_value) and isinstance(runtime_value, (int, float)):
-        return f"{float(runtime_value):020.6f}"
-    if pd.notna(score_value):
-        return "99999999999999999998"
-    return "99999999999999999999"
 def format_runtime_column(df: pd.DataFrame, runtime_col_name: str) -> pd.DataFrame:
     """
     Applies custom formatting to a runtime column based on its corresponding score column.
     - If runtime is not null, formats as time with 's' suffix.
     - If runtime is null but score is not, it becomes "Missing".
     - If both runtime and score are null, it becomes "Not Submitted".
-    - Adds a hidden, zero-padded numeric prefix so Gradio sorts the column numerically.
     Args:
         df: The DataFrame to modify.
         runtime_col_name: The name of the runtime column to format (e.g., "Average Runtime").
     Returns:
         The DataFrame with the formatted runtime column.
     """
     score_col_name = runtime_col_name.replace("Runtime", "Score")
     if score_col_name not in df.columns:
-        return df
     def apply_formatting_logic(row):
         runtime_value = row[runtime_col_name]
         score_value = row[score_col_name]
         status_color = "#ec4899"
-        sort_key = _hidden_runtime_sort_key(runtime_value, score_value)
-        hidden_sort_prefix = f'<span style="display:none">{sort_key}</span>'
         if pd.notna(runtime_value) and isinstance(runtime_value, (int, float)):
-            return f"{hidden_sort_prefix}{runtime_value:.0f}s"
         elif pd.notna(score_value):
-            return f'{hidden_sort_prefix}<span style="color: {status_color};">Missing</span>'
         else:
-            return f'{hidden_sort_prefix}<span style="color: {status_color};">Not Submitted</span>'
     df[runtime_col_name] = df.apply(apply_formatting_logic, axis=1)
     return df

         name: Optional[str] = None,
         plot_type: str = 'cost',  # 'cost' or 'runtime'
         mark_by: Optional[str] = None,  # 'Company', 'Openness', or 'Country'
+        show_all_labels: bool = False  # Show labels for all points vs only Pareto frontier
 ) -> go.Figure:
     from constants import MARK_BY_DEFAULT
     if mark_by is None:
         domain_x = max(0, min(1, domain_x))
         domain_y = max(0, min(1, domain_y))
+        # Convert to data coordinates
+        # For log scale x: use log10(x) to match the axis type
+        x_log = np.log10(x_val) if x_val > 0 else x_min_log
         if harness_uri is not None:
+            # Composite: stack model on top, harness on bottom
+            # Use data coordinates (x, y) so logos zoom/pan together with labels
+            y_offset = 0.8  # Offset above the data point (in score units)
             layout_images.append(dict(
                 source=model_logo_uri,
+                xref="x", yref="y",
+                x=x_log, y=y_val + y_offset,
+                sizex=STACKED_SIZE_X * (x_max_log - x_min_log),
+                sizey=STACKED_SIZE_Y * (y_max - y_min),
                 xanchor="center", yanchor="middle",
                 layer="above",
             ))
             layout_images.append(dict(
                 source=harness_uri,
+                xref="x", yref="y",
+                x=x_log, y=y_val - y_offset,
+                sizex=STACKED_SIZE_X * (x_max_log - x_min_log),
+                sizey=STACKED_SIZE_Y * (y_max - y_min),
                 xanchor="center", yanchor="middle",
                 layer="above",
             ))
         else:
+            # Single marker - use data coordinates so logo zooms/pans with labels
             layout_images.append(dict(
                 source=model_logo_uri,
+                xref="x", yref="y",
+                x=x_log, y=y_val,
+                sizex=SINGLE_SIZE_X * (x_max_log - x_min_log),
+                sizey=SINGLE_SIZE_Y * (y_max - y_min),
                 xanchor="center", yanchor="middle",
                 layer="above",
             ))
+    # --- Section 7: Add Model Name Labels ---
+    # Show labels for all points if show_all_labels is True, otherwise just Pareto frontier
+    if show_all_labels:
+        # Label all data points
+        labels_data = []
+        for _, row in data_plot.iterrows():
             x_val = row[x_col_to_use]
             y_val = row[y_col_to_use]
             model_name = row.get('Language Model', '')
             if isinstance(model_name, list):
                 model_name = model_name[0] if model_name else ''
             model_name = str(model_name).split('/')[-1]
             if len(model_name) > 25:
                 model_name = model_name[:22] + '...'
+            labels_data.append({'x': x_val, 'y': y_val, 'label': model_name})
+    elif frontier_rows:
+        # Label only Pareto frontier points
+        labels_data = []
+        for row in frontier_rows:
+            x_val = row[x_col_to_use]
+            y_val = row[y_col_to_use]
+            model_name = row.get('Language Model', '')
+            if isinstance(model_name, list):
+                model_name = model_name[0] if model_name else ''
+            model_name = str(model_name).split('/')[-1]
+            if len(model_name) > 25:
+                model_name = model_name[:22] + '...'
+            labels_data.append({'x': x_val, 'y': y_val, 'label': model_name})
+    else:
+        labels_data = []
+    # Add annotations for each label
+    # For log scale x-axis, annotations need log10(x) coordinates (Plotly issue #2580)
+    for item in labels_data:
+        x_val = item['x']
+        y_val = item['y']
+        label = item['label']
+        # Transform x to log10 for annotation positioning on log scale
+        if x_val > 0:
+            x_log = np.log10(x_val)
+        else:
+            x_log = x_min_log
+        fig.add_annotation(
+            x=x_log,
+            y=y_val,
+            text=label,
+            showarrow=False,
+            yshift=25,  # Move label higher above the icon
+            font=dict(
+                size=10,
+                color='#0D0D0F',  # neutral-950
+                family=FONT_FAMILY_SHORT
+            ),
+            xanchor='center',
+            yanchor='bottom'
+        )
     # --- Section 8: Configure Layout  ---
     # Use the same axis ranges as calculated for domain coordinates
     return df.assign(**{score_col_name: df[score_col_name].apply(apply_formatting)})
 def format_runtime_column(df: pd.DataFrame, runtime_col_name: str) -> pd.DataFrame:
     """
     Applies custom formatting to a runtime column based on its corresponding score column.
     - If runtime is a non-null number, formats it as whole seconds with an 's' suffix.
     - If runtime is null but score is not, it becomes "Missing".
     - If both runtime and score are null, it becomes "Not Submitted".
     The runtime column is overwritten in place on ``df`` and the same object is returned.
     Args:
         df: The DataFrame to modify.
         runtime_col_name: The name of the runtime column to format (e.g., "Average Runtime").
     Returns:
         The DataFrame with the formatted runtime column. If the matching score
         column does not exist, ``df`` is returned untouched.
     """
     # Function-scope import so the block does not depend on a file-level import.
     import numbers
     # Find the corresponding score column by replacing "Runtime" with "Score"
     score_col_name = runtime_col_name.replace("Runtime", "Score")
     # Without a matching score column there is nothing to base the status on
     if score_col_name not in df.columns:
         return df
     status_color = "#ec4899"
     def format_cell(runtime_value, score_value) -> str:
         # numbers.Real covers Python ints/floats and NumPy scalars alike
         # (np.int64 is not an ``int`` subclass). The type test runs first so
         # pd.notna is only asked about scalar numbers.
         if isinstance(runtime_value, numbers.Real) and pd.notna(runtime_value):
             return f"{runtime_value:.0f}s"
         if pd.notna(score_value):
             # Score exists, but runtime is missing
             return f'<span style="color: {status_color};">Missing</span>'
         # Neither score nor runtime exists
         return f'<span style="color: {status_color};">Not Submitted</span>'
     # Pair the two columns directly instead of a row-wise DataFrame.apply:
     # on a zero-row frame, apply(axis=1) returns a DataFrame and the column
     # assignment below would raise.
     formatted = [
         format_cell(runtime_value, score_value)
         for runtime_value, score_value in zip(df[runtime_col_name], df[score_col_name])
     ]
     df[runtime_col_name] = pd.Series(formatted, index=df.index, dtype=object)
     return df