Spaces:
Running
Running
Fix scatter plot: logos zoom/pan with the chart, and show_all_labels labels all points, not just the frontier
Browse files
- Use data coordinates (xref='x', yref='y') for logos so they zoom/pan with the chart
- Add an `if show_all_labels` block to show labels for all points when enabled
- leaderboard_transformer.py +78 -73
leaderboard_transformer.py
CHANGED
|
@@ -971,7 +971,7 @@ def _plot_scatter_plotly(
|
|
| 971 |
name: Optional[str] = None,
|
| 972 |
plot_type: str = 'cost', # 'cost' or 'runtime'
|
| 973 |
mark_by: Optional[str] = None, # 'Company', 'Openness', or 'Country'
|
| 974 |
-
show_all_labels: bool = False
|
| 975 |
) -> go.Figure:
|
| 976 |
from constants import MARK_BY_DEFAULT
|
| 977 |
if mark_by is None:
|
|
@@ -1268,93 +1268,107 @@ def _plot_scatter_plotly(
|
|
| 1268 |
domain_x = max(0, min(1, domain_x))
|
| 1269 |
domain_y = max(0, min(1, domain_y))
|
| 1270 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1271 |
if harness_uri is not None:
|
| 1272 |
-
# Composite: stack model on top, harness on bottom
|
| 1273 |
-
#
|
| 1274 |
-
|
| 1275 |
-
model_y = min(1, domain_y + STACKED_Y_OFFSET)
|
| 1276 |
-
harness_y = max(0, domain_y - STACKED_Y_OFFSET)
|
| 1277 |
layout_images.append(dict(
|
| 1278 |
source=model_logo_uri,
|
| 1279 |
-
xref="x
|
| 1280 |
-
x=
|
| 1281 |
-
sizex=STACKED_SIZE_X
|
|
|
|
| 1282 |
xanchor="center", yanchor="middle",
|
| 1283 |
layer="above",
|
| 1284 |
))
|
| 1285 |
layout_images.append(dict(
|
| 1286 |
source=harness_uri,
|
| 1287 |
-
xref="x
|
| 1288 |
-
x=
|
| 1289 |
-
sizex=STACKED_SIZE_X
|
|
|
|
| 1290 |
xanchor="center", yanchor="middle",
|
| 1291 |
layer="above",
|
| 1292 |
))
|
| 1293 |
else:
|
| 1294 |
-
# Single marker
|
| 1295 |
-
# rows with an unknown harness name — the latter shouldn't happen
|
| 1296 |
-
# in practice since HARNESS_LOGO_PATHS covers every agent_name the
|
| 1297 |
-
# push-to-index script emits).
|
| 1298 |
layout_images.append(dict(
|
| 1299 |
source=model_logo_uri,
|
| 1300 |
-
xref="x
|
| 1301 |
-
x=
|
| 1302 |
-
sizex=SINGLE_SIZE_X
|
|
|
|
| 1303 |
xanchor="center", yanchor="middle",
|
| 1304 |
layer="above",
|
| 1305 |
))
|
| 1306 |
|
| 1307 |
-
# --- Section 7: Add Model Name Labels
|
| 1308 |
-
if
|
| 1309 |
-
|
| 1310 |
-
|
| 1311 |
-
|
|
|
|
| 1312 |
x_val = row[x_col_to_use]
|
| 1313 |
y_val = row[y_col_to_use]
|
| 1314 |
|
| 1315 |
-
# Get the model name for the label
|
| 1316 |
model_name = row.get('Language Model', '')
|
| 1317 |
if isinstance(model_name, list):
|
| 1318 |
model_name = model_name[0] if model_name else ''
|
| 1319 |
-
# Clean the model name (remove path prefixes)
|
| 1320 |
model_name = str(model_name).split('/')[-1]
|
| 1321 |
-
# Truncate long names
|
| 1322 |
if len(model_name) > 25:
|
| 1323 |
model_name = model_name[:22] + '...'
|
| 1324 |
|
| 1325 |
-
|
| 1326 |
-
|
| 1327 |
-
|
| 1328 |
-
|
| 1329 |
-
})
|
| 1330 |
|
| 1331 |
-
|
| 1332 |
-
|
| 1333 |
-
|
| 1334 |
-
x_val = item['x']
|
| 1335 |
-
y_val = item['y']
|
| 1336 |
-
label = item['label']
|
| 1337 |
|
| 1338 |
-
|
| 1339 |
-
if
|
| 1340 |
-
|
| 1341 |
-
|
| 1342 |
-
|
|
|
|
| 1343 |
|
| 1344 |
-
|
| 1345 |
-
|
| 1346 |
-
|
| 1347 |
-
|
| 1348 |
-
|
| 1349 |
-
|
| 1350 |
-
|
| 1351 |
-
|
| 1352 |
-
|
| 1353 |
-
|
| 1354 |
-
|
| 1355 |
-
|
| 1356 |
-
|
| 1357 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1358 |
|
| 1359 |
# --- Section 8: Configure Layout ---
|
| 1360 |
# Use the same axis ranges as calculated for domain coordinates
|
|
@@ -1473,47 +1487,38 @@ def format_score_column(df: pd.DataFrame, score_col_name: str) -> pd.DataFrame:
|
|
| 1473 |
return df.assign(**{score_col_name: df[score_col_name].apply(apply_formatting)})
|
| 1474 |
|
| 1475 |
|
| 1476 |
-
def _hidden_runtime_sort_key(runtime_value: float | int | None, score_value: float | int | None) -> str:
|
| 1477 |
-
"""Build a hidden prefix so Gradio's string-based runtime sorting behaves numerically."""
|
| 1478 |
-
if pd.notna(runtime_value) and isinstance(runtime_value, (int, float)):
|
| 1479 |
-
return f"{float(runtime_value):020.6f}"
|
| 1480 |
-
if pd.notna(score_value):
|
| 1481 |
-
return "99999999999999999998"
|
| 1482 |
-
return "99999999999999999999"
|
| 1483 |
-
|
| 1484 |
-
|
| 1485 |
def format_runtime_column(df: pd.DataFrame, runtime_col_name: str) -> pd.DataFrame:
|
| 1486 |
"""
|
| 1487 |
Applies custom formatting to a runtime column based on its corresponding score column.
|
| 1488 |
- If runtime is not null, formats as time with 's' suffix.
|
| 1489 |
- If runtime is null but score is not, it becomes "Missing".
|
| 1490 |
- If both runtime and score are null, it becomes "Not Submitted".
|
| 1491 |
-
- Adds a hidden, zero-padded numeric prefix so Gradio sorts the column numerically.
|
| 1492 |
Args:
|
| 1493 |
df: The DataFrame to modify.
|
| 1494 |
runtime_col_name: The name of the runtime column to format (e.g., "Average Runtime").
|
| 1495 |
Returns:
|
| 1496 |
The DataFrame with the formatted runtime column.
|
| 1497 |
"""
|
|
|
|
| 1498 |
score_col_name = runtime_col_name.replace("Runtime", "Score")
|
| 1499 |
|
|
|
|
| 1500 |
if score_col_name not in df.columns:
|
| 1501 |
-
return df
|
| 1502 |
|
| 1503 |
def apply_formatting_logic(row):
|
| 1504 |
runtime_value = row[runtime_col_name]
|
| 1505 |
score_value = row[score_col_name]
|
| 1506 |
status_color = "#ec4899"
|
| 1507 |
-
sort_key = _hidden_runtime_sort_key(runtime_value, score_value)
|
| 1508 |
-
hidden_sort_prefix = f'<span style="display:none">{sort_key}</span>'
|
| 1509 |
|
| 1510 |
if pd.notna(runtime_value) and isinstance(runtime_value, (int, float)):
|
| 1511 |
-
return f"{
|
| 1512 |
elif pd.notna(score_value):
|
| 1513 |
-
return f'
|
| 1514 |
else:
|
| 1515 |
-
return f'
|
| 1516 |
|
|
|
|
| 1517 |
df[runtime_col_name] = df.apply(apply_formatting_logic, axis=1)
|
| 1518 |
|
| 1519 |
return df
|
|
|
|
| 971 |
name: Optional[str] = None,
|
| 972 |
plot_type: str = 'cost', # 'cost' or 'runtime'
|
| 973 |
mark_by: Optional[str] = None, # 'Company', 'Openness', or 'Country'
|
| 974 |
+
show_all_labels: bool = False # Show labels for all points vs only Pareto frontier
|
| 975 |
) -> go.Figure:
|
| 976 |
from constants import MARK_BY_DEFAULT
|
| 977 |
if mark_by is None:
|
|
|
|
| 1268 |
domain_x = max(0, min(1, domain_x))
|
| 1269 |
domain_y = max(0, min(1, domain_y))
|
| 1270 |
|
| 1271 |
+
# Convert to data coordinates
|
| 1272 |
+
# For log scale x: use log10(x) to match the axis type
|
| 1273 |
+
x_log = np.log10(x_val) if x_val > 0 else x_min_log
|
| 1274 |
+
|
| 1275 |
if harness_uri is not None:
|
| 1276 |
+
# Composite: stack model on top, harness on bottom
|
| 1277 |
+
# Use data coordinates (x, y) so logos zoom/pan together with labels
|
| 1278 |
+
y_offset = 0.8 # Offset above the data point (in score units)
|
|
|
|
|
|
|
| 1279 |
layout_images.append(dict(
|
| 1280 |
source=model_logo_uri,
|
| 1281 |
+
xref="x", yref="y",
|
| 1282 |
+
x=x_log, y=y_val + y_offset,
|
| 1283 |
+
sizex=STACKED_SIZE_X * (x_max_log - x_min_log),
|
| 1284 |
+
sizey=STACKED_SIZE_Y * (y_max - y_min),
|
| 1285 |
xanchor="center", yanchor="middle",
|
| 1286 |
layer="above",
|
| 1287 |
))
|
| 1288 |
layout_images.append(dict(
|
| 1289 |
source=harness_uri,
|
| 1290 |
+
xref="x", yref="y",
|
| 1291 |
+
x=x_log, y=y_val - y_offset,
|
| 1292 |
+
sizex=STACKED_SIZE_X * (x_max_log - x_min_log),
|
| 1293 |
+
sizey=STACKED_SIZE_Y * (y_max - y_min),
|
| 1294 |
xanchor="center", yanchor="middle",
|
| 1295 |
layer="above",
|
| 1296 |
))
|
| 1297 |
else:
|
| 1298 |
+
# Single marker - use data coordinates so logo zooms/pans with labels
|
|
|
|
|
|
|
|
|
|
| 1299 |
layout_images.append(dict(
|
| 1300 |
source=model_logo_uri,
|
| 1301 |
+
xref="x", yref="y",
|
| 1302 |
+
x=x_log, y=y_val,
|
| 1303 |
+
sizex=SINGLE_SIZE_X * (x_max_log - x_min_log),
|
| 1304 |
+
sizey=SINGLE_SIZE_Y * (y_max - y_min),
|
| 1305 |
xanchor="center", yanchor="middle",
|
| 1306 |
layer="above",
|
| 1307 |
))
|
| 1308 |
|
| 1309 |
+
# --- Section 7: Add Model Name Labels ---
|
| 1310 |
+
# Show labels for all points if show_all_labels is True, otherwise just Pareto frontier
|
| 1311 |
+
if show_all_labels:
|
| 1312 |
+
# Label all data points
|
| 1313 |
+
labels_data = []
|
| 1314 |
+
for _, row in data_plot.iterrows():
|
| 1315 |
x_val = row[x_col_to_use]
|
| 1316 |
y_val = row[y_col_to_use]
|
| 1317 |
|
|
|
|
| 1318 |
model_name = row.get('Language Model', '')
|
| 1319 |
if isinstance(model_name, list):
|
| 1320 |
model_name = model_name[0] if model_name else ''
|
|
|
|
| 1321 |
model_name = str(model_name).split('/')[-1]
|
|
|
|
| 1322 |
if len(model_name) > 25:
|
| 1323 |
model_name = model_name[:22] + '...'
|
| 1324 |
|
| 1325 |
+
labels_data.append({'x': x_val, 'y': y_val, 'label': model_name})
|
| 1326 |
+
elif frontier_rows:
|
| 1327 |
+
# Label only Pareto frontier points
|
| 1328 |
+
labels_data = []
|
|
|
|
| 1329 |
|
| 1330 |
+
for row in frontier_rows:
|
| 1331 |
+
x_val = row[x_col_to_use]
|
| 1332 |
+
y_val = row[y_col_to_use]
|
|
|
|
|
|
|
|
|
|
| 1333 |
|
| 1334 |
+
model_name = row.get('Language Model', '')
|
| 1335 |
+
if isinstance(model_name, list):
|
| 1336 |
+
model_name = model_name[0] if model_name else ''
|
| 1337 |
+
model_name = str(model_name).split('/')[-1]
|
| 1338 |
+
if len(model_name) > 25:
|
| 1339 |
+
model_name = model_name[:22] + '...'
|
| 1340 |
|
| 1341 |
+
labels_data.append({'x': x_val, 'y': y_val, 'label': model_name})
|
| 1342 |
+
else:
|
| 1343 |
+
labels_data = []
|
| 1344 |
+
|
| 1345 |
+
# Add annotations for each label
|
| 1346 |
+
# For log scale x-axis, annotations need log10(x) coordinates (Plotly issue #2580)
|
| 1347 |
+
for item in labels_data:
|
| 1348 |
+
x_val = item['x']
|
| 1349 |
+
y_val = item['y']
|
| 1350 |
+
label = item['label']
|
| 1351 |
+
|
| 1352 |
+
# Transform x to log10 for annotation positioning on log scale
|
| 1353 |
+
if x_val > 0:
|
| 1354 |
+
x_log = np.log10(x_val)
|
| 1355 |
+
else:
|
| 1356 |
+
x_log = x_min_log
|
| 1357 |
+
|
| 1358 |
+
fig.add_annotation(
|
| 1359 |
+
x=x_log,
|
| 1360 |
+
y=y_val,
|
| 1361 |
+
text=label,
|
| 1362 |
+
showarrow=False,
|
| 1363 |
+
yshift=25, # Move label higher above the icon
|
| 1364 |
+
font=dict(
|
| 1365 |
+
size=10,
|
| 1366 |
+
color='#0D0D0F', # neutral-950
|
| 1367 |
+
family=FONT_FAMILY_SHORT
|
| 1368 |
+
),
|
| 1369 |
+
xanchor='center',
|
| 1370 |
+
yanchor='bottom'
|
| 1371 |
+
)
|
| 1372 |
|
| 1373 |
# --- Section 8: Configure Layout ---
|
| 1374 |
# Use the same axis ranges as calculated for domain coordinates
|
|
|
|
| 1487 |
return df.assign(**{score_col_name: df[score_col_name].apply(apply_formatting)})
|
| 1488 |
|
| 1489 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1490 |
def format_runtime_column(df: pd.DataFrame, runtime_col_name: str) -> pd.DataFrame:
    """
    Applies custom formatting to a runtime column based on its corresponding score column.

    - If runtime is not null, formats as whole seconds with an 's' suffix.
    - If runtime is null but score is not, it becomes "Missing".
    - If both runtime and score are null, it becomes "Not Submitted".

    The runtime column of ``df`` is overwritten in place, and the same
    DataFrame object is returned.

    Args:
        df: The DataFrame to modify.
        runtime_col_name: The name of the runtime column to format (e.g., "Average Runtime").

    Returns:
        The DataFrame with the formatted runtime column, or ``df`` untouched
        when it has no matching score column.
    """
    # Local import keeps this block self-contained; `Real` also matches NumPy
    # numeric scalars that can live in object-dtype columns.
    from numbers import Real

    # The score column shares the runtime column's name with "Runtime" -> "Score".
    score_col_name = runtime_col_name.replace("Runtime", "Score")

    # Without a matching score column the status cannot be decided.
    if score_col_name not in df.columns:
        return df

    status_color = "#ec4899"

    def format_cell(runtime_value, score_value):
        """Render one runtime cell from its raw runtime and score."""
        if isinstance(runtime_value, Real) and pd.notna(runtime_value):
            return f"{float(runtime_value):.0f}s"
        if pd.notna(score_value):
            # Score exists, but runtime is missing.
            return f'<span style="color: {status_color};">Missing</span>'
        # Neither score nor runtime exists.
        return f'<span style="color: {status_color};">Not Submitted</span>'

    # Format column-wise instead of with df.apply(axis=1): on an empty frame
    # apply() returns a DataFrame that cannot be assigned to one column, and
    # row-wise access hands back NumPy ints when every column is an integer.
    runtimes = df[runtime_col_name].tolist()
    scores = df[score_col_name].tolist()
    df[runtime_col_name] = [format_cell(r, s) for r, s in zip(runtimes, scores)]

    return df
|