Spaces:
Paused
Paused
Chloe Anastasiades
committed on
Support old and new openness and tool usage values (#56)
Browse files- aliases.py +23 -0
- leaderboard_transformer.py +27 -11
- ui_components.py +31 -16
aliases.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Canonical labels for the agent "openness" categories.
CANONICAL_OPENNESS_OPEN_OPEN_WEIGHTS = "Open Source + Open Weights"
CANONICAL_OPENNESS_OPEN_CLOSED_WEIGHTS = "Open Source"
CANONICAL_OPENNESS_CLOSED_API_AVAILABLE = "API Available"
CANONICAL_OPENNESS_CLOSED_UI_ONLY = "Closed"


# Canonical labels for the agent "tool usage" categories.
CANONICAL_TOOL_USAGE_STANDARD = "Standard"
CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE = "Custom with Standard Search"
CANONICAL_TOOL_USAGE_FULLY_CUSTOM = "Fully Custom"


# Maps each canonical openness label to the set of alternate labels that
# name the same category. Every value is a set (possibly empty).
OPENNESS_ALIASES = {
    CANONICAL_OPENNESS_OPEN_OPEN_WEIGHTS: {"Open source & open weights"},
    CANONICAL_OPENNESS_OPEN_CLOSED_WEIGHTS: {"Open source & closed weights"},
    CANONICAL_OPENNESS_CLOSED_API_AVAILABLE: {"Closed source & API available"},
    CANONICAL_OPENNESS_CLOSED_UI_ONLY: {"Closed source & UI only"},
}

# Maps each canonical tool-usage label to the set of alternate labels that
# name the same category. Every value is a set (possibly empty).
TOOL_USAGE_ALIASES = {
    # No aliases for this label. Written as set() because a bare "{}" is an
    # empty dict, which would make this value's type differ from the others.
    CANONICAL_TOOL_USAGE_STANDARD: set(),
    CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE: {"Custom interface"},
    CANONICAL_TOOL_USAGE_FULLY_CUSTOM: {"Fully custom"},
}
|
leaderboard_transformer.py
CHANGED
|
@@ -6,6 +6,8 @@ from typing import Optional
|
|
| 6 |
import base64
|
| 7 |
import html
|
| 8 |
|
|
|
|
|
|
|
| 9 |
logger = logging.getLogger(__name__)
|
| 10 |
|
| 11 |
INFORMAL_TO_FORMAL_NAME_MAP = {
|
|
@@ -366,19 +368,34 @@ def _plot_scatter_plotly(
|
|
| 366 |
) -> go.Figure:
|
| 367 |
|
| 368 |
# --- Section 1: Define Mappings ---
|
|
|
|
|
|
|
| 369 |
color_map = {
|
| 370 |
-
|
| 371 |
-
|
| 372 |
-
|
| 373 |
-
|
| 374 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 375 |
category_order = list(color_map.keys())
|
|
|
|
|
|
|
|
|
|
| 376 |
shape_map = {
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
| 380 |
}
|
|
|
|
|
|
|
|
|
|
| 381 |
default_shape = 'square'
|
|
|
|
|
|
|
| 382 |
|
| 383 |
x_col_to_use = x
|
| 384 |
y_col_to_use = y
|
|
@@ -527,7 +544,7 @@ def _plot_scatter_plotly(
|
|
| 527 |
)
|
| 528 |
))
|
| 529 |
# ---- Add logic for making the legend -----------
|
| 530 |
-
for i, category in enumerate(
|
| 531 |
fig.add_trace(go.Scatter(
|
| 532 |
x=[None], y=[None],
|
| 533 |
mode='markers',
|
|
@@ -542,15 +559,14 @@ def _plot_scatter_plotly(
|
|
| 542 |
))
|
| 543 |
|
| 544 |
# Part B: Dummy traces for the SHAPES ("Agent Tooling")
|
| 545 |
-
|
| 546 |
-
for i, (shape_name, shape_symbol) in enumerate(shape_items):
|
| 547 |
fig.add_trace(go.Scatter(
|
| 548 |
x=[None], y=[None],
|
| 549 |
mode='markers',
|
| 550 |
name=shape_name,
|
| 551 |
legendgroup="tooling_group",
|
| 552 |
legendgrouptitle_text="Agent Tooling" if i == 0 else None,
|
| 553 |
-
marker=dict(color='black', symbol=
|
| 554 |
))
|
| 555 |
|
| 556 |
# --- Section 8: Configure Layout ---
|
|
|
|
| 6 |
import base64
|
| 7 |
import html
|
| 8 |
|
| 9 |
+
import aliases
|
| 10 |
+
|
| 11 |
logger = logging.getLogger(__name__)
|
| 12 |
|
| 13 |
INFORMAL_TO_FORMAL_NAME_MAP = {
|
|
|
|
| 368 |
) -> go.Figure:
|
| 369 |
|
| 370 |
# --- Section 1: Define Mappings ---
|
| 371 |
+
# These include aliases for openness categories,
|
| 372 |
+
# so multiple names might correspond to the same color.
|
| 373 |
color_map = {
|
| 374 |
+
aliases.CANONICAL_OPENNESS_OPEN_OPEN_WEIGHTS: "deeppink",
|
| 375 |
+
aliases.CANONICAL_OPENNESS_OPEN_CLOSED_WEIGHTS: "coral",
|
| 376 |
+
aliases.CANONICAL_OPENNESS_CLOSED_API_AVAILABLE: "yellow",
|
| 377 |
+
aliases.CANONICAL_OPENNESS_CLOSED_UI_ONLY: "white",
|
| 378 |
}
|
| 379 |
+
for canonical_openness, openness_aliases in aliases.OPENNESS_ALIASES.items():
|
| 380 |
+
for openness_alias in openness_aliases:
|
| 381 |
+
color_map[openness_alias] = color_map[canonical_openness]
|
| 382 |
+
# Only keep one name per color for the legend.
|
| 383 |
+
colors_for_legend = set(aliases.OPENNESS_ALIASES.keys())
|
| 384 |
category_order = list(color_map.keys())
|
| 385 |
+
|
| 386 |
+
# These include aliases for tool usage categories,
|
| 387 |
+
# so multiple names might correspond to the same shape.
|
| 388 |
shape_map = {
|
| 389 |
+
aliases.CANONICAL_TOOL_USAGE_STANDARD: "star",
|
| 390 |
+
aliases.CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE: "star-diamond",
|
| 391 |
+
aliases.CANONICAL_TOOL_USAGE_FULLY_CUSTOM: "star-triangle-up",
|
| 392 |
}
|
| 393 |
+
for canonical_tool_usage, tool_usages_aliases in aliases.TOOL_USAGE_ALIASES.items():
|
| 394 |
+
for tool_usage_alias in tool_usages_aliases:
|
| 395 |
+
shape_map[tool_usage_alias] = shape_map[canonical_tool_usage]
|
| 396 |
default_shape = 'square'
|
| 397 |
+
# Only keep one name per shape for the legend.
|
| 398 |
+
shapes_for_legend = set(aliases.TOOL_USAGE_ALIASES.keys())
|
| 399 |
|
| 400 |
x_col_to_use = x
|
| 401 |
y_col_to_use = y
|
|
|
|
| 544 |
)
|
| 545 |
))
|
| 546 |
# ---- Add logic for making the legend -----------
|
| 547 |
+
for i, category in enumerate(colors_for_legend):
|
| 548 |
fig.add_trace(go.Scatter(
|
| 549 |
x=[None], y=[None],
|
| 550 |
mode='markers',
|
|
|
|
| 559 |
))
|
| 560 |
|
| 561 |
# Part B: Dummy traces for the SHAPES ("Agent Tooling")
|
| 562 |
+
for i, shape_name in enumerate(shapes_for_legend):
|
|
|
|
| 563 |
fig.add_trace(go.Scatter(
|
| 564 |
x=[None], y=[None],
|
| 565 |
mode='markers',
|
| 566 |
name=shape_name,
|
| 567 |
legendgroup="tooling_group",
|
| 568 |
legendgrouptitle_text="Agent Tooling" if i == 0 else None,
|
| 569 |
+
marker=dict(color='black', symbol=shape_map.get(shape_name), size=12)
|
| 570 |
))
|
| 571 |
|
| 572 |
# --- Section 8: Configure Layout ---
|
ui_components.py
CHANGED
|
@@ -8,6 +8,7 @@ import base64
|
|
| 8 |
from agenteval.leaderboard.view import LeaderboardViewer
|
| 9 |
from huggingface_hub import HfApi
|
| 10 |
|
|
|
|
| 11 |
from leaderboard_transformer import (
|
| 12 |
DataTransformer,
|
| 13 |
transform_raw_dataframe,
|
|
@@ -40,27 +41,41 @@ AGENTEVAL_MANIFEST_NAME = "agenteval.json"
|
|
| 40 |
os.makedirs(EXTRACTED_DATA_DIR, exist_ok=True)
|
| 41 |
# Global variables
|
| 42 |
COMBINED_ICON_MAP = {
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
},
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
},
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
},
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
}
|
| 63 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
OPENNESS_SVG_MAP = {
|
| 65 |
"Open Source + Open Weights": "assets/os-ow-legend.svg",
|
| 66 |
"Open Source": "assets/os-legend.svg",
|
|
|
|
| 8 |
from agenteval.leaderboard.view import LeaderboardViewer
|
| 9 |
from huggingface_hub import HfApi
|
| 10 |
|
| 11 |
+
import aliases
|
| 12 |
from leaderboard_transformer import (
|
| 13 |
DataTransformer,
|
| 14 |
transform_raw_dataframe,
|
|
|
|
| 41 |
os.makedirs(EXTRACTED_DATA_DIR, exist_ok=True)
|
| 42 |
# Global variables
|
| 43 |
COMBINED_ICON_MAP = {
|
| 44 |
+
aliases.CANONICAL_OPENNESS_OPEN_OPEN_WEIGHTS: {
|
| 45 |
+
aliases.CANONICAL_TOOL_USAGE_STANDARD: "assets/os-ow-standard.svg",
|
| 46 |
+
aliases.CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE: "assets/os-ow-equivalent.svg",
|
| 47 |
+
aliases.CANONICAL_TOOL_USAGE_FULLY_CUSTOM: "assets/os-ow-custom.svg",
|
| 48 |
},
|
| 49 |
+
aliases.CANONICAL_OPENNESS_OPEN_CLOSED_WEIGHTS: {
|
| 50 |
+
aliases.CANONICAL_TOOL_USAGE_STANDARD: "assets/os-standard.svg",
|
| 51 |
+
aliases.CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE: "assets/os-equivalent.svg",
|
| 52 |
+
aliases.CANONICAL_TOOL_USAGE_FULLY_CUSTOM: "assets/os-custom.svg",
|
| 53 |
},
|
| 54 |
+
aliases.CANONICAL_OPENNESS_CLOSED_API_AVAILABLE: {
|
| 55 |
+
aliases.CANONICAL_TOOL_USAGE_STANDARD: "assets/api-standard.svg",
|
| 56 |
+
aliases.CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE: "assets/api-equivalent.svg",
|
| 57 |
+
aliases.CANONICAL_TOOL_USAGE_FULLY_CUSTOM: "assets/api-custom.svg",
|
| 58 |
},
|
| 59 |
+
aliases.CANONICAL_OPENNESS_CLOSED_UI_ONLY: {
|
| 60 |
+
aliases.CANONICAL_TOOL_USAGE_STANDARD: "assets/c-standard.svg",
|
| 61 |
+
aliases.CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE: "assets/c-equivalent.svg",
|
| 62 |
+
aliases.CANONICAL_TOOL_USAGE_FULLY_CUSTOM: "assets/c-custom.svg",
|
| 63 |
}
|
| 64 |
}
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
# It's important to expand the tool usage aliases first here, so that when
|
| 68 |
+
# the openness aliases are added, they pick up the tool usage aliases too.
|
| 69 |
+
for openness in COMBINED_ICON_MAP:
|
| 70 |
+
for canonical_tool_usage, tool_usage_aliases in aliases.TOOL_USAGE_ALIASES.items():
|
| 71 |
+
for tool_usage_alias in tool_usage_aliases:
|
| 72 |
+
COMBINED_ICON_MAP[openness][tool_usage_alias] = COMBINED_ICON_MAP[openness][canonical_tool_usage]
|
| 73 |
+
|
| 74 |
+
for canonical_openness, openness_aliases in aliases.OPENNESS_ALIASES.items():
|
| 75 |
+
for openness_alias in openness_aliases:
|
| 76 |
+
COMBINED_ICON_MAP[openness_alias] = COMBINED_ICON_MAP[canonical_openness]
|
| 77 |
+
|
| 78 |
+
|
| 79 |
OPENNESS_SVG_MAP = {
|
| 80 |
"Open Source + Open Weights": "assets/os-ow-legend.svg",
|
| 81 |
"Open Source": "assets/os-legend.svg",
|