Chloe Anastasiades committed on
Commit
b3aef2c
·
unverified ·
1 Parent(s): 1e64d2b

Support old and new openness and tool usage values (#56)

Browse files
Files changed (3) hide show
  1. aliases.py +23 -0
  2. leaderboard_transformer.py +27 -11
  3. ui_components.py +31 -16
aliases.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Canonical display names for agent "openness" categories. These are the
# values the rest of the app keys on; older spellings are listed as aliases
# in OPENNESS_ALIASES below.
CANONICAL_OPENNESS_OPEN_OPEN_WEIGHTS = "Open Source + Open Weights"
CANONICAL_OPENNESS_OPEN_CLOSED_WEIGHTS = "Open Source"
CANONICAL_OPENNESS_CLOSED_API_AVAILABLE = "API Available"
CANONICAL_OPENNESS_CLOSED_UI_ONLY = "Closed"


# Canonical display names for agent "tool usage" categories.
CANONICAL_TOOL_USAGE_STANDARD = "Standard"
CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE = "Custom with Standard Search"
CANONICAL_TOOL_USAGE_FULLY_CUSTOM = "Fully Custom"


# Maps each canonical openness name to the set of alternative spellings that
# should be treated as the same category.
OPENNESS_ALIASES = {
    CANONICAL_OPENNESS_OPEN_OPEN_WEIGHTS: {"Open source & open weights"},
    CANONICAL_OPENNESS_OPEN_CLOSED_WEIGHTS: {"Open source & closed weights"},
    CANONICAL_OPENNESS_CLOSED_API_AVAILABLE: {"Closed source & API available"},
    CANONICAL_OPENNESS_CLOSED_UI_ONLY: {"Closed source & UI only"},
}

# Maps each canonical tool-usage name to the set of alternative spellings
# that should be treated as the same category.
TOOL_USAGE_ALIASES = {
    # No aliases for this category. Note that `{}` would be an empty dict,
    # not an empty set, so `set()` keeps every value the same type.
    CANONICAL_TOOL_USAGE_STANDARD: set(),
    CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE: {"Custom interface"},
    CANONICAL_TOOL_USAGE_FULLY_CUSTOM: {"Fully custom"},
}
leaderboard_transformer.py CHANGED
@@ -6,6 +6,8 @@ from typing import Optional
6
  import base64
7
  import html
8
 
 
 
9
  logger = logging.getLogger(__name__)
10
 
11
  INFORMAL_TO_FORMAL_NAME_MAP = {
@@ -366,19 +368,34 @@ def _plot_scatter_plotly(
366
  ) -> go.Figure:
367
 
368
  # --- Section 1: Define Mappings ---
 
 
369
  color_map = {
370
- "Open Source + Open Weights": "deeppink",
371
- "Open Source": "coral",
372
- "API Available": "yellow",
373
- "Closed": "white",
374
  }
 
 
 
 
 
375
  category_order = list(color_map.keys())
 
 
 
376
  shape_map = {
377
- "Standard": "star",
378
- "Custom with Standard Search": "star-diamond",
379
- "Fully Custom": "star-triangle-up"
380
  }
 
 
 
381
  default_shape = 'square'
 
 
382
 
383
  x_col_to_use = x
384
  y_col_to_use = y
@@ -527,7 +544,7 @@ def _plot_scatter_plotly(
527
  )
528
  ))
529
  # ---- Add logic for making the legend -----------
530
- for i, category in enumerate(category_order):
531
  fig.add_trace(go.Scatter(
532
  x=[None], y=[None],
533
  mode='markers',
@@ -542,15 +559,14 @@ def _plot_scatter_plotly(
542
  ))
543
 
544
  # Part B: Dummy traces for the SHAPES ("Agent Tooling")
545
- shape_items = list(shape_map.items())
546
- for i, (shape_name, shape_symbol) in enumerate(shape_items):
547
  fig.add_trace(go.Scatter(
548
  x=[None], y=[None],
549
  mode='markers',
550
  name=shape_name,
551
  legendgroup="tooling_group",
552
  legendgrouptitle_text="Agent Tooling" if i == 0 else None,
553
- marker=dict(color='black', symbol=shape_symbol, size=12)
554
  ))
555
 
556
  # --- Section 8: Configure Layout ---
 
6
  import base64
7
  import html
8
 
9
+ import aliases
10
+
11
  logger = logging.getLogger(__name__)
12
 
13
  INFORMAL_TO_FORMAL_NAME_MAP = {
 
368
  ) -> go.Figure:
369
 
370
  # --- Section 1: Define Mappings ---
371
+ # These include aliases for openness categories,
372
+ # so multiple names might correspond to the same color.
373
  color_map = {
374
+ aliases.CANONICAL_OPENNESS_OPEN_OPEN_WEIGHTS: "deeppink",
375
+ aliases.CANONICAL_OPENNESS_OPEN_CLOSED_WEIGHTS: "coral",
376
+ aliases.CANONICAL_OPENNESS_CLOSED_API_AVAILABLE: "yellow",
377
+ aliases.CANONICAL_OPENNESS_CLOSED_UI_ONLY: "white",
378
  }
379
+ for canonical_openness, openness_aliases in aliases.OPENNESS_ALIASES.items():
380
+ for openness_alias in openness_aliases:
381
+ color_map[openness_alias] = color_map[canonical_openness]
382
+ # Only keep one name per color for the legend.
383
+ colors_for_legend = set(aliases.OPENNESS_ALIASES.keys())
384
  category_order = list(color_map.keys())
385
+
386
+ # These include aliases for tool usage categories,
387
+ # so multiple names might correspond to the same shape.
388
  shape_map = {
389
+ aliases.CANONICAL_TOOL_USAGE_STANDARD: "star",
390
+ aliases.CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE: "star-diamond",
391
+ aliases.CANONICAL_TOOL_USAGE_FULLY_CUSTOM: "star-triangle-up",
392
  }
393
+ for canonical_tool_usage, tool_usages_aliases in aliases.TOOL_USAGE_ALIASES.items():
394
+ for tool_usage_alias in tool_usages_aliases:
395
+ shape_map[tool_usage_alias] = shape_map[canonical_tool_usage]
396
  default_shape = 'square'
397
+ # Only keep one name per shape for the legend.
398
+ shapes_for_legend = set(aliases.TOOL_USAGE_ALIASES.keys())
399
 
400
  x_col_to_use = x
401
  y_col_to_use = y
 
544
  )
545
  ))
546
  # ---- Add logic for making the legend -----------
547
+ for i, category in enumerate(colors_for_legend):
548
  fig.add_trace(go.Scatter(
549
  x=[None], y=[None],
550
  mode='markers',
 
559
  ))
560
 
561
  # Part B: Dummy traces for the SHAPES ("Agent Tooling")
562
+ for i, shape_name in enumerate(shapes_for_legend):
 
563
  fig.add_trace(go.Scatter(
564
  x=[None], y=[None],
565
  mode='markers',
566
  name=shape_name,
567
  legendgroup="tooling_group",
568
  legendgrouptitle_text="Agent Tooling" if i == 0 else None,
569
+ marker=dict(color='black', symbol=shape_map.get(shape_name), size=12)
570
  ))
571
 
572
  # --- Section 8: Configure Layout ---
ui_components.py CHANGED
@@ -8,6 +8,7 @@ import base64
8
  from agenteval.leaderboard.view import LeaderboardViewer
9
  from huggingface_hub import HfApi
10
 
 
11
  from leaderboard_transformer import (
12
  DataTransformer,
13
  transform_raw_dataframe,
@@ -40,27 +41,41 @@ AGENTEVAL_MANIFEST_NAME = "agenteval.json"
40
  os.makedirs(EXTRACTED_DATA_DIR, exist_ok=True)
41
  # Global variables
42
  COMBINED_ICON_MAP = {
43
- "Open Source + Open Weights": {
44
- "Standard": "assets/os-ow-standard.svg",
45
- "Custom with Standard Search": "assets/os-ow-equivalent.svg",
46
- "Custom": "assets/os-ow-custom.svg",
47
  },
48
- "Open Source": {
49
- "Standard": "assets/os-standard.svg",
50
- "Custom with Standard Search": "assets/os-equivalent.svg",
51
- "Fully Custom": "assets/os-custom.svg",
52
  },
53
- "API Available": {
54
- "Standard": "assets/api-standard.svg",
55
- "Custom with Standard Search": "assets/api-equivalent.svg",
56
- "Fully Custom": "assets/api-custom.svg",
57
  },
58
- "Closed": {
59
- "Standard": "assets/c-standard.svg",
60
- "Equivalent": "assets/c-equivalent.svg",
61
- "Fully Custom": "assets/c-custom.svg",
62
  }
63
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  OPENNESS_SVG_MAP = {
65
  "Open Source + Open Weights": "assets/os-ow-legend.svg",
66
  "Open Source": "assets/os-legend.svg",
 
8
  from agenteval.leaderboard.view import LeaderboardViewer
9
  from huggingface_hub import HfApi
10
 
11
+ import aliases
12
  from leaderboard_transformer import (
13
  DataTransformer,
14
  transform_raw_dataframe,
 
41
  os.makedirs(EXTRACTED_DATA_DIR, exist_ok=True)
42
  # Global variables
# Icon lookup table: openness category -> tool-usage category -> SVG asset.
# Only canonical names are spelled out here; alias spellings are added by the
# loops that follow.
COMBINED_ICON_MAP = {
    aliases.CANONICAL_OPENNESS_OPEN_OPEN_WEIGHTS: {
        aliases.CANONICAL_TOOL_USAGE_STANDARD: "assets/os-ow-standard.svg",
        aliases.CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE: "assets/os-ow-equivalent.svg",
        aliases.CANONICAL_TOOL_USAGE_FULLY_CUSTOM: "assets/os-ow-custom.svg",
    },
    aliases.CANONICAL_OPENNESS_OPEN_CLOSED_WEIGHTS: {
        aliases.CANONICAL_TOOL_USAGE_STANDARD: "assets/os-standard.svg",
        aliases.CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE: "assets/os-equivalent.svg",
        aliases.CANONICAL_TOOL_USAGE_FULLY_CUSTOM: "assets/os-custom.svg",
    },
    aliases.CANONICAL_OPENNESS_CLOSED_API_AVAILABLE: {
        aliases.CANONICAL_TOOL_USAGE_STANDARD: "assets/api-standard.svg",
        aliases.CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE: "assets/api-equivalent.svg",
        aliases.CANONICAL_TOOL_USAGE_FULLY_CUSTOM: "assets/api-custom.svg",
    },
    aliases.CANONICAL_OPENNESS_CLOSED_UI_ONLY: {
        aliases.CANONICAL_TOOL_USAGE_STANDARD: "assets/c-standard.svg",
        aliases.CANONICAL_TOOL_USAGE_CUSTOM_INTERFACE: "assets/c-equivalent.svg",
        aliases.CANONICAL_TOOL_USAGE_FULLY_CUSTOM: "assets/c-custom.svg",
    },
}


# Step 1: tool-usage aliases. Every inner dict gains one extra key per alias,
# pointing at the same asset as its canonical tool-usage name. This runs
# before the openness step so that the inner dicts are complete by the time
# they are reused under the openness alias keys.
for openness in COMBINED_ICON_MAP:
    for canonical_tool_usage, tool_usage_aliases in aliases.TOOL_USAGE_ALIASES.items():
        COMBINED_ICON_MAP[openness].update({
            tool_usage_alias: COMBINED_ICON_MAP[openness][canonical_tool_usage]
            for tool_usage_alias in tool_usage_aliases
        })

# Step 2: openness aliases. Each alias key refers to the very same inner dict
# object as its canonical openness name (no copy is made).
for canonical_openness, openness_aliases in aliases.OPENNESS_ALIASES.items():
    COMBINED_ICON_MAP.update({
        openness_alias: COMBINED_ICON_MAP[canonical_openness]
        for openness_alias in openness_aliases
    })
77
+
78
+
79
  OPENNESS_SVG_MAP = {
80
  "Open Source + Open Weights": "assets/os-ow-legend.svg",
81
  "Open Source": "assets/os-legend.svg",