Amber Tanaka committed on
Commit
0b78abd
·
unverified ·
1 Parent(s): 94497d7

Fix test data display (#6)

Browse files
c_and_e.py CHANGED
@@ -12,13 +12,16 @@ with gr.Blocks() as demo:
12
  validation_df, validation_tag_map = get_full_leaderboard_data("validation")
13
  test_df, test_tag_map = get_full_leaderboard_data("test")
14
  gr.Markdown(PLACEHOLDER_DESCRIPTION, elem_id="category-intro")
15
- if validation_tag_map:
16
  create_sub_navigation_bar(validation_tag_map, CATEGORY_NAME)
17
 
 
 
 
18
 
19
  # --- This page now has two main sections: Validation and Test ---
20
  with gr.Tabs():
21
- with gr.Tab("Results: Validation"):
22
  # 1. Load all necessary data for the "validation" split ONCE.
23
  validation_df, validation_tag_map = get_full_leaderboard_data("validation")
24
 
@@ -40,7 +43,7 @@ with gr.Blocks() as demo:
40
  else:
41
  gr.Markdown("No data available for validation split.")
42
 
43
- with gr.Tab("Results: Test"):
44
  # Repeat the process for the "test" split
45
  test_df, test_tag_map = get_full_leaderboard_data("test")
46
 
@@ -57,4 +60,24 @@ with gr.Blocks() as demo:
57
  category_name=CATEGORY_NAME
58
  )
59
  else:
60
- gr.Markdown("No data available for test split.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  validation_df, validation_tag_map = get_full_leaderboard_data("validation")
13
  test_df, test_tag_map = get_full_leaderboard_data("test")
14
  gr.Markdown(PLACEHOLDER_DESCRIPTION, elem_id="category-intro")
15
+ with gr.Column(elem_id="validation_nav_container", visible=True) as validation_nav_container:
16
  create_sub_navigation_bar(validation_tag_map, CATEGORY_NAME)
17
 
18
+ with gr.Column(elem_id="test_nav_container", visible=False) as test_nav_container:
19
+ create_sub_navigation_bar(test_tag_map, CATEGORY_NAME)
20
+
21
 
22
  # --- This page now has two main sections: Validation and Test ---
23
  with gr.Tabs():
24
+ with gr.Tab("Results: Validation") as validation_tab:
25
  # 1. Load all necessary data for the "validation" split ONCE.
26
  validation_df, validation_tag_map = get_full_leaderboard_data("validation")
27
 
 
43
  else:
44
  gr.Markdown("No data available for validation split.")
45
 
46
+ with gr.Tab("Results: Test") as test_tab:
47
  # Repeat the process for the "test" split
48
  test_df, test_tag_map = get_full_leaderboard_data("test")
49
 
 
60
  category_name=CATEGORY_NAME
61
  )
62
  else:
63
+ gr.Markdown("No data available for test split.")
64
+
65
+ show_validation_js = """
66
+ () => {
67
+ document.getElementById('validation_nav_container').style.display = 'block';
68
+ document.getElementById('test_nav_container').style.display = 'none';
69
+ }
70
+ """
71
+
72
+ # JavaScript to show the TEST nav, hide the VALIDATION nav, AND fix the plots.
73
+ show_test_js = """
74
+ () => {
75
+ document.getElementById('validation_nav_container').style.display = 'none';
76
+ document.getElementById('test_nav_container').style.display = 'block';
77
+ setTimeout(() => { window.dispatchEvent(new Event('resize')) }, 0);
78
+ }
79
+ """
80
+
81
+ # Assign the pure JS functions to the select events. No Python `fn` is needed.
82
+ validation_tab.select(fn=None, inputs=None, outputs=None, js=show_validation_js)
83
+ test_tab.select(fn=None, inputs=None, outputs=None, js=show_test_js)
data_analysis.py CHANGED
@@ -12,12 +12,14 @@ with gr.Blocks() as demo:
12
  validation_df, validation_tag_map = get_full_leaderboard_data("validation")
13
  test_df, test_tag_map = get_full_leaderboard_data("test")
14
  gr.Markdown(PLACEHOLDER_DESCRIPTION, elem_id="category-intro")
15
- if validation_tag_map:
16
  create_sub_navigation_bar(validation_tag_map, CATEGORY_NAME)
17
 
 
 
18
  # --- This page now has two main sections: Validation and Test ---
19
  with gr.Tabs():
20
- with gr.Tab("Results: Validation"):
21
  # 1. Load all necessary data for the "validation" split ONCE.
22
  validation_df, validation_tag_map = get_full_leaderboard_data("validation")
23
 
@@ -39,7 +41,7 @@ with gr.Blocks() as demo:
39
  else:
40
  gr.Markdown("No data available for validation split.")
41
 
42
- with gr.Tab("Results: Test"):
43
  # Repeat the process for the "test" split
44
  test_df, test_tag_map = get_full_leaderboard_data("test")
45
 
@@ -56,4 +58,24 @@ with gr.Blocks() as demo:
56
  category_name=CATEGORY_NAME
57
  )
58
  else:
59
- gr.Markdown("No data available for test split.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  validation_df, validation_tag_map = get_full_leaderboard_data("validation")
13
  test_df, test_tag_map = get_full_leaderboard_data("test")
14
  gr.Markdown(PLACEHOLDER_DESCRIPTION, elem_id="category-intro")
15
+ with gr.Column(elem_id="validation_nav_container", visible=True) as validation_nav_container:
16
  create_sub_navigation_bar(validation_tag_map, CATEGORY_NAME)
17
 
18
+ with gr.Column(elem_id="test_nav_container", visible=False) as test_nav_container:
19
+ create_sub_navigation_bar(test_tag_map, CATEGORY_NAME)
20
  # --- This page now has two main sections: Validation and Test ---
21
  with gr.Tabs():
22
+ with gr.Tab("Results: Validation") as validation_tab:
23
  # 1. Load all necessary data for the "validation" split ONCE.
24
  validation_df, validation_tag_map = get_full_leaderboard_data("validation")
25
 
 
41
  else:
42
  gr.Markdown("No data available for validation split.")
43
 
44
+ with gr.Tab("Results: Test") as test_tab:
45
  # Repeat the process for the "test" split
46
  test_df, test_tag_map = get_full_leaderboard_data("test")
47
 
 
58
  category_name=CATEGORY_NAME
59
  )
60
  else:
61
+ gr.Markdown("No data available for test split.")
62
+
63
+ show_validation_js = """
64
+ () => {
65
+ document.getElementById('validation_nav_container').style.display = 'block';
66
+ document.getElementById('test_nav_container').style.display = 'none';
67
+ }
68
+ """
69
+
70
+ # JavaScript to show the TEST nav, hide the VALIDATION nav, AND fix the plots.
71
+ show_test_js = """
72
+ () => {
73
+ document.getElementById('validation_nav_container').style.display = 'none';
74
+ document.getElementById('test_nav_container').style.display = 'block';
75
+ setTimeout(() => { window.dispatchEvent(new Event('resize')) }, 0);
76
+ }
77
+ """
78
+
79
+ # Assign the pure JS functions to the select events. No Python `fn` is needed.
80
+ validation_tab.select(fn=None, inputs=None, outputs=None, js=show_validation_js)
81
+ test_tab.select(fn=None, inputs=None, outputs=None, js=show_test_js)
e2e.py CHANGED
@@ -12,12 +12,14 @@ with gr.Blocks() as demo:
12
  validation_df, validation_tag_map = get_full_leaderboard_data("validation")
13
  test_df, test_tag_map = get_full_leaderboard_data("test")
14
  gr.Markdown(PLACEHOLDER_DESCRIPTION, elem_id="category-intro")
15
- if validation_tag_map:
16
  create_sub_navigation_bar(validation_tag_map, CATEGORY_NAME)
17
 
 
 
18
  # --- This page now has two main sections: Validation and Test ---
19
  with gr.Tabs():
20
- with gr.Tab("Results: Validation"):
21
  # 1. Load all necessary data for the "validation" split ONCE.
22
  validation_df, validation_tag_map = get_full_leaderboard_data("validation")
23
 
@@ -39,7 +41,7 @@ with gr.Blocks() as demo:
39
  else:
40
  gr.Markdown("No data available for validation split.")
41
 
42
- with gr.Tab("Results: Test"):
43
  # Repeat the process for the "test" split
44
  test_df, test_tag_map = get_full_leaderboard_data("test")
45
 
@@ -56,4 +58,23 @@ with gr.Blocks() as demo:
56
  category_name=CATEGORY_NAME
57
  )
58
  else:
59
- gr.Markdown("No data available for test split.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  validation_df, validation_tag_map = get_full_leaderboard_data("validation")
13
  test_df, test_tag_map = get_full_leaderboard_data("test")
14
  gr.Markdown(PLACEHOLDER_DESCRIPTION, elem_id="category-intro")
15
+ with gr.Column(elem_id="validation_nav_container", visible=True) as validation_nav_container:
16
  create_sub_navigation_bar(validation_tag_map, CATEGORY_NAME)
17
 
18
+ with gr.Column(elem_id="test_nav_container", visible=False) as test_nav_container:
19
+ create_sub_navigation_bar(test_tag_map, CATEGORY_NAME)
20
  # --- This page now has two main sections: Validation and Test ---
21
  with gr.Tabs():
22
+ with gr.Tab("Results: Validation") as validation_tab:
23
  # 1. Load all necessary data for the "validation" split ONCE.
24
  validation_df, validation_tag_map = get_full_leaderboard_data("validation")
25
 
 
41
  else:
42
  gr.Markdown("No data available for validation split.")
43
 
44
+ with gr.Tab("Results: Test") as test_tab:
45
  # Repeat the process for the "test" split
46
  test_df, test_tag_map = get_full_leaderboard_data("test")
47
 
 
58
  category_name=CATEGORY_NAME
59
  )
60
  else:
61
+ gr.Markdown("No data available for test split.")
62
+ show_validation_js = """
63
+ () => {
64
+ document.getElementById('validation_nav_container').style.display = 'block';
65
+ document.getElementById('test_nav_container').style.display = 'none';
66
+ }
67
+ """
68
+
69
+ # JavaScript to show the TEST nav, hide the VALIDATION nav, AND fix the plots.
70
+ show_test_js = """
71
+ () => {
72
+ document.getElementById('validation_nav_container').style.display = 'none';
73
+ document.getElementById('test_nav_container').style.display = 'block';
74
+ setTimeout(() => { window.dispatchEvent(new Event('resize')) }, 0);
75
+ }
76
+ """
77
+
78
+ # Assign the pure JS functions to the select events. No Python `fn` is needed.
79
+ validation_tab.select(fn=None, inputs=None, outputs=None, js=show_validation_js)
80
+ test_tab.select(fn=None, inputs=None, outputs=None, js=show_test_js)
leaderboard_transformer.py CHANGED
@@ -14,7 +14,7 @@ INFORMAL_TO_FORMAL_NAME_MAP = {
14
  "code": "Code Execution",
15
  "discovery": "Discovery",
16
 
17
- # Long Raw Names
18
  "arxivdigestables_validation": "Arxivdigestables Validation",
19
  "sqa_dev": "Sqa Dev",
20
  "litqa2_validation": "Litqa2 Validation",
@@ -24,6 +24,18 @@ INFORMAL_TO_FORMAL_NAME_MAP = {
24
  "ds1000_validation": "DS1000 Validation",
25
  "e2e_discovery_validation": "E2E Discovery Validation",
26
  "super_validation": "Super Validation",
 
 
 
 
 
 
 
 
 
 
 
 
27
  }
28
 
29
 
 
14
  "code": "Code Execution",
15
  "discovery": "Discovery",
16
 
17
+ # Validation Names
18
  "arxivdigestables_validation": "Arxivdigestables Validation",
19
  "sqa_dev": "Sqa Dev",
20
  "litqa2_validation": "Litqa2 Validation",
 
24
  "ds1000_validation": "DS1000 Validation",
25
  "e2e_discovery_validation": "E2E Discovery Validation",
26
  "super_validation": "Super Validation",
27
+ # Test Names
28
+ "paper_finder_test": "Paper Finder Test",
29
+ "paper_finder_litqa2_test": "Paper Finder Litqa2 Test",
30
+ "sqa_test": "Sqa Test",
31
+ "arxivdigestables_test": "Arxivdigestables Test",
32
+ "litqa2_test": "Litqa2 Test",
33
+ "discoverybench_test": "Discoverybench Test",
34
+ "core_bench_test": "Core Bench Test",
35
+ "ds1000_test": "DS1000 Test",
36
+ "e2e_discovery_test": "E2E Discovery Test",
37
+ "e2e_discovery_hard_test": "E2E Discovery Hard Test",
38
+ "super_test": "Super Test",
39
  }
40
 
41
 
literature_understanding.py CHANGED
@@ -13,12 +13,15 @@ with gr.Blocks() as demo:
13
  validation_df, validation_tag_map = get_full_leaderboard_data("validation")
14
  test_df, test_tag_map = get_full_leaderboard_data("test")
15
  gr.Markdown(LIT_DESCRIPTION, elem_id="category-intro")
16
- if validation_tag_map:
17
  create_sub_navigation_bar(validation_tag_map, CATEGORY_NAME)
18
 
 
 
 
19
  # --- This page now has two main sections: Validation and Test ---
20
  with gr.Tabs():
21
- with gr.Tab("Results: Validation"):
22
  # 1. Load all necessary data for the "validation" split ONCE.
23
  validation_df, validation_tag_map = get_full_leaderboard_data("validation")
24
 
@@ -40,7 +43,7 @@ with gr.Blocks() as demo:
40
  else:
41
  gr.Markdown("No data available for validation split.")
42
 
43
- with gr.Tab("Results: Test"):
44
  # Repeat the process for the "test" split
45
  test_df, test_tag_map = get_full_leaderboard_data("test")
46
 
@@ -57,4 +60,24 @@ with gr.Blocks() as demo:
57
  category_name=CATEGORY_NAME
58
  )
59
  else:
60
- gr.Markdown("No data available for test split.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  validation_df, validation_tag_map = get_full_leaderboard_data("validation")
14
  test_df, test_tag_map = get_full_leaderboard_data("test")
15
  gr.Markdown(LIT_DESCRIPTION, elem_id="category-intro")
16
+ with gr.Column(elem_id="validation_nav_container", visible=True) as validation_nav_container:
17
  create_sub_navigation_bar(validation_tag_map, CATEGORY_NAME)
18
 
19
+ with gr.Column(elem_id="test_nav_container", visible=False) as test_nav_container:
20
+ create_sub_navigation_bar(test_tag_map, CATEGORY_NAME)
21
+
22
  # --- This page now has two main sections: Validation and Test ---
23
  with gr.Tabs():
24
+ with gr.Tab("Results: Validation") as validation_tab:
25
  # 1. Load all necessary data for the "validation" split ONCE.
26
  validation_df, validation_tag_map = get_full_leaderboard_data("validation")
27
 
 
43
  else:
44
  gr.Markdown("No data available for validation split.")
45
 
46
+ with gr.Tab("Results: Test") as test_tab:
47
  # Repeat the process for the "test" split
48
  test_df, test_tag_map = get_full_leaderboard_data("test")
49
 
 
60
  category_name=CATEGORY_NAME
61
  )
62
  else:
63
+ gr.Markdown("No data available for test split.")
64
+
65
+ show_validation_js = """
66
+ () => {
67
+ document.getElementById('validation_nav_container').style.display = 'block';
68
+ document.getElementById('test_nav_container').style.display = 'none';
69
+ }
70
+ """
71
+
72
+ # JavaScript to show the TEST nav, hide the VALIDATION nav, AND fix the plots.
73
+ show_test_js = """
74
+ () => {
75
+ document.getElementById('validation_nav_container').style.display = 'none';
76
+ document.getElementById('test_nav_container').style.display = 'block';
77
+ setTimeout(() => { window.dispatchEvent(new Event('resize')) }, 0);
78
+ }
79
+ """
80
+
81
+ # Assign the pure JS functions to the select events. No Python `fn` is needed.
82
+ validation_tab.select(fn=None, inputs=None, outputs=None, js=show_validation_js)
83
+ test_tab.select(fn=None, inputs=None, outputs=None, js=show_test_js)
ui_components.py CHANGED
@@ -28,7 +28,7 @@ from content import (
28
 
29
  # --- Constants and Configuration ---
30
  LOCAL_DEBUG = not (os.environ.get("system") == "spaces")
31
- CONFIG_NAME = "1.0.0-dev2" # This corresponds to 'config' in LeaderboardViewer
32
  IS_INTERNAL = os.environ.get("IS_INTERNAL", "false").lower() == "true"
33
 
34
  OWNER = "allenai"
@@ -213,7 +213,6 @@ def get_full_leaderboard_data(split: str) -> tuple[pd.DataFrame, dict]:
213
  Loads and transforms the complete dataset for a given split.
214
  This function handles caching and returns the final "pretty" DataFrame and tag map.
215
  """
216
- # This reuses your existing robust caching logic
217
  viewer_or_data, raw_tag_map = get_leaderboard_viewer_instance(split)
218
 
219
  if isinstance(viewer_or_data, (LeaderboardViewer, DummyViewer)):
@@ -291,7 +290,6 @@ def create_benchmark_details_display(
291
 
292
  # 2. Loop through each benchmark and create its UI components
293
  for benchmark_name in benchmark_names:
294
- with gr.Blocks():
295
  gr.Markdown(f"### {benchmark_name}", header_links=True)
296
 
297
  # 3. Prepare the data for this specific benchmark's table and plot
 
28
 
29
  # --- Constants and Configuration ---
30
  LOCAL_DEBUG = not (os.environ.get("system") == "spaces")
31
+ CONFIG_NAME = "1.0.0-dev1" # This corresponds to 'config' in LeaderboardViewer
32
  IS_INTERNAL = os.environ.get("IS_INTERNAL", "false").lower() == "true"
33
 
34
  OWNER = "allenai"
 
213
  Loads and transforms the complete dataset for a given split.
214
  This function handles caching and returns the final "pretty" DataFrame and tag map.
215
  """
 
216
  viewer_or_data, raw_tag_map = get_leaderboard_viewer_instance(split)
217
 
218
  if isinstance(viewer_or_data, (LeaderboardViewer, DummyViewer)):
 
290
 
291
  # 2. Loop through each benchmark and create its UI components
292
  for benchmark_name in benchmark_names:
 
293
  gr.Markdown(f"### {benchmark_name}", header_links=True)
294
 
295
  # 3. Prepare the data for this specific benchmark's table and plot