Spaces:
Running
Running
Amber Tanaka
committed on
Fix test data display (#6)
Browse files
- c_and_e.py +27 -4
- data_analysis.py +26 -4
- e2e.py +25 -4
- leaderboard_transformer.py +13 -1
- literature_understanding.py +27 -4
- ui_components.py +1 -3
c_and_e.py
CHANGED
@@ -12,13 +12,16 @@ with gr.Blocks() as demo:
|
|
12 |
validation_df, validation_tag_map = get_full_leaderboard_data("validation")
|
13 |
test_df, test_tag_map = get_full_leaderboard_data("test")
|
14 |
gr.Markdown(PLACEHOLDER_DESCRIPTION, elem_id="category-intro")
|
15 |
-
|
16 |
create_sub_navigation_bar(validation_tag_map, CATEGORY_NAME)
|
17 |
|
|
|
|
|
|
|
18 |
|
19 |
# --- This page now has two main sections: Validation and Test ---
|
20 |
with gr.Tabs():
|
21 |
-
with gr.Tab("Results: Validation"):
|
22 |
# 1. Load all necessary data for the "validation" split ONCE.
|
23 |
validation_df, validation_tag_map = get_full_leaderboard_data("validation")
|
24 |
|
@@ -40,7 +43,7 @@ with gr.Blocks() as demo:
|
|
40 |
else:
|
41 |
gr.Markdown("No data available for validation split.")
|
42 |
|
43 |
-
with gr.Tab("Results: Test"):
|
44 |
# Repeat the process for the "test" split
|
45 |
test_df, test_tag_map = get_full_leaderboard_data("test")
|
46 |
|
@@ -57,4 +60,24 @@ with gr.Blocks() as demo:
|
|
57 |
category_name=CATEGORY_NAME
|
58 |
)
|
59 |
else:
|
60 |
-
gr.Markdown("No data available for test split.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
validation_df, validation_tag_map = get_full_leaderboard_data("validation")
|
13 |
test_df, test_tag_map = get_full_leaderboard_data("test")
|
14 |
gr.Markdown(PLACEHOLDER_DESCRIPTION, elem_id="category-intro")
|
15 |
+
with gr.Column(elem_id="validation_nav_container", visible=True) as validation_nav_container:
|
16 |
create_sub_navigation_bar(validation_tag_map, CATEGORY_NAME)
|
17 |
|
18 |
+
with gr.Column(elem_id="test_nav_container", visible=False) as test_nav_container:
|
19 |
+
create_sub_navigation_bar(test_tag_map, CATEGORY_NAME)
|
20 |
+
|
21 |
|
22 |
# --- This page now has two main sections: Validation and Test ---
|
23 |
with gr.Tabs():
|
24 |
+
with gr.Tab("Results: Validation") as validation_tab:
|
25 |
# 1. Load all necessary data for the "validation" split ONCE.
|
26 |
validation_df, validation_tag_map = get_full_leaderboard_data("validation")
|
27 |
|
|
|
43 |
else:
|
44 |
gr.Markdown("No data available for validation split.")
|
45 |
|
46 |
+
with gr.Tab("Results: Test") as test_tab:
|
47 |
# Repeat the process for the "test" split
|
48 |
test_df, test_tag_map = get_full_leaderboard_data("test")
|
49 |
|
|
|
60 |
category_name=CATEGORY_NAME
|
61 |
)
|
62 |
else:
|
63 |
+
gr.Markdown("No data available for test split.")
|
64 |
+
|
65 |
+
show_validation_js = """
|
66 |
+
() => {
|
67 |
+
document.getElementById('validation_nav_container').style.display = 'block';
|
68 |
+
document.getElementById('test_nav_container').style.display = 'none';
|
69 |
+
}
|
70 |
+
"""
|
71 |
+
|
72 |
+
# JavaScript to show the TEST nav, hide the VALIDATION nav, AND fix the plots.
|
73 |
+
show_test_js = """
|
74 |
+
() => {
|
75 |
+
document.getElementById('validation_nav_container').style.display = 'none';
|
76 |
+
document.getElementById('test_nav_container').style.display = 'block';
|
77 |
+
setTimeout(() => { window.dispatchEvent(new Event('resize')) }, 0);
|
78 |
+
}
|
79 |
+
"""
|
80 |
+
|
81 |
+
# Assign the pure JS functions to the select events. No Python `fn` is needed.
|
82 |
+
validation_tab.select(fn=None, inputs=None, outputs=None, js=show_validation_js)
|
83 |
+
test_tab.select(fn=None, inputs=None, outputs=None, js=show_test_js)
|
data_analysis.py
CHANGED
@@ -12,12 +12,14 @@ with gr.Blocks() as demo:
|
|
12 |
validation_df, validation_tag_map = get_full_leaderboard_data("validation")
|
13 |
test_df, test_tag_map = get_full_leaderboard_data("test")
|
14 |
gr.Markdown(PLACEHOLDER_DESCRIPTION, elem_id="category-intro")
|
15 |
-
|
16 |
create_sub_navigation_bar(validation_tag_map, CATEGORY_NAME)
|
17 |
|
|
|
|
|
18 |
# --- This page now has two main sections: Validation and Test ---
|
19 |
with gr.Tabs():
|
20 |
-
with gr.Tab("Results: Validation"):
|
21 |
# 1. Load all necessary data for the "validation" split ONCE.
|
22 |
validation_df, validation_tag_map = get_full_leaderboard_data("validation")
|
23 |
|
@@ -39,7 +41,7 @@ with gr.Blocks() as demo:
|
|
39 |
else:
|
40 |
gr.Markdown("No data available for validation split.")
|
41 |
|
42 |
-
with gr.Tab("Results: Test"):
|
43 |
# Repeat the process for the "test" split
|
44 |
test_df, test_tag_map = get_full_leaderboard_data("test")
|
45 |
|
@@ -56,4 +58,24 @@ with gr.Blocks() as demo:
|
|
56 |
category_name=CATEGORY_NAME
|
57 |
)
|
58 |
else:
|
59 |
-
gr.Markdown("No data available for test split.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
validation_df, validation_tag_map = get_full_leaderboard_data("validation")
|
13 |
test_df, test_tag_map = get_full_leaderboard_data("test")
|
14 |
gr.Markdown(PLACEHOLDER_DESCRIPTION, elem_id="category-intro")
|
15 |
+
with gr.Column(elem_id="validation_nav_container", visible=True) as validation_nav_container:
|
16 |
create_sub_navigation_bar(validation_tag_map, CATEGORY_NAME)
|
17 |
|
18 |
+
with gr.Column(elem_id="test_nav_container", visible=False) as test_nav_container:
|
19 |
+
create_sub_navigation_bar(test_tag_map, CATEGORY_NAME)
|
20 |
# --- This page now has two main sections: Validation and Test ---
|
21 |
with gr.Tabs():
|
22 |
+
with gr.Tab("Results: Validation") as validation_tab:
|
23 |
# 1. Load all necessary data for the "validation" split ONCE.
|
24 |
validation_df, validation_tag_map = get_full_leaderboard_data("validation")
|
25 |
|
|
|
41 |
else:
|
42 |
gr.Markdown("No data available for validation split.")
|
43 |
|
44 |
+
with gr.Tab("Results: Test") as test_tab:
|
45 |
# Repeat the process for the "test" split
|
46 |
test_df, test_tag_map = get_full_leaderboard_data("test")
|
47 |
|
|
|
58 |
category_name=CATEGORY_NAME
|
59 |
)
|
60 |
else:
|
61 |
+
gr.Markdown("No data available for test split.")
|
62 |
+
|
63 |
+
show_validation_js = """
|
64 |
+
() => {
|
65 |
+
document.getElementById('validation_nav_container').style.display = 'block';
|
66 |
+
document.getElementById('test_nav_container').style.display = 'none';
|
67 |
+
}
|
68 |
+
"""
|
69 |
+
|
70 |
+
# JavaScript to show the TEST nav, hide the VALIDATION nav, AND fix the plots.
|
71 |
+
show_test_js = """
|
72 |
+
() => {
|
73 |
+
document.getElementById('validation_nav_container').style.display = 'none';
|
74 |
+
document.getElementById('test_nav_container').style.display = 'block';
|
75 |
+
setTimeout(() => { window.dispatchEvent(new Event('resize')) }, 0);
|
76 |
+
}
|
77 |
+
"""
|
78 |
+
|
79 |
+
# Assign the pure JS functions to the select events. No Python `fn` is needed.
|
80 |
+
validation_tab.select(fn=None, inputs=None, outputs=None, js=show_validation_js)
|
81 |
+
test_tab.select(fn=None, inputs=None, outputs=None, js=show_test_js)
|
e2e.py
CHANGED
@@ -12,12 +12,14 @@ with gr.Blocks() as demo:
|
|
12 |
validation_df, validation_tag_map = get_full_leaderboard_data("validation")
|
13 |
test_df, test_tag_map = get_full_leaderboard_data("test")
|
14 |
gr.Markdown(PLACEHOLDER_DESCRIPTION, elem_id="category-intro")
|
15 |
-
|
16 |
create_sub_navigation_bar(validation_tag_map, CATEGORY_NAME)
|
17 |
|
|
|
|
|
18 |
# --- This page now has two main sections: Validation and Test ---
|
19 |
with gr.Tabs():
|
20 |
-
with gr.Tab("Results: Validation"):
|
21 |
# 1. Load all necessary data for the "validation" split ONCE.
|
22 |
validation_df, validation_tag_map = get_full_leaderboard_data("validation")
|
23 |
|
@@ -39,7 +41,7 @@ with gr.Blocks() as demo:
|
|
39 |
else:
|
40 |
gr.Markdown("No data available for validation split.")
|
41 |
|
42 |
-
with gr.Tab("Results: Test"):
|
43 |
# Repeat the process for the "test" split
|
44 |
test_df, test_tag_map = get_full_leaderboard_data("test")
|
45 |
|
@@ -56,4 +58,23 @@ with gr.Blocks() as demo:
|
|
56 |
category_name=CATEGORY_NAME
|
57 |
)
|
58 |
else:
|
59 |
-
gr.Markdown("No data available for test split.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
validation_df, validation_tag_map = get_full_leaderboard_data("validation")
|
13 |
test_df, test_tag_map = get_full_leaderboard_data("test")
|
14 |
gr.Markdown(PLACEHOLDER_DESCRIPTION, elem_id="category-intro")
|
15 |
+
with gr.Column(elem_id="validation_nav_container", visible=True) as validation_nav_container:
|
16 |
create_sub_navigation_bar(validation_tag_map, CATEGORY_NAME)
|
17 |
|
18 |
+
with gr.Column(elem_id="test_nav_container", visible=False) as test_nav_container:
|
19 |
+
create_sub_navigation_bar(test_tag_map, CATEGORY_NAME)
|
20 |
# --- This page now has two main sections: Validation and Test ---
|
21 |
with gr.Tabs():
|
22 |
+
with gr.Tab("Results: Validation") as validation_tab:
|
23 |
# 1. Load all necessary data for the "validation" split ONCE.
|
24 |
validation_df, validation_tag_map = get_full_leaderboard_data("validation")
|
25 |
|
|
|
41 |
else:
|
42 |
gr.Markdown("No data available for validation split.")
|
43 |
|
44 |
+
with gr.Tab("Results: Test") as test_tab:
|
45 |
# Repeat the process for the "test" split
|
46 |
test_df, test_tag_map = get_full_leaderboard_data("test")
|
47 |
|
|
|
58 |
category_name=CATEGORY_NAME
|
59 |
)
|
60 |
else:
|
61 |
+
gr.Markdown("No data available for test split.")
|
62 |
+
show_validation_js = """
|
63 |
+
() => {
|
64 |
+
document.getElementById('validation_nav_container').style.display = 'block';
|
65 |
+
document.getElementById('test_nav_container').style.display = 'none';
|
66 |
+
}
|
67 |
+
"""
|
68 |
+
|
69 |
+
# JavaScript to show the TEST nav, hide the VALIDATION nav, AND fix the plots.
|
70 |
+
show_test_js = """
|
71 |
+
() => {
|
72 |
+
document.getElementById('validation_nav_container').style.display = 'none';
|
73 |
+
document.getElementById('test_nav_container').style.display = 'block';
|
74 |
+
setTimeout(() => { window.dispatchEvent(new Event('resize')) }, 0);
|
75 |
+
}
|
76 |
+
"""
|
77 |
+
|
78 |
+
# Assign the pure JS functions to the select events. No Python `fn` is needed.
|
79 |
+
validation_tab.select(fn=None, inputs=None, outputs=None, js=show_validation_js)
|
80 |
+
test_tab.select(fn=None, inputs=None, outputs=None, js=show_test_js)
|
leaderboard_transformer.py
CHANGED
@@ -14,7 +14,7 @@ INFORMAL_TO_FORMAL_NAME_MAP = {
|
|
14 |
"code": "Code Execution",
|
15 |
"discovery": "Discovery",
|
16 |
|
17 |
-
#
|
18 |
"arxivdigestables_validation": "Arxivdigestables Validation",
|
19 |
"sqa_dev": "Sqa Dev",
|
20 |
"litqa2_validation": "Litqa2 Validation",
|
@@ -24,6 +24,18 @@ INFORMAL_TO_FORMAL_NAME_MAP = {
|
|
24 |
"ds1000_validation": "DS1000 Validation",
|
25 |
"e2e_discovery_validation": "E2E Discovery Validation",
|
26 |
"super_validation": "Super Validation",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
}
|
28 |
|
29 |
|
|
|
14 |
"code": "Code Execution",
|
15 |
"discovery": "Discovery",
|
16 |
|
17 |
+
# Validation Names
|
18 |
"arxivdigestables_validation": "Arxivdigestables Validation",
|
19 |
"sqa_dev": "Sqa Dev",
|
20 |
"litqa2_validation": "Litqa2 Validation",
|
|
|
24 |
"ds1000_validation": "DS1000 Validation",
|
25 |
"e2e_discovery_validation": "E2E Discovery Validation",
|
26 |
"super_validation": "Super Validation",
|
27 |
+
# Test Names
|
28 |
+
"paper_finder_test": "Paper Finder Test",
|
29 |
+
"paper_finder_litqa2_test": "Paper Finder Litqa2 Test",
|
30 |
+
"sqa_test": "Sqa Test",
|
31 |
+
"arxivdigestables_test": "Arxivdigestables Test",
|
32 |
+
"litqa2_test": "Litqa2 Test",
|
33 |
+
"discoverybench_test": "Discoverybench Test",
|
34 |
+
"core_bench_test": "Core Bench Test",
|
35 |
+
"ds1000_test": "DS1000 Test",
|
36 |
+
"e2e_discovery_test": "E2E Discovery Test",
|
37 |
+
"e2e_discovery_hard_test": "E2E Discovery Hard Test",
|
38 |
+
"super_test": "Super Test",
|
39 |
}
|
40 |
|
41 |
|
literature_understanding.py
CHANGED
@@ -13,12 +13,15 @@ with gr.Blocks() as demo:
|
|
13 |
validation_df, validation_tag_map = get_full_leaderboard_data("validation")
|
14 |
test_df, test_tag_map = get_full_leaderboard_data("test")
|
15 |
gr.Markdown(LIT_DESCRIPTION, elem_id="category-intro")
|
16 |
-
|
17 |
create_sub_navigation_bar(validation_tag_map, CATEGORY_NAME)
|
18 |
|
|
|
|
|
|
|
19 |
# --- This page now has two main sections: Validation and Test ---
|
20 |
with gr.Tabs():
|
21 |
-
with gr.Tab("Results: Validation"):
|
22 |
# 1. Load all necessary data for the "validation" split ONCE.
|
23 |
validation_df, validation_tag_map = get_full_leaderboard_data("validation")
|
24 |
|
@@ -40,7 +43,7 @@ with gr.Blocks() as demo:
|
|
40 |
else:
|
41 |
gr.Markdown("No data available for validation split.")
|
42 |
|
43 |
-
with gr.Tab("Results: Test"):
|
44 |
# Repeat the process for the "test" split
|
45 |
test_df, test_tag_map = get_full_leaderboard_data("test")
|
46 |
|
@@ -57,4 +60,24 @@ with gr.Blocks() as demo:
|
|
57 |
category_name=CATEGORY_NAME
|
58 |
)
|
59 |
else:
|
60 |
-
gr.Markdown("No data available for test split.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
validation_df, validation_tag_map = get_full_leaderboard_data("validation")
|
14 |
test_df, test_tag_map = get_full_leaderboard_data("test")
|
15 |
gr.Markdown(LIT_DESCRIPTION, elem_id="category-intro")
|
16 |
+
with gr.Column(elem_id="validation_nav_container", visible=True) as validation_nav_container:
|
17 |
create_sub_navigation_bar(validation_tag_map, CATEGORY_NAME)
|
18 |
|
19 |
+
with gr.Column(elem_id="test_nav_container", visible=False) as test_nav_container:
|
20 |
+
create_sub_navigation_bar(test_tag_map, CATEGORY_NAME)
|
21 |
+
|
22 |
# --- This page now has two main sections: Validation and Test ---
|
23 |
with gr.Tabs():
|
24 |
+
with gr.Tab("Results: Validation") as validation_tab:
|
25 |
# 1. Load all necessary data for the "validation" split ONCE.
|
26 |
validation_df, validation_tag_map = get_full_leaderboard_data("validation")
|
27 |
|
|
|
43 |
else:
|
44 |
gr.Markdown("No data available for validation split.")
|
45 |
|
46 |
+
with gr.Tab("Results: Test") as test_tab:
|
47 |
# Repeat the process for the "test" split
|
48 |
test_df, test_tag_map = get_full_leaderboard_data("test")
|
49 |
|
|
|
60 |
category_name=CATEGORY_NAME
|
61 |
)
|
62 |
else:
|
63 |
+
gr.Markdown("No data available for test split.")
|
64 |
+
|
65 |
+
show_validation_js = """
|
66 |
+
() => {
|
67 |
+
document.getElementById('validation_nav_container').style.display = 'block';
|
68 |
+
document.getElementById('test_nav_container').style.display = 'none';
|
69 |
+
}
|
70 |
+
"""
|
71 |
+
|
72 |
+
# JavaScript to show the TEST nav, hide the VALIDATION nav, AND fix the plots.
|
73 |
+
show_test_js = """
|
74 |
+
() => {
|
75 |
+
document.getElementById('validation_nav_container').style.display = 'none';
|
76 |
+
document.getElementById('test_nav_container').style.display = 'block';
|
77 |
+
setTimeout(() => { window.dispatchEvent(new Event('resize')) }, 0);
|
78 |
+
}
|
79 |
+
"""
|
80 |
+
|
81 |
+
# Assign the pure JS functions to the select events. No Python `fn` is needed.
|
82 |
+
validation_tab.select(fn=None, inputs=None, outputs=None, js=show_validation_js)
|
83 |
+
test_tab.select(fn=None, inputs=None, outputs=None, js=show_test_js)
|
ui_components.py
CHANGED
@@ -28,7 +28,7 @@ from content import (
|
|
28 |
|
29 |
# --- Constants and Configuration ---
|
30 |
LOCAL_DEBUG = not (os.environ.get("system") == "spaces")
|
31 |
-
CONFIG_NAME = "1.0.0-
|
32 |
IS_INTERNAL = os.environ.get("IS_INTERNAL", "false").lower() == "true"
|
33 |
|
34 |
OWNER = "allenai"
|
@@ -213,7 +213,6 @@ def get_full_leaderboard_data(split: str) -> tuple[pd.DataFrame, dict]:
|
|
213 |
Loads and transforms the complete dataset for a given split.
|
214 |
This function handles caching and returns the final "pretty" DataFrame and tag map.
|
215 |
"""
|
216 |
-
# This reuses your existing robust caching logic
|
217 |
viewer_or_data, raw_tag_map = get_leaderboard_viewer_instance(split)
|
218 |
|
219 |
if isinstance(viewer_or_data, (LeaderboardViewer, DummyViewer)):
|
@@ -291,7 +290,6 @@ def create_benchmark_details_display(
|
|
291 |
|
292 |
# 2. Loop through each benchmark and create its UI components
|
293 |
for benchmark_name in benchmark_names:
|
294 |
-
with gr.Blocks():
|
295 |
gr.Markdown(f"### {benchmark_name}", header_links=True)
|
296 |
|
297 |
# 3. Prepare the data for this specific benchmark's table and plot
|
|
|
28 |
|
29 |
# --- Constants and Configuration ---
|
30 |
LOCAL_DEBUG = not (os.environ.get("system") == "spaces")
|
31 |
+
CONFIG_NAME = "1.0.0-dev1" # This corresponds to 'config' in LeaderboardViewer
|
32 |
IS_INTERNAL = os.environ.get("IS_INTERNAL", "false").lower() == "true"
|
33 |
|
34 |
OWNER = "allenai"
|
|
|
213 |
Loads and transforms the complete dataset for a given split.
|
214 |
This function handles caching and returns the final "pretty" DataFrame and tag map.
|
215 |
"""
|
|
|
216 |
viewer_or_data, raw_tag_map = get_leaderboard_viewer_instance(split)
|
217 |
|
218 |
if isinstance(viewer_or_data, (LeaderboardViewer, DummyViewer)):
|
|
|
290 |
|
291 |
# 2. Loop through each benchmark and create its UI components
|
292 |
for benchmark_name in benchmark_names:
|
|
|
293 |
gr.Markdown(f"### {benchmark_name}", header_links=True)
|
294 |
|
295 |
# 3. Prepare the data for this specific benchmark's table and plot
|