|
|
""" |
|
|
Synthefy MUSEval Leaderboard - Main Gradio Application |
|
|
Following GIFT-Eval import structure with custom layout |
|
|
""" |
|
|
|
|
|
import gradio as gr |
|
|
import pandas as pd |
|
|
|
|
|
|
|
|
try: |
|
|
from apscheduler.schedulers.background import BackgroundScheduler |
|
|
SCHEDULER_AVAILABLE = True |
|
|
except ImportError: |
|
|
SCHEDULER_AVAILABLE = False |
|
|
print("Warning: apscheduler not available, scheduler features disabled") |
|
|
|
|
|
try: |
|
|
from huggingface_hub import snapshot_download |
|
|
HUB_AVAILABLE = True |
|
|
except ImportError: |
|
|
HUB_AVAILABLE = False |
|
|
print("Warning: huggingface_hub not available, hub features disabled") |
|
|
|
|
|
from src.about import ( |
|
|
CITATION_BUTTON_LABEL, |
|
|
CITATION_BUTTON_TEXT, |
|
|
EVALUATION_QUEUE_TEXT, |
|
|
INTRODUCTION_TEXT, |
|
|
BENCHMARKS_TEXT, |
|
|
TITLE, |
|
|
) |
|
|
from src.display.css_html_js import custom_css |
|
|
from src.display.utils import ( |
|
|
BENCHMARK_COLS, |
|
|
EVAL_COLS, |
|
|
EVAL_TYPES, |
|
|
ModelInfoColumn, |
|
|
ModelType, |
|
|
fields, |
|
|
WeightType, |
|
|
Precision |
|
|
) |
|
|
from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN |
|
|
from src.populate import get_evaluation_queue_df, get_leaderboard_df, get_model_info_df, get_merged_df |
|
|
from src.utils import norm_sNavie, pivot_df, get_grouped_dfs, pivot_existed_df, rename_metrics, format_df |
|
|
from src.load_results import ( |
|
|
load_results_with_metadata, |
|
|
create_overall_table, |
|
|
create_html_table, |
|
|
create_html_table_from_df, |
|
|
get_filter_options, |
|
|
get_model_metadata, |
|
|
create_model_metadata_display, |
|
|
get_overall_summary, |
|
|
sort_table_by_column, |
|
|
get_available_models |
|
|
) |
|
|
|
|
|
def create_model_buttons():
    """Build one small, secondary-styled ``gr.Button`` per available model.

    Model names are read from ``get_available_models()`` and used as the
    button values, in the order that function returns them.

    Returns:
        list: the created ``gr.Button`` components.
    """
    from src.load_results import get_available_models

    # NOTE(review): scale=0.5 is passed through unchanged; Gradio describes
    # `scale` as an integer, so confirm that a fractional value is intended.
    return [
        gr.Button(
            value=model_name,
            variant="secondary",
            size="sm",
            scale=0.5,
        )
        for model_name in get_available_models()
    ]
|
|
|
|
|
def restart_space():
    """Request a restart of the Space identified by ``REPO_ID`` via ``API``."""
    target_repo = REPO_ID
    API.restart_space(repo_id=target_repo)
|
|
|
|
|
def create_leaderboard_interface():
    """Build the leaderboard UI and wire up all of its event handlers.

    The page contains, top to bottom: the title, a collapsible description,
    a row of filter/sort controls, the results table with a refresh button,
    a "Model Inspector" accordion, and About / Citation / Submit accordions.

    Returns:
        The assembled ``gr.Blocks`` app. It is not launched here.
    """
    # Dropdown label -> table column handed to ``sort_table_by_column``.
    # The same dict feeds the sort dropdown's choices so the two cannot drift.
    sort_columns = {
        "Rank": "Rank",
        "Model A-Z": "Model",
        "Organization A-Z": "Organization",
        "Top-Performer β": "Top-Performer",
        "Multi-MAPE β": "Multi-MAPE",
        "Uni-MAPE β": "Uni-MAPE",
        "Uni-Multi-MAPE β": "Uni-Multi-MAPE",
        "NMAE β": "NMAE",
        "Date β": "Submission Date",
    }
    exclusive_filters = ("category", "domain", "dataset")

    with gr.Blocks(css=custom_css) as demo:
        gr.HTML(TITLE)

        with gr.Accordion("π Description", open=False, elem_id="description-accordion"):
            gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text", elem_id="description-text")

        filter_options = get_filter_options()

        # ---- Filter / sort controls -------------------------------------
        with gr.Row(elem_id="filter-row"):
            model_search = gr.Textbox(
                label="π Filter by Model",
                placeholder="Search...",
                value="",
                elem_id="model-search",
                scale=0,
            )
            category_dropdown = gr.Dropdown(
                choices=filter_options["categories"],
                value="all",
                label="π Filter By Category",
                allow_custom_value=False,
                elem_id="category-filter",
                scale=0,
            )
            domain_dropdown = gr.Dropdown(
                choices=filter_options["domains"],
                value="all",
                label="π Filter By Domain",
                allow_custom_value=False,
                elem_id="domain-filter",
                scale=0,
            )
            dataset_dropdown = gr.Dropdown(
                choices=filter_options["datasets"],
                value="all",
                label="π Filter by Dataset",
                allow_custom_value=False,
                elem_id="dataset-filter",
                scale=0,
            )
            sort_dropdown = gr.Dropdown(
                choices=list(sort_columns),
                value="Rank",
                label="π Sort",
                allow_custom_value=False,
                elem_id="sort-filter",
                scale=0,
            )

        gr.Markdown("### Models ranked by the number of datasets where they achieve the lowest MAPE (Top-Performer). Click on the model cell to view details.")

        # Invisible helpers: a textbox whose changes are mirrored into the
        # model selector, and an HTML component used as a scroll signal.
        hidden_model_input = gr.Textbox(visible=False)
        scroll_trigger = gr.HTML(visible=False, elem_id="scroll-trigger")

        # ---- Results table ----------------------------------------------
        overall_df = create_overall_table()
        results_table = gr.Dataframe(
            value=overall_df.values.tolist(),
            headers=overall_df.columns.tolist(),
            label="",
            interactive=False,
            elem_id="results-table",
        )

        refresh_btn = gr.Button("π Refresh Table", variant="primary")

        # ---- Model inspector --------------------------------------------
        model_inspector_accordion = gr.Accordion("π Model Inspector", open=False, elem_id="model-inspector")
        with model_inspector_accordion:
            with gr.Row():
                with gr.Column(scale=1):
                    model_selector = gr.Dropdown(
                        choices=filter_options["models"],
                        value=None,
                        label="Select Model",
                        info="Choose a model to view its metadata",
                        allow_custom_value=False,
                    )

                with gr.Column(scale=3):
                    metadata_display = gr.Markdown(
                        value="Select a model to view its metadata.",
                        label="Model Metadata",
                    )

        # ---- Static informational sections ------------------------------
        with gr.Accordion("π About MUSEval Leaderboard", open=False, elem_id="about-accordion"):
            gr.Markdown(BENCHMARKS_TEXT, elem_classes="markdown-text", elem_id="about-text")

        with gr.Row():
            with gr.Accordion("π Citation", open=False, elem_id="citation-accordion"):
                gr.Textbox(
                    value=CITATION_BUTTON_TEXT,
                    label=CITATION_BUTTON_LABEL,
                    lines=20,
                    elem_id="citation-button",
                    show_copy_button=True,
                )

        with gr.Row():
            with gr.Accordion("π Submit Your Model", open=False, elem_id="submit-accordion"):
                gr.HTML("""
                <div style="text-align: center; padding: 20px;">
                    <h3>Submit by creating a pull request with your model's performance here:</h3>
                    <a href='https://github.com/Synthefy/MUSEval'
                       target='_blank'
                       style='display: inline-block;
                              background-color: #FF6B6B;
                              color: white;
                              padding: 15px 30px;
                              text-decoration: none;
                              border-radius: 8px;
                              font-weight: bold;
                              font-size: 18px;
                              transition: background-color 0.3s ease;'
                       onmouseover='this.style.backgroundColor="#FF5252"'
                       onmouseout='this.style.backgroundColor="#FF6B6B"'>
                        π Submit Here
                    </a>
                </div>
                """)

        # ---- Table update helpers ---------------------------------------
        def reset_other_filters(selected_filter, filter_type):
            """Return updates for the two dropdowns other than the one just used.

            Both are reset to "all" when a concrete (non-"all") value was
            picked for a known filter type; otherwise both are left untouched.
            """
            if filter_type in exclusive_filters and selected_filter != "all":
                return gr.update(value="all"), gr.update(value="all")
            return gr.update(), gr.update()

        def sort_by_dropdown(sort_option, domain, category, dataset, model):
            """Apply the filters, then sort by the chosen option; return rows."""
            # Unknown labels fall back to sorting by "Rank".
            column_name = sort_columns.get(sort_option, "Rank")
            filtered_df = create_overall_table(
                domain_filter=domain,
                category_filter=category,
                dataset_filter=dataset,
                model_filter=model,
            )
            return sort_table_by_column(filtered_df, column_name).values.tolist()

        def update_table_with_sort(sort_option, domain, category, dataset, model):
            """Update table with current filters and sorting."""
            return sort_by_dropdown(sort_option, domain, category, dataset, model)

        def update_table_with_model_search(model, sort_option, domain, category, dataset):
            """Re-render the table for a model search; leave the dropdowns as-is."""
            rows = update_table_with_sort(sort_option, domain, category, dataset, model)
            return (rows, gr.update(), gr.update(), gr.update())

        def update_table_with_reset(selected_filter, filter_type, sort_option, domain, category, dataset, model):
            """Re-render the table after a filter change and reset the other two.

            Picking a concrete value for one of category/domain/dataset makes
            it the only active filter of the three: the table is computed
            with the other two forced to "all", and the matching dropdown
            resets are returned alongside the rows.
            """
            if filter_type in exclusive_filters and selected_filter != "all":
                if filter_type != "domain":
                    domain = "all"
                if filter_type != "category":
                    category = "all"
                if filter_type != "dataset":
                    dataset = "all"

            rows = update_table_with_sort(sort_option, domain, category, dataset, model)
            return (rows, *reset_other_filters(selected_filter, filter_type))

        # ---- Filter / sort event wiring ---------------------------------
        # All three dropdown handlers receive the same input order; each
        # passes its own value as the "selected" filter.
        filter_inputs = [domain_dropdown, category_dropdown, dataset_dropdown, model_search, sort_dropdown]

        domain_dropdown.change(
            fn=lambda domain, category, dataset, model, sort_option: update_table_with_reset(
                domain, "domain", sort_option, domain, category, dataset, model
            ),
            inputs=filter_inputs,
            outputs=[results_table, category_dropdown, dataset_dropdown],
        )

        category_dropdown.change(
            fn=lambda domain, category, dataset, model, sort_option: update_table_with_reset(
                category, "category", sort_option, domain, category, dataset, model
            ),
            inputs=filter_inputs,
            outputs=[results_table, domain_dropdown, dataset_dropdown],
        )

        dataset_dropdown.change(
            fn=lambda domain, category, dataset, model, sort_option: update_table_with_reset(
                dataset, "dataset", sort_option, domain, category, dataset, model
            ),
            inputs=filter_inputs,
            outputs=[results_table, category_dropdown, domain_dropdown],
        )

        model_search.change(
            fn=update_table_with_model_search,
            inputs=[model_search, sort_dropdown, domain_dropdown, category_dropdown, dataset_dropdown],
            outputs=[results_table, domain_dropdown, category_dropdown, dataset_dropdown],
        )

        # Inputs must follow the handler's parameter order
        # (sort_option, domain, category, dataset, model). The previous
        # wiring listed the sort dropdown last, which shifted every argument.
        sorted_table_inputs = [sort_dropdown, domain_dropdown, category_dropdown, dataset_dropdown, model_search]

        refresh_btn.click(
            fn=update_table_with_sort,
            inputs=sorted_table_inputs,
            outputs=results_table,
        )

        sort_dropdown.change(
            fn=sort_by_dropdown,
            inputs=sorted_table_inputs,
            outputs=results_table,
        )

        # ---- Model inspector wiring -------------------------------------
        model_selector.change(
            fn=create_model_metadata_display,
            inputs=[model_selector],
            outputs=[metadata_display],
        )

        def handle_model_column_clicks(evt: gr.SelectData):
            """Select a model in the inspector when a first-column cell is clicked.

            Returns updates for (model selector, inspector accordion, scroll
            trigger). Clicks in any other column change nothing.
            """
            print(f"DEBUG: Click detected - Row: {evt.index[0]}, Column: {evt.index[1]}, Value: {evt.value}")

            if evt.index[1] != 0:
                print("π― OTHER COLUMN CELL CLICK - NO ACTION (NO SCROLLING)")
                return gr.update(), gr.update(), gr.update()

            print("π― MODEL COLUMN CELL CLICK DETECTED!")
            # Prefer the whole-row payload when the event carries one;
            # otherwise fall back to the clicked cell's own value.
            row_value = getattr(evt, "row_value", None)
            if row_value is not None and len(row_value) > 0:
                model_name = row_value[0]
            elif evt.value is not None:
                model_name = evt.value
            else:
                return gr.update(), gr.update(), gr.update()

            print(f"π― Model selected: {model_name}")
            return gr.update(value=model_name), gr.update(open=True), gr.update(value="scroll")

        results_table.select(
            fn=handle_model_column_clicks,
            inputs=[],
            outputs=[model_selector, model_inspector_accordion, scroll_trigger],
        )

        def handle_scroll_trigger(trigger_value):
            """Open the inspector when the trigger holds the "scroll" marker."""
            if trigger_value == "scroll":
                print("π― SCROLL TRIGGER ACTIVATED!")
                # Replace the marker so the follow-up change event is a no-op.
                return gr.update(value="scrolled"), gr.update(open=True)
            return gr.update(), gr.update()

        scroll_trigger.change(
            fn=handle_scroll_trigger,
            inputs=[scroll_trigger],
            outputs=[scroll_trigger, model_inspector_accordion],
            scroll_to_output=True,
        )

        def handle_table_changes(new_value):
            """Log table change events for debugging.

            No outputs are wired to this handler, so it returns nothing.
            """
            print("=" * 50)
            print("DEBUG: Table Change Event Detected")
            print("=" * 50)
            print(f"New value: {new_value}")
            print(f"New value type: {type(new_value)}")
            print("=" * 50)

        results_table.change(
            fn=handle_table_changes,
            inputs=[results_table],
            outputs=[],
        )

        def update_model_from_hidden(hidden_value):
            """Copy a non-empty hidden-textbox value into the model selector."""
            if hidden_value:
                return gr.update(value=hidden_value)
            return gr.update()

        hidden_model_input.change(
            fn=update_model_from_hidden,
            inputs=[hidden_model_input],
            outputs=[model_selector],
        )

    return demo
|
|
|
|
|
|
|
|
# Create and start a background scheduler only when apscheduler could be
# imported; otherwise ``scheduler`` is None. No jobs are registered here.
scheduler = BackgroundScheduler() if SCHEDULER_AVAILABLE else None
if scheduler is not None:
    scheduler.start()
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Build the UI, enable request queuing, then start serving.
    demo = create_leaderboard_interface()
    demo.queue(default_concurrency_limit=40)
    demo.launch()