Spaces:

Synthefy
/

MUSEval

Running

App Files Files Community

MUSEval / app.py

Calcharles

updated naming

7939a4f about 2 months ago

raw

history blame contribute delete

18.3 kB

	"""
	Synthefy MUSEval Leaderboard - Main Gradio Application
	Following GIFT-Eval import structure with custom layout
	"""

	import gradio as gr
	import pandas as pd

	# Optional imports for production features
	try:
	from apscheduler.schedulers.background import BackgroundScheduler
	SCHEDULER_AVAILABLE = True
	except ImportError:
	SCHEDULER_AVAILABLE = False
	print("Warning: apscheduler not available, scheduler features disabled")

	try:
	from huggingface_hub import snapshot_download
	HUB_AVAILABLE = True
	except ImportError:
	HUB_AVAILABLE = False
	print("Warning: huggingface_hub not available, hub features disabled")

	from src.about import (
	CITATION_BUTTON_LABEL,
	CITATION_BUTTON_TEXT,
	EVALUATION_QUEUE_TEXT,
	INTRODUCTION_TEXT,
	BENCHMARKS_TEXT,
	TITLE,
	)
	from src.display.css_html_js import custom_css
	from src.display.utils import (
	BENCHMARK_COLS,
	EVAL_COLS,
	EVAL_TYPES,
	ModelInfoColumn,
	ModelType,
	fields,
	WeightType,
	Precision
	)
	from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
	from src.populate import get_evaluation_queue_df, get_leaderboard_df, get_model_info_df, get_merged_df
	from src.utils import norm_sNavie, pivot_df, get_grouped_dfs, pivot_existed_df, rename_metrics, format_df
	from src.load_results import (
	load_results_with_metadata,
	create_overall_table,
	create_html_table,
	create_html_table_from_df,
	get_filter_options,
	get_model_metadata,
	create_model_metadata_display,
	get_overall_summary,
	sort_table_by_column,
	get_available_models
	)

	def create_model_buttons():
	    """Return a list with one small, secondary-styled ``gr.Button`` per model.

	    Model names are whatever ``get_available_models()`` yields; each name is
	    used as the button's ``value``.  This function only constructs the
	    buttons - it does not attach any event handlers to them.
	    """
	    from src.load_results import get_available_models

	    # NOTE(review): Gradio documents ``scale`` as an integer; 0.5 is kept
	    # unchanged here but may trigger a warning - confirm the intended value.
	    return [
	        gr.Button(value=model_name, variant="secondary", size="sm", scale=0.5)
	        for model_name in get_available_models()
	    ]

	def restart_space() -> None:
	    """Ask the Hub API client to restart the Space identified by ``REPO_ID``."""
	    API.restart_space(repo_id=REPO_ID)

	def create_leaderboard_interface():
	    """Build the MUSEval leaderboard UI and return the ``gr.Blocks`` app.

	    Layout, top to bottom: title, a collapsed description, one row of
	    filter/sort controls, the results table with a refresh button, the model
	    inspector accordion, and collapsed about / citation / submit sections.
	    Every event handler is a closure defined in this function and is wired to
	    its component before the app is returned.

	    Returns:
	        The assembled ``gr.Blocks`` instance; queueing and launching are left
	        to the caller.
	    """
	    demo = gr.Blocks(css=custom_css)
	    with demo:
	        gr.HTML(TITLE)

	        # Minimizable description section
	        with gr.Accordion("📖 Description", open=False, elem_id="description-accordion"):
	            gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text", elem_id="description-text")

	        # Choices for the filter dropdowns and for the model selector below
	        filter_options = get_filter_options()

	        # Compact filters in a single horizontal scrollable row
	        with gr.Row(elem_id="filter-row"):
	            model_search = gr.Textbox(
	                label="🔍 Filter by Model",
	                placeholder="Search...",
	                value="",
	                elem_id="model-search",
	                scale=0,
	            )
	            category_dropdown = gr.Dropdown(
	                choices=filter_options["categories"],
	                value="all",
	                label="📂 Filter By Category",
	                allow_custom_value=False,
	                elem_id="category-filter",
	                scale=0,
	            )
	            domain_dropdown = gr.Dropdown(
	                choices=filter_options["domains"],
	                value="all",
	                label="🌐 Filter By Domain",
	                allow_custom_value=False,
	                elem_id="domain-filter",
	                scale=0,
	            )
	            dataset_dropdown = gr.Dropdown(
	                choices=filter_options["datasets"],
	                value="all",
	                label="📊 Filter by Dataset",
	                allow_custom_value=False,
	                elem_id="dataset-filter",
	                scale=0,
	            )
	            sort_dropdown = gr.Dropdown(
	                choices=[
	                    "Rank",
	                    "Model A-Z",
	                    "Organization A-Z",
	                    "Top-Performer ↓",
	                    "Multi-MAPE ↓",
	                    "Uni-MAPE ↓",
	                    "Uni-Multi-MAPE ↑",
	                    "NMAE ↓",
	                    "Date ↑",
	                ],
	                value="Rank",
	                label="🔄 Sort",
	                allow_custom_value=False,
	                elem_id="sort-filter",
	                scale=0,
	            )

	        # Full width table
	        gr.Markdown("### Models ranked by the number of datasets where they achieve the lowest MAPE (Top-Performer). Click on the model cell to details.")

	        # Hidden input to handle model selection from table
	        hidden_model_input = gr.Textbox(visible=False)

	        # Hidden component to trigger scrolling only for model clicks
	        scroll_trigger = gr.HTML(visible=False, elem_id="scroll-trigger")

	        # Main results table with clickable rows; the DataFrame is handed to
	        # Gradio as a list of rows plus a separate header list.
	        initial_df = create_overall_table()
	        results_table = gr.Dataframe(
	            value=initial_df.values.tolist(),
	            headers=initial_df.columns.tolist(),
	            label="",
	            interactive=False,  # Disable editing but keep select events
	            elem_id="results-table",
	        )

	        refresh_btn = gr.Button("🔄 Refresh Table", variant="primary")

	        # Model metadata section at bottom.  The accordion object is kept in
	        # a variable because handlers below open it programmatically.
	        model_inspector_accordion = gr.Accordion("🔍 Model Inspector", open=False, elem_id="model-inspector")
	        with model_inspector_accordion:
	            with gr.Row():
	                with gr.Column(scale=1):
	                    model_selector = gr.Dropdown(
	                        choices=filter_options["models"],
	                        value=None,
	                        label="Select Model",
	                        info="Choose a model to view its metadata",
	                        allow_custom_value=False,
	                    )

	                with gr.Column(scale=3):
	                    metadata_display = gr.Markdown(
	                        value="Select a model to view its metadata.",
	                        label="Model Metadata",
	                    )

	        # About section
	        with gr.Accordion("📖 About MUSEval Leaderboard", open=False, elem_id="about-accordion"):
	            gr.Markdown(BENCHMARKS_TEXT, elem_classes="markdown-text", elem_id="about-text")

	        # Citation section
	        with gr.Row():
	            with gr.Accordion("📙 Citation", open=False, elem_id="citation-accordion"):
	                citation_button = gr.Textbox(
	                    value=CITATION_BUTTON_TEXT,
	                    label=CITATION_BUTTON_LABEL,
	                    lines=20,
	                    elem_id="citation-button",
	                    show_copy_button=True,
	                )

	        # Submit section
	        with gr.Row():
	            with gr.Accordion("🚀 Submit Your Model", open=False, elem_id="submit-accordion"):
	                gr.HTML("""
	<div style="text-align: center; padding: 20px;">
	<h3>Submit by creating a pull request with your model's performance here:</h3>
	<a href='https://github.com/Synthefy/MUSEval'
	target='_blank'
	style='display: inline-block;
	background-color: #FF6B6B;
	color: white;
	padding: 15px 30px;
	text-decoration: none;
	border-radius: 8px;
	font-weight: bold;
	font-size: 18px;
	transition: background-color 0.3s ease;'
	onmouseover='this.style.backgroundColor="#FF5252"'
	onmouseout='this.style.backgroundColor="#FF6B6B"'>
	🚀 Submit Here
	</a>
	</div>
	""")

	        # ------------------------------------------------------------------
	        # Event handlers
	        # ------------------------------------------------------------------

	        # Sort-dropdown label -> column name handed to sort_table_by_column.
	        sort_mapping = {
	            "Rank": "Rank",
	            "Model A-Z": "Model",
	            "Organization A-Z": "Organization",
	            "Top-Performer ↓": "Top-Performer",
	            "Multi-MAPE ↓": "Multi-MAPE",
	            "Uni-MAPE ↓": "Uni-MAPE",
	            "Uni-Multi-MAPE ↑": "Uni-Multi-MAPE",
	            "NMAE ↓": "NMAE",
	            "Date ↑": "Submission Date",
	        }

	        def update_table(domain, category, dataset, model):
	            """Delegate to ``create_html_table``; not wired to any event here."""
	            return create_html_table(domain, category, dataset, model)

	        def clear_filters():
	            """Return default filter values; not wired to any event here."""
	            return "all", "all", "all", ""

	        def reset_other_filters(selected_filter, filter_type):
	            """Return updates for the two dropdowns other than ``filter_type``.

	            Both are reset to "all" when a known filter type was set to a
	            specific value; otherwise two no-op updates are returned.
	            """
	            if selected_filter != "all" and filter_type in ("category", "domain", "dataset"):
	                return gr.update(value="all"), gr.update(value="all")
	            return gr.update(), gr.update()  # No change

	        def sort_by_dropdown(sort_option, domain, category, dataset, model):
	            """Apply the filters, sort the result, and return it as table rows.

	            Unknown sort options fall back to the "Rank" column.
	            """
	            column_name = sort_mapping.get(sort_option, "Rank")

	            # Filter first, then sort only the filtered rows.
	            filtered_df = create_overall_table(
	                domain_filter=domain,
	                category_filter=category,
	                dataset_filter=dataset,
	                model_filter=model,
	            )
	            sorted_df = sort_table_by_column(filtered_df, column_name)

	            # gr.Dataframe is fed a list of rows, matching the initial value.
	            return sorted_df.values.tolist()

	        def update_table_with_sort(sort_option, domain, category, dataset, model):
	            """Update table with current filters and sorting."""
	            return sort_by_dropdown(sort_option, domain, category, dataset, model)

	        def update_table_with_model_search(model, sort_option, domain, category, dataset):
	            """Refresh the table for a new search string; dropdowns untouched."""
	            table_result = update_table_with_sort(sort_option, domain, category, dataset, model)
	            return (table_result, gr.update(), gr.update(), gr.update())

	        def update_table_with_reset(selected_filter, filter_type, sort_option, domain, category, dataset, model):
	            """Refresh the table after a dropdown change, enforcing exclusivity.

	            When one of category/domain/dataset is set to a specific value,
	            the other two are treated as "all" for this refresh, and matching
	            reset updates are returned so the UI shows the same state.
	            """
	            if selected_filter != "all":
	                if filter_type == "category":
	                    domain = dataset = "all"
	                elif filter_type == "domain":
	                    category = dataset = "all"
	                elif filter_type == "dataset":
	                    category = domain = "all"

	            table_result = update_table_with_sort(sort_option, domain, category, dataset, model)
	            return (table_result, *reset_other_filters(selected_filter, filter_type))

	        # Connect filters to table updates with mutual exclusivity and sorting.
	        # Each listener's outputs are the table plus the two *other* dropdowns.
	        domain_dropdown.change(
	            fn=lambda domain, category, dataset, model, sort_option: update_table_with_reset(domain, "domain", sort_option, domain, category, dataset, model),
	            inputs=[domain_dropdown, category_dropdown, dataset_dropdown, model_search, sort_dropdown],
	            outputs=[results_table, category_dropdown, dataset_dropdown],
	        )

	        category_dropdown.change(
	            fn=lambda domain, category, dataset, model, sort_option: update_table_with_reset(category, "category", sort_option, domain, category, dataset, model),
	            inputs=[domain_dropdown, category_dropdown, dataset_dropdown, model_search, sort_dropdown],
	            outputs=[results_table, domain_dropdown, dataset_dropdown],
	        )

	        dataset_dropdown.change(
	            fn=lambda domain, category, dataset, model, sort_option: update_table_with_reset(dataset, "dataset", sort_option, domain, category, dataset, model),
	            inputs=[domain_dropdown, category_dropdown, dataset_dropdown, model_search, sort_dropdown],
	            outputs=[results_table, category_dropdown, domain_dropdown],
	        )

	        model_search.change(
	            fn=lambda model, sort_option, domain, category, dataset: update_table_with_model_search(model, sort_option, domain, category, dataset),
	            inputs=[model_search, sort_dropdown, domain_dropdown, category_dropdown, dataset_dropdown],
	            outputs=[results_table, domain_dropdown, category_dropdown, dataset_dropdown],
	        )

	        # Inputs are passed positionally, so they must follow the parameter
	        # order of update_table_with_sort: sort option first, model last.
	        refresh_btn.click(
	            fn=update_table_with_sort,
	            inputs=[sort_dropdown, domain_dropdown, category_dropdown, dataset_dropdown, model_search],
	            outputs=results_table,
	        )

	        # Sort dropdown event handler - re-applies the current filters
	        sort_dropdown.change(
	            fn=sort_by_dropdown,
	            inputs=[sort_dropdown, domain_dropdown, category_dropdown, dataset_dropdown, model_search],
	            outputs=results_table,
	        )

	        # Model selector event handler
	        model_selector.change(
	            fn=create_model_metadata_display,
	            inputs=[model_selector],
	            outputs=[metadata_display],
	        )

	        # Model column cell selection handler (with controlled scrolling)
	        def handle_model_column_clicks(evt: gr.SelectData):
	            """React to a table cell click; only column 0 selects a model.

	            Returns updates for (model selector, inspector accordion, scroll
	            trigger).  Clicks outside column 0, or clicks that carry no
	            usable value, return three no-op updates.
	            """
	            col_idx = evt.index[1]
	            print(f"DEBUG: Click detected - Row: {evt.index[0]}, Column: {col_idx}, Value: {evt.value}")

	            no_change = (gr.update(), gr.update(), gr.update())

	            if col_idx != 0:
	                print("🎯 OTHER COLUMN CELL CLICK - NO ACTION (NO SCROLLING)")
	                return no_change

	            print("🎯 MODEL COLUMN CELL CLICK DETECTED!")
	            # Prefer the first entry of the clicked row when the event exposes
	            # it; otherwise fall back to the clicked cell's own value.
	            row_value = getattr(evt, "row_value", None)
	            if row_value is not None and len(row_value) > 0:
	                model_name = row_value[0]
	            elif evt.value is not None:
	                model_name = evt.value
	            else:
	                return no_change

	            print(f"🎯 Model selected: {model_name}")
	            # Select the model, expand the inspector, and arm the scroll trigger
	            return gr.update(value=model_name), gr.update(open=True), gr.update(value="scroll")

	        results_table.select(
	            fn=handle_model_column_clicks,
	            inputs=[],
	            outputs=[model_selector, model_inspector_accordion, scroll_trigger],
	        )

	        # Handle scroll trigger - only scroll when model is selected
	        def handle_scroll_trigger(trigger_value):
	            """On "scroll", mark the trigger as "scrolled" and open the inspector.

	            Any other value yields two no-op updates, so the "scrolled" value
	            written here does not cause another round of updates.
	            """
	            if trigger_value == "scroll":
	                print("🎯 SCROLL TRIGGER ACTIVATED!")
	                return gr.update(value="scrolled"), gr.update(open=True)
	            return gr.update(), gr.update()

	        scroll_trigger.change(
	            fn=handle_scroll_trigger,
	            inputs=[scroll_trigger],
	            outputs=[scroll_trigger, model_inspector_accordion],
	            scroll_to_output=True,
	        )

	        # Handle change events from interactive table
	        def handle_table_changes(new_value):
	            """Debug hook: print the table's new value and its type."""
	            print("=" * 50)
	            print("DEBUG: Table Change Event Detected")
	            print("=" * 50)
	            print(f"New value: {new_value}")
	            print(f"New value type: {type(new_value)}")
	            print("=" * 50)
	            return gr.update()

	        results_table.change(
	            fn=handle_table_changes,
	            inputs=[results_table],
	            outputs=[],
	        )

	        # Hidden input event handler - when model is selected from table
	        def update_model_from_hidden(hidden_value):
	            """Copy a non-empty hidden-textbox value into the model selector."""
	            if hidden_value:
	                return gr.update(value=hidden_value)
	            return gr.update()

	        hidden_model_input.change(
	            fn=update_model_from_hidden,
	            inputs=[hidden_model_input],
	            outputs=[model_selector],
	        )

	    return demo

	# Start the background scheduler when apscheduler could be imported;
	# otherwise expose ``scheduler`` as None.
	# NOTE(review): no job is added to the scheduler in the visible code, and
	# restart_space() is defined but never scheduled - confirm this is intended.
	scheduler = BackgroundScheduler() if SCHEDULER_AVAILABLE else None
	if scheduler is not None:
	    scheduler.start()

	# Script entry point: build the UI, enable the request queue with a default
	# concurrency limit of 40 per event, then start the server.
	if __name__ == "__main__":
	    demo = create_leaderboard_interface()
	    demo.queue(default_concurrency_limit=40)
	    demo.launch()