import gradio as gr
from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
import pandas as pd
from apscheduler.schedulers.background import BackgroundScheduler
from huggingface_hub import snapshot_download
import os

from src.about import (
    CITATION_BUTTON_LABEL,
    CITATION_BUTTON_TEXT,
    EVALUATION_QUEUE_TEXT,
    INTRODUCTION_TEXT,
    LLM_BENCHMARKS_TEXT,
    TITLE,
)
from src.display.css_html_js import custom_css
from src.display.utils import (
    BENCHMARK_COLS,
    COLS,
    EVAL_COLS,
    EVAL_TYPES,
    AutoEvalColumn,
    ModelType,
    fields,
    WeightType,
    Precision,
)
from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
from src.populate import get_evaluation_queue_df, get_leaderboard_df
from src.submission.submit import add_new_eval


def restart_space():
    API.restart_space(repo_id=REPO_ID)


# Create directories first
os.makedirs(EVAL_REQUESTS_PATH, exist_ok=True)
os.makedirs(EVAL_RESULTS_PATH, exist_ok=True)

### Space initialisation
try:
    print(EVAL_REQUESTS_PATH)
    snapshot_download(
        repo_id=QUEUE_REPO,
        local_dir=EVAL_REQUESTS_PATH,
        repo_type="dataset",
        tqdm_class=None,
        etag_timeout=30,
        token=TOKEN,
    )
except Exception as e:
    print(f"Error downloading requests: {e}")
    # Initialize with empty directory if download fails
    pass

try:
    print(EVAL_RESULTS_PATH)
    snapshot_download(
        repo_id=RESULTS_REPO,
        local_dir=EVAL_RESULTS_PATH,
        repo_type="dataset",
        tqdm_class=None,
        etag_timeout=30,
        token=TOKEN,
    )
except Exception as e:
    print(f"Error downloading results: {e}")
    # Initialize with empty directory if download fails
    pass

# Initialize the leaderboard DataFrame
try:
    LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
except Exception:
    LEADERBOARD_DF = pd.DataFrame(columns=COLS)

# Get evaluation queue status
(
    finished_eval_queue_df,
    running_eval_queue_df,
    pending_eval_queue_df,
) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)


def init_leaderboard(dataframe):
    return Leaderboard(
        value=dataframe,
        datatype=[c.type for c in fields(AutoEvalColumn)],
        select_columns=SelectColumns(
            default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
            cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
            label="Seleccionar columnas:",
        ),
        search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name],
        hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
        filter_columns=[
            ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
            ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
            ColumnFilter(
                AutoEvalColumn.params.name,
                type="slider",
                min=0.01,
                max=150,
                label="Select the number of parameters (B)",
            ),
            ColumnFilter(
                AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True
            ),
        ],
        bool_checkboxgroup_label="Hide models",
        interactive=False,
    )


def submit_handler(model, base_model, revision, precision, weight_type, model_type, submit_type, openrouter_key):
    """Unified handler for both submission types."""
    return add_new_eval(
        model=model,
        base_model=base_model,
        revision=revision,
        precision=precision,
        weight_type=weight_type,
        model_type=model_type,
        submit_type=submit_type,
        openrouter_key=openrouter_key if submit_type == "openrouter" else None,
    )


demo = gr.Blocks(css=custom_css)
with demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
            leaderboard = init_leaderboard(LEADERBOARD_DF)

        with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

        with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
            with gr.Column():
                with gr.Row():
                    gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")

                with gr.Column():
                    with gr.Accordion(
                        f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
                        open=False,
                    ):
                        with gr.Row():
                            finished_eval_table = gr.components.Dataframe(
                                value=finished_eval_queue_df,
                                headers=EVAL_COLS,
                                datatype=EVAL_TYPES,
                                row_count=5,
                            )
                    with gr.Accordion(
                        f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
                        open=False,
                    ):
                        with gr.Row():
                            running_eval_table = gr.components.Dataframe(
                                value=running_eval_queue_df,
                                headers=EVAL_COLS,
                                datatype=EVAL_TYPES,
                                row_count=5,
                            )
                    with gr.Accordion(
                        f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
                        open=False,
                    ):
                        with gr.Row():
                            pending_eval_table = gr.components.Dataframe(
                                value=pending_eval_queue_df,
                                headers=EVAL_COLS,
                                datatype=EVAL_TYPES,
                                row_count=5,
                            )

            with gr.Row():
                gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")

            # Replace Radio with Tabs
            with gr.Tabs() as submit_tabs:
                # Huggingface Tab
                with gr.TabItem("Huggingface") as huggingface_tab:
                    with gr.Row():
                        with gr.Column():
                            hf_model_name_textbox = gr.Textbox(label="Model name")
                            hf_revision_name_textbox = gr.Textbox(
                                label="Revision commit", placeholder="main"
                            )
                            hf_model_type = gr.Dropdown(
                                choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
                                label="Model type",
                                multiselect=False,
                                value=None,
                                interactive=True,
                            )
                        with gr.Column():
                            hf_precision = gr.Dropdown(
                                choices=[i.value.name for i in Precision if i != Precision.Unknown],
                                label="Precision",
                                multiselect=False,
                                value="float16",
                                interactive=True,
                            )
                            hf_weight_type = gr.Dropdown(
                                choices=[i.value.name for i in WeightType],
                                label="Weights type",
                                multiselect=False,
                                value="Original",
                                interactive=True,
                            )
                            hf_base_model_name_textbox = gr.Textbox(
                                label="Base model (for delta or adapter weights)"
                            )

                    hf_submit_button = gr.Button("Submit Huggingface Model")
                    hf_submission_result = gr.Markdown()

                # OpenRouter Tab
                with gr.TabItem("OpenRouter") as openrouter_tab:
                    with gr.Row():
                        with gr.Column():
                            or_model_name_textbox = gr.Textbox(
                                label="OpenRouter Model ID"
                            )

                            # Get available themes from EXAM_QUESTIONS
                            from src.evaluation.questions import EXAM_QUESTIONS

                            # Show only the human-readable labels in the dropdown:
                            # an auxiliary dict maps each visible label to its internal theme value.
                            theme_label_to_value = {}
                            theme_labels = ["Todos los temas"]  # Labels shown to the user
                            theme_values = [None]  # Corresponding internal values (same positions)

                            # Fill the label and value lists in the same order
                            for theme, questions in EXAM_QUESTIONS.items():
                                if questions:
                                    original_theme = questions[0]["theme"]
                                    theme_labels.append(original_theme)  # Human-readable name only
                                    theme_values.append(theme)  # Internal value
                                    theme_label_to_value[original_theme] = theme  # For the mapping

                            # Convert a selected label back to its internal value
                            def convert_theme_selection(label):
                                if label == "Todos los temas" or label is None:
                                    return None
                                return theme_label_to_value.get(label)

                            or_theme = gr.Dropdown(
                                choices=theme_labels,  # Only the labels are shown
                                label="Tema del examen (opcional, por defecto todos)",
                                multiselect=False,
                                interactive=True,
                                value=None,
                            )
                            or_model_type = gr.Dropdown(
                                choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
                                label="Model type",
                                multiselect=False,
                                value=None,
                                interactive=True,
                            )
                        with gr.Column():
                            or_api_key = gr.Textbox(
                                label="OpenRouter API Key", type="password"
                            )

                    or_submit_button = gr.Button("Submit OpenRouter Model")
                    or_submission_result = gr.Markdown()

            # Replace old submit handler with individual handlers for each tab
            def hf_submit_handler(model, base_model, revision, precision, weight_type, model_type):
                return add_new_eval(
                    model=model,
                    base_model=base_model,
                    revision=revision,
                    precision=precision,
                    weight_type=weight_type,
                    model_type=model_type,
                    submit_type="huggingface",
                    openrouter_key=None,
                )

            def or_submit_handler(model, model_type, openrouter_key, theme_label=None, progress=gr.Progress()):
                """OpenRouter submission handler with progress indicator."""
                # Convert the selected label to its corresponding internal value
                theme_value = convert_theme_selection(theme_label)

                # Pass theme as parameter to run_exam function via do_exam.py
                return add_new_eval(
                    model=model,
                    base_model="",
                    revision="openrouter",
                    precision="float16",  # Default for API models
                    weight_type="Original",
                    model_type=model_type,
                    submit_type="openrouter",
                    openrouter_key=openrouter_key,
                    exam_theme=theme_value,  # Pass the internal value, not the label
                    progress=progress,  # Progress indicator
                    leaderboard_component=leaderboard,  # Reference to the leaderboard component
                )

            # Connect handlers to buttons
            hf_submit_button.click(
                hf_submit_handler,
                inputs=[
                    hf_model_name_textbox,
                    hf_base_model_name_textbox,
                    hf_revision_name_textbox,
                    hf_precision,
                    hf_weight_type,
                    hf_model_type,
                ],
                outputs=hf_submission_result,
            )

            or_submit_button.click(
                or_submit_handler,
                inputs=[
                    or_model_name_textbox,
                    or_model_type,
                    or_api_key,
                    or_theme,
                ],
                outputs=or_submission_result,
            )

    with gr.Row():
        with gr.Accordion("📙 Citation", open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                lines=20,
                elem_id="citation-button",
                show_copy_button=True,
            )

scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=1800)
scheduler.start()
demo.queue(default_concurrency_limit=40).launch()
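
# Local run note (a sketch of the assumed surrounding project, not something this file guarantees):
# with gradio, gradio_leaderboard, pandas, apscheduler and huggingface_hub installed, the src/
# package importable, and the token consumed by src.envs configured, the Space UI can be started
# directly, since demo.queue(...).launch() runs at the bottom of the module:
#
#   python app.py  # assuming this file is saved as app.py, the usual Spaces entry point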