import gradio as gr
import pandas as pd
import json
from constants import BANNER, INTRODUCTION_TEXT, CITATION_TEXT, METRICS_TAB_TEXT, DIR_OUTPUT_REQUESTS, LEADERBOARD_CSS, EU_LANGUAGES, MULTILINGUAL_TAB_TEXT
from init import is_model_on_hub, upload_file, load_all_info_from_dataset_hub
from utils_display import AutoEvalColumn, MultilingualColumn, fields, make_clickable_model, styled_error, styled_message
import numpy as np
from datetime import datetime, timezone

LAST_UPDATED = "Apr 8th 2025"

# Global variable to store detailed benchmark data
benchmark_details = {}
expanded_languages = set()  # Track which languages are expanded
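# NOTE (assumption, for readability only): EU_LANGUAGES comes from constants above.
# The code below relies only on it mapping a language code to a dict with 'name' and
# 'flag' keys, roughly like:
#   EU_LANGUAGES = {"de": {"name": "German", "flag": "🇩🇪"}, "fr": {"name": "French", "flag": "🇫🇷"}, ...}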
column_names = {
    "MODEL": "Model",
    "Avg. WER": "Average WER ⬇️",
    "RTFx": "RTFx ⬆️",
    "AMI WER": "AMI",
    "Earnings22 WER": "Earnings22",
    "Gigaspeech WER": "Gigaspeech",
    "LS Clean WER": "LS Clean",
    "LS Other WER": "LS Other",
    "SPGISpeech WER": "SPGISpeech",
    "Tedlium WER": "Tedlium",
    "Voxpopuli WER": "Voxpopuli",
}
eval_queue_repo, requested_models, csv_results, multilingual_csv_path = load_all_info_from_dataset_hub()

if not csv_results.exists():
    raise Exception(f"CSV file {csv_results} does not exist locally")

# Get csv with data and parse columns
original_df = pd.read_csv(csv_results)
# Formats the column values: strings pass through, -1 marks a missing result, floats are rounded
def formatter(x):
    if isinstance(x, str):
        return x
    if x == -1:
        return "NA"
    return round(x, 2)
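# Illustrative behaviour of formatter (example values are made up):
#   formatter("openai/whisper-large-v3") -> "openai/whisper-large-v3"
#   formatter(-1)                        -> "NA"
#   formatter(12.3456)                   -> 12.35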
for col in original_df.columns:
    if col == "model":
        original_df[col] = original_df[col].apply(make_clickable_model)
    else:
        original_df[col] = original_df[col].apply(formatter)  # For numerical values

original_df.rename(columns=column_names, inplace=True)
original_df.sort_values(by='Average WER ⬇️', inplace=True)
COLS = [c.name for c in fields(AutoEvalColumn)]
TYPES = [c.type for c in fields(AutoEvalColumn)]

# Multilingual columns (dynamic based on expansion state)
MULTILINGUAL_COLS = [c.name for c in fields(MultilingualColumn)]
def create_multilingual_dataframe():
    """Create multilingual dataframe with CoVoST, MLS, and FLEURS benchmark data"""
    global benchmark_details, expanded_languages

    if multilingual_csv_path is None or not multilingual_csv_path.exists():
        raise Exception("Multilingual CSV file not found")

    # Load CSV data
    multilingual_raw_df = pd.read_csv(multilingual_csv_path)

    # Store detailed benchmark data for click functionality
    benchmark_details = {}
    multilingual_data = []

    for _, row_data in multilingual_raw_df.iterrows():
        model_name = row_data['model']
        model_details = {}
        row = {"Model": make_clickable_model(model_name)}

        # Process data for each language and collect all individual datapoints
        all_datapoints = []  # Collect all individual dataset scores across all languages

        for lang_code, lang_info in EU_LANGUAGES.items():
            # Get individual benchmark scores from CSV, using None for missing values
            # Special cases: de doesn't have MLS, pt doesn't have CoVoST
            if lang_code == "pt":
                covost_score = None  # pt doesn't have CoVoST data
            else:
                covost_score = row_data.get(f"{lang_code}_covost", None)

            if lang_code == "de":
                mls_score = None  # de doesn't have MLS data
            else:
                mls_score = row_data.get(f"{lang_code}_mls", None)

            fleurs_score = row_data.get(f"{lang_code}_fleurs", None)

            # Convert zeros or empty values to None
            for score_name, score_val in [("covost", covost_score), ("mls", mls_score), ("fleurs", fleurs_score)]:
                if score_val is not None and (score_val == 0.0 or score_val == "" or str(score_val).strip() in ("0", "")):
                    if score_name == "covost":
                        covost_score = None
                    elif score_name == "mls":
                        mls_score = None
                    elif score_name == "fleurs":
                        fleurs_score = None

            # Add individual datapoints to the global list
            if covost_score is not None and covost_score > 0:
                all_datapoints.append(covost_score)
            if mls_score is not None and mls_score > 0:
                all_datapoints.append(mls_score)
            if fleurs_score is not None and fleurs_score > 0:
                all_datapoints.append(fleurs_score)

            # Calculate average only from available scores for this language (for display)
            available_scores = [s for s in [covost_score, mls_score, fleurs_score] if s is not None and s > 0]
            if available_scores:
                avg_score = round(sum(available_scores) / len(available_scores), 2)
            else:
                avg_score = None

            # Store individual scores for detailed view (only store existing datasets)
            lang_data = {"average": avg_score if avg_score is not None else "NA"}

            # Only store datasets that exist for this language
            if lang_code != "pt" and covost_score is not None:  # pt doesn't have CoVoST
                lang_data["CoVoST"] = covost_score
            if lang_code != "de" and mls_score is not None:  # de doesn't have MLS
                lang_data["MLS"] = mls_score
            if fleurs_score is not None:  # All languages have FLEURS
                lang_data["FLEURS"] = fleurs_score

            model_details[lang_code] = lang_data
        # Calculate overall multilingual average from all individual datapoints
        if all_datapoints:
            row["Average WER ⬇️"] = round(np.mean(all_datapoints), 2)
        else:
            row["Average WER ⬇️"] = 0.0

        # Add RTFx from the CSV (it should be a single value per model)
        rtfx_value = row_data.get("rtfx", row_data.get("RTFx", 0.0))
        # Convert 0 or -1 values to "NA" like in the English leaderboard
        if rtfx_value in (0, 0.0, -1, "0", "0.0"):
            row["RTFx ⬆️"] = "NA"
        else:
            row["RTFx ⬆️"] = rtfx_value

        # Add language columns based on expansion state
        for lang_code, lang_info in EU_LANGUAGES.items():
            lang_col_name = f"{lang_info['flag']} {lang_info['name']}"
            model_data = model_details[lang_code]

            if lang_code in expanded_languages:
                # Show average column AND detailed columns
                row[f"{lang_col_name} Avg"] = model_data["average"]
                # Only show columns for datasets that actually exist in the data
                if "CoVoST" in model_data:
                    row[f"{lang_col_name} CoVoST"] = model_data["CoVoST"]
                if "MLS" in model_data:
                    row[f"{lang_col_name} MLS"] = model_data["MLS"]
                if "FLEURS" in model_data:
                    row[f"{lang_col_name} FLEURS"] = model_data["FLEURS"]
            else:
                # Show only average column
                row[lang_col_name] = model_data["average"]

        # Store model details for click functionality
        benchmark_details[model_name] = model_details
        multilingual_data.append(row)

    multilingual_df = pd.DataFrame(multilingual_data)
    multilingual_df = multilingual_df.sort_values(by='Average WER ⬇️')
    return multilingual_df
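# Illustrative column layout produced above (language names and flags depend on EU_LANGUAGES):
#   collapsed (default): Model | Average WER ⬇️ | RTFx ⬆️ | 🇩🇪 German | 🇫🇷 French | ...
#   with "de" expanded:  ... | 🇩🇪 German Avg | 🇩🇪 German FLEURS | ...  (no "🇩🇪 German MLS", since de has no MLS data)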
def get_multilingual_datatypes(df):
    """Generate appropriate datatypes for multilingual dataframe columns"""
    datatypes = []
    for col in df.columns:
        if col == "Model":
            datatypes.append("markdown")  # This allows HTML rendering
        else:
            datatypes.append("number")
    return datatypes
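# Illustrative output (column names assumed): for columns
# ["Model", "Average WER ⬇️", "RTFx ⬆️", "🇩🇪 German"] this returns
# ["markdown", "number", "number", "number"].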
def get_language_details(model, language_code):
    """Get detailed breakdown for a specific model and language"""
    global benchmark_details

    if model not in benchmark_details or language_code not in benchmark_details[model]:
        return None

    language_info = EU_LANGUAGES.get(language_code, {})
    language_name = language_info.get("name", "Unknown")
    model_data = benchmark_details[model][language_code]

    details = {
        "Language": f"{language_info.get('flag', '')} {language_name}",
        "Model": model,
        # Some language/dataset pairs are missing (pt has no CoVoST, de has no MLS),
        # so fall back to "NA" instead of raising a KeyError
        "CoVoST WER": model_data.get("CoVoST", "NA"),
        "MLS WER": model_data.get("MLS", "NA"),
        "FLEURS WER": model_data.get("FLEURS", "NA"),
        "Average WER": model_data["average"]
    }
    return details
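# Example return value (hypothetical model name and scores, shape as built above):
#   get_language_details("openai/whisper-large-v3", "it")
#   -> {"Language": "🇮🇹 Italian", "Model": "openai/whisper-large-v3",
#       "CoVoST WER": 12.3, "MLS WER": 10.1, "FLEURS WER": 8.7, "Average WER": 10.37}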
def toggle_language_expansion(language_code):
    """Toggle expansion of a language's columns when its button is clicked"""
    global expanded_languages

    # Toggle expansion state
    if language_code in expanded_languages:
        expanded_languages.remove(language_code)
    else:
        expanded_languages.add(language_code)

    # Recreate dataframe with the new expansion state
    updated_df = create_multilingual_dataframe()
    updated_datatypes = get_multilingual_datatypes(updated_df)

    return gr.update(value=updated_df, datatype=updated_datatypes)
# Initialize multilingual dataframe
multilingual_df = create_multilingual_dataframe()
def request_model(model_text, chbcoco2017):
    # Determine the selected checkboxes
    dataset_selection = []
    if chbcoco2017:
        dataset_selection.append("ESB Datasets tests only")

    if len(dataset_selection) == 0:
        return styled_error("You need to select at least one dataset")

    base_model_on_hub, error_msg = is_model_on_hub(model_text)

    if not base_model_on_hub:
        return styled_error(f"Base model '{model_text}' {error_msg}")

    # Construct the output dictionary
    current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    required_datasets = ', '.join(dataset_selection)
    eval_entry = {
        "date": current_time,
        "model": model_text,
        "datasets_selected": required_datasets
    }

    # Prepare file path
    DIR_OUTPUT_REQUESTS.mkdir(parents=True, exist_ok=True)

    fn_datasets = '@ '.join(dataset_selection)
    filename = model_text.replace("/", "@") + "@@" + fn_datasets
    if filename in requested_models:
        return styled_error(f"A request for this model '{model_text}' and dataset(s) was already made.")

    try:
        filename_ext = filename + ".txt"
        out_filepath = DIR_OUTPUT_REQUESTS / filename_ext

        # Write the results to a text file
        with open(out_filepath, "w") as f:
            f.write(json.dumps(eval_entry))

        upload_file(filename, out_filepath)

        # Include file in the list of uploaded files
        requested_models.append(filename)

        # Remove the local file
        out_filepath.unlink()

        return styled_message("🤗 Your request has been submitted and will be evaluated soon!</p>")
    except Exception:
        return styled_error("Error submitting request!")
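# Illustrative content of the request file written above (values are made up):
#   {"date": "2025-04-08T12:00:00Z", "model": "username/model-name",
#    "datasets_selected": "ESB Datasets tests only"}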
def filter_main_table(show_proprietary=True):
    filtered_df = original_df.copy()

    # Filter proprietary models if needed
    if not show_proprietary and "License" in filtered_df.columns:
        # Keep only models with "Open" license
        filtered_df = filtered_df[filtered_df["License"] == "Open"]

    return filtered_df
with gr.Blocks(css=LEADERBOARD_CSS) as demo:
    gr.HTML(BANNER, elem_id="banner")
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏆 Leaderboard", elem_id="od-benchmark-tab-table", id=0):
            leaderboard_table = gr.components.Dataframe(
                value=original_df,
                datatype=TYPES,
                elem_id="leaderboard-table",
                interactive=False,
                visible=True,
            )

            with gr.Row():
                show_proprietary_checkbox = gr.Checkbox(
                    label="Show proprietary models",
                    value=True,
                    elem_id="show-proprietary-checkbox"
                )

            # Connect checkbox to the filtering function
            show_proprietary_checkbox.change(
                filter_main_table,
                inputs=[show_proprietary_checkbox],
                outputs=leaderboard_table
            )
        with gr.TabItem("🌍 Multilingual", elem_id="multilingual-benchmark-tab-table", id=1):
            gr.Markdown(MULTILINGUAL_TAB_TEXT, elem_classes="markdown-text")

            # Language toggle buttons
            gr.Markdown("Click on a language button to show/hide detailed benchmark scores (CoVoST, MLS, FLEURS):")

            language_buttons = {}
            lang_codes = list(EU_LANGUAGES.keys())

            # First row of buttons (5 languages)
            with gr.Row():
                for lang_code in lang_codes[:5]:
                    lang_info = EU_LANGUAGES[lang_code]
                    button_label = f"{lang_info['flag']} {lang_info['name']}"
                    language_buttons[lang_code] = gr.Button(
                        button_label,
                        variant="secondary",
                        size="sm"
                    )

            # Second row of buttons (remaining 5 languages)
            with gr.Row():
                for lang_code in lang_codes[5:]:
                    lang_info = EU_LANGUAGES[lang_code]
                    button_label = f"{lang_info['flag']} {lang_info['name']}"
                    language_buttons[lang_code] = gr.Button(
                        button_label,
                        variant="secondary",
                        size="sm"
                    )

            multilingual_table = gr.components.Dataframe(
                value=multilingual_df,
                datatype=get_multilingual_datatypes(multilingual_df),
                elem_id="multilingual-table",
                interactive=False,
                visible=True,
            )
            # Connect buttons to toggle language expansion.
            # A small factory function is used so each button captures its own lang_code;
            # a bare lambda defined in the loop would late-bind to the last value.
            for lang_code, button in language_buttons.items():
                def create_toggle_func(code):
                    return lambda: toggle_language_expansion(code)

                button.click(
                    create_toggle_func(lang_code),
                    outputs=[multilingual_table]
                )
        with gr.TabItem("📈 Metrics", elem_id="od-benchmark-tab-table", id=3):
            gr.Markdown(METRICS_TAB_TEXT, elem_classes="markdown-text")

        with gr.TabItem("✉️✨ Request a model here!", elem_id="od-benchmark-tab-table", id=4):
            with gr.Column():
                gr.Markdown("# ✉️✨ Request results for a new model here!", elem_classes="markdown-text")
            with gr.Column():
                gr.Markdown("Select a dataset:", elem_classes="markdown-text")
                with gr.Column():
                    model_name_textbox = gr.Textbox(label="Model name (user_name/model_name)")
                    chb_coco2017 = gr.Checkbox(label="COCO validation 2017 dataset", visible=False, value=True, interactive=False)
                with gr.Column():
                    mdw_submission_result = gr.Markdown()
                    btn_submitt = gr.Button(value="🚀 Request")

            btn_submitt.click(request_model,
                              [model_name_textbox, chb_coco2017],
                              mdw_submission_result)
        # Add an About section
        with gr.TabItem("🤗 About", elem_id="od-benchmark-tab-table", id=5):
            gr.Markdown("## About", elem_classes="markdown-text")
            gr.Markdown(f"Last updated on **{LAST_UPDATED}**", elem_classes="markdown-text")

            with gr.Row():
                with gr.Accordion("📙 Citation", open=False):
                    gr.Textbox(
                        value=CITATION_TEXT, lines=7,
                        label="Copy the BibTeX snippet to cite this source",
                        elem_id="citation-button",
                        show_copy_button=True,
                    )
demo.launch(ssr_mode=False)