Spaces:
Sleeping
Sleeping
| import json | |
| import os | |
| import pandas as pd | |
| from datetime import datetime, timezone | |
| from src.about import Tasks, SpeechTasks | |
| from src.display.formatting import styled_error, styled_message, styled_warning | |
| from src.display.utils import REGION_MAP | |
| from src.envs import API, EVAL_REQUESTS_PATH, TOKEN, QUEUE_REPO, RESULTS_REPO, EVAL_RESULTS_PATH | |
# Module-level caches; None until populated.
# NOTE(review): neither global is read or written in this file — presumably
# imported and filled by other submission-tracking modules; verify against
# callers before removing.
REQUESTED_MODELS = None
USERS_TO_SUBMISSION_DATES = None
def handle_csv_submission(
    model_name: str,
    csv_file,  # path of the uploaded CSV file (as handed over by the UI widget)
    result_type: str,
):
    """Handle a leaderboard results submission uploaded as a CSV file.

    The CSV is saved locally, pushed to the request-queue repo, then converted
    to the results JSON format and uploaded via
    ``convert_csv_to_json_and_upload``.

    Args:
        model_name: Display name of the submitted model.
        csv_file: Path to the uploaded CSV file; ``None`` when nothing was
            uploaded.
        result_type: Task family selector (e.g. ``"text"`` vs speech); used for
            sub-directory and remote-path naming.

    Returns:
        An HTML-styled status string (error or success message).
    """
    # Guard clauses: reject empty submissions before touching the filesystem.
    if not model_name:
        return styled_error("Please provide a model name.")
    if csv_file is None:
        return styled_error("Please provide a CSV file with results.")

    df = pd.read_csv(csv_file)
    current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    # Persist the uploaded CSV locally so it can be pushed to the queue repo.
    subdir = os.path.join(EVAL_REQUESTS_PATH, result_type)
    os.makedirs(subdir, exist_ok=True)
    filename = f"{current_time}_{model_name}_{result_type}_results.csv"
    # BUGFIX: the remote path previously did not embed the file name, so every
    # submission targeted the same repo entry and overwrote the previous one.
    remote_path = f"msteb_{result_type}_requests/{filename}"
    csv_save_path = os.path.join(subdir, filename)
    df.to_csv(csv_save_path, index=False)

    print(f"Uploading to {QUEUE_REPO}/{remote_path}")
    API.upload_file(
        path_or_fileobj=csv_save_path,
        path_in_repo=remote_path,
        repo_id=QUEUE_REPO,
        repo_type="dataset",  # or "model" if you made the repo that way
        commit_message=f"Add {result_type} request for {model_name} at {current_time}",
    )
    # The repo copy is the source of truth; drop the local temp file.
    os.remove(csv_save_path)

    # Convert the dataframe to the results JSON format and upload it.
    # NOTE(review): validation happens inside the conversion, i.e. AFTER the
    # raw CSV has already been pushed to the queue repo — an invalid
    # submission still leaves a file there. Confirm whether that is intended.
    try:
        convert_csv_to_json_and_upload(df, model_name, result_type)
    except ValueError as e:
        return styled_error(str(e))

    return styled_message(f"Results CSV successfully submitted for `{model_name}`!")
def find_task_by_col_name(col_name, enum_cls):
    """Return the first task in *enum_cls* whose display column name equals
    *col_name*, or ``None`` when no task matches."""
    matching = (candidate for candidate in enum_cls
                if candidate.value.col_name == col_name)
    return next(matching, None)
def _scores_from_row(row, task_enum):
    """Map one CSV row to ``{benchmark: {metric: score / 100}}``, keeping only
    cells that hold a real number (empty/NaN/non-numeric cells are skipped)."""
    scores = {}
    for col, val in row.items():
        if col == "Region":
            continue
        task = find_task_by_col_name(col, task_enum)
        # CSV values are percentages; the leaderboard JSON stores [0, 1].
        if val is not None and not pd.isna(val) and isinstance(val, (int, float)):
            scores[task.value.benchmark] = {task.value.metric: val / 100}
    return scores


def convert_csv_to_json_and_upload(df: pd.DataFrame, model_name: str, result_type: str):
    """Validate a results CSV, convert it to the leaderboard JSON schema and
    upload the JSON to the results repo.

    Args:
        df: Results table with a ``Region`` column plus one column per task
            display name; must contain an ``"Average (Micro)"`` row.
        model_name: Display name of the submitted model.
        result_type: ``"text"`` selects ``Tasks``; anything else selects
            ``SpeechTasks``.

    Returns:
        A short status string describing the upload destination.

    Raises:
        ValueError: On unknown columns or regions, a missing average row, or
            when the average row contains no numeric score at all.
    """
    task_enum = Tasks if result_type == "text" else SpeechTasks
    task_display_names = {t.value.col_name for t in task_enum}
    region_names = df["Region"].tolist()
    average_row = "Average (Micro)"

    # --- Validation ---
    df_columns = set(df.columns[1:])  # exclude the Region column
    if not df_columns.issubset(task_display_names):
        extra = df_columns - task_display_names
        raise ValueError(f"Extra columns in CSV: {extra}")
    if average_row not in df["Region"].values:
        raise ValueError("Missing row for 'Average (Micro)'")
    data_region_names = [r for r in region_names if r != average_row]
    for region in data_region_names:
        if region not in REGION_MAP:
            raise ValueError(f"Region '{region}' not found in REGION_MAP keys.")

    # --- Build JSON ---
    model_json = {
        "config": {"model_name": model_name},
        "results": {},
        "regions": {},
    }
    at_least_one_number = False
    for _, row in df.iterrows():
        region_display = row["Region"]
        if region_display == average_row:
            # The average row feeds the top-level "results" section; it must
            # contain at least one numeric score for the submission to count.
            scores = _scores_from_row(row, task_enum)
            if scores:
                at_least_one_number = True
            model_json["results"].update(scores)
        else:
            model_json["regions"][REGION_MAP[region_display]] = _scores_from_row(
                row, task_enum
            )
    if not at_least_one_number:
        raise ValueError("No valid numeric results found in the CSV. Please check your input.")

    # --- Save locally ---
    subdir = os.path.join(EVAL_RESULTS_PATH, result_type)
    os.makedirs(subdir, exist_ok=True)
    current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    filename = f"{current_time}_{model_name}_{result_type}.json"
    json_save_path = os.path.join(subdir, filename)
    with open(json_save_path, "w") as f:
        json.dump(model_json, f, indent=2)

    # --- Upload to HF Hub ---
    # BUGFIX: the remote path previously did not embed the file name, so every
    # upload targeted the same repo entry and overwrote the previous result.
    remote_path = f"msteb_leaderboard/msteb_{result_type}_results/{filename}"
    API.upload_file(
        path_or_fileobj=json_save_path,
        path_in_repo=remote_path,
        repo_id=RESULTS_REPO,
        repo_type="dataset",
        commit_message=f"Upload results for {model_name} ({result_type}) at {current_time}",
    )
    # The repo copy is the source of truth; drop the local temp file.
    os.remove(json_save_path)
    # BUGFIX: the log/return value printed only the timestamp, not the actual
    # destination path inside the repo.
    print(f"Uploaded to {RESULTS_REPO}/{remote_path}")
    return f"Uploaded to {RESULTS_REPO}/{remote_path}"