# Spaces: Running
# (page-status residue from extraction; not part of the module)
import json | |
import os | |
import pandas as pd | |
from datetime import datetime, timezone | |
from src.about import Tasks, SpeechTasks | |
from src.display.formatting import styled_error, styled_message, styled_warning | |
from src.display.utils import REGION_MAP | |
from src.envs import API, EVAL_REQUESTS_PATH, TOKEN, QUEUE_REPO, RESULTS_REPO, EVAL_RESULTS_PATH | |
# Module-level placeholders, both initialised to None. Nothing in the visible
# part of this file reads or assigns them.
# NOTE(review): the names suggest lazily-filled caches of already-requested
# models and per-user submission dates -- confirm against the rest of the project.
REQUESTED_MODELS = None
USERS_TO_SUBMISSION_DATES = None
def handle_csv_submission(
    model_name: str,
    csv_file,  # uploaded file path
    result_type: str,
):
    """Queue an uploaded results CSV and publish its JSON conversion.

    The CSV is parsed with pandas, written under
    ``EVAL_REQUESTS_PATH/<result_type>``, uploaded to ``QUEUE_REPO`` and the
    local copy is removed again.  The parsed frame is then handed to
    ``convert_csv_to_json_and_upload``.

    Args:
        model_name: Name of the submitted model; must not be empty or blank.
        csv_file: The uploaded CSV, in any form ``pandas.read_csv`` accepts.
        result_type: Used to build directory, file and remote path names and
            forwarded unchanged to ``convert_csv_to_json_and_upload``.

    Returns:
        The result of ``styled_error(...)`` when the input is missing, the CSV
        cannot be parsed, or the conversion raises ``ValueError``; otherwise
        the result of ``styled_message(...)``.
    """
    if not model_name or not model_name.strip():
        return styled_error("Please provide a model name.")
    if csv_file is None:
        return styled_error("Please provide a CSV file with results.")

    # A malformed upload should produce a readable error, not a traceback.
    try:
        df = pd.read_csv(csv_file)
    except (pd.errors.ParserError, pd.errors.EmptyDataError, UnicodeDecodeError) as e:
        return styled_error(f"Could not read the CSV file: {e}")

    current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    # Save uploaded CSV
    subdir = os.path.join(EVAL_REQUESTS_PATH, result_type)
    os.makedirs(subdir, exist_ok=True)
    # Names such as "org/model" contain a path separator; left as-is the file
    # name would point into a directory that does not exist and to_csv fails.
    safe_model_name = model_name.replace("/", "_")
    filename = f"{current_time}_{safe_model_name}_{result_type}_results.csv"
    remote_path = f"msteb_{result_type}_requests/{filename}"
    csv_save_path = os.path.join(subdir, filename)

    # NOTE(review): the CSV is uploaded to the queue repo before it is
    # validated below, so a rejected submission still lands there.  That order
    # is kept from the original -- confirm it is intended.
    try:
        df.to_csv(csv_save_path, index=False)
        print(f"Uploading to {QUEUE_REPO}/{remote_path}")
        API.upload_file(
            path_or_fileobj=csv_save_path,
            path_in_repo=remote_path,
            repo_id=QUEUE_REPO,
            repo_type="dataset",  # or "model" if you made the repo that way
            commit_message=f"Add {result_type} request for {model_name} at {current_time}",
        )
    finally:
        # Remove the local file even when the upload raises.
        if os.path.exists(csv_save_path):
            os.remove(csv_save_path)

    # Convert the dataframe to JSON and upload it to the results repo.
    try:
        convert_csv_to_json_and_upload(df, model_name, result_type)
    except ValueError as e:
        return styled_error(f"{str(e)}")

    return styled_message(f"Results CSV successfully submitted for `{model_name}`!")
def find_task_by_col_name(col_name, enum_cls):
    """Return the first member of *enum_cls* whose ``value.col_name`` equals *col_name*.

    Members are visited in iteration order; ``None`` is returned when no
    member matches.
    """
    matches = (member for member in enum_cls if member.value.col_name == col_name)
    return next(matches, None)
def _scores_from_row(row, column_tasks):
    """Return ``{benchmark: {metric: value / 100}}`` for the numeric cells of *row*."""
    scores = {}
    for col, task in column_tasks.items():
        val = row[col]
        # bool is an int subclass, but True/False is never a score.
        if isinstance(val, bool) or not isinstance(val, (int, float)):
            continue
        if pd.isna(val):
            continue
        scores[task.value.benchmark] = {task.value.metric: float(val) / 100}
    return scores


def convert_csv_to_json_and_upload(df: pd.DataFrame, model_name: str, result_type: str):
    """Validate a results frame, convert it to JSON and upload it to ``RESULTS_REPO``.

    The frame must have a ``Region`` column plus metric columns whose names
    are ``col_name`` values of ``Tasks`` (when ``result_type == "text"``) or
    ``SpeechTasks`` (any other ``result_type``).  The ``"Average (Micro)"``
    row fills the ``"results"`` section; every other row fills
    ``"regions"``, keyed by ``REGION_MAP[<Region>]``.  Numeric cells are
    divided by 100; non-numeric and missing cells are skipped.

    Args:
        df: Parsed results table.
        model_name: Stored under ``config.model_name`` and used in file names.
        result_type: Selects the task enum and is used in file/remote paths.

    Returns:
        A short status string.

    Raises:
        ValueError: If the ``Region`` column is missing, a column is not a
            known task, the ``"Average (Micro)"`` row is missing, a region is
            not a key of ``REGION_MAP``, or the average row has no numeric
            value.
    """
    task_enum = Tasks if result_type == "text" else SpeechTasks
    average_row = "Average (Micro)"

    # --- Validation ---
    # Raise ValueError (not KeyError) so callers catching ValueError can
    # report the problem to the user.
    if "Region" not in df.columns:
        raise ValueError("Missing 'Region' column in CSV.")

    # Exclude Region by name instead of assuming it is the first column.
    metric_columns = [col for col in df.columns if col != "Region"]
    task_display_names = {t.value.col_name for t in task_enum}
    extra = set(metric_columns) - task_display_names
    if extra:
        raise ValueError(f"Extra columns in CSV: {extra}")

    region_names = df["Region"].tolist()
    if average_row not in region_names:
        raise ValueError("Missing row for 'Average (Micro)'")

    for region in region_names:
        if region != average_row and region not in REGION_MAP:
            raise ValueError(f"Region '{region}' not found in REGION_MAP keys.")

    # Resolve each column to its task once rather than once per cell.
    column_tasks = {col: find_task_by_col_name(col, task_enum) for col in metric_columns}

    # --- Build JSON ---
    model_json = {
        "config": {"model_name": model_name},
        "results": {},
        "regions": {},
    }
    for _, row in df.iterrows():
        region_display = row["Region"]
        scores = _scores_from_row(row, column_tasks)
        if region_display == average_row:
            model_json["results"].update(scores)
        else:
            model_json["regions"][REGION_MAP[region_display]] = scores

    # "results" is only filled from the average row, so an empty dict means
    # that row held no usable number.
    if not model_json["results"]:
        raise ValueError("No valid numeric results found in the CSV. Please check your input.")

    # --- Save locally ---
    subdir = os.path.join(EVAL_RESULTS_PATH, result_type)
    os.makedirs(subdir, exist_ok=True)
    current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    # Names such as "org/model" contain a path separator; left as-is the file
    # name would point into a directory that does not exist and open() fails.
    safe_model_name = model_name.replace("/", "_")
    filename = f"{current_time}_{safe_model_name}_{result_type}.json"
    json_save_path = os.path.join(subdir, filename)
    remote_path = f"msteb_leaderboard/msteb_{result_type}_results/{filename}"

    try:
        with open(json_save_path, "w", encoding="utf-8") as f:
            json.dump(model_json, f, indent=2)

        # --- Upload to HF Hub ---
        API.upload_file(
            path_or_fileobj=json_save_path,
            path_in_repo=remote_path,
            repo_id=RESULTS_REPO,
            repo_type="dataset",
            commit_message=f"Upload results for {model_name} ({result_type}) at {current_time}",
        )
    finally:
        # Remove the local file even when writing or uploading raises.
        if os.path.exists(json_save_path):
            os.remove(json_save_path)

    # NOTE(review): the message shows the timestamp, not ``remote_path``; kept
    # byte-for-byte because callers may rely on the text.
    status = f"Uploaded to {RESULTS_REPO}/{current_time}"
    print(status)
    return status