# Provenance (HF file-page header, kept as comments so the module parses):
# vivekvermaiit's picture
# json file in results
# 8fc70f8
import json
import os
import pandas as pd
from datetime import datetime, timezone
from src.about import Tasks, SpeechTasks
from src.display.formatting import styled_error, styled_message, styled_warning
from src.display.utils import REGION_MAP
from src.envs import API, EVAL_REQUESTS_PATH, TOKEN, QUEUE_REPO, RESULTS_REPO, EVAL_RESULTS_PATH
# Module-level caches, None until initialized.
# NOTE(review): neither name is referenced in this chunk — presumably they are
# populated/read by submission-deduplication code elsewhere in the project;
# confirm before removing.
REQUESTED_MODELS = None
USERS_TO_SUBMISSION_DATES = None
def handle_csv_submission(
    model_name: str,
    csv_file,  # path of the uploaded CSV file (as handed over by the UI)
    result_type: str,
):
    """Validate, store, and upload a leaderboard CSV submission.

    The CSV is saved under ``EVAL_REQUESTS_PATH/<result_type>``, pushed to the
    queue repo, then converted to the results JSON and uploaded to the results
    repo via :func:`convert_csv_to_json_and_upload`.

    Args:
        model_name: Display name of the submitted model.
        csv_file: Path to the uploaded CSV file; ``None`` if nothing was uploaded.
        result_type: Result flavor (e.g. ``"text"`` or speech); selects the
            request subdirectory and remote path.

    Returns:
        A styled HTML message (success or error) for the submission UI.
    """
    if not model_name:
        return styled_error("Please provide a model name.")
    if csv_file is None:
        return styled_error("Please provide a CSV file with results.")

    df = pd.read_csv(csv_file)
    current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    # Save the uploaded CSV locally before pushing it to the queue repo.
    subdir = os.path.join(EVAL_REQUESTS_PATH, result_type)
    os.makedirs(subdir, exist_ok=True)
    filename = f"{current_time}_{model_name}_{result_type}_results.csv"
    # FIX: the remote path was truncated and did not include the filename,
    # so every submission collided on the same repo entry.
    remote_path = f"msteb_{result_type}_requests/{filename}"
    csv_save_path = os.path.join(subdir, filename)
    df.to_csv(csv_save_path, index=False)

    print(f"Uploading to {QUEUE_REPO}/{remote_path}")
    try:
        API.upload_file(
            path_or_fileobj=csv_save_path,
            path_in_repo=remote_path,
            repo_id=QUEUE_REPO,
            repo_type="dataset",  # the queue repo is a dataset repo
            commit_message=f"Add {result_type} request for {model_name} at {current_time}",
        )
    finally:
        # FIX: remove the local copy even when the upload raises, so failed
        # uploads do not accumulate temp files on disk.
        os.remove(csv_save_path)

    # Convert the dataframe to the results JSON and upload it to the results repo.
    try:
        convert_csv_to_json_and_upload(df, model_name, result_type)
    except ValueError as e:
        return styled_error(f"{str(e)}")

    return styled_message(f"Results CSV successfully submitted for `{model_name}`!")
def find_task_by_col_name(col_name, enum_cls):
    """Return the member of *enum_cls* whose display column is *col_name*.

    Each member's ``value`` is expected to expose a ``col_name`` attribute.
    Returns ``None`` when no member matches.
    """
    matches = (member for member in enum_cls if member.value.col_name == col_name)
    return next(matches, None)
def _scores_from_row(row, task_enum):
    """Map benchmark name -> {metric: score/100} for the numeric cells of a row.

    Non-numeric and NaN cells are skipped. CSV scores are percentages; the
    leaderboard JSON stores fractions, hence the division by 100.
    """
    scores = {}
    for col, val in row.items():
        if col == "Region":
            continue
        if val is None or pd.isna(val) or not isinstance(val, (int, float)):
            continue
        # Columns were validated against the enum, so the lookup cannot miss.
        task = find_task_by_col_name(col, task_enum)
        scores[task.value.benchmark] = {task.value.metric: val / 100}
    return scores


def convert_csv_to_json_and_upload(df: pd.DataFrame, model_name: str, result_type: str):
    """Convert a results dataframe to leaderboard JSON and upload it.

    Validates the CSV structure (all score columns must be known task display
    names, a row named 'Average (Micro)' must exist, every other Region value
    must be a key of REGION_MAP), builds the overall/per-region JSON payload,
    writes it under EVAL_RESULTS_PATH, uploads it to RESULTS_REPO, and removes
    the local file.

    Args:
        df: Submission dataframe; the first column must be "Region".
        model_name: Display name of the submitted model.
        result_type: "text" selects Tasks, anything else selects SpeechTasks.

    Returns:
        A short human-readable upload confirmation string.

    Raises:
        ValueError: on any validation failure, or when the 'Average (Micro)'
            row contains no numeric score at all.
    """
    task_enum = Tasks if result_type == "text" else SpeechTasks
    task_display_names = {t.value.col_name for t in task_enum}
    average_row = "Average (Micro)"

    # --- Validation ---
    df_columns = set(df.columns[1:])  # exclude the leading Region column
    if not df_columns.issubset(task_display_names):
        extra = df_columns - task_display_names
        raise ValueError(f"Extra columns in CSV: {extra}")
    if average_row not in df["Region"].values:
        raise ValueError("Missing row for 'Average (Micro)'")
    for region in df["Region"]:
        if region != average_row and region not in REGION_MAP:
            raise ValueError(f"Region '{region}' not found in REGION_MAP keys.")

    # --- Build JSON ---
    model_json = {
        "config": {"model_name": model_name},
        "results": {},
        "regions": {},
    }
    # Only the 'Average (Micro)' row counts toward the "has any number" check,
    # mirroring the original behavior.
    at_least_one_number = False
    for _, row in df.iterrows():
        region_display = row["Region"]
        scores = _scores_from_row(row, task_enum)
        if region_display == average_row:
            model_json["results"].update(scores)
            at_least_one_number = at_least_one_number or bool(scores)
        else:
            model_json["regions"][REGION_MAP[region_display]] = scores

    if not at_least_one_number:
        raise ValueError("No valid numeric results found in the CSV. Please check your input.")

    # --- Save locally ---
    subdir = os.path.join(EVAL_RESULTS_PATH, result_type)
    os.makedirs(subdir, exist_ok=True)
    current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    filename = f"{current_time}_{model_name}_{result_type}.json"
    json_save_path = os.path.join(subdir, filename)
    with open(json_save_path, "w") as f:
        json.dump(model_json, f, indent=2)

    # --- Upload to HF Hub ---
    # FIX: the remote path was truncated and did not include the JSON filename,
    # so uploads collided on a single repo entry.
    remote_path = f"msteb_leaderboard/msteb_{result_type}_results/{filename}"
    try:
        API.upload_file(
            path_or_fileobj=json_save_path,
            path_in_repo=remote_path,
            repo_id=RESULTS_REPO,
            repo_type="dataset",
            commit_message=f"Upload results for {model_name} ({result_type}) at {current_time}",
        )
    finally:
        # FIX: clean up the local JSON even when the upload raises.
        os.remove(json_save_path)

    # FIX: report the actual remote path instead of just the timestamp.
    message = f"Uploaded to {RESULTS_REPO}/{remote_path}"
    print(message)
    return message