Spaces:
Sleeping
Sleeping
| import json | |
| import os | |
| import pandas as pd | |
| from datetime import datetime, timezone | |
| from src.about import Tasks, SpeechTasks | |
| from src.display.formatting import styled_error, styled_message, styled_warning | |
| from src.display.utils import REGION_MAP | |
| from src.envs import API, EVAL_REQUESTS_PATH, TOKEN, QUEUE_REPO, RESULTS_REPO, EVAL_RESULTS_PATH | |
# Module-level caches; None until populated.
# NOTE(review): neither global is read or written in this file — presumably
# imported and filled by other submission-tracking modules; verify against
# callers before removing.
REQUESTED_MODELS = None
USERS_TO_SUBMISSION_DATES = None
def handle_csv_submission(
    model_name: str,
    csv_file,  # path of the uploaded CSV file (as handed over by the UI widget)
    result_type: str,
):
    """Handle a leaderboard results submission uploaded as a CSV file.

    The CSV is saved locally, pushed to the request-queue repo, then converted
    to the results JSON format and uploaded via
    ``convert_csv_to_json_and_upload``.

    Args:
        model_name: Display name of the submitted model.
        csv_file: Path to the uploaded CSV file; ``None`` when nothing was
            uploaded.
        result_type: Task family selector (e.g. ``"text"`` vs speech); used for
            sub-directory and remote-path naming.

    Returns:
        An HTML-styled status string (error or success message).
    """
    # Guard clauses: reject empty submissions before touching the filesystem.
    if not model_name:
        return styled_error("Please provide a model name.")
    if csv_file is None:
        return styled_error("Please provide a CSV file with results.")

    df = pd.read_csv(csv_file)
    current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    # Persist the uploaded CSV locally so it can be pushed to the queue repo.
    subdir = os.path.join(EVAL_REQUESTS_PATH, result_type)
    os.makedirs(subdir, exist_ok=True)
    filename = f"{current_time}_{model_name}_{result_type}_results.csv"
    # BUGFIX: the remote path previously did not embed the file name, so every
    # submission targeted the same repo entry and overwrote the previous one.
    remote_path = f"msteb_{result_type}_requests/{filename}"
    csv_save_path = os.path.join(subdir, filename)
    df.to_csv(csv_save_path, index=False)

    print(f"Uploading to {QUEUE_REPO}/{remote_path}")
    API.upload_file(
        path_or_fileobj=csv_save_path,
        path_in_repo=remote_path,
        repo_id=QUEUE_REPO,
        repo_type="dataset",  # or "model" if you made the repo that way
        commit_message=f"Add {result_type} request for {model_name} at {current_time}",
    )
    # The repo copy is the source of truth; drop the local temp file.
    os.remove(csv_save_path)

    # Convert the dataframe to the results JSON format and upload it.
    # NOTE(review): validation happens inside the conversion, i.e. AFTER the
    # raw CSV has already been pushed to the queue repo — an invalid
    # submission still leaves a file there. Confirm whether that is intended.
    try:
        convert_csv_to_json_and_upload(df, model_name, result_type)
    except ValueError as e:
        return styled_error(str(e))

    return styled_message(f"Results CSV successfully submitted for `{model_name}`!")
def find_task_by_col_name(col_name, enum_cls):
    """Return the first task in *enum_cls* whose display column name equals
    *col_name*, or ``None`` when no task matches."""
    matching = (candidate for candidate in enum_cls
                if candidate.value.col_name == col_name)
    return next(matching, None)
def _scores_from_row(row, task_enum):
    """Map one CSV row to ``{benchmark: {metric: score / 100}}``, keeping only
    cells that hold a real number (empty/NaN/non-numeric cells are skipped)."""
    scores = {}
    for col, val in row.items():
        if col == "Region":
            continue
        task = find_task_by_col_name(col, task_enum)
        # CSV values are percentages; the leaderboard JSON stores [0, 1].
        if val is not None and not pd.isna(val) and isinstance(val, (int, float)):
            scores[task.value.benchmark] = {task.value.metric: val / 100}
    return scores


def convert_csv_to_json_and_upload(df: pd.DataFrame, model_name: str, result_type: str):
    """Validate a results CSV, convert it to the leaderboard JSON schema and
    upload the JSON to the results repo.

    Args:
        df: Results table with a ``Region`` column plus one column per task
            display name; must contain an ``"Average (Micro)"`` row.
        model_name: Display name of the submitted model.
        result_type: ``"text"`` selects ``Tasks``; anything else selects
            ``SpeechTasks``.

    Returns:
        A short status string describing the upload destination.

    Raises:
        ValueError: On unknown columns or regions, a missing average row, or
            when the average row contains no numeric score at all.
    """
    task_enum = Tasks if result_type == "text" else SpeechTasks
    task_display_names = {t.value.col_name for t in task_enum}
    region_names = df["Region"].tolist()
    average_row = "Average (Micro)"

    # --- Validation ---
    df_columns = set(df.columns[1:])  # exclude the Region column
    if not df_columns.issubset(task_display_names):
        extra = df_columns - task_display_names
        raise ValueError(f"Extra columns in CSV: {extra}")
    if average_row not in df["Region"].values:
        raise ValueError("Missing row for 'Average (Micro)'")
    data_region_names = [r for r in region_names if r != average_row]
    for region in data_region_names:
        if region not in REGION_MAP:
            raise ValueError(f"Region '{region}' not found in REGION_MAP keys.")

    # --- Build JSON ---
    model_json = {
        "config": {"model_name": model_name},
        "results": {},
        "regions": {},
    }
    at_least_one_number = False
    for _, row in df.iterrows():
        region_display = row["Region"]
        if region_display == average_row:
            # The average row feeds the top-level "results" section; it must
            # contain at least one numeric score for the submission to count.
            scores = _scores_from_row(row, task_enum)
            if scores:
                at_least_one_number = True
            model_json["results"].update(scores)
        else:
            model_json["regions"][REGION_MAP[region_display]] = _scores_from_row(
                row, task_enum
            )
    if not at_least_one_number:
        raise ValueError("No valid numeric results found in the CSV. Please check your input.")

    # --- Save locally ---
    subdir = os.path.join(EVAL_RESULTS_PATH, result_type)
    os.makedirs(subdir, exist_ok=True)
    current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    filename = f"{current_time}_{model_name}_{result_type}.json"
    json_save_path = os.path.join(subdir, filename)
    with open(json_save_path, "w") as f:
        json.dump(model_json, f, indent=2)

    # --- Upload to HF Hub ---
    # BUGFIX: the remote path previously did not embed the file name, so every
    # upload targeted the same repo entry and overwrote the previous result.
    remote_path = f"msteb_leaderboard/msteb_{result_type}_results/{filename}"
    API.upload_file(
        path_or_fileobj=json_save_path,
        path_in_repo=remote_path,
        repo_id=RESULTS_REPO,
        repo_type="dataset",
        commit_message=f"Upload results for {model_name} ({result_type}) at {current_time}",
    )
    # The repo copy is the source of truth; drop the local temp file.
    os.remove(json_save_path)
    # BUGFIX: the log/return value printed only the timestamp, not the actual
    # destination path inside the repo.
    print(f"Uploaded to {RESULTS_REPO}/{remote_path}")
    return f"Uploaded to {RESULTS_REPO}/{remote_path}"