from email.utils import parseaddr
import datetime
import os

from huggingface_hub import HfApi

# Hub namespace under which the repositories below live.
OWNER = "locuslab"
SUBMISSION_DATASET = f"{OWNER}/submissions_internal"
RESULTS_DATASET = f"{OWNER}/results_public"
LEADERBOARD_PATH = f"{OWNER}/tofu_leaderboard"
api = HfApi()
# Taken from the environment; None when the variable is not set.
TOKEN = os.environ.get("TOKEN")
YEAR_VERSION = "2024"


# NOTE(review): the three formatters and `model_hyperlink` below return their
# argument with at most surrounding newlines, and `link` is never used. This
# looks like markup that used to wrap the text is missing -- confirm against
# the intended version before relying on these for display.
def format_error(msg):
    """Return *msg* preceded and followed by a newline."""
    return f"\n{msg}\n"


def format_warning(msg):
    """Return *msg* followed by a newline."""
    return f"{msg}\n"


def format_log(msg):
    """Return *msg* followed by a newline."""
    return f"{msg}\n"


def model_hyperlink(link, model_name):
    """Return *model_name* as a string; *link* is accepted but not used."""
    return f'{model_name}'


def add_new_eval(
    val_or_test: str,
    model: str,
    model_family: str,
    system_prompt: str,
    url: str,
    path_to_file: str,
    organisation: str,
    mail: str,
):
    """Validate a submission and upload its files to ``SUBMISSION_DATASET``.

    Args:
        val_or_test: Split label; embedded in the uploaded file names.
        model: Model name; used as a path component in the dataset repo.
        model_family: Stored in the (currently unpersisted) result entry.
        system_prompt: Stored in the (currently unpersisted) result entry.
        url: Stored in the (currently unpersisted) result entry.
        path_to_file: The submitted file, or None when nothing was attached.
            Despite the ``str`` annotation, the code reads its ``.name``
            attribute, so it is presumably a file-like upload object --
            TODO confirm against the caller.
        organisation: Organisation name; used as a path component in the
            dataset repo.
        mail: Contact address; must contain "@" once parsed by ``parseaddr``.

    Returns:
        ``format_warning(...)`` when the mail address or the file is missing,
        otherwise ``format_log(...)`` with a success message.

    Exceptions raised by ``api.upload_file`` are not caught here.
    """
    # Very basic email parsing
    _, parsed_mail = parseaddr(mail)
    if "@" not in parsed_mail:
        return format_warning("Please provide a valid email adress.")

    print("Adding new eval")

    # Check if the combination model/org already exists and prints a warning message if yes
    # if model.lower() in set(eval_results[val_or_test]["model"]) and organisation.lower() in set(eval_results[val_or_test]["organisation"]):
    #     return format_warning("This model has been already submitted.")

    if path_to_file is None:
        return format_warning("Please attach a file.")

    # One timestamp for the whole submission so that the raw and the scored
    # file of the same submission carry matching names.
    submitted_at = datetime.datetime.today()
    repo_prefix = f"{organisation}/{model}/{YEAR_VERSION}_{val_or_test}"

    # Save submitted file
    api.upload_file(
        repo_id=SUBMISSION_DATASET,
        path_or_fileobj=path_to_file.name,
        path_in_repo=f"{repo_prefix}_raw_{submitted_at}.jsonl",
        repo_type="dataset",
        token=TOKEN,
    )

    # Compute score
    # NOTE(review): nothing in this function writes the local
    # `scored/{organisation}_{model}.jsonl` file uploaded below; the scoring
    # step is only a placeholder, so the file presumably has to exist
    # already -- confirm.

    # Save scored file
    api.upload_file(
        repo_id=SUBMISSION_DATASET,
        path_or_fileobj=f"scored/{organisation}_{model}.jsonl",
        path_in_repo=f"{repo_prefix}_scored_{submitted_at}.jsonl",
        repo_type="dataset",
        token=TOKEN,
    )

    # Actual submission
    # The entry is assembled but not persisted: the code that would add it to
    # the results dataset is disabled below.
    eval_entry = {
        "model": model,
        "model_family": model_family,
        "system_prompt": system_prompt,
        "url": url,
        "organisation": organisation,
        "mail": mail,
        # "score": scores["all"]/num_questions["all"],
        # "score_level1": scores[1]/num_questions[1],
        # "score_level2": scores[2]/num_questions[2],
        # "score_level3": scores[3]/num_questions[3],
    }
    # eval_results[val_or_test] = eval_results[val_or_test].add_item(eval_entry)
    # print(eval_results)
    # eval_results.push_to_hub(RESULTS_DATASET, config_name = YEAR_VERSION, token=TOKEN)

    return format_log(f"Model {model} submitted by {organisation} successfully. \nPlease refresh the leaderboard, and wait a bit to see the score displayed")