IL-TUR-Leaderboard

Sleeping

File size: 5,067 Bytes

from email.utils import parseaddr
from huggingface_hub import HfApi
import os
import datetime
import json
import pandas as pd
import gradio as gr

from eval_utils import get_evaluation_scores


# Hub repository that add_new_eval uploads the refreshed results file to
# (it is addressed there with repo_type="space").
LEADERBOARD_PATH = "Exploration-Lab/IL-TUR-Leaderboard"
# "predictions": the uploaded file is scored against the gold file with
# get_evaluation_scores. Any other value: the uploaded file is used as-is.
SUBMISSION_FORMAT = "predictions"
# Disabled: separate results repository, only referenced by commented-out code.
# RESULTS_PATH = "Exploration-Lab/IL-TUR-Leaderboard-results"
# Hub access token taken from the TOKEN environment variable (None when unset).
TOKEN = os.environ.get("TOKEN", None)
# NOTE(review): not referenced anywhere in the visible part of this file.
YEAR_VERSION = "2024"

# Hub API client used for the upload in add_new_eval.
api = HfApi(token=TOKEN)


def format_error(msg):
    """Wrap ``msg`` in a centred, red, 20px HTML paragraph."""
    css = "color: red; font-size: 20px; text-align: center;"
    return f"<p style='{css}'>{msg}</p>"


def format_warning(msg):
    """Wrap ``msg`` in a centred, orange, 20px HTML paragraph."""
    css = "color: orange; font-size: 20px; text-align: center;"
    return f"<p style='{css}'>{msg}</p>"


def format_log(msg):
    """Wrap ``msg`` in a centred, green, 20px HTML paragraph."""
    css = "color: green; font-size: 20px; text-align: center;"
    return f"<p style='{css}'>{msg}</p>"


def model_hyperlink(link, model_name):
    """Return an HTML anchor showing ``model_name`` that opens ``link`` in a new tab."""
    css = (
        "color: var(--link-text-color); "
        "text-decoration: underline;"
        "text-decoration-style: dotted;"
    )
    return f'<a target="_blank" href="{link}" style="{css}">{model_name}</a>'


def input_verification(method_name, url, path_to_file, organisation, mail):
    """Validate the submission form fields and extract the contact e-mail.

    Args:
        method_name: Name of the submitted method.
        url: Link supplied with the submission.
        path_to_file: Path of the uploaded submission file, or ``None`` when
            no file was attached.
        organisation: Name of the submitting organisation.
        mail: Contact e-mail, either bare or in ``Name <address>`` form.

    Returns:
        The address part of ``mail`` when every check passes. Otherwise the
        HTML warning (built with ``format_warning``) for the first failed
        check; no exception is raised for invalid input.
    """
    # A blank text field arrives as "" (or possibly None); an empty-string
    # file path is reported the same way, while a missing file (None) gets its
    # own, more specific message further down.
    text_fields = (method_name, url, organisation, mail)
    if path_to_file == "" or any(not value for value in text_fields):
        return format_warning("Please fill all the fields.")

    # Very basic e-mail check: parseaddr strips an optional display name and
    # we only require an "@" in what is left.
    _, parsed_mail = parseaddr(mail)
    if "@" not in parsed_mail:
        return format_warning("Please provide a valid email adress.")

    if path_to_file is None:
        return format_warning("Please attach a file.")

    # NOTE: an earlier revision also looked up display labels such as "Method"
    # in locals(); those labels are never local variable names, so that check
    # rejected every valid submission. Blank fields are already handled above.
    return parsed_mail


def add_new_eval(
    method_name: str,
    submitted_by: str,
    url: str,
    path_to_file: str,
    organisation: str,
    mail: str,
):
    """Validate a submission, score it and publish the updated results file.

    Args:
        method_name: Name of the submitted method.
        submitted_by: Accepted for interface compatibility; currently unused.
        url: Link supplied with the submission.
        path_to_file: Path of the uploaded JSON file.
        organisation: Name of the submitting organisation.
        mail: Contact e-mail address.

    Returns:
        An HTML snippet: the validation warning when the form is incomplete,
        an error when the uploaded file is not valid JSON, or a success
        message once the results file has been uploaded.
    """
    import html
    import io

    verification = input_verification(
        method_name,
        url,
        path_to_file,
        organisation,
        mail,
    )
    # input_verification returns either the parsed e-mail address or an HTML
    # warning paragraph built by format_warning; stop here on a warning rather
    # than trying to open a missing file.
    if verification.startswith("<p "):
        return verification

    # The uploaded file comes from the user, so report malformed content
    # instead of letting the decoder exception escape.
    try:
        with open(path_to_file, "r", encoding="utf-8") as f:
            uploaded = json.load(f)
    except (json.JSONDecodeError, UnicodeDecodeError):
        return format_error("The submitted file is not a valid JSON file.")

    if SUBMISSION_FORMAT == "predictions":
        # Score the uploaded predictions against the bundled gold file.
        with open(
            "submissions/baseline/IL_TUR_eval_gold_small.json",
            "r",
            encoding="utf-8",
        ) as f:
            gold_data = json.load(f)
        submission = get_evaluation_scores(gold_data, uploaded)
    else:
        # The upload is used directly as the submission.
        submission = uploaded

    with open("submissions/baseline/results.json", "r", encoding="utf-8") as f:
        results = json.load(f)

    # Only the first element of the submission is appended.
    # NOTE(review): presumably that element is the leaderboard row - confirm
    # against get_evaluation_scores.
    results.append(submission[0])

    # Serialise in memory; the Space's results file is replaced via the Hub
    # API rather than written to local disk.
    leaderboard_buffer = io.BytesIO(json.dumps(results).encode())

    api.upload_file(
        repo_id=LEADERBOARD_PATH,
        path_in_repo="submissions/baseline/results.json",
        path_or_fileobj=leaderboard_buffer,
        token=TOKEN,
        repo_type="space",
    )

    # The names are user input that ends up inside HTML, so escape them.
    safe_method = html.escape(str(method_name))
    safe_organisation = html.escape(str(organisation))
    return format_log(
        f"Method {safe_method} submitted by {safe_organisation} successfully. \nPlease refresh the leaderboard, and wait a bit to see the score displayed"
    )