import os
import json
import csv
import datetime
from email.utils import parseaddr

import gradio as gr
import pandas as pd
import numpy as np

from datasets import load_dataset
from apscheduler.schedulers.background import BackgroundScheduler
from huggingface_hub import HfApi

from scorer import instruction_scorer
from content import format_error, format_warning, format_log, TITLE, INTRODUCTION_TEXT, SUBMISSION_TEXT, CITATION_BUTTON_LABEL, CITATION_BUTTON_TEXT, model_hyperlink
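
# Leaderboard Space for the ConTextual benchmark: it displays the test/val leaderboards,
# accepts new prediction files, scores them, and pushes updated results back to the Hub.
# Hub configuration: access token plus the dataset repositories and Space used below.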
TOKEN = os.environ.get("TOKEN", None)

OWNER = "ucla-contextual"
ALL_DATASET = f"{OWNER}/contextual_all"
VAL_DATASET = f"{OWNER}/contextual_val"
SUBMISSION_DATASET = f"{OWNER}/submissions_internal"
CONTACT_DATASET = f"{OWNER}/contact_info"
RESULTS_DATASET = f"{OWNER}/results"
LEADERBOARD_PATH = f"{OWNER}/leaderboard"

api = HfApi()

YEAR_VERSION = "2024"


def read_json_file(filepath):
    with open(filepath) as infile:
        data_dict = json.load(infile)
    return data_dict


def save_json_file(filepath, data_dict):
    with open(filepath, "w") as outfile:
        json.dump(data_dict, outfile)


os.makedirs("scored", exist_ok=True)
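
# Load the benchmark data, the current leaderboard results, and submitter contact
# information from the Hub, forcing a fresh download on every Space start.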
all_data_files = {"overall": "contextual_all.csv"}
all_dataset = load_dataset(ALL_DATASET, data_files=all_data_files, token=TOKEN, download_mode="force_redownload", ignore_verifications=True)

val_data_files = {"val": "contextual_val.csv"}
val_dataset = load_dataset(VAL_DATASET, data_files=val_data_files, token=TOKEN, download_mode="force_redownload", ignore_verifications=True)

results_data_files = {"overall": "contextual_all_results.csv", "val": "contextual_val_results.csv"}
results = load_dataset(RESULTS_DATASET, data_files=results_data_files, token=TOKEN, download_mode="force_redownload", ignore_verifications=True)

contacts_data_files = {"contacts": "contacts.csv"}
contact_infos = load_dataset(CONTACT_DATASET, data_files=contacts_data_files, token=TOKEN, download_mode="force_redownload", ignore_verifications=True)
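

# Convert one split of the results dataset into a leaderboard dataframe sorted by the
# overall score; the URL column is kept out of the public table.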
def get_dataframe_from_results(results, split):
    df = results[split].to_pandas()
    df.drop(columns=['URL'], inplace=True)
    df = df.sort_values(by=["All"], ascending=False)
    return df


all_dataset_dataframe = all_dataset["overall"].to_pandas()
val_dataset_dataframe = val_dataset["val"].to_pandas()

contacts_dataframe = contact_infos["contacts"].to_pandas()

val_results_dataframe = get_dataframe_from_results(results=results, split="val")
all_results_dataframe = get_dataframe_from_results(results=results, split="overall")
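

# Restart the leaderboard Space so it reloads the latest results from the Hub.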
def restart_space():
    api.restart_space(repo_id=LEADERBOARD_PATH, token=TOKEN)
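

# Column datatypes for the leaderboard tables: Model, Method, and Organisation are
# rendered as markdown; the nine score columns are numeric.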
TYPES = ["markdown", "markdown", "markdown", "number", "number", "number", "number", "number", "number", "number", "number", "number"]
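

# Handle a leaderboard submission end to end: validate the form fields and the uploaded
# prediction file, archive the raw and scored files, score the predictions on the val
# split, and append the new row to the results and contact datasets.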
def add_new_eval(
    model: str,
    method: str,
    url: str,
    path_to_file: str,
    organisation: str,
    mail: str,
):
    print("Received submission:", model, method, url, path_to_file, organisation, mail)

    if len(model) == 0:
        print("Missing model name")
        raise gr.Error("Please provide a model name. Field empty!")

    if len(method) == 0:
        print("Missing method")
        raise gr.Error("Please provide a method. Field empty!")

    if len(organisation) == 0:
        print("Missing organisation")
        raise gr.Error("Please provide organisation information. Field empty!")

    _, parsed_mail = parseaddr(mail)
    if "@" not in parsed_mail:
        print("Invalid email address")
        raise gr.Error("Please provide a valid email address.")
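
    # Reject resubmissions: a model/organisation pair may only appear once on the val leaderboard.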
    submitted_pairs = {
        (m.lower(), o.lower())
        for m, o in zip(results["val"]["Model"], results["val"]["Organisation"])
    }
    if (model.lower(), organisation.lower()) in submitted_pairs:
        print("Duplicate model/organisation submission")
        raise gr.Error("This model has already been submitted.")

    if path_to_file is None:
        print("Missing submission file")
        raise gr.Error("Please attach a file.")
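
    # The submission JSON must hold a single top-level key whose value maps exactly 100
    # sample identifiers to predictions, e.g. (hypothetical keys):
    # {"my_model": {"0": "...", "1": "...", ..., "99": "..."}}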
    tmp_file_output = read_json_file(path_to_file.name)

    if len(tmp_file_output.keys()) != 1:
        print("Submission file does not have exactly one top-level key")
        raise gr.Error("Submission file format incorrect. Please refer to the format description!")

    tmp_output_key = list(tmp_file_output.keys())[0]
    if len(tmp_file_output[tmp_output_key].keys()) != 100:
        print("Submission file does not contain exactly 100 predictions")
        raise gr.Error("File must contain exactly 100 predictions.")
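
    # Archive the raw submission file in the internal submissions dataset before scoring.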
    time_atm = datetime.datetime.today()
    api.upload_file(
        repo_id=SUBMISSION_DATASET,
        path_or_fileobj=path_to_file.name,
        path_in_repo=f"{organisation}/{model}/{YEAR_VERSION}_raw_{time_atm}.json",
        repo_type="dataset",
        token=TOKEN,
    )
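
    # Score the predictions against the val split, save the per-category scores locally,
    # and archive the scored file next to the raw submission.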
    file_path = path_to_file.name
    scores = instruction_scorer(val_dataset_dataframe, file_path, model)

    path_or_fileobj = f"scored/{organisation}_{model}.json"
    save_json_file(path_or_fileobj, scores)

    api.upload_file(
        repo_id=SUBMISSION_DATASET,
        path_or_fileobj=path_or_fileobj,
        path_in_repo=f"{organisation}/{model}/{YEAR_VERSION}_scored_{time_atm}.json",
        repo_type="dataset",
        token=TOKEN,
    )
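
    # Build the new leaderboard row from the per-category scores returned by instruction_scorer.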
    eval_entry = {
        "Model": model,
        "Method": method,
        "Organisation": organisation,
        "URL": url,
        "All": scores["average"],
        "Time": scores["time"],
        "Shopping": scores["shopping"],
        "Navigation": scores["navigation-transportation"],
        "Abstract": scores["abstract"],
        "Application Usage": scores["app"],
        "Web Usage": scores["web"],
        "Infographic": scores["infographics"],
        "Miscellaneous Natural Scenes": scores["misc"],
    }
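
    # Append the new entry to the current val results and push the updated CSV to the results dataset.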
    val_results_dataframe = get_dataframe_from_results(results=results, split="val")
    val_results_dataframe = pd.concat([val_results_dataframe, pd.DataFrame([eval_entry])], ignore_index=True)
    val_results_dataframe.to_csv("contextual_val_results.csv", index=False)

    api.upload_file(
        repo_id=RESULTS_DATASET,
        path_or_fileobj="contextual_val_results.csv",
        path_in_repo="contextual_val_results.csv",
        repo_type="dataset",
        token=TOKEN,
    )
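
    # Record the submitter's contact details in the private contact-info dataset.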
    contact_info = {
        "Model": model,
        "URL": url,
        "Organisation": organisation,
        "Mail": mail,
    }

    contacts_dataframe = contact_infos["contacts"].to_pandas()
    contacts_dataframe = pd.concat([contacts_dataframe, pd.DataFrame([contact_info])], ignore_index=True)
    contacts_dataframe.to_csv("contacts.csv", index=False)

    api.upload_file(
        repo_id=CONTACT_DATASET,
        path_or_fileobj="contacts.csv",
        path_in_repo="contacts.csv",
        repo_type="dataset",
        token=TOKEN,
    )

    return format_log(f"Model {model} submitted by {organisation} successfully! \nPlease refresh the val leaderboard and wait a bit for the score to appear.")
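

# Re-download the results dataset and rebuild both leaderboard tables.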
def refresh():
    results_data_files = {"overall": "contextual_all_results.csv", "val": "contextual_val_results.csv"}
    results = load_dataset(RESULTS_DATASET, data_files=results_data_files, token=TOKEN, download_mode="force_redownload", ignore_verifications=True)
    val_results_dataframe = get_dataframe_from_results(results=results, split="val")
    all_results_dataframe = get_dataframe_from_results(results=results, split="overall")
    return val_results_dataframe, all_results_dataframe


def upload_file(files):
    file_paths = [file.name for file in files]
    return file_paths
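

# Build the Gradio interface: introduction/guidelines/citation accordions, the test and
# val leaderboard tables, a refresh button, and the submission form.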
demo = gr.Blocks()
with demo:
    gr.HTML(TITLE)

    with gr.Row():
        with gr.Accordion("Introduction", open=False):
            gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Row():
        with gr.Accordion("Submission Guidelines", open=False):
            gr.Markdown(SUBMISSION_TEXT, elem_classes="markdown-text")

    with gr.Row():
        with gr.Accordion("Citation", open=False):
            citation_button = gr.TextArea(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                elem_id="citation-button",
            )

    with gr.Tab("Results: Test"):
        leaderboard_table_all = gr.components.Dataframe(
            value=all_results_dataframe, datatype=TYPES, interactive=False,
            column_widths=["20%"],
        )
    with gr.Tab("Results: Val"):
        leaderboard_table_val = gr.components.Dataframe(
            value=val_results_dataframe, datatype=TYPES, interactive=False,
            column_widths=["20%"],
        )

    refresh_button = gr.Button("Refresh")
    refresh_button.click(
        refresh,
        inputs=[],
        outputs=[
            leaderboard_table_val,
            leaderboard_table_all,
        ],
    )

    with gr.Accordion("Submit a new model for evaluation"):
        with gr.Row():
            with gr.Column():
                model_name_textbox = gr.Textbox(label="Model name", type="text")
                method_textbox = gr.Textbox(label="Method (LMM, Aug LLM, or other)", type="text")
                url_textbox = gr.Textbox(label="URL to model information", type="text")
            with gr.Column():
                organisation = gr.Textbox(label="Organisation", type="text")
                mail = gr.Textbox(label="Contact email (stored privately and used if there is an issue with your submission)", type="email")
                file_output = gr.File()

    submit_button = gr.Button("Submit Eval")
    submission_result = gr.Markdown()
    submit_button.click(
        add_new_eval,
        [
            model_name_textbox,
            method_textbox,
            url_textbox,
            file_output,
            organisation,
            mail,
        ],
        submission_result,
    )
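

# Restart the Space every hour so the leaderboard tables pick up newly uploaded results,
# then launch the app.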
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=3600)
scheduler.start()
demo.launch(debug=True)