"""Gradio leaderboard app backed by the ``ucla-contextual`` Hugging Face datasets.

At import time the module downloads the test/val datasets, the results tables
and the contact list, builds the Gradio UI, schedules an hourly restart of the
Space and launches the app.
"""
import os
import json
import csv
import datetime
from email.utils import parseaddr

import gradio as gr
import pandas as pd
import numpy as np

from datasets import load_dataset
from apscheduler.schedulers.background import BackgroundScheduler
from huggingface_hub import HfApi

from scorer import instruction_scorer
from content import format_error, format_warning, format_log, TITLE, INTRODUCTION_TEXT, SUBMISSION_TEXT, CITATION_BUTTON_LABEL, CITATION_BUTTON_TEXT, model_hyperlink

TOKEN = os.environ.get("TOKEN", None)

OWNER = "ucla-contextual"
TEST_DATASET = f"{OWNER}/contextual_test"
VAL_DATASET = f"{OWNER}/contextual_val"
SUBMISSION_DATASET = f"{OWNER}/submissions_internal"
CONTACT_DATASET = f"{OWNER}/contact_info"
RESULTS_DATASET = f"{OWNER}/results"
LEADERBOARD_PATH = f"{OWNER}/leaderboard"
api = HfApi()

YEAR_VERSION = "2024"

# Message shown whenever the uploaded file does not have the expected layout.
_FORMAT_ERROR_MESSAGE = "Submission file format incorrect. Please refer to the format description!"


def read_json_file(filepath):
    """Load and return the JSON document stored at ``filepath``."""
    with open(filepath) as infile:
        data_dict = json.load(infile)
    return data_dict


def save_json_file(filepath, data_dict):
    """Serialise ``data_dict`` as JSON into ``filepath`` (overwriting it)."""
    with open(filepath, "w") as outfile:
        json.dump(data_dict, outfile)


def _load_results():
    """Download a fresh copy of the test/val results tables from the Hub."""
    return load_dataset(
        RESULTS_DATASET,
        data_files=results_data_files,
        token=TOKEN,
        download_mode="force_redownload",
        ignore_verifications=True,
    )


def _load_contacts():
    """Download a fresh copy of the contact-information table from the Hub."""
    return load_dataset(
        CONTACT_DATASET,
        data_files=contacts_data_files,
        token=TOKEN,
        download_mode="force_redownload",
        ignore_verifications=True,
    )


def _is_blank(value):
    """Return True when a text field is missing, empty or whitespace-only."""
    return value is None or not str(value).strip()


def _safe_filename_part(text):
    """Replace every character that is not alphanumeric, '-', '_' or '.' by '_'.

    Used for the local scratch file so that user-supplied names containing
    path separators cannot escape (or fail to resolve inside) ``scored/``.
    """
    return "".join(ch if ch.isalnum() or ch in "-_." else "_" for ch in str(text))


def _submission_exists(results_split, model, organisation):
    """Return True if the (model, organisation) pair is already in ``results_split``.

    The comparison is case-insensitive and is done on the *pair*, so the same
    model name submitted by a different organisation is not a duplicate.
    """
    existing_pairs = {
        (str(m).lower(), str(o).lower())
        for m, o in zip(results_split["Model"], results_split["Organisation"])
    }
    return (model.lower(), organisation.lower()) in existing_pairs


os.makedirs("scored", exist_ok=True)

test_data_files = {"test": "contextual_test.csv"}
test_dataset = load_dataset(TEST_DATASET, data_files=test_data_files, token=TOKEN, download_mode="force_redownload", ignore_verifications=True)

val_data_files = {"val": "contextual_val.csv"}
val_dataset = load_dataset(VAL_DATASET, data_files=val_data_files, token=TOKEN, download_mode="force_redownload", ignore_verifications=True)

results_data_files = {"test": "contextual_test_results.csv", "val": "contextual_val_results.csv"}
results = _load_results()

contacts_data_files = {"contacts": "contacts.csv"}
contact_infos = _load_contacts()


def get_dataframe_from_results(results, split):
    """Return the display table for ``split``.

    The ``URL`` column is dropped and rows are sorted by the ``All`` column in
    descending order. The returned frame is meant for display only; it must
    not be written back to the results dataset because it lacks ``URL``.
    """
    df = results[split].to_pandas()
    df.drop(columns=['URL'], inplace=True)
    df = df.sort_values(by=["All"], ascending=False)
    return df


test_dataset_dataframe = test_dataset["test"].to_pandas()
val_dataset_dataframe = val_dataset["val"].to_pandas()

contacts_dataframe = contact_infos["contacts"].to_pandas()

val_results_dataframe = get_dataframe_from_results(results=results, split="val")
test_results_dataframe = get_dataframe_from_results(results=results, split="test")


def restart_space():
    """Ask the Hub to restart the leaderboard Space."""
    api.restart_space(repo_id=LEADERBOARD_PATH, token=TOKEN)


# One entry per displayed column (the display tables do not include URL).
TYPES = ["markdown", "markdown", "markdown", "number", "number", "number", "number", "number", "number", "number", "number", "number"]


def add_new_eval(
    model: str,
    method: str,
    url: str,
    path_to_file: str,
    organisation: str,
    mail: str,
):
    """Validate, score and record a new validation-set submission.

    Steps: validate the form fields and the uploaded JSON file, upload the raw
    file, score it with ``instruction_scorer``, upload the scored file, append
    a row to the val results CSV and a row to the contacts CSV, and upload both.

    Args:
        model: Model name entered in the form.
        method: Method description entered in the form.
        url: URL to model information.
        path_to_file: Uploaded file object from ``gr.File`` (its ``.name``
            attribute is used as the path on disk), or ``None``.
        organisation: Organisation entered in the form.
        mail: Contact e-mail address.

    Returns:
        The success message formatted by ``format_log``.

    Raises:
        gr.Error: If a required field is blank, the e-mail has no ``@``, the
            (model, organisation) pair already exists, no file is attached, or
            the file does not have the expected structure.
    """
    print("printing all inputs:", model, method, url, path_to_file, organisation, mail)

    if _is_blank(model):
        print("model none")
        raise gr.Error("Please provide a model name. Field empty!")

    if _is_blank(method):
        print("method none")
        raise gr.Error("Please provide a method. Field empty!")

    if _is_blank(organisation):
        print("org none")
        raise gr.Error("Please provide organisation information. Field empty!")

    # Very basic email parsing
    _, parsed_mail = parseaddr(mail)
    if "@" not in parsed_mail:
        print("email here")
        raise gr.Error("Please provide a valid email address.")

    # Work from a fresh copy of the results: the module-level snapshot goes
    # stale after the first submission, which would make a later submission
    # overwrite earlier ones when the CSV is re-uploaded.
    latest_results = _load_results()

    # Reject the submission if this exact model/organisation pair already exists.
    if _submission_exists(latest_results["val"], model, organisation):
        print("model org combo here")
        raise gr.Error("This model has been already submitted.")

    if path_to_file is None:
        print("file missing here")
        raise gr.Error("Please attach a file.")

    file_path = path_to_file.name
    try:
        tmp_file_output = read_json_file(file_path)
    except (json.JSONDecodeError, UnicodeDecodeError) as err:
        print("file not valid json here")
        raise gr.Error(_FORMAT_ERROR_MESSAGE) from err

    # Expected layout: a single top-level key mapping to a dict of predictions.
    if not isinstance(tmp_file_output, dict) or len(tmp_file_output) != 1:
        print("file format wrong here")
        raise gr.Error(_FORMAT_ERROR_MESSAGE)

    tmp_output_key = next(iter(tmp_file_output))
    predictions = tmp_file_output[tmp_output_key]
    if not isinstance(predictions, dict):
        print("file format wrong here")
        raise gr.Error(_FORMAT_ERROR_MESSAGE)

    if len(predictions) != 100:
        print("file not 100 here")
        raise gr.Error("File must contain exactly 100 predictions.")

    # Save submitted file
    time_atm = datetime.datetime.today()
    api.upload_file(
        repo_id=SUBMISSION_DATASET,
        path_or_fileobj=file_path,
        path_in_repo=f"{organisation}/{model}/{YEAR_VERSION}_raw_{time_atm}.json",
        repo_type="dataset",
        token=TOKEN
    )

    # Compute score
    scores = instruction_scorer(val_dataset_dataframe, file_path, model)

    # The local scratch file name is sanitised because both parts are user input.
    scored_local_path = f"scored/{_safe_filename_part(organisation)}_{_safe_filename_part(model)}.json"
    save_json_file(scored_local_path, scores)

    # Save scored file
    api.upload_file(
        repo_id=SUBMISSION_DATASET,
        path_or_fileobj=scored_local_path,
        path_in_repo=f"{organisation}/{model}/{YEAR_VERSION}_scored_{time_atm}.json",
        repo_type="dataset",
        token=TOKEN
    )

    # Actual submission
    eval_entry = {
        "Model": model,
        "Method": method,
        "Organisation": organisation,
        "URL": url,
        "All": scores["average"],
        "Time": scores["time"],
        "Shopping": scores["shopping"],
        "Navigation": scores["navigation-transportation"],
        "Abstract": scores["abstract"],
        "Application Usage": scores["app"],
        "Web Usage": scores["web"],
        "Infographic": scores["infographics"],
        "Miscellaneous Natural Scenes": scores["misc"]
    }

    # Append to the FULL table (URL column included). Appending to the display
    # table from get_dataframe_from_results would blank the URL of every
    # existing row in the uploaded CSV.
    full_val_results = latest_results["val"].to_pandas()
    full_val_results = full_val_results.sort_values(by=["All"], ascending=False)
    full_val_results = pd.concat([full_val_results, pd.DataFrame([eval_entry])], ignore_index=True)
    full_val_results.to_csv('contextual_val_results.csv', index=False)

    api.upload_file(
        repo_id=RESULTS_DATASET,
        path_or_fileobj="contextual_val_results.csv",
        path_in_repo="contextual_val_results.csv",
        repo_type="dataset",
        token=TOKEN
    )

    contact_info = {
        "Model": model,
        "URL": url,
        "Organisation": organisation,
        "Mail": mail,
    }

    # Same staleness concern as for the results: start from a fresh download.
    latest_contacts = _load_contacts()["contacts"].to_pandas()
    latest_contacts = pd.concat([latest_contacts, pd.DataFrame([contact_info])], ignore_index=True)
    latest_contacts.to_csv('contacts.csv', index=False)

    api.upload_file(
        repo_id=CONTACT_DATASET,
        path_or_fileobj="contacts.csv",
        path_in_repo="contacts.csv",
        repo_type="dataset",
        token=TOKEN
    )

    return format_log(f"Model {model} submitted by {organisation} successfully! \nPlease refresh the val leaderboard, and wait a bit to see the score displayed")


def refresh():
    """Re-download the results and return the (val, test) display tables."""
    fresh_results = _load_results()
    val_results_dataframe = get_dataframe_from_results(results=fresh_results, split="val")
    test_results_dataframe = get_dataframe_from_results(results=fresh_results, split="test")
    return val_results_dataframe, test_results_dataframe


def upload_file(files):
    """Return the on-disk path (``.name``) of every uploaded file object."""
    file_paths = [file.name for file in files]
    return file_paths


demo = gr.Blocks()
with demo:
    gr.HTML(TITLE)

    with gr.Row():
        with gr.Accordion("🧐 Introduction", open=False):
            gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Row():
        with gr.Accordion("🎯 Submission Guidelines", open=False):
            gr.Markdown(SUBMISSION_TEXT, elem_classes="markdown-text")

    with gr.Row():
        with gr.Accordion("📙 Citation", open=False):
            citation_button = gr.TextArea(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                elem_id="citation-button",
            )

    with gr.Tab("Results: Test"):
        leaderboard_table_test = gr.components.Dataframe(
            value=test_results_dataframe, datatype=TYPES, interactive=False,
            column_widths=["20%"]
        )
    with gr.Tab("Results: Val"):
        leaderboard_table_val = gr.components.Dataframe(
            value=val_results_dataframe, datatype=TYPES, interactive=False,
            column_widths=["20%"]
        )

    refresh_button = gr.Button("Refresh")
    refresh_button.click(
        refresh,
        inputs=[],
        outputs=[
            leaderboard_table_val,
            leaderboard_table_test,
        ],
    )
    with gr.Accordion("Submit a new model for evaluation"):
        with gr.Row():
            with gr.Column():
                model_name_textbox = gr.Textbox(label="Model name", type='text')
                method_textbox = gr.Textbox(label="Method (LMM or Aug LLM or any other)", type='text')
                url_textbox = gr.Textbox(label="URL to model information", type='text')
            with gr.Column():
                organisation = gr.Textbox(label="Organisation", type='text')
                mail = gr.Textbox(label="Contact email (will be stored privately, & used if there is an issue with your submission)", type='email')
                file_output = gr.File()

        submit_button = gr.Button("Submit Eval")
        submission_result = gr.Markdown()
        submit_button.click(
            add_new_eval,
            [
                model_name_textbox,
                method_textbox,
                url_textbox,
                file_output,
                organisation,
                mail
            ],
            submission_result,
        )

# Restart the Space every hour so the module-level tables are reloaded.
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=3600)
scheduler.start()
demo.launch(debug=True)