"""Streamlit app for submitting GEM benchmark predictions.

The script logs in to AutoNLP with the server-side ``HF_TOKEN``, renders an
upload form, and on submit:

1. validates the uploaded ``submission.json``,
2. pushes it to the dataset repo ``benchmarks/gem-<user name>`` on the
   Hugging Face Hub,
3. launches an AutoNLP benchmark job for that submission.
"""
import json
import os
import re
import shutil
import subprocess
from datetime import datetime
from pathlib import Path

import streamlit as st
from dotenv import load_dotenv
from huggingface_hub import Repository, whoami

from validate import validate_submission

if Path(".env").is_file():
    load_dotenv(".env")

HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    # Without a token neither the AutoNLP login nor the private repo clone
    # below can be authenticated, so fail early with a clear message.
    raise Exception("HF_TOKEN is not set; define it in the environment or in a .env file")

# AutoNLP login
# The option name and its value must be separate argv elements: a single
# "--api-key <token>" element reaches the CLI as one argument containing a
# space, not as an option followed by its value.
autonlp_login = subprocess.run(["autonlp", "login", "--api-key", HF_TOKEN], stdout=subprocess.PIPE)
# Any non-zero exit status is a failure. A return code of -1 would only mean
# "terminated by signal 1" on POSIX, so comparing against it misses ordinary
# error exits.
if autonlp_login.returncode != 0:
    raise Exception(f"AutoNLP login failed with return code {autonlp_login.returncode}")

LOCAL_REPO = "submission_repo"

with st.form(key="form"):
    # Flush local repo
    shutil.rmtree(LOCAL_REPO, ignore_errors=True)
    uploaded_file = st.file_uploader("Upload a submission.json file", type=["json"])
    token = st.text_input("API token", type="password")
    submit_button = st.form_submit_button("Submit")

# Only process a submission once the user has actually pressed "Submit";
# otherwise the first render of the page would try to use a file that has not
# been uploaded yet.
if submit_button:
    if uploaded_file is None:
        st.error("Please upload a submission.json file before submitting.")
        st.stop()
    if not token:
        st.error("Please provide your API token before submitting.")
        st.stop()

    try:
        json_data = json.loads(str(uploaded_file.read(), "utf-8"))
    except (UnicodeDecodeError, json.JSONDecodeError) as err:
        st.error(f"Could not read the uploaded file as UTF-8 encoded JSON: {err}")
        st.stop()

    # NOTE(review): presumably raises on an invalid submission -- confirm
    # against validate.py; its return value is not used here.
    validate_submission(json_data)
    user_info = whoami(token)
    user_name = user_info["name"]
    submission_name = json_data["submission_name"]

    try:
        # Create submission dataset under benchmarks ORG
        dataset_repo_url = f"https://huggingface.co/datasets/benchmarks/gem-{user_name}"
        repo = Repository(
            local_dir=LOCAL_REPO,
            clone_from=dataset_repo_url,
            repo_type="dataset",
            private=True,
            use_auth_token=HF_TOKEN,
        )
        submission_metadata = {"benchmark": "gem", "type": "prediction", "submission_name": submission_name}
        repo.repocard_metadata_save(submission_metadata)

        with open(f"{LOCAL_REPO}/submission.json", "w", encoding="utf-8") as f:
            json.dump(json_data, f)

        # TODO: add informative commit msg
        commit_url = repo.push_to_hub()
        # Fall back to the current HEAD commit when the push returns no URL.
        if commit_url is not None:
            commit_sha = commit_url.split("/")[-1]
        else:
            commit_sha = repo.git_head_commit_url().split("/")[-1]

        # Submission ID: <submission name>__<commit sha>__<unix timestamp in seconds>
        submission_time = str(int(datetime.now().timestamp()))
        submission_id = submission_name + "__" + commit_sha + "__" + submission_time

        process = subprocess.run(
            [
                "autonlp",
                "benchmark",
                "--eval_name",
                "gem",
                "--dataset",
                "GEM/references",
                "--submission",
                f"gem-{user_name}",
                "--submission_id",
                submission_id,
            ],
            stdout=subprocess.PIPE,
        )
        if process.returncode != 0:
            st.write("Error launching AutoNLP job")
        else:
            # The job ID is taken from the first "# <digits>" occurrence in the
            # command's standard output.
            match_job_id = re.search(r"# (\d+)", process.stdout.decode("utf-8", errors="replace"))
            if match_job_id is None:
                st.write("Could not extract AutoNLP job ID due to error: no job ID found in the AutoNLP output")
            else:
                job_id = match_job_id.group(1)
                st.write(f"Successfully launched evaluation job #{job_id} for submission {submission_name}!")

        st.write(json_data["submission_name"])
        st.write(commit_sha)
    finally:
        # Flush local repo, even when one of the steps above failed, so a
        # stale clone never leaks into the next submission.
        shutil.rmtree(LOCAL_REPO, ignore_errors=True)