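"""Streamlit app for submitting predictions to the GEM benchmark.

The app validates an uploaded submission.json, pushes it to a private
dataset repo under the `benchmarks` organization, and launches an AutoNLP
evaluation job against the GEM references dataset.
"""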
import json
import os
import re
import shutil
import subprocess
from datetime import datetime
from pathlib import Path

import streamlit as st
from dotenv import load_dotenv
from huggingface_hub import Repository, whoami

from validate import validate_submission

if Path(".env").is_file():
    load_dotenv(".env")

HF_TOKEN = os.getenv("HF_TOKEN")
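# The token is needed both for the AutoNLP login and for pushing the submission repo;
# fail fast with a clear error if it is missing.
if HF_TOKEN is None:
    raise ValueError("HF_TOKEN is not set; add it to a .env file or export it in the environment")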

# AutoNLP login
autonlp_login = subprocess.run(["autonlp", "login", "--api-key", HF_TOKEN], stdout=subprocess.PIPE)
if autonlp_login.returncode != 0:
    raise Exception(f"AutoNLP login failed with return code {autonlp_login.returncode}")

LOCAL_REPO = "submission_repo"

with st.form(key="form"):
    # Flush local repo
    shutil.rmtree(LOCAL_REPO, ignore_errors=True)
    uploaded_file = st.file_uploader("Upload a submission.json file", type=["json"])

    if uploaded_file is not None:
        data = str(uploaded_file.read(), "utf-8")
        json_data = json.loads(data)

    # The user's Hugging Face API token, used to resolve their username via whoami()
    token = st.text_input("API token", type="password")

    submit_button = st.form_submit_button("Submit")

# Only run the submission pipeline once the form is submitted with a file attached
if not submit_button or uploaded_file is None:
    st.stop()

validate_submission(json_data)
user_info = whoami(token)
user_name = user_info["name"]
submission_name = json_data["submission_name"]

# Create submission dataset under benchmarks ORG
dataset_repo_url = f"https://huggingface.co/datasets/benchmarks/gem-{user_name}"
repo = Repository(
    local_dir=LOCAL_REPO, clone_from=dataset_repo_url, repo_type="dataset", private=True, use_auth_token=HF_TOKEN
)
submission_metadata = {"benchmark": "gem", "type": "prediction", "submission_name": submission_name}
repo.repocard_metadata_save(submission_metadata)
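# The metadata above is written to the repo card (README.md) of the local clone and pushed with the submission.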

with open(f"{LOCAL_REPO}/submission.json", "w", encoding="utf-8") as f:
    json.dump(json_data, f)

commit_url = repo.push_to_hub(commit_message=f"Add GEM submission {submission_name}")
# push_to_hub() can return None (e.g. when there is nothing new to push); fall back to the HEAD commit
if commit_url is not None:
    commit_sha = commit_url.split("/")[-1]
else:
    commit_sha = repo.git_head_commit_url().split("/")[-1]

# Build a traceable submission id: <submission name>__<commit sha>__<unix timestamp>
submission_time = str(int(datetime.now().timestamp()))
submission_id = submission_name + "__" + commit_sha + "__" + submission_time
# Launch the AutoNLP job that evaluates the submission against the GEM references
process = subprocess.run(
    [
        "autonlp",
        "benchmark",
        "--eval_name",
        "gem",
        "--dataset",
        "GEM/references",
        "--submission",
        f"gem-{user_name}",
        "--submission_id",
        f"{submission_id}",
    ],
    stdout=subprocess.PIPE,
)
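# On success the CLI prints the created job as "# <id>"; parse it from the captured stdout.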
if process.returncode != 0:
    st.write("Error launching AutoNLP job")
else:
    try:
        match_job_id = re.search(r"# (\d+)", process.stdout.decode("utf-8"))
        job_id = match_job_id.group(1)
        st.write(f"Successfully launched evaluation job #{job_id} for submission {submission_name}!")
    except Exception as e:
        st.write(f"Could not extract AutoNLP job ID due to error: {e}")

# Echo the submission details back to the user
st.write(json_data["submission_name"])
st.write(commit_sha)

# Flush local repo
shutil.rmtree(LOCAL_REPO, ignore_errors=True)