lewtun's picture
lewtun HF staff
Update leaderboard with real data
b727941
raw history blame
No virus
2.23 kB
import os
from pathlib import Path
import pandas as pd
import requests
import streamlit as st
from datasets import get_dataset_config_names
from dotenv import load_dotenv
# Load variables from a local .env file when one exists in the working directory.
if Path(".env").is_file():
    load_dotenv(".env")

# Hugging Face Hub API token, read from the environment (None when unset).
auth_token = os.getenv("HF_HUB_TOKEN")
# Only build the Authorization header when a token is configured: concatenating
# "Bearer " with a missing (None) token raises TypeError at import time. Without
# a token the API requests below are sent unauthenticated.
header = {"Authorization": "Bearer " + auth_token} if auth_token else {}
# Configuration names of the "ought/raft" dataset; used as the leaderboard's
# per-task columns.
TASKS = get_dataset_config_names("ought/raft")
def extract_tags(dataset):
    """Turn a dataset's ``"key:value"`` tag strings into a dictionary.

    Each tag is split on its first colon only, so a value may itself contain
    colons. Tags without a colon carry no key/value pair and are skipped
    rather than raising ``ValueError``. A missing, ``None`` or empty
    ``"tags"`` entry yields an empty dictionary.

    Args:
        dataset: Mapping describing a dataset; its optional ``"tags"`` entry
            is an iterable of tag strings.

    Returns:
        A dict mapping each tag key to its value. When a key occurs more than
        once, the last occurrence wins.
    """
    tags = {}
    for tag in dataset.get("tags") or []:
        key, separator, value = tag.partition(":")
        if not separator:
            # Plain tag with no "key:value" structure; nothing to record.
            continue
        tags[key] = value
    return tags
def download_submissions():
    """Fetch the Hub dataset listing and keep the RAFT evaluation submissions.

    A dataset counts as a submission when its tags (as parsed by
    ``extract_tags``) contain ``benchmark == "ought/raft"`` and
    ``type == "evaluation"``.

    Returns:
        A list of the matching dataset entries, in the order the API
        returned them.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.RequestException: On connection problems or timeouts.
    """
    # HTTPS so the bearer token in `header` is never sent in clear text; the
    # timeout keeps the app from hanging forever on a stalled connection.
    response = requests.get(
        "https://huggingface.co/api/datasets", headers=header, timeout=30
    )
    # Fail loudly on an error response instead of trying to parse its body
    # as a dataset listing.
    response.raise_for_status()
    all_datasets = response.json()

    submissions = []
    for dataset in all_datasets:
        tags = extract_tags(dataset)
        if tags.get("benchmark") == "ought/raft" and tags.get("type") == "evaluation":
            submissions.append(dataset)
    return submissions
def format_submissions(submissions):
    """Build the leaderboard table from a list of submission datasets.

    For each selected submission the full dataset record is fetched from the
    Hub API and its ``card_data`` is read: ``submission_dataset`` becomes the
    "Submission" cell, and the first metric value of every entry in
    ``results`` becomes the cell of the column named after that task.

    Args:
        submissions: List of dataset entries, each with an ``"id"`` key (as
            returned by ``download_submissions``).

    Returns:
        A ``pandas.DataFrame`` with a "Submission" column, an "Overall"
        column (row-wise mean over the task columns) and one column per
        entry of ``TASKS``. A task a submission did not report is NaN; with
        pandas' default ``skipna`` behaviour "Overall" then averages only the
        reported tasks.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.RequestException: On connection problems or timeouts.
        KeyError: If a record does not follow the expected card schema.
    """
    submission_data = {"Submission": [], **{t: [] for t in TASKS}}
    # TODO(lewtun): delete / filter all the junk repos from development
    # Only the last two entries are used: they are the latest submissions
    # which adhere to the model card schema.
    for submission in submissions[-2:]:
        submission_id = submission["id"]
        # HTTPS so the bearer token is not sent in clear text; the timeout
        # prevents a stalled connection from blocking the page indefinitely.
        response = requests.get(
            f"https://huggingface.co/api/datasets/{submission_id}?full=true",
            headers=header,
            timeout=30,
        )
        response.raise_for_status()
        card_data = response.json()["card_data"]
        submission_data["Submission"].append(card_data["submission_dataset"])

        # Collect this submission's scores by task name first ...
        scores = {}
        for task in card_data["results"]:
            task_data = task["task"]
            scores[task_data["name"]] = task_data["metrics"][0]["value"]
        # ... then fill exactly one cell per known task, so every column keeps
        # the same length even when a card omits a task (NaN) or lists a task
        # name that is not in TASKS (ignored). Unequal column lengths would
        # make the DataFrame constructor raise.
        for task_name in TASKS:
            submission_data[task_name].append(scores.get(task_name, float("nan")))

    df = pd.DataFrame(submission_data)
    df.insert(1, "Overall", df[TASKS].mean(axis=1))
    return df
###########
### APP ###
###########
# Streamlit page: runs top to bottom on every execution of the script.
# NOTE(review): Streamlit documents that set_page_config must come before any
# other st.* call on the page, so keep it as the first statement here.
st.set_page_config(layout="wide")
st.title("RAFT Leaderboard")
# Fetch the tagged submission datasets, then turn them into the score table.
submissions = download_submissions()
df = format_submissions(submissions)
# hack to remove index column from https://github.com/streamlit/streamlit/issues/641
# (the index is replaced by a column of empty strings, so no row numbers show)
st.table(df.assign(hack="").set_index("hack"))