File size: 1,087 Bytes
3edbc93
 
58db0a0
50e75cf
58db0a0
10e69e7
 
50e75cf
8f9985e
3edbc93
 
 
8f9985e
3edbc93
21f87d6
 
 
 
10e69e7
 
 
 
 
89d69bf
62b6599
3edbc93
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import pandas as pd
from datasets import load_dataset
import gradio as gr

from constants import RESULTS_REPO, ASSAY_RENAME, LEADERBOARD_RESULTS_COLUMNS

# Never truncate the column list when a DataFrame is printed (e.g. in logs).
pd.options.display.max_columns = None


def show_output_box(message):
    """Build a Gradio update that shows a component with ``message`` as its value.

    Args:
        message: Value to place in the target component.

    Returns:
        The result of ``gr.update`` with ``value=message`` and ``visible=True``.
    """
    box_update = gr.update(visible=True, value=message)
    return box_update


def fetch_hf_results():
    """Load the leaderboard metrics table from the Hugging Face results repo.

    Reads ``auto_submissions/metrics_all.csv`` from ``RESULTS_REPO``, checks
    that every column listed in ``LEADERBOARD_RESULTS_COLUMNS`` is present,
    keeps only the most recent row for each (model, assay, user) combination,
    and adds a ``property`` column obtained by mapping ``assay`` through
    ``ASSAY_RENAME``.

    Returns:
        pandas.DataFrame: rows sorted by ``submission_time`` (newest first),
        de-duplicated on (model, assay, user), with an extra ``property``
        column.

    Raises:
        AssertionError: if any column in ``LEADERBOARD_RESULTS_COLUMNS`` is
            missing from the loaded data.
    """
    # Should cache by default if not using force_redownload
    df = load_dataset(
        RESULTS_REPO,
        data_files="auto_submissions/metrics_all.csv",
    )["train"].to_pandas()

    # Validate explicitly instead of with `assert` so the check still runs
    # under `python -O`; AssertionError is kept so the exception type that
    # callers may already expect does not change.
    missing_columns = [
        col for col in LEADERBOARD_RESULTS_COLUMNS if col not in df.columns
    ]
    if missing_columns:
        raise AssertionError(
            f"Expected columns {LEADERBOARD_RESULTS_COLUMNS} not found in "
            f"{df.columns}. Missing columns: {missing_columns}"
        )

    # Show latest submission only: after sorting newest-first, the first row
    # of every (model, assay, user) group is the most recent one.
    df = df.sort_values("submission_time", ascending=False).drop_duplicates(
        subset=["model", "assay", "user"], keep="first"
    )
    # NOTE(review): presumably ASSAY_RENAME is a dict of assay -> display name;
    # if so, assays missing from it end up as NaN in `property` — confirm.
    df["property"] = df["assay"].map(ASSAY_RENAME)
    return df