File size: 3,409 Bytes
b27966a
ad8e780
b27966a
ad8e780
 
 
 
b27966a
ad8e780
73a04d8
b27966a
 
 
 
 
73a04d8
ad8e780
b27966a
 
 
 
 
 
0bb31bc
803d3a0
 
 
 
0bb31bc
ad8e780
 
 
 
 
 
 
 
 
 
da05480
 
 
ad8e780
 
 
 
 
 
 
 
 
 
73a04d8
6cd57e5
73a04d8
 
803d3a0
3ee4539
73a04d8
 
88be2d3
 
 
 
 
 
 
 
73a04d8
 
803d3a0
6cd57e5
4b57226
73a04d8
803d3a0
 
73a04d8
 
 
 
803d3a0
 
73a04d8
83abc20
73a04d8
ad8e780
 
 
 
 
0bb31bc
ad8e780
 
6a1592c
6cd57e5
db6e029
 
e3ec4e5
b27966a
803d3a0
 
ad8e780
b27966a
ad8e780
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
from apscheduler.schedulers.background import BackgroundScheduler
from model_types import MODEL_TYPES, ModelType
from huggingface_hub import HfApi
import matplotlib.pyplot as plt
import bar_chart_race as bcr
import pandas as pd
import gradio as gr
import os


def restart_space():
    """Ask the Hugging Face Hub to restart the ``IlyasMoutawwakil/llm-bar-race`` Space.

    The access token is taken from the ``HF_TOKEN`` environment variable
    (``None`` when the variable is unset) and is handed both to the ``HfApi``
    client and to the ``restart_space`` call itself.
    """
    hf_token = os.environ.get("HF_TOKEN")
    api = HfApi(token=hf_token)
    api.restart_space(repo_id="IlyasMoutawwakil/llm-bar-race", token=hf_token)


# Register a recurring background job that calls restart_space every six
# hours (expressed in seconds). The job is only registered here; the
# scheduler itself is started at the bottom of the file.
# NOTE(review): presumably the restart exists so the dataset, which is loaded
# at import time, gets refreshed - confirm.
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=6 * 60 * 60)

# Leaderboard history, downloaded once when the module is imported.
open_llm_race_dataset = pd.read_parquet(
    "https://huggingface.co/datasets/IlyasMoutawwakil/llm-race-dataset/resolve/main/llm-race-dataset.parquet",
    engine="pyarrow",
)

# Resample every model to a daily frequency, keeping the last row of each day.
# The legacy `how=` / `fill_method=` keywords are intentionally not passed:
# pandas removed them from `resample`, and the aggregation is done by the
# explicit `.last()` call. No forward fill is applied here.
# NOTE(review): rows left empty by the resampling are expected to be removed
# by the dropna() below - confirm this is the desired handling of gaps.
open_llm_race_dataset = (
    open_llm_race_dataset.set_index("date", drop=True)
    .groupby("model", as_index=False)
    .resample("D", closed="right")
    .last()
    .reset_index(drop=False)
)

# Dates become "YYYY-MM-DD" strings, so the lexicographic comparison used for
# the cut-off below orders them chronologically.
open_llm_race_dataset["date"] = open_llm_race_dataset["date"].dt.strftime("%Y-%m-%d")

# Keep rows from 2023-07-10 onwards and only the columns the charts need, then
# drop rows with missing values and duplicate (model, date) pairs. The steps
# are chained without `inplace=True` so each one works on a fresh frame rather
# than mutating a slice of the previous one.
open_llm_race_dataset = (
    open_llm_race_dataset.loc[
        open_llm_race_dataset["date"] >= "2023-07-10",
        ["date", "score", "model"],
    ]
    .dropna()
    .drop_duplicates(subset=["model", "date"])
)

# Add the model type; models missing from MODEL_TYPES are labelled Unknown.
open_llm_race_dataset["type"] = open_llm_race_dataset["model"].apply(
    lambda model_name: (
        MODEL_TYPES[model_name].name
        if model_name in MODEL_TYPES
        else ModelType.Unknown.name
    )
)


def _zero_fill_after_first_score(scores: pd.DataFrame) -> pd.DataFrame:
    """Return *scores* with every gap that follows a column's first value set to 0.

    Cells that are missing before a column's first non-null value stay missing.
    """
    has_score = scores.notna()
    # Row-wise running "has this column held a value on this row or earlier?".
    seen_so_far = has_score.cummax()
    return scores.mask(seen_so_far & ~has_score, 0)


def get_bar_chart(model_type: str, top_n: int = 10, title: str = ""):
    """Build the bar chart race for one model type and wrap it in ``gr.HTML``.

    Args:
        model_type: Value matched against the ``type`` column of
            ``open_llm_race_dataset`` to select the models to plot.
        top_n: Number of bars shown at once (passed as ``n_bars``).
        title: Title drawn on the chart.

    Returns:
        A ``gr.HTML`` component built from the value returned by
        ``bcr.bar_chart_race``.
    """
    fig, ax = plt.subplots(figsize=(12, 6))
    try:
        ax.set_xlim(0, 100)
        plt.subplots_adjust(left=0.30, right=0.98)

        # One row per date, one column per model, scores as values.
        subset = open_llm_race_dataset[open_llm_race_dataset["type"] == model_type]
        subset = subset.pivot(index="date", columns="model", values="score")

        # A model that has already appeared but has no score on a later date is
        # plotted as 0; a model that has not appeared yet stays missing.
        subset = _zero_fill_after_first_score(subset)

        animation = bcr.bar_chart_race(
            subset,
            fig=fig,
            title=title,
            n_bars=top_n,
            fixed_max=True,
            bar_label_font=10,
            tick_label_font=10,
            period_length=1000,
            steps_per_period=20,
            end_period_pause=100,
            filter_column_colors=True,
            bar_texttemplate="{x:.2f}%",
            bar_kwargs={"alpha": 0.5, "ec": "black", "lw": 2},
        )
    finally:
        # pyplot keeps every figure it creates alive until it is closed, so
        # release this one once the animation has been rendered (or failed).
        plt.close(fig)
    return gr.HTML(animation)


# Demo interface
demo = gr.Blocks()
with demo:
    # Page title. The emoji (bar chart, man running) are spelled as escape
    # sequences so the heading does not depend on how this file's bytes are
    # decoded; the previous literal had been corrupted into mojibake.
    gr.HTML(
        "<center><h1>LLM Bar Race "
        "\U0001F4CA\U0001F3C3\u200D\u2642\uFE0F"
        "</h1></center>"
    )

    # One tab per model type; the tab label doubles as the chart title.
    with gr.Tabs():
        for model_type, heading in (
            (ModelType.PT, "Pretrained Models"),
            (ModelType.IFT, "Instructions Finetuned Models"),
            (ModelType.RL, "RLHF Models"),
            (ModelType.FT, "Finetuned Models"),
        ):
            with gr.TabItem(label=heading):
                get_bar_chart(model_type.name, title=heading)

# Start the periodic restart job, then serve the app.
# NOTE(review): `concurrency_count` is not accepted by newer Gradio releases -
# confirm against the Gradio version this Space pins before upgrading.
scheduler.start()
demo.queue(concurrency_count=10).launch()