# llm-bar-race / app.py
# IlyasMoutawwakil's picture
# fix
# b27966a
# raw
# history blame
# 3.2 kB
from apscheduler.schedulers.background import BackgroundScheduler
from model_types import MODEL_TYPES, ModelType
from huggingface_hub import HfApi
import matplotlib.pyplot as plt
import bar_chart_race as bcr
import pandas as pd
import gradio as gr
import os
def restart_space():
    """Ask the Hugging Face Hub to restart the ``IlyasMoutawwakil/llm-bar-race`` Space.

    The access token comes from the ``HF_TOKEN`` environment variable
    (``None`` when it is not set). It is passed both to the ``HfApi`` client
    and to the ``restart_space`` call.
    """
    hub_token = os.environ.get("HF_TOKEN", None)
    client = HfApi(token=hub_token)
    client.restart_space(
        repo_id="IlyasMoutawwakil/llm-bar-race",
        token=hub_token,
    )
# Background job that calls restart_space on a fixed six-hour interval
# (6 * 60 * 60 = 21 600 seconds).
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=6 * 60 * 60)
def _model_type_name(model):
    """Return the ``ModelType`` name registered for *model*, or the Unknown name."""
    if model in MODEL_TYPES:
        return MODEL_TYPES[model].name
    return ModelType.Unknown.name


def _load_race_dataset():
    """Download the race dataset and shape it for the bar chart races.

    Returns:
        A DataFrame with the columns ``date`` (``YYYY-MM-DD`` strings),
        ``score``, ``model`` and ``type``, holding at most one row per
        (model, date) pair and no missing values.
    """
    scores = pd.read_parquet(
        "https://huggingface.co/datasets/IlyasMoutawwakil/llm-race-dataset/resolve/main/llm-race-dataset.parquet",
        engine="pyarrow",
    )

    # resample every model to a daily frequency, keeping the last value per day
    by_model = scores.set_index("date", drop=True).groupby("model", as_index=False)
    # NOTE(review): `how` and `fill_method` are legacy resample keywords —
    # confirm the installed pandas still acts on them. If they are ignored,
    # days without data are not forward-filled and are removed by the
    # dropna() below.
    daily_bins = by_model.resample(
        "D", how="last", closed="right", fill_method="ffill"
    )
    scores = daily_bins.last().reset_index(drop=False)

    # render the dates as ISO strings
    # (a cut-off on date >= "2023-07-10" used to follow here; it is disabled)
    scores["date"] = scores["date"].dt.strftime("%Y-%m-%d")

    # Keep only the columns the charts need. The same name is rebound on
    # purpose so the wider intermediate frame is released right away.
    scores = scores[["date", "score", "model"]]
    # drop nan values
    scores.dropna(inplace=True)
    # drop duplicates on model and date
    scores.drop_duplicates(subset=["model", "date"], inplace=True)

    # add the model type
    scores["type"] = scores["model"].apply(_model_type_name)
    return scores


open_llm_race_dataset = _load_race_dataset()
def get_bar_chart(model_type: str, top_n: int = 10, title: str = ""):
    """Build a bar chart race for one model type and wrap it in a Gradio HTML component.

    Args:
        model_type: Value matched against the ``type`` column of
            ``open_llm_race_dataset``; only matching rows are plotted.
        top_n: Number of bars shown at once (passed as ``n_bars``).
        title: Title drawn on the chart.

    Returns:
        A ``gr.HTML`` component holding whatever ``bcr.bar_chart_race``
        returned for the selected models.
    """
    fig, ax = plt.subplots(figsize=(12, 6))
    try:
        ax.set_xlim(0, 100)
        # Wide left margin for the model names used as bar labels.
        fig.subplots_adjust(left=0.30, right=0.98)

        rows = open_llm_race_dataset[open_llm_race_dataset["type"] == model_type]
        # Wide table: one row per date, one column per model. Models without
        # a score on a given date stay NaN (no zero-fill is applied).
        scores_by_date = rows.pivot(index="date", columns="model", values="score")

        race_html = bcr.bar_chart_race(
            scores_by_date,
            fig=fig,
            title=title,
            n_bars=top_n,
            fixed_max=True,
            bar_label_font=10,
            tick_label_font=10,
            period_length=1000,
            steps_per_period=20,
            end_period_pause=100,
            filter_column_colors=True,
            bar_texttemplate="{x:.2f}%",
            bar_kwargs={"alpha": 0.5, "ec": "black", "lw": 2},
        )
    finally:
        # The figure is only needed while rendering; close it so it does not
        # stay registered with pyplot for the life of the process.
        plt.close(fig)

    return gr.HTML(race_html)
# Demo interface: one tab per model type, each holding its own bar chart race.
demo = gr.Blocks()
with demo:
    # leaderboard title
    gr.HTML("<center><h1>LLM Bar Race 📊🏃‍♂️</h1></center>")
    with gr.Tabs():
        # The tab label doubles as the chart title.
        for tab_type, tab_label in (
            (ModelType.PT, "Pretrained Models"),
            (ModelType.IFT, "Instructions Finetuned Models"),
            (ModelType.RL, "RLHF Models"),
            (ModelType.FT, "Finetuned Models"),
        ):
            with gr.TabItem(label=tab_label):
                get_bar_chart(tab_type.name, title=tab_label)

# Start the periodic restart job, then serve the interface.
scheduler.start()
# NOTE(review): `concurrency_count` looks like a Gradio 3.x queue argument —
# confirm against the gradio version this Space pins.
demo.queue(concurrency_count=10).launch()