"""Gradio app showing animated "bar chart race" leaderboards of LLM scores.

At import time the module downloads the race dataset, normalises it to one
score per model per day, builds one bar chart race per model type, and
launches the Gradio interface. A background job restarts the hosting Space
every six hours.
"""

import os

import bar_chart_race as bcr
import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
from apscheduler.schedulers.background import BackgroundScheduler
from huggingface_hub import HfApi

from model_types import MODEL_TYPES, ModelType


def restart_space():
    """Ask the Hugging Face Hub to restart the Space hosting this app.

    The access token is read from the ``HF_TOKEN`` environment variable;
    ``None`` is passed through when the variable is not set.
    """
    token = os.environ.get("HF_TOKEN")
    HfApi(token=token).restart_space(
        repo_id="IlyasMoutawwakil/llm-bar-race",
        token=token,
    )


# Restart the Space every 6 hours.
# NOTE(review): presumably so the dataset below is re-downloaded - confirm.
scheduler = BackgroundScheduler()
scheduler.add_job(
    func=restart_space,
    trigger="interval",
    seconds=60 * 60 * 6,
)

open_llm_race_dataset = pd.read_parquet(
    "https://huggingface.co/datasets/IlyasMoutawwakil/llm-race-dataset/resolve/main/llm-race-dataset.parquet",
    engine="pyarrow",
)

# Resample every model to a daily frequency, keeping the last row of each day.
# NOTE(review): `how` and `fill_method` are legacy keywords forwarded to the
# underlying time grouper; the aggregation actually applied is `.last()`.
# Verify against the installed pandas version whether they still have any
# effect (in particular whether gaps are really forward-filled).
open_llm_race_dataset = (
    open_llm_race_dataset.set_index("date", drop=True)
    .groupby("model", as_index=False)
    .resample("D", how="last", closed="right", fill_method="ffill")
    .last()
    .reset_index(drop=False)
)

# Keep only rows on or after 2023-07-10. Dates are turned into zero-padded
# ISO strings first, so the string comparison orders chronologically.
open_llm_race_dataset["date"] = open_llm_race_dataset["date"].dt.strftime("%Y-%m-%d")
open_llm_race_dataset = open_llm_race_dataset[
    open_llm_race_dataset["date"] >= "2023-07-10"
]

# Keep the three columns used below, drop rows with missing values, then keep
# a single row per (model, date) pair. Done without `inplace=True` so that a
# column selection of another frame is never mutated.
open_llm_race_dataset = (
    open_llm_race_dataset[["date", "score", "model"]]
    .dropna()
    .drop_duplicates(subset=["model", "date"])
)

# Tag every row with its model type name; models that are not listed in
# MODEL_TYPES are tagged with ModelType.Unknown.
open_llm_race_dataset["type"] = open_llm_race_dataset["model"].apply(
    lambda x: MODEL_TYPES[x].name if x in MODEL_TYPES else ModelType.Unknown.name
)


def get_bar_chart(model_type: str, top_n: int = 10, title: str = ""):
    """Build the bar chart race for one model type as a Gradio HTML component.

    Args:
        model_type: Value of the ``type`` column to select (a ``ModelType``
            member name).
        top_n: Number of bars shown at once.
        title: Title drawn on the chart.

    Returns:
        A ``gr.HTML`` component wrapping the value returned by
        ``bcr.bar_chart_race``.
    """
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.set_xlim(0, 100)
    # Wide left margin for the model-name tick labels.
    plt.subplots_adjust(left=0.30, right=0.98)

    rows = open_llm_race_dataset[open_llm_race_dataset["type"] == model_type]
    scores = rows.pivot(index="date", columns="model", values="score")

    # A model that has already appeared gets 0 on later dates where it has no
    # score; dates before its first appearance stay NaN. A cell counts as
    # "already seen" exactly when forward-filling yields a value there.
    already_seen = scores.ffill().notna()
    scores = scores.fillna(0).where(already_seen)

    # No filename is given, so the animation is returned rather than saved.
    race_html = bcr.bar_chart_race(
        scores,
        fig=fig,
        title=title,
        n_bars=top_n,
        fixed_max=True,
        bar_label_font=10,
        tick_label_font=10,
        period_length=1000,
        steps_per_period=20,
        end_period_pause=100,
        filter_column_colors=True,
        bar_texttemplate="{x:.2f}%",
        bar_kwargs={"alpha": 0.5, "ec": "black", "lw": 2},
    )
    return gr.HTML(race_html)


# (tab label / chart title, model type) in display order.
_TABS = (
    ("Pretrained Models", ModelType.PT),
    ("Instructions Finetuned Models", ModelType.IFT),
    ("RLHF Models", ModelType.RL),
    ("Finetuned Models", ModelType.FT),
)

# Demo interface
demo = gr.Blocks()
with demo:
    # Leaderboard title.
    # NOTE(review): the title is plain text surrounded by blank lines; if it
    # was meant to be wrapped in heading markup, that markup is not present
    # in this file - confirm.
    gr.HTML("\n\nLLM Bar Race 📊🏃‍♂️\n\n")

    with gr.Tabs():
        for tab_label, tab_type in _TABS:
            with gr.TabItem(label=tab_label):
                get_bar_chart(tab_type.name, title=tab_label)

scheduler.start()
# NOTE(review): `concurrency_count` is specific to some Gradio releases -
# verify against the pinned version.
demo.queue(concurrency_count=10).launch()