import json
import os
from datetime import datetime, timezone

import gradio as gr
import numpy as np
import pandas as pd
from apscheduler.schedulers.background import BackgroundScheduler
from huggingface_hub import HfApi

from src.assets.text_content import *  # TITLE, INTRODUCTION_TEXT, PLOT_*_TITLE, CITATION_*, CHANGELOG_TEXT, ...
from src.elo_leaderboard.load_results import get_elo_plots, get_elo_results_dicts
from src.assets.css_html_js import custom_css, get_window_url_params  # left in case you need them
from src.utils_display import EloEvalColumn, fields, styled_error, styled_warning, styled_message
from src.init import load_all_info_from_hub

# Clone / pull the human-eval and GPT-4-eval data from the Hub
H4_TOKEN = os.environ.get("H4_TOKEN", None)
HUMAN_EVAL_REPO = "HuggingFaceH4/scale-human-eval"
GPT_4_EVAL_REPO = "HuggingFaceH4/open_llm_leaderboard_oai_evals"
# Env vars are strings, so compare explicitly: bool("False") would be True
IS_PUBLIC = os.environ.get("IS_PUBLIC", "True").lower() == "true"
ADD_PLOTS = False

EVAL_REQUESTS_PATH = "auto_evals/eval_requests"

api = HfApi()
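

# Restarting the Space forces a full reload of the freshly pushed eval data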
def restart_space():
    api.restart_space(repo_id="HuggingFaceH4/open_llm_leaderboard", token=H4_TOKEN)
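

# Local git clones of the two eval-data repos; pulled again on each leaderboard refresh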
human_eval_repo, gpt_4_eval_repo = load_all_info_from_hub(HUMAN_EVAL_REPO, GPT_4_EVAL_REPO)

ELO_COLS = [c.name for c in fields(EloEvalColumn)]
ELO_TYPES = [c.type for c in fields(EloEvalColumn)]
ELO_SORT_COL = EloEvalColumn.gpt4.name
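

# Boolean row masks for complete / missing values in the given columns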
def has_no_nan_values(df, columns):
    return df[columns].notna().all(axis=1)


def has_nan_values(df, columns):
    return df[columns].isna().any(axis=1)
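

# Pull the latest human evals, compute the Elo results, and sort by the GPT-4 Elo column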
def get_elo_leaderboard(df_instruct, df_code_instruct, tie_allowed=False):
    if human_eval_repo:
        print("Pulling human_eval_repo changes")
        human_eval_repo.git_pull()

    all_data = get_elo_results_dicts(df_instruct, df_code_instruct, tie_allowed)
    dataframe = pd.DataFrame.from_records(all_data)
    dataframe = dataframe.sort_values(by=ELO_SORT_COL, ascending=False)
    return dataframe[ELO_COLS]
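

# Load the pairwise human evals (with and without code prompts) and build
# both leaderboard variants plus the four Elo plots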
def get_elo_elements():
    df_instruct = pd.read_json("human_evals/without_code.json")
    df_code_instruct = pd.read_json("human_evals/with_code.json")

    elo_leaderboard = get_elo_leaderboard(df_instruct, df_code_instruct, tie_allowed=False)
    elo_leaderboard_with_tie_allowed = get_elo_leaderboard(df_instruct, df_code_instruct, tie_allowed=True)

    plot_1, plot_2, plot_3, plot_4 = get_elo_plots(df_instruct, df_code_instruct, tie_allowed=False)

    return (
        elo_leaderboard,
        elo_leaderboard_with_tie_allowed,
        plot_1,
        plot_2,
        plot_3,
        plot_4,
    )
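

# Everything is computed once at startup; the hourly restart scheduled below refreshes it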
(
    elo_leaderboard,
    elo_leaderboard_with_tie_allowed,
    plot_1,
    plot_2,
    plot_3,
    plot_4,
) = get_elo_elements()
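
# Assemble the Gradio UI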
demo = gr.Blocks(css=custom_css)
with demo:
    gr.HTML(TITLE)
    with gr.Row():
        gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Column():
        with gr.Row():
            with gr.Column(scale=2):
                gr.Markdown(HUMAN_GPT_EVAL_TEXT, elem_classes="markdown-text")
            with gr.Column(scale=1):
                gr.Image("src/assets/scale-hf-logo.png", elem_id="scale-logo", show_label=False)

        gr.Markdown("## No tie allowed")
        elo_leaderboard_table = gr.components.Dataframe(
            value=elo_leaderboard,
            headers=ELO_COLS,
            datatype=ELO_TYPES,
            max_rows=5,
        )

        gr.Markdown("## Tie allowed*")
        elo_leaderboard_table_with_tie_allowed = gr.components.Dataframe(
            value=elo_leaderboard_with_tie_allowed,
            headers=ELO_COLS,
            datatype=ELO_TYPES,
            max_rows=5,
        )

        gr.Markdown(
            "\* Results when the scores of 4 and 5 were treated as ties.",
            elem_classes="markdown-text",
        )
        gr.Markdown(
            "Let us know in [this discussion](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard/discussions/65) which models we should add!",
            elem_id="models-to-add-text",
        )
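
    # Optional Elo visualizations, only rendered when ADD_PLOTS is True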
    if ADD_PLOTS:
        with gr.Box():
            visualization_title = gr.HTML(VISUALIZATION_TITLE)
            with gr.Row():
                with gr.Column():
                    gr.Markdown(f"#### Figure 1: {PLOT_1_TITLE}")
                    plot_1 = gr.Plot(plot_1, show_label=False)
                with gr.Column():
                    gr.Markdown(f"#### Figure 2: {PLOT_2_TITLE}")
                    plot_2 = gr.Plot(plot_2, show_label=False)
            with gr.Row():
                with gr.Column():
                    gr.Markdown(f"#### Figure 3: {PLOT_3_TITLE}")
                    plot_3 = gr.Plot(plot_3, show_label=False)
                with gr.Column():
                    gr.Markdown(f"#### Figure 4: {PLOT_4_TITLE}")
                    plot_4 = gr.Plot(plot_4, show_label=False)

    with gr.Row():
        with gr.Column():
            with gr.Accordion("📙 Citation", open=False):
                citation_button = gr.Textbox(
                    value=CITATION_BUTTON_TEXT,
                    label=CITATION_BUTTON_LABEL,
                    elem_id="citation-button",
                ).style(show_copy_button=True)
        with gr.Column():
            with gr.Accordion("✨ CHANGELOG", open=False):
                changelog = gr.Markdown(CHANGELOG_TEXT, elem_id="changelog-text")
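
# Restart the Space every hour so new eval results are picked up, then launch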
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=3600)
scheduler.start()

demo.queue(concurrency_count=40).launch()