lukecq's picture
add contact information
35957e0
raw
history blame
7.86 kB
import gradio as gr
import pandas as pd
import os
from huggingface_hub import snapshot_download
from apscheduler.schedulers.background import BackgroundScheduler
from src.display.about import (
CITATION_BUTTON_LABEL,
CITATION_BUTTON_TEXT,
CONTACT_TEXT,
EVALUATION_QUEUE_TEXT,
INTRODUCTION_TEXT,
LLM_BENCHMARKS_TEXT,
TITLE,
)
from src.display.css_html_js import custom_css
from src.envs import API
from src.leaderboard.load_results import load_data
# Fetch a local snapshot of the evaluation-results dataset from the Hub.
# TOKEN is read from the environment and is None when the variable is unset.
TOKEN = os.environ.get("TOKEN")
RESULTS_REPO = "SeaLLMs/SeaExam-results"

# The snapshot is stored under $HF_HOME when set, else the working directory.
CACHE_PATH = os.getenv("HF_HOME", ".")
EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "eval-results")
print(EVAL_RESULTS_PATH)

snapshot_download(
    repo_id=RESULTS_REPO,
    local_dir=EVAL_RESULTS_PATH,
    repo_type="dataset",
    token=TOKEN,
)
def restart_space():
    """Ask the Hub API to restart the ``SeaLLMs/SeaExam_leaderboard`` Space.

    Authenticates with the module-level ``TOKEN``.
    """
    space_id = "SeaLLMs/SeaExam_leaderboard"
    API.restart_space(repo_id=space_id, token=TOKEN)
# Read the results CSV from the downloaded snapshot. load_data's three return
# values are unpacked as the M3Exam, MMLU and averaged tables, in that order.
csv_path = f"{EVAL_RESULTS_PATH}/SeaExam_results.csv"
df_m3exam, df_mmlu, df_avg = load_data(csv_path)
# Searching and filtering
def update_table(
    hidden_df: pd.DataFrame,
    query: str,
):
    """Return the rows of ``hidden_df`` selected by the search ``query``.

    Args:
        hidden_df: Full table to search; it is copied, never modified.
        query: Search-box text, passed unchanged to ``filter_queries``.

    Returns:
        The filtered table with at most one row per ``Model`` value.
    """
    matched = filter_queries(query, hidden_df.copy())
    # Keep only the first row for each Model value in the filtered result.
    return matched.drop_duplicates(subset=["Model"])
def search_table(df: pd.DataFrame, query: str) -> pd.DataFrame:
    """Return the rows of ``df`` whose ``Model`` value contains ``query``.

    The comparison is a case-insensitive, literal substring match.

    Args:
        df: Table with a string ``Model`` column.
        query: Text to look for; taken literally, not as a regular expression.

    Returns:
        The matching rows of ``df`` (possibly empty), in their original order.
    """
    # regex=False: the query is free-form user input, so characters such as
    # "(" or "+" must not be interpreted as (possibly invalid) regex syntax.
    # na=False: rows with a missing Model never match, instead of producing
    # an NA mask that cannot be used for boolean indexing.
    mask = df["Model"].str.contains(query, case=False, regex=False, na=False)
    return df[mask]
def filter_queries(query: str, filtered_df: pd.DataFrame) -> pd.DataFrame:
    """Filter ``filtered_df`` by one or more ``;``-separated search terms.

    Each non-blank term is looked up with ``search_table`` and the non-empty
    results are concatenated in term order, so a row matched by several terms
    appears once per term.

    Args:
        query: Raw search text; terms are separated by ``;`` and stripped.
        filtered_df: Table to search.

    Returns:
        The concatenated matches, or ``filtered_df`` itself when the query is
        empty or no term matches anything.
    """
    if query == "":
        return filtered_df

    matches = []
    for raw_term in query.split(";"):
        term = raw_term.strip()
        if term == "":
            continue
        hits = search_table(filtered_df, term)
        if len(hits) > 0:
            matches.append(hits)

    # With no hits at all, fall back to the unfiltered table.
    if len(matches) > 0:
        return pd.concat(matches)
    return filtered_df
def _leaderboard_tab(table_df: pd.DataFrame, table_elem_id=None) -> None:
    """Fill the currently open tab with a search bar and a leaderboard table.

    Must be called inside an open ``gr.TabItem`` context so the components
    are attached to that tab.

    Args:
        table_df: Data shown in the table and used as the search source.
        table_elem_id: Optional ``elem_id`` for the visible table.
    """
    with gr.Row():
        search_bar = gr.Textbox(
            placeholder=" 🔍 Search for your model (separate multiple queries with `;`) and press ENTER...",
            show_label=False,
            elem_id="search-bar",
        )

    leaderboard_table = gr.components.Dataframe(
        value=table_df,
        elem_id=table_elem_id,
        interactive=False,
        visible=True,
    )

    # Invisible copy of the full table. Searches always read from this one,
    # so each new query starts from all rows rather than from the previous
    # (already filtered) result shown in the visible table.
    hidden_leaderboard_table_for_search = gr.components.Dataframe(
        value=table_df,
        interactive=False,
        visible=False,
    )

    # Pressing ENTER in the search bar re-filters the visible table.
    search_bar.submit(
        update_table,
        [hidden_leaderboard_table_for_search, search_bar],
        leaderboard_table,
    )


# Build the leaderboard UI: title, introduction, one searchable tab per
# results table, an About tab, and the contact information.
demo = gr.Blocks(css=custom_css)
with demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏅 Overall", elem_id="llm-benchmark-Sum", id=0):
            _leaderboard_tab(df_avg, table_elem_id="leaderboard-table")

        with gr.TabItem("M3Exam", elem_id="llm-benchmark-M3Exam", id=1):
            _leaderboard_tab(df_m3exam)

        with gr.TabItem("MMLU", elem_id="llm-benchmark-MMLU", id=2):
            _leaderboard_tab(df_mmlu)

        with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=3):
            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

    # NOTE: the citation accordion (CITATION_BUTTON_TEXT / CITATION_BUTTON_LABEL)
    # is currently disabled; only the contact information is shown.
    # NOTE(review): placement below the tabs (rather than inside the About
    # tab) was inferred from the surrounding layout — confirm.
    gr.Markdown(CONTACT_TEXT, elem_classes="markdown-text")
# Restart the Space every 30 minutes (1800 s) from a background thread.
# NOTE(review): presumably this re-runs the module so the results snapshot
# is downloaded again — confirm.
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=1800)
scheduler.start()

# Launch exactly once, and only after the scheduler is running. launch()
# blocks the script while the server is up, so a bare demo.launch() placed
# before the scheduler setup kept the scheduler from ever starting and made
# this configured launch unreachable.
# NOTE(review): share=True is kept as written; it is probably unnecessary when
# hosted on Hugging Face Spaces — confirm.
demo.queue(default_concurrency_limit=40).launch(share=True)