import gradio as gr
import pandas as pd
import os
from huggingface_hub import snapshot_download, login
from apscheduler.schedulers.background import BackgroundScheduler
from src.display.about import (
    CONTACT_TEXT,
    INTRODUCTION_TEXT,
    LLM_BENCHMARKS_TEXT,
    TITLE,
    SUB_TITLE,
)
from src.display.css_html_js import custom_css
from src.envs import API
from src.leaderboard.load_results import load_data
# Download (or refresh) the SeaExam evaluation results from the Hub
TOKEN = os.environ.get("TOKEN", None)
if TOKEN:
    login(token=TOKEN)

RESULTS_REPO = "SeaLLMs/SeaExam-results"
CACHE_PATH = os.getenv("HF_HOME", ".")
EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "eval-results")
print(f"Caching evaluation results at: {EVAL_RESULTS_PATH}")
snapshot_download(
    repo_id=RESULTS_REPO,
    local_dir=EVAL_RESULTS_PATH,
    repo_type="dataset",
    token=TOKEN,
)
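
# Restart the Space periodically so the leaderboard picks up newly uploaded
# results (the dataset snapshot above is re-downloaded on startup).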
def restart_space():
API.restart_space(repo_id="SeaLLMs/SeaExam_leaderboard", token=TOKEN)
# Columns to display, in order; TYPES lists the Gradio datatype of each column
show_columns = ['R', 'Model', 'type', 'open?', 'params(B)', 'avg_sea ⬇️', 'en', 'zh', 'id', 'th', 'vi', 'avg']
TYPES = ['number', 'markdown', 'str', 'str', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number']
# Load the results from the CSV file: one dataframe per benchmark plus the average
csv_path = f'{EVAL_RESULTS_PATH}/SeaExam_results_20240425.csv'
df_m3exam, df_mmlu, df_avg = load_data(csv_path)

# Default views shown on page load: chat models only, display columns only
df_avg_init = df_avg.copy()[df_avg['type'] == '🔶 chat'][show_columns]
df_m3exam_init = df_m3exam.copy()[df_m3exam['type'] == '🔶 chat'][show_columns]
df_mmlu_init = df_mmlu.copy()[df_mmlu['type'] == '🔶 chat'][show_columns]
# Searching and filtering
def update_table(
    hidden_df: pd.DataFrame,
    type_query: list,
    open_query: list,
    query: str,
):
    """Filter the full hidden table by model type, openness, and search query."""
    filtered_df = hidden_df.copy()
    filtered_df = filtered_df[filtered_df['type'].isin(type_query)]
    # The 'open?' column stores 'Y'/'N'; map the checkbox labels onto it
    map_open = {'open': 'Y', 'closed': 'N'}
    filtered_df = filtered_df[filtered_df['open?'].isin([map_open[o] for o in open_query])]
    filtered_df = filter_queries(query, filtered_df)
    # Drop duplicates introduced when several search terms match the same row
    df = filtered_df.drop_duplicates()
    df = df[show_columns]
    return df
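
# For example, update_table(df_avg, ['🔶 chat'], ['open'], 'llama') returns the
# chat-type, open-source rows of df_avg whose Model entry contains "llama".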
def search_table(df: pd.DataFrame, query: str) -> pd.DataFrame:
    """Keep rows whose Model name contains the query (case-insensitive)."""
    return df[df['Model'].str.contains(query, case=False)]
def filter_queries(query: str, filtered_df: pd.DataFrame) -> pd.DataFrame:
    """Apply one or more `;`-separated search terms; a row matching any term is kept."""
    final_df = []
    if query != "":
        queries = [q.strip() for q in query.split(";")]
        for _q in queries:
            if _q != "":
                temp_filtered_df = search_table(filtered_df, _q)
                if len(temp_filtered_df) > 0:
                    final_df.append(temp_filtered_df)
        if len(final_df) > 0:
            filtered_df = pd.concat(final_df)
    return filtered_df
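
# e.g. filter_queries("llama; qwen", df) keeps rows whose Model contains either
# term; update_table's drop_duplicates() removes rows matched more than once.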
demo = gr.Blocks(css=custom_css)
with demo:
gr.HTML(TITLE)
gr.HTML(SUB_TITLE)
gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
with gr.Tabs(elem_classes="tab-buttons") as tabs:
with gr.TabItem("πŸ… Overall", elem_id="llm-benchmark-Sum", id=0):
with gr.Row():
with gr.Column():
with gr.Row():
search_bar = gr.Textbox(
placeholder=" πŸ” Search for your model (separate multiple queries with `;`) and press ENTER...",
show_label=False,
elem_id="search-bar",
)
                with gr.Column():
                    type_query = gr.CheckboxGroup(
                        choices=["🟢 base", "🔶 chat"],
                        value=["🔶 chat"],
                        label="Model types to show",
                        elem_id="type-select",
                        interactive=True,
                    )
                with gr.Column():
                    open_query = gr.CheckboxGroup(
                        choices=["open", "closed"],
                        value=["open", "closed"],
                        label="Open-source or closed-source models?",
                        elem_id="open-select",
                        interactive=True,
                    )
            # Visible table: starts from the default chat-only averaged view
            leaderboard_table = gr.components.Dataframe(
                value=df_avg_init,
                datatype=TYPES,
                elem_id="leaderboard-table",
                interactive=False,
                visible=True,
            )
            # Hidden table holding the full unfiltered data; the search and
            # filter callbacks read from it so every query runs on the complete dataset
            hidden_leaderboard_table_for_search = gr.components.Dataframe(
                value=df_avg,
                interactive=False,
                visible=False,
            )
            search_bar.submit(
                update_table,
                [
                    hidden_leaderboard_table_for_search,
                    type_query,
                    open_query,
                    search_bar,
                ],
                leaderboard_table,
            )
            # Re-filter the visible table whenever a checkbox selection changes
            for selector in [type_query, open_query]:
                selector.change(
                    update_table,
                    [
                        hidden_leaderboard_table_for_search,
                        type_query,
                        open_query,
                        search_bar,
                    ],
                    leaderboard_table,
                )
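
        # The M3Exam and MMLU tabs below repeat the same search/filter wiring,
        # each bound to its own benchmark-specific dataframes.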
with gr.TabItem("M3Exam", elem_id="llm-benchmark-M3Exam", id=1):
with gr.Row():
with gr.Column():
                search_bar = gr.Textbox(
                    placeholder=" 🔍 Search for your model (separate multiple queries with `;`) and press ENTER...",
                    show_label=False,
                    elem_id="search-bar",
                )
            with gr.Column():
                type_query = gr.CheckboxGroup(
                    choices=["🟢 base", "🔶 chat"],
                    value=["🔶 chat"],
                    label="Model types to show",
                    elem_id="type-select",
                    interactive=True,
                )
            with gr.Column():
                open_query = gr.CheckboxGroup(
                    choices=["open", "closed"],
                    value=["open", "closed"],
                    label="Open-source or closed-source models?",
                    elem_id="open-select",
                    interactive=True,
                )
            leaderboard_table = gr.components.Dataframe(
                value=df_m3exam_init,
                datatype=TYPES,
                interactive=False,
                visible=True,
            )
hidden_leaderboard_table_for_search = gr.components.Dataframe(
value=df_m3exam,
interactive=False,
visible=False,
)
search_bar.submit(
update_table,
[
hidden_leaderboard_table_for_search,
type_query,
open_query,
search_bar,
],
leaderboard_table,
)
for selector in [type_query, open_query]:
selector.change(
update_table,
[
hidden_leaderboard_table_for_search,
type_query,
open_query,
search_bar,
],
leaderboard_table,
)
with gr.TabItem("MMLU", elem_id="llm-benchmark-MMLU", id=2):
with gr.Row():
with gr.Column():
                search_bar = gr.Textbox(
                    placeholder=" 🔍 Search for your model (separate multiple queries with `;`) and press ENTER...",
                    show_label=False,
                    elem_id="search-bar",
                )
            with gr.Column():
                type_query = gr.CheckboxGroup(
                    choices=["🟢 base", "🔶 chat"],
                    value=["🔶 chat"],
                    label="Model types to show",
                    elem_id="type-select",
                    interactive=True,
                )
            with gr.Column():
                open_query = gr.CheckboxGroup(
                    choices=["open", "closed"],
                    value=["open", "closed"],
                    label="Open-source or closed-source models?",
                    elem_id="open-select",
                    interactive=True,
                )
            leaderboard_table = gr.components.Dataframe(
                value=df_mmlu_init,
                datatype=TYPES,
                interactive=False,
                visible=True,
            )
hidden_leaderboard_table_for_search = gr.components.Dataframe(
value=df_mmlu,
interactive=False,
visible=False,
)
search_bar.submit(
update_table,
[
hidden_leaderboard_table_for_search,
type_query,
open_query,
search_bar,
],
leaderboard_table,
)
for selector in [type_query, open_query]:
selector.change(
update_table,
[
hidden_leaderboard_table_for_search,
type_query,
open_query,
search_bar,
],
leaderboard_table,
)
with gr.TabItem("πŸ“ About", elem_id="llm-benchmark-tab-table", id=3):
gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
gr.Markdown(CONTACT_TEXT, elem_classes="markdown-text")
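
# A background job restarts the Space every 30 minutes (1800 s) so the
# leaderboard reloads any newly uploaded results.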
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=1800)
scheduler.start()

demo.queue(default_concurrency_limit=40).launch(share=True)