# EmoBench / app.py
# Listever's picture
# Update app.py
# 7573164 verified
# import gradio as gr
# from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
# import pandas as pd
# from apscheduler.schedulers.background import BackgroundScheduler
# from huggingface_hub import snapshot_download
# from src.about import (
# CITATION_BUTTON_LABEL,
# CITATION_BUTTON_TEXT,
# EVALUATION_QUEUE_TEXT,
# INTRODUCTION_TEXT,
# LLM_BENCHMARKS_TEXT,
# TITLE,
# )
# from src.display.css_html_js import custom_css
# from src.display.utils import (
# BENCHMARK_COLS,
# COLS,
# EVAL_COLS,
# EVAL_TYPES,
# AutoEvalColumn,
# ModelType,
# fields,
# WeightType,
# Precision
# )
# from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
# from src.populate import get_evaluation_queue_df, get_leaderboard_df
# from src.submission.submit import add_new_eval
# def restart_space():
# API.restart_space(repo_id=REPO_ID)
# ### Space initialisation
# try:
# print(EVAL_REQUESTS_PATH)
# snapshot_download(
# repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
# )
# except Exception:
# restart_space()
# try:
# print(EVAL_RESULTS_PATH)
# snapshot_download(
# repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
# )
# except Exception:
# restart_space()
# LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
# (
# finished_eval_queue_df,
# running_eval_queue_df,
# pending_eval_queue_df,
# ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
# def init_leaderboard(dataframe):
# if dataframe is None or dataframe.empty:
# raise ValueError("Leaderboard DataFrame is empty or None.")
# return Leaderboard(
# value=dataframe,
# datatype=[c.type for c in fields(AutoEvalColumn)],
# select_columns=SelectColumns(
# default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
# cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
# label="Select Columns to Display:",
# ),
# search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name],
# hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
# filter_columns=[
# ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
# ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
# ColumnFilter(
# AutoEvalColumn.params.name,
# type="slider",
# min=0.01,
# max=150,
# label="Select the number of parameters (B)",
# ),
# ColumnFilter(
# AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True
# ),
# ],
# bool_checkboxgroup_label="Hide models",
# interactive=False,
# )
# demo = gr.Blocks(css=custom_css)
# with demo:
# gr.HTML(TITLE)
# gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
# with gr.Tabs(elem_classes="tab-buttons") as tabs:
# with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
# leaderboard = init_leaderboard(LEADERBOARD_DF)
# with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
# gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
# with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
# with gr.Column():
# with gr.Row():
# gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
# with gr.Column():
# with gr.Accordion(
# f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
# open=False,
# ):
# with gr.Row():
# finished_eval_table = gr.components.Dataframe(
# value=finished_eval_queue_df,
# headers=EVAL_COLS,
# datatype=EVAL_TYPES,
# row_count=5,
# )
# with gr.Accordion(
# f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
# open=False,
# ):
# with gr.Row():
# running_eval_table = gr.components.Dataframe(
# value=running_eval_queue_df,
# headers=EVAL_COLS,
# datatype=EVAL_TYPES,
# row_count=5,
# )
# with gr.Accordion(
# f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
# open=False,
# ):
# with gr.Row():
# pending_eval_table = gr.components.Dataframe(
# value=pending_eval_queue_df,
# headers=EVAL_COLS,
# datatype=EVAL_TYPES,
# row_count=5,
# )
# with gr.Row():
# gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
# with gr.Row():
# with gr.Column():
# model_name_textbox = gr.Textbox(label="Model name")
# revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
# model_type = gr.Dropdown(
# choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
# label="Model type",
# multiselect=False,
# value=None,
# interactive=True,
# )
# with gr.Column():
# precision = gr.Dropdown(
# choices=[i.value.name for i in Precision if i != Precision.Unknown],
# label="Precision",
# multiselect=False,
# value="float16",
# interactive=True,
# )
# weight_type = gr.Dropdown(
# choices=[i.value.name for i in WeightType],
# label="Weights type",
# multiselect=False,
# value="Original",
# interactive=True,
# )
# base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
# submit_button = gr.Button("Submit Eval")
# submission_result = gr.Markdown()
# submit_button.click(
# add_new_eval,
# [
# model_name_textbox,
# base_model_name_textbox,
# revision_name_textbox,
# precision,
# weight_type,
# model_type,
# ],
# submission_result,
# )
# with gr.Row():
# with gr.Accordion("📙 Citation", open=False):
# citation_button = gr.Textbox(
# value=CITATION_BUTTON_TEXT,
# label=CITATION_BUTTON_LABEL,
# lines=20,
# elem_id="citation-button",
# show_copy_button=True,
# )
# scheduler = BackgroundScheduler()
# scheduler.add_job(restart_space, "interval", seconds=1800)
# scheduler.start()
# demo.queue(default_concurrency_limit=40).launch()
# Names exported by a star-import of this module.
# NOTE(review): of these, only `block` is defined in the code visible in this
# file; the other three are presumably provided by `from constants import *`
# -- confirm, since a star-import fails on any name listed here that the
# module does not actually define.
__all__ = ['block', 'make_clickable_model', 'make_clickable_user', 'get_submissions']
import os
import gradio as gr
import pandas as pd
import json
import tempfile
from constants import *
from huggingface_hub import Repository
# Access token read from the HF_TOKEN environment variable (None when unset);
# it is passed as `use_auth_token` to every Repository(...) call in this file.
HF_TOKEN = os.environ.get("HF_TOKEN")
# NOTE(review): a `global` statement at module scope has no effect -- names
# assigned at this level are already module globals.
global data_component, filter_component
def download_csv():
    """Pull the submission dataset and hand the leaderboard CSV to the UI.

    Returns:
        A 2-tuple ``(CSV_DIR, gr.update(visible=True))``: the CSV path followed
        by an update that makes the receiving component visible.
    """
    # Build a handle on the submission dataset repo and pull the latest results.
    results_repo = Repository(
        local_dir=SUBMISSION_NAME,
        clone_from=SUBMISSION_URL,
        use_auth_token=HF_TOKEN,
        repo_type="dataset",
    )
    results_repo.git_pull()

    reveal = gr.update(visible=True)
    return CSV_DIR, reveal
def upload_file(files):
    """Return the paths of the uploaded files.

    Args:
        files: Iterable of uploaded items. Each item is either an object
            exposing a ``name`` attribute (its path) or a plain path string.
            ``None`` (nothing uploaded) is accepted.

    Returns:
        A list with one path per item, in the same order; an empty list when
        ``files`` is ``None`` or empty.
    """
    if files is None:
        # Nothing uploaded: report "no files" instead of failing on iteration.
        return []
    # Items without a ``name`` attribute (e.g. path strings) are returned as-is.
    return [getattr(file, "name", file) for file in files]
def _listed_model_name(cell):
    """Return the bare model name held in a leaderboard ``Model`` cell.

    Cells are written either as a markdown link ``[name](url)`` or as the plain
    name when no link was supplied; both forms yield ``name``.
    """
    text = str(cell)
    if text.startswith('[') and ']' in text:
        return text[1:text.index(']')]
    return text


def add_new_eval(
    input_file,
    model_name_textbox: str,
    revision_name_textbox: str,
    model_type: str,
    model_link: str,
    model_size: str,
    LLM_type: str,
    LLM_name_textbox: str,
):
    """Add or overwrite a leaderboard row from an uploaded JSON result file.

    Args:
        input_file: Raw JSON content of the upload (anything ``json.loads``
            accepts), or ``None`` when nothing was uploaded.
        model_name_textbox: Name used for a new entry.
        revision_name_textbox: When non-empty, this name is used instead of
            ``model_name_textbox``; if a row with that name already exists it
            is overwritten, otherwise a new row is appended.
        model_type: Value stored in the first column of the row.
        model_link: When non-empty, the model name is stored as a markdown
            link ``[name](model_link)``.
        model_size: Accepted for interface compatibility; not used here.
        LLM_type: Selected LLM; the value ``'Other'`` means
            ``LLM_name_textbox`` is used instead.
        LLM_name_textbox: Free-text LLM name, used only for ``'Other'``.

    Returns:
        An error string when the upload is missing or is not a JSON object,
        otherwise ``0`` after the CSV has been rewritten and pushed.
    """
    if input_file is None:
        return "Error! Empty file!"
    try:
        upload_data = json.loads(input_file)
    except ValueError:
        # Covers json.JSONDecodeError and undecodable bytes alike.
        return "Error! Invalid json file!"
    if not isinstance(upload_data, dict):
        # Scores are looked up by task key, so only a JSON object is usable.
        return "Error! Invalid json file!"

    submission_repo = Repository(
        local_dir=SUBMISSION_NAME,
        clone_from=SUBMISSION_URL,
        use_auth_token=HF_TOKEN,
        repo_type="dataset",
    )
    submission_repo.git_pull()
    csv_data = pd.read_csv(CSV_DIR)

    LLM_name = LLM_name_textbox if LLM_type == 'Other' else LLM_type

    # Default target is a new row appended after the existing ones.
    col = csv_data.shape[0]
    if revision_name_textbox == '':
        model_name = model_name_textbox
    else:
        model_name = revision_name_textbox
        # Compare against bare names so rows stored without a link match too.
        name_list = [_listed_model_name(name) for name in csv_data['Model']]
        if revision_name_textbox in name_list:
            col = name_list.index(revision_name_textbox)

    if model_link != '':
        model_name = '[' + model_name + '](' + model_link + ')'

    # Row layout: model type, model name, LLM name, then one score per task
    # in TASK_INFO order (0 for tasks missing from the upload).
    new_data = [model_type, model_name, LLM_name]
    new_data.extend(upload_data.get(key, 0) for key in TASK_INFO)

    csv_data.loc[col] = new_data
    csv_data.to_csv(CSV_DIR, index=False)
    submission_repo.push_to_hub()
    return 0
def get_baseline_df():
    """Load the leaderboard restricted to the currently checked columns.

    Pulls the submission dataset, reads the CSV at ``CSV_DIR``, orders rows by
    the ``Avg`` column (highest first) and keeps ``MODEL_INFO`` plus whatever
    the module-level ``checkbox_group`` currently holds as its value.

    Returns:
        The sorted, column-filtered ``pandas.DataFrame``.
    """
    repo = Repository(
        local_dir=SUBMISSION_NAME,
        clone_from=SUBMISSION_URL,
        use_auth_token=HF_TOKEN,
        repo_type="dataset",
    )
    repo.git_pull()

    ranked = pd.read_csv(CSV_DIR).sort_values(by="Avg", ascending=False)
    shown_columns = MODEL_INFO + checkbox_group.value
    return ranked[shown_columns]
def get_all_df():
    """Load the full leaderboard, best ``Avg`` first.

    Pulls the submission dataset and reads the CSV at ``CSV_DIR`` with every
    column kept.

    Returns:
        A ``pandas.DataFrame`` sorted by ``Avg`` in descending order.
    """
    repo = Repository(
        local_dir=SUBMISSION_NAME,
        clone_from=SUBMISSION_URL,
        use_auth_token=HF_TOKEN,
        repo_type="dataset",
    )
    repo.git_pull()

    leaderboard = pd.read_csv(CSV_DIR)
    return leaderboard.sort_values(by="Avg", ascending=False)
def on_filter_model_size_method_change(selected_columns):
    """Rebuild the leaderboard table for the selected evaluation columns.

    Args:
        selected_columns: Column names ticked in the checkbox group. Names not
            present in ``TASK_INFO`` are ignored; ``None`` or an empty
            selection is allowed.

    Returns:
        A non-interactive ``gr.components.Dataframe`` showing ``MODEL_INFO``
        plus the selected task columns (in ``TASK_INFO`` order), sorted
        descending by the first selected task column. With no task column
        selected, the ``Avg`` ordering produced by ``get_all_df`` is kept.
    """
    updated_data = get_all_df()

    # Keep only known task columns, ordered as in TASK_INFO.
    requested = selected_columns or []
    task_columns = [item for item in TASK_INFO if item in requested]
    present_columns = MODEL_INFO + task_columns

    updated_data = updated_data[present_columns]
    if task_columns:
        # Guarded: indexing [0] on an empty selection would raise IndexError.
        updated_data = updated_data.sort_values(by=task_columns[0], ascending=False)

    # Each column's datatype sits at the same position as its name in COLUMN_NAMES.
    update_datatype = [DATA_TITILE_TYPE[COLUMN_NAMES.index(x)] for x in present_columns]

    return gr.components.Dataframe(
        value=updated_data,
        headers=present_columns,
        type="pandas",
        datatype=update_datatype,
        interactive=False,
        visible=True,
    )
def _report_submission(*submission_fields):
    """Run ``add_new_eval`` and turn its result into text for the status area.

    ``add_new_eval`` returns an error string on failure and a non-string value
    on success; the latter is replaced by a short confirmation so the result
    can always be shown in a Markdown component.
    """
    outcome = add_new_eval(*submission_fields)
    if isinstance(outcome, str):
        return outcome
    return "Submission received."


# Top-level Gradio app: leaderboard tab, about tab, submission tab, plus
# refresh/download controls below the tabs.
# NOTE(review): the nesting below was reconstructed from the statement order;
# confirm the intended layout.
block = gr.Blocks()
with block:
    gr.Markdown(
        LEADERBORAD_INTRODUCTION
    )
    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("📊 MVBench", elem_id="mvbench-tab-table", id=1):
            with gr.Row():
                with gr.Accordion("Citation", open=False):
                    citation_button = gr.Textbox(
                        value=CITATION_BUTTON_TEXT,
                        label=CITATION_BUTTON_LABEL,
                        elem_id="citation-button",
                        lines=10,
                    )

            gr.Markdown(
                TABLE_INTRODUCTION
            )

            # selection for column part:
            checkbox_group = gr.CheckboxGroup(
                choices=TASK_INFO,
                value=AVG_INFO,
                label="Evaluation Dimension",
                interactive=True,
            )

            # The callable is passed as the value, so Gradio invokes it to
            # produce the table contents rather than using a fixed frame.
            data_component = gr.components.Dataframe(
                value=get_baseline_df,
                headers=COLUMN_NAMES,
                type="pandas",
                datatype=DATA_TITILE_TYPE,
                interactive=False,
                visible=True,
            )

            checkbox_group.change(
                fn=on_filter_model_size_method_change,
                inputs=[checkbox_group],
                outputs=data_component,
            )

        # table 2
        with gr.TabItem("📝 About", elem_id="mvbench-tab-table", id=2):
            gr.Markdown(LEADERBORAD_INFO, elem_classes="markdown-text")

        # table 3
        with gr.TabItem("🚀 Submit here! ", elem_id="mvbench-tab-table", id=3):
            gr.Markdown(LEADERBORAD_INTRODUCTION, elem_classes="markdown-text")

            with gr.Row():
                gr.Markdown(SUBMIT_INTRODUCTION, elem_classes="markdown-text")

            with gr.Row():
                gr.Markdown("# ✉️✨ Submit your model evaluation json file here!", elem_classes="markdown-text")

            with gr.Row():
                with gr.Column():
                    model_name_textbox = gr.Textbox(
                        label="Model name", placeholder="LLaMA-7B"
                    )
                    revision_name_textbox = gr.Textbox(
                        label="Revision Model Name", placeholder="LLaMA-7B"
                    )
                    model_type = gr.Dropdown(
                        choices=[
                            "LLM",
                            "ImageLLM",
                            "VideoLLM",
                            "Other",
                        ],
                        label="Model type",
                        multiselect=False,
                        value="ImageLLM",
                        interactive=True,
                    )

                with gr.Column():
                    LLM_type = gr.Dropdown(
                        choices=["Vicuna-7B", "Flan-T5-XL", "LLaMA-7B", "InternLM-7B", "Other"],
                        label="LLM type",
                        multiselect=False,
                        value="LLaMA-7B",
                        interactive=True,
                    )
                    LLM_name_textbox = gr.Textbox(
                        label="LLM model (for Other)",
                        placeholder="LLaMA-13B"
                    )
                    model_link = gr.Textbox(
                        label="Model Link", placeholder="https://huggingface.co/decapoda-research/llama-7b-hf"
                    )
                    model_size = gr.Textbox(
                        label="Model size", placeholder="7B(Input content format must be 'number+B' or '-')"
                    )

            with gr.Column():
                input_file = gr.components.File(label = "Click to Upload a json File", file_count="single", type='binary')
                submit_button = gr.Button("Submit Eval")

                submission_result = gr.Markdown()
                # Route the outcome into the status area; without an output
                # the error/success result would be silently discarded.
                submit_button.click(
                    _report_submission,
                    inputs = [
                        input_file,
                        model_name_textbox,
                        revision_name_textbox,
                        model_type,
                        model_link,
                        model_size,
                        LLM_type,
                        LLM_name_textbox,
                    ],
                    outputs=submission_result,
                )

    def refresh_data():
        """Return the leaderboard frame produced by ``get_baseline_df``."""
        value1 = get_baseline_df()
        return value1

    with gr.Row():
        data_run = gr.Button("Refresh")
    with gr.Row():
        result_download = gr.Button("Download Leaderboard")
        file_download = gr.File(label="download the csv of leaderborad.", visible=False)
        # Refresh re-runs the column filter, which reloads the CSV via get_all_df.
        data_run.click(on_filter_model_size_method_change, inputs=[checkbox_group], outputs=data_component)
        # download_csv returns (path, visibility update); both target the same File component.
        result_download.click(download_csv, inputs=None, outputs= [file_download,file_download])

block.launch()