qq-hzlh's picture
Upload 5 files
fc71d05 verified
raw
history blame
7.02 kB
import abc
import gradio as gr
from gen_table import *
from meta_data import *
# import pandas as pd
# pd.set_option('display.max_colwidth', 0)
# Extra <head> markup handed to gr.Blocks(head=...) further down.
# On viewports at least 1536px wide it forces .gradio-container to a
# min-width of var(--size-full).
head_style = """
<style>
@media (min-width: 1536px)
{
.gradio-container {
min-width: var(--size-full) !important;
}
}
</style>
"""
# Build the Gradio app. Everything up to the `if __name__` guard belongs to
# this `with` block.
# NOTE(review): the nesting of the `with` bodies is not visible here because
# the leading indentation of this file has been stripped; restore it before
# running.
with gr.Blocks(title="Open Agent Leaderboard", head=head_style) as demo:
# Load the overall math results; the loaded object is read through its
# 'time' and 'results' keys.
struct = load_results(OVERALL_MATH_SCORE_FILE)
timestamp = struct['time']
EVAL_TIME = format_timestamp(timestamp)
results = struct['results']
N_MODEL = len(results)
# N_DATA is taken before 'META' is dropped below, so it counts that entry too.
N_DATA = len(results['IO'])
# Dataset names are the entries of results['IO'] minus 'META'.
# list.remove raises ValueError if 'META' is not present.
DATASETS = list(results['IO'])
DATASETS.remove('META')
print(DATASETS)
with gr.Tabs(elem_classes='tab-buttons') as tabs:
gr.Markdown(LEADERBORAD_INTRODUCTION.format(EVAL_TIME))
# Tab 0: overall math leaderboard.
with gr.TabItem('🏅 Open Agent Overall Math Leaderboard', elem_id='math', id=0):
gr.Markdown(LEADERBOARD_MD['MATH_MAIN'])
# check_box is read below through its 'type_map', 'all', 'required' and
# 'essential' keys.
# NOTE: check_box, table, type_map, checkbox_group, headers and
# data_component are all assigned again later in this file for the detail tab.
check_box = BUILD_L1_DF(results, DEFAULT_MATH_BENCH)
table = generate_table(results, DEFAULT_MATH_BENCH)
type_map = check_box['type_map']
# 'Rank' is prepended to the headers below, so it needs a datatype entry.
type_map['Rank'] = 'number'
# Column picker: every column in 'all' is offered, 'required' is pre-ticked.
checkbox_group = gr.CheckboxGroup(
choices=check_box['all'],
value=check_box['required'],
label='Evaluation Dimension',
interactive=True,
)
# Initial column order: Rank, the essential columns, then the ticked columns.
headers = ['Rank'] + check_box['essential'] + checkbox_group.value
# Read-only table showing only the columns listed in headers.
data_component = gr.components.DataFrame(
value=table[headers],
type='pandas',
datatype=[type_map[x] for x in headers],
interactive=False,
wrap=True,
visible=True)
def _make_overall_filter(overall_table, overall_check_box, overall_type_map):
    """Build the change-callback for the overall leaderboard's column picker.

    The module-level names ``table``, ``check_box`` and ``type_map`` are
    assigned again further down in this file for the detail tab. A callback
    that reads them as globals resolves them when the event fires, i.e. after
    that reassignment, and would therefore render the detail-tab data. Passing
    the objects in here pins the callback to the overall-tab objects while
    leaving the callback's own signature unchanged.
    """

    def filter_df(fields, *args):
        """Return a DataFrame component restricted to the selected columns.

        Args:
            fields: Column names currently ticked in the checkbox group.
            *args: Ignored; accepted so extra event inputs do not break the call.

        Returns:
            A non-interactive ``gr.components.DataFrame`` showing ``'Rank'``,
            the essential columns and ``fields``, in that order.
        """
        # Base columns first, then whatever the user selected.
        headers = ['Rank'] + overall_check_box['essential'] + list(fields)
        return gr.components.DataFrame(
            value=overall_table[headers],  # show only the selected columns
            type='pandas',
            datatype=[overall_type_map[x] for x in headers],
            interactive=False,
            wrap=True,
            visible=True,
        )

    return filter_df


# Bind the overall-tab objects now, before the names are reused below.
filter_df = _make_overall_filter(table, check_box, type_map)
# The change event of checkbox_group only needs checkbox_group itself as input;
# the callback's return value replaces data_component.
checkbox_group.change(
fn=filter_df,
inputs=[checkbox_group],
outputs=data_component
)
# detail math leaderboard
# Tab 1: per-run detail table with extra Algorithm / Datasets / LLM filters.
with gr.TabItem('🏅 Open Agent Detail Math Leaderboard', elem_id='math_detail', id=1):
gr.Markdown(LEADERBOARD_MD['MATH_DETAIL'])
# Load the detail results; read through the same 'time' / 'results' keys.
struct_detail = load_results(DETAIL_MATH_SCORE_FILE)
# timestamp and EVAL_TIME are overwritten here; EVAL_TIME is not referenced
# again in the rest of this file.
timestamp = struct_detail['time']
EVAL_TIME = format_timestamp(timestamp)
results_detail = struct_detail['results']
# NOTE: from here on table, check_box and type_map refer to the detail data,
# replacing the overall-tab objects bound earlier under the same names.
table, check_box = BUILD_L2_DF(results_detail, DEFAULT_MATH_BENCH)
# table = generate_table_detail(results_detail, DEFAULT_MATH_BENCH)
type_map = check_box['type_map']
# 'Rank' is prepended to the headers below, so it needs a datatype entry.
type_map['Rank'] = 'number'
# Column picker: every column in 'all' is offered, 'required' is pre-ticked.
checkbox_group = gr.CheckboxGroup(
choices=check_box['all'],
value=check_box['required'],
label='Evaluation Dimension',
interactive=True,
)
# NOTE(review): unlike the overall tab and unlike this tab's filter_df, the
# initial headers do not include check_box['essential'] — confirm whether
# 'required' already contains those columns.
headers = ['Rank'] + checkbox_group.value
# Three row filters, each starting with every option selected.
with gr.Row():
algo_name = gr.CheckboxGroup(
choices=ALGORITHMS,
value=ALGORITHMS,
label='Algorithm',
interactive=True
)
dataset_name = gr.CheckboxGroup(
choices=DATASETS,
value=DATASETS,
label='Datasets',
interactive=True
)
llm_name = gr.CheckboxGroup(
choices=LLM,
value=LLM,
label='LLM',
interactive=True
)
# Read-only table showing only the columns listed in headers.
data_component = gr.components.DataFrame(
value=table[headers],
type='pandas',
datatype=[type_map[x] for x in headers],
interactive=False,
wrap=True,
visible=True)
def filter_df(fields, algos, datasets, llms):
    """Rebuild the detail table for the current column and row selections.

    Reads the module-level ``table``, ``check_box`` and ``type_map`` as they
    are bound for the detail tab.

    Args:
        fields: Column names ticked in the column picker.
        algos: Selected values for the ``'Algorithm'`` column.
        datasets: Selected values for the ``'Dataset'`` column.
        llms: Selected values for the ``'LLM'`` column.

    Returns:
        A non-interactive ``gr.components.DataFrame`` holding the rows whose
        Algorithm, Dataset and LLM are all among the selections, limited to
        ``'Rank'``, the essential columns and ``fields``.
    """
    headers = ['Rank'] + check_box['essential'] + list(fields)

    # Filter rows with a vectorised mask instead of a row-wise apply; this
    # also works when the table has no rows at all.
    keep = (
        table['Algorithm'].isin(algos)
        & table['Dataset'].isin(datasets)
        & table['LLM'].isin(llms)
    )
    df = table[keep].copy()

    # Re-rank inside each dataset by descending Score so ranks stay contiguous
    # after filtering. Ties keep row order (method='first').
    if 'Score' in df.columns:
        df['Rank'] = (
            df.groupby('Dataset')['Score']
            # Missing scores are ranked last rather than producing NaN, which
            # would make the integer cast below raise.
            .rank(method='first', ascending=False, na_option='bottom')
            # Make sure the rank is an integer.
            .astype(int)
        )

    return gr.components.DataFrame(
        value=df[headers],
        type='pandas',
        datatype=[type_map[x] for x in headers],
        interactive=False,
        wrap=True,
        visible=True,
    )
# Attach the same change handler to every checkbox group of the detail tab:
# whichever one changes, filter_df receives the current value of all four and
# its result replaces data_component.
_filter_inputs = [checkbox_group, algo_name, dataset_name, llm_name]
for _trigger in _filter_inputs:
    _trigger.change(
        fn=filter_df,
        inputs=list(_filter_inputs),  # fresh list per registration
        outputs=data_component,
    )
# Citation panel: an accordion that starts closed and holds a text box with
# the BibTeX snippet and a copy button.
with gr.Row(), gr.Accordion("📙 Citation", open=False):
    gr.Textbox(
        value=CITATION_BUTTON_TEXT,
        lines=7,
        label="Copy the BibTeX snippet to cite this source",
        elem_id="citation-button",
        show_copy_button=True,
    )
# Start the Gradio server only when this file is executed as a script.
# NOTE(review): server_name='0.0.0.0' presumably makes the server listen on
# all network interfaces — confirm against the Gradio `launch` documentation.
if __name__ == '__main__':
demo.launch(server_name='0.0.0.0')