# NOTE: the lines below are residue from the hosting page's file viewer (Space
# status, file size, per-line commit hashes); they are not part of the program.
# Spaces:
# Runtime error
# Runtime error
# File size: 9,359 Bytes
# 0f89c55 733bd44 9a678a4 a6d507f 9a678a4 a6d507f 50154dd 733bd44 a6d507f 9a678a4 a6d507f 32e04fa a6d507f 733bd44 a6d507f 32e04fa a6d507f 22cd459 a6d507f 22cd459 733bd44 a6d507f 733bd44 a6d507f 733bd44 a6d507f 50154dd a6d507f 50154dd a6d507f 9a678a4 a6d507f 50154dd 9a678a4 a6d507f 9a678a4 50154dd a6d507f 9a678a4 50154dd 9a678a4 a6d507f 9a678a4 50154dd 9a678a4 a6d507f 9a678a4 50154dd 9a678a4 733bd44 9a678a4 733bd44 0f89c55 a6d507f 733bd44 0f89c55 733bd44
# gradio display leaderboard
import pandas as pd
import numpy as np
import matplotlib
# matplotlib.use('macosx')
import gradio as gr
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from apscheduler.schedulers.background import BackgroundScheduler
from texts import *
from leaderboards import eng_leaderboards, chi_leaderboards
import toml
import os
from opseval_datasets import *
from latex_utils import gen_latex_table
# Parsed contents of config.toml (path is relative to the working directory).
# get_latest_modification_date() reads config['dataset']['dataset_dir'] from it.
config = toml.load("config.toml")
def create_lang_tabs(lang, lang_cates, data_dir='./data_v2'):
    """Load every leaderboard CSV for one language.

    Args:
        lang: Language code used in the file names (e.g. 'en' or 'zh').
        lang_cates: Iterable of ``(dataset, categories)`` pairs; one CSV is
            read per dataset/category combination.
        data_dir: Directory containing the CSV files. Defaults to the
            previously hard-coded ``./data_v2`` so existing callers are
            unaffected.

    Returns:
        ``{dataset: {category: DataFrame}}`` holding the raw contents of
        ``<data_dir>/<dataset>_<lang>_<category>_gen.csv``.

    Raises:
        Whatever ``pd.read_csv`` raises when a file is missing or unreadable.
    """
    return {
        dataset: {
            cat: pd.read_csv(
                os.path.join(data_dir, f'{dataset}_{lang}_{cat}_gen.csv')
            )
            for cat in cates
        }
        for dataset, cates in lang_cates
    }
# Leaderboard tables loaded once at import time, keyed by the language name
# shown in the UI, then by dataset, then by question category.
dict_lang = {
    display_name: create_lang_tabs(lang_code, boards)
    for display_name, lang_code, boards in (
        ('English', 'en', eng_leaderboards),
        ('Chinese', 'zh', chi_leaderboards),
    )
}
def process_mc_df(df, shot=None):
    """Turn a raw multiple-choice result table into a ranked leaderboard.

    The input needs a ``name`` column followed by exactly eight score
    columns; they are relabelled positionally as (Zeroshot | Fewshot) x
    (Naive, SC, CoT, CoT+SC), so their order in the CSV matters.

    Args:
        df: Raw results table; it is not modified.
        shot: ``"Zeroshot"`` or ``"Fewshot"`` to keep only that half of the
            columns; a falsy value keeps all eight under a two-level header.

    Returns:
        A new DataFrame with a ``Model`` column, the numeric scores rounded
        to two decimals, a ``BestScore`` column (row-wise maximum), rows
        sorted by ``BestScore`` descending, and missing values shown as '/'.
    """
    shot_names = ("Zeroshot", "Fewshot")
    method_names = ("Naive", "SC", "CoT", "CoT+SC")

    # Rename "name" to "Model" and use it as the index so that only score
    # columns remain to be relabelled.
    table = df.rename(columns={"name": "Model"}).set_index("Model")
    table.columns = pd.MultiIndex.from_tuples(
        [(shot_name, method) for shot_name in shot_names for method in method_names]
    )

    # Keep only the requested shot setting, e.g. just the Zeroshot columns.
    if shot:
        table = table[shot]

    # Non-numeric cells become NaN; scores are kept to two decimals.
    table = table.apply(pd.to_numeric, errors="coerce").round(2)

    # Rank models by their best score across the remaining columns.
    table["BestScore"] = table.max(axis=1)
    ranked = table.sort_values(by="BestScore", ascending=False).reset_index()

    # Show every empty value as '/'.
    return ranked.fillna('/')
def process_qa_df(df):
    """Return a copy of the question-answering table rounded to four decimals."""
    return df.round(4)
def dataframe_to_gradio(df, is_mc=True, shot=None):
    """Wrap a leaderboard table in a gradio Dataframe component.

    Args:
        df: Raw table as loaded from CSV.
        is_mc: True to prepare it with process_mc_df, False to prepare it
            with process_qa_df.
        shot: Forwarded to process_mc_df; ignored for QA tables.

    Returns:
        A ``gr.components.Dataframe`` whose rows and headers come from the
        prepared table. Column datatypes and a row limit are left at the
        component's defaults.
    """
    table = process_mc_df(df, shot) if is_mc else process_qa_df(df)
    return gr.components.Dataframe(
        value=table.values.tolist(),
        headers=list(table.columns),
    )
def plot_radar_chart(df, attributes):
    """Draw one filled polar trace per row of ``df``.

    Args:
        df: Table with a ``Model`` column plus one column per attribute.
        attributes: Column labels to plot; they also become the angular
            axis labels.

    Returns:
        A plotly Figure titled "OpsEval" with the legend shown and the
        radial axis fixed to the range 0 to 0.9.
    """
    fig = go.Figure()
    for _, record in df.iterrows():
        model_name = record['Model']
        scores = record[attributes].tolist()
        trace = go.Scatterpolar(
            r=scores,
            theta=attributes,
            fill='toself',
            name=model_name,
        )
        fig.add_trace(trace)

    radial_axis = dict(visible=True, range=[0, 0.9])
    fig.update_layout(
        title="OpsEval",
        polar=dict(radialaxis=radial_axis),
        showlegend=True,
    )
    return fig
def pop_latex_table(caption, label, dataframe):
    """Render ``dataframe`` with gen_latex_table and return it in a visible Textbox."""
    return gr.Textbox(
        gen_latex_table(caption, label, dataframe),
        label="LaTeX Table",
        visible=True,
    )
def generate_csv(df, filename):
    """Write ``df`` to ``filename`` (no index column) and return a visible gr.File for it."""
    df.to_csv(filename, index=False)
    return gr.File(
        label="Download Link",
        type="filepath",
        value=filename,
        visible=True,
    )
def create_lang_leader_board(lang_dict, lang, dis_lang='en'):
    """Create one gradio tab per dataset, with a nested tab per leaderboard.

    Inside each dataset tab, the 'mc' category produces two tabs (Zeroshot
    and Fewshot multiple-choice tables); every other category produces a
    'Question Answering' tab.

    Args:
        lang_dict: ``{dataset: {category: DataFrame}}`` for one language.
        lang: Name of the language the tables belong to. Currently unused;
            kept so existing callers keep working.
        dis_lang: Display language for the dataset tab titles: 'en' uses
            dataset_abbr_en_dict, anything else uses dataset_abbr_zh_dict.

    Returns:
        The list of dataset-level ``gr.Tab`` objects, in ``lang_dict`` order.

    Raises:
        KeyError: If a dataset has no entry in the chosen title dictionary.
    """
    # The earlier per-dataset "best score" pass was removed: its results fed
    # only a commented-out radar plot, yet it reprocessed every multiple-choice
    # table on each call.
    # NOTE: LaTeX/CSV export buttons (pop_latex_table / generate_csv) were
    # previously sketched here as commented-out code and are not wired up.

    # The title dictionary depends only on the display language, so pick it
    # once instead of once per dataset.
    dataset_titles = dataset_abbr_en_dict if dis_lang == "en" else dataset_abbr_zh_dict

    tab_list = []
    for dataset, tables in lang_dict.items():
        with gr.Tab(dataset_titles[dataset]) as tab:
            for cat, df in tables.items():
                if cat == 'mc':
                    for shot in ['Zeroshot', 'Fewshot']:
                        with gr.Tab(f'Multiple Choice Question ({shot})'):
                            dataframe_to_gradio(df, is_mc=True, shot=shot)
                else:
                    with gr.Tab('Question Answering'):
                        dataframe_to_gradio(df, is_mc=False)
        tab_list.append(tab)
    return tab_list
def get_latest_modification_date():
    """Return when the newest dataset CSV was last modified.

    Looks at the ``.csv`` files directly inside
    ``config['dataset']['dataset_dir']`` and formats the largest
    modification time as ``YYYY-MM-DD HH:MM:SS``. The timestamp is treated
    as seconds since the epoch with no timezone conversion; with no CSV
    files present the result is ``1970-01-01 00:00:00``.
    """
    dataset_dir = config['dataset']['dataset_dir']
    csv_mtimes = [
        os.path.getmtime(os.path.join(dataset_dir, entry))
        for entry in os.listdir(dataset_dir)
        if entry.endswith('.csv')
    ]
    # The leading 0 keeps the "no CSV files" result identical to the epoch.
    newest = max([0, *csv_mtimes])
    return pd.to_datetime(newest, unit='s').strftime("%Y-%m-%d %H:%M:%S")
# UI strings per display-language code ('zh' / 'en'); switch_language() looks
# entries up by that code. 'intro' and 'title' are constants brought in by the
# star imports at the top of the file; 'lb_sec' is the leaderboard heading.
# The timestamp inside 'lb_sec' is computed once, when this literal is
# evaluated at import time.
translation_dict = {
'zh': {
'intro': ZH_INTRODUCTION_TEXT,
'title': ZH_TITLE,
'lb_sec': f"""# 🏅 排行榜 \n 更新时间: {get_latest_modification_date()}\n""",
},
'en': {
'intro': INTRODUCTION_TEXT,
'title': TITLE,
'lb_sec': f"""# 🏅 Leaderboard \n Latest update: {get_latest_modification_date()}\n"""
}
}
def get_language_lb(language):
    """Rebuild every dataset tab with titles in the given display language.

    Returns:
        A flat list: the English leaderboard tabs followed by the Chinese
        ones, each list as returned by create_lang_leader_board.
    """
    tabs_by_lang = dict.fromkeys(('English', 'Chinese'))
    for lang_name, lang_tables in dict_lang.items():
        tabs_by_lang[lang_name] = create_lang_leader_board(lang_tables, lang_name, language)
    return [*tabs_by_lang['English'], *tabs_by_lang['Chinese']]
def switch_language(language):
    """Return the values for a language switch, in the order the buttons expect.

    The tuple holds the title, the introduction, the leaderboard heading,
    every tab from get_language_lb(language), and finally the language code
    itself.

    Raises:
        KeyError: If ``language`` has no entry in translation_dict.
    """
    strings = translation_dict[language]
    return (
        strings['title'],
        strings['intro'],
        strings['lb_sec'],
        *get_language_lb(language),
        language,
    )
def get_lb_body(language='en'):
    """Build the leaderboard body: one top-level tab per data language.

    Args:
        language: Display language forwarded to create_lang_leader_board
            for the dataset tab titles.

    Returns:
        ``(body, tabs)`` where ``body`` is the ``gr.Blocks`` holding the
        layout and ``tabs`` maps 'English' / 'Chinese' to the dataset tabs
        created under that language.
    """
    tabs_by_lang = {'English': None, 'Chinese': None}
    with gr.Blocks() as body:
        for lang_name, lang_tables in dict_lang.items():
            with gr.Tab(lang_name):
                tabs_by_lang[lang_name] = create_lang_leader_board(
                    lang_tables, lang_name, language
                )
    return body, tabs_by_lang
def launch_gradio():
    """Assemble the leaderboard page and launch it.

    Layout, top to bottom: language buttons, title, introduction, the
    leaderboard heading with the latest data timestamp, then the tabbed
    leaderboards. Each language button calls switch_language with its
    language code and writes the result to the title, introduction, heading,
    every dataset tab and the language state.
    """
    with gr.Blocks() as demo:
        lang_state = gr.State("en")
        with gr.Row():
            en_button = gr.Button("English", variant="primary")
            zh_button = gr.Button("中文", variant="primary")

        title = gr.HTML(TITLE)
        intro = gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
        leaderboard_section = gr.Markdown(
            f"""# 🏅 Leaderboard \n Latest update: {get_latest_modification_date()}\n""",
            elem_classes="markdown-text",
        )

        _, tab_dict = get_lb_body(language=lang_state.value)
        tab_list = [*tab_dict['English'], *tab_dict['Chinese']]

        # Same wiring for both buttons; only the language code differs.
        for button, lang_code in ((en_button, "en"), (zh_button, "zh")):
            button.click(
                switch_language,
                inputs=[gr.State(lang_code)],
                outputs=[title, intro, leaderboard_section, *tab_list, lang_state],
                postprocess=False,
            )

    demo.launch()
# Show floats with two decimals wherever pandas formats them for display.
pd.set_option('display.float_format', '{:.02f}'.format)

# NOTE(review): this job calls launch_gradio() again every hour on a scheduler
# thread. That builds and launches another Blocks app rather than refreshing
# the running one, and it does not re-read the CSVs, which are loaded into
# dict_lang once at import time. If an hourly data refresh is the intent,
# confirm and replace this with a job that reloads the data or restarts the app.
scheduler = BackgroundScheduler()
scheduler.add_job(launch_gradio, 'interval', hours=1)
scheduler.start()

# Start the app. A stray trailing "|" after this call (a syntax error) was removed.
launch_gradio()