Spaces:
Runtime error
Runtime error
# gradio display leaderboard | |
import pandas as pd | |
import numpy as np | |
import matplotlib | |
# matplotlib.use('macosx') | |
import gradio as gr | |
import matplotlib.pyplot as plt | |
import plotly.graph_objects as go | |
from apscheduler.schedulers.background import BackgroundScheduler | |
from texts import * | |
from leaderboards import eng_leaderboards, chi_leaderboards | |
import toml | |
import os | |
from opseval_datasets import * | |
from latex_utils import gen_latex_table | |
# Site-wide settings, parsed once at import time from "config.toml" in the
# current working directory.
config = toml.load("config.toml")
def create_lang_tabs(lang, lang_cates, data_dir='./data_v2'):
    """Load every leaderboard CSV belonging to one language.

    Args:
        lang: Language code embedded in the CSV file names.
        lang_cates: Iterable of ``(dataset, categories)`` pairs; one CSV is
            read per (dataset, category) combination.
        data_dir: Directory containing the ``{dataset}_{lang}_{cat}_gen.csv``
            files. Defaults to the location that used to be hard-coded.

    Returns:
        A dict mapping each dataset name to a dict of
        ``{category: DataFrame}``.

    Raises:
        Whatever ``pd.read_csv`` raises when a file is missing or unreadable.
    """
    return {
        dataset: {
            cat: pd.read_csv(
                os.path.join(data_dir, f'{dataset}_{lang}_{cat}_gen.csv')
            )
            for cat in cates
        }
        for dataset, cates in lang_cates
    }
# Leaderboard tables for every supported language, loaded once at import time.
# Keys are the display names used for the top-level language tabs.
dict_lang = {
    display_name: create_lang_tabs(lang_code, boards)
    for display_name, lang_code, boards in (
        ('English', 'en', eng_leaderboards),
        ('Chinese', 'zh', chi_leaderboards),
    )
}
def process_mc_df(df, shot=None):
    """Turn a raw multiple-choice result table into a ranked leaderboard.

    The input is expected to hold a ``name`` column followed by exactly eight
    score columns, ordered zero-shot then few-shot, each in the order
    naive, self-consistency, CoT, CoT + self-consistency.

    Args:
        df: Raw results table as read from CSV.
        shot: Optional top-level group to keep ("Zeroshot" or "Fewshot").
            When falsy, both groups are kept under two-level column labels.

    Returns:
        A new DataFrame with a "Model" column, the numeric score columns
        rounded to two decimals, a "BestScore" column holding each row's
        maximum, rows sorted by "BestScore" descending, and missing values
        replaced by '/'.
    """
    settings = ("Naive", "SC", "CoT", "CoT+SC")
    column_pairs = [
        (group, setting)
        for group in ("Zeroshot", "Fewshot")
        for setting in settings
    ]

    # Index by model name so that only score columns remain as columns.
    table = df.rename(columns={"name": "Model"}).set_index("Model")
    # Relabel the eight score columns as (shot group, prompting setting).
    table.columns = pd.MultiIndex.from_tuples(column_pairs)

    # Optionally narrow the table to a single shot group.
    if shot:
        table = table[shot]

    # Non-numeric cells become NaN; scores are shown with two decimals.
    table = table.apply(pd.to_numeric, errors="coerce").round(2)

    # Rank models by their best score across the remaining columns.
    table["BestScore"] = table.max(axis=1)
    ranked = table.sort_values(by="BestScore", ascending=False).reset_index()

    # Display missing results as '/'.
    return ranked.fillna('/')
def process_qa_df(df):
    """Return a copy of *df* with its values rounded to four decimals."""
    rounded = df.round(decimals=4)
    return rounded
def dataframe_to_gradio(df, is_mc=True, shot=None):
    """Wrap a leaderboard table in a Gradio ``Dataframe`` component.

    Args:
        df: Raw results table.
        is_mc: When true the table is prepared with ``process_mc_df``,
            otherwise with ``process_qa_df``.
        shot: Forwarded to ``process_mc_df``; ignored for QA tables.

    Returns:
        A ``gr.components.Dataframe`` whose rows are the processed table's
        values and whose headers are its column labels.
    """
    table = process_mc_df(df, shot) if is_mc else process_qa_df(df)
    return gr.components.Dataframe(
        value=table.values.tolist(),
        headers=list(table.columns),
    )
def plot_radar_chart(df, attributes):
    """Build a radar (polar) chart with one filled trace per row of *df*.

    Args:
        df: Table with a 'Model' column plus one column per attribute.
        attributes: Column labels to plot; they are used as the angular axis.

    Returns:
        A ``plotly.graph_objects.Figure`` titled "OpsEval".
    """
    figure = go.Figure()
    for _, record in df.iterrows():
        trace = go.Scatterpolar(
            r=record[attributes].tolist(),
            theta=attributes,
            fill='toself',
            name=record['Model'],
        )
        figure.add_trace(trace)

    # NOTE(review): the layout is applied once after all traces are added;
    # the flattened source does not show whether this sat inside the loop.
    radial_axis = dict(visible=True, range=[0, 0.9])
    figure.update_layout(
        title="OpsEval",
        polar=dict(radialaxis=radial_axis),
        showlegend=True,
    )
    return figure
def pop_latex_table(caption, label, dataframe):
    """Render *dataframe* as LaTeX and return it in a visible Gradio Textbox.

    The caption, label and table are passed straight to ``gen_latex_table``.
    """
    latex_source = gen_latex_table(caption, label, dataframe)
    return gr.Textbox(latex_source, label="LaTeX Table", visible=True)
def generate_csv(df, filename):
    """Write *df* to *filename* as CSV and return a visible download widget.

    The row index is not written. The returned ``gr.File`` component points
    at the file that was just created.
    """
    df.to_csv(filename, index=False)
    return gr.File(
        label="Download Link",
        type="filepath",
        value=filename,
        visible=True,
    )
def create_lang_leader_board(lang_dict, lang, dis_lang='en'):
    """Create one Gradio tab per dataset, each holding its leaderboard tables.

    Multiple-choice tables ('mc' category) get one sub-tab per shot setting;
    every other category is shown in a single 'Question Answering' sub-tab.

    Args:
        lang_dict: Mapping of dataset name -> {category: DataFrame}.
        lang: Language name of the data. Kept for interface compatibility;
            nothing in the body reads it.
        dis_lang: Display language for the dataset tab titles. "en" selects
            ``dataset_abbr_en_dict``; anything else selects
            ``dataset_abbr_zh_dict``.

    Returns:
        The list of dataset-level ``gr.Tab`` objects, in ``lang_dict`` order.

    Note:
        Presumably must be called inside an active ``gr.Blocks`` context,
        since the components are created through ``with gr.Tab(...)``.
    """
    # The title lookup table depends only on dis_lang, so pick it once.
    abbr = dataset_abbr_en_dict if dis_lang == "en" else dataset_abbr_zh_dict

    # TODO: export buttons (LaTeX via pop_latex_table, CSV via generate_csv)
    # and the radar chart (plot_radar_chart) are not wired up yet.
    tab_list = []
    for dataset, tables in lang_dict.items():
        with gr.Tab(abbr[dataset]) as tab:
            for cat, df in tables.items():
                if cat == 'mc':
                    for shot in ('Zeroshot', 'Fewshot'):
                        with gr.Tab(f'Multiple Choice Question ({shot})'):
                            # Creating the component inside the tab context
                            # is what places it there; the object itself is
                            # not needed afterwards.
                            dataframe_to_gradio(df, is_mc=True, shot=shot)
                else:
                    with gr.Tab('Question Answering'):
                        dataframe_to_gradio(df, is_mc=False)
        tab_list.append(tab)
    return tab_list
def get_latest_modification_date():
    """Return the newest modification time among the dataset CSV files.

    Only files ending in '.csv' directly inside
    ``config['dataset']['dataset_dir']`` are considered.

    Returns:
        The timestamp formatted as "%Y-%m-%d %H:%M:%S". When no CSV file is
        present the result corresponds to timestamp 0.
    """
    dataset_dir = config['dataset']['dataset_dir']
    csv_mtimes = [
        os.path.getmtime(os.path.join(dataset_dir, entry))
        for entry in os.listdir(dataset_dir)
        if entry.endswith('.csv')
    ]
    # Seed with 0 so an empty directory still produces a valid timestamp.
    newest = max([0, *csv_mtimes])
    return pd.to_datetime(newest, unit='s').strftime("%Y-%m-%d %H:%M:%S")
# UI strings per display language. The "lb_sec" headers embed the dataset
# modification time as it was when this module was imported.
translation_dict = {
    # Chinese interface
    'zh': {
        'intro': ZH_INTRODUCTION_TEXT,
        'title': ZH_TITLE,
        'lb_sec': f"""# 🏅 排行榜 \n 更新时间: {get_latest_modification_date()}\n""",
    },
    # English interface
    'en': {
        'intro': INTRODUCTION_TEXT,
        'title': TITLE,
        'lb_sec': f"""# 🏅 Leaderboard \n Latest update: {get_latest_modification_date()}\n""",
    },
}
def get_language_lb(language):
    """Rebuild the dataset tabs for every data language.

    Args:
        language: Display language forwarded to ``create_lang_leader_board``.

    Returns:
        A flat list: the English data tabs followed by the Chinese data tabs.
    """
    tabs_by_lang = {'English': None, 'Chinese': None}
    for lang_name, datasets in dict_lang.items():
        tabs_by_lang[lang_name] = create_lang_leader_board(
            datasets, lang_name, language
        )
    return [*tabs_by_lang['English'], *tabs_by_lang['Chinese']]
def switch_language(language):
    """Produce the values that refresh the page for *language*.

    Returns:
        A tuple of: title, introduction, leaderboard section header, every
        tab returned by ``get_language_lb``, and finally *language* itself.
    """
    strings = translation_dict[language]
    return (
        strings['title'],
        strings['intro'],
        strings['lb_sec'],
        *get_language_lb(language),
        language,
    )
def get_lb_body(language='en'):
    """Build the leaderboard body: one top-level tab per data language.

    Args:
        language: Display language forwarded to ``create_lang_leader_board``.

    Returns:
        A ``(body, tab_dict)`` pair, where ``body`` is the ``gr.Blocks``
        container and ``tab_dict`` maps each data language name to the list
        of dataset tabs created for it.
    """
    tabs_by_lang = {'English': None, 'Chinese': None}
    with gr.Blocks() as body:
        for lang_name, datasets in dict_lang.items():
            with gr.Tab(lang_name):
                tabs_by_lang[lang_name] = create_lang_leader_board(
                    datasets, lang_name, language
                )
    return body, tabs_by_lang
def launch_gradio():
    """Assemble the leaderboard page and start the Gradio server.

    The page has two language buttons, a title, an introduction, a
    leaderboard header and the tabbed leaderboard body. Each button calls
    ``switch_language`` with its language code.
    """
    demo = gr.Blocks()
    with demo:
        lang_state = gr.State("en")

        # NOTE(review): assumes only the two buttons live inside the Row;
        # the flattened source does not show the original nesting.
        with gr.Row():
            en_button = gr.Button("English", variant="primary")
            zh_button = gr.Button("中文", variant="primary")

        title = gr.HTML(TITLE)
        intro = gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
        leaderboard_section = gr.Markdown(
            f"""# 🏅 Leaderboard \n Latest update: {get_latest_modification_date()}\n""",
            elem_classes="markdown-text",
        )

        _, tab_dict = get_lb_body(language=lang_state.value)
        tab_list = [*tab_dict['English'], *tab_dict['Chinese']]

        # Both buttons update the same components; only the language code
        # handed to switch_language differs.
        for button, code in ((en_button, "en"), (zh_button, "zh")):
            button.click(
                switch_language,
                inputs=[gr.State(code)],
                outputs=[title, intro, leaderboard_section, *tab_list, lang_state],
                postprocess=False,
            )

    demo.launch()
# Show floats with two decimals whenever pandas formats them for display.
pd.set_option('display.float_format', '{:.02f}'.format)

# NOTE(review): the hourly job calls launch_gradio() again, which builds and
# launches another app; it does not reload the CSV data read at import time.
# Confirm this is the intended refresh mechanism.
scheduler = BackgroundScheduler()
scheduler.add_job(launch_gradio, trigger='interval', hours=1)
scheduler.start()

# Start the app immediately; the scheduler runs in the background.
launch_gradio()