Spaces:
Runtime error
Runtime error
# gradio display leaderboard | |
import pandas as pd | |
import numpy as np | |
import matplotlib | |
# matplotlib.use('macosx') | |
import gradio as gr | |
import matplotlib.pyplot as plt | |
import plotly.graph_objects as go | |
from apscheduler.schedulers.background import BackgroundScheduler | |
from texts import INTRODUCTION_TEXT, TITLE | |
from leaderboards import eng_leaderboards, chi_leaderboards | |
from opseval_datasets import * | |
# df_lang = { | |
# 'English': pd.read_csv("./leaderboard/wired_network_en.csv"), | |
# 'Chinese': pd.read_csv("./leaderboard/wired_network_zh.csv"), | |
# } | |
def create_lang_tabs(lang, lang_cates):
    """Load every leaderboard CSV for one language.

    Args:
        lang: Language code embedded in the CSV file names (this file passes
            'en' and 'zh').
        lang_cates: Iterable of ``(dataset, categories)`` pairs.

    Returns:
        Nested dict ``{dataset: {category: DataFrame}}``; each frame is read
        from ``./data_v2/{dataset}_{lang}_{category}_gen.csv`` (relative to
        the current working directory).
    """
    tables = {}
    for dataset, cates in lang_cates:
        tables[dataset] = {
            cat: pd.read_csv(f'./data_v2/{dataset}_{lang}_{cat}_gen.csv')
            for cat in cates
        }
    return tables
# Leaderboard tables per display language, loaded once when the module runs.
# Keys are the tab titles shown in the UI.
dict_lang = dict(
    English=create_lang_tabs('en', eng_leaderboards),
    Chinese=create_lang_tabs('zh', chi_leaderboards),
)
def process_mc_df(df, shot=None):
    """Prepare a multiple-choice result table for display.

    The input is expected to hold a ``name`` column followed by exactly eight
    score columns, which are relabelled positionally as
    (Zeroshot | Fewshot) x (Naive, SC, CoT, CoT+SC).

    Args:
        df: Raw result table.
        shot: Optional top-level label ("Zeroshot" or "Fewshot"); when given,
            only that group's four columns are kept.

    Returns:
        A new DataFrame with a ``Model`` column, the score columns rounded to
        two decimals, a ``BestScore`` column (row-wise maximum), rows sorted by
        ``BestScore`` descending, and missing values replaced by '/'.
    """
    shot_groups = ("Zeroshot", "Fewshot")
    settings = ("Naive", "SC", "CoT", "CoT+SC")

    # "name" becomes the "Model" index so only score columns remain.
    table = df.rename(columns={"name": "Model"}).set_index("Model")
    table.columns = pd.MultiIndex.from_tuples(
        [(group, setting) for group in shot_groups for setting in settings]
    )

    # Keep a single shot group when requested.
    if shot:
        table = table[shot]

    # Non-numeric cells become NaN; scores are shown with two decimals.
    table = table.apply(pd.to_numeric, errors="coerce").round(2)

    # Rank models by their best score across the remaining columns.
    table["BestScore"] = table.max(axis=1)
    ranked = table.sort_values(by="BestScore", ascending=False).reset_index()

    # Missing scores are displayed as '/'.
    return ranked.fillna('/')
def process_qa_df(df):
    """Return a copy of the question-answering table rounded to four decimals."""
    return df.round(decimals=4)
def dataframe_to_gradio(df, is_mc=True, shot=None):
    """Render a leaderboard table as a Gradio ``Dataframe`` component.

    Args:
        df: Raw leaderboard table.
        is_mc: When true the table is prepared with ``process_mc_df``,
            otherwise with ``process_qa_df``.
        shot: Forwarded to ``process_mc_df``; unused when ``is_mc`` is false.

    Returns:
        The created ``gr.components.Dataframe``.
    """
    table = process_mc_df(df, shot) if is_mc else process_qa_df(df)

    # With shot=None, process_mc_df keeps two-level columns, so labels are
    # tuples such as ("Zeroshot", "Naive") or ("Model", ""). Join the
    # non-empty parts so every header handed to Gradio is a plain string.
    headers = [
        " ".join(str(part) for part in label if part != "")
        if isinstance(label, tuple)
        else label
        for label in table.columns
    ]

    return gr.components.Dataframe(
        value=table.values.tolist(),
        headers=headers,
    )
def plot_radar_chart(df, attributes):
    """Build a radar chart with one filled trace per row of ``df``.

    Args:
        df: Table with a ``Model`` column plus one column per attribute.
        attributes: List of column names used as the angular axes.

    Returns:
        The plotly ``Figure`` titled "OpsEval" with a radial axis fixed to
        the range 0 to 0.9 and the legend shown.
    """
    figure = go.Figure()

    # One polygon per model, labelled with the model name.
    for _, record in df.iterrows():
        trace = go.Scatterpolar(
            r=record[attributes].tolist(),
            theta=attributes,
            fill='toself',
            name=record['Model'],
        )
        figure.add_trace(trace)

    radial_axis = dict(visible=True, range=[0, 0.9])
    figure.update_layout(
        title="OpsEval",
        polar=dict(radialaxis=radial_axis),
        showlegend=True,
    )
    return figure
def create_lang_leader_board(lang_dict):
    """Create the dataset tabs for one language in the active Gradio context.

    For every dataset a tab is opened (titled via ``dataset_abbr_en_dict``).
    Inside it, an 'mc' table yields one sub-tab per shot setting, and any
    other category yields a 'Question Answering' sub-tab.

    Args:
        lang_dict: ``{dataset: {category: DataFrame}}`` as produced by
            ``create_lang_tabs``.

    Returns:
        None. The Gradio components are created as a side effect.
    """
    # The earlier best-score pre-pass was dropped: its results were never
    # used (the radar plot that consumed them is disabled) and it ran
    # process_mc_df on every 'mc' table for nothing.
    for dataset, tables in lang_dict.items():
        with gr.Tab(dataset_abbr_en_dict[dataset]):
            for cat, df in tables.items():
                if cat == 'mc':
                    for shot in ('Zeroshot', 'Fewshot'):
                        with gr.Tab(f'Multiple Choice Question ({shot})'):
                            dataframe_to_gradio(df, is_mc=True, shot=shot)
                else:
                    with gr.Tab('Question Answering'):
                        dataframe_to_gradio(df, is_mc=False)
def launch_gradio():
    """Build the leaderboard page and start the Gradio server.

    The page shows the title, the introduction text, a leaderboard heading,
    and one top-level tab per entry of ``dict_lang``.
    """
    demo = gr.Blocks()

    with demo:
        gr.HTML(TITLE)
        gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
        gr.Markdown("""# 🏅 Leaderboard \n Latest update: 2024-05-15\n""", elem_classes="markdown-text")
        # Loop names avoid shadowing the builtin ``dict``.
        for lang_name, lang_tables in dict_lang.items():
            with gr.Tab(lang_name):
                create_lang_leader_board(lang_tables)

    demo.launch()
# Two-decimal float formatting for pandas' own text rendering of frames.
pd.set_option('display.float_format', '{:.02f}'.format)

# NOTE(review): the hourly job calls launch_gradio again, which builds a new
# Blocks and calls demo.launch() while the first server is presumably still
# running, and dict_lang is only loaded once at import. Confirm this is the
# intended refresh mechanism.
scheduler = BackgroundScheduler()
scheduler.add_job(launch_gradio, trigger='interval', hours=1)
scheduler.start()

launch_gradio()