Spaces:
Runtime error
Runtime error
File size: 9,359 Bytes
0f89c55 733bd44 9a678a4 a6d507f 9a678a4 a6d507f 50154dd 733bd44 a6d507f 9a678a4 a6d507f 32e04fa a6d507f 733bd44 a6d507f 32e04fa a6d507f 22cd459 a6d507f 22cd459 733bd44 a6d507f 733bd44 a6d507f 733bd44 a6d507f 50154dd a6d507f 50154dd a6d507f 9a678a4 a6d507f 50154dd 9a678a4 a6d507f 9a678a4 50154dd a6d507f 9a678a4 50154dd 9a678a4 a6d507f 9a678a4 50154dd 9a678a4 a6d507f 9a678a4 50154dd 9a678a4 733bd44 9a678a4 733bd44 0f89c55 a6d507f 733bd44 0f89c55 733bd44 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 |
# gradio display leaderboard
import pandas as pd
import numpy as np
import matplotlib
# matplotlib.use('macosx')
import gradio as gr
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from apscheduler.schedulers.background import BackgroundScheduler
from texts import *
from leaderboards import eng_leaderboards, chi_leaderboards
import toml
import os
from opseval_datasets import *
from latex_utils import gen_latex_table
# Application settings, parsed once at import time from config.toml (path is
# relative to the working directory). Later code reads
# config['dataset']['dataset_dir'] from this mapping.
config = toml.load("config.toml")
def create_lang_tabs(lang, lang_cates):
    """Load every leaderboard CSV belonging to one language.

    Args:
        lang: Language code embedded in the CSV file names (the callers in
            this file pass 'en' or 'zh').
        lang_cates: Iterable of ``(dataset, categories)`` pairs, where
            ``categories`` is an iterable of category names.

    Returns:
        A dict mapping each dataset to ``{category: DataFrame}``, where every
        frame is read from ``./data_v2/{dataset}_{lang}_{category}_gen.csv``.
    """
    return {
        dataset: {
            cat: pd.read_csv(f'./data_v2/{dataset}_{lang}_{cat}_gen.csv')
            for cat in cates
        }
        for dataset, cates in lang_cates
    }
# Leaderboard data for every UI language tab, loaded once at import time:
# {tab label: {dataset: {category: DataFrame}}}.
dict_lang = {
    tab_label: create_lang_tabs(lang_code, leaderboard_spec)
    for tab_label, lang_code, leaderboard_spec in (
        ('English', 'en', eng_leaderboards),
        ('Chinese', 'zh', chi_leaderboards),
    )
}
def process_mc_df(df, shot=None):
    """Turn a raw multiple-choice result table into a ranked leaderboard.

    Args:
        df: Raw results holding a ``name`` column plus exactly eight score
            columns. The score columns are relabelled by position, so they
            must be ordered zero-shot first, then few-shot, each as
            Naive, SC, CoT, CoT+SC.
        shot: Optional top-level label ("Zeroshot" or "Fewshot"). When given,
            only that group's four columns are kept; when falsy, all eight
            columns stay under a two-level column index.

    Returns:
        A new DataFrame with a ``Model`` column, the score columns rounded to
        two decimals, a ``BestScore`` column (row-wise maximum), rows sorted
        by ``BestScore`` descending, and missing values shown as '/'.
    """
    # Two-level labels: evaluation setting on top, prompting method below.
    score_labels = [
        (setting, method)
        for setting in ("Zeroshot", "Fewshot")
        for method in ("Naive", "SC", "CoT", "CoT+SC")
    ]

    # Use the model name as the index so only score columns remain.
    table = df.rename(columns={"name": "Model"}).set_index("Model")
    table.columns = pd.MultiIndex.from_tuples(score_labels)

    # Keep a single setting if requested (e.g. only the Zeroshot columns).
    if shot:
        table = table[shot]

    # Non-numeric cells become NaN; scores are shown with two decimals.
    table = table.apply(pd.to_numeric, errors="coerce").round(2)

    # Rank models by their best score across the remaining columns.
    table["BestScore"] = table.max(axis=1)
    ranked = table.sort_values(by="BestScore", ascending=False).reset_index()

    # Display missing results as '/'.
    return ranked.fillna('/')
def process_qa_df(df):
    """Return the question-answering table rounded to four decimal places.

    Args:
        df: Question-answering results as a DataFrame.

    Returns:
        The result of ``df.round(4)``; the input frame is not modified.
    """
    return df.round(4)
def dataframe_to_gradio(df, is_mc=True, shot=None):
    """Render a result table as a gradio Dataframe component.

    Args:
        df: Raw result table for one dataset/category.
        is_mc: True to prepare the table with process_mc_df (multiple
            choice), False to prepare it with process_qa_df.
        shot: Forwarded to process_mc_df; ignored when ``is_mc`` is False.

    Returns:
        A ``gr.components.Dataframe`` whose rows are the prepared table's
        values and whose headers are its column labels.
    """
    table = process_mc_df(df, shot) if is_mc else process_qa_df(df)
    return gr.components.Dataframe(
        value=table.values.tolist(),
        headers=list(table.columns),
    )
def plot_radar_chart(df, attributes):
    """Draw one filled polar trace per row of ``df``.

    Args:
        df: Table with a ``Model`` column (used as the trace name) and one
            column per entry of ``attributes``.
        attributes: Column labels plotted around the polar axis.

    Returns:
        A plotly Figure titled "OpsEval" with a visible radial axis fixed to
        the range [0, 0.9] and the legend shown.
    """
    fig = go.Figure()
    for _, record in df.iterrows():
        trace_name = record['Model']
        radii = record[attributes].tolist()
        trace = go.Scatterpolar(
            r=radii,
            theta=attributes,
            fill='toself',
            name=trace_name,
        )
        fig.add_trace(trace)

    radial_axis = dict(visible=True, range=[0, 0.9])
    fig.update_layout(
        title="OpsEval",
        polar=dict(radialaxis=radial_axis),
        showlegend=True,
    )
    return fig
def pop_latex_table(caption, label, dataframe):
    """Wrap a generated LaTeX table in a visible gradio Textbox.

    Args:
        caption: Passed to gen_latex_table as the first argument.
        label: Passed to gen_latex_table as the second argument.
        dataframe: Passed to gen_latex_table as the third argument.

    Returns:
        A visible ``gr.Textbox`` labelled "LaTeX Table" holding the value
        returned by gen_latex_table.
    """
    return gr.Textbox(
        gen_latex_table(caption, label, dataframe),
        label="LaTeX Table",
        visible=True,
    )
def generate_csv(df, filename):
    """Write ``df`` to ``filename`` as CSV and expose it as a download.

    Args:
        df: DataFrame to export; the index is not written.
        filename: Destination path of the CSV file.

    Returns:
        A visible ``gr.File`` component labelled "Download Link" whose value
        is ``filename``.
    """
    df.to_csv(filename, index=False)
    return gr.File(
        label="Download Link",
        type="filepath",
        value=filename,
        visible=True,
    )
def create_lang_leader_board(lang_dict, lang, dis_lang='en'):
    """Build one gradio tab per dataset, with a sub-tab per result table.

    Each dataset tab contains, for its 'mc' category, one sub-tab for
    'Zeroshot' and one for 'Fewshot'; every other category gets a single
    'Question Answering' sub-tab. The components are created as a side effect
    of calling gradio constructors inside ``with`` blocks.

    Args:
        lang_dict: Mapping ``{dataset: {category: DataFrame}}``.
        lang: Not used by the current implementation; kept so existing
            callers keep working.
        dis_lang: 'en' takes tab titles from dataset_abbr_en_dict; any other
            value takes them from dataset_abbr_zh_dict.

    Returns:
        A list with the ``gr.Tab`` created for each dataset, in iteration
        order of ``lang_dict``.

    Raises:
        KeyError: If a dataset has no entry in the chosen title dictionary.
    """
    # The title dictionary depends only on dis_lang, so pick it once.
    chosen_dict = dataset_abbr_en_dict if dis_lang == "en" else dataset_abbr_zh_dict

    # TODO: "Export LaTeX Table" / "Export CSV" buttons (pop_latex_table,
    # generate_csv) are not wired up. When adding them, bind df/shot/dataset
    # as lambda default arguments so each button keeps its own loop values.
    tab_list = []
    for dataset, value in lang_dict.items():
        with gr.Tab(chosen_dict[dataset]) as tab:
            for cat, df in value.items():
                if cat == 'mc':
                    for shot in ['Zeroshot', 'Fewshot']:
                        with gr.Tab(f'Multiple Choice Question ({shot})'):
                            dataframe_to_gradio(df, is_mc=True, shot=shot)
                else:
                    with gr.Tab('Question Answering'):
                        dataframe_to_gradio(df, is_mc=False)
        tab_list.append(tab)
    return tab_list
def get_latest_modification_date():
    """Return when the newest CSV in the dataset directory was last modified.

    The directory is read from ``config['dataset']['dataset_dir']``; only
    entries whose name ends with '.csv' are considered.

    Returns:
        The largest modification timestamp, converted with
        ``pd.to_datetime(..., unit='s')`` and formatted as
        "%Y-%m-%d %H:%M:%S". If there is no CSV file, timestamp 0 is
        formatted instead.
    """
    dataset_dir = config['dataset']['dataset_dir']
    csv_mtimes = [
        os.path.getmtime(os.path.join(dataset_dir, entry))
        for entry in os.listdir(dataset_dir)
        if entry.endswith('.csv')
    ]
    # Seed with 0 so an empty directory still yields a valid timestamp.
    newest = max([0, *csv_mtimes])
    return pd.to_datetime(newest, unit='s').strftime("%Y-%m-%d %H:%M:%S")
# UI texts per display language ('zh' / 'en'): page title, introduction and
# the leaderboard section header. The header embeds the latest data
# modification time, evaluated once here at import time.
translation_dict = dict(
    zh=dict(
        intro=ZH_INTRODUCTION_TEXT,
        title=ZH_TITLE,
        lb_sec=f"""# 🏅 排行榜 \n 更新时间: {get_latest_modification_date()}\n""",
    ),
    en=dict(
        intro=INTRODUCTION_TEXT,
        title=TITLE,
        lb_sec=f"""# 🏅 Leaderboard \n Latest update: {get_latest_modification_date()}\n""",
    ),
)
def get_language_lb(language):
    """Rebuild the dataset tabs of every data language for a display language.

    Args:
        language: Display language forwarded to create_lang_leader_board as
            ``dis_lang`` ('en' or 'zh' in this file).

    Returns:
        A flat list: the tabs built for the 'English' data followed by the
        tabs built for the 'Chinese' data.
    """
    tab_dict = {'English': None, 'Chinese': None}
    # Named lang_dict (not dict) so the builtin is not shadowed.
    for key, lang_dict in dict_lang.items():
        tab_dict[key] = create_lang_leader_board(lang_dict, key, language)
    return [*tab_dict['English'], *tab_dict['Chinese']]
def switch_language(language):
    """Produce the values that switch the page to ``language``.

    Args:
        language: Key of translation_dict ('zh' or 'en').

    Returns:
        A tuple of: title, introduction, leaderboard section header, then
        every tab returned by get_language_lb, and finally ``language``
        itself.

    Raises:
        KeyError: If ``language`` is not a key of translation_dict.
    """
    texts = translation_dict[language]
    return (
        texts['title'],
        texts['intro'],
        texts['lb_sec'],
        *get_language_lb(language),
        language,
    )
def get_lb_body(language='en'):
    """Build the leaderboard body: one top-level tab per data language.

    Args:
        language: Display language forwarded to create_lang_leader_board as
            ``dis_lang``.

    Returns:
        A ``(body, tab_dict)`` pair: the ``gr.Blocks`` holding the layout and
        a dict mapping 'English' / 'Chinese' to the list of dataset tabs
        built under that language tab.
    """
    tab_dict = {'English': None, 'Chinese': None}
    with gr.Blocks() as body:
        # Named lang_dict (not dict) so the builtin is not shadowed.
        for key, lang_dict in dict_lang.items():
            with gr.Tab(key):
                tab_dict[key] = create_lang_leader_board(lang_dict, key, language)
    return body, tab_dict
def launch_gradio():
    """Assemble the leaderboard page and start the gradio app.

    The page has two language buttons, the title, the introduction, the
    leaderboard section header and the leaderboard tabs. Clicking a button
    calls switch_language with that button's language code and routes the
    result to the title, introduction, header, every dataset tab and the
    language state.
    """
    demo = gr.Blocks()
    with demo:
        lang_state = gr.State("en")
        with gr.Row():
            en_button = gr.Button("English", variant="primary")
            zh_button = gr.Button("中文", variant="primary")
        title = gr.HTML(TITLE)
        intro = gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
        leaderboard_section = gr.Markdown(
            f"""# 🏅 Leaderboard \n Latest update: {get_latest_modification_date()}\n""",
            elem_classes="markdown-text",
        )

        # Only the per-language tab lists are needed; the Blocks is unused.
        _, tab_dict = get_lb_body(language=lang_state.value)
        tab_list = [*tab_dict['English'], *tab_dict['Chinese']]

        # Both buttons share the same wiring and differ only in the code.
        for button, lang_code in ((en_button, "en"), (zh_button, "zh")):
            button.click(
                switch_language,
                inputs=[gr.State(lang_code)],
                outputs=[title, intro, leaderboard_section, *tab_list, lang_state],
                postprocess=False,
            )
    demo.launch()
# Render floats with two decimals wherever pandas formats them as text.
pd.set_option('display.float_format', '{:.02f}'.format)

# NOTE(review): this job calls launch_gradio() again every hour from the
# scheduler's background thread, while the launch_gradio() call below also
# starts the app. Presumably the intent was to refresh the leaderboard;
# confirm that relaunching is really wanted, since dict_lang is loaded only
# once at import time.
scheduler = BackgroundScheduler()
scheduler.add_job(launch_gradio, 'interval', hours=1)
scheduler.start()

launch_gradio()