Spaces:
Running
Running
File size: 4,665 Bytes
0f89c55 733bd44 a6d507f 733bd44 a6d507f 733bd44 a6d507f 733bd44 a6d507f 733bd44 a6d507f 733bd44 a6d507f 733bd44 a6d507f 733bd44 a6d507f 733bd44 0f89c55 a6d507f 733bd44 0f89c55 733bd44 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 |
# gradio display leaderboard
import pandas as pd
import numpy as np
import matplotlib
# matplotlib.use('macosx')
import gradio as gr
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from apscheduler.schedulers.background import BackgroundScheduler
from texts import INTRODUCTION_TEXT, TITLE
from leaderboards import eng_leaderboards, chi_leaderboards
from opseval_datasets import *
# df_lang = {
# 'English': pd.read_csv("./leaderboard/wired_network_en.csv"),
# 'Chinese': pd.read_csv("./leaderboard/wired_network_zh.csv"),
# }
def create_lang_tabs(lang, lang_cates):
df_dict = {}
for dataset, cates in lang_cates:
dataset_dt = {}
for cat in cates:
leaderboard_df = pd.read_csv(f'./data/{dataset}_{lang}_{cat}.csv')
dataset_dt[cat] = leaderboard_df
df_dict[dataset] = dataset_dt
return df_dict
dict_lang = {
'English': create_lang_tabs('en', eng_leaderboards),
'Chinese': create_lang_tabs('zh', chi_leaderboards)
}
def process_mc_df(df, shot=None):
# 将name列重命名为Model
df = df.rename(columns={"name": "Model"})
# 将zero_naive, zero_self_con, zero_cot, zero_cot_self_con, few_naive, few_self_con, few_cot, few_cot_self_con列重新组织成MultiIndex,一层为Zeroshot, Fewshot,一层为Naive, Self-Consistency, CoT, CoT+Self-Consistency
df = df.set_index("Model")
# df = df.stack().unstack()
df.columns = pd.MultiIndex.from_tuples([("Zeroshot", "Naive"), ("Zeroshot", "SC"), ("Zeroshot", "CoT"), ("Zeroshot", "CoT+SC"), ("Fewshot", "Naive"), ("Fewshot", "SC"), ("Fewshot", "CoT"), ("Fewshot", "CoT+SC")])
# 保留shot的列,比如如果shot=Zeroshot那么只有Zeroshot的列会被保留
if shot:
df = df[shot]
# 将除了Model列之外的列的value转换为数值型,失败的为NaN
df = df.apply(pd.to_numeric, errors="coerce")
# 保留小数点后两位
df = df.round(2)
# 给每一行添加一列BestScore
df["BestScore"] = df.max(axis=1)
# 根据BestScore给df排序
df = df.sort_values(by="BestScore", ascending=False)
# reset_index
df = df.reset_index()
return df
def dataframe_to_gradio(df, is_mc=True, shot=None):
if is_mc:
df = process_mc_df(df, shot)
headers = df.columns
# types = ["str"] + ["number"] * (len(headers) - 1)
return gr.components.Dataframe(
value=df.values.tolist(),
headers=[label for label in df.columns],
# datatype=types,
# max_rows=10,
)
def plot_radar_chart(df, attributes):
fig = go.Figure()
for index, row in df.iterrows():
model = row['Model']
values = row[attributes].tolist()
fig.add_trace(go.Scatterpolar(
r=values,
theta=attributes,
fill='toself',
name=model
))
fig.update_layout(
title="OpsEval",
polar=dict(
radialaxis=dict(
visible=True,
range=[0, 0.9]
)),
showlegend=True
)
return fig
def create_lang_leader_board(lang_dict):
best_scores = {}
best_plot_datasets = []
for dataset, value in lang_dict.items():
for cat, df in value.items():
if cat == 'mc':
processed = process_mc_df(df)
bestscores = processed['BestScore']
best_scores[dataset] = bestscores
best_plot_datasets.append(dataset)
best_df = pd.DataFrame(best_scores)
# print(best_scores)
# print(best_df)
# plot = plot_radar_chart(pd.DataFrame(best_scores), best_plot_datasets)
# gr.Plot(plot)
for dataset, value in lang_dict.items():
with gr.Tab(dataset_abbr_en_dict[dataset]):
for cat, df in value.items():
if cat == 'mc':
for shot in ['Zeroshot', 'Fewshot']:
with gr.Tab(f'Multiple Choice Question ({shot})'):
dataframe_to_gradio(df, is_mc=True, shot=shot)
else:
with gr.Tab('Question Answering'):
dataframe_to_gradio(df, is_mc=False)
def launch_gradio():
demo = gr.Blocks()
with demo:
gr.HTML(TITLE)
gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
for key, dict in dict_lang.items():
with gr.Tab(key):
create_lang_leader_board(dict)
demo.launch()
pd.set_option('display.float_format', '{:.02f}'.format)
scheduler = BackgroundScheduler()
scheduler.add_job(launch_gradio, 'interval', hours=1)
scheduler.start()
launch_gradio() |