Spaces:
Running
Running
Junetheriver
committed on
Commit
•
50154dd
1
Parent(s):
9a678a4
wip: latex table, fix requirements
Browse files
- app.py +36 -5
- latex_utils.py +73 -0
- requirements.txt +2 -0
app.py
CHANGED
@@ -13,6 +13,7 @@ from leaderboards import eng_leaderboards, chi_leaderboards
|
|
13 |
import toml
|
14 |
import os
|
15 |
from opseval_datasets import *
|
|
|
16 |
|
17 |
|
18 |
config = toml.load("config.toml")
|
@@ -103,8 +104,17 @@ def plot_radar_chart(df, attributes):
|
|
103 |
|
104 |
return fig
|
105 |
|
|
|
|
|
|
|
106 |
|
107 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
108 |
best_scores = {}
|
109 |
best_plot_datasets = []
|
110 |
for dataset, value in lang_dict.items():
|
@@ -122,18 +132,39 @@ def create_lang_leader_board(lang_dict, lang='en'):
|
|
122 |
tab_list = []
|
123 |
|
124 |
for dataset, value in lang_dict.items():
|
125 |
-
chosen_dict = dataset_abbr_en_dict if
|
126 |
with gr.Tab(chosen_dict[dataset]) as tab:
|
127 |
for cat, df in value.items():
|
128 |
if cat == 'mc':
|
129 |
for shot in ['Zeroshot', 'Fewshot']:
|
130 |
with gr.Tab(f'Multiple Choice Question ({shot})'):
|
131 |
df_component = dataframe_to_gradio(df, is_mc=True, shot=shot)
|
132 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
133 |
else:
|
134 |
with gr.Tab('Question Answering'):
|
135 |
df_component = dataframe_to_gradio(df, is_mc=False)
|
136 |
# df_list.append(df_component)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
137 |
tab_list.append(tab)
|
138 |
return tab_list
|
139 |
|
@@ -162,7 +193,7 @@ translation_dict = {
|
|
162 |
def get_language_lb(language):
|
163 |
tab_dict = {'English': None, 'Chinese': None}
|
164 |
for key, dict in dict_lang.items():
|
165 |
-
tab_list = create_lang_leader_board(dict, language)
|
166 |
tab_dict[key] = tab_list
|
167 |
return [*tab_dict['English'], *tab_dict['Chinese']]
|
168 |
|
@@ -175,7 +206,7 @@ def get_lb_body(language='en'):
|
|
175 |
with gr.Blocks() as body:
|
176 |
for key, dict in dict_lang.items():
|
177 |
with gr.Tab(key):
|
178 |
-
tab_list = create_lang_leader_board(dict, language)
|
179 |
tab_dict[key] = tab_list
|
180 |
return body, tab_dict
|
181 |
|
|
|
13 |
import toml
|
14 |
import os
|
15 |
from opseval_datasets import *
|
16 |
+
from latex_utils import gen_latex_table
|
17 |
|
18 |
|
19 |
config = toml.load("config.toml")
|
|
|
104 |
|
105 |
return fig
|
106 |
|
107 |
+
def pop_latex_table(caption, label, dataframe):
    """Return a visible Gradio textbox holding the LaTeX table for *dataframe*.

    The LaTeX source is produced by ``gen_latex_table(caption, label,
    dataframe)`` and used as the value of a ``gr.Textbox`` labelled
    "LaTeX Table".
    """
    latex_source = gen_latex_table(caption, label, dataframe)
    textbox = gr.Textbox(latex_source, visible=True, label="LaTeX Table")
    return textbox
|
110 |
|
111 |
+
def generate_csv(df, filename):
    """Write *df* to *filename* as CSV and return a download component for it.

    The CSV is written without the index column. The returned ``gr.File`` is
    visible, labelled "Download Link", and points at *filename*.
    """
    # The file must exist on disk before the component is handed its path.
    df.to_csv(filename, index=False)
    return gr.File(
        value=filename,
        label="Download Link",
        type="filepath",
        visible=True,
    )
|
116 |
+
|
117 |
+
def create_lang_leader_board(lang_dict, lang, dis_lang='en'):
|
118 |
best_scores = {}
|
119 |
best_plot_datasets = []
|
120 |
for dataset, value in lang_dict.items():
|
|
|
132 |
tab_list = []
|
133 |
|
134 |
for dataset, value in lang_dict.items():
|
135 |
+
chosen_dict = dataset_abbr_en_dict if dis_lang == "en" else dataset_abbr_zh_dict
|
136 |
with gr.Tab(chosen_dict[dataset]) as tab:
|
137 |
for cat, df in value.items():
|
138 |
if cat == 'mc':
|
139 |
for shot in ['Zeroshot', 'Fewshot']:
|
140 |
with gr.Tab(f'Multiple Choice Question ({shot})'):
|
141 |
df_component = dataframe_to_gradio(df, is_mc=True, shot=shot)
|
142 |
+
# Add a LaTeX table export button; clicking it pops up a floating text window
|
143 |
+
# with gr.Row():
|
144 |
+
# latex_button = gr.Button("Export LaTeX Table", variant="primary")
|
145 |
+
# csv_button = gr.Button("Export CSV", variant="primary")
|
146 |
+
|
147 |
+
# latex_textbox = gr.Textbox(label="LaTeX Table", visible=False)
|
148 |
+
# download_link = gr.File(label="Download Link", type="filepath",
|
149 |
+
# visible=False)
|
150 |
+
|
151 |
+
# latex_button.click(lambda: pop_latex_table(
|
152 |
+
# caption=f"{chosen_dict[dataset]} Multiple Choice Question ({shot}, {lang}) Leaderboard",
|
153 |
+
# label=f"tab:{dataset}_{shot}_{lang}",
|
154 |
+
# dataframe=df,
|
155 |
+
# ), inputs=[], outputs=[latex_textbox])
|
156 |
+
# csv_button.click(lambda: generate_csv(df, f"/tmp/opseval-{chosen_dict[dataset]}-mc-{shot}.csv"), inputs=[], outputs=[download_link])
|
157 |
else:
|
158 |
with gr.Tab('Question Answering'):
|
159 |
df_component = dataframe_to_gradio(df, is_mc=False)
|
160 |
# df_list.append(df_component)
|
161 |
+
# button = gr.Button("Export LaTeX Table", variant="primary")
|
162 |
+
# latex_textbox = gr.Textbox(label="LaTeX Table", visible=False)
|
163 |
+
# button.click(lambda: pop_latex_table(
|
164 |
+
# caption=f"{chosen_dict[dataset]} {shot} {lang} Leaderboard",
|
165 |
+
# label=f"tab:{dataset}_{shot}_{lang}",
|
166 |
+
# dataframe=df,
|
167 |
+
# ), inputs=[], outputs=[latex_textbox])
|
168 |
tab_list.append(tab)
|
169 |
return tab_list
|
170 |
|
|
|
193 |
def get_language_lb(language):
    """Build the leaderboard tabs for every entry of ``dict_lang``.

    Each ``(key, value)`` pair of ``dict_lang`` is passed to
    ``create_lang_leader_board(value, key, language)`` and the result is
    stored under ``key``. The return value is a single list: the entries
    stored under 'English' followed by those stored under 'Chinese'.

    NOTE(review): if ``dict_lang`` does not provide both 'English' and
    'Chinese', the corresponding slot stays ``None`` and the final unpacking
    raises ``TypeError``.
    """
    tabs_by_language = {'English': None, 'Chinese': None}
    for lang_name, leaderboards in dict_lang.items():
        tabs_by_language[lang_name] = create_lang_leader_board(
            leaderboards, lang_name, language
        )
    english_tabs = tabs_by_language['English']
    chinese_tabs = tabs_by_language['Chinese']
    return [*english_tabs, *chinese_tabs]
|
199 |
|
|
|
206 |
with gr.Blocks() as body:
|
207 |
for key, dict in dict_lang.items():
|
208 |
with gr.Tab(key):
|
209 |
+
tab_list = create_lang_leader_board(dict, key, language)
|
210 |
tab_dict[key] = tab_list
|
211 |
return body, tab_dict
|
212 |
|
latex_utils.py
ADDED
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
from tabulate import tabulate
|
3 |
+
|
4 |
+
# Create a sample DataFrame
|
5 |
+
data = {
|
6 |
+
'Model': [
|
7 |
+
'GPT-4', 'GLM-4', 'GPT-3.5-turbo', 'Qwen-72B-Chat', 'ERNIE-Bot-4.0', 'LLaMA-2-70B',
|
8 |
+
'DevOps-Model-14B-Chat', 'GLM-3-turbo', 'Qwen-14B-Chat', 'LLaMA-2-13B', 'InternLM2-Chat-20B',
|
9 |
+
'LLaMA-2-7B', 'Qwen-7B-Chat', 'Baichuan2-13B-Chat', 'InternLM2-Chat-7B', 'Mistral-7B', 'ChatGLM3-6B'
|
10 |
+
],
|
11 |
+
'Naive': [
|
12 |
+
'/', '64.77', '68.30', '70.32', '60.00', '55.00', '63.85', '59.53', '62.60', '53.30', '60.48',
|
13 |
+
'48.20', '52.10', '51.90', '48.20', '47.22', '42.10'
|
14 |
+
],
|
15 |
+
'SC': [
|
16 |
+
'/', '64.77', '68.30', '70.32', '60.00', '56.20', '61.96', '59.53', '59.70', '53.00', '60.48',
|
17 |
+
'46.80', '51.00', '51.60', '48.20', '47.22', '42.10'
|
18 |
+
],
|
19 |
+
'CoT': [
|
20 |
+
'88.70', '77.06', '70.90', '70.13', '70.00', '66.80', '41.15', '63.65', '50.58', '56.80', '45.10',
|
21 |
+
'52.00', '48.30', '44.50', '49.74', '45.58', '43.47'
|
22 |
+
],
|
23 |
+
'CoT+SC': [
|
24 |
+
'/', '77.06', '72.50', '70.22', '70.00', '67.20', '44.01', '63.65', '55.88', '61.00', '45.10',
|
25 |
+
'55.20', '49.80', '47.45', '49.74', '45.58', '43.47'
|
26 |
+
]
|
27 |
+
}
|
28 |
+
|
29 |
+
df = pd.DataFrame(data)
|
30 |
+
|
31 |
+
# Generate the LaTeX table with tabulate
|
32 |
+
latex_table = tabulate(df, headers='keys', tablefmt='latex', showindex=False,
|
33 |
+
colalign='left')
|
34 |
+
|
35 |
+
def gen_latex_table(caption, label, dataframe):
    r"""Render *dataframe* as a booktabs-style LaTeX ``table`` float.

    Args:
        caption: Text placed inside ``\caption{...}``.
        label: Text placed inside ``\label{...}``.
        dataframe: Tabular data handed to ``tabulate`` with
            ``headers='keys'`` (presumably a pandas DataFrame -- confirm
            against callers).

    Returns:
        A string containing ``\begin{table}[]``, the caption, the label,
        ``\footnotesize``, the tabular body and ``\end{table}``. The three
        ``\hline`` rules of the tabular body are rewritten, in order, to
        ``\toprule``, ``\midrule`` and ``\bottomrule``.

    Raises:
        AssertionError: If the tabulate output does not contain exactly
            three ``\hline`` rules.
    """
    # Left-align every column. ``colalign`` expects one alignment per column;
    # a bare string such as 'left' is consumed character by character, so the
    # global string/number alignment options are used instead.
    tabular = tabulate(
        dataframe,
        headers='keys',
        tablefmt='latex',
        showindex=False,
        numalign='left',
        stralign='left',
    )

    # Check and rewrite the rules on the tabular body only, before the
    # caption and label are added, so user-supplied text containing "\hline"
    # can neither trip the check nor be rewritten.
    assert tabular.count("\\hline") == 3, (
        "expected exactly three \\hline rules in the tabulate output"
    )
    # Each replace(..., 1) consumes the next remaining \hline:
    # top, middle, then bottom.
    for rule in ("\\toprule", "\\midrule", "\\bottomrule"):
        tabular = tabular.replace("\\hline", rule, 1)

    return (
        "\\begin{table}[]\n"
        f"\\caption{{{caption}}}\n"
        f"\\label{{{label}}}\n"
        "\\footnotesize\n"
        f"{tabular}\n"
        "\\end{table}"
    )
|
55 |
+
|
56 |
+
# # Add the table environment
|
57 |
+
# latex_table = (
|
58 |
+
# "\\begin{table}[]\n"
|
59 |
+
# "\\caption{LLMs' overall performance (Accuracy\\%) on Wired Network Operations English test set (3-shot). "
|
60 |
+
# "\\normalfont Models are ranked based on their best performance (marked as bold) among different settings.}\n"
|
61 |
+
# "\\label{tab:network_eng_3shot}\n"
|
62 |
+
# "\\footnotesize\n"
|
63 |
+
# f"{latex_table}\n"
|
64 |
+
# "\\end{table}"
|
65 |
+
# )
|
66 |
+
# latex_table = gen_latex_table(
|
67 |
+
# caption="LLMs' overall performance (Accuracy\%) on Wired Network Operations English test set (3-shot). "
|
68 |
+
# "Models are ranked based on their best performance (marked as bold) among different settings.",
|
69 |
+
# label="tab:network_eng_3shot",
|
70 |
+
# table=latex_table
|
71 |
+
# )
|
72 |
+
|
73 |
+
# print(latex_table)
|
requirements.txt
CHANGED
@@ -6,3 +6,5 @@ pandas==2.0.0
|
|
6 |
matplotlib
|
7 |
numpy
|
8 |
plotly
|
|
|
|
|
|
6 |
matplotlib
|
7 |
numpy
|
8 |
plotly
|
9 |
+
toml
|
10 |
+
latextable
|