Junetheriver committed on
Commit
9a678a4
1 Parent(s): 147416c

feat: support bilingual switch

Browse files
Files changed (3) hide show
  1. app.py +73 -18
  2. config.toml +2 -0
  3. texts.py +26 -0
app.py CHANGED
@@ -8,16 +8,14 @@ import gradio as gr
8
  import matplotlib.pyplot as plt
9
  import plotly.graph_objects as go
10
  from apscheduler.schedulers.background import BackgroundScheduler
11
- from texts import INTRODUCTION_TEXT, TITLE
12
  from leaderboards import eng_leaderboards, chi_leaderboards
 
 
13
  from opseval_datasets import *
14
 
15
 
16
- # df_lang = {
17
- # 'English': pd.read_csv("./leaderboard/wired_network_en.csv"),
18
- # 'Chinese': pd.read_csv("./leaderboard/wired_network_zh.csv"),
19
- # }
20
-
21
 
22
 
23
  def create_lang_tabs(lang, lang_cates):
@@ -66,7 +64,6 @@ def process_qa_df(df):
66
  return df
67
 
68
  def dataframe_to_gradio(df, is_mc=True, shot=None):
69
-
70
  if is_mc:
71
  df = process_mc_df(df, shot)
72
  else:
@@ -107,8 +104,7 @@ def plot_radar_chart(df, attributes):
107
  return fig
108
 
109
 
110
- def create_lang_leader_board(lang_dict):
111
-
112
  best_scores = {}
113
  best_plot_datasets = []
114
  for dataset, value in lang_dict.items():
@@ -123,30 +119,89 @@ def create_lang_leader_board(lang_dict):
123
  # print(best_df)
124
  # plot = plot_radar_chart(pd.DataFrame(best_scores), best_plot_datasets)
125
  # gr.Plot(plot)
 
126
 
127
  for dataset, value in lang_dict.items():
128
- with gr.Tab(dataset_abbr_en_dict[dataset]):
 
129
  for cat, df in value.items():
130
  if cat == 'mc':
131
  for shot in ['Zeroshot', 'Fewshot']:
132
  with gr.Tab(f'Multiple Choice Question ({shot})'):
133
- dataframe_to_gradio(df, is_mc=True, shot=shot)
 
134
  else:
135
  with gr.Tab('Question Answering'):
136
- dataframe_to_gradio(df, is_mc=False)
 
 
 
137
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
 
 
 
 
 
 
 
 
 
 
 
 
 
139
 
140
  def launch_gradio():
141
  demo = gr.Blocks()
142
 
143
  with demo:
144
- gr.HTML(TITLE)
145
- gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
146
- gr.Markdown("""# 🏅 Leaderboard \n Latest update: 2024-05-15\n""", elem_classes="markdown-text")
147
- for key, dict in dict_lang.items():
148
- with gr.Tab(key):
149
- create_lang_leader_board(dict)
 
 
 
 
 
 
 
 
 
 
 
 
 
150
 
151
  demo.launch()
152
 
 
8
  import matplotlib.pyplot as plt
9
  import plotly.graph_objects as go
10
  from apscheduler.schedulers.background import BackgroundScheduler
11
+ from texts import *
12
  from leaderboards import eng_leaderboards, chi_leaderboards
13
+ import toml
14
+ import os
15
  from opseval_datasets import *
16
 
17
 
18
+ config = toml.load("config.toml")
 
 
 
 
19
 
20
 
21
  def create_lang_tabs(lang, lang_cates):
 
64
  return df
65
 
66
  def dataframe_to_gradio(df, is_mc=True, shot=None):
 
67
  if is_mc:
68
  df = process_mc_df(df, shot)
69
  else:
 
104
  return fig
105
 
106
 
107
+ def create_lang_leader_board(lang_dict, lang='en'):
 
108
  best_scores = {}
109
  best_plot_datasets = []
110
  for dataset, value in lang_dict.items():
 
119
  # print(best_df)
120
  # plot = plot_radar_chart(pd.DataFrame(best_scores), best_plot_datasets)
121
  # gr.Plot(plot)
122
+ tab_list = []
123
 
124
  for dataset, value in lang_dict.items():
125
+ chosen_dict = dataset_abbr_en_dict if lang == "en" else dataset_abbr_zh_dict
126
+ with gr.Tab(chosen_dict[dataset]) as tab:
127
  for cat, df in value.items():
128
  if cat == 'mc':
129
  for shot in ['Zeroshot', 'Fewshot']:
130
  with gr.Tab(f'Multiple Choice Question ({shot})'):
131
+ df_component = dataframe_to_gradio(df, is_mc=True, shot=shot)
132
+ # df_list.append(df_component)
133
  else:
134
  with gr.Tab('Question Answering'):
135
+ df_component = dataframe_to_gradio(df, is_mc=False)
136
+ # df_list.append(df_component)
137
+ tab_list.append(tab)
138
+ return tab_list
139
 
140
def get_latest_modification_date(dataset_dir=None):
    """Return the newest modification time among CSV files in the dataset directory.

    Args:
        dataset_dir: Directory to scan. Defaults to the path configured under
            ``config['dataset']['dataset_dir']`` (loaded from config.toml at
            module import), preserving the original zero-argument call.

    Returns:
        str: Timestamp formatted as ``"%Y-%m-%d %H:%M:%S"``. If the directory
        contains no ``.csv`` file, this is the Unix epoch ("1970-01-01
        00:00:00") — same as the original behavior, which started from 0.
    """
    if dataset_dir is None:
        # Fall back to the module-level TOML config so existing callers work unchanged.
        dataset_dir = config['dataset']['dataset_dir']
    latest = 0
    for file in os.listdir(dataset_dir):
        if file.endswith('.csv'):
            # Track the newest mtime across all leaderboard CSV files.
            mtime = os.path.getmtime(os.path.join(dataset_dir, file))
            latest = max(latest, mtime)
    # pd.to_datetime with unit='s' interprets the epoch seconds as naive UTC.
    return pd.to_datetime(latest, unit='s').strftime("%Y-%m-%d %H:%M:%S")
148
+
149
# Per-language UI strings keyed by language code ('zh' / 'en').
# 'lb_sec' embeds the latest data refresh time at module import.
translation_dict = {
    'zh': {
        'title': ZH_TITLE,
        'intro': ZH_INTRODUCTION_TEXT,
        'lb_sec': f"""# 🏅 排行榜 \n 更新时间: {get_latest_modification_date()}\n""",
    },
    'en': {
        'title': TITLE,
        'intro': INTRODUCTION_TEXT,
        'lb_sec': f"""# 🏅 Leaderboard \n Latest update: {get_latest_modification_date()}\n""",
    },
}
161
+
162
def get_language_lb(language):
    """Rebuild the per-dataset leaderboard tabs for both language sections.

    Args:
        language: Display language code ('en' or 'zh'); forwarded to
            ``create_lang_leader_board`` to pick the dataset-name dictionary.

    Returns:
        list: The English-section tabs followed by the Chinese-section tabs,
        in the same order as the ``outputs`` wiring in ``launch_gradio``.
    """
    tab_dict = {'English': None, 'Chinese': None}
    # Renamed the loop variable from `dict` (original) to avoid shadowing the builtin.
    for key, lang_cates in dict_lang.items():
        tab_dict[key] = create_lang_leader_board(lang_cates, language)
    return [*tab_dict['English'], *tab_dict['Chinese']]
168
 
169
def switch_language(language):
    """Return the refreshed title, intro, leaderboard header, every dataset tab,
    and the new language code — matching the outputs list of the buttons' click handlers."""
    texts = translation_dict[language]
    return (texts['title'], texts['intro'], texts['lb_sec'],
            *get_language_lb(language), language)
172
+
173
def get_lb_body(language='en'):
    """Build the leaderboard body: one top-level tab per language section.

    Args:
        language: Initial display language code; defaults to 'en'.

    Returns:
        tuple: ``(body, tab_dict)`` where ``body`` is the ``gr.Blocks``
        container and ``tab_dict`` maps 'English'/'Chinese' to the list of
        dataset tabs created inside each section.
    """
    tab_dict = {'English': None, 'Chinese': None}
    with gr.Blocks() as body:
        # Renamed the loop variable from `dict` (original) to avoid shadowing the builtin.
        for key, lang_cates in dict_lang.items():
            with gr.Tab(key):
                tab_dict[key] = create_lang_leader_board(lang_cates, language)
    return body, tab_dict
181
 
182
def launch_gradio():
    """Assemble the Gradio app (bilingual toggle + leaderboard tabs) and launch it."""
    demo = gr.Blocks()

    with demo:
        # Tracks the current UI language; last element returned by switch_language.
        lang_state = gr.State("en")
        with gr.Row():
            en_button = gr.Button("English", variant="primary")
            zh_button = gr.Button("中文", variant="primary")

        # Initial render is English; both components are swapped on language switch.
        title = gr.HTML(TITLE)
        intro = gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

        leaderboard_section = gr.Markdown(f"""# 🏅 Leaderboard \n Latest update: {get_latest_modification_date()}\n""",
                                          elem_classes="markdown-text")

        lb_body, tab_dict = get_lb_body(language=lang_state.value)

        # Flattened English-then-Chinese tab order — must match the order
        # produced by get_language_lb, since these are the click outputs.
        tab_list = [*tab_dict['English'], *tab_dict['Chinese']]
        # print(tab_list)

        # postprocess=False: switch_language returns raw component values/updates.
        # NOTE(review): each button passes its language via a gr.State input —
        # presumably so one handler serves both buttons; verify against Gradio docs.
        en_button.click(switch_language, inputs=[gr.State("en")], outputs=[title, intro, leaderboard_section, *tab_list, lang_state], postprocess=False)
        zh_button.click(switch_language, inputs=[gr.State("zh")], outputs=[title, intro, leaderboard_section, *tab_list, lang_state], postprocess=False)

    demo.launch()
207
 
config.toml ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ [dataset]
2
+ dataset_dir = "./data_v2"
texts.py CHANGED
@@ -21,3 +21,29 @@ This dataset is motivated by the emerging trend of utilizing AI in automated IT
21
  ```
22
 
23
  '''
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  ```
22
 
23
  '''
24
+
25
+
26
# Chinese counterpart of TITLE: page heading rendered via gr.HTML.
ZH_TITLE = '<h1 align="center" id="space-title">🎉 🎉 OpsEval 排行榜 👏 👏</h1>'

# Chinese counterpart of INTRODUCTION_TEXT: markdown intro (about + citation).
# NOTE: this is a runtime UI string — content must stay in Chinese.
ZH_INTRODUCTION_TEXT = '''

# 🚀 关于 OpsEval

OpsEval 数据集代表了在 IT 运维(AIOps)领域评估人工智能(AI)的一次开创性努力,重点关注大型语言模型(LLMs)在该领域的应用。在一个越来越依赖 AI 技术进行自动化和提高效率的 IT 运维时代,了解 LLMs 在运维任务中的表现变得至关重要。OpsEval 提供了一个全面的任务导向基准,专门用于评估 LLMs 在各种重要 IT 运维场景中的表现。

该数据集的动机源于 Gartner 预测的利用 AI 自动化 IT 运维的趋势,以及 LLMs 在自然语言处理(NLP)相关任务中展示的显著能力。OpsEval 旨在弥合评估这些模型在 AIOps 任务中的表现的差距,包括故障根因分析、运维脚本的生成和警报信息的总结。

# 📃 引用

```
@misc{liu2023opseval,
title={OpsEval: A Comprehensive Task-Oriented AIOps Benchmark for Large Language Models},
author={Yuhe Liu and Changhua Pei and Longlong Xu and Bohan Chen and Mingze Sun and Zhirui Zhang and Yongqian Sun and Shenglin Zhang and Kun Wang and Haiming Zhang and Jianhui Li and Gaogang Xie and Xidao Wen and Xiaohui Nie and Dan Pei},
year={2023},
eprint={2310.07637},
archivePrefix={arXiv},
primaryClass={cs.AI}
}
```

'''