nxphi47 committed on
Commit
01f830e
1 Parent(s): e650896

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +218 -0
app.py ADDED
@@ -0,0 +1,218 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import plotly.graph_objects as go
3
+ from datasets import load_dataset
4
+ import plotly.graph_objects as go
5
+ from plotly.subplots import make_subplots
6
+
7
+ import os
8
+ # ==
9
+ import json
10
+ import pandas as pd
11
+ import plotly.express as px
12
+ import plotly.graph_objects as go
13
+
14
+
15
# Task categories and two-letter language codes used by the benchmark.
# NOTE: both names are assigned again further down in this file.
CATEGORIES = [
    "task-solving",
    "math-reasoning",
    "general-instruction",
    "natural-question",
    "safety",
]
LANGS = ["en", "vi", "th", "id", "km", "lo", "ms", "my", "tl"]
18
+
19
+ # benchmark_name = "sea_bench_all"
20
+
21
+ # with open(f"data/{benchmark_name}/question.jsonl", 'r') as f:
22
+ # questions = [
23
+ # json.loads(x)
24
+ # for x in f
25
+ # ]
26
+ # questions = {
27
+ # q['question_id']: q
28
+ # for q in questions
29
+ # }
30
+
31
+
32
+ # def get_model_df():
33
+ # cnt = 0
34
+ # q2result = []
35
+ # fin = open(f"data/{benchmark_name}/model_judgment/gpt-4_single.jsonl", "r")
36
+ # for line in fin:
37
+ # obj = json.loads(line)
38
+ # # obj["category"] = CATEGORIES[(obj["question_id"]-81)//10]
39
+ # obj["category"] = questions[obj['question_id']]['category']
40
+ # obj["lang"] = questions[obj['question_id']]['lang']
41
+ # q2result.append(obj)
42
+ # df = pd.DataFrame(q2result)
43
+ # return df
44
+
45
+
46
# Runtime configuration, read once from environment variables at import time.
# "force_download" is parsed as an integer flag ("0"/"1"); it defaults to on.
force_download = bool(int(os.getenv("force_download", "1")))
# Access token and dataset repository passed to hf_hub_download; empty when unset.
HF_TOKEN = os.getenv("HF_TOKEN", "")
DATA_SET_REPO_PATH = os.getenv("DATA_SET_REPO_PATH", "")
# Name of the CSV file inside the dataset repository.
PERFORMANCE_FILENAME = os.getenv("PERFORMANCE_FILENAME", "gpt4_single_json.csv")

# Process-wide cache for the scores DataFrame; filled by get_model_df().
MODEL_DFRAME = None
52
+
53
def get_model_df():
    """Return the per-question score table, downloading it on first use.

    The CSV named by ``PERFORMANCE_FILENAME`` is fetched from the Hugging Face
    dataset repository ``DATA_SET_REPO_PATH`` using ``HF_TOKEN`` and parsed
    with ``pandas.read_csv``. The resulting DataFrame is stored in the
    module-level ``MODEL_DFRAME`` so later calls return it without any
    network access.

    Returns:
        pandas.DataFrame: the parsed CSV file.

    Raises:
        ValueError: if ``DATA_SET_REPO_PATH`` or ``HF_TOKEN`` is empty.
    """
    global MODEL_DFRAME
    if isinstance(MODEL_DFRAME, pd.DataFrame):
        print('Load cache data frame')
        return MODEL_DFRAME

    # Explicit checks instead of ``assert``: assertions are stripped when
    # Python runs with -O, and they give no hint about what is misconfigured.
    if DATA_SET_REPO_PATH == '':
        raise ValueError("DATA_SET_REPO_PATH environment variable must be set")
    if HF_TOKEN == '':
        raise ValueError("HF_TOKEN environment variable must be set")

    # Imported lazily so the dependency is only needed when a download happens.
    from huggingface_hub import hf_hub_download

    file_path = hf_hub_download(
        repo_id=DATA_SET_REPO_PATH,
        filename=PERFORMANCE_FILENAME,
        force_download=force_download,
        local_dir='./hf_cache',
        repo_type="dataset",
        token=HF_TOKEN,
    )
    print(f'Downloaded file at {file_path} from {DATA_SET_REPO_PATH} / {PERFORMANCE_FILENAME}')
    MODEL_DFRAME = pd.read_csv(file_path)
    return MODEL_DFRAME
76
+
77
+
78
def aggregate_df(df, model_dict, category_name, categories):
    """Average scores per (model, category) for the models in ``model_dict``.

    Args:
        df: DataFrame with at least the columns ``"model"``, ``"score"`` and
            ``category_name``.
        model_dict: Mapping from raw model identifier (as found in
            ``df["model"]``) to the display name used in the result.
        category_name: Name of the column to group by (e.g. ``"category"``).
        categories: Ordered iterable of values of ``category_name`` to report.

    Returns:
        pandas.DataFrame with columns ``["model", category_name, "score"]``.
        Only models that are both keys of ``model_dict`` and present in ``df``
        are included, in reverse ``model_dict`` order; within a model, rows
        follow the order of ``categories``. ``"model"`` holds the display
        name. ``"score"`` is the mean of the non-negative scores, or NaN when
        a model has no valid score for a category. The frame is empty (but
        still has the three columns) when no target model is present.
    """
    columns = ["model", category_name, "score"]

    # Keep only target models that actually occur in the data; the reversed
    # order matches the descending sort on model_dict position.
    present = set(df["model"].unique())
    target_models = [m for m in reversed(list(model_dict)) if m in present]

    # Negative scores mark judgments whose score could not be parsed
    # (<1% of cases), so they are excluded from the averages.
    valid = df[df["score"] >= 0]

    rows = []
    for model in target_models:
        model_rows = valid[valid["model"] == model]
        for cat in categories:
            mean_score = model_rows.loc[model_rows[category_name] == cat, "score"].mean()
            # Look the display name up directly: a frame-wide replace() would
            # also rewrite matching category values and chain renames when a
            # display name equals another raw identifier.
            rows.append({"model": model_dict[model], category_name: cat, "score": mean_score})

    # Passing the columns explicitly keeps the schema stable for empty results.
    return pd.DataFrame(rows, columns=columns)
101
+
102
+
103
# Raw model identifier (as stored in the score table) -> display name.
# Only the models listed here are plotted.
rename_map = {
    "seallm13b10L4k_a_sft4xdpo_5a": "SeaLLM-13b-10L",
    "polylm": "PolyLM-13b",
    "qwen": "Qwen-14b",
    "gpt-3.5-turbo": "GPT-3.5-turbo",
    "gpt-4-1106-preview": "GPT-4-turbo",
}

# Category keys, in the order they are laid out around the radar chart.
CATEGORIES = [
    "task-solving",
    "math-reasoning",
    "general-instruction",
    "natural-question",
    "safety",
]

# Category key -> axis label shown on the chart.
CATEGORIES_NAMES = {
    "task-solving": "Task-solving",
    "math-reasoning": "Math",
    "general-instruction": "General-instruction",
    "natural-question": "NaturalQA",
    "safety": "Safety",
}

# Language codes, in the order they are laid out around the radar chart.
LANGS = ["en", "vi", "id", "ms", "tl", "th", "km", "lo", "my"]

# Two-letter language code -> three-letter axis label shown on the chart.
LANG_NAMES = {
    "en": "eng",
    "vi": "vie",
    "th": "tha",
    "id": "ind",
    "km": "khm",
    "lo": "lao",
    "ms": "msa",
    "my": "mya",
    "tl": "tgl",
}
135
+
136
+
137
def _add_radar_traces(fig, df_score, axis_column, axis_labels, model_names,
                      colors, col, show_legend):
    """Add one closed radar trace per model to the polar subplot at (1, col).

    ``df_score`` is an aggregate_df() result whose ``"model"`` column holds
    display names; ``axis_labels`` maps the values of ``axis_column`` to the
    labels shown around the chart.
    """
    for i, model_name in enumerate(model_names):
        model_rows = df_score[df_score["model"] == model_name]
        if model_rows.empty:
            # Model is configured but has no rows in the data: skip it rather
            # than fail on an empty list below.
            continue
        theta = [axis_labels[x] for x in model_rows[axis_column].tolist()]
        r = model_rows["score"].tolist()
        trace = go.Scatterpolar(
            name=model_name,
            # Repeat the first point at the end so the polygon is closed.
            r=r + r[:1],
            theta=theta + theta[:1],
            # Same group in both subplots, so one legend entry toggles both.
            legendgroup=f'{i}',
            # Wrap around the palette when there are more models than colors.
            marker=dict(color=colors[i % len(colors)]),
            hovertemplate="""Score: %{r:.2f}""",
            # None leaves plotly's default legend behaviour untouched.
            showlegend=None if show_legend else False,
        )
        fig.add_trace(trace, 1, col)


def plot_fn():
    """Build the Sea-Bench radar figure.

    Loads the score table via get_model_df(), averages scores per model by
    category and by language, and draws them as two side-by-side polar
    subplots ("By Category", "By Language") with a radial range of 0 to 10.
    Each model in ``rename_map`` gets the same color and legend group in both
    subplots; only the first subplot contributes legend entries.

    Returns:
        plotly.graph_objects.Figure: the assembled figure.
    """
    df = get_model_df()

    # Color and legend group are tied to the position in rename_map, so they
    # stay stable even when a model is missing from the data.
    display_names = list(rename_map.values())
    colors = px.colors.qualitative.Plotly

    cat_df = aggregate_df(df, rename_map, "category", CATEGORIES)
    lang_df = aggregate_df(df, rename_map, "lang", LANGS)

    fig = make_subplots(
        rows=1, cols=2,
        specs=[[{'type': 'polar'}] * 2],
        subplot_titles=("By Category", "By Language"),
    )
    # Raise the two subplot titles slightly above their default position.
    fig.layout.annotations[0].y = 1.05
    fig.layout.annotations[1].y = 1.05

    _add_radar_traces(fig, cat_df, "category", CATEGORIES_NAMES,
                      display_names, colors, col=1, show_legend=True)
    _add_radar_traces(fig, lang_df, "lang", LANG_NAMES,
                      display_names, colors, col=2, show_legend=False)

    polar_config = dict(
        angularaxis=dict(
            rotation=90,  # start position of angular axis
        ),
        radialaxis=dict(
            range=[0, 10],
        ),
    )
    fig.update_layout(
        polar=polar_config,
        polar2=polar_config,
        title='Sea-Bench (rated by GPT-4)',
    )
    return fig
206
+
207
# UI: a single plot component that is filled in by plot_fn when the page loads.
with gr.Blocks() as demo:
    with gr.Column():
        gr_plot = gr.Plot()
    demo.load(fn=plot_fn, inputs=[], outputs=gr_plot)

demo.launch()