wenhu committed on
Commit
af4a677
1 Parent(s): 09b4a5d

Create app.py

Files changed (1)
  1. app.py +380 -0
app.py ADDED
@@ -0,0 +1,380 @@
+ __all__ = ['block', 'make_clickable_model', 'make_clickable_user', 'get_submissions']
+
+ import os
+ import shutil
+ import json
+ import tempfile
+
+ import gradio as gr
+ import pandas as pd
+
+ from constants import *
+ from huggingface_hub import Repository
+
+ HF_TOKEN = os.environ.get("HF_TOKEN")
+
+ global data_component, filter_component
+
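+ # `from constants import *` is assumed to provide the configuration used
+ # throughout this file (names inferred from usage): SUBMISSION_NAME /
+ # SUBMISSION_URL (submissions dataset repo), CSV_DIR / QUALITY_DIR
+ # (leaderboard CSV paths), TASK_INFO / DEFAULT_INFO / QUALITY_TAB /
+ # QUALITY_LIST / SEMANTIC_LIST (dimension names), DIM_WEIGHT /
+ # QUALITY_WEIGHT / SEMANTIC_WEIGHT / NORMALIZE_DIC (scoring weights and
+ # min-max ranges), the MODEL_INFO / MODEL_INFO_TAB_QUALITY / COLUMN_NAMES /
+ # COLUMN_NAMES_QUALITY / DATA_TITILE_TYPE table metadata, and the Markdown
+ # text constants.
+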
+ def upload_file(files):
+     file_paths = [file.name for file in files]
+     return file_paths
+
+ def add_new_eval(
+     input_file,
+     model_name_textbox: str,
+     revision_name_textbox: str,
+     model_link: str,
+ ):
+     if input_file is None:
+         return "Error! Empty file!"
+
+     upload_data = json.loads(input_file)
+     submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL,
+                                  use_auth_token=HF_TOKEN, repo_type="dataset")
+     submission_repo.git_pull()
+     # Archive the uploaded submission in the submission repo (the target
+     # filename "<model name>.json" is an assumption).
+     with open(os.path.join(SUBMISSION_NAME, f"{model_name_textbox}.json"), "wb") as f:
+         f.write(input_file)
+
+     csv_data = pd.read_csv(CSV_DIR)
+
+     if revision_name_textbox == '':
+         # New submission: append as a new row.
+         row = csv_data.shape[0]
+         model_name = model_name_textbox
+     else:
+         # Revision: overwrite the existing row if the name is already listed.
+         model_name = revision_name_textbox
+         model_name_list = csv_data['Model Name (clickable)']
+         # Strip the markdown link syntax "[name](url)" down to the bare name.
+         name_list = [name.split(']')[0][1:] for name in model_name_list]
+         if revision_name_textbox not in name_list:
+             row = csv_data.shape[0]
+         else:
+             row = name_list.index(revision_name_textbox)
+
+     if model_link != '':
+         model_name = '[' + model_name + '](' + model_link + ')'
+
+     # Record the scores in TASK_INFO order; missing dimensions default to 0.
+     new_data = [model_name]
+     for key in TASK_INFO:
+         if key in upload_data:
+             new_data.append(upload_data[key][0])
+         else:
+             new_data.append(0)
+     csv_data.loc[row] = new_data
+     csv_data.to_csv(CSV_DIR, index=False)
+     submission_repo.push_to_hub()
+     return "Success! Your submission has been recorded."
+
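+ # A minimal sketch of the upload format implied by the loop above: each
+ # TASK_INFO dimension maps to a list whose first element is the score.
+ # (Key names below are illustrative placeholders, not the real TASK_INFO
+ # entries.)
+ #
+ #   {
+ #       "some_dimension": [0.873],
+ #       "another_dimension": [0.541]
+ #   }
+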
+ def get_normalized_df(df):
+     # final_score = df.drop('name', axis=1).sum(axis=1)
+     # df.insert(1, 'Overall Score', final_score)
+     normalize_df = df.copy().fillna(0.0)
+     for column in normalize_df.columns[1:]:
+         min_val = NORMALIZE_DIC[column]['Min']
+         max_val = NORMALIZE_DIC[column]['Max']
+         normalize_df[column] = (normalize_df[column] - min_val) / (max_val - min_val)
+     return normalize_df
+
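+ # Each dimension is min-max normalized onto [0, 1]:
+ #     normalized = (x - Min) / (Max - Min)
+ # e.g. with Min = 0.2 and Max = 0.8 (illustrative values only), a raw score
+ # of 0.5 maps to (0.5 - 0.2) / (0.8 - 0.2) = 0.5.
+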
+ def calculate_selected_score(df, selected_columns):
+     # Split the selection into quality and semantic dimensions.
+     selected_QUALITY = [i for i in selected_columns if i in QUALITY_LIST]
+     selected_SEMANTIC = [i for i in selected_columns if i in SEMANTIC_LIST]
+     # Weighted average inside each group; an empty group yields 0/0 = NaN,
+     # which is used below to detect that no dimension of that group is selected.
+     selected_quality_score = df[selected_QUALITY].sum(axis=1) / sum([DIM_WEIGHT[i] for i in selected_QUALITY])
+     selected_semantic_score = df[selected_SEMANTIC].sum(axis=1) / sum([DIM_WEIGHT[i] for i in selected_SEMANTIC])
+     if selected_quality_score.isna().any() and selected_semantic_score.isna().any():
+         selected_score = (selected_quality_score * QUALITY_WEIGHT + selected_semantic_score * SEMANTIC_WEIGHT) / (QUALITY_WEIGHT + SEMANTIC_WEIGHT)
+         return selected_score.fillna(0.0)
+     if selected_quality_score.isna().any():
+         return selected_semantic_score
+     if selected_semantic_score.isna().any():
+         return selected_quality_score
+     selected_score = (selected_quality_score * QUALITY_WEIGHT + selected_semantic_score * SEMANTIC_WEIGHT) / (QUALITY_WEIGHT + SEMANTIC_WEIGHT)
+     return selected_score.fillna(0.0)
+
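+ # The combined score is a weighted mean of the two group scores:
+ #     selected = (quality * QUALITY_WEIGHT + semantic * SEMANTIC_WEIGHT)
+ #                / (QUALITY_WEIGHT + SEMANTIC_WEIGHT)
+ # For illustration only: with QUALITY_WEIGHT = 4 and SEMANTIC_WEIGHT = 1,
+ # the quality group would contribute 80% of the total.
+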
+ def get_final_score(df, selected_columns):
+     normalize_df = get_normalized_df(df)
+     # Weight every dimension column (everything except the model-name column).
+     for name in normalize_df.drop('Model Name (clickable)', axis=1):
+         normalize_df[name] = normalize_df[name] * DIM_WEIGHT[name]
+     quality_score = normalize_df[QUALITY_LIST].sum(axis=1) / sum([DIM_WEIGHT[i] for i in QUALITY_LIST])
+     semantic_score = normalize_df[SEMANTIC_LIST].sum(axis=1) / sum([DIM_WEIGHT[i] for i in SEMANTIC_LIST])
+     final_score = (quality_score * QUALITY_WEIGHT + semantic_score * SEMANTIC_WEIGHT) / (QUALITY_WEIGHT + SEMANTIC_WEIGHT)
+     if 'Total Score' in df:
+         df['Total Score'] = final_score
+     else:
+         df.insert(1, 'Total Score', final_score)
+     if 'Semantic Score' in df:
+         df['Semantic Score'] = semantic_score
+     else:
+         df.insert(2, 'Semantic Score', semantic_score)
+     if 'Quality Score' in df:
+         df['Quality Score'] = quality_score
+     else:
+         df.insert(3, 'Quality Score', quality_score)
+     selected_score = calculate_selected_score(normalize_df, selected_columns)
+     if 'Selected Score' in df:
+         df['Selected Score'] = selected_score
+     else:
+         df.insert(1, 'Selected Score', selected_score)
+     return df
+
+
+ def get_final_score_quality(df, selected_columns):
+     normalize_df = get_normalized_df(df)
+     for name in normalize_df.drop('Model Name (clickable)', axis=1):
+         normalize_df[name] = normalize_df[name] * DIM_WEIGHT[name]
+     quality_score = normalize_df[QUALITY_TAB].sum(axis=1) / sum([DIM_WEIGHT[i] for i in QUALITY_TAB])
+
+     if 'Quality Score' in df:
+         df['Quality Score'] = quality_score
+     else:
+         df.insert(1, 'Quality Score', quality_score)
+     selected_score = normalize_df[selected_columns].sum(axis=1) / sum([DIM_WEIGHT[i] for i in selected_columns])
+     if 'Selected Score' in df:
+         df['Selected Score'] = selected_score
+     else:
+         df.insert(1, 'Selected Score', selected_score)
+     return df
+
+ def get_baseline_df():
+     submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL,
+                                  use_auth_token=HF_TOKEN, repo_type="dataset")
+     submission_repo.git_pull()
+     df = pd.read_csv(CSV_DIR)
+     df = get_final_score(df, checkbox_group.value)
+     df = df.sort_values(by="Selected Score", ascending=False)
+     present_columns = MODEL_INFO + checkbox_group.value
+     df = df[present_columns]
+     df = convert_scores_to_percentage(df)
+     return df
+
+ def get_baseline_df_quality():
+     submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL,
+                                  use_auth_token=HF_TOKEN, repo_type="dataset")
+     submission_repo.git_pull()
+     df = pd.read_csv(QUALITY_DIR)
+     df = get_final_score_quality(df, checkbox_group_quality.value)
+     df = df.sort_values(by="Selected Score", ascending=False)
+     present_columns = MODEL_INFO_TAB_QUALITY + checkbox_group_quality.value
+     df = df[present_columns]
+     df = convert_scores_to_percentage(df)
+     return df
+
+ def get_all_df(selected_columns, csv_dir=CSV_DIR):
+     submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL,
+                                  use_auth_token=HF_TOKEN, repo_type="dataset")
+     submission_repo.git_pull()
+     df = pd.read_csv(csv_dir)
+     df = get_final_score(df, selected_columns)
+     df = df.sort_values(by="Selected Score", ascending=False)
+     return df
+
+ def get_all_df_quality(selected_columns, quality_dir=QUALITY_DIR):
+     submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL,
+                                  use_auth_token=HF_TOKEN, repo_type="dataset")
+     submission_repo.git_pull()
+     df = pd.read_csv(quality_dir)
+     df = get_final_score_quality(df, selected_columns)
+     df = df.sort_values(by="Selected Score", ascending=False)
+     return df
+
+
+ def convert_scores_to_percentage(df):
+     # Convert every column except the first ('name') to a percentage string.
+     for column in df.columns[1:]:
+         df[column] = round(df[column] * 100, 2)
+         df[column] = df[column].astype(str) + '%'
+     return df
+
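+ # For example, a normalized score of 0.8123 is displayed as "81.23%". Since
+ # this converts the numeric columns to strings, it is applied only after all
+ # sorting and score computation is done.
+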
+ def choose_all_quality():
+     return gr.update(value=QUALITY_LIST)
+
+ def choose_all_semantic():
+     return gr.update(value=SEMANTIC_LIST)
+
+ def disable_all():
+     return gr.update(value=[])
+
+ def enable_all():
+     return gr.update(value=TASK_INFO)
+
+ def on_filter_model_size_method_change(selected_columns):
+     updated_data = get_all_df(selected_columns, CSV_DIR)
+     # Keep the selected columns in their canonical TASK_INFO order.
+     selected_columns = [item for item in TASK_INFO if item in selected_columns]
+     present_columns = MODEL_INFO + selected_columns
+     updated_data = updated_data[present_columns]
+     updated_data = updated_data.sort_values(by="Selected Score", ascending=False)
+     updated_data = convert_scores_to_percentage(updated_data)
+     updated_headers = present_columns
+     update_datatype = [DATA_TITILE_TYPE[COLUMN_NAMES.index(x)] for x in updated_headers]
+     filter_component = gr.components.Dataframe(
+         value=updated_data,
+         headers=updated_headers,
+         type="pandas",
+         datatype=update_datatype,
+         interactive=False,
+         visible=True,
+     )
+     return filter_component
+
+ def on_filter_model_size_method_change_quality(selected_columns):
+     updated_data = get_all_df_quality(selected_columns, QUALITY_DIR)
+     # Keep the selected columns in their canonical QUALITY_TAB order.
+     selected_columns = [item for item in QUALITY_TAB if item in selected_columns]
+     present_columns = MODEL_INFO_TAB_QUALITY + selected_columns
+     updated_data = updated_data[present_columns]
+     updated_data = updated_data.sort_values(by="Selected Score", ascending=False)
+     updated_data = convert_scores_to_percentage(updated_data)
+     updated_headers = present_columns
+     update_datatype = [DATA_TITILE_TYPE[COLUMN_NAMES.index(x)] for x in updated_headers]
+     filter_component = gr.components.Dataframe(
+         value=updated_data,
+         headers=updated_headers,
+         type="pandas",
+         datatype=update_datatype,
+         interactive=False,
+         visible=True,
+     )
+     return filter_component
+
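+ # NOTE: both handlers return a freshly constructed gr.components.Dataframe
+ # rather than a plain value; Gradio treats a returned component as a full
+ # update here, replacing the table contents, headers, and column datatypes
+ # in one step.
+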
+ block = gr.Blocks()
+
+
+ with block:
+     gr.Markdown(
+         LEADERBORAD_INTRODUCTION
+     )
+     with gr.Tabs(elem_classes="tab-buttons") as tabs:
+         # Table 0
+         with gr.TabItem("📊 VBench", elem_id="vbench-tab-table", id=1):
+             with gr.Row():
+                 with gr.Accordion("Citation", open=False):
+                     citation_button = gr.Textbox(
+                         value=CITATION_BUTTON_TEXT,
+                         label=CITATION_BUTTON_LABEL,
+                         elem_id="citation-button",
+                         lines=10,
+                     )
+
+             gr.Markdown(
+                 TABLE_INTRODUCTION
+             )
+             with gr.Row():
+                 with gr.Column(scale=0.2):
+                     chosen_q = gr.Button("Select Quality Dimensions")
+                     chosen_s = gr.Button("Select Semantic Dimensions")
+                     # enable_b = gr.Button("Select All")
+                     disable_b = gr.Button("Deselect All")
+
+                 with gr.Column(scale=0.8):
+                     # Column selection for the leaderboard table.
+                     checkbox_group = gr.CheckboxGroup(
+                         choices=TASK_INFO,
+                         value=DEFAULT_INFO,
+                         label="Evaluation Dimension",
+                         interactive=True,
+                     )
+
+             data_component = gr.components.Dataframe(
+                 value=get_baseline_df,
+                 headers=COLUMN_NAMES,
+                 type="pandas",
+                 datatype=DATA_TITILE_TYPE,
+                 interactive=False,
+                 visible=True,
+             )
+
+             chosen_q.click(choose_all_quality, inputs=None, outputs=[checkbox_group]).then(fn=on_filter_model_size_method_change, inputs=[checkbox_group], outputs=data_component)
+             chosen_s.click(choose_all_semantic, inputs=None, outputs=[checkbox_group]).then(fn=on_filter_model_size_method_change, inputs=[checkbox_group], outputs=data_component)
+             # enable_b.click(enable_all, inputs=None, outputs=[checkbox_group]).then(fn=on_filter_model_size_method_change, inputs=[checkbox_group], outputs=data_component)
+             disable_b.click(disable_all, inputs=None, outputs=[checkbox_group]).then(fn=on_filter_model_size_method_change, inputs=[checkbox_group], outputs=data_component)
+             checkbox_group.change(fn=on_filter_model_size_method_change, inputs=[checkbox_group], outputs=data_component)
+
+         with gr.TabItem("Video Quality", elem_id="vbench-tab-table", id=2):
+             with gr.Accordion("INSTRUCTION", open=False):
+                 quality_button = gr.Textbox(
+                     value=QUALITY_CLAIM_TEXT,
+                     label="",
+                     elem_id="quality-button",
+                     lines=2,
+                 )
+             with gr.Row():
+                 with gr.Column(scale=1.0):
+                     # Column selection for the quality-only table.
+                     checkbox_group_quality = gr.CheckboxGroup(
+                         choices=QUALITY_TAB,
+                         value=QUALITY_TAB,
+                         label="Evaluation Quality Dimension",
+                         interactive=True,
+                     )
+
+             data_component_quality = gr.components.Dataframe(
+                 value=get_baseline_df_quality,
+                 headers=COLUMN_NAMES_QUALITY,
+                 type="pandas",
+                 datatype=DATA_TITILE_TYPE,
+                 interactive=False,
+                 visible=True,
+             )
+
+             checkbox_group_quality.change(fn=on_filter_model_size_method_change_quality, inputs=[checkbox_group_quality], outputs=data_component_quality)
+
+         # table 2
+         with gr.TabItem("📝 About", elem_id="mvbench-tab-table", id=3):
+             gr.Markdown(LEADERBORAD_INFO, elem_classes="markdown-text")
+
+         # table 3
+         with gr.TabItem("🚀 Submit here!", elem_id="mvbench-tab-table", id=4):
+             gr.Markdown(LEADERBORAD_INTRODUCTION, elem_classes="markdown-text")
+
+             with gr.Row():
+                 gr.Markdown(SUBMIT_INTRODUCTION, elem_classes="markdown-text")
+
+             with gr.Row():
+                 gr.Markdown("# ✉️✨ Submit your model evaluation JSON file here!", elem_classes="markdown-text")
+
+             with gr.Row():
+                 with gr.Column():
+                     model_name_textbox = gr.Textbox(
+                         label="Model name", placeholder="LaVie"
+                     )
+                     revision_name_textbox = gr.Textbox(
+                         label="Revision Model Name", placeholder="LaVie"
+                     )
+
+                 with gr.Column():
+                     model_link = gr.Textbox(
+                         label="Model Link", placeholder="https://huggingface.co/decapoda-research/llama-7b-hf"
+                     )
+
+             with gr.Column():
+                 input_file = gr.components.File(label="Click to Upload a JSON File", file_count="single", type='binary')
+                 submit_button = gr.Button("Submit Eval")
+
+             submission_result = gr.Markdown()
+             submit_button.click(
+                 add_new_eval,
+                 inputs=[
+                     input_file,
+                     model_name_textbox,
+                     revision_name_textbox,
+                     model_link,
+                 ],
+                 outputs=[submission_result],
+             )
+
+
+     def refresh_data():
+         value1 = get_baseline_df()
+         return value1
+
+     with gr.Row():
+         data_run = gr.Button("Refresh")
+         data_run.click(on_filter_model_size_method_change, inputs=[checkbox_group], outputs=data_component)
+
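+ # Running this app needs an HF_TOKEN environment variable with write access
+ # to the submissions dataset repo; without it, cloning a private repo or
+ # pushing new submissions will fail.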
+ block.launch()