Update app.py
app.py CHANGED
@@ -68,7 +68,14 @@ def add_new_eval(
     else:
         content = input_file.decode("utf-8")
         prediction = prediction_analyse(content)
-
+        csv_data = pd.read_csv(CSV_DIR)
+
+        Start_dimension, End_dimension = 1, 13
+        if Evaluation_dimension == 'Image':
+            End_dimension = 10
+        elif Evaluation_dimension == 'Video':
+            Start_dimension = 10
+        each_task_accuracy = {i: round(prediction[i]["correct"] / prediction[i]["total"] * 100, 1) if i >= Start_dimension and i < End_dimension else 0 for i in range(1, 13)}
 
         # count for average image\video\all
         total_correct_image = sum(prediction[i]["correct"] for i in range(1, 10))
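
This hunk loads the leaderboard CSV once and computes per-task accuracy only for the dimension range the submitter selected (tasks 1-9 are image tasks, 10-12 video tasks); everything outside the range is reported as 0 so the CSV row stays fully populated. A minimal sketch of the added comprehension, using a made-up `prediction` dict in place of the output of `prediction_analyse`:

# Toy stand-in for prediction_analyse(content): task ids 1..12,
# each with "correct" and "total" counts.
prediction = {i: {"correct": 5 * i, "total": 100} for i in range(1, 13)}

Evaluation_dimension = 'Image'  # one of 'Image', 'Video', 'All'
Start_dimension, End_dimension = 1, 13
if Evaluation_dimension == 'Image':
    End_dimension = 10
elif Evaluation_dimension == 'Video':
    Start_dimension = 10

# Chained comparison is equivalent to the diff's
# `i >= Start_dimension and i < End_dimension`.
each_task_accuracy = {
    i: round(prediction[i]["correct"] / prediction[i]["total"] * 100, 1)
    if Start_dimension <= i < End_dimension else 0
    for i in range(1, 13)
}
print(each_task_accuracy)  # tasks 1-9 scored, tasks 10-12 reported as 0
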
@@ -77,20 +84,43 @@ def add_new_eval(
         total_image = sum(prediction[i]["total"] for i in range(1, 10))
         total_video = sum(prediction[i]["total"] for i in range(10, 13))
 
-
-
-
+        if Evaluation_dimension != 'Video':
+            average_accuracy_image = round(total_correct_image / total_image * 100, 1)
+        else:
+            average_accuracy_image = 0
+
+        if Evaluation_dimension != 'Image':
+            average_accuracy_video = round(total_correct_video / total_video * 100, 1)
+        else:
+            average_accuracy_video = 0
+
+        if Evaluation_dimension == 'All':
+            overall_accuracy = round((total_correct_image + total_correct_video) / (total_image + total_video) * 100, 1)
+        else:
+            overall_accuracy = 0
 
-        if LLM_type == '
+        if LLM_type == 'Other':
             LLM_name = LLM_name_textbox
         else:
             LLM_name = LLM_type
 
+        if revision_name_textbox == '':
+            col = csv_data.shape[0]
+            model_name = model_name_textbox
+        else:
+            model_name = revision_name_textbox
+            model_name_list = csv_data['Model']
+            name_list = [name.split(']')[0][1:] for name in model_name_list]
+            if revision_name_textbox not in name_list:
+                col = csv_data.shape[0]
+            else:
+                col = name_list.index(revision_name_textbox)
 
         if model_link == '':
-            model_name =
+            model_name = model_name # no url
         else:
-            model_name = '[' +
+            model_name = '[' + model_name + '](' + model_link + ')'
+
         # add new data
         new_data = [
             model_type,
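
The revision branch decides which CSV row to write before the row is built: an empty `revision_name_textbox` appends a brand-new entry, while a named revision overwrites the row whose model matches. Because the `Model` column may hold markdown links, `name.split(']')[0][1:]` peels the name out of the `[name](url)` wrapper. A small sketch with a hypothetical two-row frame:

import pandas as pd

# Hypothetical leaderboard frame; the real one is read from CSV_DIR.
csv_data = pd.DataFrame({'Model': ['[LLaVA](https://llava.example)', 'MiniGPT-4']})

# '[LLaVA](...)'.split(']')[0] -> '[LLaVA'; [1:] drops the leading '['.
# Note: a bare name with no ']' passes through split unchanged, so [1:]
# clips its first character ('MiniGPT-4' -> 'iniGPT-4'), a quirk of this parser.
name_list = [name.split(']')[0][1:] for name in csv_data['Model']]
print(name_list)  # ['LLaVA', 'iniGPT-4']

revision_name_textbox = 'LLaVA'
if revision_name_textbox not in name_list:
    col = csv_data.shape[0]                       # append as a new row
else:
    col = name_list.index(revision_name_textbox)  # overwrite in place
print(col)  # 0
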
@@ -112,22 +142,8 @@ def add_new_eval(
             average_accuracy_video,
             overall_accuracy]
         # pdb.set_trace()
-        csv_data =
-
-        # pdb.set_trace()
-        if revision_name_textbox == '':
-            col = csv_data.shape[0]
-            csv_data.loc[col] = new_data
-            csv_data = csv_data.to_csv(CSV_DIR, index=False)
-        else:
-            model_name_list = csv_data['Model']
-            name_list = [name.split(']')[0][1:] for name in model_name_list]
-            if revision_name_textbox not in name_list:
-                col = csv_data.shape[0]
-            else:
-                col = name_list.index(revision_name_textbox)
-            csv_data.loc[col] = new_data
-            csv_data = csv_data.to_csv(CSV_DIR, index=False)
+        csv_data.loc[col] = new_data
+        csv_data = csv_data.to_csv(CSV_DIR, index=False)
         return 0
 
 def get_baseline_df():
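
With `col` chosen earlier, the old append-or-overwrite branches collapse into a single upsert: `df.loc[col] = row` overwrites an existing index and appends when `col == len(df)`. One caveat worth knowing: `to_csv` called with a path returns `None`, so the final reassignment leaves `csv_data` bound to `None`; that is harmless here only because the function returns immediately afterwards. A sketch with hypothetical columns:

import pandas as pd

df = pd.DataFrame({'Model': ['A'], 'Acc': [50.0]})

col = df.shape[0]              # 1 == len(df), so .loc appends a new row
df.loc[col] = ['B', 61.5]

col = 0                        # existing index, so .loc overwrites that row
df.loc[col] = ['A-v2', 52.3]

result = df.to_csv('leaderboard.csv', index=False)
print(result)  # None: to_csv returns None when given a path
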
@@ -204,6 +220,8 @@ with block:
     with gr.TabItem("🚀 Submit here! ", elem_id="seed-benchmark-tab-table", id=3):
         gr.Markdown(LEADERBORAD_INTRODUCTION, elem_classes="markdown-text")
 
+        with gr.Row():
+            gr.Markdown(SUBMIT_INTRODUCTION, elem_classes="markdown-text")
 
         with gr.Row():
             gr.Markdown("# ✉️✨ Submit your model evaluation json file here!", elem_classes="markdown-text")
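
The UI change adds one more `gr.Row` carrying a `SUBMIT_INTRODUCTION` blurb above the existing submit heading. A self-contained sketch of the same layout pattern; the introduction text below is placeholder copy, not the Space's actual constant:

import gradio as gr

# Placeholder copy; the real SUBMIT_INTRODUCTION lives in the Space's constants.
SUBMIT_INTRODUCTION = "Upload the evaluation json produced by the SEED-Bench script."

with gr.Blocks() as block:
    with gr.Tabs():
        with gr.TabItem("🚀 Submit here! ", elem_id="seed-benchmark-tab-table", id=3):
            # New row: the introduction sits above the submit heading.
            with gr.Row():
                gr.Markdown(SUBMIT_INTRODUCTION, elem_classes="markdown-text")
            with gr.Row():
                gr.Markdown("# ✉️✨ Submit your model evaluation json file here!",
                            elem_classes="markdown-text")

block.launch()
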
@@ -235,14 +253,14 @@ with block:
         with gr.Column():
 
             LLM_type = gr.Dropdown(
-                choices=["Vicuna-7B", "Flan-T5-XL", "LLaMA-7B", "
+                choices=["Vicuna-7B", "Flan-T5-XL", "LLaMA-7B", "Other"],
                 label="LLM type",
                 multiselect=False,
                 value="LLaMA-7B",
                 interactive=True,
             )
             LLM_name_textbox = gr.Textbox(
-                label="LLM model (for
+                label="LLM model (for Other)",
                 placeholder="LLaMA-13B"
             )
             Evaluation_dimension = gr.Dropdown(
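
Adding "Other" to the dropdown pairs with the earlier `LLM_type == 'Other'` branch: the free-text box only matters when the dropdown cannot name the LLM. A sketch of that fallback wiring; the `resolve_llm_name` helper and the Resolve button are hypothetical, added only to exercise the logic:

import gradio as gr

def resolve_llm_name(LLM_type, LLM_name_textbox):
    # Mirrors the diff: the textbox wins only when 'Other' is selected.
    return LLM_name_textbox if LLM_type == 'Other' else LLM_type

with gr.Blocks() as block:
    LLM_type = gr.Dropdown(
        choices=["Vicuna-7B", "Flan-T5-XL", "LLaMA-7B", "Other"],
        label="LLM type",
        multiselect=False,
        value="LLaMA-7B",
        interactive=True,
    )
    LLM_name_textbox = gr.Textbox(
        label="LLM model (for Other)",
        placeholder="LLaMA-13B",
    )
    out = gr.Textbox(label="Resolved LLM name")
    btn = gr.Button("Resolve")
    btn.click(resolve_llm_name, inputs=[LLM_type, LLM_name_textbox], outputs=out)

block.launch()
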