Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
refactor-leaderboard-0605 (#16)
Browse files- refactor: refactoring the dashboard codes (1ac2307dc5bc3141ed2939476d35e4858a7bfbe8)
- refactor: refactor the listeners (943fef8c6b63aa3880531f366d75a6e94d65d2e5)
- app.py +54 -212
- src/display/gradio_formatting.py +92 -0
- src/display/utils.py +1 -1
app.py
CHANGED
@@ -14,14 +14,14 @@ from src.display.css_html_js import custom_css
|
|
14 |
from src.display.utils import COL_NAME_IS_ANONYMOUS, COL_NAME_REVISION, COL_NAME_TIMESTAMP
|
15 |
from src.envs import API, EVAL_RESULTS_PATH, REPO_ID, RESULTS_REPO, TOKEN
|
16 |
from src.read_evals import get_raw_eval_results, get_leaderboard_df
|
17 |
-
from src.utils import
|
18 |
-
|
|
|
19 |
|
20 |
def restart_space():
|
21 |
API.restart_space(repo_id=REPO_ID)
|
22 |
|
23 |
|
24 |
-
|
25 |
try:
|
26 |
snapshot_download(
|
27 |
repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30,
|
@@ -54,6 +54,9 @@ shown_columns_long_doc, types_long_doc = get_default_cols(
|
|
54 |
leaderboard_df_long_doc = leaderboard_df_long_doc[~leaderboard_df_long_doc[COL_NAME_IS_ANONYMOUS]][shown_columns_long_doc]
|
55 |
leaderboard_df_long_doc.drop([COL_NAME_REVISION, COL_NAME_TIMESTAMP], axis=1, inplace=True)
|
56 |
|
|
|
|
|
|
|
57 |
|
58 |
def update_metric_qa(
|
59 |
metric: str,
|
@@ -66,7 +69,6 @@ def update_metric_qa(
|
|
66 |
):
|
67 |
return update_metric(raw_data, 'qa', metric, domains, langs, reranking_model, query, show_anonymous, show_revision_and_timestamp)
|
68 |
|
69 |
-
|
70 |
def update_metric_long_doc(
|
71 |
metric: str,
|
72 |
domains: list,
|
@@ -90,124 +92,47 @@ with demo:
|
|
90 |
with gr.Column():
|
91 |
# search retrieval models
|
92 |
with gr.Row():
|
93 |
-
selected_version =
|
94 |
-
choices=["AIR-Bench_24.04",],
|
95 |
-
value="AIR-Bench_24.04",
|
96 |
-
label="Select the version of AIR-Bench",
|
97 |
-
interactive = True
|
98 |
-
)
|
99 |
with gr.Row():
|
100 |
-
search_bar =
|
101 |
-
placeholder=" 🔍 Search for retrieval methods (separate multiple queries with `;`) and press ENTER...",
|
102 |
-
show_label=False,
|
103 |
-
elem_id="search-bar",
|
104 |
-
info="Search the retrieval methods"
|
105 |
-
)
|
106 |
-
# select reranking model
|
107 |
-
reranking_models = sorted(list(frozenset([eval_result.reranking_model for eval_result in raw_data])))
|
108 |
with gr.Row():
|
109 |
-
selected_rerankings =
|
110 |
-
choices=reranking_models,
|
111 |
-
# value=reranking_models,
|
112 |
-
label="Select the reranking models",
|
113 |
-
elem_id="reranking-select",
|
114 |
-
interactive=True,
|
115 |
-
multiselect=True
|
116 |
-
)
|
117 |
with gr.Row():
|
118 |
-
select_noreranker_only_btn =
|
119 |
-
value="Only show results without ranking models",
|
120 |
-
)
|
121 |
|
122 |
with gr.Column(min_width=320):
|
123 |
# select the metric
|
124 |
-
selected_metric =
|
125 |
-
choices=METRIC_LIST,
|
126 |
-
value=DEFAULT_METRIC,
|
127 |
-
label="Select the metric",
|
128 |
-
interactive=True,
|
129 |
-
elem_id="metric-select",
|
130 |
-
)
|
131 |
# select domain
|
132 |
with gr.Row():
|
133 |
-
selected_domains =
|
134 |
-
choices=DOMAIN_COLS_QA,
|
135 |
-
value=DOMAIN_COLS_QA,
|
136 |
-
label="Select the domains",
|
137 |
-
elem_id="domain-column-select",
|
138 |
-
interactive=True,
|
139 |
-
)
|
140 |
# select language
|
141 |
with gr.Row():
|
142 |
-
selected_langs =
|
143 |
-
choices=LANG_COLS_QA,
|
144 |
-
value=LANG_COLS_QA,
|
145 |
-
label="Select the languages",
|
146 |
-
elem_id="language-column-select",
|
147 |
-
multiselect=True,
|
148 |
-
interactive=True
|
149 |
-
)
|
150 |
with gr.Row():
|
151 |
-
show_anonymous =
|
152 |
-
label="Show anonymous submissions",
|
153 |
-
value=False,
|
154 |
-
info="The anonymous submissions might have invalid model information."
|
155 |
-
)
|
156 |
with gr.Row():
|
157 |
-
show_revision_and_timestamp =
|
158 |
-
label="Show submission details",
|
159 |
-
value=False,
|
160 |
-
info="Show the revision and timestamp information of submissions"
|
161 |
-
)
|
162 |
|
163 |
-
|
164 |
-
|
165 |
-
datatype=types_qa,
|
166 |
-
elem_id="leaderboard-table",
|
167 |
-
interactive=False,
|
168 |
-
visible=True,
|
169 |
-
)
|
170 |
|
171 |
# Dummy leaderboard for handling the case when the user uses backspace key
|
172 |
-
hidden_leaderboard_table_for_search =
|
173 |
-
value=original_df_qa,
|
174 |
-
datatype=types_qa,
|
175 |
-
visible=False,
|
176 |
-
)
|
177 |
|
178 |
-
|
179 |
-
|
180 |
-
update_table,
|
181 |
-
[
|
182 |
-
hidden_leaderboard_table_for_search,
|
183 |
-
selected_domains,
|
184 |
-
selected_langs,
|
185 |
-
selected_rerankings,
|
186 |
-
search_bar,
|
187 |
-
show_anonymous,
|
188 |
-
],
|
189 |
leaderboard_table,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
190 |
)
|
191 |
|
192 |
-
# Set column-wise listener
|
193 |
-
for selector in [
|
194 |
-
selected_domains, selected_langs, show_anonymous, show_revision_and_timestamp, selected_rerankings
|
195 |
-
]:
|
196 |
-
selector.change(
|
197 |
-
update_table,
|
198 |
-
[
|
199 |
-
hidden_leaderboard_table_for_search,
|
200 |
-
selected_domains,
|
201 |
-
selected_langs,
|
202 |
-
selected_rerankings,
|
203 |
-
search_bar,
|
204 |
-
show_anonymous,
|
205 |
-
show_revision_and_timestamp
|
206 |
-
],
|
207 |
-
leaderboard_table,
|
208 |
-
queue=True,
|
209 |
-
)
|
210 |
-
|
211 |
# set metric listener
|
212 |
selected_metric.change(
|
213 |
update_metric_qa,
|
@@ -223,135 +148,57 @@ with demo:
|
|
223 |
queue=True
|
224 |
)
|
225 |
|
226 |
-
select_noreranker_only_btn.click(
|
227 |
-
clear_reranking_selections,
|
228 |
-
outputs=selected_rerankings
|
229 |
-
)
|
230 |
-
|
231 |
with gr.TabItem("Long Doc", elem_id="long-doc-benchmark-tab-table", id=1):
|
232 |
with gr.Row():
|
233 |
with gr.Column():
|
234 |
with gr.Row():
|
235 |
-
selected_version =
|
236 |
-
choices=["AIR-Bench_24.04",],
|
237 |
-
value="AIR-Bench_24.04",
|
238 |
-
label="Select the version of AIR-Bench",
|
239 |
-
interactive=True
|
240 |
-
)
|
241 |
with gr.Row():
|
242 |
-
search_bar =
|
243 |
-
info="Search the retrieval methods",
|
244 |
-
placeholder=" 🔍 Search for retrieval methods (separate multiple queries with `;`)"
|
245 |
-
" and press ENTER...",
|
246 |
-
show_label=False,
|
247 |
-
elem_id="search-bar-long-doc",
|
248 |
-
)
|
249 |
# select reranking model
|
250 |
-
reranking_models = list(frozenset([eval_result.reranking_model for eval_result in raw_data]))
|
251 |
with gr.Row():
|
252 |
-
selected_rerankings =
|
253 |
-
choices=reranking_models,
|
254 |
-
# value=reranking_models,
|
255 |
-
label="Select the reranking models",
|
256 |
-
elem_id="reranking-select-long-doc",
|
257 |
-
interactive=True,
|
258 |
-
multiselect=True,
|
259 |
-
)
|
260 |
with gr.Row():
|
261 |
-
select_noreranker_only_btn =
|
262 |
-
value="Only show results without ranking models",
|
263 |
-
)
|
264 |
with gr.Column(min_width=320):
|
265 |
# select the metric
|
266 |
with gr.Row():
|
267 |
-
selected_metric =
|
268 |
-
choices=METRIC_LIST,
|
269 |
-
value=DEFAULT_METRIC,
|
270 |
-
label="Select the metric",
|
271 |
-
interactive=True,
|
272 |
-
elem_id="metric-select-long-doc",
|
273 |
-
)
|
274 |
# select domain
|
275 |
with gr.Row():
|
276 |
-
selected_domains =
|
277 |
-
choices=DOMAIN_COLS_LONG_DOC,
|
278 |
-
value=DOMAIN_COLS_LONG_DOC,
|
279 |
-
label="Select the domains",
|
280 |
-
elem_id="domain-column-select-long-doc",
|
281 |
-
interactive=True,
|
282 |
-
)
|
283 |
# select language
|
284 |
with gr.Row():
|
285 |
-
selected_langs =
|
286 |
-
|
287 |
-
value=LANG_COLS_LONG_DOC,
|
288 |
-
label="Select the languages",
|
289 |
-
elem_id="language-column-select-long-doc",
|
290 |
-
multiselect=True,
|
291 |
-
interactive=True
|
292 |
)
|
293 |
with gr.Row():
|
294 |
-
show_anonymous =
|
295 |
-
label="Show anonymous submissions",
|
296 |
-
value=False,
|
297 |
-
info="The anonymous submissions might have invalid model information."
|
298 |
-
)
|
299 |
with gr.Row():
|
300 |
-
show_revision_and_timestamp =
|
301 |
-
label="Show submission details",
|
302 |
-
value=False,
|
303 |
-
info="Show the revision and timestamp information of submissions"
|
304 |
-
)
|
305 |
|
306 |
-
|
307 |
-
|
308 |
-
datatype=types_long_doc,
|
309 |
-
elem_id="leaderboard-table-long-doc",
|
310 |
-
interactive=False,
|
311 |
-
visible=True,
|
312 |
)
|
313 |
|
314 |
# Dummy leaderboard for handling the case when the user uses backspace key
|
315 |
-
hidden_leaderboard_table_for_search =
|
316 |
-
|
317 |
-
datatype=types_long_doc,
|
318 |
-
visible=False,
|
319 |
)
|
320 |
|
321 |
-
|
322 |
-
|
323 |
-
|
324 |
-
|
325 |
-
|
326 |
-
|
327 |
-
|
328 |
-
|
329 |
-
|
330 |
-
|
331 |
-
|
332 |
-
],
|
333 |
-
leaderboard_table_long_doc,
|
334 |
)
|
335 |
|
336 |
-
# Set column-wise listener
|
337 |
-
for selector in [
|
338 |
-
selected_domains, selected_langs, show_anonymous, show_revision_and_timestamp, selected_rerankings
|
339 |
-
]:
|
340 |
-
selector.change(
|
341 |
-
update_table_long_doc,
|
342 |
-
[
|
343 |
-
hidden_leaderboard_table_for_search,
|
344 |
-
selected_domains,
|
345 |
-
selected_langs,
|
346 |
-
selected_rerankings,
|
347 |
-
search_bar,
|
348 |
-
show_anonymous,
|
349 |
-
show_revision_and_timestamp
|
350 |
-
],
|
351 |
-
leaderboard_table_long_doc,
|
352 |
-
queue=True,
|
353 |
-
)
|
354 |
-
|
355 |
# set metric listener
|
356 |
selected_metric.change(
|
357 |
update_metric_long_doc,
|
@@ -364,15 +211,10 @@ with demo:
|
|
364 |
show_anonymous,
|
365 |
show_revision_and_timestamp
|
366 |
],
|
367 |
-
|
368 |
queue=True
|
369 |
)
|
370 |
|
371 |
-
select_noreranker_only_btn.click(
|
372 |
-
clear_reranking_selections,
|
373 |
-
outputs=selected_rerankings
|
374 |
-
)
|
375 |
-
|
376 |
with gr.TabItem("🚀Submit here!", elem_id="submit-tab-table", id=2):
|
377 |
with gr.Column():
|
378 |
with gr.Row():
|
|
|
14 |
from src.display.utils import COL_NAME_IS_ANONYMOUS, COL_NAME_REVISION, COL_NAME_TIMESTAMP
|
15 |
from src.envs import API, EVAL_RESULTS_PATH, REPO_ID, RESULTS_REPO, TOKEN
|
16 |
from src.read_evals import get_raw_eval_results, get_leaderboard_df
|
17 |
+
from src.utils import update_metric, upload_file, get_default_cols, submit_results
|
18 |
+
from src.display.gradio_formatting import get_version_dropdown, get_search_bar, get_reranking_dropdown, get_noreranker_button, get_metric_dropdown, get_domain_dropdown, get_language_dropdown, get_anonymous_checkbox, get_revision_and_ts_checkbox, get_leaderboard_table
|
19 |
+
from src.display.gradio_listener import set_listeners
|
20 |
|
21 |
def restart_space():
    """Ask the Hub API client to restart the Space identified by ``REPO_ID``."""
    API.restart_space(repo_id=REPO_ID)
|
23 |
|
24 |
|
|
|
25 |
try:
|
26 |
snapshot_download(
|
27 |
repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30,
|
|
|
54 |
leaderboard_df_long_doc = leaderboard_df_long_doc[~leaderboard_df_long_doc[COL_NAME_IS_ANONYMOUS]][shown_columns_long_doc]
|
55 |
leaderboard_df_long_doc.drop([COL_NAME_REVISION, COL_NAME_TIMESTAMP], axis=1, inplace=True)
|
56 |
|
57 |
+
# select reranking model
|
58 |
+
reranking_models = sorted(list(frozenset([eval_result.reranking_model for eval_result in raw_data])))
|
59 |
+
|
60 |
|
61 |
def update_metric_qa(
|
62 |
metric: str,
|
|
|
69 |
):
|
70 |
return update_metric(raw_data, 'qa', metric, domains, langs, reranking_model, query, show_anonymous, show_revision_and_timestamp)
|
71 |
|
|
|
72 |
def update_metric_long_doc(
|
73 |
metric: str,
|
74 |
domains: list,
|
|
|
92 |
with gr.Column():
|
93 |
# search retrieval models
|
94 |
with gr.Row():
|
95 |
+
selected_version = get_version_dropdown()
|
|
|
|
|
|
|
|
|
|
|
96 |
with gr.Row():
|
97 |
+
search_bar = get_search_bar()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
98 |
with gr.Row():
|
99 |
+
selected_rerankings = get_reranking_dropdown(reranking_models)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
100 |
with gr.Row():
|
101 |
+
select_noreranker_only_btn = get_noreranker_button()
|
|
|
|
|
102 |
|
103 |
with gr.Column(min_width=320):
|
104 |
# select the metric
|
105 |
+
selected_metric = get_metric_dropdown(METRIC_LIST, DEFAULT_METRIC)
|
|
|
|
|
|
|
|
|
|
|
|
|
106 |
# select domain
|
107 |
with gr.Row():
|
108 |
+
selected_domains = get_domain_dropdown(DOMAIN_COLS_QA, DOMAIN_COLS_QA)
|
|
|
|
|
|
|
|
|
|
|
|
|
109 |
# select language
|
110 |
with gr.Row():
|
111 |
+
selected_langs = get_language_dropdown(LANG_COLS_QA, LANG_COLS_QA)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
112 |
with gr.Row():
|
113 |
+
show_anonymous = get_anonymous_checkbox()
|
|
|
|
|
|
|
|
|
114 |
with gr.Row():
|
115 |
+
show_revision_and_timestamp = get_revision_and_ts_checkbox()
|
|
|
|
|
|
|
|
|
116 |
|
117 |
+
|
118 |
+
leaderboard_table = get_leaderboard_table(leaderboard_df_qa, types_qa)
|
|
|
|
|
|
|
|
|
|
|
119 |
|
120 |
# Dummy leaderboard for handling the case when the user uses backspace key
|
121 |
+
hidden_leaderboard_table_for_search = get_leaderboard_table(original_df_qa, types_qa, visible=False)
|
|
|
|
|
|
|
|
|
122 |
|
123 |
+
set_listeners(
|
124 |
+
"qa",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
125 |
leaderboard_table,
|
126 |
+
hidden_leaderboard_table_for_search,
|
127 |
+
search_bar,
|
128 |
+
select_noreranker_only_btn,
|
129 |
+
selected_domains,
|
130 |
+
selected_langs,
|
131 |
+
selected_rerankings,
|
132 |
+
show_anonymous,
|
133 |
+
show_revision_and_timestamp,
|
134 |
)
|
135 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
136 |
# set metric listener
|
137 |
selected_metric.change(
|
138 |
update_metric_qa,
|
|
|
148 |
queue=True
|
149 |
)
|
150 |
|
|
|
|
|
|
|
|
|
|
|
151 |
with gr.TabItem("Long Doc", elem_id="long-doc-benchmark-tab-table", id=1):
|
152 |
with gr.Row():
|
153 |
with gr.Column():
|
154 |
with gr.Row():
|
155 |
+
selected_version = get_version_dropdown()
|
|
|
|
|
|
|
|
|
|
|
156 |
with gr.Row():
|
157 |
+
search_bar = get_search_bar()
|
|
|
|
|
|
|
|
|
|
|
|
|
158 |
# select reranking model
|
|
|
159 |
with gr.Row():
|
160 |
+
selected_rerankings = get_reranking_dropdown(reranking_models)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
161 |
with gr.Row():
|
162 |
+
select_noreranker_only_btn = get_noreranker_button()
|
|
|
|
|
163 |
with gr.Column(min_width=320):
|
164 |
# select the metric
|
165 |
with gr.Row():
|
166 |
+
selected_metric = get_metric_dropdown(METRIC_LIST, DEFAULT_METRIC)
|
|
|
|
|
|
|
|
|
|
|
|
|
167 |
# select domain
|
168 |
with gr.Row():
|
169 |
+
selected_domains = get_domain_dropdown(DOMAIN_COLS_LONG_DOC, DOMAIN_COLS_LONG_DOC)
|
|
|
|
|
|
|
|
|
|
|
|
|
170 |
# select language
|
171 |
with gr.Row():
|
172 |
+
selected_langs = get_language_dropdown(
|
173 |
+
LANG_COLS_LONG_DOC, LANG_COLS_LONG_DOC
|
|
|
|
|
|
|
|
|
|
|
174 |
)
|
175 |
with gr.Row():
|
176 |
+
show_anonymous = get_anonymous_checkbox()
|
|
|
|
|
|
|
|
|
177 |
with gr.Row():
|
178 |
+
show_revision_and_timestamp = get_revision_and_ts_checkbox()
|
|
|
|
|
|
|
|
|
179 |
|
180 |
+
leaderboard_table = get_leaderboard_table(
|
181 |
+
leaderboard_df_long_doc, types_long_doc
|
|
|
|
|
|
|
|
|
182 |
)
|
183 |
|
184 |
# Dummy leaderboard for handling the case when the user uses backspace key
|
185 |
+
hidden_leaderboard_table_for_search =get_leaderboard_table(
|
186 |
+
original_df_long_doc, types_long_doc, visible=False
|
|
|
|
|
187 |
)
|
188 |
|
189 |
+
set_listeners(
|
190 |
+
"long-doc",
|
191 |
+
leaderboard_table,
|
192 |
+
hidden_leaderboard_table_for_search,
|
193 |
+
search_bar,
|
194 |
+
select_noreranker_only_btn,
|
195 |
+
selected_domains,
|
196 |
+
selected_langs,
|
197 |
+
selected_rerankings,
|
198 |
+
show_anonymous,
|
199 |
+
show_revision_and_timestamp,
|
|
|
|
|
200 |
)
|
201 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
202 |
# set metric listener
|
203 |
selected_metric.change(
|
204 |
update_metric_long_doc,
|
|
|
211 |
show_anonymous,
|
212 |
show_revision_and_timestamp
|
213 |
],
|
214 |
+
leaderboard_table,
|
215 |
queue=True
|
216 |
)
|
217 |
|
|
|
|
|
|
|
|
|
|
|
218 |
with gr.TabItem("🚀Submit here!", elem_id="submit-tab-table", id=2):
|
219 |
with gr.Column():
|
220 |
with gr.Row():
|
src/display/gradio_formatting.py
ADDED
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
|
3 |
+
|
4 |
+
def get_version_dropdown():
    """Build the dropdown used to pick the AIR-Bench version.

    A single version, "AIR-Bench_24.04", is offered and preselected.

    Returns:
        An interactive ``gr.Dropdown``.
    """
    version = "AIR-Bench_24.04"
    dropdown = gr.Dropdown(
        choices=[version],
        value=version,
        label="Select the version of AIR-Bench",
        interactive=True,
    )
    return dropdown
|
11 |
+
|
12 |
+
|
13 |
+
def get_search_bar():
    """Build the textbox in which users type retrieval-method search queries.

    The label is hidden; the placeholder and info text carry the guidance.

    Returns:
        A ``gr.Textbox``.
    """
    hint = " 🔍 Search for retrieval methods (separate multiple queries with `;`) and press ENTER..."
    search_box = gr.Textbox(
        placeholder=hint,
        show_label=False,
        info="Search the retrieval methods",
    )
    return search_box
|
20 |
+
|
21 |
+
|
22 |
+
def get_reranking_dropdown(model_list):
    """Build the multi-select dropdown of reranking models.

    Args:
        model_list: Choices offered by the dropdown.

    Returns:
        An interactive, multi-select ``gr.Dropdown`` with no initial value set.
    """
    options = {
        "choices": model_list,
        "label": "Select the reranking models",
        "interactive": True,
        "multiselect": True,
    }
    return gr.Dropdown(**options)
|
30 |
+
|
31 |
+
|
32 |
+
def get_noreranker_button():
    """Build the button captioned "Only show results without ranking models".

    Returns:
        A ``gr.Button``; its click handler is attached by the caller.
    """
    caption = "Only show results without ranking models"
    return gr.Button(value=caption)
|
36 |
+
|
37 |
+
|
38 |
+
def get_metric_dropdown(metric_list, default_metrics):
    """Build the dropdown used to choose the evaluation metric.

    Args:
        metric_list: Choices offered by the dropdown.
        default_metrics: Initial value of the dropdown.

    Returns:
        An interactive ``gr.Dropdown``.
    """
    metric_selector = gr.Dropdown(
        label="Select the metric",
        choices=metric_list,
        value=default_metrics,
        interactive=True,
    )
    return metric_selector
|
46 |
+
|
47 |
+
|
48 |
+
def get_domain_dropdown(domain_list, default_domains):
    """Build the domain selector.

    Despite the function name, the widget is a ``gr.CheckboxGroup`` rather
    than a dropdown.

    Args:
        domain_list: Choices shown as checkboxes.
        default_domains: Choices that start out checked.

    Returns:
        An interactive ``gr.CheckboxGroup``.
    """
    domain_selector = gr.CheckboxGroup(
        label="Select the domains",
        choices=domain_list,
        value=default_domains,
        interactive=True,
    )
    return domain_selector
|
56 |
+
|
57 |
+
|
58 |
+
def get_language_dropdown(language_list, default_languages):
    """Build the multi-select dropdown used to choose languages.

    Args:
        language_list: Choices offered by the dropdown.
        default_languages: Languages selected initially.

    Returns:
        An interactive, multi-select ``gr.Dropdown``.
    """
    return gr.Dropdown(
        choices=language_list,
        # Honour the caller-supplied default; previously this parameter was
        # ignored and every language in ``language_list`` was preselected.
        value=default_languages,
        label="Select the languages",
        multiselect=True,
        interactive=True,
    )
|
67 |
+
|
68 |
+
|
69 |
+
def get_anonymous_checkbox():
    """Build the "Show anonymous submissions" checkbox, unchecked by default.

    Returns:
        A ``gr.Checkbox``.
    """
    checkbox = gr.Checkbox(
        value=False,
        label="Show anonymous submissions",
        info="The anonymous submissions might have invalid model information.",
    )
    return checkbox
|
75 |
+
|
76 |
+
|
77 |
+
def get_revision_and_ts_checkbox():
    """Build the "Show submission details" checkbox, unchecked by default.

    Returns:
        A ``gr.Checkbox``.
    """
    checkbox = gr.Checkbox(
        value=False,
        label="Show submission details",
        info="Show the revision and timestamp information of submissions",
    )
    return checkbox
|
83 |
+
|
84 |
+
|
85 |
+
def get_leaderboard_table(df, datatype, visible=True, elem_id="leaderboard-table"):
    """Build a read-only dataframe component for a leaderboard.

    Args:
        df: Initial value of the table.
        datatype: Passed through as the component's ``datatype`` argument.
        visible: Whether the component is shown.
        elem_id: Element id assigned to the component. Defaults to the
            previously hard-coded "leaderboard-table"; pass a distinct id
            when several tables are created so they do not all share one id.

    Returns:
        A non-interactive ``gr.components.Dataframe``.
    """
    return gr.components.Dataframe(
        value=df,
        datatype=datatype,
        elem_id=elem_id,
        interactive=False,
        visible=visible,
    )
|
src/display/utils.py
CHANGED
@@ -90,4 +90,4 @@ COLS_LITE = [c.name for c in fields(AutoEvalColumnQA) if c.displayed_by_default
|
|
90 |
|
91 |
QA_BENCHMARK_COLS = [t.value.col_name for t in BenchmarksQA]
|
92 |
|
93 |
-
LONG_DOC_BENCHMARK_COLS = [t.value.col_name for t in BenchmarksLongDoc]
|
|
|
90 |
|
91 |
QA_BENCHMARK_COLS = [t.value.col_name for t in BenchmarksQA]
|
92 |
|
93 |
+
LONG_DOC_BENCHMARK_COLS = [t.value.col_name for t in BenchmarksLongDoc]
|