Spaces: Running on CPU Upgrade
refactor: reformat the code
Browse files
- app.py +20 -20
- src/columns.py +0 -2
- src/loaders.py +18 -18
- src/models.py +1 -2
- src/utils.py +68 -68
app.py
CHANGED
@@ -63,13 +63,13 @@ datastore = ds_dict[LATEST_BENCHMARK_VERSION]
|
|
63 |
|
64 |
|
65 |
def update_qa_metric(
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
):
|
74 |
global datastore
|
75 |
return update_metric(
|
@@ -86,13 +86,13 @@ def update_qa_metric(
|
|
86 |
|
87 |
|
88 |
def update_doc_metric(
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
):
|
97 |
global datastore
|
98 |
return update_metric(
|
@@ -218,7 +218,7 @@ with demo:
|
|
218 |
# Dummy leaderboard for handling the case when the user uses backspace key
|
219 |
_qa_df_ret_hidden = datastore.qa_raw_df[
|
220 |
datastore.qa_raw_df[COL_NAME_RERANKING_MODEL] == "NoReranker"
|
221 |
-
|
222 |
_qa_df_ret_hidden = reset_rank(_qa_df_ret_hidden)
|
223 |
qa_df_elem_ret_hidden = get_leaderboard_table(
|
224 |
_qa_df_ret_hidden, datastore.qa_types, visible=False
|
@@ -277,7 +277,7 @@ with demo:
|
|
277 |
|
278 |
_qa_df_rerank_hidden = datastore.qa_raw_df[
|
279 |
datastore.qa_raw_df[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK
|
280 |
-
|
281 |
_qa_df_rerank_hidden = reset_rank(_qa_df_rerank_hidden)
|
282 |
qa_df_elem_rerank_hidden = get_leaderboard_table(
|
283 |
_qa_df_rerank_hidden, datastore.qa_types, visible=False
|
@@ -391,13 +391,13 @@ with demo:
|
|
391 |
|
392 |
_doc_df_ret = datastore.doc_fmt_df[
|
393 |
datastore.doc_fmt_df[COL_NAME_RERANKING_MODEL] == "NoReranker"
|
394 |
-
|
395 |
_doc_df_ret = reset_rank(_doc_df_ret)
|
396 |
doc_df_elem_ret = get_leaderboard_table(_doc_df_ret, datastore.doc_types)
|
397 |
|
398 |
_doc_df_ret_hidden = datastore.doc_raw_df[
|
399 |
datastore.doc_raw_df[COL_NAME_RERANKING_MODEL] == "NoReranker"
|
400 |
-
|
401 |
_doc_df_ret_hidden = reset_rank(_doc_df_ret_hidden)
|
402 |
doc_df_elem_ret_hidden = get_leaderboard_table(
|
403 |
_doc_df_ret_hidden, datastore.doc_types, visible=False
|
@@ -439,7 +439,7 @@ with demo:
|
|
439 |
with gr.TabItem("Reranking Only", id=22):
|
440 |
_doc_df_rerank = datastore.doc_fmt_df[
|
441 |
datastore.doc_fmt_df[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK
|
442 |
-
|
443 |
_doc_df_rerank = reset_rank(_doc_df_rerank)
|
444 |
doc_rerank_models = (
|
445 |
_doc_df_rerank[COL_NAME_RERANKING_MODEL].apply(remove_html).unique().tolist()
|
@@ -452,7 +452,7 @@ with demo:
|
|
452 |
doc_df_elem_rerank = get_leaderboard_table(_doc_df_rerank, datastore.doc_types)
|
453 |
_doc_df_rerank_hidden = datastore.doc_raw_df[
|
454 |
datastore.doc_raw_df[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK
|
455 |
-
|
456 |
_doc_df_rerank_hidden = reset_rank(_doc_df_rerank_hidden)
|
457 |
doc_df_elem_rerank_hidden = get_leaderboard_table(
|
458 |
_doc_df_rerank_hidden, datastore.doc_types, visible=False
|
|
|
63 |
|
64 |
|
65 |
def update_qa_metric(
|
66 |
+
metric: str,
|
67 |
+
domains: list,
|
68 |
+
langs: list,
|
69 |
+
reranking_model: list,
|
70 |
+
query: str,
|
71 |
+
show_anonymous: bool,
|
72 |
+
show_revision_and_timestamp: bool,
|
73 |
):
|
74 |
global datastore
|
75 |
return update_metric(
|
|
|
86 |
|
87 |
|
88 |
def update_doc_metric(
|
89 |
+
metric: str,
|
90 |
+
domains: list,
|
91 |
+
langs: list,
|
92 |
+
reranking_model: list,
|
93 |
+
query: str,
|
94 |
+
show_anonymous: bool,
|
95 |
+
show_revision_and_timestamp,
|
96 |
):
|
97 |
global datastore
|
98 |
return update_metric(
|
|
|
218 |
# Dummy leaderboard for handling the case when the user uses backspace key
|
219 |
_qa_df_ret_hidden = datastore.qa_raw_df[
|
220 |
datastore.qa_raw_df[COL_NAME_RERANKING_MODEL] == "NoReranker"
|
221 |
+
]
|
222 |
_qa_df_ret_hidden = reset_rank(_qa_df_ret_hidden)
|
223 |
qa_df_elem_ret_hidden = get_leaderboard_table(
|
224 |
_qa_df_ret_hidden, datastore.qa_types, visible=False
|
|
|
277 |
|
278 |
_qa_df_rerank_hidden = datastore.qa_raw_df[
|
279 |
datastore.qa_raw_df[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK
|
280 |
+
]
|
281 |
_qa_df_rerank_hidden = reset_rank(_qa_df_rerank_hidden)
|
282 |
qa_df_elem_rerank_hidden = get_leaderboard_table(
|
283 |
_qa_df_rerank_hidden, datastore.qa_types, visible=False
|
|
|
391 |
|
392 |
_doc_df_ret = datastore.doc_fmt_df[
|
393 |
datastore.doc_fmt_df[COL_NAME_RERANKING_MODEL] == "NoReranker"
|
394 |
+
]
|
395 |
_doc_df_ret = reset_rank(_doc_df_ret)
|
396 |
doc_df_elem_ret = get_leaderboard_table(_doc_df_ret, datastore.doc_types)
|
397 |
|
398 |
_doc_df_ret_hidden = datastore.doc_raw_df[
|
399 |
datastore.doc_raw_df[COL_NAME_RERANKING_MODEL] == "NoReranker"
|
400 |
+
]
|
401 |
_doc_df_ret_hidden = reset_rank(_doc_df_ret_hidden)
|
402 |
doc_df_elem_ret_hidden = get_leaderboard_table(
|
403 |
_doc_df_ret_hidden, datastore.doc_types, visible=False
|
|
|
439 |
with gr.TabItem("Reranking Only", id=22):
|
440 |
_doc_df_rerank = datastore.doc_fmt_df[
|
441 |
datastore.doc_fmt_df[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK
|
442 |
+
]
|
443 |
_doc_df_rerank = reset_rank(_doc_df_rerank)
|
444 |
doc_rerank_models = (
|
445 |
_doc_df_rerank[COL_NAME_RERANKING_MODEL].apply(remove_html).unique().tolist()
|
|
|
452 |
doc_df_elem_rerank = get_leaderboard_table(_doc_df_rerank, datastore.doc_types)
|
453 |
_doc_df_rerank_hidden = datastore.doc_raw_df[
|
454 |
datastore.doc_raw_df[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK
|
455 |
+
]
|
456 |
_doc_df_rerank_hidden = reset_rank(_doc_df_rerank_hidden)
|
457 |
doc_df_elem_rerank_hidden = get_leaderboard_table(
|
458 |
_doc_df_rerank_hidden, datastore.doc_types, visible=False
|
src/columns.py
CHANGED
@@ -95,5 +95,3 @@ COL_NAME_RANK = "Rank 🏆"
|
|
95 |
COL_NAME_REVISION = "Revision"
|
96 |
COL_NAME_TIMESTAMP = "Submission Date"
|
97 |
COL_NAME_IS_ANONYMOUS = "Anonymous Submission"
|
98 |
-
|
99 |
-
|
|
|
95 |
COL_NAME_REVISION = "Revision"
|
96 |
COL_NAME_TIMESTAMP = "Submission Date"
|
97 |
COL_NAME_IS_ANONYMOUS = "Anonymous Submission"
|
|
|
|
src/loaders.py
CHANGED
@@ -52,28 +52,28 @@ def load_raw_eval_results(results_path: str) -> List[FullEvalResult]:
|
|
52 |
|
53 |
def load_leaderboard_datastore(file_path, version) -> LeaderboardDataStore:
|
54 |
slug = get_safe_name(version)[-4:]
|
55 |
-
|
56 |
-
|
57 |
-
print(f"raw data: {len(
|
58 |
|
59 |
-
|
60 |
-
print(f"QA data loaded: {
|
61 |
-
|
62 |
-
qa_cols,
|
63 |
-
|
64 |
-
|
65 |
|
66 |
-
|
67 |
-
print(f"Long-Doc data loaded: {len(
|
68 |
-
|
69 |
-
doc_cols,
|
70 |
-
|
71 |
-
|
72 |
|
73 |
-
|
74 |
-
list(frozenset([eval_result.reranking_model for eval_result in
|
75 |
)
|
76 |
-
return
|
77 |
|
78 |
|
79 |
def load_eval_results(file_path: str) -> Dict[str, LeaderboardDataStore]:
|
|
|
52 |
|
53 |
def load_leaderboard_datastore(file_path, version) -> LeaderboardDataStore:
|
54 |
slug = get_safe_name(version)[-4:]
|
55 |
+
ds = LeaderboardDataStore(version, slug, None, None, None, None, None, None, None, None)
|
56 |
+
ds.raw_data = load_raw_eval_results(file_path)
|
57 |
+
print(f"raw data: {len(ds.raw_data)}")
|
58 |
|
59 |
+
ds.qa_raw_df = get_leaderboard_df(ds, TaskType.qa, DEFAULT_METRIC_QA)
|
60 |
+
print(f"QA data loaded: {ds.qa_raw_df.shape}")
|
61 |
+
ds.qa_fmt_df = ds.qa_raw_df.copy()
|
62 |
+
qa_cols, ds.qa_types = get_default_cols(TaskType.qa, ds.slug, add_fix_cols=True)
|
63 |
+
ds.qa_fmt_df = ds.qa_fmt_df[~ds.qa_fmt_df[COL_NAME_IS_ANONYMOUS]][qa_cols]
|
64 |
+
ds.qa_fmt_df.drop([COL_NAME_REVISION, COL_NAME_TIMESTAMP], axis=1, inplace=True)
|
65 |
|
66 |
+
ds.doc_raw_df = get_leaderboard_df(ds, TaskType.long_doc, DEFAULT_METRIC_LONG_DOC)
|
67 |
+
print(f"Long-Doc data loaded: {len(ds.doc_raw_df)}")
|
68 |
+
ds.doc_fmt_df = ds.doc_raw_df.copy()
|
69 |
+
doc_cols, ds.doc_types = get_default_cols(TaskType.long_doc, ds.slug, add_fix_cols=True)
|
70 |
+
ds.doc_fmt_df = ds.doc_fmt_df[~ds.doc_fmt_df[COL_NAME_IS_ANONYMOUS]][doc_cols]
|
71 |
+
ds.doc_fmt_df.drop([COL_NAME_REVISION, COL_NAME_TIMESTAMP], axis=1, inplace=True)
|
72 |
|
73 |
+
ds.reranking_models = sorted(
|
74 |
+
list(frozenset([eval_result.reranking_model for eval_result in ds.raw_data]))
|
75 |
)
|
76 |
+
return ds
|
77 |
|
78 |
|
79 |
def load_eval_results(file_path: str) -> Dict[str, LeaderboardDataStore]:
|
src/models.py
CHANGED
@@ -165,9 +165,8 @@ def make_clickable_model(model_name: str, model_link: str):
|
|
165 |
def model_hyperlink(link, model_name):
|
166 |
return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
|
167 |
|
|
|
168 |
def get_safe_name(name: str):
|
169 |
"""Get RFC 1123 compatible safe name"""
|
170 |
name = name.replace("-", "_")
|
171 |
return "".join(character.lower() for character in name if (character.isalnum() or character == "_"))
|
172 |
-
|
173 |
-
|
|
|
165 |
def model_hyperlink(link, model_name):
|
166 |
return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
|
167 |
|
168 |
+
|
169 |
def get_safe_name(name: str):
|
170 |
"""Get RFC 1123 compatible safe name"""
|
171 |
name = name.replace("-", "_")
|
172 |
return "".join(character.lower() for character in name if (character.isalnum() or character == "_"))
|
|
|
|
src/utils.py
CHANGED
@@ -99,12 +99,12 @@ def get_default_cols(task: TaskType, version_slug, add_fix_cols: bool = True) ->
|
|
99 |
|
100 |
|
101 |
def select_columns(
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
) -> pd.DataFrame:
|
109 |
cols, _ = get_default_cols(task=task, version_slug=version_slug, add_fix_cols=False)
|
110 |
selected_cols = []
|
@@ -134,16 +134,16 @@ def select_columns(
|
|
134 |
|
135 |
|
136 |
def _update_df_elem(
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
):
|
148 |
version_slug = get_safe_name(version)[-4:]
|
149 |
filtered_df = source_df.copy()
|
@@ -158,15 +158,15 @@ def _update_df_elem(
|
|
158 |
|
159 |
|
160 |
def update_doc_df_elem(
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
):
|
171 |
return _update_df_elem(
|
172 |
TaskType.long_doc,
|
@@ -183,15 +183,15 @@ def update_doc_df_elem(
|
|
183 |
|
184 |
|
185 |
def update_metric(
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
) -> pd.DataFrame:
|
196 |
if task == TaskType.qa:
|
197 |
update_func = update_qa_df_elem
|
@@ -247,13 +247,13 @@ def calculate_file_md5(file_path):
|
|
247 |
|
248 |
|
249 |
def submit_results(
|
250 |
-
|
251 |
-
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
|
256 |
-
|
257 |
):
|
258 |
if not filepath.endswith(".zip"):
|
259 |
return styled_error(f"file uploading aborted. wrong file type: {filepath}")
|
@@ -368,16 +368,16 @@ def get_leaderboard_df(datastore, task: TaskType, metric: str) -> pd.DataFrame:
|
|
368 |
|
369 |
|
370 |
def set_listeners(
|
371 |
-
|
372 |
-
|
373 |
-
|
374 |
-
|
375 |
-
|
376 |
-
|
377 |
-
|
378 |
-
|
379 |
-
|
380 |
-
|
381 |
):
|
382 |
if task == TaskType.qa:
|
383 |
update_table_func = update_qa_df_elem
|
@@ -387,15 +387,15 @@ def set_listeners(
|
|
387 |
raise NotImplementedError
|
388 |
selector_list = [selected_domains, selected_langs, selected_rerankings, search_bar, show_anonymous]
|
389 |
search_bar_args = [
|
390 |
-
|
391 |
-
|
392 |
-
|
393 |
selector_args = (
|
394 |
-
|
395 |
-
|
396 |
-
|
397 |
-
|
398 |
-
|
399 |
)
|
400 |
# Set search_bar listener
|
401 |
search_bar.submit(update_table_func, search_bar_args, target_df)
|
@@ -411,15 +411,15 @@ def set_listeners(
|
|
411 |
|
412 |
|
413 |
def update_qa_df_elem(
|
414 |
-
|
415 |
-
|
416 |
-
|
417 |
-
|
418 |
-
|
419 |
-
|
420 |
-
|
421 |
-
|
422 |
-
|
423 |
):
|
424 |
return _update_df_elem(
|
425 |
TaskType.qa,
|
|
|
99 |
|
100 |
|
101 |
def select_columns(
|
102 |
+
df: pd.DataFrame,
|
103 |
+
domain_query: list,
|
104 |
+
language_query: list,
|
105 |
+
task: TaskType = TaskType.qa,
|
106 |
+
reset_ranking: bool = True,
|
107 |
+
version_slug: str = None,
|
108 |
) -> pd.DataFrame:
|
109 |
cols, _ = get_default_cols(task=task, version_slug=version_slug, add_fix_cols=False)
|
110 |
selected_cols = []
|
|
|
134 |
|
135 |
|
136 |
def _update_df_elem(
|
137 |
+
task: TaskType,
|
138 |
+
version: str,
|
139 |
+
source_df: pd.DataFrame,
|
140 |
+
domains: list,
|
141 |
+
langs: list,
|
142 |
+
reranking_query: list,
|
143 |
+
query: str,
|
144 |
+
show_anonymous: bool,
|
145 |
+
reset_ranking: bool = True,
|
146 |
+
show_revision_and_timestamp: bool = False,
|
147 |
):
|
148 |
version_slug = get_safe_name(version)[-4:]
|
149 |
filtered_df = source_df.copy()
|
|
|
158 |
|
159 |
|
160 |
def update_doc_df_elem(
|
161 |
+
version: str,
|
162 |
+
hidden_df: pd.DataFrame,
|
163 |
+
domains: list,
|
164 |
+
langs: list,
|
165 |
+
reranking_query: list,
|
166 |
+
query: str,
|
167 |
+
show_anonymous: bool,
|
168 |
+
show_revision_and_timestamp: bool = False,
|
169 |
+
reset_ranking: bool = True,
|
170 |
):
|
171 |
return _update_df_elem(
|
172 |
TaskType.long_doc,
|
|
|
183 |
|
184 |
|
185 |
def update_metric(
|
186 |
+
datastore,
|
187 |
+
task: TaskType,
|
188 |
+
metric: str,
|
189 |
+
domains: list,
|
190 |
+
langs: list,
|
191 |
+
reranking_model: list,
|
192 |
+
query: str,
|
193 |
+
show_anonymous: bool = False,
|
194 |
+
show_revision_and_timestamp: bool = False,
|
195 |
) -> pd.DataFrame:
|
196 |
if task == TaskType.qa:
|
197 |
update_func = update_qa_df_elem
|
|
|
247 |
|
248 |
|
249 |
def submit_results(
|
250 |
+
filepath: str,
|
251 |
+
model: str,
|
252 |
+
model_url: str,
|
253 |
+
reranking_model: str = "",
|
254 |
+
reranking_model_url: str = "",
|
255 |
+
version: str = LATEST_BENCHMARK_VERSION,
|
256 |
+
is_anonymous=False,
|
257 |
):
|
258 |
if not filepath.endswith(".zip"):
|
259 |
return styled_error(f"file uploading aborted. wrong file type: {filepath}")
|
|
|
368 |
|
369 |
|
370 |
def set_listeners(
|
371 |
+
task: TaskType,
|
372 |
+
target_df,
|
373 |
+
source_df,
|
374 |
+
search_bar,
|
375 |
+
version,
|
376 |
+
selected_domains,
|
377 |
+
selected_langs,
|
378 |
+
selected_rerankings,
|
379 |
+
show_anonymous,
|
380 |
+
show_revision_and_timestamp,
|
381 |
):
|
382 |
if task == TaskType.qa:
|
383 |
update_table_func = update_qa_df_elem
|
|
|
387 |
raise NotImplementedError
|
388 |
selector_list = [selected_domains, selected_langs, selected_rerankings, search_bar, show_anonymous]
|
389 |
search_bar_args = [
|
390 |
+
source_df,
|
391 |
+
version,
|
392 |
+
] + selector_list
|
393 |
selector_args = (
|
394 |
+
[version, source_df]
|
395 |
+
+ selector_list
|
396 |
+
+ [
|
397 |
+
show_revision_and_timestamp,
|
398 |
+
]
|
399 |
)
|
400 |
# Set search_bar listener
|
401 |
search_bar.submit(update_table_func, search_bar_args, target_df)
|
|
|
411 |
|
412 |
|
413 |
def update_qa_df_elem(
|
414 |
+
version: str,
|
415 |
+
hidden_df: pd.DataFrame,
|
416 |
+
domains: list,
|
417 |
+
langs: list,
|
418 |
+
reranking_query: list,
|
419 |
+
query: str,
|
420 |
+
show_anonymous: bool,
|
421 |
+
show_revision_and_timestamp: bool = False,
|
422 |
+
reset_ranking: bool = True,
|
423 |
):
|
424 |
return _update_df_elem(
|
425 |
TaskType.qa,
|