Spaces:
AIR-Bench
/
Running on CPU Upgrade

nan committed on
Commit
ace0540
1 Parent(s): 2bee5cb

refactor: reformatting the code

Browse files
Files changed (5) hide show
  1. app.py +20 -20
  2. src/columns.py +0 -2
  3. src/loaders.py +18 -18
  4. src/models.py +1 -2
  5. src/utils.py +68 -68
app.py CHANGED
@@ -63,13 +63,13 @@ datastore = ds_dict[LATEST_BENCHMARK_VERSION]
63
 
64
 
65
  def update_qa_metric(
66
- metric: str,
67
- domains: list,
68
- langs: list,
69
- reranking_model: list,
70
- query: str,
71
- show_anonymous: bool,
72
- show_revision_and_timestamp: bool,
73
  ):
74
  global datastore
75
  return update_metric(
@@ -86,13 +86,13 @@ def update_qa_metric(
86
 
87
 
88
  def update_doc_metric(
89
- metric: str,
90
- domains: list,
91
- langs: list,
92
- reranking_model: list,
93
- query: str,
94
- show_anonymous: bool,
95
- show_revision_and_timestamp,
96
  ):
97
  global datastore
98
  return update_metric(
@@ -218,7 +218,7 @@ with demo:
218
  # Dummy leaderboard for handling the case when the user uses backspace key
219
  _qa_df_ret_hidden = datastore.qa_raw_df[
220
  datastore.qa_raw_df[COL_NAME_RERANKING_MODEL] == "NoReranker"
221
- ]
222
  _qa_df_ret_hidden = reset_rank(_qa_df_ret_hidden)
223
  qa_df_elem_ret_hidden = get_leaderboard_table(
224
  _qa_df_ret_hidden, datastore.qa_types, visible=False
@@ -277,7 +277,7 @@ with demo:
277
 
278
  _qa_df_rerank_hidden = datastore.qa_raw_df[
279
  datastore.qa_raw_df[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK
280
- ]
281
  _qa_df_rerank_hidden = reset_rank(_qa_df_rerank_hidden)
282
  qa_df_elem_rerank_hidden = get_leaderboard_table(
283
  _qa_df_rerank_hidden, datastore.qa_types, visible=False
@@ -391,13 +391,13 @@ with demo:
391
 
392
  _doc_df_ret = datastore.doc_fmt_df[
393
  datastore.doc_fmt_df[COL_NAME_RERANKING_MODEL] == "NoReranker"
394
- ]
395
  _doc_df_ret = reset_rank(_doc_df_ret)
396
  doc_df_elem_ret = get_leaderboard_table(_doc_df_ret, datastore.doc_types)
397
 
398
  _doc_df_ret_hidden = datastore.doc_raw_df[
399
  datastore.doc_raw_df[COL_NAME_RERANKING_MODEL] == "NoReranker"
400
- ]
401
  _doc_df_ret_hidden = reset_rank(_doc_df_ret_hidden)
402
  doc_df_elem_ret_hidden = get_leaderboard_table(
403
  _doc_df_ret_hidden, datastore.doc_types, visible=False
@@ -439,7 +439,7 @@ with demo:
439
  with gr.TabItem("Reranking Only", id=22):
440
  _doc_df_rerank = datastore.doc_fmt_df[
441
  datastore.doc_fmt_df[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK
442
- ]
443
  _doc_df_rerank = reset_rank(_doc_df_rerank)
444
  doc_rerank_models = (
445
  _doc_df_rerank[COL_NAME_RERANKING_MODEL].apply(remove_html).unique().tolist()
@@ -452,7 +452,7 @@ with demo:
452
  doc_df_elem_rerank = get_leaderboard_table(_doc_df_rerank, datastore.doc_types)
453
  _doc_df_rerank_hidden = datastore.doc_raw_df[
454
  datastore.doc_raw_df[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK
455
- ]
456
  _doc_df_rerank_hidden = reset_rank(_doc_df_rerank_hidden)
457
  doc_df_elem_rerank_hidden = get_leaderboard_table(
458
  _doc_df_rerank_hidden, datastore.doc_types, visible=False
 
63
 
64
 
65
  def update_qa_metric(
66
+ metric: str,
67
+ domains: list,
68
+ langs: list,
69
+ reranking_model: list,
70
+ query: str,
71
+ show_anonymous: bool,
72
+ show_revision_and_timestamp: bool,
73
  ):
74
  global datastore
75
  return update_metric(
 
86
 
87
 
88
  def update_doc_metric(
89
+ metric: str,
90
+ domains: list,
91
+ langs: list,
92
+ reranking_model: list,
93
+ query: str,
94
+ show_anonymous: bool,
95
+ show_revision_and_timestamp,
96
  ):
97
  global datastore
98
  return update_metric(
 
218
  # Dummy leaderboard for handling the case when the user uses backspace key
219
  _qa_df_ret_hidden = datastore.qa_raw_df[
220
  datastore.qa_raw_df[COL_NAME_RERANKING_MODEL] == "NoReranker"
221
+ ]
222
  _qa_df_ret_hidden = reset_rank(_qa_df_ret_hidden)
223
  qa_df_elem_ret_hidden = get_leaderboard_table(
224
  _qa_df_ret_hidden, datastore.qa_types, visible=False
 
277
 
278
  _qa_df_rerank_hidden = datastore.qa_raw_df[
279
  datastore.qa_raw_df[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK
280
+ ]
281
  _qa_df_rerank_hidden = reset_rank(_qa_df_rerank_hidden)
282
  qa_df_elem_rerank_hidden = get_leaderboard_table(
283
  _qa_df_rerank_hidden, datastore.qa_types, visible=False
 
391
 
392
  _doc_df_ret = datastore.doc_fmt_df[
393
  datastore.doc_fmt_df[COL_NAME_RERANKING_MODEL] == "NoReranker"
394
+ ]
395
  _doc_df_ret = reset_rank(_doc_df_ret)
396
  doc_df_elem_ret = get_leaderboard_table(_doc_df_ret, datastore.doc_types)
397
 
398
  _doc_df_ret_hidden = datastore.doc_raw_df[
399
  datastore.doc_raw_df[COL_NAME_RERANKING_MODEL] == "NoReranker"
400
+ ]
401
  _doc_df_ret_hidden = reset_rank(_doc_df_ret_hidden)
402
  doc_df_elem_ret_hidden = get_leaderboard_table(
403
  _doc_df_ret_hidden, datastore.doc_types, visible=False
 
439
  with gr.TabItem("Reranking Only", id=22):
440
  _doc_df_rerank = datastore.doc_fmt_df[
441
  datastore.doc_fmt_df[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK
442
+ ]
443
  _doc_df_rerank = reset_rank(_doc_df_rerank)
444
  doc_rerank_models = (
445
  _doc_df_rerank[COL_NAME_RERANKING_MODEL].apply(remove_html).unique().tolist()
 
452
  doc_df_elem_rerank = get_leaderboard_table(_doc_df_rerank, datastore.doc_types)
453
  _doc_df_rerank_hidden = datastore.doc_raw_df[
454
  datastore.doc_raw_df[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK
455
+ ]
456
  _doc_df_rerank_hidden = reset_rank(_doc_df_rerank_hidden)
457
  doc_df_elem_rerank_hidden = get_leaderboard_table(
458
  _doc_df_rerank_hidden, datastore.doc_types, visible=False
src/columns.py CHANGED
@@ -95,5 +95,3 @@ COL_NAME_RANK = "Rank 🏆"
95
  COL_NAME_REVISION = "Revision"
96
  COL_NAME_TIMESTAMP = "Submission Date"
97
  COL_NAME_IS_ANONYMOUS = "Anonymous Submission"
98
-
99
-
 
95
  COL_NAME_REVISION = "Revision"
96
  COL_NAME_TIMESTAMP = "Submission Date"
97
  COL_NAME_IS_ANONYMOUS = "Anonymous Submission"
 
 
src/loaders.py CHANGED
@@ -52,28 +52,28 @@ def load_raw_eval_results(results_path: str) -> List[FullEvalResult]:
52
 
53
  def load_leaderboard_datastore(file_path, version) -> LeaderboardDataStore:
54
  slug = get_safe_name(version)[-4:]
55
- datastore = LeaderboardDataStore(version, slug, None, None, None, None, None, None, None, None)
56
- datastore.raw_data = load_raw_eval_results(file_path)
57
- print(f"raw data: {len(datastore.raw_data)}")
58
 
59
- datastore.qa_raw_df = get_leaderboard_df(datastore, TaskType.qa, DEFAULT_METRIC_QA)
60
- print(f"QA data loaded: {datastore.qa_raw_df.shape}")
61
- datastore.qa_fmt_df = datastore.qa_raw_df.copy()
62
- qa_cols, datastore.qa_types = get_default_cols(TaskType.qa, datastore.slug, add_fix_cols=True)
63
- datastore.qa_fmt_df = datastore.qa_fmt_df[~datastore.qa_fmt_df[COL_NAME_IS_ANONYMOUS]][qa_cols]
64
- datastore.qa_fmt_df.drop([COL_NAME_REVISION, COL_NAME_TIMESTAMP], axis=1, inplace=True)
65
 
66
- datastore.doc_raw_df = get_leaderboard_df(datastore, TaskType.long_doc, DEFAULT_METRIC_LONG_DOC)
67
- print(f"Long-Doc data loaded: {len(datastore.doc_raw_df)}")
68
- datastore.doc_fmt_df = datastore.doc_raw_df.copy()
69
- doc_cols, datastore.doc_types = get_default_cols(TaskType.long_doc, datastore.slug, add_fix_cols=True)
70
- datastore.doc_fmt_df = datastore.doc_fmt_df[~datastore.doc_fmt_df[COL_NAME_IS_ANONYMOUS]][doc_cols]
71
- datastore.doc_fmt_df.drop([COL_NAME_REVISION, COL_NAME_TIMESTAMP], axis=1, inplace=True)
72
 
73
- datastore.reranking_models = sorted(
74
- list(frozenset([eval_result.reranking_model for eval_result in datastore.raw_data]))
75
  )
76
- return datastore
77
 
78
 
79
  def load_eval_results(file_path: str) -> Dict[str, LeaderboardDataStore]:
 
52
 
53
  def load_leaderboard_datastore(file_path, version) -> LeaderboardDataStore:
54
  slug = get_safe_name(version)[-4:]
55
+ ds = LeaderboardDataStore(version, slug, None, None, None, None, None, None, None, None)
56
+ ds.raw_data = load_raw_eval_results(file_path)
57
+ print(f"raw data: {len(ds.raw_data)}")
58
 
59
+ ds.qa_raw_df = get_leaderboard_df(ds, TaskType.qa, DEFAULT_METRIC_QA)
60
+ print(f"QA data loaded: {ds.qa_raw_df.shape}")
61
+ ds.qa_fmt_df = ds.qa_raw_df.copy()
62
+ qa_cols, ds.qa_types = get_default_cols(TaskType.qa, ds.slug, add_fix_cols=True)
63
+ ds.qa_fmt_df = ds.qa_fmt_df[~ds.qa_fmt_df[COL_NAME_IS_ANONYMOUS]][qa_cols]
64
+ ds.qa_fmt_df.drop([COL_NAME_REVISION, COL_NAME_TIMESTAMP], axis=1, inplace=True)
65
 
66
+ ds.doc_raw_df = get_leaderboard_df(ds, TaskType.long_doc, DEFAULT_METRIC_LONG_DOC)
67
+ print(f"Long-Doc data loaded: {len(ds.doc_raw_df)}")
68
+ ds.doc_fmt_df = ds.doc_raw_df.copy()
69
+ doc_cols, ds.doc_types = get_default_cols(TaskType.long_doc, ds.slug, add_fix_cols=True)
70
+ ds.doc_fmt_df = ds.doc_fmt_df[~ds.doc_fmt_df[COL_NAME_IS_ANONYMOUS]][doc_cols]
71
+ ds.doc_fmt_df.drop([COL_NAME_REVISION, COL_NAME_TIMESTAMP], axis=1, inplace=True)
72
 
73
+ ds.reranking_models = sorted(
74
+ list(frozenset([eval_result.reranking_model for eval_result in ds.raw_data]))
75
  )
76
+ return ds
77
 
78
 
79
  def load_eval_results(file_path: str) -> Dict[str, LeaderboardDataStore]:
src/models.py CHANGED
@@ -165,9 +165,8 @@ def make_clickable_model(model_name: str, model_link: str):
165
  def model_hyperlink(link, model_name):
166
  return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
167
 
 
168
  def get_safe_name(name: str):
169
  """Get RFC 1123 compatible safe name"""
170
  name = name.replace("-", "_")
171
  return "".join(character.lower() for character in name if (character.isalnum() or character == "_"))
172
-
173
-
 
165
  def model_hyperlink(link, model_name):
166
  return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
167
 
168
+
169
  def get_safe_name(name: str):
170
  """Get RFC 1123 compatible safe name"""
171
  name = name.replace("-", "_")
172
  return "".join(character.lower() for character in name if (character.isalnum() or character == "_"))
 
 
src/utils.py CHANGED
@@ -99,12 +99,12 @@ def get_default_cols(task: TaskType, version_slug, add_fix_cols: bool = True) ->
99
 
100
 
101
  def select_columns(
102
- df: pd.DataFrame,
103
- domain_query: list,
104
- language_query: list,
105
- task: TaskType = TaskType.qa,
106
- reset_ranking: bool = True,
107
- version_slug: str = None,
108
  ) -> pd.DataFrame:
109
  cols, _ = get_default_cols(task=task, version_slug=version_slug, add_fix_cols=False)
110
  selected_cols = []
@@ -134,16 +134,16 @@ def select_columns(
134
 
135
 
136
  def _update_df_elem(
137
- task: TaskType,
138
- version: str,
139
- source_df: pd.DataFrame,
140
- domains: list,
141
- langs: list,
142
- reranking_query: list,
143
- query: str,
144
- show_anonymous: bool,
145
- reset_ranking: bool = True,
146
- show_revision_and_timestamp: bool = False,
147
  ):
148
  version_slug = get_safe_name(version)[-4:]
149
  filtered_df = source_df.copy()
@@ -158,15 +158,15 @@ def _update_df_elem(
158
 
159
 
160
  def update_doc_df_elem(
161
- version: str,
162
- hidden_df: pd.DataFrame,
163
- domains: list,
164
- langs: list,
165
- reranking_query: list,
166
- query: str,
167
- show_anonymous: bool,
168
- show_revision_and_timestamp: bool = False,
169
- reset_ranking: bool = True,
170
  ):
171
  return _update_df_elem(
172
  TaskType.long_doc,
@@ -183,15 +183,15 @@ def update_doc_df_elem(
183
 
184
 
185
  def update_metric(
186
- datastore,
187
- task: TaskType,
188
- metric: str,
189
- domains: list,
190
- langs: list,
191
- reranking_model: list,
192
- query: str,
193
- show_anonymous: bool = False,
194
- show_revision_and_timestamp: bool = False,
195
  ) -> pd.DataFrame:
196
  if task == TaskType.qa:
197
  update_func = update_qa_df_elem
@@ -247,13 +247,13 @@ def calculate_file_md5(file_path):
247
 
248
 
249
  def submit_results(
250
- filepath: str,
251
- model: str,
252
- model_url: str,
253
- reranking_model: str = "",
254
- reranking_model_url: str = "",
255
- version: str = LATEST_BENCHMARK_VERSION,
256
- is_anonymous=False,
257
  ):
258
  if not filepath.endswith(".zip"):
259
  return styled_error(f"file uploading aborted. wrong file type: {filepath}")
@@ -368,16 +368,16 @@ def get_leaderboard_df(datastore, task: TaskType, metric: str) -> pd.DataFrame:
368
 
369
 
370
  def set_listeners(
371
- task: TaskType,
372
- target_df,
373
- source_df,
374
- search_bar,
375
- version,
376
- selected_domains,
377
- selected_langs,
378
- selected_rerankings,
379
- show_anonymous,
380
- show_revision_and_timestamp,
381
  ):
382
  if task == TaskType.qa:
383
  update_table_func = update_qa_df_elem
@@ -387,15 +387,15 @@ def set_listeners(
387
  raise NotImplementedError
388
  selector_list = [selected_domains, selected_langs, selected_rerankings, search_bar, show_anonymous]
389
  search_bar_args = [
390
- source_df,
391
- version,
392
- ] + selector_list
393
  selector_args = (
394
- [version, source_df]
395
- + selector_list
396
- + [
397
- show_revision_and_timestamp,
398
- ]
399
  )
400
  # Set search_bar listener
401
  search_bar.submit(update_table_func, search_bar_args, target_df)
@@ -411,15 +411,15 @@ def set_listeners(
411
 
412
 
413
  def update_qa_df_elem(
414
- version: str,
415
- hidden_df: pd.DataFrame,
416
- domains: list,
417
- langs: list,
418
- reranking_query: list,
419
- query: str,
420
- show_anonymous: bool,
421
- show_revision_and_timestamp: bool = False,
422
- reset_ranking: bool = True,
423
  ):
424
  return _update_df_elem(
425
  TaskType.qa,
 
99
 
100
 
101
  def select_columns(
102
+ df: pd.DataFrame,
103
+ domain_query: list,
104
+ language_query: list,
105
+ task: TaskType = TaskType.qa,
106
+ reset_ranking: bool = True,
107
+ version_slug: str = None,
108
  ) -> pd.DataFrame:
109
  cols, _ = get_default_cols(task=task, version_slug=version_slug, add_fix_cols=False)
110
  selected_cols = []
 
134
 
135
 
136
  def _update_df_elem(
137
+ task: TaskType,
138
+ version: str,
139
+ source_df: pd.DataFrame,
140
+ domains: list,
141
+ langs: list,
142
+ reranking_query: list,
143
+ query: str,
144
+ show_anonymous: bool,
145
+ reset_ranking: bool = True,
146
+ show_revision_and_timestamp: bool = False,
147
  ):
148
  version_slug = get_safe_name(version)[-4:]
149
  filtered_df = source_df.copy()
 
158
 
159
 
160
  def update_doc_df_elem(
161
+ version: str,
162
+ hidden_df: pd.DataFrame,
163
+ domains: list,
164
+ langs: list,
165
+ reranking_query: list,
166
+ query: str,
167
+ show_anonymous: bool,
168
+ show_revision_and_timestamp: bool = False,
169
+ reset_ranking: bool = True,
170
  ):
171
  return _update_df_elem(
172
  TaskType.long_doc,
 
183
 
184
 
185
  def update_metric(
186
+ datastore,
187
+ task: TaskType,
188
+ metric: str,
189
+ domains: list,
190
+ langs: list,
191
+ reranking_model: list,
192
+ query: str,
193
+ show_anonymous: bool = False,
194
+ show_revision_and_timestamp: bool = False,
195
  ) -> pd.DataFrame:
196
  if task == TaskType.qa:
197
  update_func = update_qa_df_elem
 
247
 
248
 
249
  def submit_results(
250
+ filepath: str,
251
+ model: str,
252
+ model_url: str,
253
+ reranking_model: str = "",
254
+ reranking_model_url: str = "",
255
+ version: str = LATEST_BENCHMARK_VERSION,
256
+ is_anonymous=False,
257
  ):
258
  if not filepath.endswith(".zip"):
259
  return styled_error(f"file uploading aborted. wrong file type: {filepath}")
 
368
 
369
 
370
  def set_listeners(
371
+ task: TaskType,
372
+ target_df,
373
+ source_df,
374
+ search_bar,
375
+ version,
376
+ selected_domains,
377
+ selected_langs,
378
+ selected_rerankings,
379
+ show_anonymous,
380
+ show_revision_and_timestamp,
381
  ):
382
  if task == TaskType.qa:
383
  update_table_func = update_qa_df_elem
 
387
  raise NotImplementedError
388
  selector_list = [selected_domains, selected_langs, selected_rerankings, search_bar, show_anonymous]
389
  search_bar_args = [
390
+ source_df,
391
+ version,
392
+ ] + selector_list
393
  selector_args = (
394
+ [version, source_df]
395
+ + selector_list
396
+ + [
397
+ show_revision_and_timestamp,
398
+ ]
399
  )
400
  # Set search_bar listener
401
  search_bar.submit(update_table_func, search_bar_args, target_df)
 
411
 
412
 
413
  def update_qa_df_elem(
414
+ version: str,
415
+ hidden_df: pd.DataFrame,
416
+ domains: list,
417
+ langs: list,
418
+ reranking_query: list,
419
+ query: str,
420
+ show_anonymous: bool,
421
+ show_revision_and_timestamp: bool = False,
422
+ reset_ranking: bool = True,
423
  ):
424
  return _update_df_elem(
425
  TaskType.qa,