Spaces:
AIR-Bench
/
Running on CPU Upgrade

nan committed on
Commit
34e2886
1 Parent(s): 1199e4c

feat: revert the comments

Browse files
Files changed (1) hide show
  1. app.py +54 -49
app.py CHANGED
@@ -1,30 +1,15 @@
1
  import gradio as gr
2
- import pandas as pd
3
  from apscheduler.schedulers.background import BackgroundScheduler
 
4
 
5
  from src.about import (
6
- INTRODUCTION_TEXT,
7
- TITLE
8
  )
9
  from src.benchmarks import (
10
  QABenchmarks,
11
  LongDocBenchmarks
12
  )
13
  from src.display.css_html_js import custom_css
14
- from src.envs import (
15
- API,
16
- EVAL_RESULTS_PATH,
17
- REPO_ID, DEFAULT_METRIC_QA, DEFAULT_METRIC_LONG_DOC, METRIC_LIST, LATEST_BENCHMARK_VERSION, COL_NAME_RERANKING_MODEL, COL_NAME_RETRIEVAL_MODEL, BM25_LINK, BENCHMARK_VERSION_LIST
18
- )
19
- from src.loaders import (
20
- load_eval_results
21
- )
22
- from src.utils import (
23
- update_metric,
24
- set_listeners,
25
- reset_rank,
26
- remove_html, upload_file, submit_results
27
- )
28
  from src.display.gradio_formatting import (
29
  get_version_dropdown,
30
  get_search_bar,
@@ -37,28 +22,40 @@ from src.display.gradio_formatting import (
37
  get_revision_and_ts_checkbox,
38
  get_leaderboard_table
39
  )
40
-
41
- from src.about import EVALUATION_QUEUE_TEXT, BENCHMARKS_TEXT
 
 
 
 
 
 
 
 
 
 
 
42
 
43
 
44
  def restart_space():
45
  API.restart_space(repo_id=REPO_ID)
46
 
47
 
48
- # try:
49
- # snapshot_download(
50
- # repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30,
51
- # token=TOKEN
52
- # )
53
- # except Exception as e:
54
- # print(f'failed to download')
55
- # restart_space()
56
 
57
  global data
58
  data = load_eval_results(EVAL_RESULTS_PATH)
59
  global datastore
60
  datastore = data[LATEST_BENCHMARK_VERSION]
61
 
 
62
  def update_metric_qa(
63
  metric: str,
64
  domains: list,
@@ -68,7 +65,8 @@ def update_metric_qa(
68
  show_anonymous: bool,
69
  show_revision_and_timestamp: bool,
70
  ):
71
- return update_metric(datastore, 'qa', metric, domains, langs, reranking_model, query, show_anonymous, show_revision_and_timestamp)
 
72
 
73
 
74
  def update_metric_long_doc(
@@ -80,7 +78,8 @@ def update_metric_long_doc(
80
  show_anonymous: bool,
81
  show_revision_and_timestamp,
82
  ):
83
- return update_metric(datastore, "long-doc", metric, domains, langs, reranking_model, query, show_anonymous, show_revision_and_timestamp)
 
84
 
85
 
86
  def update_datastore(version):
@@ -158,7 +157,7 @@ with demo:
158
 
159
  selected_version.change(
160
  update_datastore,
161
- [selected_version,],
162
  [selected_domains, selected_langs, selected_rerankings, lb_table, hidden_lb_table]
163
  )
164
 
@@ -198,19 +197,22 @@ with demo:
198
  with gr.Column(scale=1):
199
  selected_noreranker = get_noreranking_dropdown()
200
 
201
- lb_df_retriever = datastore.leaderboard_df_qa[datastore.leaderboard_df_qa[COL_NAME_RERANKING_MODEL] == "NoReranker"]
 
202
  lb_df_retriever = reset_rank(lb_df_retriever)
203
  lb_table_retriever = get_leaderboard_table(
204
  lb_df_retriever, datastore.types_qa)
205
 
206
  # Dummy leaderboard for handling the case when the user uses backspace key
207
- hidden_lb_df_retriever = datastore.raw_df_qa[datastore.raw_df_qa[COL_NAME_RERANKING_MODEL] == "NoReranker"]
 
208
  hidden_lb_df_retriever = reset_rank(hidden_lb_df_retriever)
209
- hidden_lb_table_retriever = get_leaderboard_table(hidden_lb_df_retriever, datastore.types_qa, visible=False)
 
210
 
211
  selected_version.change(
212
  update_datastore,
213
- [selected_version,],
214
  [
215
  selected_domains,
216
  selected_langs,
@@ -254,9 +256,10 @@ with demo:
254
  datastore.leaderboard_df_qa[
255
  COL_NAME_RETRIEVAL_MODEL
256
  ] == BM25_LINK
257
- ]
258
  lb_df_reranker = reset_rank(lb_df_reranker)
259
- reranking_models_reranker = lb_df_reranker[COL_NAME_RERANKING_MODEL].apply(remove_html).unique().tolist()
 
260
  with gr.Row():
261
  with gr.Column(scale=1):
262
  selected_rerankings_reranker = get_reranking_dropdown(reranking_models_reranker)
@@ -265,7 +268,8 @@ with demo:
265
  lb_table_reranker = get_leaderboard_table(
266
  lb_df_reranker, datastore.types_qa)
267
 
268
- hidden_lb_df_reranker = datastore.raw_df_qa[datastore.raw_df_qa[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK]
 
269
  hidden_lb_df_reranker = reset_rank(hidden_lb_df_reranker)
270
  hidden_lb_table_reranker = get_leaderboard_table(
271
  hidden_lb_df_reranker,
@@ -274,7 +278,7 @@ with demo:
274
 
275
  selected_version.change(
276
  update_datastore,
277
- [selected_version,],
278
  [
279
  selected_domains,
280
  selected_langs,
@@ -348,7 +352,7 @@ with demo:
348
 
349
  selected_version.change(
350
  update_datastore_long_doc,
351
- [selected_version,],
352
  [
353
  selected_domains,
354
  selected_langs,
@@ -394,14 +398,14 @@ with demo:
394
  selected_noreranker = get_noreranking_dropdown()
395
  lb_df_retriever_long_doc = datastore.leaderboard_df_long_doc[
396
  datastore.leaderboard_df_long_doc[COL_NAME_RERANKING_MODEL] == "NoReranker"
397
- ]
398
  lb_df_retriever_long_doc = reset_rank(lb_df_retriever_long_doc)
399
  lb_table_retriever_long_doc = get_leaderboard_table(
400
  lb_df_retriever_long_doc, datastore.types_long_doc)
401
 
402
  hidden_lb_df_retriever_long_doc = datastore.raw_df_long_doc[
403
  datastore.raw_df_long_doc[COL_NAME_RERANKING_MODEL] == "NoReranker"
404
- ]
405
  hidden_lb_df_retriever_long_doc = reset_rank(hidden_lb_df_retriever_long_doc)
406
  hidden_lb_table_retriever_long_doc = get_leaderboard_table(
407
  hidden_lb_df_retriever_long_doc, datastore.types_long_doc, visible=False
@@ -409,7 +413,7 @@ with demo:
409
 
410
  selected_version.change(
411
  update_datastore_long_doc,
412
- [selected_version,],
413
  [
414
  selected_domains,
415
  selected_langs,
@@ -452,16 +456,19 @@ with demo:
452
  datastore.leaderboard_df_long_doc[
453
  COL_NAME_RETRIEVAL_MODEL
454
  ] == BM25_LINK
455
- ]
456
  lb_df_reranker_ldoc = reset_rank(lb_df_reranker_ldoc)
457
- reranking_models_reranker_ldoc = lb_df_reranker_ldoc[COL_NAME_RERANKING_MODEL].apply(remove_html).unique().tolist()
 
458
  with gr.Row():
459
  with gr.Column(scale=1):
460
- selected_rerankings_reranker_ldoc = get_reranking_dropdown(reranking_models_reranker_ldoc)
 
461
  with gr.Column(scale=1):
462
  search_bar_reranker_ldoc = gr.Textbox(show_label=False, visible=False)
463
  lb_table_reranker_ldoc = get_leaderboard_table(lb_df_reranker_ldoc, datastore.types_long_doc)
464
- hidden_lb_df_reranker_ldoc = datastore.raw_df_long_doc[datastore.raw_df_long_doc[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK]
 
465
  hidden_lb_df_reranker_ldoc = reset_rank(hidden_lb_df_reranker_ldoc)
466
  hidden_lb_table_reranker_ldoc = get_leaderboard_table(
467
  hidden_lb_df_reranker_ldoc, datastore.types_long_doc, visible=False
@@ -469,7 +476,7 @@ with demo:
469
 
470
  selected_version.change(
471
  update_datastore_long_doc,
472
- [selected_version,],
473
  [
474
  selected_domains,
475
  selected_langs,
@@ -580,5 +587,3 @@ if __name__ == "__main__":
580
  scheduler.start()
581
  demo.queue(default_concurrency_limit=40)
582
  demo.launch()
583
-
584
-
 
1
  import gradio as gr
 
2
  from apscheduler.schedulers.background import BackgroundScheduler
3
+ from huggingface_hub import snapshot_download
4
 
5
  from src.about import (
6
+ INTRODUCTION_TEXT, TITLE, EVALUATION_QUEUE_TEXT, BENCHMARKS_TEXT
 
7
  )
8
  from src.benchmarks import (
9
  QABenchmarks,
10
  LongDocBenchmarks
11
  )
12
  from src.display.css_html_js import custom_css
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  from src.display.gradio_formatting import (
14
  get_version_dropdown,
15
  get_search_bar,
 
22
  get_revision_and_ts_checkbox,
23
  get_leaderboard_table
24
  )
25
+ from src.envs import (
26
+ API,
27
+ EVAL_RESULTS_PATH,
28
+ REPO_ID, DEFAULT_METRIC_QA, DEFAULT_METRIC_LONG_DOC, METRIC_LIST, LATEST_BENCHMARK_VERSION,
29
+ COL_NAME_RERANKING_MODEL, COL_NAME_RETRIEVAL_MODEL, BM25_LINK, BENCHMARK_VERSION_LIST, RESULTS_REPO, TOKEN
30
+ )
31
+ from src.loaders import load_eval_results
32
+ from src.utils import (
33
+ update_metric,
34
+ set_listeners,
35
+ reset_rank,
36
+ remove_html, upload_file, submit_results
37
+ )
38
 
39
 
40
  def restart_space():
41
  API.restart_space(repo_id=REPO_ID)
42
 
43
 
44
+ try:
45
+ snapshot_download(
46
+ repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30,
47
+ token=TOKEN
48
+ )
49
+ except Exception as e:
50
+ print(f'failed to download')
51
+ restart_space()
52
 
53
  global data
54
  data = load_eval_results(EVAL_RESULTS_PATH)
55
  global datastore
56
  datastore = data[LATEST_BENCHMARK_VERSION]
57
 
58
+
59
  def update_metric_qa(
60
  metric: str,
61
  domains: list,
 
65
  show_anonymous: bool,
66
  show_revision_and_timestamp: bool,
67
  ):
68
+ return update_metric(datastore, 'qa', metric, domains, langs, reranking_model, query, show_anonymous,
69
+ show_revision_and_timestamp)
70
 
71
 
72
  def update_metric_long_doc(
 
78
  show_anonymous: bool,
79
  show_revision_and_timestamp,
80
  ):
81
+ return update_metric(datastore, "long-doc", metric, domains, langs, reranking_model, query, show_anonymous,
82
+ show_revision_and_timestamp)
83
 
84
 
85
  def update_datastore(version):
 
157
 
158
  selected_version.change(
159
  update_datastore,
160
+ [selected_version, ],
161
  [selected_domains, selected_langs, selected_rerankings, lb_table, hidden_lb_table]
162
  )
163
 
 
197
  with gr.Column(scale=1):
198
  selected_noreranker = get_noreranking_dropdown()
199
 
200
+ lb_df_retriever = datastore.leaderboard_df_qa[
201
+ datastore.leaderboard_df_qa[COL_NAME_RERANKING_MODEL] == "NoReranker"]
202
  lb_df_retriever = reset_rank(lb_df_retriever)
203
  lb_table_retriever = get_leaderboard_table(
204
  lb_df_retriever, datastore.types_qa)
205
 
206
  # Dummy leaderboard for handling the case when the user uses backspace key
207
+ hidden_lb_df_retriever = datastore.raw_df_qa[
208
+ datastore.raw_df_qa[COL_NAME_RERANKING_MODEL] == "NoReranker"]
209
  hidden_lb_df_retriever = reset_rank(hidden_lb_df_retriever)
210
+ hidden_lb_table_retriever = get_leaderboard_table(hidden_lb_df_retriever, datastore.types_qa,
211
+ visible=False)
212
 
213
  selected_version.change(
214
  update_datastore,
215
+ [selected_version, ],
216
  [
217
  selected_domains,
218
  selected_langs,
 
256
  datastore.leaderboard_df_qa[
257
  COL_NAME_RETRIEVAL_MODEL
258
  ] == BM25_LINK
259
+ ]
260
  lb_df_reranker = reset_rank(lb_df_reranker)
261
+ reranking_models_reranker = lb_df_reranker[COL_NAME_RERANKING_MODEL].apply(
262
+ remove_html).unique().tolist()
263
  with gr.Row():
264
  with gr.Column(scale=1):
265
  selected_rerankings_reranker = get_reranking_dropdown(reranking_models_reranker)
 
268
  lb_table_reranker = get_leaderboard_table(
269
  lb_df_reranker, datastore.types_qa)
270
 
271
+ hidden_lb_df_reranker = datastore.raw_df_qa[
272
+ datastore.raw_df_qa[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK]
273
  hidden_lb_df_reranker = reset_rank(hidden_lb_df_reranker)
274
  hidden_lb_table_reranker = get_leaderboard_table(
275
  hidden_lb_df_reranker,
 
278
 
279
  selected_version.change(
280
  update_datastore,
281
+ [selected_version, ],
282
  [
283
  selected_domains,
284
  selected_langs,
 
352
 
353
  selected_version.change(
354
  update_datastore_long_doc,
355
+ [selected_version, ],
356
  [
357
  selected_domains,
358
  selected_langs,
 
398
  selected_noreranker = get_noreranking_dropdown()
399
  lb_df_retriever_long_doc = datastore.leaderboard_df_long_doc[
400
  datastore.leaderboard_df_long_doc[COL_NAME_RERANKING_MODEL] == "NoReranker"
401
+ ]
402
  lb_df_retriever_long_doc = reset_rank(lb_df_retriever_long_doc)
403
  lb_table_retriever_long_doc = get_leaderboard_table(
404
  lb_df_retriever_long_doc, datastore.types_long_doc)
405
 
406
  hidden_lb_df_retriever_long_doc = datastore.raw_df_long_doc[
407
  datastore.raw_df_long_doc[COL_NAME_RERANKING_MODEL] == "NoReranker"
408
+ ]
409
  hidden_lb_df_retriever_long_doc = reset_rank(hidden_lb_df_retriever_long_doc)
410
  hidden_lb_table_retriever_long_doc = get_leaderboard_table(
411
  hidden_lb_df_retriever_long_doc, datastore.types_long_doc, visible=False
 
413
 
414
  selected_version.change(
415
  update_datastore_long_doc,
416
+ [selected_version, ],
417
  [
418
  selected_domains,
419
  selected_langs,
 
456
  datastore.leaderboard_df_long_doc[
457
  COL_NAME_RETRIEVAL_MODEL
458
  ] == BM25_LINK
459
+ ]
460
  lb_df_reranker_ldoc = reset_rank(lb_df_reranker_ldoc)
461
+ reranking_models_reranker_ldoc = lb_df_reranker_ldoc[COL_NAME_RERANKING_MODEL].apply(
462
+ remove_html).unique().tolist()
463
  with gr.Row():
464
  with gr.Column(scale=1):
465
+ selected_rerankings_reranker_ldoc = get_reranking_dropdown(
466
+ reranking_models_reranker_ldoc)
467
  with gr.Column(scale=1):
468
  search_bar_reranker_ldoc = gr.Textbox(show_label=False, visible=False)
469
  lb_table_reranker_ldoc = get_leaderboard_table(lb_df_reranker_ldoc, datastore.types_long_doc)
470
+ hidden_lb_df_reranker_ldoc = datastore.raw_df_long_doc[
471
+ datastore.raw_df_long_doc[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK]
472
  hidden_lb_df_reranker_ldoc = reset_rank(hidden_lb_df_reranker_ldoc)
473
  hidden_lb_table_reranker_ldoc = get_leaderboard_table(
474
  hidden_lb_df_reranker_ldoc, datastore.types_long_doc, visible=False
 
476
 
477
  selected_version.change(
478
  update_datastore_long_doc,
479
+ [selected_version, ],
480
  [
481
  selected_domains,
482
  selected_langs,
 
587
  scheduler.start()
588
  demo.queue(default_concurrency_limit=40)
589
  demo.launch()