Spaces:
AIR-Bench
/
Running on CPU Upgrade

nan committed on
Commit
4791ac5
1 Parent(s): ba13e25

refactor: refactor the envs

Browse files
Files changed (3) hide show
  1. app.py +2 -3
  2. src/benchmarks.py +1 -32
  3. src/envs.py +32 -0
app.py CHANGED
@@ -7,14 +7,13 @@ from src.about import (
7
  )
8
  from src.benchmarks import (
9
  qa_benchmark_dict,
10
- long_doc_benchmark_dict,
11
- METRIC_LIST
12
  )
13
  from src.display.css_html_js import custom_css
14
  from src.envs import (
15
  API,
16
  EVAL_RESULTS_PATH,
17
- REPO_ID, DEFAULT_METRIC_QA, DEFAULT_METRIC_LONG_DOC
18
  )
19
  from src.loaders import (
20
  load_eval_results
 
7
  )
8
  from src.benchmarks import (
9
  qa_benchmark_dict,
10
+ long_doc_benchmark_dict
 
11
  )
12
  from src.display.css_html_js import custom_css
13
  from src.envs import (
14
  API,
15
  EVAL_RESULTS_PATH,
16
+ REPO_ID, DEFAULT_METRIC_QA, DEFAULT_METRIC_LONG_DOC, METRIC_LIST
17
  )
18
  from src.loaders import (
19
  load_eval_results
src/benchmarks.py CHANGED
@@ -3,38 +3,7 @@ from enum import Enum
3
 
4
  from air_benchmark.tasks.tasks import BenchmarkTable
5
 
6
- METRIC_LIST = [
7
- "ndcg_at_1",
8
- "ndcg_at_3",
9
- "ndcg_at_5",
10
- "ndcg_at_10",
11
- "ndcg_at_100",
12
- "ndcg_at_1000",
13
- "map_at_1",
14
- "map_at_3",
15
- "map_at_5",
16
- "map_at_10",
17
- "map_at_100",
18
- "map_at_1000",
19
- "recall_at_1",
20
- "recall_at_3",
21
- "recall_at_5",
22
- "recall_at_10",
23
- "recall_at_100",
24
- "recall_at_1000",
25
- "precision_at_1",
26
- "precision_at_3",
27
- "precision_at_5",
28
- "precision_at_10",
29
- "precision_at_100",
30
- "precision_at_1000",
31
- "mrr_at_1",
32
- "mrr_at_3",
33
- "mrr_at_5",
34
- "mrr_at_10",
35
- "mrr_at_100",
36
- "mrr_at_1000"
37
- ]
38
 
39
 
40
  def get_safe_name(name: str):
 
3
 
4
  from air_benchmark.tasks.tasks import BenchmarkTable
5
 
6
+ from src.envs import METRIC_LIST
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
 
9
  def get_safe_name(name: str):
src/envs.py CHANGED
@@ -33,3 +33,35 @@ BENCHMARK_VERSION_LIST = [
33
  LATEST_BENCHMARK_VERSION = BENCHMARK_VERSION_LIST[-1]
34
  DEFAULT_METRIC_QA = "ndcg_at_10"
35
  DEFAULT_METRIC_LONG_DOC = "recall_at_10"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  LATEST_BENCHMARK_VERSION = BENCHMARK_VERSION_LIST[-1]
34
  DEFAULT_METRIC_QA = "ndcg_at_10"
35
  DEFAULT_METRIC_LONG_DOC = "recall_at_10"
36
+ METRIC_LIST = [
37
+ "ndcg_at_1",
38
+ "ndcg_at_3",
39
+ "ndcg_at_5",
40
+ "ndcg_at_10",
41
+ "ndcg_at_100",
42
+ "ndcg_at_1000",
43
+ "map_at_1",
44
+ "map_at_3",
45
+ "map_at_5",
46
+ "map_at_10",
47
+ "map_at_100",
48
+ "map_at_1000",
49
+ "recall_at_1",
50
+ "recall_at_3",
51
+ "recall_at_5",
52
+ "recall_at_10",
53
+ "recall_at_100",
54
+ "recall_at_1000",
55
+ "precision_at_1",
56
+ "precision_at_3",
57
+ "precision_at_5",
58
+ "precision_at_10",
59
+ "precision_at_100",
60
+ "precision_at_1000",
61
+ "mrr_at_1",
62
+ "mrr_at_3",
63
+ "mrr_at_5",
64
+ "mrr_at_10",
65
+ "mrr_at_100",
66
+ "mrr_at_1000"
67
+ ]