Spaces:
AIR-Bench
/
Running on CPU Upgrade

File size: 4,234 Bytes
f766ce9
982af90
 
f766ce9
 
 
982af90
f766ce9
 
 
8a1daf9
f766ce9
 
b9d42b4
f766ce9
 
 
b9d42b4
 
 
 
606d718
b9d42b4
 
 
 
 
606d718
ca2a141
606d718
ca2a141
 
 
 
 
 
 
 
 
 
b9d42b4
 
 
606d718
ca2a141
 
 
 
 
 
 
 
 
 
 
 
 
b9d42b4
f766ce9
b9d42b4
606d718
ca2a141
 
 
 
 
 
 
 
 
 
 
 
 
 
b9d42b4
36c5a0c
b9d42b4
606d718
ca2a141
 
 
 
 
 
 
 
 
 
 
 
 
b9d42b4
f766ce9
b9d42b4
606d718
ca2a141
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b9d42b4
 
 
 
ca2a141
51dbe36
 
 
ca2a141
 
 
51dbe36
2dca7ce
51dbe36
 
ca2a141
b9d42b4
ca2a141
 
 
 
 
 
 
51dbe36
b9d42b4
f766ce9
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
# Your leaderboard name
# Rendered as raw HTML by Gradio at the top of the Space.
TITLE = (
    '<h1 align="center" id="space-title">'
    "AIR-Bench: Automated Heterogeneous Information Retrieval Benchmark"
    "\n (Preview) </h1>"
)

# What does your leaderboard evaluate?
# Short markdown blurb pointing users at the project repository.
INTRODUCTION_TEXT = (
    "\nCheck more information at "
    "[our GitHub repo](https://github.com/AIR-Bench/AIR-Bench)\n"
)

# Which evaluations are you running? how can people reproduce what you have?
# NOTE: plain triple-quoted string — the original used an f-string with no
# placeholders, which is pointless and would break if literal braces were
# ever added to the markdown.
BENCHMARKS_TEXT = """
## How it works

Check more information at [our GitHub repo](https://github.com/AIR-Bench/AIR-Bench)
"""

# Submission instructions shown on the "Submit" tab (rendered as markdown).
# Fixes user-facing typos in the original text: "Steps for submit" ->
# "Steps to submit", "befor" -> "before", "Congratulation!" -> "Congratulations!".
EVALUATION_QUEUE_TEXT = """
## Steps to submit to AIR-Bench

1. Install AIR-Bench
```bash
pip install air-benchmark
```
2. Run the evaluation script
```bash
cd AIR-Bench/scripts
# Run all tasks
python run_air_benchmark.py \\
--output_dir ./search_results \\
--encoder BAAI/bge-m3 \\
--reranker BAAI/bge-reranker-v2-m3 \\
--search_top_k 1000 \\
--rerank_top_k 100 \\
--max_query_length 512 \\
--max_passage_length 512 \\
--batch_size 512 \\
--pooling_method cls \\
--normalize_embeddings True \\
--use_fp16 True \\
--add_instruction False \\
--overwrite False

# Run the tasks in the specified task type
python run_air_benchmark.py \\
--task_types long-doc \\
--output_dir ./search_results \\
--encoder BAAI/bge-m3 \\
--reranker BAAI/bge-reranker-v2-m3 \\
--search_top_k 1000 \\
--rerank_top_k 100 \\
--max_query_length 512 \\
--max_passage_length 512 \\
--batch_size 512 \\
--pooling_method cls \\
--normalize_embeddings True \\
--use_fp16 True \\
--add_instruction False \\
--overwrite False

# Run the tasks in the specified task type and domains
python run_air_benchmark.py \\
--task_types long-doc \\
--domains arxiv book \\
--output_dir ./search_results \\
--encoder BAAI/bge-m3 \\
--reranker BAAI/bge-reranker-v2-m3 \\
--search_top_k 1000 \\
--rerank_top_k 100 \\
--max_query_length 512 \\
--max_passage_length 512 \\
--batch_size 512 \\
--pooling_method cls \\
--normalize_embeddings True \\
--use_fp16 True \\
--add_instruction False \\
--overwrite False

# Run the tasks in the specified languages
python run_air_benchmark.py \\
--languages en \\
--output_dir ./search_results \\
--encoder BAAI/bge-m3 \\
--reranker BAAI/bge-reranker-v2-m3 \\
--search_top_k 1000 \\
--rerank_top_k 100 \\
--max_query_length 512 \\
--max_passage_length 512 \\
--batch_size 512 \\
--pooling_method cls \\
--normalize_embeddings True \\
--use_fp16 True \\
--add_instruction False \\
--overwrite False

# Run the tasks in the specified task type, domains, and languages
python run_air_benchmark.py \\
--task_types qa \\
--domains wiki web \\
--languages en \\
--output_dir ./search_results \\
--encoder BAAI/bge-m3 \\
--reranker BAAI/bge-reranker-v2-m3 \\
--search_top_k 1000 \\
--rerank_top_k 100 \\
--max_query_length 512 \\
--max_passage_length 512 \\
--batch_size 512 \\
--pooling_method cls \\
--normalize_embeddings True \\
--use_fp16 True \\
--add_instruction False \\
--overwrite False
```
3. Package the search results.
```bash
# Zip "Embedding Model + NoReranker" search results in "<search_results>/<model_name>/NoReranker" to "<save_dir>/<model_name>_NoReranker.zip".
python zip_results.py \\
--results_dir search_results \\
--model_name bge-m3 \\
--save_dir search_results/zipped_results

# Zip "Embedding Model + Reranker" search results in "<search_results>/<model_name>/<reranker_name>" to "<save_dir>/<model_name>_<reranker_name>.zip".
python zip_results.py \\
--results_dir search_results \\
--model_name bge-m3 \\
--reranker_name bge-reranker-v2-m3 \\
--save_dir search_results/zipped_results
```
4. Upload the `.zip` file on this page and fill in the model information: 
- Model Name: such as `bge-m3`.
- Model URL: such as `https://huggingface.co/BAAI/bge-m3`.
- Reranker Name: such as `bge-reranker-v2-m3`. Keep empty for `NoReranker`.
- Reranker URL: such as `https://huggingface.co/BAAI/bge-reranker-v2-m3`. Keep empty for `NoReranker`.

If you want to stay anonymous, you can only fill in the Model Name and Reranker Name (keep empty for `NoReranker`), and check the selection box below before submission. 

5. Congratulations! Your results will be shown on the leaderboard in up to one hour.
"""

# Label shown on the "copy citation" button in the UI.
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
# Citation snippet body; currently just a newline (no published BibTeX yet).
CITATION_BUTTON_TEXT = "\n"