# Your leaderboard name
TITLE = """<h1 align="center" id="space-title">AIR-Bench: Automated Heterogeneous Information Retrieval Benchmark
(Preview) </h1>"""
# What does your leaderboard evaluate?
INTRODUCTION_TEXT = """
For more information, see [our GitHub repo](https://github.com/AIR-Bench/AIR-Bench).
"""
# Which evaluations are you running? how can people reproduce what you have?
BENCHMARKS_TEXT = f"""
## How it works
Check more information at [our GitHub repo](https://github.com/AIR-Bench/AIR-Bench)
"""
EVALUATION_QUEUE_TEXT = """
## Steps to submit to AIR-Bench
1. Install AIR-Bench
```bash
pip install air-benchmark
```
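Optionally, verify the installation before running the evaluation. This is a minimal sanity check; the import name `air_benchmark` is assumed to match the PyPI package and may differ:
```bash
# Show the installed package metadata and version
pip show air-benchmark
# Check that the package imports cleanly (module name air_benchmark is an assumption)
python -c "import air_benchmark"
```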
2. Run the evaluation script
```bash
cd AIR-Bench/scripts

# Run all tasks
python run_air_benchmark.py \\
    --output_dir ./search_results \\
    --encoder BAAI/bge-m3 \\
    --reranker BAAI/bge-reranker-v2-m3 \\
    --search_top_k 1000 \\
    --rerank_top_k 100 \\
    --max_query_length 512 \\
    --max_passage_length 512 \\
    --batch_size 512 \\
    --pooling_method cls \\
    --normalize_embeddings True \\
    --use_fp16 True \\
    --add_instruction False \\
    --overwrite False

# Run only the tasks of the specified task type
python run_air_benchmark.py \\
    --task_types long-doc \\
    --output_dir ./search_results \\
    --encoder BAAI/bge-m3 \\
    --reranker BAAI/bge-reranker-v2-m3 \\
    --search_top_k 1000 \\
    --rerank_top_k 100 \\
    --max_query_length 512 \\
    --max_passage_length 512 \\
    --batch_size 512 \\
    --pooling_method cls \\
    --normalize_embeddings True \\
    --use_fp16 True \\
    --add_instruction False \\
    --overwrite False

# Run only the tasks of the specified task type and domains
python run_air_benchmark.py \\
    --task_types long-doc \\
    --domains arxiv book \\
    --output_dir ./search_results \\
    --encoder BAAI/bge-m3 \\
    --reranker BAAI/bge-reranker-v2-m3 \\
    --search_top_k 1000 \\
    --rerank_top_k 100 \\
    --max_query_length 512 \\
    --max_passage_length 512 \\
    --batch_size 512 \\
    --pooling_method cls \\
    --normalize_embeddings True \\
    --use_fp16 True \\
    --add_instruction False \\
    --overwrite False

# Run only the tasks in the specified languages
python run_air_benchmark.py \\
    --languages en \\
    --output_dir ./search_results \\
    --encoder BAAI/bge-m3 \\
    --reranker BAAI/bge-reranker-v2-m3 \\
    --search_top_k 1000 \\
    --rerank_top_k 100 \\
    --max_query_length 512 \\
    --max_passage_length 512 \\
    --batch_size 512 \\
    --pooling_method cls \\
    --normalize_embeddings True \\
    --use_fp16 True \\
    --add_instruction False \\
    --overwrite False

# Run only the tasks of the specified task type, domains, and languages
python run_air_benchmark.py \\
    --task_types qa \\
    --domains wiki web \\
    --languages en \\
    --output_dir ./search_results \\
    --encoder BAAI/bge-m3 \\
    --reranker BAAI/bge-reranker-v2-m3 \\
    --search_top_k 1000 \\
    --rerank_top_k 100 \\
    --max_query_length 512 \\
    --max_passage_length 512 \\
    --batch_size 512 \\
    --pooling_method cls \\
    --normalize_embeddings True \\
    --use_fp16 True \\
    --add_instruction False \\
    --overwrite False
```
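After the evaluation finishes, the search results should land under `./search_results`, organized per embedding model and reranker. The layout below is a sketch inferred from the paths that `zip_results.py` expects in the next step; exact subfolder and file names may differ:
```bash
search_results/
├── bge-m3/
│   ├── NoReranker/             # results from the embedding model alone
│   └── bge-reranker-v2-m3/     # results reranked by bge-reranker-v2-m3
└── ...
```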
3. Package the search results.
```bash
# Zip "Embedding Model + NoReranker" search results in "<search_results>/<model_name>/NoReranker" to "<save_dir>/<model_name>_NoReranker.zip".
python zip_results.py \\
--results_dir search_results \\
--model_name bge-m3 \\
--save_dir search_results/zipped_results
# Zip "Embedding Model + Reranker" search results in "<search_results>/<model_name>/<reranker_name>" to "<save_dir>/<model_name>_<reranker_name>.zip".
python zip_results.py \\
--results_dir search_results \\
--model_name bge-m3 \\
--reranker_name bge-reranker-v2-m3 \\
--save_dir search_results/zipped_results
```
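Before uploading, you can inspect the archive to confirm it contains the expected results. The archive name follows the `<model_name>_<reranker_name>.zip` pattern described above (here the "NoReranker" variant is shown as an example):
```bash
# List the contents of the zipped search results before uploading
unzip -l search_results/zipped_results/bge-m3_NoReranker.zip
```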
4. Upload the `.zip` file on this page and fill in the model information:
- Model Name: such as `bge-m3`.
- Model URL: such as `https://huggingface.co/BAAI/bge-m3`.
- Reranker Name: such as `bge-reranker-v2-m3`. Leave empty for `NoReranker`.
- Reranker URL: such as `https://huggingface.co/BAAI/bge-reranker-v2-m3`. Leave empty for `NoReranker`.
If you want to stay anonymous, fill in only the Model Name and Reranker Name (leave empty for `NoReranker`) and check the selection box below before submission.
5. Congratulations! Your results will appear on the leaderboard within an hour.
"""
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
CITATION_BUTTON_TEXT = r"""
"""