---
# Retrieval conditions over the BEIR v1.0.0 datasets.
#
# Each entry under `conditions` has:
#   name     - identifier of the condition.
#   command  - Pyserini command line. `${dataset}` and `$output` are
#              placeholders (presumably substituted by the consuming
#              script - confirm against the caller). The folded scalar
#              (`>-`) joins the lines below with single spaces, so the
#              value is one command line with no trailing newline.
#   datasets - one item per dataset; `scores` is a one-element list holding
#              the nDCG@10, R@100 and R@1000 values for that dataset
#              (presumably the reference values a run is compared against -
#              confirm with the consumer).
#
# All scores are written with four decimal places for consistency.
conditions:
  # BM25 (`--bm25`) over the `.flat` index.
  - name: bm25-flat
    command: >-
      python -m pyserini.search.lucene
      --index beir-v1.0.0-${dataset}.flat
      --topics beir-v1.0.0-${dataset}-test
      --output $output
      --output-format trec
      --batch 36 --threads 12
      --hits 1000 --bm25 --remove-query
    datasets:
      - dataset: trec-covid
        scores:
          - nDCG@10: 0.5947
            R@100: 0.1091
            R@1000: 0.3955
      - dataset: bioasq
        scores:
          - nDCG@10: 0.5225
            R@100: 0.7687
            R@1000: 0.9030
      - dataset: nfcorpus
        scores:
          - nDCG@10: 0.3218
            R@100: 0.2457
            R@1000: 0.3704
      - dataset: nq
        scores:
          - nDCG@10: 0.3055
            R@100: 0.7513
            R@1000: 0.8958
      - dataset: hotpotqa
        scores:
          - nDCG@10: 0.6330
            R@100: 0.7957
            R@1000: 0.8820
      - dataset: fiqa
        scores:
          - nDCG@10: 0.2361
            R@100: 0.5395
            R@1000: 0.7393
      - dataset: signal1m
        scores:
          - nDCG@10: 0.3304
            R@100: 0.3703
            R@1000: 0.5642
      - dataset: trec-news
        scores:
          - nDCG@10: 0.3952
            R@100: 0.4469
            R@1000: 0.7051
      - dataset: robust04
        scores:
          - nDCG@10: 0.4070
            R@100: 0.3746
            R@1000: 0.6345
      - dataset: arguana
        scores:
          - nDCG@10: 0.3970
            R@100: 0.9324
            R@1000: 0.9872
      - dataset: webis-touche2020
        scores:
          - nDCG@10: 0.4422
            R@100: 0.5822
            R@1000: 0.8621
      - dataset: cqadupstack-android
        scores:
          - nDCG@10: 0.3801
            R@100: 0.6829
            R@1000: 0.8632
      - dataset: cqadupstack-english
        scores:
          - nDCG@10: 0.3453
            R@100: 0.5757
            R@1000: 0.7323
      - dataset: cqadupstack-gaming
        scores:
          - nDCG@10: 0.4822
            R@100: 0.7651
            R@1000: 0.8945
      - dataset: cqadupstack-gis
        scores:
          - nDCG@10: 0.2901
            R@100: 0.6119
            R@1000: 0.8174
      - dataset: cqadupstack-mathematica
        scores:
          - nDCG@10: 0.2015
            R@100: 0.4877
            R@1000: 0.7221
      - dataset: cqadupstack-physics
        scores:
          - nDCG@10: 0.3214
            R@100: 0.6326
            R@1000: 0.8340
      - dataset: cqadupstack-programmers
        scores:
          - nDCG@10: 0.2802
            R@100: 0.5588
            R@1000: 0.7734
      - dataset: cqadupstack-stats
        scores:
          - nDCG@10: 0.2711
            R@100: 0.5338
            R@1000: 0.7310
      - dataset: cqadupstack-tex
        scores:
          - nDCG@10: 0.2244
            R@100: 0.4686
            R@1000: 0.6907
      - dataset: cqadupstack-unix
        scores:
          - nDCG@10: 0.2749
            R@100: 0.5417
            R@1000: 0.7616
      - dataset: cqadupstack-webmasters
        scores:
          - nDCG@10: 0.3059
            R@100: 0.5820
            R@1000: 0.8066
      - dataset: cqadupstack-wordpress
        scores:
          - nDCG@10: 0.2483
            R@100: 0.5152
            R@1000: 0.7552
      - dataset: quora
        scores:
          - nDCG@10: 0.7886
            R@100: 0.9733
            R@1000: 0.9950
      - dataset: dbpedia-entity
        scores:
          - nDCG@10: 0.3180
            R@100: 0.4682
            R@1000: 0.6760
      - dataset: scidocs
        scores:
          - nDCG@10: 0.1490
            R@100: 0.3477
            R@1000: 0.5638
      - dataset: fever
        scores:
          - nDCG@10: 0.6513
            R@100: 0.9185
            R@1000: 0.9589
      - dataset: climate-fever
        scores:
          - nDCG@10: 0.1651
            R@100: 0.4249
            R@1000: 0.6324
      - dataset: scifact
        scores:
          - nDCG@10: 0.6789
            R@100: 0.9253
            R@1000: 0.9767

  # BM25 over the `.multifield` index, with `contents` and `title` both
  # weighted 1.0 via `--fields`.
  - name: bm25-multifield
    command: >-
      python -m pyserini.search.lucene
      --index beir-v1.0.0-${dataset}.multifield
      --topics beir-v1.0.0-${dataset}-test
      --output $output
      --output-format trec
      --batch 36 --threads 12
      --hits 1000 --bm25 --remove-query
      --fields contents=1.0 title=1.0
    datasets:
      - dataset: trec-covid
        scores:
          - nDCG@10: 0.6559
            R@100: 0.1141
            R@1000: 0.3891
      - dataset: bioasq
        scores:
          - nDCG@10: 0.4646
            R@100: 0.7145
            R@1000: 0.8428
      - dataset: nfcorpus
        scores:
          - nDCG@10: 0.3254
            R@100: 0.2500
            R@1000: 0.3718
      - dataset: nq
        scores:
          - nDCG@10: 0.3285
            R@100: 0.7597
            R@1000: 0.9019
      - dataset: hotpotqa
        scores:
          - nDCG@10: 0.6027
            R@100: 0.7400
            R@1000: 0.8405
      - dataset: fiqa
        scores:
          - nDCG@10: 0.2361
            R@100: 0.5395
            R@1000: 0.7393
      - dataset: signal1m
        scores:
          - nDCG@10: 0.3304
            R@100: 0.3703
            R@1000: 0.5642
      - dataset: trec-news
        scores:
          - nDCG@10: 0.3977
            R@100: 0.4216
            R@1000: 0.6993
      - dataset: robust04
        scores:
          - nDCG@10: 0.4070
            R@100: 0.3746
            R@1000: 0.6345
      - dataset: arguana
        scores:
          - nDCG@10: 0.4142
            R@100: 0.9431
            R@1000: 0.9893
      - dataset: webis-touche2020
        scores:
          - nDCG@10: 0.3673
            R@100: 0.5376
            R@1000: 0.8668
      - dataset: cqadupstack-android
        scores:
          - nDCG@10: 0.3709
            R@100: 0.6889
            R@1000: 0.8712
      - dataset: cqadupstack-english
        scores:
          - nDCG@10: 0.3321
            R@100: 0.5842
            R@1000: 0.7574
      - dataset: cqadupstack-gaming
        scores:
          - nDCG@10: 0.4418
            R@100: 0.7571
            R@1000: 0.8882
      - dataset: cqadupstack-gis
        scores:
          - nDCG@10: 0.2904
            R@100: 0.6458
            R@1000: 0.8248
      - dataset: cqadupstack-mathematica
        scores:
          - nDCG@10: 0.2046
            R@100: 0.5215
            R@1000: 0.7559
      - dataset: cqadupstack-physics
        scores:
          - nDCG@10: 0.3248
            R@100: 0.6486
            R@1000: 0.8506
      - dataset: cqadupstack-programmers
        scores:
          - nDCG@10: 0.2963
            R@100: 0.6194
            R@1000: 0.8096
      - dataset: cqadupstack-stats
        scores:
          - nDCG@10: 0.2790
            R@100: 0.5719
            R@1000: 0.7619
      - dataset: cqadupstack-tex
        scores:
          - nDCG@10: 0.2086
            R@100: 0.4954
            R@1000: 0.7222
      - dataset: cqadupstack-unix
        scores:
          - nDCG@10: 0.2788
            R@100: 0.5721
            R@1000: 0.7783
      - dataset: cqadupstack-webmasters
        scores:
          - nDCG@10: 0.3008
            R@100: 0.6100
            R@1000: 0.8226
      - dataset: cqadupstack-wordpress
        scores:
          - nDCG@10: 0.2562
            R@100: 0.5526
            R@1000: 0.7848
      - dataset: quora
        scores:
          - nDCG@10: 0.7886
            R@100: 0.9733
            R@1000: 0.9950
      - dataset: dbpedia-entity
        scores:
          - nDCG@10: 0.3128
            R@100: 0.3981
            R@1000: 0.5848
      - dataset: scidocs
        scores:
          - nDCG@10: 0.1581
            R@100: 0.3561
            R@1000: 0.5599
      - dataset: fever
        scores:
          - nDCG@10: 0.7530
            R@100: 0.9309
            R@1000: 0.9599
      - dataset: climate-fever
        scores:
          - nDCG@10: 0.2129
            R@100: 0.4357
            R@1000: 0.6099
      - dataset: scifact
        scores:
          - nDCG@10: 0.6647
            R@100: 0.9076
            R@1000: 0.9800

  # Impact search (`--impact`) using the `splade_distil_cocodenser_medium`
  # index and the matching topics set.
  - name: splade-distil-cocodenser-medium
    command: >-
      python -m pyserini.search.lucene
      --index beir-v1.0.0-${dataset}-splade_distil_cocodenser_medium
      --topics beir-v1.0.0-${dataset}-test-splade_distil_cocodenser_medium
      --output $output
      --output-format trec
      --batch 36 --threads 12
      --hits 1000 --impact --remove-query
    datasets:
      - dataset: trec-covid
        scores:
          - nDCG@10: 0.7109
            R@100: 0.1308
            R@1000: 0.4433
      - dataset: bioasq
        scores:
          - nDCG@10: 0.5035
            R@100: 0.7422
            R@1000: 0.8904
      - dataset: nfcorpus
        scores:
          - nDCG@10: 0.3454
            R@100: 0.2891
            R@1000: 0.5694
      - dataset: nq
        scores:
          - nDCG@10: 0.5442
            R@100: 0.9285
            R@1000: 0.9812
      - dataset: hotpotqa
        scores:
          - nDCG@10: 0.6860
            R@100: 0.8144
            R@1000: 0.8945
      - dataset: fiqa
        scores:
          - nDCG@10: 0.3514
            R@100: 0.6298
            R@1000: 0.8323
      - dataset: signal1m
        scores:
          - nDCG@10: 0.2957
            R@100: 0.3311
            R@1000: 0.5514
      - dataset: trec-news
        scores:
          - nDCG@10: 0.3936
            R@100: 0.4323
            R@1000: 0.6977
      - dataset: robust04
        scores:
          - nDCG@10: 0.4581
            R@100: 0.3773
            R@1000: 0.6099
      - dataset: arguana
        scores:
          - nDCG@10: 0.5210
            R@100: 0.9822
            R@1000: 0.9950
      - dataset: webis-touche2020
        scores:
          - nDCG@10: 0.2435
            R@100: 0.4723
            R@1000: 0.8116
      - dataset: cqadupstack-android
        scores:
          - nDCG@10: 0.3954
            R@100: 0.7405
            R@1000: 0.9035
      - dataset: cqadupstack-english
        scores:
          - nDCG@10: 0.4026
            R@100: 0.6768
            R@1000: 0.8346
      - dataset: cqadupstack-gaming
        scores:
          - nDCG@10: 0.5061
            R@100: 0.8138
            R@1000: 0.9253
      - dataset: cqadupstack-gis
        scores:
          - nDCG@10: 0.3223
            R@100: 0.6419
            R@1000: 0.8385
      - dataset: cqadupstack-mathematica
        scores:
          - nDCG@10: 0.2423
            R@100: 0.5732
            R@1000: 0.7848
      - dataset: cqadupstack-physics
        scores:
          - nDCG@10: 0.3668
            R@100: 0.7286
            R@1000: 0.8931
      - dataset: cqadupstack-programmers
        scores:
          - nDCG@10: 0.3412
            R@100: 0.6653
            R@1000: 0.8451
      - dataset: cqadupstack-stats
        scores:
          - nDCG@10: 0.3142
            R@100: 0.5889
            R@1000: 0.7823
      - dataset: cqadupstack-tex
        scores:
          - nDCG@10: 0.2575
            R@100: 0.5231
            R@1000: 0.7372
      - dataset: cqadupstack-unix
        scores:
          - nDCG@10: 0.3292
            R@100: 0.6192
            R@1000: 0.8225
      - dataset: cqadupstack-webmasters
        scores:
          - nDCG@10: 0.3343
            R@100: 0.6404
            R@1000: 0.8767
      - dataset: cqadupstack-wordpress
        scores:
          - nDCG@10: 0.2839
            R@100: 0.5974
            R@1000: 0.8036
      - dataset: quora
        scores:
          - nDCG@10: 0.8136
            R@100: 0.9817
            R@1000: 0.9979
      - dataset: dbpedia-entity
        scores:
          - nDCG@10: 0.4416
            R@100: 0.5636
            R@1000: 0.7774
      - dataset: scidocs
        scores:
          - nDCG@10: 0.1590
            R@100: 0.3671
            R@1000: 0.5891
      - dataset: fever
        scores:
          - nDCG@10: 0.7962
            R@100: 0.9550
            R@1000: 0.9751
      - dataset: climate-fever
        scores:
          - nDCG@10: 0.2276
            R@100: 0.5140
            R@1000: 0.7084
      - dataset: scifact
        scores:
          - nDCG@10: 0.6992
            R@100: 0.9270
            R@1000: 0.9767

  # `pyserini.search.faiss` with the `facebook/contriever` encoder over the
  # `.contriever` index.
  - name: contriever
    command: >-
      python -m pyserini.search.faiss
      --encoder-class contriever
      --encoder facebook/contriever
      --index beir-v1.0.0-${dataset}.contriever
      --topics beir-v1.0.0-${dataset}-test
      --output $output
      --batch 128 --threads 16
      --hits 1000 --remove-query
    datasets:
      - dataset: trec-covid
        scores:
          - nDCG@10: 0.2732
            R@100: 0.0368
            R@1000: 0.1675
      - dataset: bioasq
        scores:
          - nDCG@10: 0.3016
            R@100: 0.5412
            R@1000: 0.7396
      - dataset: nfcorpus
        scores:
          - nDCG@10: 0.3173
            R@100: 0.2943
            R@1000: 0.6232
      - dataset: nq
        scores:
          - nDCG@10: 0.2536
            R@100: 0.7712
            R@1000: 0.9286
      - dataset: hotpotqa
        scores:
          - nDCG@10: 0.4807
            R@100: 0.7046
            R@1000: 0.8294
      - dataset: fiqa
        scores:
          - nDCG@10: 0.2449
            R@100: 0.5619
            R@1000: 0.8215
      - dataset: signal1m
        scores:
          - nDCG@10: 0.2338
            R@100: 0.2568
            R@1000: 0.4757
      - dataset: trec-news
        scores:
          - nDCG@10: 0.3484
            R@100: 0.4234
            R@1000: 0.7389
      - dataset: robust04
        scores:
          - nDCG@10: 0.3155
            R@100: 0.2757
            R@1000: 0.5097
      - dataset: arguana
        scores:
          - nDCG@10: 0.3791
            R@100: 0.9011
            R@1000: 0.9851
      - dataset: webis-touche2020
        scores:
          - nDCG@10: 0.1668
            R@100: 0.3736
            R@1000: 0.7144
      - dataset: cqadupstack-android
        scores:
          - nDCG@10: 0.3771
            R@100: 0.7436
            R@1000: 0.9173
      - dataset: cqadupstack-english
        scores:
          - nDCG@10: 0.3571
            R@100: 0.6442
            R@1000: 0.8042
      - dataset: cqadupstack-gaming
        scores:
          - nDCG@10: 0.4597
            R@100: 0.8092
            R@1000: 0.9354
      - dataset: cqadupstack-gis
        scores:
          - nDCG@10: 0.2411
            R@100: 0.5792
            R@1000: 0.8018
      - dataset: cqadupstack-mathematica
        scores:
          - nDCG@10: 0.1841
            R@100: 0.5127
            R@1000: 0.7757
      - dataset: cqadupstack-physics
        scores:
          - nDCG@10: 0.3430
            R@100: 0.7013
            R@1000: 0.8980
      - dataset: cqadupstack-programmers
        scores:
          - nDCG@10: 0.3029
            R@100: 0.6402
            R@1000: 0.8434
      - dataset: cqadupstack-stats
        scores:
          - nDCG@10: 0.2483
            R@100: 0.5269
            R@1000: 0.7417
      - dataset: cqadupstack-tex
        scores:
          - nDCG@10: 0.1540
            R@100: 0.4333
            R@1000: 0.6870
      - dataset: cqadupstack-unix
        scores:
          - nDCG@10: 0.2636
            R@100: 0.5879
            R@1000: 0.8212
      - dataset: cqadupstack-webmasters
        scores:
          - nDCG@10: 0.2878
            R@100: 0.6485
            R@1000: 0.8800
      - dataset: cqadupstack-wordpress
        scores:
          - nDCG@10: 0.1914
            R@100: 0.5364
            R@1000: 0.7551
      - dataset: quora
        scores:
          - nDCG@10: 0.8349
            R@100: 0.9871
            R@1000: 0.9981
      - dataset: dbpedia-entity
        scores:
          - nDCG@10: 0.2916
            R@100: 0.4529
            R@1000: 0.7142
      - dataset: scidocs
        scores:
          - nDCG@10: 0.1491
            R@100: 0.3601
            R@1000: 0.6105
      - dataset: fever
        scores:
          - nDCG@10: 0.6821
            R@100: 0.9356
            R@1000: 0.9655
      - dataset: climate-fever
        scores:
          - nDCG@10: 0.1550
            R@100: 0.4422
            R@1000: 0.7232
      - dataset: scifact
        scores:
          - nDCG@10: 0.6493
            R@100: 0.9260
            R@1000: 0.9967

  # `pyserini.search.faiss` with the `facebook/contriever-msmarco` encoder
  # over the `.contriever-msmarco` index.
  - name: contriever-msmarco
    command: >-
      python -m pyserini.search.faiss
      --encoder-class contriever
      --encoder facebook/contriever-msmarco
      --index beir-v1.0.0-${dataset}.contriever-msmarco
      --topics beir-v1.0.0-${dataset}-test
      --output $output
      --batch 128 --threads 16
      --hits 1000 --remove-query
    datasets:
      - dataset: trec-covid
        scores:
          - nDCG@10: 0.5964
            R@100: 0.0907
            R@1000: 0.3351
      - dataset: bioasq
        scores:
          - nDCG@10: 0.3829
            R@100: 0.6072
            R@1000: 0.7666
      - dataset: nfcorpus
        scores:
          - nDCG@10: 0.3281
            R@100: 0.3008
            R@1000: 0.6305
      - dataset: nq
        scores:
          - nDCG@10: 0.4977
            R@100: 0.9252
            R@1000: 0.9860
      - dataset: hotpotqa
        scores:
          - nDCG@10: 0.6376
            R@100: 0.7772
            R@1000: 0.8718
      - dataset: fiqa
        scores:
          - nDCG@10: 0.3293
            R@100: 0.6558
            R@1000: 0.8695
      - dataset: signal1m
        scores:
          - nDCG@10: 0.2783
            R@100: 0.3220
            R@1000: 0.5419
      - dataset: trec-news
        scores:
          - nDCG@10: 0.4283
            R@100: 0.4924
            R@1000: 0.7752
      - dataset: robust04
        scores:
          - nDCG@10: 0.4729
            R@100: 0.3917
            R@1000: 0.6552
      - dataset: arguana
        scores:
          - nDCG@10: 0.4461
            R@100: 0.9765
            R@1000: 0.9964
      - dataset: webis-touche2020
        scores:
          - nDCG@10: 0.2040
            R@100: 0.4420
            R@1000: 0.8290
      - dataset: cqadupstack-android
        scores:
          - nDCG@10: 0.4255
            R@100: 0.7503
            R@1000: 0.9304
      - dataset: cqadupstack-english
        scores:
          - nDCG@10: 0.4326
            R@100: 0.6935
            R@1000: 0.8435
      - dataset: cqadupstack-gaming
        scores:
          - nDCG@10: 0.5276
            R@100: 0.8481
            R@1000: 0.9427
      - dataset: cqadupstack-gis
        scores:
          - nDCG@10: 0.3022
            R@100: 0.6272
            R@1000: 0.8417
      - dataset: cqadupstack-mathematica
        scores:
          - nDCG@10: 0.2355
            R@100: 0.5726
            R@1000: 0.7995
      - dataset: cqadupstack-physics
        scores:
          - nDCG@10: 0.4159
            R@100: 0.7619
            R@1000: 0.9162
      - dataset: cqadupstack-programmers
        scores:
          - nDCG@10: 0.3574
            R@100: 0.7191
            R@1000: 0.8878
      - dataset: cqadupstack-stats
        scores:
          - nDCG@10: 0.3095
            R@100: 0.5860
            R@1000: 0.7805
      - dataset: cqadupstack-tex
        scores:
          - nDCG@10: 0.2209
            R@100: 0.4985
            R@1000: 0.7348
      - dataset: cqadupstack-unix
        scores:
          - nDCG@10: 0.3257
            R@100: 0.6161
            R@1000: 0.8373
      - dataset: cqadupstack-webmasters
        scores:
          - nDCG@10: 0.3392
            R@100: 0.7032
            R@1000: 0.8956
      - dataset: cqadupstack-wordpress
        scores:
          - nDCG@10: 0.2532
            R@100: 0.5769
            R@1000: 0.7929
      - dataset: quora
        scores:
          - nDCG@10: 0.8648
            R@100: 0.9935
            R@1000: 0.9994
      - dataset: dbpedia-entity
        scores:
          - nDCG@10: 0.4128
            R@100: 0.5414
            R@1000: 0.7751
      - dataset: scidocs
        scores:
          - nDCG@10: 0.1652
            R@100: 0.3783
            R@1000: 0.6216
      - dataset: fever
        scores:
          - nDCG@10: 0.7583
            R@100: 0.9494
            R@1000: 0.9705
      - dataset: climate-fever
        scores:
          - nDCG@10: 0.2371
            R@100: 0.5746
            R@1000: 0.8019
      - dataset: scifact
        scores:
          - nDCG@10: 0.6768
            R@100: 0.9470
            R@1000: 0.9833