# Hugging Face page header captured with this file (not part of the YAML data):
#   geonmin-kim's picture
#   Upload folder using huggingface_hub
#   d6585f5
conditions:
# Condition "bm25-flat": Lucene search with --bm25 against the per-dataset
# ".flat" index, 1000 hits per query, TREC output format.
# ${dataset} and $output are template placeholders; presumably filled in by
# the harness that consumes this file (TODO confirm).
# Each entry under "datasets" holds nDCG@10 / R@100 / R@1000 for one dataset;
# presumably the expected reference scores for the command (TODO confirm).
- name: bm25-flat
  command: python -m pyserini.search.lucene --index beir-v1.0.0-${dataset}.flat --topics beir-v1.0.0-${dataset}-test --output $output --output-format trec --batch 36 --threads 12 --hits 1000 --bm25 --remove-query
  datasets:
  - dataset: trec-covid
    scores:
    - nDCG@10: 0.5947
      R@100: 0.1091
      R@1000: 0.3955
  - dataset: bioasq
    scores:
    - nDCG@10: 0.5225
      R@100: 0.7687
      R@1000: 0.9030
  - dataset: nfcorpus
    scores:
    - nDCG@10: 0.3218
      R@100: 0.2457
      R@1000: 0.3704
  - dataset: nq
    scores:
    - nDCG@10: 0.3055
      R@100: 0.7513
      R@1000: 0.8958
  - dataset: hotpotqa
    scores:
    - nDCG@10: 0.6330
      R@100: 0.7957
      R@1000: 0.8820
  - dataset: fiqa
    scores:
    - nDCG@10: 0.2361
      R@100: 0.5395
      R@1000: 0.7393
  - dataset: signal1m
    scores:
    - nDCG@10: 0.3304
      R@100: 0.3703
      R@1000: 0.5642
  - dataset: trec-news
    scores:
    - nDCG@10: 0.3952
      R@100: 0.4469
      R@1000: 0.7051
  - dataset: robust04
    scores:
    - nDCG@10: 0.4070
      R@100: 0.3746
      R@1000: 0.6345
  - dataset: arguana
    scores:
    - nDCG@10: 0.3970
      R@100: 0.9324
      R@1000: 0.9872
  - dataset: webis-touche2020
    scores:
    - nDCG@10: 0.4422
      R@100: 0.5822
      R@1000: 0.8621
  - dataset: cqadupstack-android
    scores:
    - nDCG@10: 0.3801
      R@100: 0.6829
      R@1000: 0.8632
  - dataset: cqadupstack-english
    scores:
    - nDCG@10: 0.3453
      R@100: 0.5757
      R@1000: 0.7323
  - dataset: cqadupstack-gaming
    scores:
    - nDCG@10: 0.4822
      R@100: 0.7651
      R@1000: 0.8945
  - dataset: cqadupstack-gis
    scores:
    - nDCG@10: 0.2901
      R@100: 0.6119
      R@1000: 0.8174
  - dataset: cqadupstack-mathematica
    scores:
    - nDCG@10: 0.2015
      R@100: 0.4877
      R@1000: 0.7221
  - dataset: cqadupstack-physics
    scores:
    - nDCG@10: 0.3214
      R@100: 0.6326
      R@1000: 0.8340
  - dataset: cqadupstack-programmers
    scores:
    - nDCG@10: 0.2802
      R@100: 0.5588
      R@1000: 0.7734
  - dataset: cqadupstack-stats
    scores:
    - nDCG@10: 0.2711
      R@100: 0.5338
      R@1000: 0.7310
  - dataset: cqadupstack-tex
    scores:
    - nDCG@10: 0.2244
      R@100: 0.4686
      R@1000: 0.6907
  - dataset: cqadupstack-unix
    scores:
    - nDCG@10: 0.2749
      R@100: 0.5417
      R@1000: 0.7616
  - dataset: cqadupstack-webmasters
    scores:
    - nDCG@10: 0.3059
      R@100: 0.5820
      R@1000: 0.8066
  - dataset: cqadupstack-wordpress
    scores:
    - nDCG@10: 0.2483
      R@100: 0.5152
      R@1000: 0.7552
  - dataset: quora
    scores:
    - nDCG@10: 0.7886
      R@100: 0.9733
      R@1000: 0.9950
  - dataset: dbpedia-entity
    scores:
    - nDCG@10: 0.3180
      R@100: 0.4682
      R@1000: 0.6760
  - dataset: scidocs
    scores:
    - nDCG@10: 0.1490
      R@100: 0.3477
      R@1000: 0.5638
  - dataset: fever
    scores:
    - nDCG@10: 0.6513
      R@100: 0.9185
      R@1000: 0.9589
  - dataset: climate-fever
    scores:
    - nDCG@10: 0.1651
      R@100: 0.4249
      R@1000: 0.6324
  - dataset: scifact
    scores:
    - nDCG@10: 0.6789
      R@100: 0.9253
      R@1000: 0.9767
# Condition "bm25-multifield": Lucene search with --bm25 against the
# per-dataset ".multifield" index, weighting the "contents" and "title" fields
# equally (--fields contents=1.0 title=1.0); 1000 hits per query, TREC format.
# ${dataset} and $output are template placeholders; presumably filled in by
# the harness that consumes this file (TODO confirm).
# Each entry under "datasets" holds nDCG@10 / R@100 / R@1000 for one dataset;
# presumably the expected reference scores for the command (TODO confirm).
- name: bm25-multifield
  command: python -m pyserini.search.lucene --index beir-v1.0.0-${dataset}.multifield --topics beir-v1.0.0-${dataset}-test --output $output --output-format trec --batch 36 --threads 12 --hits 1000 --bm25 --remove-query --fields contents=1.0 title=1.0
  datasets:
  - dataset: trec-covid
    scores:
    - nDCG@10: 0.6559
      R@100: 0.1141
      R@1000: 0.3891
  - dataset: bioasq
    scores:
    - nDCG@10: 0.4646
      R@100: 0.7145
      R@1000: 0.8428
  - dataset: nfcorpus
    scores:
    - nDCG@10: 0.3254
      R@100: 0.2500
      R@1000: 0.3718
  - dataset: nq
    scores:
    - nDCG@10: 0.3285
      R@100: 0.7597
      R@1000: 0.9019
  - dataset: hotpotqa
    scores:
    - nDCG@10: 0.6027
      R@100: 0.7400
      R@1000: 0.8405
  - dataset: fiqa
    scores:
    - nDCG@10: 0.2361
      R@100: 0.5395
      R@1000: 0.7393
  - dataset: signal1m
    scores:
    - nDCG@10: 0.3304
      R@100: 0.3703
      R@1000: 0.5642
  - dataset: trec-news
    scores:
    - nDCG@10: 0.3977
      R@100: 0.4216
      R@1000: 0.6993
  - dataset: robust04
    scores:
    - nDCG@10: 0.4070
      R@100: 0.3746
      R@1000: 0.6345
  - dataset: arguana
    scores:
    - nDCG@10: 0.4142
      R@100: 0.9431
      R@1000: 0.9893
  - dataset: webis-touche2020
    scores:
    - nDCG@10: 0.3673
      R@100: 0.5376
      R@1000: 0.8668
  - dataset: cqadupstack-android
    scores:
    - nDCG@10: 0.3709
      R@100: 0.6889
      R@1000: 0.8712
  - dataset: cqadupstack-english
    scores:
    - nDCG@10: 0.3321
      R@100: 0.5842
      R@1000: 0.7574
  - dataset: cqadupstack-gaming
    scores:
    - nDCG@10: 0.4418
      R@100: 0.7571
      R@1000: 0.8882
  - dataset: cqadupstack-gis
    scores:
    - nDCG@10: 0.2904
      R@100: 0.6458
      R@1000: 0.8248
  - dataset: cqadupstack-mathematica
    scores:
    - nDCG@10: 0.2046
      R@100: 0.5215
      R@1000: 0.7559
  - dataset: cqadupstack-physics
    scores:
    - nDCG@10: 0.3248
      R@100: 0.6486
      R@1000: 0.8506
  - dataset: cqadupstack-programmers
    scores:
    - nDCG@10: 0.2963
      R@100: 0.6194
      R@1000: 0.8096
  - dataset: cqadupstack-stats
    scores:
    - nDCG@10: 0.2790
      R@100: 0.5719
      R@1000: 0.7619
  - dataset: cqadupstack-tex
    scores:
    - nDCG@10: 0.2086
      R@100: 0.4954
      R@1000: 0.7222
  - dataset: cqadupstack-unix
    scores:
    - nDCG@10: 0.2788
      R@100: 0.5721
      R@1000: 0.7783
  - dataset: cqadupstack-webmasters
    scores:
    - nDCG@10: 0.3008
      R@100: 0.6100
      R@1000: 0.8226
  - dataset: cqadupstack-wordpress
    scores:
    - nDCG@10: 0.2562
      R@100: 0.5526
      R@1000: 0.7848
  - dataset: quora
    scores:
    - nDCG@10: 0.7886
      R@100: 0.9733
      R@1000: 0.9950
  - dataset: dbpedia-entity
    scores:
    - nDCG@10: 0.3128
      R@100: 0.3981
      R@1000: 0.5848
  - dataset: scidocs
    scores:
    - nDCG@10: 0.1581
      R@100: 0.3561
      R@1000: 0.5599
  - dataset: fever
    scores:
    - nDCG@10: 0.7530
      R@100: 0.9309
      R@1000: 0.9599
  - dataset: climate-fever
    scores:
    - nDCG@10: 0.2129
      R@100: 0.4357
      R@1000: 0.6099
  - dataset: scifact
    scores:
    - nDCG@10: 0.6647
      R@100: 0.9076
      R@1000: 0.9800
# Condition "splade-distil-cocodenser-medium": Lucene search with --impact
# against the per-dataset "-splade_distil_cocodenser_medium" index, using the
# matching "-test-splade_distil_cocodenser_medium" topics; 1000 hits per query,
# TREC output format.
# ${dataset} and $output are template placeholders; presumably filled in by
# the harness that consumes this file (TODO confirm).
# Each entry under "datasets" holds nDCG@10 / R@100 / R@1000 for one dataset;
# presumably the expected reference scores for the command (TODO confirm).
- name: splade-distil-cocodenser-medium
  command: python -m pyserini.search.lucene --index beir-v1.0.0-${dataset}-splade_distil_cocodenser_medium --topics beir-v1.0.0-${dataset}-test-splade_distil_cocodenser_medium --output $output --output-format trec --batch 36 --threads 12 --hits 1000 --impact --remove-query
  datasets:
  - dataset: trec-covid
    scores:
    - nDCG@10: 0.7109
      R@100: 0.1308
      R@1000: 0.4433
  - dataset: bioasq
    scores:
    - nDCG@10: 0.5035
      R@100: 0.7422
      R@1000: 0.8904
  - dataset: nfcorpus
    scores:
    - nDCG@10: 0.3454
      R@100: 0.2891
      R@1000: 0.5694
  - dataset: nq
    scores:
    - nDCG@10: 0.5442
      R@100: 0.9285
      R@1000: 0.9812
  - dataset: hotpotqa
    scores:
    - nDCG@10: 0.6860
      R@100: 0.8144
      R@1000: 0.8945
  - dataset: fiqa
    scores:
    - nDCG@10: 0.3514
      R@100: 0.6298
      R@1000: 0.8323
  - dataset: signal1m
    scores:
    - nDCG@10: 0.2957
      R@100: 0.3311
      R@1000: 0.5514
  - dataset: trec-news
    scores:
    - nDCG@10: 0.3936
      R@100: 0.4323
      R@1000: 0.6977
  - dataset: robust04
    scores:
    - nDCG@10: 0.4581
      R@100: 0.3773
      R@1000: 0.6099
  - dataset: arguana
    scores:
    - nDCG@10: 0.5210
      R@100: 0.9822
      R@1000: 0.9950
  - dataset: webis-touche2020
    scores:
    - nDCG@10: 0.2435
      R@100: 0.4723
      R@1000: 0.8116
  - dataset: cqadupstack-android
    scores:
    - nDCG@10: 0.3954
      R@100: 0.7405
      R@1000: 0.9035
  - dataset: cqadupstack-english
    scores:
    - nDCG@10: 0.4026
      R@100: 0.6768
      R@1000: 0.8346
  - dataset: cqadupstack-gaming
    scores:
    - nDCG@10: 0.5061
      R@100: 0.8138
      R@1000: 0.9253
  - dataset: cqadupstack-gis
    scores:
    - nDCG@10: 0.3223
      R@100: 0.6419
      R@1000: 0.8385
  - dataset: cqadupstack-mathematica
    scores:
    - nDCG@10: 0.2423
      R@100: 0.5732
      R@1000: 0.7848
  - dataset: cqadupstack-physics
    scores:
    - nDCG@10: 0.3668
      R@100: 0.7286
      R@1000: 0.8931
  - dataset: cqadupstack-programmers
    scores:
    - nDCG@10: 0.3412
      R@100: 0.6653
      R@1000: 0.8451
  - dataset: cqadupstack-stats
    scores:
    - nDCG@10: 0.3142
      R@100: 0.5889
      R@1000: 0.7823
  - dataset: cqadupstack-tex
    scores:
    - nDCG@10: 0.2575
      R@100: 0.5231
      R@1000: 0.7372
  - dataset: cqadupstack-unix
    scores:
    - nDCG@10: 0.3292
      R@100: 0.6192
      R@1000: 0.8225
  - dataset: cqadupstack-webmasters
    scores:
    - nDCG@10: 0.3343
      R@100: 0.6404
      R@1000: 0.8767
  - dataset: cqadupstack-wordpress
    scores:
    - nDCG@10: 0.2839
      R@100: 0.5974
      R@1000: 0.8036
  - dataset: quora
    scores:
    - nDCG@10: 0.8136
      R@100: 0.9817
      R@1000: 0.9979
  - dataset: dbpedia-entity
    scores:
    - nDCG@10: 0.4416
      R@100: 0.5636
      R@1000: 0.7774
  - dataset: scidocs
    scores:
    - nDCG@10: 0.1590
      R@100: 0.3671
      R@1000: 0.5891
  - dataset: fever
    scores:
    - nDCG@10: 0.7962
      R@100: 0.9550
      R@1000: 0.9751
  - dataset: climate-fever
    scores:
    - nDCG@10: 0.2276
      R@100: 0.5140
      R@1000: 0.7084
  - dataset: scifact
    scores:
    - nDCG@10: 0.6992
      R@100: 0.9270
      R@1000: 0.9767
# Condition "contriever": Faiss search using the facebook/contriever encoder
# (--encoder-class contriever) against the per-dataset ".contriever" index;
# 1000 hits per query. No --output-format flag is passed here, unlike the
# Lucene conditions in this file.
# ${dataset} and $output are template placeholders; presumably filled in by
# the harness that consumes this file (TODO confirm).
# Each entry under "datasets" holds nDCG@10 / R@100 / R@1000 for one dataset;
# presumably the expected reference scores for the command (TODO confirm).
- name: contriever
  command: python -m pyserini.search.faiss --encoder-class contriever --encoder facebook/contriever --index beir-v1.0.0-${dataset}.contriever --topics beir-v1.0.0-${dataset}-test --output $output --batch 128 --threads 16 --hits 1000 --remove-query
  datasets:
  - dataset: trec-covid
    scores:
    - nDCG@10: 0.2732
      R@100: 0.0368
      R@1000: 0.1675
  - dataset: bioasq
    scores:
    - nDCG@10: 0.3016
      R@100: 0.5412
      R@1000: 0.7396
  - dataset: nfcorpus
    scores:
    - nDCG@10: 0.3173
      R@100: 0.2943
      R@1000: 0.6232
  - dataset: nq
    scores:
    - nDCG@10: 0.2536
      R@100: 0.7712
      R@1000: 0.9286
  - dataset: hotpotqa
    scores:
    - nDCG@10: 0.4807
      R@100: 0.7046
      R@1000: 0.8294
  - dataset: fiqa
    scores:
    - nDCG@10: 0.2449
      R@100: 0.5619
      R@1000: 0.8215
  - dataset: signal1m
    scores:
    - nDCG@10: 0.2338
      R@100: 0.2568
      R@1000: 0.4757
  - dataset: trec-news
    scores:
    - nDCG@10: 0.3484
      R@100: 0.4234
      R@1000: 0.7389
  - dataset: robust04
    scores:
    - nDCG@10: 0.3155
      R@100: 0.2757
      R@1000: 0.5097
  - dataset: arguana
    scores:
    - nDCG@10: 0.3791
      R@100: 0.9011
      R@1000: 0.9851
  - dataset: webis-touche2020
    scores:
    - nDCG@10: 0.1668
      R@100: 0.3736
      R@1000: 0.7144
  - dataset: cqadupstack-android
    scores:
    - nDCG@10: 0.3771
      R@100: 0.7436
      R@1000: 0.9173
  - dataset: cqadupstack-english
    scores:
    - nDCG@10: 0.3571
      R@100: 0.6442
      R@1000: 0.8042
  - dataset: cqadupstack-gaming
    scores:
    - nDCG@10: 0.4597
      R@100: 0.8092
      R@1000: 0.9354
  - dataset: cqadupstack-gis
    scores:
    - nDCG@10: 0.2411
      R@100: 0.5792
      R@1000: 0.8018
  - dataset: cqadupstack-mathematica
    scores:
    - nDCG@10: 0.1841
      R@100: 0.5127
      R@1000: 0.7757
  - dataset: cqadupstack-physics
    scores:
    - nDCG@10: 0.3430
      R@100: 0.7013
      R@1000: 0.8980
  - dataset: cqadupstack-programmers
    scores:
    - nDCG@10: 0.3029
      R@100: 0.6402
      R@1000: 0.8434
  - dataset: cqadupstack-stats
    scores:
    - nDCG@10: 0.2483
      R@100: 0.5269
      R@1000: 0.7417
  - dataset: cqadupstack-tex
    scores:
    - nDCG@10: 0.1540
      R@100: 0.4333
      R@1000: 0.6870
  - dataset: cqadupstack-unix
    scores:
    - nDCG@10: 0.2636
      R@100: 0.5879
      R@1000: 0.8212
  - dataset: cqadupstack-webmasters
    scores:
    - nDCG@10: 0.2878
      R@100: 0.6485
      R@1000: 0.8800
  - dataset: cqadupstack-wordpress
    scores:
    - nDCG@10: 0.1914
      R@100: 0.5364
      R@1000: 0.7551
  - dataset: quora
    scores:
    - nDCG@10: 0.8349
      R@100: 0.9871
      R@1000: 0.9981
  - dataset: dbpedia-entity
    scores:
    - nDCG@10: 0.2916
      R@100: 0.4529
      R@1000: 0.7142
  - dataset: scidocs
    scores:
    - nDCG@10: 0.1491
      R@100: 0.3601
      R@1000: 0.6105
  - dataset: fever
    scores:
    - nDCG@10: 0.6821
      R@100: 0.9356
      R@1000: 0.9655
  - dataset: climate-fever
    scores:
    - nDCG@10: 0.1550
      R@100: 0.4422
      R@1000: 0.7232
  - dataset: scifact
    scores:
    - nDCG@10: 0.6493
      R@100: 0.9260
      R@1000: 0.9967
# Condition "contriever-msmarco": Faiss search using the
# facebook/contriever-msmarco encoder (--encoder-class contriever) against the
# per-dataset ".contriever-msmarco" index; 1000 hits per query.
# ${dataset} and $output are template placeholders; presumably filled in by
# the harness that consumes this file (TODO confirm).
# Each entry under "datasets" holds nDCG@10 / R@100 / R@1000 for one dataset;
# presumably the expected reference scores for the command (TODO confirm).
# Scores are written with four decimals throughout (e.g. 0.9860, not 0.986);
# the numeric values are unchanged.
- name: contriever-msmarco
  command: python -m pyserini.search.faiss --encoder-class contriever --encoder facebook/contriever-msmarco --index beir-v1.0.0-${dataset}.contriever-msmarco --topics beir-v1.0.0-${dataset}-test --output $output --batch 128 --threads 16 --hits 1000 --remove-query
  datasets:
  - dataset: trec-covid
    scores:
    - nDCG@10: 0.5964
      R@100: 0.0907
      R@1000: 0.3351
  - dataset: bioasq
    scores:
    - nDCG@10: 0.3829
      R@100: 0.6072
      R@1000: 0.7666
  - dataset: nfcorpus
    scores:
    - nDCG@10: 0.3281
      R@100: 0.3008
      R@1000: 0.6305
  - dataset: nq
    scores:
    - nDCG@10: 0.4977
      R@100: 0.9252
      R@1000: 0.9860
  - dataset: hotpotqa
    scores:
    - nDCG@10: 0.6376
      R@100: 0.7772
      R@1000: 0.8718
  - dataset: fiqa
    scores:
    - nDCG@10: 0.3293
      R@100: 0.6558
      R@1000: 0.8695
  - dataset: signal1m
    scores:
    - nDCG@10: 0.2783
      R@100: 0.3220
      R@1000: 0.5419
  - dataset: trec-news
    scores:
    - nDCG@10: 0.4283
      R@100: 0.4924
      R@1000: 0.7752
  - dataset: robust04
    scores:
    - nDCG@10: 0.4729
      R@100: 0.3917
      R@1000: 0.6552
  - dataset: arguana
    scores:
    - nDCG@10: 0.4461
      R@100: 0.9765
      R@1000: 0.9964
  - dataset: webis-touche2020
    scores:
    - nDCG@10: 0.2040
      R@100: 0.4420
      R@1000: 0.8290
  - dataset: cqadupstack-android
    scores:
    - nDCG@10: 0.4255
      R@100: 0.7503
      R@1000: 0.9304
  - dataset: cqadupstack-english
    scores:
    - nDCG@10: 0.4326
      R@100: 0.6935
      R@1000: 0.8435
  - dataset: cqadupstack-gaming
    scores:
    - nDCG@10: 0.5276
      R@100: 0.8481
      R@1000: 0.9427
  - dataset: cqadupstack-gis
    scores:
    - nDCG@10: 0.3022
      R@100: 0.6272
      R@1000: 0.8417
  - dataset: cqadupstack-mathematica
    scores:
    - nDCG@10: 0.2355
      R@100: 0.5726
      R@1000: 0.7995
  - dataset: cqadupstack-physics
    scores:
    - nDCG@10: 0.4159
      R@100: 0.7619
      R@1000: 0.9162
  - dataset: cqadupstack-programmers
    scores:
    - nDCG@10: 0.3574
      R@100: 0.7191
      R@1000: 0.8878
  - dataset: cqadupstack-stats
    scores:
    - nDCG@10: 0.3095
      R@100: 0.5860
      R@1000: 0.7805
  - dataset: cqadupstack-tex
    scores:
    - nDCG@10: 0.2209
      R@100: 0.4985
      R@1000: 0.7348
  - dataset: cqadupstack-unix
    scores:
    - nDCG@10: 0.3257
      R@100: 0.6161
      R@1000: 0.8373
  - dataset: cqadupstack-webmasters
    scores:
    - nDCG@10: 0.3392
      R@100: 0.7032
      R@1000: 0.8956
  - dataset: cqadupstack-wordpress
    scores:
    - nDCG@10: 0.2532
      R@100: 0.5769
      R@1000: 0.7929
  - dataset: quora
    scores:
    - nDCG@10: 0.8648
      R@100: 0.9935
      R@1000: 0.9994
  - dataset: dbpedia-entity
    scores:
    - nDCG@10: 0.4128
      R@100: 0.5414
      R@1000: 0.7751
  - dataset: scidocs
    scores:
    - nDCG@10: 0.1652
      R@100: 0.3783
      R@1000: 0.6216
  - dataset: fever
    scores:
    - nDCG@10: 0.7583
      R@100: 0.9494
      R@1000: 0.9705
  - dataset: climate-fever
    scores:
    - nDCG@10: 0.2371
      R@100: 0.5746
      R@1000: 0.8019
  - dataset: scifact
    scores:
    - nDCG@10: 0.6768
      R@100: 0.9470
      R@1000: 0.9833