michael-newsrx-com's picture
Duplicate from hkunlp/instructor-large
c6c9972
---
# Card metadata header: one task category, a list of discovery tags, the
# language, an inference flag, and the license.
# NOTE(review): these keys look like Hugging Face model-card front matter --
# confirm against the Hub metadata schema before renaming or removing any.
pipeline_tag: sentence-similarity
# Free-form tag list; sequence items are written flush with the parent key.
tags:
- text-embedding
- embeddings
- information-retrieval
- beir
- text-classification
- language-model
- text-clustering
- text-semantic-similarity
- text-evaluation
- prompt-retrieval
- text-reranking
- sentence-transformers
- feature-extraction
- sentence-similarity
- transformers
- t5
# NOTE(review): the next two tags overlap with `language: en` and with the
# `sentence-similarity` tag above; presumably kept for search -- confirm
# before pruning.
- English
- Sentence Similarity
# NOTE(review): the remaining tags read like dataset/benchmark names --
# verify what they are meant to signal.
- natural_questions
- ms_marco
- fever
- hotpot_qa
- mteb
language: en
# Plain YAML boolean (not a string).
# NOTE(review): presumably turns off the hosted inference widget -- confirm.
inference: false
license: apache-2.0
model-index:
- name: INSTRUCTOR
results:
- task:
type: Classification
dataset:
type: mteb/amazon_counterfactual
name: MTEB AmazonCounterfactualClassification (en)
config: en
split: test
revision: e8379541af4e31359cca9fbcf4b00f2671dba205
metrics:
- type: accuracy
value: 88.13432835820896
- type: ap
value: 59.298209334395665
- type: f1
value: 83.31769058643586
- task:
type: Classification
dataset:
type: mteb/amazon_polarity
name: MTEB AmazonPolarityClassification
config: default
split: test
revision: e2d317d38cd51312af73b3d32a06d1a08b442046
metrics:
- type: accuracy
value: 91.526375
- type: ap
value: 88.16327709705504
- type: f1
value: 91.51095801287843
- task:
type: Classification
dataset:
type: mteb/amazon_reviews_multi
name: MTEB AmazonReviewsClassification (en)
config: en
split: test
revision: 1399c76144fd37290681b995c656ef9b2e06e26d
metrics:
- type: accuracy
value: 47.856
- type: f1
value: 45.41490917650942
- task:
type: Retrieval
dataset:
type: arguana
name: MTEB ArguAna
config: default
split: test
revision: None
metrics:
- type: map_at_1
value: 31.223
- type: map_at_10
value: 47.947
- type: map_at_100
value: 48.742000000000004
- type: map_at_1000
value: 48.745
- type: map_at_3
value: 43.137
- type: map_at_5
value: 45.992
- type: mrr_at_1
value: 32.432
- type: mrr_at_10
value: 48.4
- type: mrr_at_100
value: 49.202
- type: mrr_at_1000
value: 49.205
- type: mrr_at_3
value: 43.551
- type: mrr_at_5
value: 46.467999999999996
- type: ndcg_at_1
value: 31.223
- type: ndcg_at_10
value: 57.045
- type: ndcg_at_100
value: 60.175
- type: ndcg_at_1000
value: 60.233000000000004
- type: ndcg_at_3
value: 47.171
- type: ndcg_at_5
value: 52.322
- type: precision_at_1
value: 31.223
- type: precision_at_10
value: 8.599
- type: precision_at_100
value: 0.991
- type: precision_at_1000
value: 0.1
- type: precision_at_3
value: 19.63
- type: precision_at_5
value: 14.282
- type: recall_at_1
value: 31.223
- type: recall_at_10
value: 85.989
- type: recall_at_100
value: 99.075
- type: recall_at_1000
value: 99.502
- type: recall_at_3
value: 58.89
- type: recall_at_5
value: 71.408
- task:
type: Clustering
dataset:
type: mteb/arxiv-clustering-p2p
name: MTEB ArxivClusteringP2P
config: default
split: test
revision: a122ad7f3f0291bf49cc6f4d32aa80929df69d5d
metrics:
- type: v_measure
value: 43.1621946393635
- task:
type: Clustering
dataset:
type: mteb/arxiv-clustering-s2s
name: MTEB ArxivClusteringS2S
config: default
split: test
revision: f910caf1a6075f7329cdf8c1a6135696f37dbd53
metrics:
- type: v_measure
value: 32.56417132407894
- task:
type: Reranking
dataset:
type: mteb/askubuntudupquestions-reranking
name: MTEB AskUbuntuDupQuestions
config: default
split: test
revision: 2000358ca161889fa9c082cb41daa8dcfb161a54
metrics:
- type: map
value: 64.29539304390207
- type: mrr
value: 76.44484017060196
- task:
type: STS
dataset:
type: mteb/biosses-sts
name: MTEB BIOSSES
config: default
split: test
revision: d3fb88f8f02e40887cd149695127462bbcf29b4a
metrics:
- type: cos_sim_spearman
value: 84.38746499431112
- task:
type: Classification
dataset:
type: mteb/banking77
name: MTEB Banking77Classification
config: default
split: test
revision: 0fd18e25b25c072e09e0d92ab615fda904d66300
metrics:
- type: accuracy
value: 78.51298701298701
- type: f1
value: 77.49041754069235
- task:
type: Clustering
dataset:
type: mteb/biorxiv-clustering-p2p
name: MTEB BiorxivClusteringP2P
config: default
split: test
revision: 65b79d1d13f80053f67aca9498d9402c2d9f1f40
metrics:
- type: v_measure
value: 37.61848554098577
- task:
type: Clustering
dataset:
type: mteb/biorxiv-clustering-s2s
name: MTEB BiorxivClusteringS2S
config: default
split: test
revision: 258694dd0231531bc1fd9de6ceb52a0853c6d908
metrics:
- type: v_measure
value: 31.32623280148178
- task:
type: Retrieval
dataset:
type: BeIR/cqadupstack
name: MTEB CQADupstackAndroidRetrieval
config: default
split: test
revision: None
metrics:
- type: map_at_1
value: 35.803000000000004
- type: map_at_10
value: 48.848
- type: map_at_100
value: 50.5
- type: map_at_1000
value: 50.602999999999994
- type: map_at_3
value: 45.111000000000004
- type: map_at_5
value: 47.202
- type: mrr_at_1
value: 44.635000000000005
- type: mrr_at_10
value: 55.593
- type: mrr_at_100
value: 56.169999999999995
- type: mrr_at_1000
value: 56.19499999999999
- type: mrr_at_3
value: 53.361999999999995
- type: mrr_at_5
value: 54.806999999999995
- type: ndcg_at_1
value: 44.635000000000005
- type: ndcg_at_10
value: 55.899
- type: ndcg_at_100
value: 60.958
- type: ndcg_at_1000
value: 62.302
- type: ndcg_at_3
value: 51.051
- type: ndcg_at_5
value: 53.351000000000006
- type: precision_at_1
value: 44.635000000000005
- type: precision_at_10
value: 10.786999999999999
- type: precision_at_100
value: 1.6580000000000001
- type: precision_at_1000
value: 0.213
- type: precision_at_3
value: 24.893
- type: precision_at_5
value: 17.740000000000002
- type: recall_at_1
value: 35.803000000000004
- type: recall_at_10
value: 68.657
- type: recall_at_100
value: 89.77199999999999
- type: recall_at_1000
value: 97.67
- type: recall_at_3
value: 54.066
- type: recall_at_5
value: 60.788
- task:
type: Retrieval
dataset:
type: BeIR/cqadupstack
name: MTEB CQADupstackEnglishRetrieval
config: default
split: test
revision: None
metrics:
- type: map_at_1
value: 33.706
- type: map_at_10
value: 44.896
- type: map_at_100
value: 46.299
- type: map_at_1000
value: 46.44
- type: map_at_3
value: 41.721000000000004
- type: map_at_5
value: 43.486000000000004
- type: mrr_at_1
value: 41.592
- type: mrr_at_10
value: 50.529
- type: mrr_at_100
value: 51.22
- type: mrr_at_1000
value: 51.258
- type: mrr_at_3
value: 48.205999999999996
- type: mrr_at_5
value: 49.528
- type: ndcg_at_1
value: 41.592
- type: ndcg_at_10
value: 50.77199999999999
- type: ndcg_at_100
value: 55.383
- type: ndcg_at_1000
value: 57.288
- type: ndcg_at_3
value: 46.324
- type: ndcg_at_5
value: 48.346000000000004
- type: precision_at_1
value: 41.592
- type: precision_at_10
value: 9.516
- type: precision_at_100
value: 1.541
- type: precision_at_1000
value: 0.2
- type: precision_at_3
value: 22.399
- type: precision_at_5
value: 15.770999999999999
- type: recall_at_1
value: 33.706
- type: recall_at_10
value: 61.353
- type: recall_at_100
value: 80.182
- type: recall_at_1000
value: 91.896
- type: recall_at_3
value: 48.204
- type: recall_at_5
value: 53.89699999999999
- task:
type: Retrieval
dataset:
type: BeIR/cqadupstack
name: MTEB CQADupstackGamingRetrieval
config: default
split: test
revision: None
metrics:
- type: map_at_1
value: 44.424
- type: map_at_10
value: 57.169000000000004
- type: map_at_100
value: 58.202
- type: map_at_1000
value: 58.242000000000004
- type: map_at_3
value: 53.825
- type: map_at_5
value: 55.714
- type: mrr_at_1
value: 50.470000000000006
- type: mrr_at_10
value: 60.489000000000004
- type: mrr_at_100
value: 61.096
- type: mrr_at_1000
value: 61.112
- type: mrr_at_3
value: 58.192
- type: mrr_at_5
value: 59.611999999999995
- type: ndcg_at_1
value: 50.470000000000006
- type: ndcg_at_10
value: 63.071999999999996
- type: ndcg_at_100
value: 66.964
- type: ndcg_at_1000
value: 67.659
- type: ndcg_at_3
value: 57.74399999999999
- type: ndcg_at_5
value: 60.367000000000004
- type: precision_at_1
value: 50.470000000000006
- type: precision_at_10
value: 10.019
- type: precision_at_100
value: 1.29
- type: precision_at_1000
value: 0.13899999999999998
- type: precision_at_3
value: 25.558999999999997
- type: precision_at_5
value: 17.467
- type: recall_at_1
value: 44.424
- type: recall_at_10
value: 77.02
- type: recall_at_100
value: 93.738
- type: recall_at_1000
value: 98.451
- type: recall_at_3
value: 62.888
- type: recall_at_5
value: 69.138
- task:
type: Retrieval
dataset:
type: BeIR/cqadupstack
name: MTEB CQADupstackGisRetrieval
config: default
split: test
revision: None
metrics:
- type: map_at_1
value: 26.294
- type: map_at_10
value: 34.503
- type: map_at_100
value: 35.641
- type: map_at_1000
value: 35.724000000000004
- type: map_at_3
value: 31.753999999999998
- type: map_at_5
value: 33.190999999999995
- type: mrr_at_1
value: 28.362
- type: mrr_at_10
value: 36.53
- type: mrr_at_100
value: 37.541000000000004
- type: mrr_at_1000
value: 37.602000000000004
- type: mrr_at_3
value: 33.917
- type: mrr_at_5
value: 35.358000000000004
- type: ndcg_at_1
value: 28.362
- type: ndcg_at_10
value: 39.513999999999996
- type: ndcg_at_100
value: 44.815
- type: ndcg_at_1000
value: 46.839
- type: ndcg_at_3
value: 34.02
- type: ndcg_at_5
value: 36.522
- type: precision_at_1
value: 28.362
- type: precision_at_10
value: 6.101999999999999
- type: precision_at_100
value: 0.9129999999999999
- type: precision_at_1000
value: 0.11399999999999999
- type: precision_at_3
value: 14.161999999999999
- type: precision_at_5
value: 9.966
- type: recall_at_1
value: 26.294
- type: recall_at_10
value: 53.098
- type: recall_at_100
value: 76.877
- type: recall_at_1000
value: 91.834
- type: recall_at_3
value: 38.266
- type: recall_at_5
value: 44.287
- task:
type: Retrieval
dataset:
type: BeIR/cqadupstack
name: MTEB CQADupstackMathematicaRetrieval
config: default
split: test
revision: None
metrics:
- type: map_at_1
value: 16.407
- type: map_at_10
value: 25.185999999999996
- type: map_at_100
value: 26.533
- type: map_at_1000
value: 26.657999999999998
- type: map_at_3
value: 22.201999999999998
- type: map_at_5
value: 23.923
- type: mrr_at_1
value: 20.522000000000002
- type: mrr_at_10
value: 29.522
- type: mrr_at_100
value: 30.644
- type: mrr_at_1000
value: 30.713
- type: mrr_at_3
value: 26.679000000000002
- type: mrr_at_5
value: 28.483000000000004
- type: ndcg_at_1
value: 20.522000000000002
- type: ndcg_at_10
value: 30.656
- type: ndcg_at_100
value: 36.864999999999995
- type: ndcg_at_1000
value: 39.675
- type: ndcg_at_3
value: 25.319000000000003
- type: ndcg_at_5
value: 27.992
- type: precision_at_1
value: 20.522000000000002
- type: precision_at_10
value: 5.795999999999999
- type: precision_at_100
value: 1.027
- type: precision_at_1000
value: 0.13999999999999999
- type: precision_at_3
value: 12.396
- type: precision_at_5
value: 9.328
- type: recall_at_1
value: 16.407
- type: recall_at_10
value: 43.164
- type: recall_at_100
value: 69.695
- type: recall_at_1000
value: 89.41900000000001
- type: recall_at_3
value: 28.634999999999998
- type: recall_at_5
value: 35.308
- task:
type: Retrieval
dataset:
type: BeIR/cqadupstack
name: MTEB CQADupstackPhysicsRetrieval
config: default
split: test
revision: None
metrics:
- type: map_at_1
value: 30.473
- type: map_at_10
value: 41.676
- type: map_at_100
value: 43.120999999999995
- type: map_at_1000
value: 43.230000000000004
- type: map_at_3
value: 38.306000000000004
- type: map_at_5
value: 40.355999999999995
- type: mrr_at_1
value: 37.536
- type: mrr_at_10
value: 47.643
- type: mrr_at_100
value: 48.508
- type: mrr_at_1000
value: 48.551
- type: mrr_at_3
value: 45.348
- type: mrr_at_5
value: 46.744
- type: ndcg_at_1
value: 37.536
- type: ndcg_at_10
value: 47.823
- type: ndcg_at_100
value: 53.395
- type: ndcg_at_1000
value: 55.271
- type: ndcg_at_3
value: 42.768
- type: ndcg_at_5
value: 45.373000000000005
- type: precision_at_1
value: 37.536
- type: precision_at_10
value: 8.681
- type: precision_at_100
value: 1.34
- type: precision_at_1000
value: 0.165
- type: precision_at_3
value: 20.468
- type: precision_at_5
value: 14.495
- type: recall_at_1
value: 30.473
- type: recall_at_10
value: 60.092999999999996
- type: recall_at_100
value: 82.733
- type: recall_at_1000
value: 94.875
- type: recall_at_3
value: 45.734
- type: recall_at_5
value: 52.691
- task:
type: Retrieval
dataset:
type: BeIR/cqadupstack
name: MTEB CQADupstackProgrammersRetrieval
config: default
split: test
revision: None
metrics:
- type: map_at_1
value: 29.976000000000003
- type: map_at_10
value: 41.097
- type: map_at_100
value: 42.547000000000004
- type: map_at_1000
value: 42.659000000000006
- type: map_at_3
value: 37.251
- type: map_at_5
value: 39.493
- type: mrr_at_1
value: 37.557
- type: mrr_at_10
value: 46.605000000000004
- type: mrr_at_100
value: 47.487
- type: mrr_at_1000
value: 47.54
- type: mrr_at_3
value: 43.721
- type: mrr_at_5
value: 45.411
- type: ndcg_at_1
value: 37.557
- type: ndcg_at_10
value: 47.449000000000005
- type: ndcg_at_100
value: 53.052
- type: ndcg_at_1000
value: 55.010999999999996
- type: ndcg_at_3
value: 41.439
- type: ndcg_at_5
value: 44.292
- type: precision_at_1
value: 37.557
- type: precision_at_10
value: 8.847
- type: precision_at_100
value: 1.357
- type: precision_at_1000
value: 0.16999999999999998
- type: precision_at_3
value: 20.091
- type: precision_at_5
value: 14.384
- type: recall_at_1
value: 29.976000000000003
- type: recall_at_10
value: 60.99099999999999
- type: recall_at_100
value: 84.245
- type: recall_at_1000
value: 96.97200000000001
- type: recall_at_3
value: 43.794
- type: recall_at_5
value: 51.778999999999996
- task:
type: Retrieval
dataset:
type: BeIR/cqadupstack
name: MTEB CQADupstackRetrieval
config: default
split: test
revision: None
metrics:
- type: map_at_1
value: 28.099166666666665
- type: map_at_10
value: 38.1365
- type: map_at_100
value: 39.44491666666667
- type: map_at_1000
value: 39.55858333333334
- type: map_at_3
value: 35.03641666666666
- type: map_at_5
value: 36.79833333333334
- type: mrr_at_1
value: 33.39966666666667
- type: mrr_at_10
value: 42.42583333333333
- type: mrr_at_100
value: 43.28575
- type: mrr_at_1000
value: 43.33741666666667
- type: mrr_at_3
value: 39.94975
- type: mrr_at_5
value: 41.41633333333334
- type: ndcg_at_1
value: 33.39966666666667
- type: ndcg_at_10
value: 43.81741666666667
- type: ndcg_at_100
value: 49.08166666666667
- type: ndcg_at_1000
value: 51.121166666666674
- type: ndcg_at_3
value: 38.73575
- type: ndcg_at_5
value: 41.18158333333333
- type: precision_at_1
value: 33.39966666666667
- type: precision_at_10
value: 7.738916666666667
- type: precision_at_100
value: 1.2265833333333331
- type: precision_at_1000
value: 0.15983333333333336
- type: precision_at_3
value: 17.967416666666665
- type: precision_at_5
value: 12.78675
- type: recall_at_1
value: 28.099166666666665
- type: recall_at_10
value: 56.27049999999999
- type: recall_at_100
value: 78.93291666666667
- type: recall_at_1000
value: 92.81608333333334
- type: recall_at_3
value: 42.09775
- type: recall_at_5
value: 48.42533333333334
- task:
type: Retrieval
dataset:
type: BeIR/cqadupstack
name: MTEB CQADupstackStatsRetrieval
config: default
split: test
revision: None
metrics:
- type: map_at_1
value: 23.663
- type: map_at_10
value: 30.377
- type: map_at_100
value: 31.426
- type: map_at_1000
value: 31.519000000000002
- type: map_at_3
value: 28.069
- type: map_at_5
value: 29.256999999999998
- type: mrr_at_1
value: 26.687
- type: mrr_at_10
value: 33.107
- type: mrr_at_100
value: 34.055
- type: mrr_at_1000
value: 34.117999999999995
- type: mrr_at_3
value: 31.058000000000003
- type: mrr_at_5
value: 32.14
- type: ndcg_at_1
value: 26.687
- type: ndcg_at_10
value: 34.615
- type: ndcg_at_100
value: 39.776
- type: ndcg_at_1000
value: 42.05
- type: ndcg_at_3
value: 30.322
- type: ndcg_at_5
value: 32.157000000000004
- type: precision_at_1
value: 26.687
- type: precision_at_10
value: 5.491
- type: precision_at_100
value: 0.877
- type: precision_at_1000
value: 0.11499999999999999
- type: precision_at_3
value: 13.139000000000001
- type: precision_at_5
value: 9.049
- type: recall_at_1
value: 23.663
- type: recall_at_10
value: 45.035
- type: recall_at_100
value: 68.554
- type: recall_at_1000
value: 85.077
- type: recall_at_3
value: 32.982
- type: recall_at_5
value: 37.688
- task:
type: Retrieval
dataset:
type: BeIR/cqadupstack
name: MTEB CQADupstackTexRetrieval
config: default
split: test
revision: None
metrics:
- type: map_at_1
value: 17.403
- type: map_at_10
value: 25.197000000000003
- type: map_at_100
value: 26.355
- type: map_at_1000
value: 26.487
- type: map_at_3
value: 22.733
- type: map_at_5
value: 24.114
- type: mrr_at_1
value: 21.37
- type: mrr_at_10
value: 29.091
- type: mrr_at_100
value: 30.018
- type: mrr_at_1000
value: 30.096
- type: mrr_at_3
value: 26.887
- type: mrr_at_5
value: 28.157
- type: ndcg_at_1
value: 21.37
- type: ndcg_at_10
value: 30.026000000000003
- type: ndcg_at_100
value: 35.416
- type: ndcg_at_1000
value: 38.45
- type: ndcg_at_3
value: 25.764
- type: ndcg_at_5
value: 27.742
- type: precision_at_1
value: 21.37
- type: precision_at_10
value: 5.609
- type: precision_at_100
value: 0.9860000000000001
- type: precision_at_1000
value: 0.14300000000000002
- type: precision_at_3
value: 12.423
- type: precision_at_5
value: 9.009
- type: recall_at_1
value: 17.403
- type: recall_at_10
value: 40.573
- type: recall_at_100
value: 64.818
- type: recall_at_1000
value: 86.53699999999999
- type: recall_at_3
value: 28.493000000000002
- type: recall_at_5
value: 33.660000000000004
- task:
type: Retrieval
dataset:
type: BeIR/cqadupstack
name: MTEB CQADupstackUnixRetrieval
config: default
split: test
revision: None
metrics:
- type: map_at_1
value: 28.639
- type: map_at_10
value: 38.951
- type: map_at_100
value: 40.238
- type: map_at_1000
value: 40.327
- type: map_at_3
value: 35.842
- type: map_at_5
value: 37.617
- type: mrr_at_1
value: 33.769
- type: mrr_at_10
value: 43.088
- type: mrr_at_100
value: 44.03
- type: mrr_at_1000