metadata
tags:
- mteb
model-index:
- name: IYun-large-zh
results:
- task:
type: STS
dataset:
type: C-MTEB/AFQMC
name: MTEB AFQMC
config: default
split: validation
revision: None
metrics:
- type: cos_sim_pearson
value: 57.37728676415047
- type: cos_sim_spearman
value: 60.89131895307699
- type: euclidean_pearson
value: 60.056754800315595
- type: euclidean_spearman
value: 60.891479787418966
- type: manhattan_pearson
value: 60.03850823371572
- type: manhattan_spearman
value: 60.8597150048781
- task:
type: STS
dataset:
type: C-MTEB/ATEC
name: MTEB ATEC
config: default
split: test
revision: None
metrics:
- type: cos_sim_pearson
value: 57.29704921148904
- type: cos_sim_spearman
value: 58.81607331373972
- type: euclidean_pearson
value: 63.69251756281332
- type: euclidean_spearman
value: 58.81608232068536
- type: manhattan_pearson
value: 63.665668138742284
- type: manhattan_spearman
value: 58.80224314871406
- task:
type: Classification
dataset:
type: mteb/amazon_reviews_multi
name: MTEB AmazonReviewsClassification (zh)
config: zh
split: test
revision: 1399c76144fd37290681b995c656ef9b2e06e26d
metrics:
- type: accuracy
value: 49.672
- type: f1
value: 47.27737512126165
- task:
type: STS
dataset:
type: C-MTEB/BQ
name: MTEB BQ
config: default
split: test
revision: None
metrics:
- type: cos_sim_pearson
value: 71.65025725548176
- type: cos_sim_spearman
value: 72.53278026251562
- type: euclidean_pearson
value: 71.29771814474996
- type: euclidean_spearman
value: 72.53241999594584
- type: manhattan_pearson
value: 71.29290351258575
- type: manhattan_spearman
value: 72.52505531587519
- task:
type: Clustering
dataset:
type: C-MTEB/CLSClusteringP2P
name: MTEB CLSClusteringP2P
config: default
split: test
revision: None
metrics:
- type: v_measure
value: 60.19892651814847
- task:
type: Clustering
dataset:
type: C-MTEB/CLSClusteringS2S
name: MTEB CLSClusteringS2S
config: default
split: test
revision: None
metrics:
- type: v_measure
value: 58.39897986042561
- task:
type: Reranking
dataset:
type: C-MTEB/CMedQAv1-reranking
name: MTEB CMedQAv1
config: default
split: test
revision: None
metrics:
- type: map
value: 88.73563192647498
- type: mrr
value: 91.00214285714286
- task:
type: Reranking
dataset:
type: C-MTEB/CMedQAv2-reranking
name: MTEB CMedQAv2
config: default
split: test
revision: None
metrics:
- type: map
value: 89.42396184634322
- type: mrr
value: 91.90503968253968
- task:
type: Retrieval
dataset:
type: C-MTEB/CmedqaRetrieval
name: MTEB CmedqaRetrieval
config: default
split: dev
revision: None
metrics:
- type: map_at_1
value: 26.950000000000003
- type: map_at_10
value: 39.982
- type: map_at_100
value: 41.844
- type: map_at_1000
value: 41.948
- type: map_at_3
value: 35.664
- type: map_at_5
value: 38.061
- type: mrr_at_1
value: 41.11
- type: mrr_at_10
value: 49.183
- type: mrr_at_100
value: 50.166999999999994
- type: mrr_at_1000
value: 50.205999999999996
- type: mrr_at_3
value: 46.778
- type: mrr_at_5
value: 48.120000000000005
- type: ndcg_at_1
value: 41.11
- type: ndcg_at_10
value: 46.678
- type: ndcg_at_100
value: 53.876000000000005
- type: ndcg_at_1000
value: 55.627
- type: ndcg_at_3
value: 41.429
- type: ndcg_at_5
value: 43.551
- type: precision_at_1
value: 41.11
- type: precision_at_10
value: 10.325
- type: precision_at_100
value: 1.6119999999999999
- type: precision_at_1000
value: 0.184
- type: precision_at_3
value: 23.498
- type: precision_at_5
value: 16.894000000000002
- type: recall_at_1
value: 26.950000000000003
- type: recall_at_10
value: 57.239
- type: recall_at_100
value: 86.9
- type: recall_at_1000
value: 98.581
- type: recall_at_3
value: 41.221000000000004
- type: recall_at_5
value: 47.976
- task:
type: PairClassification
dataset:
type: C-MTEB/CMNLI
name: MTEB Cmnli
config: default
split: validation
revision: None
metrics:
- type: cos_sim_accuracy
value: 86.13968597726043
- type: cos_sim_ap
value: 90.86724630443385
- type: cos_sim_f1
value: 86.9653767820774
- type: cos_sim_precision
value: 83.9724680432645
- type: cos_sim_recall
value: 90.17951425554382
- type: dot_accuracy
value: 86.13968597726043
- type: dot_ap
value: 90.85181504536696
- type: dot_f1
value: 86.9653767820774
- type: dot_precision
value: 83.9724680432645
- type: dot_recall
value: 90.17951425554382
- type: euclidean_accuracy
value: 86.13968597726043
- type: euclidean_ap
value: 90.86657368513809
- type: euclidean_f1
value: 86.95208970438327
- type: euclidean_precision
value: 84.03940886699507
- type: euclidean_recall
value: 90.07391763463569
- type: manhattan_accuracy
value: 85.97726042230644
- type: manhattan_ap
value: 90.85259484237685
- type: manhattan_f1
value: 86.79435483870968
- type: manhattan_precision
value: 83.02796528447445
- type: manhattan_recall
value: 90.91869060190075
- type: max_accuracy
value: 86.13968597726043
- type: max_ap
value: 90.86724630443385
- type: max_f1
value: 86.9653767820774
- task:
type: Retrieval
dataset:
type: C-MTEB/CovidRetrieval
name: MTEB CovidRetrieval
config: default
split: dev
revision: None
metrics:
- type: map_at_1
value: 73.34
- type: map_at_10
value: 81.722
- type: map_at_100
value: 81.916
- type: map_at_1000
value: 81.919
- type: map_at_3
value: 80.25999999999999
- type: map_at_5
value: 81.11699999999999
- type: mrr_at_1
value: 73.551
- type: mrr_at_10
value: 81.727
- type: mrr_at_100
value: 81.911
- type: mrr_at_1000
value: 81.914
- type: mrr_at_3
value: 80.242
- type: mrr_at_5
value: 81.149
- type: ndcg_at_1
value: 73.551
- type: ndcg_at_10
value: 85.244
- type: ndcg_at_100
value: 86.005
- type: ndcg_at_1000
value: 86.084
- type: ndcg_at_3
value: 82.334
- type: ndcg_at_5
value: 83.878
- type: precision_at_1
value: 73.551
- type: precision_at_10
value: 9.705
- type: precision_at_100
value: 1.0030000000000001
- type: precision_at_1000
value: 0.101
- type: precision_at_3
value: 29.645
- type: precision_at_5
value: 18.567
- type: recall_at_1
value: 73.34
- type: recall_at_10
value: 96.048
- type: recall_at_100
value: 99.262
- type: recall_at_1000
value: 99.895
- type: recall_at_3
value: 88.303
- type: recall_at_5
value: 91.99199999999999
- task:
type: Retrieval
dataset:
type: C-MTEB/DuRetrieval
name: MTEB DuRetrieval
config: default
split: dev
revision: None
metrics:
- type: map_at_1
value: 26.506
- type: map_at_10
value: 81.29899999999999
- type: map_at_100
value: 83.997
- type: map_at_1000
value: 84.03399999999999
- type: map_at_3
value: 56.69
- type: map_at_5
value: 71.389
- type: mrr_at_1
value: 91.10000000000001
- type: mrr_at_10
value: 93.952
- type: mrr_at_100
value: 94.00500000000001
- type: mrr_at_1000
value: 94.00699999999999
- type: mrr_at_3
value: 93.683
- type: mrr_at_5
value: 93.858
- type: ndcg_at_1
value: 91.10000000000001
- type: ndcg_at_10
value: 88.25699999999999
- type: ndcg_at_100
value: 90.84100000000001
- type: ndcg_at_1000
value: 91.167
- type: ndcg_at_3
value: 87.595
- type: ndcg_at_5
value: 86.346
- type: precision_at_1
value: 91.10000000000001
- type: precision_at_10
value: 42.04
- type: precision_at_100
value: 4.804
- type: precision_at_1000
value: 0.48900000000000005
- type: precision_at_3
value: 78.583
- type: precision_at_5
value: 66.09
- type: recall_at_1
value: 26.506
- type: recall_at_10
value: 89.12299999999999
- type: recall_at_100
value: 97.717
- type: recall_at_1000
value: 99.285
- type: recall_at_3
value: 58.865
- type: recall_at_5
value: 75.753
- task:
type: Retrieval
dataset:
type: C-MTEB/EcomRetrieval
name: MTEB EcomRetrieval
config: default
split: dev
revision: None
metrics:
- type: map_at_1
value: 52.7
- type: map_at_10
value: 62.239
- type: map_at_100
value: 62.744
- type: map_at_1000
value: 62.755
- type: map_at_3
value: 59.75
- type: map_at_5
value: 61.050000000000004
- type: mrr_at_1
value: 52.7
- type: mrr_at_10
value: 62.239
- type: mrr_at_100
value: 62.744
- type: mrr_at_1000
value: 62.755
- type: mrr_at_3
value: 59.75
- type: mrr_at_5
value: 61.050000000000004
- type: ndcg_at_1
value: 52.7
- type: ndcg_at_10
value: 67.23
- type: ndcg_at_100
value: 69.729
- type: ndcg_at_1000
value: 70.00999999999999
- type: ndcg_at_3
value: 62.025
- type: ndcg_at_5
value: 64.37
- type: precision_at_1
value: 52.7
- type: precision_at_10
value: 8.309999999999999
- type: precision_at_100
value: 0.9490000000000001
- type: precision_at_1000
value: 0.097
- type: precision_at_3
value: 22.867
- type: precision_at_5
value: 14.860000000000001
- type: recall_at_1
value: 52.7
- type: recall_at_10
value: 83.1
- type: recall_at_100
value: 94.89999999999999
- type: recall_at_1000
value: 97.1
- type: recall_at_3
value: 68.60000000000001
- type: recall_at_5
value: 74.3
- task:
type: Classification
dataset:
type: C-MTEB/IFlyTek-classification
name: MTEB IFlyTek
config: default
split: validation
revision: None
metrics:
- type: accuracy
value: 52.64332435552135
- type: f1
value: 42.17147347490132
- task:
type: Classification
dataset:
type: C-MTEB/JDReview-classification
name: MTEB JDReview
config: default
split: test
revision: None
metrics:
- type: accuracy
value: 87.5984990619137
- type: ap
value: 57.59814850574554
- type: f1
value: 82.62140959655022
- task:
type: STS
dataset:
type: C-MTEB/LCQMC
name: MTEB LCQMC
config: default
split: test
revision: None
metrics:
- type: cos_sim_pearson
value: 74.58027418203673
- type: cos_sim_spearman
value: 79.19473724464046
- type: euclidean_pearson
value: 79.2941422188887
- type: euclidean_spearman
value: 79.1944889378359
- type: manhattan_pearson
value: 79.26535092062532
- type: manhattan_spearman
value: 79.17298822899023
- task:
type: Reranking
dataset:
type: C-MTEB/Mmarco-reranking
name: MTEB MMarcoReranking
config: default
split: dev
revision: None
metrics:
- type: map
value: 31.611379937191025
- type: mrr
value: 30.88968253968254
- task:
type: Retrieval
dataset:
type: C-MTEB/MMarcoRetrieval
name: MTEB MMarcoRetrieval
config: default
split: dev
revision: None
metrics:
- type: map_at_1
value: 65.603
- type: map_at_10
value: 74.834
- type: map_at_100
value: 75.16199999999999
- type: map_at_1000
value: 75.17399999999999
- type: map_at_3
value: 72.979
- type: map_at_5
value: 74.154
- type: mrr_at_1
value: 67.837
- type: mrr_at_10
value: 75.46199999999999
- type: mrr_at_100
value: 75.751
- type: mrr_at_1000
value: 75.762
- type: mrr_at_3
value: 73.832
- type: mrr_at_5
value: 74.875
- type: ndcg_at_1
value: 67.837
- type: ndcg_at_10
value: 78.636
- type: ndcg_at_100
value: 80.083
- type: ndcg_at_1000
value: 80.394
- type: ndcg_at_3
value: 75.12
- type: ndcg_at_5
value: 77.12
- type: precision_at_1
value: 67.837
- type: precision_at_10
value: 9.536999999999999
- type: precision_at_100
value: 1.0250000000000001
- type: precision_at_1000
value: 0.105
- type: precision_at_3
value: 28.352
- type: precision_at_5
value: 18.074
- type: recall_at_1
value: 65.603
- type: recall_at_10
value: 89.704
- type: recall_at_100
value: 96.2
- type: recall_at_1000
value: 98.588
- type: recall_at_3
value: 80.444
- type: recall_at_5
value: 85.205
- task:
type: Classification
dataset:
type: mteb/amazon_massive_intent
name: MTEB MassiveIntentClassification (zh-CN)
config: zh-CN
split: test
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7
metrics:
- type: accuracy
value: 77.43106926698049
- type: f1
value: 73.96808004721824
- task:
type: Classification
dataset:
type: mteb/amazon_massive_scenario
name: MTEB MassiveScenarioClassification (zh-CN)
config: zh-CN
split: test
revision: 7d571f92784cd94a019292a1f45445077d0ef634
metrics:
- type: accuracy
value: 83.86684599865501
- type: f1
value: 83.05645257324346
- task:
type: Retrieval
dataset:
type: C-MTEB/MedicalRetrieval
name: MTEB MedicalRetrieval
config: default
split: dev
revision: None
metrics:
- type: map_at_1
value: 55.00000000000001
- type: map_at_10
value: 61.129
- type: map_at_100
value: 61.61
- type: map_at_1000
value: 61.655
- type: map_at_3
value: 59.533
- type: map_at_5
value: 60.478
- type: mrr_at_1
value: 54.900000000000006
- type: mrr_at_10
value: 61.090999999999994
- type: mrr_at_100
value: 61.562
- type: mrr_at_1000
value: 61.608
- type: mrr_at_3
value: 59.483
- type: mrr_at_5
value: 60.428000000000004
- type: ndcg_at_1
value: 55.00000000000001
- type: ndcg_at_10
value: 64.288
- type: ndcg_at_100
value: 66.991
- type: ndcg_at_1000
value: 68.27
- type: ndcg_at_3
value: 61.014
- type: ndcg_at_5
value: 62.68899999999999
- type: precision_at_1
value: 55.00000000000001
- type: precision_at_10
value: 7.430000000000001
- type: precision_at_100
value: 0.878
- type: precision_at_1000
value: 0.098
- type: precision_at_3
value: 21.767
- type: precision_at_5
value: 13.86
- type: recall_at_1
value: 55.00000000000001
- type: recall_at_10
value: 74.3
- type: recall_at_100
value: 87.8
- type: recall_at_1000
value: 98
- type: recall_at_3
value: 65.3
- type: recall_at_5
value: 69.3
- task:
type: Classification
dataset:
type: C-MTEB/MultilingualSentiment-classification
name: MTEB MultilingualSentiment
config: default
split: validation
revision: None
metrics:
- type: accuracy
value: 78.48333333333333
- type: f1
value: 78.36516159631131
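# NOTE(review): every metric value in this OCNLI entry is identical to the
# CMNLI entry earlier in this list; one of the two was probably copied.
# Re-run the evaluation and confirm which values belong to which dataset.
- task: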
type: PairClassification
dataset:
type: C-MTEB/OCNLI
name: MTEB Ocnli
config: default
split: validation
revision: None
metrics:
- type: cos_sim_accuracy
value: 86.13968597726043
- type: cos_sim_ap
value: 90.86724630443385
- type: cos_sim_f1
value: 86.9653767820774
- type: cos_sim_precision
value: 83.9724680432645
- type: cos_sim_recall
value: 90.17951425554382
- type: dot_accuracy
value: 86.13968597726043
- type: dot_ap
value: 90.85181504536696
- type: dot_f1
value: 86.9653767820774
- type: dot_precision
value: 83.9724680432645
- type: dot_recall
value: 90.17951425554382
- type: euclidean_accuracy
value: 86.13968597726043
- type: euclidean_ap
value: 90.86657368513809
- type: euclidean_f1
value: 86.95208970438327
- type: euclidean_precision
value: 84.03940886699507
- type: euclidean_recall
value: 90.07391763463569
- type: manhattan_accuracy
value: 85.97726042230644
- type: manhattan_ap
value: 90.85259484237685
- type: manhattan_f1
value: 86.79435483870968
- type: manhattan_precision
value: 83.02796528447445
- type: manhattan_recall
value: 90.91869060190075
- type: max_accuracy
value: 86.13968597726043
- type: max_ap
value: 90.86724630443385
- type: max_f1
value: 86.9653767820774
- task:
type: Classification
dataset:
type: C-MTEB/OnlineShopping-classification
name: MTEB OnlineShopping
config: default
split: test
revision: None
metrics:
- type: accuracy
value: 94.33999999999999
- type: ap
value: 92.566213965377
- type: f1
value: 94.32981412505542
- task:
type: STS
dataset:
type: C-MTEB/PAWSX
name: MTEB PAWSX
config: default
split: test
revision: None
metrics:
- type: cos_sim_pearson
value: 40.59979992480721
- type: cos_sim_spearman
value: 45.80272854477526
- type: euclidean_pearson
value: 45.51435650601272
- type: euclidean_spearman
value: 45.80481880049892
- type: manhattan_pearson
value: 45.50783698090448
- type: manhattan_spearman
value: 45.7962835896273
- task:
type: STS
dataset:
type: C-MTEB/QBQTC
name: MTEB QBQTC
config: default
split: test
revision: None
metrics:
- type: cos_sim_pearson
value: 41.95530336245604
- type: cos_sim_spearman
value: 43.94205325290135
- type: euclidean_pearson
value: 38.01893281522651
- type: euclidean_spearman
value: 43.9411389356089
- type: manhattan_pearson
value: 38.158512461951446
- type: manhattan_spearman
value: 44.055211140130815
- task:
type: STS
dataset:
type: mteb/sts22-crosslingual-sts
name: MTEB STS22 (zh)
config: zh
split: test
revision: 6d1ba47164174a496b7fa5d3569dae26a6813b80
metrics:
- type: cos_sim_pearson
value: 63.64131281514482
- type: cos_sim_spearman
value: 65.17753570208333
- type: euclidean_pearson
value: 62.72868744500848
- type: euclidean_spearman
value: 65.17730738350589
- type: manhattan_pearson
value: 62.76099444782981
- type: manhattan_spearman
value: 65.2421498595002
- task:
type: STS
dataset:
type: C-MTEB/STSB
name: MTEB STSB
config: default
split: test
revision: None
metrics:
- type: cos_sim_pearson
value: 79.15762053490425
- type: cos_sim_spearman
value: 79.47824157657848
- type: euclidean_pearson
value: 79.11217669696227
- type: euclidean_spearman
value: 79.47857091559331
- type: manhattan_pearson
value: 79.07701011877683
- type: manhattan_spearman
value: 79.43942682897884
- task:
type: Reranking
dataset:
type: C-MTEB/T2Reranking
name: MTEB T2Reranking
config: default
split: dev
revision: None
metrics:
- type: map
value: 67.45068053105526
- type: mrr
value: 77.63560439973777
- task:
type: Retrieval
dataset:
type: C-MTEB/T2Retrieval
name: MTEB T2Retrieval
config: default
split: dev
revision: None
metrics:
- type: map_at_1
value: 27.837
- type: map_at_10
value: 77.803
- type: map_at_100
value: 81.402
- type: map_at_1000
value: 81.464
- type: map_at_3
value: 54.879
- type: map_at_5
value: 67.32900000000001
- type: mrr_at_1
value: 90.584
- type: mrr_at_10
value: 93.059
- type: mrr_at_100
value: 93.135
- type: mrr_at_1000
value: 93.138
- type: mrr_at_3
value: 92.659
- type: mrr_at_5
value: 92.914
- type: ndcg_at_1
value: 90.584
- type: ndcg_at_10
value: 85.29299999999999
- type: ndcg_at_100
value: 88.824
- type: ndcg_at_1000
value: 89.4
- type: ndcg_at_3
value: 86.79599999999999
- type: ndcg_at_5
value: 85.353
- type: precision_at_1
value: 90.584
- type: precision_at_10
value: 42.191
- type: precision_at_100
value: 5.0200000000000005
- type: precision_at_1000
value: 0.516
- type: precision_at_3
value: 75.785
- type: precision_at_5
value: 63.417
- type: recall_at_1
value: 27.837
- type: recall_at_10
value: 84.21600000000001
- type: recall_at_100
value: 95.719
- type: recall_at_1000
value: 98.565
- type: recall_at_3
value: 56.574999999999996
- type: recall_at_5
value: 70.682
- task:
type: Classification
dataset:
type: C-MTEB/TNews-classification
name: MTEB TNews
config: default
split: validation
revision: None
metrics:
- type: accuracy
value: 54.37
- type: f1
value: 52.57500124627352
- task:
type: Clustering
dataset:
type: C-MTEB/ThuNewsClusteringP2P
name: MTEB ThuNewsClusteringP2P
config: default
split: test
revision: None
metrics:
- type: v_measure
value: 76.9781904739968
- task:
type: Clustering
dataset:
type: C-MTEB/ThuNewsClusteringS2S
name: MTEB ThuNewsClusteringS2S
config: default
split: test
revision: None
metrics:
- type: v_measure
value: 69.82661181746705
- task:
type: Retrieval
dataset:
type: C-MTEB/VideoRetrieval
name: MTEB VideoRetrieval
config: default
split: dev
revision: None
metrics:
- type: map_at_1
value: 58.699999999999996
- type: map_at_10
value: 68.512
- type: map_at_100
value: 69.018
- type: map_at_1000
value: 69.028
- type: map_at_3
value: 66.51700000000001
- type: map_at_5
value: 67.91199999999999
- type: mrr_at_1
value: 58.599999999999994
- type: mrr_at_10
value: 68.462
- type: mrr_at_100
value: 68.96799999999999
- type: mrr_at_1000
value: 68.978
- type: mrr_at_3
value: 66.467
- type: mrr_at_5
value: 67.862
- type: ndcg_at_1
value: 58.699999999999996
- type: ndcg_at_10
value: 72.88900000000001
- type: ndcg_at_100
value: 75.262
- type: ndcg_at_1000
value: 75.48700000000001
- type: ndcg_at_3
value: 68.96
- type: ndcg_at_5
value: 71.452
- type: precision_at_1
value: 58.699999999999996
- type: precision_at_10
value: 8.64
- type: precision_at_100
value: 0.9730000000000001
- type: precision_at_1000
value: 0.099
- type: precision_at_3
value: 25.333
- type: precision_at_5
value: 16.400000000000002
- type: recall_at_1
value: 58.699999999999996
- type: recall_at_10
value: 86.4
- type: recall_at_100
value: 97.3
- type: recall_at_1000
value: 99
- type: recall_at_3
value: 76
- type: recall_at_5
value: 82
- task:
type: Classification
dataset:
type: C-MTEB/waimai-classification
name: MTEB Waimai
config: default
split: test
revision: None
metrics:
- type: accuracy
value: 89.23
- type: ap
value: 75.03115536738895
- type: f1
value: 87.71601665295442
使用方法
from sentence_transformers import SentenceTransformer

# Load the embedding model by its published name.
encoder = SentenceTransformer('IYun-large-zh')

# Placeholder inputs; substitute the texts you want to compare.
texts = ["sentence1", "sentence2"]

# Encode the same batch twice, asking the library to normalize the embeddings.
# NOTE(review): with normalized vectors the product below is presumably a
# cosine-similarity matrix — confirm against the sentence-transformers docs.
left_vectors = encoder.encode(texts, normalize_embeddings=True)
right_vectors = encoder.encode(texts, normalize_embeddings=True)

# Pairwise scores: rows index the first batch, columns the second.
print(left_vectors @ right_vectors.T)