Pristinenlp's picture
update
ff08b6a verified
---
tags:
- mteb
model-index:
- name: alime-embedding-large-zh
results:
- task:
type: STS
dataset:
type: C-MTEB/AFQMC
name: MTEB AFQMC
config: default
split: validation
revision: None
metrics:
- type: cos_sim_pearson
value: 49.6479989785073
- type: cos_sim_spearman
value: 54.733173049795425
- type: euclidean_pearson
value: 53.06330391299694
- type: euclidean_spearman
value: 54.73321325021156
- type: manhattan_pearson
value: 53.0477915350307
- type: manhattan_spearman
value: 54.728508847750845
- task:
type: STS
dataset:
type: C-MTEB/ATEC
name: MTEB ATEC
config: default
split: test
revision: None
metrics:
- type: cos_sim_pearson
value: 48.658812679136325
- type: cos_sim_spearman
value: 55.125070901329146
- type: euclidean_pearson
value: 55.73373519622172
- type: euclidean_spearman
value: 55.12506864911728
- type: manhattan_pearson
value: 55.71155132206361
- type: manhattan_spearman
value: 55.121598723227905
- task:
type: Classification
dataset:
type: mteb/amazon_reviews_multi
name: MTEB AmazonReviewsClassification (zh)
config: zh
split: test
revision: 1399c76144fd37290681b995c656ef9b2e06e26d
metrics:
- type: accuracy
value: 46.95
- type: f1
value: 45.34383964066362
- task:
type: STS
dataset:
type: C-MTEB/BQ
name: MTEB BQ
config: default
split: test
revision: None
metrics:
- type: cos_sim_pearson
value: 62.92731050834033
- type: cos_sim_spearman
value: 64.8881453551134
- type: euclidean_pearson
value: 63.31447523186855
- type: euclidean_spearman
value: 64.88814189042776
- type: manhattan_pearson
value: 63.222442228527996
- type: manhattan_spearman
value: 64.79818263591122
- task:
type: Clustering
dataset:
type: C-MTEB/CLSClusteringP2P
name: MTEB CLSClusteringP2P
config: default
split: test
revision: None
metrics:
- type: v_measure
value: 42.518811360488925
- task:
type: Clustering
dataset:
type: C-MTEB/CLSClusteringS2S
name: MTEB CLSClusteringS2S
config: default
split: test
revision: None
metrics:
- type: v_measure
value: 39.72890397315954
- task:
type: Reranking
dataset:
type: C-MTEB/CMedQAv1-reranking
name: MTEB CMedQAv1
config: default
split: test
revision: None
metrics:
- type: map
value: 86.51852576014969
- type: mrr
value: 89.02047619047619
- task:
type: Reranking
dataset:
type: C-MTEB/CMedQAv2-reranking
name: MTEB CMedQAv2
config: default
split: test
revision: None
metrics:
- type: map
value: 87.11415162833914
- type: mrr
value: 89.6338492063492
- task:
type: Retrieval
dataset:
type: C-MTEB/CmedqaRetrieval
name: MTEB CmedqaRetrieval
config: default
split: dev
revision: None
metrics:
- type: map_at_1
value: 24.883
- type: map_at_10
value: 37.246
- type: map_at_100
value: 39.11
- type: map_at_1000
value: 39.222
- type: map_at_3
value: 32.956
- type: map_at_5
value: 35.411
- type: mrr_at_1
value: 37.834
- type: mrr_at_10
value: 46.031
- type: mrr_at_100
value: 47.033
- type: mrr_at_1000
value: 47.077000000000005
- type: mrr_at_3
value: 43.415
- type: mrr_at_5
value: 44.938
- type: ndcg_at_1
value: 37.834
- type: ndcg_at_10
value: 43.928
- type: ndcg_at_100
value: 51.312999999999995
- type: ndcg_at_1000
value: 53.23
- type: ndcg_at_3
value: 38.397
- type: ndcg_at_5
value: 40.848
- type: precision_at_1
value: 37.834
- type: precision_at_10
value: 9.782
- type: precision_at_100
value: 1.583
- type: precision_at_1000
value: 0.183
- type: precision_at_3
value: 21.664
- type: precision_at_5
value: 15.934000000000001
- type: recall_at_1
value: 24.883
- type: recall_at_10
value: 54.911
- type: recall_at_100
value: 85.419
- type: recall_at_1000
value: 98.16
- type: recall_at_3
value: 38.416
- type: recall_at_5
value: 45.778
- task:
type: PairClassification
dataset:
type: C-MTEB/CMNLI
name: MTEB Cmnli
config: default
split: validation
revision: None
metrics:
- type: cos_sim_accuracy
value: 82.5616355983163
- type: cos_sim_ap
value: 89.3612977679186
- type: cos_sim_f1
value: 83.93428161870108
- type: cos_sim_precision
value: 79.42404006677796
- type: cos_sim_recall
value: 88.98760813654431
- type: dot_accuracy
value: 82.5616355983163
- type: dot_ap
value: 89.38168095374776
- type: dot_f1
value: 83.93428161870108
- type: dot_precision
value: 79.42404006677796
- type: dot_recall
value: 88.98760813654431
- type: euclidean_accuracy
value: 82.5616355983163
- type: euclidean_ap
value: 89.36129603693611
- type: euclidean_f1
value: 83.93428161870108
- type: euclidean_precision
value: 79.42404006677796
- type: euclidean_recall
value: 88.98760813654431
- type: manhattan_accuracy
value: 82.42934455802767
- type: manhattan_ap
value: 89.36577661305246
- type: manhattan_f1
value: 83.94765539803707
- type: manhattan_precision
value: 78.66339668914776
- type: manhattan_recall
value: 89.99298573766659
- type: max_accuracy
value: 82.5616355983163
- type: max_ap
value: 89.38168095374776
- type: max_f1
value: 83.94765539803707
- task:
type: Retrieval
dataset:
type: C-MTEB/CovidRetrieval
name: MTEB CovidRetrieval
config: default
split: dev
revision: None
metrics:
- type: map_at_1
value: 77.608
- type: map_at_10
value: 85.1
- type: map_at_100
value: 85.215
- type: map_at_1000
value: 85.217
- type: map_at_3
value: 83.97
- type: map_at_5
value: 84.638
- type: mrr_at_1
value: 77.97699999999999
- type: mrr_at_10
value: 85.173
- type: mrr_at_100
value: 85.28
- type: mrr_at_1000
value: 85.282
- type: mrr_at_3
value: 84.089
- type: mrr_at_5
value: 84.726
- type: ndcg_at_1
value: 77.871
- type: ndcg_at_10
value: 88.141
- type: ndcg_at_100
value: 88.612
- type: ndcg_at_1000
value: 88.68
- type: ndcg_at_3
value: 85.9
- type: ndcg_at_5
value: 87.06
- type: precision_at_1
value: 77.871
- type: precision_at_10
value: 9.841999999999999
- type: precision_at_100
value: 1.005
- type: precision_at_1000
value: 0.101
- type: precision_at_3
value: 30.698999999999998
- type: precision_at_5
value: 19.009
- type: recall_at_1
value: 77.608
- type: recall_at_10
value: 97.418
- type: recall_at_100
value: 99.473
- type: recall_at_1000
value: 100.0
- type: recall_at_3
value: 91.307
- type: recall_at_5
value: 94.125
- task:
type: Retrieval
dataset:
type: C-MTEB/DuRetrieval
name: MTEB DuRetrieval
config: default
split: dev
revision: None
metrics:
- type: map_at_1
value: 26.104
- type: map_at_10
value: 78.62
- type: map_at_100
value: 81.417
- type: map_at_1000
value: 81.46600000000001
- type: map_at_3
value: 55.077
- type: map_at_5
value: 69.18900000000001
- type: mrr_at_1
value: 90.55
- type: mrr_at_10
value: 93.42200000000001
- type: mrr_at_100
value: 93.46900000000001
- type: mrr_at_1000
value: 93.472
- type: mrr_at_3
value: 93.108
- type: mrr_at_5
value: 93.318
- type: ndcg_at_1
value: 90.55
- type: ndcg_at_10
value: 86.227
- type: ndcg_at_100
value: 89.201
- type: ndcg_at_1000
value: 89.655
- type: ndcg_at_3
value: 85.89099999999999
- type: ndcg_at_5
value: 84.443
- type: precision_at_1
value: 90.55
- type: precision_at_10
value: 40.915
- type: precision_at_100
value: 4.749
- type: precision_at_1000
value: 0.486
- type: precision_at_3
value: 76.9
- type: precision_at_5
value: 64.56
- type: recall_at_1
value: 26.104
- type: recall_at_10
value: 86.924
- type: recall_at_100
value: 96.52
- type: recall_at_1000
value: 98.83800000000001
- type: recall_at_3
value: 57.196999999999996
- type: recall_at_5
value: 73.595
- task:
type: Retrieval
dataset:
type: C-MTEB/EcomRetrieval
name: MTEB EcomRetrieval
config: default
split: dev
revision: None
metrics:
- type: map_at_1
value: 51.9
- type: map_at_10
value: 62.446
- type: map_at_100
value: 62.922
- type: map_at_1000
value: 62.934999999999995
- type: map_at_3
value: 59.933
- type: map_at_5
value: 61.548
- type: mrr_at_1
value: 51.9
- type: mrr_at_10
value: 62.446
- type: mrr_at_100
value: 62.922
- type: mrr_at_1000
value: 62.934999999999995
- type: mrr_at_3
value: 59.933
- type: mrr_at_5
value: 61.548
- type: ndcg_at_1
value: 51.9
- type: ndcg_at_10
value: 67.561
- type: ndcg_at_100
value: 69.87400000000001
- type: ndcg_at_1000
value: 70.19800000000001
- type: ndcg_at_3
value: 62.474
- type: ndcg_at_5
value: 65.391
- type: precision_at_1
value: 51.9
- type: precision_at_10
value: 8.36
- type: precision_at_100
value: 0.9440000000000001
- type: precision_at_1000
value: 0.097
- type: precision_at_3
value: 23.267
- type: precision_at_5
value: 15.379999999999999
- type: recall_at_1
value: 51.9
- type: recall_at_10
value: 83.6
- type: recall_at_100
value: 94.39999999999999
- type: recall_at_1000
value: 96.89999999999999
- type: recall_at_3
value: 69.8
- type: recall_at_5
value: 76.9
- task:
type: Classification
dataset:
type: C-MTEB/IFlyTek-classification
name: MTEB IFlyTek
config: default
split: validation
revision: None
metrics:
- type: accuracy
value: 49.672951135051946
- type: f1
value: 38.246634605142084
- task:
type: Classification
dataset:
type: C-MTEB/JDReview-classification
name: MTEB JDReview
config: default
split: test
revision: None
metrics:
- type: accuracy
value: 86.52908067542214
- type: ap
value: 55.415146961759135
- type: f1
value: 81.38343036361825
- task:
type: STS
dataset:
type: C-MTEB/LCQMC
name: MTEB LCQMC
config: default
split: test
revision: None
metrics:
- type: cos_sim_pearson
value: 70.15572724302896
- type: cos_sim_spearman
value: 75.11630463239744
- type: euclidean_pearson
value: 74.2927184018677
- type: euclidean_spearman
value: 75.11630463089752
- type: manhattan_pearson
value: 74.27724224882166
- type: manhattan_spearman
value: 75.10012699894408
- task:
type: Reranking
dataset:
type: C-MTEB/Mmarco-reranking
name: MTEB MMarcoReranking
config: default
split: dev
revision: None
metrics:
- type: map
value: 30.62934327678744
- type: mrr
value: 29.48730158730159
- task:
type: Retrieval
dataset:
type: C-MTEB/MMarcoRetrieval
name: MTEB MMarcoRetrieval
config: default
split: dev
revision: None
metrics:
- type: map_at_1
value: 65.33
- type: map_at_10
value: 74.524
- type: map_at_100
value: 74.851
- type: map_at_1000
value: 74.86500000000001
- type: map_at_3
value: 72.748
- type: map_at_5
value: 73.896
- type: mrr_at_1
value: 67.593
- type: mrr_at_10
value: 75.19
- type: mrr_at_100
value: 75.472
- type: mrr_at_1000
value: 75.484
- type: mrr_at_3
value: 73.634
- type: mrr_at_5
value: 74.638
- type: ndcg_at_1
value: 67.593
- type: ndcg_at_10
value: 78.254
- type: ndcg_at_100
value: 79.727
- type: ndcg_at_1000
value: 80.09100000000001
- type: ndcg_at_3
value: 74.892
- type: ndcg_at_5
value: 76.835
- type: precision_at_1
value: 67.593
- type: precision_at_10
value: 9.46
- type: precision_at_100
value: 1.02
- type: precision_at_1000
value: 0.105
- type: precision_at_3
value: 28.227999999999998
- type: precision_at_5
value: 17.965999999999998
- type: recall_at_1
value: 65.33
- type: recall_at_10
value: 89.048
- type: recall_at_100
value: 95.732
- type: recall_at_1000
value: 98.598
- type: recall_at_3
value: 80.209
- type: recall_at_5
value: 84.824
- task:
type: Classification
dataset:
type: mteb/amazon_massive_intent
name: MTEB MassiveIntentClassification (zh-CN)
config: zh-CN
split: test
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7
metrics:
- type: accuracy
value: 73.38937457969065
- type: f1
value: 70.87692475465195
- task:
type: Classification
dataset:
type: mteb/amazon_massive_scenario
name: MTEB MassiveScenarioClassification (zh-CN)
config: zh-CN
split: test
revision: 7d571f92784cd94a019292a1f45445077d0ef634
metrics:
- type: accuracy
value: 76.04236718224612
- type: f1
value: 75.52425703483891
- task:
type: Retrieval
dataset:
type: C-MTEB/MedicalRetrieval
name: MTEB MedicalRetrieval
config: default
split: dev
revision: None
metrics:
- type: map_at_1
value: 53.1
- type: map_at_10
value: 60.24
- type: map_at_100
value: 60.781
- type: map_at_1000
value: 60.81999999999999
- type: map_at_3
value: 58.733000000000004
- type: map_at_5
value: 59.618
- type: mrr_at_1
value: 53.0
- type: mrr_at_10
value: 60.195
- type: mrr_at_100
value: 60.736000000000004
- type: mrr_at_1000
value: 60.775
- type: mrr_at_3
value: 58.68299999999999
- type: mrr_at_5
value: 59.573
- type: ndcg_at_1
value: 53.1
- type: ndcg_at_10
value: 63.568999999999996
- type: ndcg_at_100
value: 66.401
- type: ndcg_at_1000
value: 67.597
- type: ndcg_at_3
value: 60.455000000000005
- type: ndcg_at_5
value: 62.05500000000001
- type: precision_at_1
value: 53.1
- type: precision_at_10
value: 7.3999999999999995
- type: precision_at_100
value: 0.877
- type: precision_at_1000
value: 0.097
- type: precision_at_3
value: 21.8
- type: precision_at_5
value: 13.86
- type: recall_at_1
value: 53.1
- type: recall_at_10
value: 74.0
- type: recall_at_100
value: 87.7
- type: recall_at_1000
value: 97.39999999999999
- type: recall_at_3
value: 65.4
- type: recall_at_5
value: 69.3
- task:
type: Classification
dataset:
type: C-MTEB/MultilingualSentiment-classification
name: MTEB MultilingualSentiment
config: default
split: validation
revision: None
metrics:
- type: accuracy
value: 76.40333333333332
- type: f1
value: 76.40924131087777
- task:
type: PairClassification
dataset:
type: C-MTEB/OCNLI
name: MTEB Ocnli
config: default
split: validation
revision: None
metrics:
- type: cos_sim_accuracy
value: 77.15213860314023
- type: cos_sim_ap
value: 79.30594584166899
- type: cos_sim_f1
value: 80.25889967637539
- type: cos_sim_precision
value: 71.38157894736842
- type: cos_sim_recall
value: 91.65786694825766
- type: dot_accuracy
value: 77.15213860314023
- type: dot_ap
value: 79.30594584166899
- type: dot_f1
value: 80.25889967637539
- type: dot_precision
value: 71.38157894736842
- type: dot_recall
value: 91.65786694825766
- type: euclidean_accuracy
value: 77.15213860314023
- type: euclidean_ap
value: 79.30594584166899
- type: euclidean_f1
value: 80.25889967637539
- type: euclidean_precision
value: 71.38157894736842
- type: euclidean_recall
value: 91.65786694825766
- type: manhattan_accuracy
value: 77.36870600974554
- type: manhattan_ap
value: 79.23401219102254
- type: manhattan_f1
value: 80.44901777362021
- type: manhattan_precision
value: 72.20822837951302
- type: manhattan_recall
value: 90.8130939809926
- type: max_accuracy
value: 77.36870600974554
- type: max_ap
value: 79.30594584166899
- type: max_f1
value: 80.44901777362021
- task:
type: Classification
dataset:
type: C-MTEB/OnlineShopping-classification
name: MTEB OnlineShopping
config: default
split: test
revision: None
metrics:
- type: accuracy
value: 92.6
- type: ap
value: 90.78779333103819
- type: f1
value: 92.59253441654515
- task:
type: STS
dataset:
type: C-MTEB/PAWSX
name: MTEB PAWSX
config: default
split: test
revision: None
metrics:
- type: cos_sim_pearson
value: 34.4442917065113
- type: cos_sim_spearman
value: 37.93070836936766
- type: euclidean_pearson
value: 38.35141108502335
- type: euclidean_spearman
value: 37.936378767247106
- type: manhattan_pearson
value: 38.357078125497566
- type: manhattan_spearman
value: 37.94413026678537
- task:
type: STS
dataset:
type: C-MTEB/QBQTC
name: MTEB QBQTC
config: default
split: test
revision: None
metrics:
- type: cos_sim_pearson
value: 32.84777948741198
- type: cos_sim_spearman
value: 34.212129449696285
- type: euclidean_pearson
value: 32.69161407750465
- type: euclidean_spearman
value: 34.21178008084197
- type: manhattan_pearson
value: 32.675418316752506
- type: manhattan_spearman
value: 34.178590557249
- task:
type: STS
dataset:
type: mteb/sts22-crosslingual-sts
name: MTEB STS22 (zh)
config: zh
split: test
revision: 6d1ba47164174a496b7fa5d3569dae26a6813b80
metrics:
- type: cos_sim_pearson
value: 64.65903821549742
- type: cos_sim_spearman
value: 64.54376284777354
- type: euclidean_pearson
value: 63.70022677799055
- type: euclidean_spearman
value: 64.54376284777354
- type: manhattan_pearson
value: 64.46392290759724
- type: manhattan_spearman
value: 65.2496975447815
- task:
type: STS
dataset:
type: C-MTEB/STSB
name: MTEB STSB
config: default
split: test
revision: None
metrics:
- type: cos_sim_pearson
value: 80.05773088991484
- type: cos_sim_spearman
value: 80.71550237522008
- type: euclidean_pearson
value: 80.31115977415573
- type: euclidean_spearman
value: 80.71510951779365
- type: manhattan_pearson
value: 80.25235514937249
- type: manhattan_spearman
value: 80.65958309383224
- task:
type: Reranking
dataset:
type: C-MTEB/T2Reranking
name: MTEB T2Reranking
config: default
split: dev
revision: None
metrics:
- type: map
value: 66.18255262304848
- type: mrr
value: 75.95393252087565
- task:
type: Retrieval
dataset:
type: C-MTEB/T2Retrieval
name: MTEB T2Retrieval
config: default
split: dev
revision: None
metrics:
- type: map_at_1
value: 28.651
- type: map_at_10
value: 76.281
- type: map_at_100
value: 80.018
- type: map_at_1000
value: 80.098
- type: map_at_3
value: 54.783
- type: map_at_5
value: 66.508
- type: mrr_at_1
value: 90.99199999999999
- type: mrr_at_10
value: 93.812
- type: mrr_at_100
value: 93.87100000000001
- type: mrr_at_1000
value: 93.87299999999999
- type: mrr_at_3
value: 93.415
- type: mrr_at_5
value: 93.685
- type: ndcg_at_1
value: 90.99199999999999
- type: ndcg_at_10
value: 84.57900000000001
- type: ndcg_at_100
value: 88.474
- type: ndcg_at_1000
value: 89.172
- type: ndcg_at_3
value: 86.56099999999999
- type: ndcg_at_5
value: 84.811
- type: precision_at_1
value: 90.99199999999999
- type: precision_at_10
value: 40.969
- type: precision_at_100
value: 4.97
- type: precision_at_1000
value: 0.515
- type: precision_at_3
value: 74.734
- type: precision_at_5
value: 61.980999999999995
- type: recall_at_1
value: 28.651
- type: recall_at_10
value: 83.321
- type: recall_at_100
value: 95.498
- type: recall_at_1000
value: 98.759
- type: recall_at_3
value: 56.708000000000006
- type: recall_at_5
value: 70.25200000000001
- task:
type: Classification
dataset:
type: C-MTEB/TNews-classification
name: MTEB TNews
config: default
split: validation
revision: None
metrics:
- type: accuracy
value: 52.037
- type: f1
value: 50.3832093595745
- task:
type: Clustering
dataset:
type: C-MTEB/ThuNewsClusteringP2P
name: MTEB ThuNewsClusteringP2P
config: default
split: test
revision: None
metrics:
- type: v_measure
value: 70.09793315196697
- task:
type: Clustering
dataset:
type: C-MTEB/ThuNewsClusteringS2S
name: MTEB ThuNewsClusteringS2S
config: default
split: test
revision: None
metrics:
- type: v_measure
value: 63.66930246094367
- task:
type: Retrieval
dataset:
type: C-MTEB/VideoRetrieval
name: MTEB VideoRetrieval
config: default
split: dev
revision: None
metrics:
- type: map_at_1
value: 60.4
- type: map_at_10
value: 69.878
- type: map_at_100
value: 70.285
- type: map_at_1000
value: 70.295
- type: map_at_3
value: 68.033
- type: map_at_5
value: 69.233
- type: mrr_at_1
value: 60.3
- type: mrr_at_10
value: 69.828
- type: mrr_at_100
value: 70.235
- type: mrr_at_1000
value: 70.245
- type: mrr_at_3
value: 67.983
- type: mrr_at_5
value: 69.18299999999999
- type: ndcg_at_1
value: 60.4
- type: ndcg_at_10
value: 74.155
- type: ndcg_at_100
value: 76.173
- type: ndcg_at_1000
value: 76.44800000000001
- type: ndcg_at_3
value: 70.44500000000001
- type: ndcg_at_5
value: 72.61800000000001
- type: precision_at_1
value: 60.4
- type: precision_at_10
value: 8.74
- type: precision_at_100
value: 0.9690000000000001
- type: precision_at_1000
value: 0.099
- type: precision_at_3
value: 25.8
- type: precision_at_5
value: 16.54
- type: recall_at_1
value: 60.4
- type: recall_at_10
value: 87.4
- type: recall_at_100
value: 96.89999999999999
- type: recall_at_1000
value: 99.1
- type: recall_at_3
value: 77.4
- type: recall_at_5
value: 82.69999999999999
- task:
type: Classification
dataset:
type: C-MTEB/waimai-classification
name: MTEB Waimai
config: default
split: test
revision: None
metrics:
- type: accuracy
value: 88.49000000000001
- type: ap
value: 73.5441395538586
- type: f1
value: 86.88114969870975
---
# alime-embedding-large-zh
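`alime-embedding-large-zh` is a Chinese text-embedding model. Its evaluation results on the Chinese MTEB (C-MTEB) benchmark tasks are listed in the `model-index` metadata at the top of this card.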
<!--- Describe your model here -->
## Usage (Sentence-Transformers)
The easiest way to use this model is with [sentence-transformers](https://www.sbert.net) installed:
```bash
pip install -U sentence-transformers
```
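Then you can use the model like this. The example passes `normalize_embeddings=True`, so the returned vectors have unit length and their dot product equals their cosine similarity: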
```python
from sentence_transformers import SentenceTransformer
sentences = ["西湖在哪?", "西湖风景名胜区位于浙江省杭州市"]
model = SentenceTransformer('Pristinenlp/alime-embedding-large-zh')
embeddings = model.encode(sentences, normalize_embeddings=True)
print(embeddings)
```