--- tags: - mteb model-index: - name: alime-embedding-large-zh results: - task: type: STS dataset: type: C-MTEB/AFQMC name: MTEB AFQMC config: default split: validation revision: None metrics: - type: cos_sim_pearson value: 49.6479989785073 - type: cos_sim_spearman value: 54.733173049795425 - type: euclidean_pearson value: 53.06330391299694 - type: euclidean_spearman value: 54.73321325021156 - type: manhattan_pearson value: 53.0477915350307 - type: manhattan_spearman value: 54.728508847750845 - task: type: STS dataset: type: C-MTEB/ATEC name: MTEB ATEC config: default split: test revision: None metrics: - type: cos_sim_pearson value: 48.658812679136325 - type: cos_sim_spearman value: 55.125070901329146 - type: euclidean_pearson value: 55.73373519622172 - type: euclidean_spearman value: 55.12506864911728 - type: manhattan_pearson value: 55.71155132206361 - type: manhattan_spearman value: 55.121598723227905 - task: type: Classification dataset: type: mteb/amazon_reviews_multi name: MTEB AmazonReviewsClassification (zh) config: zh split: test revision: 1399c76144fd37290681b995c656ef9b2e06e26d metrics: - type: accuracy value: 46.95 - type: f1 value: 45.34383964066362 - task: type: STS dataset: type: C-MTEB/BQ name: MTEB BQ config: default split: test revision: None metrics: - type: cos_sim_pearson value: 62.92731050834033 - type: cos_sim_spearman value: 64.8881453551134 - type: euclidean_pearson value: 63.31447523186855 - type: euclidean_spearman value: 64.88814189042776 - type: manhattan_pearson value: 63.222442228527996 - type: manhattan_spearman value: 64.79818263591122 - task: type: Clustering dataset: type: C-MTEB/CLSClusteringP2P name: MTEB CLSClusteringP2P config: default split: test revision: None metrics: - type: v_measure value: 42.518811360488925 - task: type: Clustering dataset: type: C-MTEB/CLSClusteringS2S name: MTEB CLSClusteringS2S config: default split: test revision: None metrics: - type: v_measure value: 39.72890397315954 - task: type: Reranking 
dataset: type: C-MTEB/CMedQAv1-reranking name: MTEB CMedQAv1 config: default split: test revision: None metrics: - type: map value: 86.51852576014969 - type: mrr value: 89.02047619047619 - task: type: Reranking dataset: type: C-MTEB/CMedQAv2-reranking name: MTEB CMedQAv2 config: default split: test revision: None metrics: - type: map value: 87.11415162833914 - type: mrr value: 89.6338492063492 - task: type: Retrieval dataset: type: C-MTEB/CmedqaRetrieval name: MTEB CmedqaRetrieval config: default split: dev revision: None metrics: - type: map_at_1 value: 24.883 - type: map_at_10 value: 37.246 - type: map_at_100 value: 39.11 - type: map_at_1000 value: 39.222 - type: map_at_3 value: 32.956 - type: map_at_5 value: 35.411 - type: mrr_at_1 value: 37.834 - type: mrr_at_10 value: 46.031 - type: mrr_at_100 value: 47.033 - type: mrr_at_1000 value: 47.077000000000005 - type: mrr_at_3 value: 43.415 - type: mrr_at_5 value: 44.938 - type: ndcg_at_1 value: 37.834 - type: ndcg_at_10 value: 43.928 - type: ndcg_at_100 value: 51.312999999999995 - type: ndcg_at_1000 value: 53.23 - type: ndcg_at_3 value: 38.397 - type: ndcg_at_5 value: 40.848 - type: precision_at_1 value: 37.834 - type: precision_at_10 value: 9.782 - type: precision_at_100 value: 1.583 - type: precision_at_1000 value: 0.183 - type: precision_at_3 value: 21.664 - type: precision_at_5 value: 15.934000000000001 - type: recall_at_1 value: 24.883 - type: recall_at_10 value: 54.911 - type: recall_at_100 value: 85.419 - type: recall_at_1000 value: 98.16 - type: recall_at_3 value: 38.416 - type: recall_at_5 value: 45.778 - task: type: PairClassification dataset: type: C-MTEB/CMNLI name: MTEB Cmnli config: default split: validation revision: None metrics: - type: cos_sim_accuracy value: 82.5616355983163 - type: cos_sim_ap value: 89.3612977679186 - type: cos_sim_f1 value: 83.93428161870108 - type: cos_sim_precision value: 79.42404006677796 - type: cos_sim_recall value: 88.98760813654431 - type: dot_accuracy value: 
82.5616355983163 - type: dot_ap value: 89.38168095374776 - type: dot_f1 value: 83.93428161870108 - type: dot_precision value: 79.42404006677796 - type: dot_recall value: 88.98760813654431 - type: euclidean_accuracy value: 82.5616355983163 - type: euclidean_ap value: 89.36129603693611 - type: euclidean_f1 value: 83.93428161870108 - type: euclidean_precision value: 79.42404006677796 - type: euclidean_recall value: 88.98760813654431 - type: manhattan_accuracy value: 82.42934455802767 - type: manhattan_ap value: 89.36577661305246 - type: manhattan_f1 value: 83.94765539803707 - type: manhattan_precision value: 78.66339668914776 - type: manhattan_recall value: 89.99298573766659 - type: max_accuracy value: 82.5616355983163 - type: max_ap value: 89.38168095374776 - type: max_f1 value: 83.94765539803707 - task: type: Retrieval dataset: type: C-MTEB/CovidRetrieval name: MTEB CovidRetrieval config: default split: dev revision: None metrics: - type: map_at_1 value: 77.608 - type: map_at_10 value: 85.1 - type: map_at_100 value: 85.215 - type: map_at_1000 value: 85.217 - type: map_at_3 value: 83.97 - type: map_at_5 value: 84.638 - type: mrr_at_1 value: 77.97699999999999 - type: mrr_at_10 value: 85.173 - type: mrr_at_100 value: 85.28 - type: mrr_at_1000 value: 85.282 - type: mrr_at_3 value: 84.089 - type: mrr_at_5 value: 84.726 - type: ndcg_at_1 value: 77.871 - type: ndcg_at_10 value: 88.141 - type: ndcg_at_100 value: 88.612 - type: ndcg_at_1000 value: 88.68 - type: ndcg_at_3 value: 85.9 - type: ndcg_at_5 value: 87.06 - type: precision_at_1 value: 77.871 - type: precision_at_10 value: 9.841999999999999 - type: precision_at_100 value: 1.005 - type: precision_at_1000 value: 0.101 - type: precision_at_3 value: 30.698999999999998 - type: precision_at_5 value: 19.009 - type: recall_at_1 value: 77.608 - type: recall_at_10 value: 97.418 - type: recall_at_100 value: 99.473 - type: recall_at_1000 value: 100.0 - type: recall_at_3 value: 91.307 - type: recall_at_5 value: 94.125 - task: 
type: Retrieval dataset: type: C-MTEB/DuRetrieval name: MTEB DuRetrieval config: default split: dev revision: None metrics: - type: map_at_1 value: 26.104 - type: map_at_10 value: 78.62 - type: map_at_100 value: 81.417 - type: map_at_1000 value: 81.46600000000001 - type: map_at_3 value: 55.077 - type: map_at_5 value: 69.18900000000001 - type: mrr_at_1 value: 90.55 - type: mrr_at_10 value: 93.42200000000001 - type: mrr_at_100 value: 93.46900000000001 - type: mrr_at_1000 value: 93.472 - type: mrr_at_3 value: 93.108 - type: mrr_at_5 value: 93.318 - type: ndcg_at_1 value: 90.55 - type: ndcg_at_10 value: 86.227 - type: ndcg_at_100 value: 89.201 - type: ndcg_at_1000 value: 89.655 - type: ndcg_at_3 value: 85.89099999999999 - type: ndcg_at_5 value: 84.443 - type: precision_at_1 value: 90.55 - type: precision_at_10 value: 40.915 - type: precision_at_100 value: 4.749 - type: precision_at_1000 value: 0.486 - type: precision_at_3 value: 76.9 - type: precision_at_5 value: 64.56 - type: recall_at_1 value: 26.104 - type: recall_at_10 value: 86.924 - type: recall_at_100 value: 96.52 - type: recall_at_1000 value: 98.83800000000001 - type: recall_at_3 value: 57.196999999999996 - type: recall_at_5 value: 73.595 - task: type: Retrieval dataset: type: C-MTEB/EcomRetrieval name: MTEB EcomRetrieval config: default split: dev revision: None metrics: - type: map_at_1 value: 51.9 - type: map_at_10 value: 62.446 - type: map_at_100 value: 62.922 - type: map_at_1000 value: 62.934999999999995 - type: map_at_3 value: 59.933 - type: map_at_5 value: 61.548 - type: mrr_at_1 value: 51.9 - type: mrr_at_10 value: 62.446 - type: mrr_at_100 value: 62.922 - type: mrr_at_1000 value: 62.934999999999995 - type: mrr_at_3 value: 59.933 - type: mrr_at_5 value: 61.548 - type: ndcg_at_1 value: 51.9 - type: ndcg_at_10 value: 67.561 - type: ndcg_at_100 value: 69.87400000000001 - type: ndcg_at_1000 value: 70.19800000000001 - type: ndcg_at_3 value: 62.474 - type: ndcg_at_5 value: 65.391 - type: precision_at_1 value: 
51.9 - type: precision_at_10 value: 8.36 - type: precision_at_100 value: 0.9440000000000001 - type: precision_at_1000 value: 0.097 - type: precision_at_3 value: 23.267 - type: precision_at_5 value: 15.379999999999999 - type: recall_at_1 value: 51.9 - type: recall_at_10 value: 83.6 - type: recall_at_100 value: 94.39999999999999 - type: recall_at_1000 value: 96.89999999999999 - type: recall_at_3 value: 69.8 - type: recall_at_5 value: 76.9 - task: type: Classification dataset: type: C-MTEB/IFlyTek-classification name: MTEB IFlyTek config: default split: validation revision: None metrics: - type: accuracy value: 49.672951135051946 - type: f1 value: 38.246634605142084 - task: type: Classification dataset: type: C-MTEB/JDReview-classification name: MTEB JDReview config: default split: test revision: None metrics: - type: accuracy value: 86.52908067542214 - type: ap value: 55.415146961759135 - type: f1 value: 81.38343036361825 - task: type: STS dataset: type: C-MTEB/LCQMC name: MTEB LCQMC config: default split: test revision: None metrics: - type: cos_sim_pearson value: 70.15572724302896 - type: cos_sim_spearman value: 75.11630463239744 - type: euclidean_pearson value: 74.2927184018677 - type: euclidean_spearman value: 75.11630463089752 - type: manhattan_pearson value: 74.27724224882166 - type: manhattan_spearman value: 75.10012699894408 - task: type: Reranking dataset: type: C-MTEB/Mmarco-reranking name: MTEB MMarcoReranking config: default split: dev revision: None metrics: - type: map value: 30.62934327678744 - type: mrr value: 29.48730158730159 - task: type: Retrieval dataset: type: C-MTEB/MMarcoRetrieval name: MTEB MMarcoRetrieval config: default split: dev revision: None metrics: - type: map_at_1 value: 65.33 - type: map_at_10 value: 74.524 - type: map_at_100 value: 74.851 - type: map_at_1000 value: 74.86500000000001 - type: map_at_3 value: 72.748 - type: map_at_5 value: 73.896 - type: mrr_at_1 value: 67.593 - type: mrr_at_10 value: 75.19 - type: mrr_at_100 value: 
75.472 - type: mrr_at_1000 value: 75.484 - type: mrr_at_3 value: 73.634 - type: mrr_at_5 value: 74.638 - type: ndcg_at_1 value: 67.593 - type: ndcg_at_10 value: 78.254 - type: ndcg_at_100 value: 79.727 - type: ndcg_at_1000 value: 80.09100000000001 - type: ndcg_at_3 value: 74.892 - type: ndcg_at_5 value: 76.835 - type: precision_at_1 value: 67.593 - type: precision_at_10 value: 9.46 - type: precision_at_100 value: 1.02 - type: precision_at_1000 value: 0.105 - type: precision_at_3 value: 28.227999999999998 - type: precision_at_5 value: 17.965999999999998 - type: recall_at_1 value: 65.33 - type: recall_at_10 value: 89.048 - type: recall_at_100 value: 95.732 - type: recall_at_1000 value: 98.598 - type: recall_at_3 value: 80.209 - type: recall_at_5 value: 84.824 - task: type: Classification dataset: type: mteb/amazon_massive_intent name: MTEB MassiveIntentClassification (zh-CN) config: zh-CN split: test revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 metrics: - type: accuracy value: 73.38937457969065 - type: f1 value: 70.87692475465195 - task: type: Classification dataset: type: mteb/amazon_massive_scenario name: MTEB MassiveScenarioClassification (zh-CN) config: zh-CN split: test revision: 7d571f92784cd94a019292a1f45445077d0ef634 metrics: - type: accuracy value: 76.04236718224612 - type: f1 value: 75.52425703483891 - task: type: Retrieval dataset: type: C-MTEB/MedicalRetrieval name: MTEB MedicalRetrieval config: default split: dev revision: None metrics: - type: map_at_1 value: 53.1 - type: map_at_10 value: 60.24 - type: map_at_100 value: 60.781 - type: map_at_1000 value: 60.81999999999999 - type: map_at_3 value: 58.733000000000004 - type: map_at_5 value: 59.618 - type: mrr_at_1 value: 53.0 - type: mrr_at_10 value: 60.195 - type: mrr_at_100 value: 60.736000000000004 - type: mrr_at_1000 value: 60.775 - type: mrr_at_3 value: 58.68299999999999 - type: mrr_at_5 value: 59.573 - type: ndcg_at_1 value: 53.1 - type: ndcg_at_10 value: 63.568999999999996 - type: ndcg_at_100 
value: 66.401 - type: ndcg_at_1000 value: 67.597 - type: ndcg_at_3 value: 60.455000000000005 - type: ndcg_at_5 value: 62.05500000000001 - type: precision_at_1 value: 53.1 - type: precision_at_10 value: 7.3999999999999995 - type: precision_at_100 value: 0.877 - type: precision_at_1000 value: 0.097 - type: precision_at_3 value: 21.8 - type: precision_at_5 value: 13.86 - type: recall_at_1 value: 53.1 - type: recall_at_10 value: 74.0 - type: recall_at_100 value: 87.7 - type: recall_at_1000 value: 97.39999999999999 - type: recall_at_3 value: 65.4 - type: recall_at_5 value: 69.3 - task: type: Classification dataset: type: C-MTEB/MultilingualSentiment-classification name: MTEB MultilingualSentiment config: default split: validation revision: None metrics: - type: accuracy value: 76.40333333333332 - type: f1 value: 76.40924131087777 - task: type: PairClassification dataset: type: C-MTEB/OCNLI name: MTEB Ocnli config: default split: validation revision: None metrics: - type: cos_sim_accuracy value: 77.15213860314023 - type: cos_sim_ap value: 79.30594584166899 - type: cos_sim_f1 value: 80.25889967637539 - type: cos_sim_precision value: 71.38157894736842 - type: cos_sim_recall value: 91.65786694825766 - type: dot_accuracy value: 77.15213860314023 - type: dot_ap value: 79.30594584166899 - type: dot_f1 value: 80.25889967637539 - type: dot_precision value: 71.38157894736842 - type: dot_recall value: 91.65786694825766 - type: euclidean_accuracy value: 77.15213860314023 - type: euclidean_ap value: 79.30594584166899 - type: euclidean_f1 value: 80.25889967637539 - type: euclidean_precision value: 71.38157894736842 - type: euclidean_recall value: 91.65786694825766 - type: manhattan_accuracy value: 77.36870600974554 - type: manhattan_ap value: 79.23401219102254 - type: manhattan_f1 value: 80.44901777362021 - type: manhattan_precision value: 72.20822837951302 - type: manhattan_recall value: 90.8130939809926 - type: max_accuracy value: 77.36870600974554 - type: max_ap value: 
79.30594584166899 - type: max_f1 value: 80.44901777362021 - task: type: Classification dataset: type: C-MTEB/OnlineShopping-classification name: MTEB OnlineShopping config: default split: test revision: None metrics: - type: accuracy value: 92.6 - type: ap value: 90.78779333103819 - type: f1 value: 92.59253441654515 - task: type: STS dataset: type: C-MTEB/PAWSX name: MTEB PAWSX config: default split: test revision: None metrics: - type: cos_sim_pearson value: 34.4442917065113 - type: cos_sim_spearman value: 37.93070836936766 - type: euclidean_pearson value: 38.35141108502335 - type: euclidean_spearman value: 37.936378767247106 - type: manhattan_pearson value: 38.357078125497566 - type: manhattan_spearman value: 37.94413026678537 - task: type: STS dataset: type: C-MTEB/QBQTC name: MTEB QBQTC config: default split: test revision: None metrics: - type: cos_sim_pearson value: 32.84777948741198 - type: cos_sim_spearman value: 34.212129449696285 - type: euclidean_pearson value: 32.69161407750465 - type: euclidean_spearman value: 34.21178008084197 - type: manhattan_pearson value: 32.675418316752506 - type: manhattan_spearman value: 34.178590557249 - task: type: STS dataset: type: mteb/sts22-crosslingual-sts name: MTEB STS22 (zh) config: zh split: test revision: 6d1ba47164174a496b7fa5d3569dae26a6813b80 metrics: - type: cos_sim_pearson value: 64.65903821549742 - type: cos_sim_spearman value: 64.54376284777354 - type: euclidean_pearson value: 63.70022677799055 - type: euclidean_spearman value: 64.54376284777354 - type: manhattan_pearson value: 64.46392290759724 - type: manhattan_spearman value: 65.2496975447815 - task: type: STS dataset: type: C-MTEB/STSB name: MTEB STSB config: default split: test revision: None metrics: - type: cos_sim_pearson value: 80.05773088991484 - type: cos_sim_spearman value: 80.71550237522008 - type: euclidean_pearson value: 80.31115977415573 - type: euclidean_spearman value: 80.71510951779365 - type: manhattan_pearson value: 80.25235514937249 - 
type: manhattan_spearman value: 80.65958309383224 - task: type: Reranking dataset: type: C-MTEB/T2Reranking name: MTEB T2Reranking config: default split: dev revision: None metrics: - type: map value: 66.18255262304848 - type: mrr value: 75.95393252087565 - task: type: Retrieval dataset: type: C-MTEB/T2Retrieval name: MTEB T2Retrieval config: default split: dev revision: None metrics: - type: map_at_1 value: 28.651 - type: map_at_10 value: 76.281 - type: map_at_100 value: 80.018 - type: map_at_1000 value: 80.098 - type: map_at_3 value: 54.783 - type: map_at_5 value: 66.508 - type: mrr_at_1 value: 90.99199999999999 - type: mrr_at_10 value: 93.812 - type: mrr_at_100 value: 93.87100000000001 - type: mrr_at_1000 value: 93.87299999999999 - type: mrr_at_3 value: 93.415 - type: mrr_at_5 value: 93.685 - type: ndcg_at_1 value: 90.99199999999999 - type: ndcg_at_10 value: 84.57900000000001 - type: ndcg_at_100 value: 88.474 - type: ndcg_at_1000 value: 89.172 - type: ndcg_at_3 value: 86.56099999999999 - type: ndcg_at_5 value: 84.811 - type: precision_at_1 value: 90.99199999999999 - type: precision_at_10 value: 40.969 - type: precision_at_100 value: 4.97 - type: precision_at_1000 value: 0.515 - type: precision_at_3 value: 74.734 - type: precision_at_5 value: 61.980999999999995 - type: recall_at_1 value: 28.651 - type: recall_at_10 value: 83.321 - type: recall_at_100 value: 95.498 - type: recall_at_1000 value: 98.759 - type: recall_at_3 value: 56.708000000000006 - type: recall_at_5 value: 70.25200000000001 - task: type: Classification dataset: type: C-MTEB/TNews-classification name: MTEB TNews config: default split: validation revision: None metrics: - type: accuracy value: 52.037 - type: f1 value: 50.3832093595745 - task: type: Clustering dataset: type: C-MTEB/ThuNewsClusteringP2P name: MTEB ThuNewsClusteringP2P config: default split: test revision: None metrics: - type: v_measure value: 70.09793315196697 - task: type: Clustering dataset: type: C-MTEB/ThuNewsClusteringS2S name: 
MTEB ThuNewsClusteringS2S config: default split: test revision: None metrics: - type: v_measure value: 63.66930246094367 - task: type: Retrieval dataset: type: C-MTEB/VideoRetrieval name: MTEB VideoRetrieval config: default split: dev revision: None metrics: - type: map_at_1 value: 60.4 - type: map_at_10 value: 69.878 - type: map_at_100 value: 70.285 - type: map_at_1000 value: 70.295 - type: map_at_3 value: 68.033 - type: map_at_5 value: 69.233 - type: mrr_at_1 value: 60.3 - type: mrr_at_10 value: 69.828 - type: mrr_at_100 value: 70.235 - type: mrr_at_1000 value: 70.245 - type: mrr_at_3 value: 67.983 - type: mrr_at_5 value: 69.18299999999999 - type: ndcg_at_1 value: 60.4 - type: ndcg_at_10 value: 74.155 - type: ndcg_at_100 value: 76.173 - type: ndcg_at_1000 value: 76.44800000000001 - type: ndcg_at_3 value: 70.44500000000001 - type: ndcg_at_5 value: 72.61800000000001 - type: precision_at_1 value: 60.4 - type: precision_at_10 value: 8.74 - type: precision_at_100 value: 0.9690000000000001 - type: precision_at_1000 value: 0.099 - type: precision_at_3 value: 25.8 - type: precision_at_5 value: 16.54 - type: recall_at_1 value: 60.4 - type: recall_at_10 value: 87.4 - type: recall_at_100 value: 96.89999999999999 - type: recall_at_1000 value: 99.1 - type: recall_at_3 value: 77.4 - type: recall_at_5 value: 82.69999999999999 - task: type: Classification dataset: type: C-MTEB/waimai-classification name: MTEB Waimai config: default split: test revision: None metrics: - type: accuracy value: 88.49000000000001 - type: ap value: 73.5441395538586 - type: f1 value: 86.88114969870975 --- # alime-embedding-large-zh The alime embedding model. 
## Usage (Sentence-Transformers)

Using this model becomes easy when you have [sentence-transformers](https://www.SBERT.net) installed:

```
pip install -U sentence-transformers
```

Then you can use the model like this:

```python
from sentence_transformers import SentenceTransformer

sentences = ["西湖在哪?", "西湖风景名胜区位于浙江省杭州市"]

model = SentenceTransformer('Pristinenlp/alime-embedding-large-zh')
embeddings = model.encode(sentences, normalize_embeddings=True)
print(embeddings)
```