--- pipeline_tag: sentence-similarity tags: - sentence-transformers - feature-extraction - sentence-similarity - mteb model-index: - name: tao-8k-origin results: - task: type: STS dataset: type: C-MTEB/AFQMC name: MTEB AFQMC config: default split: validation revision: None metrics: - type: cos_sim_pearson value: 47.33644889578121 - type: cos_sim_spearman value: 49.93968642502866 - type: euclidean_pearson value: 48.12029792973887 - type: euclidean_spearman value: 49.939666315145494 - type: manhattan_pearson value: 48.07449594650583 - type: manhattan_spearman value: 49.892461433911166 - task: type: STS dataset: type: C-MTEB/ATEC name: MTEB ATEC config: default split: test revision: None metrics: - type: cos_sim_pearson value: 50.976148098905746 - type: cos_sim_spearman value: 53.11230114448237 - type: euclidean_pearson value: 55.119977161851054 - type: euclidean_spearman value: 53.11229776647941 - type: manhattan_pearson value: 55.096968162828034 - type: manhattan_spearman value: 53.107481302419465 - task: type: Classification dataset: type: mteb/amazon_reviews_multi name: MTEB AmazonReviewsClassification (zh) config: zh split: test revision: 1399c76144fd37290681b995c656ef9b2e06e26d metrics: - type: accuracy value: 40.804 - type: f1 value: 39.01066543513968 - task: type: STS dataset: type: C-MTEB/BQ name: MTEB BQ config: default split: test revision: None metrics: - type: cos_sim_pearson value: 62.843816050026824 - type: cos_sim_spearman value: 65.54142642656706 - type: euclidean_pearson value: 64.08809634876388 - type: euclidean_spearman value: 65.54142642558392 - type: manhattan_pearson value: 64.09391522108272 - type: manhattan_spearman value: 65.55445491162718 - task: type: Clustering dataset: type: C-MTEB/CLSClusteringP2P name: MTEB CLSClusteringP2P config: default split: test revision: None metrics: - type: v_measure value: 40.028061591547804 - task: type: Clustering dataset: type: C-MTEB/CLSClusteringS2S name: MTEB CLSClusteringS2S config: default split: test 
revision: None metrics: - type: v_measure value: 38.1897102944254 - task: type: Reranking dataset: type: C-MTEB/CMedQAv1-reranking name: MTEB CMedQAv1 config: default split: test revision: None metrics: - type: map value: 85.34294439514511 - type: mrr value: 88.03849206349206 - task: type: Reranking dataset: type: C-MTEB/CMedQAv2-reranking name: MTEB CMedQAv2 config: default split: test revision: None metrics: - type: map value: 85.81294364673899 - type: mrr value: 88.52146825396825 - task: type: Retrieval dataset: type: C-MTEB/CmedqaRetrieval name: MTEB CmedqaRetrieval config: default split: dev revision: None metrics: - type: map_at_1 value: 23.982 - type: map_at_10 value: 36.21 - type: map_at_100 value: 38.072 - type: map_at_1000 value: 38.194 - type: map_at_3 value: 32.239000000000004 - type: map_at_5 value: 34.377 - type: mrr_at_1 value: 36.858999999999995 - type: mrr_at_10 value: 45.084999999999994 - type: mrr_at_100 value: 46.104 - type: mrr_at_1000 value: 46.154 - type: mrr_at_3 value: 42.623 - type: mrr_at_5 value: 43.995 - type: ndcg_at_1 value: 36.858999999999995 - type: ndcg_at_10 value: 42.735 - type: ndcg_at_100 value: 50.181 - type: ndcg_at_1000 value: 52.309000000000005 - type: ndcg_at_3 value: 37.728 - type: ndcg_at_5 value: 39.664 - type: precision_at_1 value: 36.858999999999995 - type: precision_at_10 value: 9.615 - type: precision_at_100 value: 1.564 - type: precision_at_1000 value: 0.183 - type: precision_at_3 value: 21.514 - type: precision_at_5 value: 15.568999999999999 - type: recall_at_1 value: 23.982 - type: recall_at_10 value: 53.04600000000001 - type: recall_at_100 value: 84.113 - type: recall_at_1000 value: 98.37 - type: recall_at_3 value: 37.824999999999996 - type: recall_at_5 value: 44.023 - task: type: PairClassification dataset: type: C-MTEB/CMNLI name: MTEB Cmnli config: default split: validation revision: None metrics: - type: cos_sim_accuracy value: 77.71497294046902 - type: cos_sim_ap value: 86.84526989595028 - type: cos_sim_f1 
value: 79.31987247608926 - type: cos_sim_precision value: 72.70601987142022 - type: cos_sim_recall value: 87.2574234276362 - type: dot_accuracy value: 77.71497294046902 - type: dot_ap value: 86.83880734247957 - type: dot_f1 value: 79.31987247608926 - type: dot_precision value: 72.70601987142022 - type: dot_recall value: 87.2574234276362 - type: euclidean_accuracy value: 77.71497294046902 - type: euclidean_ap value: 86.84526869685902 - type: euclidean_f1 value: 79.31987247608926 - type: euclidean_precision value: 72.70601987142022 - type: euclidean_recall value: 87.2574234276362 - type: manhattan_accuracy value: 77.8111846061335 - type: manhattan_ap value: 86.81142881585656 - type: manhattan_f1 value: 79.4201671780764 - type: manhattan_precision value: 72.53575570158485 - type: manhattan_recall value: 87.74842179097499 - type: max_accuracy value: 77.8111846061335 - type: max_ap value: 86.84526989595028 - type: max_f1 value: 79.4201671780764 - task: type: Retrieval dataset: type: C-MTEB/CovidRetrieval name: MTEB CovidRetrieval config: default split: dev revision: None metrics: - type: map_at_1 value: 70.706 - type: map_at_10 value: 78.619 - type: map_at_100 value: 78.915 - type: map_at_1000 value: 78.918 - type: map_at_3 value: 76.967 - type: map_at_5 value: 77.922 - type: mrr_at_1 value: 70.917 - type: mrr_at_10 value: 78.64 - type: mrr_at_100 value: 78.935 - type: mrr_at_1000 value: 78.938 - type: mrr_at_3 value: 77.081 - type: mrr_at_5 value: 77.972 - type: ndcg_at_1 value: 70.917 - type: ndcg_at_10 value: 82.186 - type: ndcg_at_100 value: 83.487 - type: ndcg_at_1000 value: 83.589 - type: ndcg_at_3 value: 78.874 - type: ndcg_at_5 value: 80.548 - type: precision_at_1 value: 70.917 - type: precision_at_10 value: 9.431000000000001 - type: precision_at_100 value: 1.001 - type: precision_at_1000 value: 0.101 - type: precision_at_3 value: 28.275 - type: precision_at_5 value: 17.829 - type: recall_at_1 value: 70.706 - type: recall_at_10 value: 93.256 - type: 
recall_at_100 value: 99.05199999999999 - type: recall_at_1000 value: 99.895 - type: recall_at_3 value: 84.247 - type: recall_at_5 value: 88.251 - task: type: Retrieval dataset: type: C-MTEB/DuRetrieval name: MTEB DuRetrieval config: default split: dev revision: None metrics: - type: map_at_1 value: 25.989 - type: map_at_10 value: 80.882 - type: map_at_100 value: 83.63199999999999 - type: map_at_1000 value: 83.663 - type: map_at_3 value: 55.772 - type: map_at_5 value: 70.598 - type: mrr_at_1 value: 90.14999999999999 - type: mrr_at_10 value: 93.30000000000001 - type: mrr_at_100 value: 93.363 - type: mrr_at_1000 value: 93.366 - type: mrr_at_3 value: 93.083 - type: mrr_at_5 value: 93.206 - type: ndcg_at_1 value: 90.14999999999999 - type: ndcg_at_10 value: 88.016 - type: ndcg_at_100 value: 90.52900000000001 - type: ndcg_at_1000 value: 90.84400000000001 - type: ndcg_at_3 value: 86.529 - type: ndcg_at_5 value: 85.65899999999999 - type: precision_at_1 value: 90.14999999999999 - type: precision_at_10 value: 42.295 - type: precision_at_100 value: 4.826 - type: precision_at_1000 value: 0.48900000000000005 - type: precision_at_3 value: 77.717 - type: precision_at_5 value: 65.81 - type: recall_at_1 value: 25.989 - type: recall_at_10 value: 89.446 - type: recall_at_100 value: 97.832 - type: recall_at_1000 value: 99.568 - type: recall_at_3 value: 58.223 - type: recall_at_5 value: 75.411 - task: type: Retrieval dataset: type: C-MTEB/EcomRetrieval name: MTEB EcomRetrieval config: default split: dev revision: None metrics: - type: map_at_1 value: 49.6 - type: map_at_10 value: 59.512 - type: map_at_100 value: 60.059 - type: map_at_1000 value: 60.077999999999996 - type: map_at_3 value: 56.882999999999996 - type: map_at_5 value: 58.298 - type: mrr_at_1 value: 49.6 - type: mrr_at_10 value: 59.512 - type: mrr_at_100 value: 60.059 - type: mrr_at_1000 value: 60.077999999999996 - type: mrr_at_3 value: 56.882999999999996 - type: mrr_at_5 value: 58.298 - type: ndcg_at_1 value: 49.6 - type: 
ndcg_at_10 value: 64.71000000000001 - type: ndcg_at_100 value: 67.238 - type: ndcg_at_1000 value: 67.74 - type: ndcg_at_3 value: 59.275 - type: ndcg_at_5 value: 61.805 - type: precision_at_1 value: 49.6 - type: precision_at_10 value: 8.12 - type: precision_at_100 value: 0.927 - type: precision_at_1000 value: 0.097 - type: precision_at_3 value: 22.067 - type: precision_at_5 value: 14.46 - type: recall_at_1 value: 49.6 - type: recall_at_10 value: 81.2 - type: recall_at_100 value: 92.7 - type: recall_at_1000 value: 96.6 - type: recall_at_3 value: 66.2 - type: recall_at_5 value: 72.3 - task: type: Classification dataset: type: C-MTEB/IFlyTek-classification name: MTEB IFlyTek config: default split: validation revision: None metrics: - type: accuracy value: 47.98768757214313 - type: f1 value: 35.24243089488371 - task: type: Classification dataset: type: C-MTEB/JDReview-classification name: MTEB JDReview config: default split: test revision: None metrics: - type: accuracy value: 87.01688555347093 - type: ap value: 56.39167630414159 - type: f1 value: 81.91756262306008 - task: type: STS dataset: type: C-MTEB/LCQMC name: MTEB LCQMC config: default split: test revision: None metrics: - type: cos_sim_pearson value: 71.17874301231225 - type: cos_sim_spearman value: 77.47936067899236 - type: euclidean_pearson value: 76.3241109984839 - type: euclidean_spearman value: 77.47936511149533 - type: manhattan_pearson value: 76.3334642249198 - type: manhattan_spearman value: 77.48889610190774 - task: type: Reranking dataset: type: C-MTEB/Mmarco-reranking name: MTEB MMarcoReranking config: default split: dev revision: None metrics: - type: map value: 27.96872431410137 - type: mrr value: 26.92023809523809 - task: type: Retrieval dataset: type: C-MTEB/MMarcoRetrieval name: MTEB MMarcoRetrieval config: default split: dev revision: None metrics: - type: map_at_1 value: 66.83099999999999 - type: map_at_10 value: 75.945 - type: map_at_100 value: 76.259 - type: map_at_1000 value: 
76.27000000000001 - type: map_at_3 value: 74.22999999999999 - type: map_at_5 value: 75.318 - type: mrr_at_1 value: 69.069 - type: mrr_at_10 value: 76.491 - type: mrr_at_100 value: 76.764 - type: mrr_at_1000 value: 76.775 - type: mrr_at_3 value: 75.01 - type: mrr_at_5 value: 75.934 - type: ndcg_at_1 value: 69.069 - type: ndcg_at_10 value: 79.557 - type: ndcg_at_100 value: 80.946 - type: ndcg_at_1000 value: 81.23700000000001 - type: ndcg_at_3 value: 76.31099999999999 - type: ndcg_at_5 value: 78.121 - type: precision_at_1 value: 69.069 - type: precision_at_10 value: 9.58 - type: precision_at_100 value: 1.027 - type: precision_at_1000 value: 0.105 - type: precision_at_3 value: 28.73 - type: precision_at_5 value: 18.201 - type: recall_at_1 value: 66.83099999999999 - type: recall_at_10 value: 90.118 - type: recall_at_100 value: 96.377 - type: recall_at_1000 value: 98.656 - type: recall_at_3 value: 81.516 - type: recall_at_5 value: 85.798 - task: type: Classification dataset: type: mteb/amazon_massive_intent name: MTEB MassiveIntentClassification (zh-CN) config: zh-CN split: test revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 metrics: - type: accuracy value: 68.2649630127774 - type: f1 value: 65.96868218344183 - task: type: Classification dataset: type: mteb/amazon_massive_scenario name: MTEB MassiveScenarioClassification (zh-CN) config: zh-CN split: test revision: 7d571f92784cd94a019292a1f45445077d0ef634 metrics: - type: accuracy value: 73.13382649630127 - type: f1 value: 72.69980239148315 - task: type: Retrieval dataset: type: C-MTEB/MedicalRetrieval name: MTEB MedicalRetrieval config: default split: dev revision: None metrics: - type: map_at_1 value: 51.2 - type: map_at_10 value: 57.715 - type: map_at_100 value: 58.233999999999995 - type: map_at_1000 value: 58.289 - type: map_at_3 value: 56.483000000000004 - type: map_at_5 value: 57.193000000000005 - type: mrr_at_1 value: 51.2 - type: mrr_at_10 value: 57.714 - type: mrr_at_100 value: 58.233000000000004 - type: 
mrr_at_1000 value: 58.288 - type: mrr_at_3 value: 56.483000000000004 - type: mrr_at_5 value: 57.193000000000005 - type: ndcg_at_1 value: 51.2 - type: ndcg_at_10 value: 60.63499999999999 - type: ndcg_at_100 value: 63.458000000000006 - type: ndcg_at_1000 value: 64.992 - type: ndcg_at_3 value: 58.11300000000001 - type: ndcg_at_5 value: 59.391000000000005 - type: precision_at_1 value: 51.2 - type: precision_at_10 value: 6.97 - type: precision_at_100 value: 0.836 - type: precision_at_1000 value: 0.096 - type: precision_at_3 value: 20.933 - type: precision_at_5 value: 13.18 - type: recall_at_1 value: 51.2 - type: recall_at_10 value: 69.69999999999999 - type: recall_at_100 value: 83.6 - type: recall_at_1000 value: 95.8 - type: recall_at_3 value: 62.8 - type: recall_at_5 value: 65.9 - task: type: Classification dataset: type: C-MTEB/MultilingualSentiment-classification name: MTEB MultilingualSentiment config: default split: validation revision: None metrics: - type: accuracy value: 73.39 - type: f1 value: 72.85739851837214 - task: type: PairClassification dataset: type: C-MTEB/OCNLI name: MTEB Ocnli config: default split: validation revision: None metrics: - type: cos_sim_accuracy value: 73.36220898754738 - type: cos_sim_ap value: 78.50045169678386 - type: cos_sim_f1 value: 75.3875968992248 - type: cos_sim_precision value: 69.65085049239033 - type: cos_sim_recall value: 82.15417106652588 - type: dot_accuracy value: 73.36220898754738 - type: dot_ap value: 78.50039148302838 - type: dot_f1 value: 75.3875968992248 - type: dot_precision value: 69.65085049239033 - type: dot_recall value: 82.15417106652588 - type: euclidean_accuracy value: 73.36220898754738 - type: euclidean_ap value: 78.50045169678386 - type: euclidean_f1 value: 75.3875968992248 - type: euclidean_precision value: 69.65085049239033 - type: euclidean_recall value: 82.15417106652588 - type: manhattan_accuracy value: 73.09149972929075 - type: manhattan_ap value: 78.40911589236852 - type: manhattan_f1 value: 
75.3623188405797 - type: manhattan_precision value: 69.45681211041853 - type: manhattan_recall value: 82.36536430834214 - type: max_accuracy value: 73.36220898754738 - type: max_ap value: 78.50045169678386 - type: max_f1 value: 75.3875968992248 - task: type: Classification dataset: type: C-MTEB/OnlineShopping-classification name: MTEB OnlineShopping config: default split: test revision: None metrics: - type: accuracy value: 91.81000000000002 - type: ap value: 89.35809579688139 - type: f1 value: 91.79220350456818 - task: type: STS dataset: type: C-MTEB/PAWSX name: MTEB PAWSX config: default split: test revision: None metrics: - type: cos_sim_pearson value: 30.06960208048424 - type: cos_sim_spearman value: 36.21568893707218 - type: euclidean_pearson value: 36.3789158810154 - type: euclidean_spearman value: 36.21568740241203 - type: manhattan_pearson value: 36.318190228955935 - type: manhattan_spearman value: 36.16813420759451 - task: type: STS dataset: type: C-MTEB/QBQTC name: MTEB QBQTC config: default split: test revision: None metrics: - type: cos_sim_pearson value: 36.779942621488736 - type: cos_sim_spearman value: 38.73716529566492 - type: euclidean_pearson value: 37.134107612179605 - type: euclidean_spearman value: 38.737099842399545 - type: manhattan_pearson value: 37.17579625045808 - type: manhattan_spearman value: 38.746051563332315 - task: type: STS dataset: type: mteb/sts22-crosslingual-sts name: MTEB STS22 (zh) config: zh split: test revision: 6d1ba47164174a496b7fa5d3569dae26a6813b80 metrics: - type: cos_sim_pearson value: 65.97416499132073 - type: cos_sim_spearman value: 68.87894646940939 - type: euclidean_pearson value: 67.2366929400408 - type: euclidean_spearman value: 68.87894646940939 - type: manhattan_pearson value: 67.30590304353478 - type: manhattan_spearman value: 68.90546655032796 - task: type: STS dataset: type: C-MTEB/STSB name: MTEB STSB config: default split: test revision: None metrics: - type: cos_sim_pearson value: 78.99420906581649 - 
type: cos_sim_spearman value: 79.36553449000968 - type: euclidean_pearson value: 78.77734144763518 - type: euclidean_spearman value: 79.36545230850567 - type: manhattan_pearson value: 78.82512507141092 - type: manhattan_spearman value: 79.43977311125059 - task: type: Reranking dataset: type: C-MTEB/T2Reranking name: MTEB T2Reranking config: default split: dev revision: None metrics: - type: map value: 66.38018284846501 - type: mrr value: 76.11180965277104 - task: type: Retrieval dataset: type: C-MTEB/T2Retrieval name: MTEB T2Retrieval config: default split: dev revision: None metrics: - type: map_at_1 value: 27.423 - type: map_at_10 value: 77.206 - type: map_at_100 value: 80.83500000000001 - type: map_at_1000 value: 80.9 - type: map_at_3 value: 54.190000000000005 - type: map_at_5 value: 66.662 - type: mrr_at_1 value: 90.049 - type: mrr_at_10 value: 92.48100000000001 - type: mrr_at_100 value: 92.567 - type: mrr_at_1000 value: 92.571 - type: mrr_at_3 value: 92.07 - type: mrr_at_5 value: 92.32900000000001 - type: ndcg_at_1 value: 90.049 - type: ndcg_at_10 value: 84.69 - type: ndcg_at_100 value: 88.254 - type: ndcg_at_1000 value: 88.89399999999999 - type: ndcg_at_3 value: 86.091 - type: ndcg_at_5 value: 84.685 - type: precision_at_1 value: 90.049 - type: precision_at_10 value: 42.141 - type: precision_at_100 value: 5.016 - type: precision_at_1000 value: 0.516 - type: precision_at_3 value: 75.352 - type: precision_at_5 value: 63.176 - type: recall_at_1 value: 27.423 - type: recall_at_10 value: 83.595 - type: recall_at_100 value: 95.21 - type: recall_at_1000 value: 98.503 - type: recall_at_3 value: 55.84400000000001 - type: recall_at_5 value: 69.987 - task: type: Classification dataset: type: C-MTEB/TNews-classification name: MTEB TNews config: default split: validation revision: None metrics: - type: accuracy value: 51.927 - type: f1 value: 50.16838216110367 - task: type: Clustering dataset: type: C-MTEB/ThuNewsClusteringP2P name: MTEB ThuNewsClusteringP2P config: 
default split: test revision: None metrics: - type: v_measure value: 60.85131720842154 - task: type: Clustering dataset: type: C-MTEB/ThuNewsClusteringS2S name: MTEB ThuNewsClusteringS2S config: default split: test revision: None metrics: - type: v_measure value: 57.0921610946628 - task: type: Retrieval dataset: type: C-MTEB/VideoRetrieval name: MTEB VideoRetrieval config: default split: dev revision: None metrics: - type: map_at_1 value: 56.99999999999999 - type: map_at_10 value: 67.611 - type: map_at_100 value: 68.095 - type: map_at_1000 value: 68.10300000000001 - type: map_at_3 value: 65.75 - type: map_at_5 value: 66.93 - type: mrr_at_1 value: 56.89999999999999 - type: mrr_at_10 value: 67.561 - type: mrr_at_100 value: 68.045 - type: mrr_at_1000 value: 68.053 - type: mrr_at_3 value: 65.7 - type: mrr_at_5 value: 66.88 - type: ndcg_at_1 value: 56.99999999999999 - type: ndcg_at_10 value: 72.25200000000001 - type: ndcg_at_100 value: 74.542 - type: ndcg_at_1000 value: 74.725 - type: ndcg_at_3 value: 68.47 - type: ndcg_at_5 value: 70.583 - type: precision_at_1 value: 56.99999999999999 - type: precision_at_10 value: 8.66 - type: precision_at_100 value: 0.972 - type: precision_at_1000 value: 0.099 - type: precision_at_3 value: 25.433 - type: precision_at_5 value: 16.28 - type: recall_at_1 value: 56.99999999999999 - type: recall_at_10 value: 86.6 - type: recall_at_100 value: 97.2 - type: recall_at_1000 value: 98.6 - type: recall_at_3 value: 76.3 - type: recall_at_5 value: 81.39999999999999 - task: type: Classification dataset: type: C-MTEB/waimai-classification name: MTEB Waimai config: default split: test revision: None metrics: - type: accuracy value: 87.10000000000001 - type: ap value: 70.81766065881429 - type: f1 value: 85.5323306120456 license: apache-2.0 language: - zh --- An attempt at an embedding model: the method is the same as stella-v2; I just extended the context length of tao. (I found that if you want to use the full 8k context, you may need to convert the
model to float32.) I am now working on tao-v2, which will have a different structure. I will release tao-v2 as soon as I can. Thank you to the open source community.