--- pipeline_tag: sentence-similarity tags: - sentence-transformers - feature-extraction - sentence-similarity - mteb model-index: - name: tao results: - task: type: STS dataset: type: C-MTEB/AFQMC name: MTEB AFQMC config: default split: validation revision: None metrics: - type: cos_sim_pearson value: 47.33752515292192 - type: cos_sim_spearman value: 49.940772056837176 - type: euclidean_pearson value: 48.12147487857213 - type: euclidean_spearman value: 49.9407519488174 - type: manhattan_pearson value: 48.07550286372865 - type: manhattan_spearman value: 49.89535645392862 - task: type: STS dataset: type: C-MTEB/ATEC name: MTEB ATEC config: default split: test revision: None metrics: - type: cos_sim_pearson value: 50.976865711125626 - type: cos_sim_spearman value: 53.113084748593465 - type: euclidean_pearson value: 55.1209592747571 - type: euclidean_spearman value: 53.11308362230699 - type: manhattan_pearson value: 55.09799309322416 - type: manhattan_spearman value: 53.108059998577076 - task: type: Classification dataset: type: mteb/amazon_reviews_multi name: MTEB AmazonReviewsClassification (zh) config: zh split: test revision: 1399c76144fd37290681b995c656ef9b2e06e26d metrics: - type: accuracy value: 40.812 - type: f1 value: 39.02060856097395 - task: type: STS dataset: type: C-MTEB/BQ name: MTEB BQ config: default split: test revision: None metrics: - type: cos_sim_pearson value: 62.84336868097746 - type: cos_sim_spearman value: 65.540605433497 - type: euclidean_pearson value: 64.08759819387913 - type: euclidean_spearman value: 65.54060543369363 - type: manhattan_pearson value: 64.09334283385029 - type: manhattan_spearman value: 65.55376209169398 - task: type: Clustering dataset: type: C-MTEB/CLSClusteringP2P name: MTEB CLSClusteringP2P config: default split: test revision: None metrics: - type: v_measure value: 39.964020691388505 - task: type: Clustering dataset: type: C-MTEB/CLSClusteringS2S name: MTEB CLSClusteringS2S config: default split: test revision: None 
metrics: - type: v_measure value: 38.18628830038994 - task: type: Reranking dataset: type: C-MTEB/CMedQAv1-reranking name: MTEB CMedQAv1 config: default split: test revision: None metrics: - type: map value: 85.34294439514511 - type: mrr value: 88.03849206349206 - task: type: Reranking dataset: type: C-MTEB/CMedQAv2-reranking name: MTEB CMedQAv2 config: default split: test revision: None metrics: - type: map value: 85.87127698007234 - type: mrr value: 88.57980158730159 - task: type: Retrieval dataset: type: C-MTEB/CmedqaRetrieval name: MTEB CmedqaRetrieval config: default split: dev revision: None metrics: - type: map_at_1 value: 24.484 - type: map_at_10 value: 36.3 - type: map_at_100 value: 38.181 - type: map_at_1000 value: 38.305 - type: map_at_3 value: 32.39 - type: map_at_5 value: 34.504000000000005 - type: mrr_at_1 value: 37.608999999999995 - type: mrr_at_10 value: 45.348 - type: mrr_at_100 value: 46.375 - type: mrr_at_1000 value: 46.425 - type: mrr_at_3 value: 42.969 - type: mrr_at_5 value: 44.285999999999994 - type: ndcg_at_1 value: 37.608999999999995 - type: ndcg_at_10 value: 42.675999999999995 - type: ndcg_at_100 value: 50.12799999999999 - type: ndcg_at_1000 value: 52.321 - type: ndcg_at_3 value: 37.864 - type: ndcg_at_5 value: 39.701 - type: precision_at_1 value: 37.608999999999995 - type: precision_at_10 value: 9.527 - type: precision_at_100 value: 1.555 - type: precision_at_1000 value: 0.183 - type: precision_at_3 value: 21.547 - type: precision_at_5 value: 15.504000000000001 - type: recall_at_1 value: 24.484 - type: recall_at_10 value: 52.43299999999999 - type: recall_at_100 value: 83.446 - type: recall_at_1000 value: 98.24199999999999 - type: recall_at_3 value: 37.653 - type: recall_at_5 value: 43.643 - task: type: PairClassification dataset: type: C-MTEB/CMNLI name: MTEB Cmnli config: default split: validation revision: None metrics: - type: cos_sim_accuracy value: 77.71497294046902 - type: cos_sim_ap value: 86.84542027578229 - type: cos_sim_f1 
value: 79.31987247608926 - type: cos_sim_precision value: 72.70601987142022 - type: cos_sim_recall value: 87.2574234276362 - type: dot_accuracy value: 77.71497294046902 - type: dot_ap value: 86.86514752961159 - type: dot_f1 value: 79.31987247608926 - type: dot_precision value: 72.70601987142022 - type: dot_recall value: 87.2574234276362 - type: euclidean_accuracy value: 77.71497294046902 - type: euclidean_ap value: 86.84541456571337 - type: euclidean_f1 value: 79.31987247608926 - type: euclidean_precision value: 72.70601987142022 - type: euclidean_recall value: 87.2574234276362 - type: manhattan_accuracy value: 77.8111846061335 - type: manhattan_ap value: 86.81148050422539 - type: manhattan_f1 value: 79.41176470588236 - type: manhattan_precision value: 72.52173913043478 - type: manhattan_recall value: 87.74842179097499 - type: max_accuracy value: 77.8111846061335 - type: max_ap value: 86.86514752961159 - type: max_f1 value: 79.41176470588236 - task: type: Retrieval dataset: type: C-MTEB/CovidRetrieval name: MTEB CovidRetrieval config: default split: dev revision: None metrics: - type: map_at_1 value: 68.862 - type: map_at_10 value: 77.079 - type: map_at_100 value: 77.428 - type: map_at_1000 value: 77.432 - type: map_at_3 value: 75.40400000000001 - type: map_at_5 value: 76.227 - type: mrr_at_1 value: 69.02000000000001 - type: mrr_at_10 value: 77.04299999999999 - type: mrr_at_100 value: 77.391 - type: mrr_at_1000 value: 77.395 - type: mrr_at_3 value: 75.44800000000001 - type: mrr_at_5 value: 76.23299999999999 - type: ndcg_at_1 value: 69.02000000000001 - type: ndcg_at_10 value: 80.789 - type: ndcg_at_100 value: 82.27499999999999 - type: ndcg_at_1000 value: 82.381 - type: ndcg_at_3 value: 77.40599999999999 - type: ndcg_at_5 value: 78.87100000000001 - type: precision_at_1 value: 69.02000000000001 - type: precision_at_10 value: 9.336 - type: precision_at_100 value: 0.9990000000000001 - type: precision_at_1000 value: 0.101 - type: precision_at_3 value: 27.889000000000003 
- type: precision_at_5 value: 17.492 - type: recall_at_1 value: 68.862 - type: recall_at_10 value: 92.308 - type: recall_at_100 value: 98.84100000000001 - type: recall_at_1000 value: 99.684 - type: recall_at_3 value: 83.087 - type: recall_at_5 value: 86.617 - task: type: Retrieval dataset: type: C-MTEB/DuRetrieval name: MTEB DuRetrieval config: default split: dev revision: None metrics: - type: map_at_1 value: 25.063999999999997 - type: map_at_10 value: 78.014 - type: map_at_100 value: 81.021 - type: map_at_1000 value: 81.059 - type: map_at_3 value: 53.616 - type: map_at_5 value: 68.00399999999999 - type: mrr_at_1 value: 87.8 - type: mrr_at_10 value: 91.824 - type: mrr_at_100 value: 91.915 - type: mrr_at_1000 value: 91.917 - type: mrr_at_3 value: 91.525 - type: mrr_at_5 value: 91.752 - type: ndcg_at_1 value: 87.8 - type: ndcg_at_10 value: 85.74199999999999 - type: ndcg_at_100 value: 88.82900000000001 - type: ndcg_at_1000 value: 89.208 - type: ndcg_at_3 value: 84.206 - type: ndcg_at_5 value: 83.421 - type: precision_at_1 value: 87.8 - type: precision_at_10 value: 41.325 - type: precision_at_100 value: 4.8 - type: precision_at_1000 value: 0.48900000000000005 - type: precision_at_3 value: 75.783 - type: precision_at_5 value: 64.25999999999999 - type: recall_at_1 value: 25.063999999999997 - type: recall_at_10 value: 87.324 - type: recall_at_100 value: 97.261 - type: recall_at_1000 value: 99.309 - type: recall_at_3 value: 56.281000000000006 - type: recall_at_5 value: 73.467 - task: type: Retrieval dataset: type: C-MTEB/EcomRetrieval name: MTEB EcomRetrieval config: default split: dev revision: None metrics: - type: map_at_1 value: 46.800000000000004 - type: map_at_10 value: 56.887 - type: map_at_100 value: 57.556 - type: map_at_1000 value: 57.582 - type: map_at_3 value: 54.15 - type: map_at_5 value: 55.825 - type: mrr_at_1 value: 46.800000000000004 - type: mrr_at_10 value: 56.887 - type: mrr_at_100 value: 57.556 - type: mrr_at_1000 value: 57.582 - type: mrr_at_3 value: 
54.15 - type: mrr_at_5 value: 55.825 - type: ndcg_at_1 value: 46.800000000000004 - type: ndcg_at_10 value: 62.061 - type: ndcg_at_100 value: 65.042 - type: ndcg_at_1000 value: 65.658 - type: ndcg_at_3 value: 56.52700000000001 - type: ndcg_at_5 value: 59.518 - type: precision_at_1 value: 46.800000000000004 - type: precision_at_10 value: 7.84 - type: precision_at_100 value: 0.9169999999999999 - type: precision_at_1000 value: 0.096 - type: precision_at_3 value: 21.133 - type: precision_at_5 value: 14.12 - type: recall_at_1 value: 46.800000000000004 - type: recall_at_10 value: 78.4 - type: recall_at_100 value: 91.7 - type: recall_at_1000 value: 96.39999999999999 - type: recall_at_3 value: 63.4 - type: recall_at_5 value: 70.6 - task: type: Classification dataset: type: C-MTEB/IFlyTek-classification name: MTEB IFlyTek config: default split: validation revision: None metrics: - type: accuracy value: 48.010773374374764 - type: f1 value: 35.25314495210735 - task: type: Classification dataset: type: C-MTEB/JDReview-classification name: MTEB JDReview config: default split: test revision: None metrics: - type: accuracy value: 87.01688555347093 - type: ap value: 56.39167630414159 - type: f1 value: 81.91756262306008 - task: type: STS dataset: type: C-MTEB/LCQMC name: MTEB LCQMC config: default split: test revision: None metrics: - type: cos_sim_pearson value: 71.17867432738112 - type: cos_sim_spearman value: 77.47954247528372 - type: euclidean_pearson value: 76.32408876437825 - type: euclidean_spearman value: 77.47954025694959 - type: manhattan_pearson value: 76.33345801575938 - type: manhattan_spearman value: 77.48901582125997 - task: type: Reranking dataset: type: C-MTEB/Mmarco-reranking name: MTEB MMarcoReranking config: default split: dev revision: None metrics: - type: map value: 27.96333052746654 - type: mrr value: 26.92023809523809 - task: type: Retrieval dataset: type: C-MTEB/MMarcoRetrieval name: MTEB MMarcoRetrieval config: default split: dev revision: None metrics: - 
type: map_at_1 value: 66.144 - type: map_at_10 value: 75.036 - type: map_at_100 value: 75.36 - type: map_at_1000 value: 75.371 - type: map_at_3 value: 73.258 - type: map_at_5 value: 74.369 - type: mrr_at_1 value: 68.381 - type: mrr_at_10 value: 75.633 - type: mrr_at_100 value: 75.91799999999999 - type: mrr_at_1000 value: 75.928 - type: mrr_at_3 value: 74.093 - type: mrr_at_5 value: 75.036 - type: ndcg_at_1 value: 68.381 - type: ndcg_at_10 value: 78.661 - type: ndcg_at_100 value: 80.15 - type: ndcg_at_1000 value: 80.456 - type: ndcg_at_3 value: 75.295 - type: ndcg_at_5 value: 77.14999999999999 - type: precision_at_1 value: 68.381 - type: precision_at_10 value: 9.481 - type: precision_at_100 value: 1.023 - type: precision_at_1000 value: 0.105 - type: precision_at_3 value: 28.309 - type: precision_at_5 value: 17.974 - type: recall_at_1 value: 66.144 - type: recall_at_10 value: 89.24499999999999 - type: recall_at_100 value: 96.032 - type: recall_at_1000 value: 98.437 - type: recall_at_3 value: 80.327 - type: recall_at_5 value: 84.733 - task: type: Classification dataset: type: mteb/amazon_massive_intent name: MTEB MassiveIntentClassification (zh-CN) config: zh-CN split: test revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 metrics: - type: accuracy value: 68.26832548755884 - type: f1 value: 65.97422207086723 - task: type: Classification dataset: type: mteb/amazon_massive_scenario name: MTEB MassiveScenarioClassification (zh-CN) config: zh-CN split: test revision: 7d571f92784cd94a019292a1f45445077d0ef634 metrics: - type: accuracy value: 73.13046402151984 - type: f1 value: 72.69199129694121 - task: type: Retrieval dataset: type: C-MTEB/MedicalRetrieval name: MTEB MedicalRetrieval config: default split: dev revision: None metrics: - type: map_at_1 value: 50.4 - type: map_at_10 value: 56.645 - type: map_at_100 value: 57.160999999999994 - type: map_at_1000 value: 57.218 - type: map_at_3 value: 55.383 - type: map_at_5 value: 56.08800000000001 - type: mrr_at_1 value: 50.6 - 
type: mrr_at_10 value: 56.745999999999995 - type: mrr_at_100 value: 57.262 - type: mrr_at_1000 value: 57.318999999999996 - type: mrr_at_3 value: 55.483000000000004 - type: mrr_at_5 value: 56.188 - type: ndcg_at_1 value: 50.4 - type: ndcg_at_10 value: 59.534 - type: ndcg_at_100 value: 62.400999999999996 - type: ndcg_at_1000 value: 64.01299999999999 - type: ndcg_at_3 value: 56.887 - type: ndcg_at_5 value: 58.160000000000004 - type: precision_at_1 value: 50.4 - type: precision_at_10 value: 6.859999999999999 - type: precision_at_100 value: 0.828 - type: precision_at_1000 value: 0.096 - type: precision_at_3 value: 20.4 - type: precision_at_5 value: 12.86 - type: recall_at_1 value: 50.4 - type: recall_at_10 value: 68.60000000000001 - type: recall_at_100 value: 82.8 - type: recall_at_1000 value: 95.7 - type: recall_at_3 value: 61.199999999999996 - type: recall_at_5 value: 64.3 - task: type: Classification dataset: type: C-MTEB/MultilingualSentiment-classification name: MTEB MultilingualSentiment config: default split: validation revision: None metrics: - type: accuracy value: 73.39666666666666 - type: f1 value: 72.86349039489504 - task: type: PairClassification dataset: type: C-MTEB/OCNLI name: MTEB Ocnli config: default split: validation revision: None metrics: - type: cos_sim_accuracy value: 73.36220898754738 - type: cos_sim_ap value: 78.50300066088354 - type: cos_sim_f1 value: 75.39370078740157 - type: cos_sim_precision value: 70.59907834101382 - type: cos_sim_recall value: 80.8870116156283 - type: dot_accuracy value: 73.36220898754738 - type: dot_ap value: 78.50300066088354 - type: dot_f1 value: 75.39370078740157 - type: dot_precision value: 70.59907834101382 - type: dot_recall value: 80.8870116156283 - type: euclidean_accuracy value: 73.36220898754738 - type: euclidean_ap value: 78.50300066088354 - type: euclidean_f1 value: 75.39370078740157 - type: euclidean_precision value: 70.59907834101382 - type: euclidean_recall value: 80.8870116156283 - type: 
manhattan_accuracy value: 73.09149972929075 - type: manhattan_ap value: 78.41160715817406 - type: manhattan_f1 value: 75.3623188405797 - type: manhattan_precision value: 69.45681211041853 - type: manhattan_recall value: 82.36536430834214 - type: max_accuracy value: 73.36220898754738 - type: max_ap value: 78.50300066088354 - type: max_f1 value: 75.39370078740157 - task: type: Classification dataset: type: C-MTEB/OnlineShopping-classification name: MTEB OnlineShopping config: default split: test revision: None metrics: - type: accuracy value: 91.82000000000001 - type: ap value: 89.3671278896903 - type: f1 value: 91.8021970144045 - task: type: STS dataset: type: C-MTEB/PAWSX name: MTEB PAWSX config: default split: test revision: None metrics: - type: cos_sim_pearson value: 30.07022294131062 - type: cos_sim_spearman value: 36.21542804954441 - type: euclidean_pearson value: 36.37841945307606 - type: euclidean_spearman value: 36.215513214835546 - type: manhattan_pearson value: 36.31755715017088 - type: manhattan_spearman value: 36.16848256918425 - task: type: STS dataset: type: C-MTEB/QBQTC name: MTEB QBQTC config: default split: test revision: None metrics: - type: cos_sim_pearson value: 36.779755871073505 - type: cos_sim_spearman value: 38.736220679196606 - type: euclidean_pearson value: 37.13356686891227 - type: euclidean_spearman value: 38.73619198602118 - type: manhattan_pearson value: 37.175466658530816 - type: manhattan_spearman value: 38.74523158724344 - task: type: STS dataset: type: mteb/sts22-crosslingual-sts name: MTEB STS22 (zh) config: zh split: test revision: 6d1ba47164174a496b7fa5d3569dae26a6813b80 metrics: - type: cos_sim_pearson value: 65.9737863254904 - type: cos_sim_spearman value: 68.88293545840186 - type: euclidean_pearson value: 67.23730973929247 - type: euclidean_spearman value: 68.88293545840186 - type: manhattan_pearson value: 67.30647960940956 - type: manhattan_spearman value: 68.90553460682702 - task: type: STS dataset: type: C-MTEB/STSB name: 
MTEB STSB config: default split: test revision: None metrics: - type: cos_sim_pearson value: 78.99371432933002 - type: cos_sim_spearman value: 79.36496709214312 - type: euclidean_pearson value: 78.77721120706431 - type: euclidean_spearman value: 79.36500761622595 - type: manhattan_pearson value: 78.82503201285202 - type: manhattan_spearman value: 79.43915548337401 - task: type: Reranking dataset: type: C-MTEB/T2Reranking name: MTEB T2Reranking config: default split: dev revision: None metrics: - type: map value: 66.38418982516941 - type: mrr value: 76.09996131153883 - task: type: Retrieval dataset: type: C-MTEB/T2Retrieval name: MTEB T2Retrieval config: default split: dev revision: None metrics: - type: map_at_1 value: 27.426000000000002 - type: map_at_10 value: 77.209 - type: map_at_100 value: 80.838 - type: map_at_1000 value: 80.903 - type: map_at_3 value: 54.196 - type: map_at_5 value: 66.664 - type: mrr_at_1 value: 90.049 - type: mrr_at_10 value: 92.482 - type: mrr_at_100 value: 92.568 - type: mrr_at_1000 value: 92.572 - type: mrr_at_3 value: 92.072 - type: mrr_at_5 value: 92.33 - type: ndcg_at_1 value: 90.049 - type: ndcg_at_10 value: 84.69200000000001 - type: ndcg_at_100 value: 88.25699999999999 - type: ndcg_at_1000 value: 88.896 - type: ndcg_at_3 value: 86.09700000000001 - type: ndcg_at_5 value: 84.68599999999999 - type: precision_at_1 value: 90.049 - type: precision_at_10 value: 42.142 - type: precision_at_100 value: 5.017 - type: precision_at_1000 value: 0.516 - type: precision_at_3 value: 75.358 - type: precision_at_5 value: 63.173 - type: recall_at_1 value: 27.426000000000002 - type: recall_at_10 value: 83.59400000000001 - type: recall_at_100 value: 95.21 - type: recall_at_1000 value: 98.503 - type: recall_at_3 value: 55.849000000000004 - type: recall_at_5 value: 69.986 - task: type: Classification dataset: type: C-MTEB/TNews-classification name: MTEB TNews config: default split: validation revision: None metrics: - type: accuracy value: 
51.925999999999995 - type: f1 value: 50.16867723626971 - task: type: Clustering dataset: type: C-MTEB/ThuNewsClusteringP2P name: MTEB ThuNewsClusteringP2P config: default split: test revision: None metrics: - type: v_measure value: 60.738901671970005 - task: type: Clustering dataset: type: C-MTEB/ThuNewsClusteringS2S name: MTEB ThuNewsClusteringS2S config: default split: test revision: None metrics: - type: v_measure value: 57.08563183138733 - task: type: Retrieval dataset: type: C-MTEB/VideoRetrieval name: MTEB VideoRetrieval config: default split: dev revision: None metrics: - type: map_at_1 value: 52 - type: map_at_10 value: 62.956 - type: map_at_100 value: 63.491 - type: map_at_1000 value: 63.50599999999999 - type: map_at_3 value: 60.733000000000004 - type: map_at_5 value: 62.217999999999996 - type: mrr_at_1 value: 52 - type: mrr_at_10 value: 62.956 - type: mrr_at_100 value: 63.491 - type: mrr_at_1000 value: 63.50599999999999 - type: mrr_at_3 value: 60.733000000000004 - type: mrr_at_5 value: 62.217999999999996 - type: ndcg_at_1 value: 52 - type: ndcg_at_10 value: 67.956 - type: ndcg_at_100 value: 70.536 - type: ndcg_at_1000 value: 70.908 - type: ndcg_at_3 value: 63.456999999999994 - type: ndcg_at_5 value: 66.155 - type: precision_at_1 value: 52 - type: precision_at_10 value: 8.35 - type: precision_at_100 value: 0.955 - type: precision_at_1000 value: 0.098 - type: precision_at_3 value: 23.767 - type: precision_at_5 value: 15.58 - type: recall_at_1 value: 52 - type: recall_at_10 value: 83.5 - type: recall_at_100 value: 95.5 - type: recall_at_1000 value: 98.4 - type: recall_at_3 value: 71.3 - type: recall_at_5 value: 77.9 - task: type: Classification dataset: type: C-MTEB/waimai-classification name: MTEB Waimai config: default split: test revision: None metrics: - type: accuracy value: 87.10000000000001 - type: ap value: 70.81766065881429 - type: f1 value: 85.5323306120456 license: apache-2.0 language: - zh --- An attempt at an embedding model: The method is the same as
stella-v2; I just fine-tuned it on a small dataset as a test. I'm now working on tao-v2, which will have a different structure. I will release tao-v2 as soon as I can. Thank you to the open-source community.