|
--- |
|
library_name: sentence-transformers |
|
pipeline_tag: sentence-similarity |
|
tags: |
|
- sentence-transformers |
|
- feature-extraction |
|
- sentence-similarity |
|
- transformers |
|
- sentence-embedding |
|
- mteb |
|
model-index: |
|
- name: e433e634850d125d8b85bee76db3a3b6a0c3bf56 |
|
results: |
|
- task: |
|
type: Clustering |
|
dataset: |
|
type: lyon-nlp/alloprof |
|
name: MTEB AlloProfClusteringP2P |
|
config: default |
|
split: test |
|
revision: 392ba3f5bcc8c51f578786c1fc3dae648662cb9b |
|
metrics: |
|
- type: v_measure |
|
value: 56.88600728743999 |
|
- type: v_measures |
|
value: [0.5396081553520281, 0.6022872403200437, 0.5515205944691852, 0.5595772885785736, 0.5632413941951575] |
|
- task: |
|
type: Clustering |
|
dataset: |
|
type: lyon-nlp/alloprof |
|
name: MTEB AlloProfClusteringS2S |
|
config: default |
|
split: test |
|
revision: 392ba3f5bcc8c51f578786c1fc3dae648662cb9b |
|
metrics: |
|
- type: v_measure |
|
value: 38.199527329051804 |
|
- type: v_measures |
|
value: [0.42157254138936706, 0.36882298663461527, 0.3134327610337458, 0.40391031391690396, 0.3832775043562133] |
|
- task: |
|
type: Reranking |
|
dataset: |
|
type: lyon-nlp/mteb-fr-reranking-alloprof-s2p |
|
name: MTEB AlloprofReranking |
|
config: default |
|
split: test |
|
revision: 65393d0d7a08a10b4e348135e824f385d420b0fd |
|
metrics: |
|
- type: map |
|
value: 68.73372257500206 |
|
- type: mrr |
|
value: 70.07434479260904 |
|
- type: nAUC_map_diff1 |
|
value: 50.95933484071007 |
|
- type: nAUC_map_max |
|
value: 13.75463910519138 |
|
- type: nAUC_mrr_diff1 |
|
value: 50.494303783447656 |
|
- type: nAUC_mrr_max |
|
value: 14.460935217916187 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: lyon-nlp/alloprof |
|
name: MTEB AlloprofRetrieval |
|
config: default |
|
split: test |
|
revision: fcf295ea64c750f41fadbaa37b9b861558e1bfbd |
|
metrics: |
|
- type: map_at_1 |
|
value: 21.675 |
|
- type: map_at_10 |
|
value: 32.274 |
|
- type: map_at_100 |
|
value: 33.316 |
|
- type: map_at_1000 |
|
value: 33.387 |
|
- type: map_at_20 |
|
value: 32.864 |
|
- type: map_at_3 |
|
value: 29.166999999999998 |
|
- type: map_at_5 |
|
value: 30.946 |
|
- type: mrr_at_1 |
|
value: 21.675302245250432 |
|
- type: mrr_at_10 |
|
value: 32.274309839076714 |
|
- type: mrr_at_100 |
|
value: 33.31571024590564 |
|
- type: mrr_at_1000 |
|
value: 33.3868130424392 |
|
- type: mrr_at_20 |
|
value: 32.863978562081925 |
|
- type: mrr_at_3 |
|
value: 29.16666666666669 |
|
- type: mrr_at_5 |
|
value: 30.94559585492234 |
|
- type: nauc_map_at_1000_diff1 |
|
value: 34.85808309940442 |
|
- type: nauc_map_at_1000_max |
|
value: 31.058801579682825 |
|
- type: nauc_map_at_100_diff1 |
|
value: 34.842898344470846 |
|
- type: nauc_map_at_100_max |
|
value: 31.077561464904342 |
|
- type: nauc_map_at_10_diff1 |
|
value: 34.6773118480208 |
|
- type: nauc_map_at_10_max |
|
value: 30.8489850780642 |
|
- type: nauc_map_at_1_diff1 |
|
value: 40.65773695743684 |
|
- type: nauc_map_at_1_max |
|
value: 28.766036921254617 |
|
- type: nauc_map_at_20_diff1 |
|
value: 34.73935242577166 |
|
- type: nauc_map_at_20_max |
|
value: 31.03143938077287 |
|
- type: nauc_map_at_3_diff1 |
|
value: 35.12059625476991 |
|
- type: nauc_map_at_3_max |
|
value: 30.48787855768291 |
|
- type: nauc_map_at_5_diff1 |
|
value: 34.73453235094986 |
|
- type: nauc_map_at_5_max |
|
value: 30.3860304682398 |
|
- type: nauc_mrr_at_1000_diff1 |
|
value: 34.85808309940442 |
|
- type: nauc_mrr_at_1000_max |
|
value: 31.058801579682825 |
|
- type: nauc_mrr_at_100_diff1 |
|
value: 34.842898344470846 |
|
- type: nauc_mrr_at_100_max |
|
value: 31.077561464904342 |
|
- type: nauc_mrr_at_10_diff1 |
|
value: 34.6773118480208 |
|
- type: nauc_mrr_at_10_max |
|
value: 30.8489850780642 |
|
- type: nauc_mrr_at_1_diff1 |
|
value: 40.65773695743684 |
|
- type: nauc_mrr_at_1_max |
|
value: 28.766036921254617 |
|
- type: nauc_mrr_at_20_diff1 |
|
value: 34.73935242577166 |
|
- type: nauc_mrr_at_20_max |
|
value: 31.03143938077287 |
|
- type: nauc_mrr_at_3_diff1 |
|
value: 35.12059625476991 |
|
- type: nauc_mrr_at_3_max |
|
value: 30.48787855768291 |
|
- type: nauc_mrr_at_5_diff1 |
|
value: 34.73453235094986 |
|
- type: nauc_mrr_at_5_max |
|
value: 30.3860304682398 |
|
- type: nauc_ndcg_at_1000_diff1 |
|
value: 34.04342467121623 |
|
- type: nauc_ndcg_at_1000_max |
|
value: 32.311398352704686 |
|
- type: nauc_ndcg_at_100_diff1 |
|
value: 33.67278941726764 |
|
- type: nauc_ndcg_at_100_max |
|
value: 33.0229606203184 |
|
- type: nauc_ndcg_at_10_diff1 |
|
value: 32.93808280492078 |
|
- type: nauc_ndcg_at_10_max |
|
value: 32.07111775221638 |
|
- type: nauc_ndcg_at_1_diff1 |
|
value: 40.65773695743684 |
|
- type: nauc_ndcg_at_1_max |
|
value: 28.766036921254617 |
|
- type: nauc_ndcg_at_20_diff1 |
|
value: 33.141323431064585 |
|
- type: nauc_ndcg_at_20_max |
|
value: 32.76436962238286 |
|
- type: nauc_ndcg_at_3_diff1 |
|
value: 33.77769745974645 |
|
- type: nauc_ndcg_at_3_max |
|
value: 31.072988073016912 |
|
- type: nauc_ndcg_at_5_diff1 |
|
value: 33.091582792245696 |
|
- type: nauc_ndcg_at_5_max |
|
value: 30.92378976230745 |
|
- type: nauc_precision_at_1000_diff1 |
|
value: 33.74743287990321 |
|
- type: nauc_precision_at_1000_max |
|
value: 60.08005213097628 |
|
- type: nauc_precision_at_100_diff1 |
|
value: 28.869275501873236 |
|
- type: nauc_precision_at_100_max |
|
value: 46.35483380447927 |
|
- type: nauc_precision_at_10_diff1 |
|
value: 27.910043146581497 |
|
- type: nauc_precision_at_10_max |
|
value: 36.07399824307888 |
|
- type: nauc_precision_at_1_diff1 |
|
value: 40.65773695743684 |
|
- type: nauc_precision_at_1_max |
|
value: 28.766036921254617 |
|
- type: nauc_precision_at_20_diff1 |
|
value: 28.144265629196163 |
|
- type: nauc_precision_at_20_max |
|
value: 39.60361579056115 |
|
- type: nauc_precision_at_3_diff1 |
|
value: 30.31893725671278 |
|
- type: nauc_precision_at_3_max |
|
value: 32.63695126407254 |
|
- type: nauc_precision_at_5_diff1 |
|
value: 28.699678130380235 |
|
- type: nauc_precision_at_5_max |
|
value: 32.37908851919098 |
|
- type: nauc_recall_at_1000_diff1 |
|
value: 33.74743287990342 |
|
- type: nauc_recall_at_1000_max |
|
value: 60.080052130975346 |
|
- type: nauc_recall_at_100_diff1 |
|
value: 28.869275501873247 |
|
- type: nauc_recall_at_100_max |
|
value: 46.35483380447917 |
|
- type: nauc_recall_at_10_diff1 |
|
value: 27.910043146581508 |
|
- type: nauc_recall_at_10_max |
|
value: 36.07399824307888 |
|
- type: nauc_recall_at_1_diff1 |
|
value: 40.65773695743684 |
|
- type: nauc_recall_at_1_max |
|
value: 28.766036921254617 |
|
- type: nauc_recall_at_20_diff1 |
|
value: 28.14426562919617 |
|
- type: nauc_recall_at_20_max |
|
value: 39.60361579056118 |
|
- type: nauc_recall_at_3_diff1 |
|
value: 30.318937256712804 |
|
- type: nauc_recall_at_3_max |
|
value: 32.63695126407256 |
|
- type: nauc_recall_at_5_diff1 |
|
value: 28.699678130380224 |
|
- type: nauc_recall_at_5_max |
|
value: 32.37908851919102 |
|
- type: ndcg_at_1 |
|
value: 21.675 |
|
- type: ndcg_at_10 |
|
value: 38.06 |
|
- type: ndcg_at_100 |
|
value: 43.491 |
|
- type: ndcg_at_1000 |
|
value: 45.432 |
|
- type: ndcg_at_20 |
|
value: 40.217000000000006 |
|
- type: ndcg_at_3 |
|
value: 31.642 |
|
- type: ndcg_at_5 |
|
value: 34.837 |
|
- type: precision_at_1 |
|
value: 21.675 |
|
- type: precision_at_10 |
|
value: 5.652 |
|
- type: precision_at_100 |
|
value: 0.827 |
|
- type: precision_at_1000 |
|
value: 0.098 |
|
- type: precision_at_20 |
|
value: 3.253 |
|
- type: precision_at_3 |
|
value: 12.939 |
|
- type: precision_at_5 |
|
value: 9.309000000000001 |
|
- type: recall_at_1 |
|
value: 21.675 |
|
- type: recall_at_10 |
|
value: 56.52 |
|
- type: recall_at_100 |
|
value: 82.729 |
|
- type: recall_at_1000 |
|
value: 98.1 |
|
- type: recall_at_20 |
|
value: 65.069 |
|
- type: recall_at_3 |
|
value: 38.817 |
|
- type: recall_at_5 |
|
value: 46.546 |
|
- task: |
|
type: Classification |
|
dataset: |
|
type: mteb/amazon_reviews_multi |
|
name: MTEB AmazonReviewsClassification (fr) |
|
config: fr |
|
split: test |
|
revision: 1399c76144fd37290681b995c656ef9b2e06e26d |
|
metrics: |
|
- type: accuracy |
|
value: 43.51 |
|
- type: f1 |
|
value: 41.3284674671926 |
|
- type: f1_weighted |
|
value: 41.3284674671926 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: maastrichtlawtech/bsard |
|
name: MTEB BSARDRetrieval |
|
config: default |
|
split: test |
|
revision: 5effa1b9b5fa3b0f9e12523e6e43e5f86a6e6d59 |
|
metrics: |
|
- type: map_at_1 |
|
value: 5.405 |
|
- type: map_at_10 |
|
value: 9.008 |
|
- type: map_at_100 |
|
value: 9.932 |
|
- type: map_at_1000 |
|
value: 10.042 |
|
- type: map_at_20 |
|
value: 9.389 |
|
- type: map_at_3 |
|
value: 7.883 |
|
- type: map_at_5 |
|
value: 8.626000000000001 |
|
- type: mrr_at_1 |
|
value: 5.405405405405405 |
|
- type: mrr_at_10 |
|
value: 9.007579007579007 |
|
- type: mrr_at_100 |
|
value: 9.931517094611667 |
|
- type: mrr_at_1000 |
|
value: 10.0416462267215 |
|
- type: mrr_at_20 |
|
value: 9.38869595990339 |
|
- type: mrr_at_3 |
|
value: 7.882882882882883 |
|
- type: mrr_at_5 |
|
value: 8.626126126126126 |
|
- type: nauc_map_at_1000_diff1 |
|
value: 23.53549434486455 |
|
- type: nauc_map_at_1000_max |
|
value: 9.977010641647402 |
|
- type: nauc_map_at_100_diff1 |
|
value: 23.50007884241435 |
|
- type: nauc_map_at_100_max |
|
value: 9.984274734441085 |
|
- type: nauc_map_at_10_diff1 |
|
value: 24.69444512826233 |
|
- type: nauc_map_at_10_max |
|
value: 9.726162724771594 |
|
- type: nauc_map_at_1_diff1 |
|
value: 40.88188899137848 |
|
- type: nauc_map_at_1_max |
|
value: 12.044739470755896 |
|
- type: nauc_map_at_20_diff1 |
|
value: 23.833757177107557 |
|
- type: nauc_map_at_20_max |
|
value: 9.94328216894336 |
|
- type: nauc_map_at_3_diff1 |
|
value: 28.320570164876653 |
|
- type: nauc_map_at_3_max |
|
value: 11.195397944839767 |
|
- type: nauc_map_at_5_diff1 |
|
value: 25.86894200735248 |
|
- type: nauc_map_at_5_max |
|
value: 8.43950569758736 |
|
- type: nauc_mrr_at_1000_diff1 |
|
value: 23.53549434486455 |
|
- type: nauc_mrr_at_1000_max |
|
value: 9.977010641647402 |
|
- type: nauc_mrr_at_100_diff1 |
|
value: 23.50007884241435 |
|
- type: nauc_mrr_at_100_max |
|
value: 9.984274734441085 |
|
- type: nauc_mrr_at_10_diff1 |
|
value: 24.69444512826233 |
|
- type: nauc_mrr_at_10_max |
|
value: 9.726162724771594 |
|
- type: nauc_mrr_at_1_diff1 |
|
value: 40.88188899137848 |
|
- type: nauc_mrr_at_1_max |
|
value: 12.044739470755896 |
|
- type: nauc_mrr_at_20_diff1 |
|
value: 23.833757177107557 |
|
- type: nauc_mrr_at_20_max |
|
value: 9.94328216894336 |
|
- type: nauc_mrr_at_3_diff1 |
|
value: 28.320570164876653 |
|
- type: nauc_mrr_at_3_max |
|
value: 11.195397944839767 |
|
- type: nauc_mrr_at_5_diff1 |
|
value: 25.86894200735248 |
|
- type: nauc_mrr_at_5_max |
|
value: 8.43950569758736 |
|
- type: nauc_ndcg_at_1000_diff1 |
|
value: 15.939402272339343 |
|
- type: nauc_ndcg_at_1000_max |
|
value: 10.076089125537772 |
|
- type: nauc_ndcg_at_100_diff1 |
|
value: 16.12740122067642 |
|
- type: nauc_ndcg_at_100_max |
|
value: 10.39935154464689 |
|
- type: nauc_ndcg_at_10_diff1 |
|
value: 20.455941061369295 |
|
- type: nauc_ndcg_at_10_max |
|
value: 9.350349883274461 |
|
- type: nauc_ndcg_at_1_diff1 |
|
value: 40.88188899137848 |
|
- type: nauc_ndcg_at_1_max |
|
value: 12.044739470755896 |
|
- type: nauc_ndcg_at_20_diff1 |
|
value: 18.267195122936364 |
|
- type: nauc_ndcg_at_20_max |
|
value: 10.211299135510837 |
|
- type: nauc_ndcg_at_3_diff1 |
|
value: 26.453038443158267 |
|
- type: nauc_ndcg_at_3_max |
|
value: 10.628723618231271 |
|
- type: nauc_ndcg_at_5_diff1 |
|
value: 22.815939702854084 |
|
- type: nauc_ndcg_at_5_max |
|
value: 6.308794763068443 |
|
- type: nauc_precision_at_1000_diff1 |
|
value: -7.915540524594587 |
|
- type: nauc_precision_at_1000_max |
|
value: 10.441250503021037 |
|
- type: nauc_precision_at_100_diff1 |
|
value: 2.7415108070462253 |
|
- type: nauc_precision_at_100_max |
|
value: 11.957692005514204 |
|
- type: nauc_precision_at_10_diff1 |
|
value: 12.731449206012213 |
|
- type: nauc_precision_at_10_max |
|
value: 9.218464561250887 |
|
- type: nauc_precision_at_1_diff1 |
|
value: 40.88188899137848 |
|
- type: nauc_precision_at_1_max |
|
value: 12.044739470755896 |
|
- type: nauc_precision_at_20_diff1 |
|
value: 8.658189595700664 |
|
- type: nauc_precision_at_20_max |
|
value: 11.571072137198621 |
|
- type: nauc_precision_at_3_diff1 |
|
value: 22.7637681983756 |
|
- type: nauc_precision_at_3_max |
|
value: 9.361635703809425 |
|
- type: nauc_precision_at_5_diff1 |
|
value: 17.02002973192349 |
|
- type: nauc_precision_at_5_max |
|
value: 1.8844406919262011 |
|
- type: nauc_recall_at_1000_diff1 |
|
value: -7.915540524594531 |
|
- type: nauc_recall_at_1000_max |
|
value: 10.441250503021028 |
|
- type: nauc_recall_at_100_diff1 |
|
value: 2.741510807046166 |
|
- type: nauc_recall_at_100_max |
|
value: 11.957692005514156 |
|
- type: nauc_recall_at_10_diff1 |
|
value: 12.731449206012224 |
|
- type: nauc_recall_at_10_max |
|
value: 9.218464561250883 |
|
- type: nauc_recall_at_1_diff1 |
|
value: 40.88188899137848 |
|
- type: nauc_recall_at_1_max |
|
value: 12.044739470755896 |
|
- type: nauc_recall_at_20_diff1 |
|
value: 8.65818959570063 |
|
- type: nauc_recall_at_20_max |
|
value: 11.571072137198572 |
|
- type: nauc_recall_at_3_diff1 |
|
value: 22.763768198375587 |
|
- type: nauc_recall_at_3_max |
|
value: 9.361635703809409 |
|
- type: nauc_recall_at_5_diff1 |
|
value: 17.02002973192351 |
|
- type: nauc_recall_at_5_max |
|
value: 1.8844406919262173 |
|
- type: ndcg_at_1 |
|
value: 5.405 |
|
- type: ndcg_at_10 |
|
value: 11.045 |
|
- type: ndcg_at_100 |
|
value: 16.724 |
|
- type: ndcg_at_1000 |
|
value: 20.325 |
|
- type: ndcg_at_20 |
|
value: 12.42 |
|
- type: ndcg_at_3 |
|
value: 8.746 |
|
- type: ndcg_at_5 |
|
value: 10.065 |
|
- type: precision_at_1 |
|
value: 5.405 |
|
- type: precision_at_10 |
|
value: 1.757 |
|
- type: precision_at_100 |
|
value: 0.468 |
|
- type: precision_at_1000 |
|
value: 0.077 |
|
- type: precision_at_20 |
|
value: 1.149 |
|
- type: precision_at_3 |
|
value: 3.7539999999999996 |
|
- type: precision_at_5 |
|
value: 2.883 |
|
- type: recall_at_1 |
|
value: 5.405 |
|
- type: recall_at_10 |
|
value: 17.568 |
|
- type: recall_at_100 |
|
value: 46.847 |
|
- type: recall_at_1000 |
|
value: 76.577 |
|
- type: recall_at_20 |
|
value: 22.973 |
|
- type: recall_at_3 |
|
value: 11.261000000000001 |
|
- type: recall_at_5 |
|
value: 14.414 |
|
- task: |
|
type: Clustering |
|
dataset: |
|
type: lyon-nlp/clustering-hal-s2s |
|
name: MTEB HALClusteringS2S |
|
config: default |
|
split: test |
|
revision: e06ebbbb123f8144bef1a5d18796f3dec9ae2915 |
|
metrics: |
|
- type: v_measure |
|
value: 24.495384349905265 |
|
- type: v_measures |
|
value: [0.2850587858600384, 0.274086904447773, 0.2446866774990972, 0.26946100959565517, 0.24156528297396174] |
|
- task: |
|
type: Clustering |
|
dataset: |
|
type: reciTAL/mlsum |
|
name: MTEB MLSUMClusteringP2P |
|
config: default |
|
split: test |
|
revision: b5d54f8f3b61ae17845046286940f03c6bc79bc7 |
|
metrics: |
|
- type: v_measure |
|
value: 41.7878688793447 |
|
- type: v_measures |
|
value: [0.4201324393825989, 0.4205306567437461, 0.4221300501395374, 0.4210735177933313, 0.38124298228695813] |
|
- task: |
|
type: Clustering |
|
dataset: |
|
type: reciTAL/mlsum |
|
name: MTEB MLSUMClusteringS2S |
|
config: default |
|
split: test |
|
revision: b5d54f8f3b61ae17845046286940f03c6bc79bc7 |
|
metrics: |
|
- type: v_measure |
|
value: 41.54533473611554 |
|
- type: v_measures |
|
value: [0.3978917671338969, 0.42610299599987944, 0.4152131658150196, 0.40558711021249855, 0.38327501252308305] |
|
- task: |
|
type: Classification |
|
dataset: |
|
type: mteb/mtop_domain |
|
name: MTEB MTOPDomainClassification (fr) |
|
config: fr |
|
split: test |
|
revision: d80d48c1eb48d3562165c59d59d0034df9fff0bf |
|
metrics: |
|
- type: accuracy |
|
value: 85.33041027247104 |
|
- type: f1 |
|
value: 85.4043088703478 |
|
- type: f1_weighted |
|
value: 85.22086763441686 |
|
- task: |
|
type: Classification |
|
dataset: |
|
type: mteb/mtop_intent |
|
name: MTEB MTOPIntentClassification (fr) |
|
config: fr |
|
split: test |
|
revision: ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba |
|
metrics: |
|
- type: accuracy |
|
value: 59.01346695897275 |
|
- type: f1 |
|
value: 41.296845063208316 |
|
- type: f1_weighted |
|
value: 61.793813202867696 |
|
- task: |
|
type: Classification |
|
dataset: |
|
type: mteb/masakhanews |
|
name: MTEB MasakhaNEWSClassification (fra) |
|
config: fra |
|
split: test |
|
revision: 18193f187b92da67168c655c9973a165ed9593dd |
|
metrics: |
|
- type: accuracy |
|
value: 72.60663507109004 |
|
- type: f1 |
|
value: 68.67522100429781 |
|
- type: f1_weighted |
|
value: 72.75616093668002 |
|
- task: |
|
type: Clustering |
|
dataset: |
|
type: masakhane/masakhanews |
|
name: MTEB MasakhaNEWSClusteringP2P (fra) |
|
config: fra |
|
split: test |
|
revision: 8ccc72e69e65f40c70e117d8b3c08306bb788b60 |
|
metrics: |
|
- type: v_measure |
|
value: 49.17691007381563 |
|
- type: v_measures |
|
value: [1.0, 0.033833191750480725, 0.5707463198244268, 0.1318223737892885, 0.7224436183265853] |
|
- task: |
|
type: Clustering |
|
dataset: |
|
type: masakhane/masakhanews |
|
name: MTEB MasakhaNEWSClusteringS2S (fra) |
|
config: fra |
|
split: test |
|
revision: 8ccc72e69e65f40c70e117d8b3c08306bb788b60 |
|
metrics: |
|
- type: v_measure |
|
value: 26.9350763881635 |
|
- type: v_measures |
|
value: [1.0, 0.0002883507347309009, 0.18259625098776155, 0.025306110065234755, 0.1385631076204479] |
|
- task: |
|
type: Classification |
|
dataset: |
|
type: mteb/amazon_massive_intent |
|
name: MTEB MassiveIntentClassification (fr) |
|
config: fr |
|
split: test |
|
revision: 4672e20407010da34463acc759c162ca9734bca6 |
|
metrics: |
|
- type: accuracy |
|
value: 65.1546738399462 |
|
- type: f1 |
|
value: 62.81367149102006 |
|
- type: f1_weighted |
|
value: 64.45478181518959 |
|
- task: |
|
type: Classification |
|
dataset: |
|
type: mteb/amazon_massive_scenario |
|
name: MTEB MassiveScenarioClassification (fr) |
|
config: fr |
|
split: test |
|
revision: fad2c6e8459f9e1c45d9315f4953d921437d70f8 |
|
metrics: |
|
- type: accuracy |
|
value: 69.94283792871553 |
|
- type: f1 |
|
value: 69.3387310036327 |
|
- type: f1_weighted |
|
value: 69.77979200675047 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: jinaai/mintakaqa |
|
name: MTEB MintakaRetrieval (fr) |
|
config: fr |
|
split: test |
|
revision: efa78cc2f74bbcd21eff2261f9e13aebe40b814e |
|
metrics: |
|
- type: map_at_1 |
|
value: 14.536999999999999 |
|
- type: map_at_10 |
|
value: 22.972 |
|
- type: map_at_100 |
|
value: 24.046 |
|
- type: map_at_1000 |
|
value: 24.15 |
|
- type: map_at_20 |
|
value: 23.56 |
|
- type: map_at_3 |
|
value: 20.639 |
|
- type: map_at_5 |
|
value: 21.886 |
|
- type: mrr_at_1 |
|
value: 14.537264537264537 |
|
- type: mrr_at_10 |
|
value: 22.97172172172171 |
|
- type: mrr_at_100 |
|
value: 24.04581030084757 |
|
- type: mrr_at_1000 |
|
value: 24.15012351833827 |
|
- type: mrr_at_20 |
|
value: 23.559920001131612 |
|
- type: mrr_at_3 |
|
value: 20.63882063882061 |
|
- type: mrr_at_5 |
|
value: 21.88574938574935 |
|
- type: nauc_map_at_1000_diff1 |
|
value: 25.172495501911456 |
|
- type: nauc_map_at_1000_max |
|
value: 39.07442097828252 |
|
- type: nauc_map_at_100_diff1 |
|
value: 25.129142743145884 |
|
- type: nauc_map_at_100_max |
|
value: 39.03725272182565 |
|
- type: nauc_map_at_10_diff1 |
|
value: 25.52237435145409 |
|
- type: nauc_map_at_10_max |
|
value: 39.5761256079619 |
|
- type: nauc_map_at_1_diff1 |
|
value: 31.68506359690787 |
|
- type: nauc_map_at_1_max |
|
value: 39.251552013635425 |
|
- type: nauc_map_at_20_diff1 |
|
value: 25.223544981725286 |
|
- type: nauc_map_at_20_max |
|
value: 39.20307777977743 |
|
- type: nauc_map_at_3_diff1 |
|
value: 26.5913043939904 |
|
- type: nauc_map_at_3_max |
|
value: 40.38909639557377 |
|
- type: nauc_map_at_5_diff1 |
|
value: 25.90291761511258 |
|
- type: nauc_map_at_5_max |
|
value: 40.08746876057708 |
|
- type: nauc_mrr_at_1000_diff1 |
|
value: 25.172495501911456 |
|
- type: nauc_mrr_at_1000_max |
|
value: 39.07442097828252 |
|
- type: nauc_mrr_at_100_diff1 |
|
value: 25.129142743145884 |
|
- type: nauc_mrr_at_100_max |
|
value: 39.03725272182565 |
|
- type: nauc_mrr_at_10_diff1 |
|
value: 25.52237435145409 |
|
- type: nauc_mrr_at_10_max |
|
value: 39.5761256079619 |
|
- type: nauc_mrr_at_1_diff1 |
|
value: 31.68506359690787 |
|
- type: nauc_mrr_at_1_max |
|
value: 39.251552013635425 |
|
- type: nauc_mrr_at_20_diff1 |
|
value: 25.223544981725286 |
|
- type: nauc_mrr_at_20_max |
|
value: 39.20307777977743 |
|
- type: nauc_mrr_at_3_diff1 |
|
value: 26.5913043939904 |
|
- type: nauc_mrr_at_3_max |
|
value: 40.38909639557377 |
|
- type: nauc_mrr_at_5_diff1 |
|
value: 25.90291761511258 |
|
- type: nauc_mrr_at_5_max |
|
value: 40.08746876057708 |
|
- type: nauc_ndcg_at_1000_diff1 |
|
value: 23.22275566961323 |
|
- type: nauc_ndcg_at_1000_max |
|
value: 37.77760760027764 |
|
- type: nauc_ndcg_at_100_diff1 |
|
value: 21.715763741257927 |
|
- type: nauc_ndcg_at_100_max |
|
value: 36.46541121995108 |
|
- type: nauc_ndcg_at_10_diff1 |
|
value: 23.278761630662373 |
|
- type: nauc_ndcg_at_10_max |
|
value: 38.7930407055593 |
|
- type: nauc_ndcg_at_1_diff1 |
|
value: 31.68506359690787 |
|
- type: nauc_ndcg_at_1_max |
|
value: 39.251552013635425 |
|
- type: nauc_ndcg_at_20_diff1 |
|
value: 22.247483519405314 |
|
- type: nauc_ndcg_at_20_max |
|
value: 37.52699283756433 |
|
- type: nauc_ndcg_at_3_diff1 |
|
value: 25.285332146360567 |
|
- type: nauc_ndcg_at_3_max |
|
value: 40.49755286945492 |
|
- type: nauc_ndcg_at_5_diff1 |
|
value: 24.188132420084607 |
|
- type: nauc_ndcg_at_5_max |
|
value: 40.023420096094924 |
|
- type: nauc_precision_at_1000_diff1 |
|
value: 22.011383616462943 |
|
- type: nauc_precision_at_1000_max |
|
value: 33.1171975223399 |
|
- type: nauc_precision_at_100_diff1 |
|
value: 8.869925191243802 |
|
- type: nauc_precision_at_100_max |
|
value: 24.642097404720463 |
|
- type: nauc_precision_at_10_diff1 |
|
value: 17.74075352930919 |
|
- type: nauc_precision_at_10_max |
|
value: 36.488352516736775 |
|
- type: nauc_precision_at_1_diff1 |
|
value: 31.68506359690787 |
|
- type: nauc_precision_at_1_max |
|
value: 39.251552013635425 |
|
- type: nauc_precision_at_20_diff1 |
|
value: 14.092673370526898 |
|
- type: nauc_precision_at_20_max |
|
value: 32.16083119966346 |
|
- type: nauc_precision_at_3_diff1 |
|
value: 22.16344389106631 |
|
- type: nauc_precision_at_3_max |
|
value: 40.70883095791623 |
|
- type: nauc_precision_at_5_diff1 |
|
value: 20.119543069972256 |
|
- type: nauc_precision_at_5_max |
|
value: 39.79763147435235 |
|
- type: nauc_recall_at_1000_diff1 |
|
value: 22.011383616462528 |
|
- type: nauc_recall_at_1000_max |
|
value: 33.117197522340085 |
|
- type: nauc_recall_at_100_diff1 |
|
value: 8.869925191243775 |
|
- type: nauc_recall_at_100_max |
|
value: 24.64209740472041 |
|
- type: nauc_recall_at_10_diff1 |
|
value: 17.740753529309178 |
|
- type: nauc_recall_at_10_max |
|
value: 36.48835251673679 |
|
- type: nauc_recall_at_1_diff1 |
|
value: 31.68506359690787 |
|
- type: nauc_recall_at_1_max |
|
value: 39.251552013635425 |
|
- type: nauc_recall_at_20_diff1 |
|
value: 14.092673370526915 |
|
- type: nauc_recall_at_20_max |
|
value: 32.160831199663455 |
|
- type: nauc_recall_at_3_diff1 |
|
value: 22.163443891066322 |
|
- type: nauc_recall_at_3_max |
|
value: 40.708830957916234 |
|
- type: nauc_recall_at_5_diff1 |
|
value: 20.119543069972217 |
|
- type: nauc_recall_at_5_max |
|
value: 39.79763147435234 |
|
- type: ndcg_at_1 |
|
value: 14.536999999999999 |
|
- type: ndcg_at_10 |
|
value: 27.485 |
|
- type: ndcg_at_100 |
|
value: 33.206 |
|
- type: ndcg_at_1000 |
|
value: 36.382999999999996 |
|
- type: ndcg_at_20 |
|
value: 29.635 |
|
- type: ndcg_at_3 |
|
value: 22.597 |
|
- type: ndcg_at_5 |
|
value: 24.851 |
|
- type: precision_at_1 |
|
value: 14.536999999999999 |
|
- type: precision_at_10 |
|
value: 4.189 |
|
- type: precision_at_100 |
|
value: 0.698 |
|
- type: precision_at_1000 |
|
value: 0.096 |
|
- type: precision_at_20 |
|
value: 2.52 |
|
- type: precision_at_3 |
|
value: 9.419 |
|
- type: precision_at_5 |
|
value: 6.749 |
|
- type: recall_at_1 |
|
value: 14.536999999999999 |
|
- type: recall_at_10 |
|
value: 41.892 |
|
- type: recall_at_100 |
|
value: 69.779 |
|
- type: recall_at_1000 |
|
value: 95.61800000000001 |
|
- type: recall_at_20 |
|
value: 50.41 |
|
- type: recall_at_3 |
|
value: 28.255999999999997 |
|
- type: recall_at_5 |
|
value: 33.743 |
|
- task: |
|
type: PairClassification |
|
dataset: |
|
type: GEM/opusparcus |
|
name: MTEB OpusparcusPC (fr) |
|
config: fr |
|
split: test |
|
revision: 9e9b1f8ef51616073f47f306f7f47dd91663f86a |
|
metrics: |
|
- type: cos_sim_accuracy |
|
value: 81.74386920980926 |
|
- type: cos_sim_ap |
|
value: 93.18281680904117 |
|
- type: cos_sim_f1 |
|
value: 87.37233054781802 |
|
- type: cos_sim_precision |
|
value: 82.04010462074979 |
|
- type: cos_sim_recall |
|
value: 93.44587884806356 |
|
- type: dot_accuracy |
|
value: 81.74386920980926 |
|
- type: dot_ap |
|
value: 93.18281680904117 |
|
- type: dot_f1 |
|
value: 87.37233054781802 |
|
- type: dot_precision |
|
value: 82.04010462074979 |
|
- type: dot_recall |
|
value: 93.44587884806356 |
|
- type: euclidean_accuracy |
|
value: 81.74386920980926 |
|
- type: euclidean_ap |
|
value: 93.18281680904117 |
|
- type: euclidean_f1 |
|
value: 87.37233054781802 |
|
- type: euclidean_precision |
|
value: 82.04010462074979 |
|
- type: euclidean_recall |
|
value: 93.44587884806356 |
|
- type: manhattan_accuracy |
|
value: 81.74386920980926 |
|
- type: manhattan_ap |
|
value: 93.17517480971131 |
|
- type: manhattan_f1 |
|
value: 87.37864077669903 |
|
- type: manhattan_precision |
|
value: 81.74740484429066 |
|
- type: manhattan_recall |
|
value: 93.84309831181727 |
|
- type: max_accuracy |
|
value: 81.74386920980926 |
|
- type: max_ap |
|
value: 93.18281680904117 |
|
- type: max_f1 |
|
value: 87.37864077669903 |
|
- task: |
|
type: PairClassification |
|
dataset: |
|
type: google-research-datasets/paws-x |
|
name: MTEB PawsX (fr) |
|
config: fr |
|
split: test |
|
revision: 8a04d940a42cd40658986fdd8e3da561533a3646 |
|
metrics: |
|
- type: cos_sim_accuracy |
|
value: 61.1 |
|
- type: cos_sim_ap |
|
value: 60.75603519868964 |
|
- type: cos_sim_f1 |
|
value: 62.78646780647509 |
|
- type: cos_sim_precision |
|
value: 46.74972914409534 |
|
- type: cos_sim_recall |
|
value: 95.5703211517165 |
|
- type: dot_accuracy |
|
value: 61.1 |
|
- type: dot_ap |
|
value: 60.74807680023078 |
|
- type: dot_f1 |
|
value: 62.78646780647509 |
|
- type: dot_precision |
|
value: 46.74972914409534 |
|
- type: dot_recall |
|
value: 95.5703211517165 |
|
- type: euclidean_accuracy |
|
value: 61.1 |
|
- type: euclidean_ap |
|
value: 60.756144387817734 |
|
- type: euclidean_f1 |
|
value: 62.78646780647509 |
|
- type: euclidean_precision |
|
value: 46.74972914409534 |
|
- type: euclidean_recall |
|
value: 95.5703211517165 |
|
- type: manhattan_accuracy |
|
value: 61.150000000000006 |
|
- type: manhattan_ap |
|
value: 60.685188544775116 |
|
- type: manhattan_f1 |
|
value: 62.7721335268505 |
|
- type: manhattan_precision |
|
value: 46.6810577441986 |
|
- type: manhattan_recall |
|
value: 95.79180509413068 |
|
- type: max_accuracy |
|
value: 61.150000000000006 |
|
- type: max_ap |
|
value: 60.756144387817734 |
|
- type: max_f1 |
|
value: 62.78646780647509 |
|
- task: |
|
type: STS |
|
dataset: |
|
type: Lajavaness/SICK-fr |
|
name: MTEB SICKFr |
|
config: default |
|
split: test |
|
revision: e077ab4cf4774a1e36d86d593b150422fafd8e8a |
|
metrics: |
|
- type: cos_sim_pearson |
|
value: 83.1543597030015 |
|
- type: cos_sim_spearman |
|
value: 77.10092303546944 |
|
- type: euclidean_pearson |
|
value: 80.27115846915481 |
|
- type: euclidean_spearman |
|
value: 77.10092516058822 |
|
- type: manhattan_pearson |
|
value: 80.30090425968062 |
|
- type: manhattan_spearman |
|
value: 77.09423647945061 |
|
- task: |
|
type: STS |
|
dataset: |
|
type: mteb/sts22-crosslingual-sts |
|
name: MTEB STS22 (fr) |
|
config: fr |
|
split: test |
|
revision: de9d86b3b84231dc21f76c7b7af1f28e2f57f6e3 |
|
metrics: |
|
- type: cos_sim_pearson |
|
value: 79.20797144286122 |
|
- type: cos_sim_spearman |
|
value: 80.31452099282514 |
|
- type: euclidean_pearson |
|
value: 78.43621396282957 |
|
- type: euclidean_spearman |
|
value: 80.31452099282514 |
|
- type: manhattan_pearson |
|
value: 78.29678738374866 |
|
- type: manhattan_spearman |
|
value: 79.93185465249057 |
|
- task: |
|
type: STS |
|
dataset: |
|
type: PhilipMay/stsb_multi_mt |
|
name: MTEB STSBenchmarkMultilingualSTS (fr) |
|
config: fr |
|
split: test |
|
revision: 29afa2569dcedaaa2fe6a3dcfebab33d28b82e8c |
|
metrics: |
|
- type: cos_sim_pearson |
|
value: 84.69215133897265 |
|
- type: cos_sim_spearman |
|
value: 84.35617480959016 |
|
- type: euclidean_pearson |
|
value: 83.85371663492563 |
|
- type: euclidean_spearman |
|
value: 84.35617480959016 |
|
- type: manhattan_pearson |
|
value: 83.85857789722276 |
|
- type: manhattan_spearman |
|
value: 84.30794186513978 |
|
- task: |
|
type: Summarization |
|
dataset: |
|
type: lyon-nlp/summarization-summeval-fr-p2p |
|
name: MTEB SummEvalFr |
|
config: default |
|
split: test |
|
revision: b385812de6a9577b6f4d0f88c6a6e35395a94054 |
|
metrics: |
|
- type: cos_sim_pearson |
|
value: 29.187176809104393 |
|
- type: cos_sim_spearman |
|
value: 29.65160679657583 |
|
- type: dot_pearson |
|
value: 29.18717349611766 |
|
- type: dot_spearman |
|
value: 29.65160679657583 |
|
- task: |
|
type: Reranking |
|
dataset: |
|
type: lyon-nlp/mteb-fr-reranking-syntec-s2p |
|
name: MTEB SyntecReranking |
|
config: default |
|
split: test |
|
revision: daf0863838cd9e3ba50544cdce3ac2b338a1b0ad |
|
metrics: |
|
- type: map |
|
value: 82.76666666666667 |
|
- type: mrr |
|
value: 82.76666666666667 |
|
- type: nAUC_map_diff1 |
|
value: 52.548913230162405 |
|
- type: nAUC_map_max |
|
value: -2.824065935620183 |
|
- type: nAUC_mrr_diff1 |
|
value: 52.548913230162405 |
|
- type: nAUC_mrr_max |
|
value: -2.824065935620183 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: lyon-nlp/mteb-fr-retrieval-syntec-s2p |
|
name: MTEB SyntecRetrieval |
|
config: default |
|
split: test |
|
revision: 19661ccdca4dfc2d15122d776b61685f48c68ca9 |
|
metrics: |
|
- type: map_at_1 |
|
value: 57.99999999999999 |
|
- type: map_at_10 |
|
value: 72.356 |
|
- type: map_at_100 |
|
value: 72.625 |
|
- type: map_at_1000 |
|
value: 72.625 |
|
- type: map_at_20 |
|
value: 72.625 |
|
- type: map_at_3 |
|
value: 70.333 |
|
- type: map_at_5 |
|
value: 71.48299999999999 |
|
- type: mrr_at_1 |
|
value: 57.99999999999999 |
|
- type: mrr_at_10 |
|
value: 72.35634920634922 |
|
- type: mrr_at_100 |
|
value: 72.62532693914275 |
|
- type: mrr_at_1000 |
|
value: 72.62532693914275 |
|
- type: mrr_at_20 |
|
value: 72.62532693914275 |
|
- type: mrr_at_3 |
|
value: 70.33333333333333 |
|
- type: mrr_at_5 |
|
value: 71.48333333333333 |
|
- type: nauc_map_at_1000_diff1 |
|
value: 57.27081552588017 |
|
- type: nauc_map_at_1000_max |
|
value: 13.401922890723771 |
|
- type: nauc_map_at_100_diff1 |
|
value: 57.27081552588017 |
|
- type: nauc_map_at_100_max |
|
value: 13.401922890723771 |
|
- type: nauc_map_at_10_diff1 |
|
value: 57.39952453922188 |
|
- type: nauc_map_at_10_max |
|
value: 14.093164837730344 |
|
- type: nauc_map_at_1_diff1 |
|
value: 57.23800679107291 |
|
- type: nauc_map_at_1_max |
|
value: 11.039846765533865 |
|
- type: nauc_map_at_20_diff1 |
|
value: 57.27081552588017 |
|
- type: nauc_map_at_20_max |
|
value: 13.401922890723771 |
|
- type: nauc_map_at_3_diff1 |
|
value: 58.14875247321224 |
|
- type: nauc_map_at_3_max |
|
value: 14.538312305676238 |
|
- type: nauc_map_at_5_diff1 |
|
value: 57.34940275695991 |
|
- type: nauc_map_at_5_max |
|
value: 13.675180459395065 |
|
- type: nauc_mrr_at_1000_diff1 |
|
value: 57.27081552588017 |
|
- type: nauc_mrr_at_1000_max |
|
value: 13.401922890723771 |
|
- type: nauc_mrr_at_100_diff1 |
|
value: 57.27081552588017 |
|
- type: nauc_mrr_at_100_max |
|
value: 13.401922890723771 |
|
- type: nauc_mrr_at_10_diff1 |
|
value: 57.39952453922188 |
|
- type: nauc_mrr_at_10_max |
|
value: 14.093164837730344 |
|
- type: nauc_mrr_at_1_diff1 |
|
value: 57.23800679107291 |
|
- type: nauc_mrr_at_1_max |
|
value: 11.039846765533865 |
|
- type: nauc_mrr_at_20_diff1 |
|
value: 57.27081552588017 |
|
- type: nauc_mrr_at_20_max |
|
value: 13.401922890723771 |
|
- type: nauc_mrr_at_3_diff1 |
|
value: 58.14875247321224 |
|
- type: nauc_mrr_at_3_max |
|
value: 14.538312305676238 |
|
- type: nauc_mrr_at_5_diff1 |
|
value: 57.34940275695991 |
|
- type: nauc_mrr_at_5_max |
|
value: 13.675180459395065 |
|
- type: nauc_ndcg_at_1000_diff1 |
|
value: 57.38511684819052 |
|
- type: nauc_ndcg_at_1000_max |
|
value: 13.993185568467656 |
|
- type: nauc_ndcg_at_100_diff1 |
|
value: 57.38511684819052 |
|
- type: nauc_ndcg_at_100_max |
|
value: 13.993185568467656 |
|
- type: nauc_ndcg_at_10_diff1 |
|
value: 57.93396526410134 |
|
- type: nauc_ndcg_at_10_max |
|
value: 17.16319020800824 |
|
- type: nauc_ndcg_at_1_diff1 |
|
value: 57.23800679107291 |
|
- type: nauc_ndcg_at_1_max |
|
value: 11.039846765533865 |
|
- type: nauc_ndcg_at_20_diff1 |
|
value: 57.38511684819052 |
|
- type: nauc_ndcg_at_20_max |
|
value: 13.993185568467656 |
|
- type: nauc_ndcg_at_3_diff1 |
|
value: 59.36410104940948 |
|
- type: nauc_ndcg_at_3_max |
|
value: 17.128826753860732 |
|
- type: nauc_ndcg_at_5_diff1 |
|
value: 57.71094150714742 |
|
- type: nauc_ndcg_at_5_max |
|
value: 15.62784584334318 |
|
- type: nauc_precision_at_1000_diff1 |
|
value: nan |
|
- type: nauc_precision_at_1000_max |
|
value: nan |
|
- type: nauc_precision_at_100_diff1 |
|
value: nan |
|
- type: nauc_precision_at_100_max |
|
value: nan |
|
- type: nauc_precision_at_10_diff1 |
|
value: 66.79505135387465 |
|
- type: nauc_precision_at_10_max |
|
value: 70.47152194211033 |
|
- type: nauc_precision_at_1_diff1 |
|
value: 57.23800679107291 |
|
- type: nauc_precision_at_1_max |
|
value: 11.039846765533865 |
|
- type: nauc_precision_at_20_diff1 |
|
value: 100.0 |
|
- type: nauc_precision_at_20_max |
|
value: 100.0 |
|
- type: nauc_precision_at_3_diff1 |
|
value: 65.65896518060521 |
|
- type: nauc_precision_at_3_max |
|
value: 30.198503091441538 |
|
- type: nauc_precision_at_5_diff1 |
|
value: 60.04201680672288 |
|
- type: nauc_precision_at_5_max |
|
value: 29.000933706816145 |
|
- type: nauc_recall_at_1000_diff1 |
|
value: nan |
|
- type: nauc_recall_at_1000_max |
|
value: nan |
|
- type: nauc_recall_at_100_diff1 |
|
value: nan |
|
- type: nauc_recall_at_100_max |
|
value: nan |
|
- type: nauc_recall_at_10_diff1 |
|
value: 66.7950513538749 |
|
- type: nauc_recall_at_10_max |
|
value: 70.47152194211012 |
|
- type: nauc_recall_at_1_diff1 |
|
value: 57.23800679107291 |
|
- type: nauc_recall_at_1_max |
|
value: 11.039846765533865 |
|
- type: nauc_recall_at_20_diff1 |
|
value: nan |
|
- type: nauc_recall_at_20_max |
|
value: nan |
|
- type: nauc_recall_at_3_diff1 |
|
value: 65.65896518060525 |
|
- type: nauc_recall_at_3_max |
|
value: 30.19850309144154 |
|
- type: nauc_recall_at_5_diff1 |
|
value: 60.0420168067226 |
|
- type: nauc_recall_at_5_max |
|
value: 29.000933706816 |
|
- type: ndcg_at_1 |
|
value: 57.99999999999999 |
|
- type: ndcg_at_10 |
|
value: 78.19800000000001 |
|
- type: ndcg_at_100 |
|
value: 79.199 |
|
- type: ndcg_at_1000 |
|
value: 79.199 |
|
- type: ndcg_at_20 |
|
value: 79.199 |
|
- type: ndcg_at_3 |
|
value: 74.119 |
|
- type: ndcg_at_5 |
|
value: 76.184 |
|
- type: precision_at_1 |
|
value: 57.99999999999999 |
|
- type: precision_at_10 |
|
value: 9.6 |
|
- type: precision_at_100 |
|
value: 1.0 |
|
- type: precision_at_1000 |
|
value: 0.1 |
|
- type: precision_at_20 |
|
value: 5.0 |
|
- type: precision_at_3 |
|
value: 28.333000000000002 |
|
- type: precision_at_5 |
|
value: 18.0 |
|
- type: recall_at_1 |
|
value: 57.99999999999999 |
|
- type: recall_at_10 |
|
value: 96.0 |
|
- type: recall_at_100 |
|
value: 100.0 |
|
- type: recall_at_1000 |
|
value: 100.0 |
|
- type: recall_at_20 |
|
value: 100.0 |
|
- type: recall_at_3 |
|
value: 85.0 |
|
- type: recall_at_5 |
|
value: 90.0 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: jinaai/xpqa |
|
name: MTEB XPQARetrieval (fr) |
|
config: fr |
|
split: test |
|
revision: c99d599f0a6ab9b85b065da6f9d94f9cf731679f |
|
metrics: |
|
- type: map_at_1 |
|
value: 35.256 |
|
- type: map_at_10 |
|
value: 54.071999999999996 |
|
- type: map_at_100 |
|
value: 55.435 |
|
- type: map_at_1000 |
|
value: 55.53 |
|
- type: map_at_20 |
|
value: 54.855 |
|
- type: map_at_3 |
|
value: 48.762 |
|
- type: map_at_5 |
|
value: 51.949999999999996 |
|
- type: mrr_at_1 |
|
value: 56.34178905206942 |
|
- type: mrr_at_10 |
|
value: 63.30843240723078 |
|
- type: mrr_at_100 |
|
value: 63.92076387626982 |
|
- type: mrr_at_1000 |
|
value: 63.9435076251571 |
|
- type: mrr_at_20 |
|
value: 63.64110365119446 |
|
- type: mrr_at_3 |
|
value: 61.526479750778805 |
|
- type: mrr_at_5 |
|
value: 62.38762794837559 |
|
- type: nauc_map_at_1000_diff1 |
|
value: 45.88957885553053 |
|
- type: nauc_map_at_1000_max |
|
value: 52.59013482565773 |
|
- type: nauc_map_at_100_diff1 |
|
value: 45.84948517422948 |
|
- type: nauc_map_at_100_max |
|
value: 52.55839985303019 |
|
- type: nauc_map_at_10_diff1 |
|
value: 45.763486819482196 |
|
- type: nauc_map_at_10_max |
|
value: 52.09054118600712 |
|
- type: nauc_map_at_1_diff1 |
|
value: 55.521911317670835 |
|
- type: nauc_map_at_1_max |
|
value: 34.68779817675579 |
|
- type: nauc_map_at_20_diff1 |
|
value: 45.757369615751884 |
|
- type: nauc_map_at_20_max |
|
value: 52.44708031434436 |
|
- type: nauc_map_at_3_diff1 |
|
value: 47.798733616712056 |
|
- type: nauc_map_at_3_max |
|
value: 46.87976781177451 |
|
- type: nauc_map_at_5_diff1 |
|
value: 46.215964363315884 |
|
- type: nauc_map_at_5_max |
|
value: 50.5765276342371 |
|
- type: nauc_mrr_at_1000_diff1 |
|
value: 55.110400510640766 |
|
- type: nauc_mrr_at_1000_max |
|
value: 62.66171179919574 |
|
- type: nauc_mrr_at_100_diff1 |
|
value: 55.10166012000449 |
|
- type: nauc_mrr_at_100_max |
|
value: 62.66269343813773 |
|
- type: nauc_mrr_at_10_diff1 |
|
value: 55.087629594751256 |
|
- type: nauc_mrr_at_10_max |
|
value: 62.69978067726044 |
|
- type: nauc_mrr_at_1_diff1 |
|
value: 57.446957773325956 |
|
- type: nauc_mrr_at_1_max |
|
value: 63.22109004948565 |
|
- type: nauc_mrr_at_20_diff1 |
|
value: 55.067208283222016 |
|
- type: nauc_mrr_at_20_max |
|
value: 62.66935664582939 |
|
- type: nauc_mrr_at_3_diff1 |
|
value: 55.18870023658262 |
|
- type: nauc_mrr_at_3_max |
|
value: 62.597473549957996 |
|
- type: nauc_mrr_at_5_diff1 |
|
value: 54.87651100155316 |
|
- type: nauc_mrr_at_5_max |
|
value: 62.72845534030979 |
|
- type: nauc_ndcg_at_1000_diff1 |
|
value: 47.81162759706491 |
|
- type: nauc_ndcg_at_1000_max |
|
value: 56.26337910947683 |
|
- type: nauc_ndcg_at_100_diff1 |
|
value: 47.119077388160676 |
|
- type: nauc_ndcg_at_100_max |
|
value: 55.82354642959063 |
|
- type: nauc_ndcg_at_10_diff1 |
|
value: 46.784535879466496 |
|
- type: nauc_ndcg_at_10_max |
|
value: 54.63437116703429 |
|
- type: nauc_ndcg_at_1_diff1 |
|
value: 57.446957773325956 |
|
- type: nauc_ndcg_at_1_max |
|
value: 63.22109004948565 |
|
- type: nauc_ndcg_at_20_diff1 |
|
value: 46.756211545478905 |
|
- type: nauc_ndcg_at_20_max |
|
value: 55.228917899613826 |
|
- type: nauc_ndcg_at_3_diff1 |
|
value: 47.66168453462149 |
|
- type: nauc_ndcg_at_3_max |
|
value: 54.39836405112981 |
|
- type: nauc_ndcg_at_5_diff1 |
|
value: 46.97491630908418 |
|
- type: nauc_ndcg_at_5_max |
|
value: 53.284362953526184 |
|
- type: nauc_precision_at_1000_diff1 |
|
value: -14.959536048875451 |
|
- type: nauc_precision_at_1000_max |
|
value: 19.740731727610537 |
|
- type: nauc_precision_at_100_diff1 |
|
value: -10.329364912432421 |
|
- type: nauc_precision_at_100_max |
|
value: 27.80165890502952 |
|
- type: nauc_precision_at_10_diff1 |
|
value: 0.7865296687777561 |
|
- type: nauc_precision_at_10_max |
|
value: 38.46291415400641 |
|
- type: nauc_precision_at_1_diff1 |
|
value: 57.446957773325956 |
|
- type: nauc_precision_at_1_max |
|
value: 63.22109004948565 |
|
- type: nauc_precision_at_20_diff1 |
|
value: -2.2696079664009385 |
|
- type: nauc_precision_at_20_max |
|
value: 35.38696590671127 |
|
- type: nauc_precision_at_3_diff1 |
|
value: 14.016444043719714 |
|
- type: nauc_precision_at_3_max |
|
value: 46.68119169258843 |
|
- type: nauc_precision_at_5_diff1 |
|
value: 6.466134759646741 |
|
- type: nauc_precision_at_5_max |
|
value: 43.245171983039256 |
|
- type: nauc_recall_at_1000_diff1 |
|
value: 10.588340380461794 |
|
- type: nauc_recall_at_1000_max |
|
value: 45.913607560926515 |
|
- type: nauc_recall_at_100_diff1 |
|
value: 28.995302681864565 |
|
- type: nauc_recall_at_100_max |
|
value: 42.67608149089844 |
|
- type: nauc_recall_at_10_diff1 |
|
value: 38.958724392572854 |
|
- type: nauc_recall_at_10_max |
|
value: 47.455666375173315 |
|
- type: nauc_recall_at_1_diff1 |
|
value: 55.521911317670835 |
|
- type: nauc_recall_at_1_max |
|
value: 34.68779817675579 |
|
- type: nauc_recall_at_20_diff1 |
|
value: 36.623788206732016 |
|
- type: nauc_recall_at_20_max |
|
value: 46.654888587980174 |
|
- type: nauc_recall_at_3_diff1 |
|
value: 43.46749373705754 |
|
- type: nauc_recall_at_3_max |
|
value: 42.55592784672105 |
|
- type: nauc_recall_at_5_diff1 |
|
value: 40.49018957054939 |
|
- type: nauc_recall_at_5_max |
|
value: 46.86884862874594 |
|
- type: ndcg_at_1 |
|
value: 56.342000000000006 |
|
- type: ndcg_at_10 |
|
value: 60.01800000000001 |
|
- type: ndcg_at_100 |
|
value: 65.182 |
|
- type: ndcg_at_1000 |
|
value: 66.809 |
|
- type: ndcg_at_20 |
|
value: 61.982000000000006 |
|
- type: ndcg_at_3 |
|
value: 55.688 |
|
- type: ndcg_at_5 |
|
value: 56.607 |
|
- type: precision_at_1 |
|
value: 56.342000000000006 |
|
- type: precision_at_10 |
|
value: 14.005 |
|
- type: precision_at_100 |
|
value: 1.821 |
|
- type: precision_at_1000 |
|
value: 0.20500000000000002 |
|
- type: precision_at_20 |
|
value: 7.684 |
|
- type: precision_at_3 |
|
value: 34.089999999999996 |
|
- type: precision_at_5 |
|
value: 24.005000000000003 |
|
- type: recall_at_1 |
|
value: 35.256 |
|
- type: recall_at_10 |
|
value: 67.583 |
|
- type: recall_at_100 |
|
value: 88.74300000000001 |
|
- type: recall_at_1000 |
|
value: 99.163 |
|
- type: recall_at_20 |
|
value: 73.87 |
|
- type: recall_at_3 |
|
value: 53.371 |
|
- type: recall_at_5 |
|
value: 59.399 |
|
license: apache-2.0 |
|
--- |
|
|
|
# [bilingual-embedding-base](https://huggingface.co/Lajavaness/bilingual-embedding-base) |
|
This repo is a fork of the original [Lajavaness/bilingual-embedding-base](https://huggingface.co/Lajavaness/bilingual-embedding-base). The only difference is the model type name, to be compatible with text-embeddings-inference. |
|
|
|
Bilingual-embedding is an embedding model for two languages: French and English. It is a specialized sentence-embedding model trained specifically for this bilingual setting, leveraging the robust capabilities of [XLM-RoBERTa](https://huggingface.co/FacebookAI/xlm-roberta-base), a pre-trained multilingual language model. The model uses XLM-RoBERTa to encode English and French sentences into a 768-dimensional vector space (see the pooling layer in the architecture below), facilitating a wide range of applications from semantic search to text clustering. The embeddings capture the nuanced meanings of English and French sentences, reflecting both the lexical and contextual layers of the language. |
|
|
|
|
|
## Full Model Architecture |
|
``` |
|
SentenceTransformer( |
|
(0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BilingualModel |
|
(1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True}) |
|
(2): Normalize() |
|
) |
|
``` |
|
|
|
## Training and Fine-tuning process |
|
### Stage 1: NLI Training |
|
- Dataset: SNLI + XNLI (English and French) |
|
- Method: Training using Multiple Negatives Ranking Loss. This stage focused on improving the model's ability to discern and rank nuanced differences in sentence semantics. |
|
### Stage 3: Continued Fine-tuning for Semantic Textual Similarity on STS Benchmark |
|
- Dataset: STSB (French and English) |
|
- Method: Fine-tuning specifically for the semantic textual similarity benchmark using Siamese BERT-Networks configured with the 'sentence-transformers' library. |
|
### Stage 4: Advanced Augmentation Fine-tuning |
|
- Dataset: STSB, augmented with [silver samples generated from gold samples](https://www.sbert.net/examples/training/data_augmentation/README.html) |
|
- Method: Employed an advanced strategy using [Augmented SBERT](https://arxiv.org/abs/2010.08240) with Pair Sampling Strategies, integrating both Cross-Encoder and Bi-Encoder models. This stage further refined the embeddings by enriching the training data dynamically, enhancing the model's robustness and accuracy. |
|
|
|
|
|
## Usage: |
|
|
|
Using this model becomes easy when you have [sentence-transformers](https://www.SBERT.net) installed: |
|
|
|
``` |
|
pip install -U sentence-transformers |
|
``` |
|
|
|
Then you can use the model like this: |
|
|
|
```python |
|
from sentence_transformers import SentenceTransformer |
|
|
|
sentences = ["Paris est une capitale de la France", "Paris is a capital of France"] |
|
|
|
model = SentenceTransformer('Lajavaness/bilingual-embedding-base', trust_remote_code=True) |
|
embeddings = model.encode(sentences) |
|
print(embeddings) |
|
|
|
``` |
|
|
|
|
|
|
|
|
|
|
|
## Evaluation |
|
|
|
TODO |
|
|
|
## Citation |
|
|
|
@article{conneau2019unsupervised, |
|
title={Unsupervised cross-lingual representation learning at scale}, |
|
author={Conneau, Alexis and Khandelwal, Kartikay and Goyal, Naman and Chaudhary, Vishrav and Wenzek, Guillaume and Guzm{\'a}n, Francisco and Grave, Edouard and Ott, Myle and Zettlemoyer, Luke and Stoyanov, Veselin}, |
|
journal={arXiv preprint arXiv:1911.02116}, |
|
year={2019} |
|
} |
|
|
|
@article{reimers2019sentence, |
|
title={Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks}, |
|
author={Reimers, Nils and Gurevych, Iryna}, |
|
journal={arXiv preprint arXiv:1908.10084}, |
|
year={2019} |
|
} |
|
|
|
@article{thakur2020augmented, |
|
title={Augmented SBERT: Data Augmentation Method for Improving Bi-Encoders for Pairwise Sentence Scoring Tasks}, |
|
author={Thakur, Nandan and Reimers, Nils and Daxenberger, Johannes and Gurevych, Iryna}, |
|
journal={arXiv e-prints}, |
|
pages={arXiv--2010}, |
|
year={2020} |
|
|