|
--- |
|
library_name: sentence-transformers |
|
pipeline_tag: sentence-similarity |
|
tags: |
|
- sentence-transformers |
|
- feature-extraction |
|
- sentence-similarity |
|
- transformers |
|
- sentence-embedding |
|
- mteb |
|
model-index: |
|
- name: bilingual-embedding-large |
|
results: |
|
- task: |
|
type: Clustering |
|
dataset: |
|
type: lyon-nlp/alloprof |
|
name: MTEB AlloProfClusteringP2P |
|
config: default |
|
split: test |
|
revision: 392ba3f5bcc8c51f578786c1fc3dae648662cb9b |
|
metrics: |
|
- type: v_measure |
|
value: 56.77190187231352 |
|
- type: v_measures |
|
value: [0.5591529760439443, 0.5974955147482336, 0.5823433887463434, 0.5582030533486634, 0.5608784889863708] |
|
- task: |
|
type: Clustering |
|
dataset: |
|
type: lyon-nlp/alloprof |
|
name: MTEB AlloProfClusteringS2S |
|
config: default |
|
split: test |
|
revision: 392ba3f5bcc8c51f578786c1fc3dae648662cb9b |
|
metrics: |
|
- type: v_measure |
|
value: 43.8035296814567 |
|
- type: v_measures |
|
value: [0.48563274496248593, 0.39879051227524587, 0.4846924607458884, 0.48918774336682036, 0.42626900366019754] |
|
- task: |
|
type: Reranking |
|
dataset: |
|
type: lyon-nlp/mteb-fr-reranking-alloprof-s2p |
|
name: MTEB AlloprofReranking |
|
config: default |
|
split: test |
|
revision: 65393d0d7a08a10b4e348135e824f385d420b0fd |
|
metrics: |
|
- type: map |
|
value: 71.51753654257872 |
|
- type: mrr |
|
value: 72.59564245963209 |
|
- type: nAUC_map_diff1 |
|
value: 54.75803232873865 |
|
- type: nAUC_map_max |
|
value: 22.763024994564173 |
|
- type: nAUC_mrr_diff1 |
|
value: 54.34234068011684 |
|
- type: nAUC_mrr_max |
|
value: 23.607169979872587 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: lyon-nlp/alloprof |
|
name: MTEB AlloprofRetrieval |
|
config: default |
|
split: test |
|
revision: fcf295ea64c750f41fadbaa37b9b861558e1bfbd |
|
metrics: |
|
- type: map_at_1 |
|
value: 26.598 |
|
- type: map_at_10 |
|
value: 37.808 |
|
- type: map_at_100 |
|
value: 38.726 |
|
- type: map_at_1000 |
|
value: 38.778 |
|
- type: map_at_20 |
|
value: 38.34 |
|
- type: map_at_3 |
|
value: 34.599999999999994 |
|
- type: map_at_5 |
|
value: 36.385 |
|
- type: mrr_at_1 |
|
value: 26.59758203799655 |
|
- type: mrr_at_10 |
|
value: 37.80750541437081 |
|
- type: mrr_at_100 |
|
value: 38.72559086585011 |
|
- type: mrr_at_1000 |
|
value: 38.77782362787768 |
|
- type: mrr_at_20 |
|
value: 38.33973921272315 |
|
- type: mrr_at_3 |
|
value: 34.59988485895229 |
|
- type: mrr_at_5 |
|
value: 36.385290731145794 |
|
- type: nauc_map_at_1000_diff1 |
|
value: 35.14662281558396 |
|
- type: nauc_map_at_1000_max |
|
value: 33.29444147034975 |
|
- type: nauc_map_at_100_diff1 |
|
value: 35.14901176407076 |
|
- type: nauc_map_at_100_max |
|
value: 33.32860103491456 |
|
- type: nauc_map_at_10_diff1 |
|
value: 34.96490139273049 |
|
- type: nauc_map_at_10_max |
|
value: 33.252929811567526 |
|
- type: nauc_map_at_1_diff1 |
|
value: 40.652132824871664 |
|
- type: nauc_map_at_1_max |
|
value: 28.53606743237387 |
|
- type: nauc_map_at_20_diff1 |
|
value: 35.11177701050558 |
|
- type: nauc_map_at_20_max |
|
value: 33.38822815064973 |
|
- type: nauc_map_at_3_diff1 |
|
value: 35.08068505968589 |
|
- type: nauc_map_at_3_max |
|
value: 32.10125944853496 |
|
- type: nauc_map_at_5_diff1 |
|
value: 34.78702330258393 |
|
- type: nauc_map_at_5_max |
|
value: 32.89738895858572 |
|
- type: nauc_mrr_at_1000_diff1 |
|
value: 35.14662281558396 |
|
- type: nauc_mrr_at_1000_max |
|
value: 33.29444147034975 |
|
- type: nauc_mrr_at_100_diff1 |
|
value: 35.14901176407076 |
|
- type: nauc_mrr_at_100_max |
|
value: 33.32860103491456 |
|
- type: nauc_mrr_at_10_diff1 |
|
value: 34.96490139273049 |
|
- type: nauc_mrr_at_10_max |
|
value: 33.252929811567526 |
|
- type: nauc_mrr_at_1_diff1 |
|
value: 40.652132824871664 |
|
- type: nauc_mrr_at_1_max |
|
value: 28.53606743237387 |
|
- type: nauc_mrr_at_20_diff1 |
|
value: 35.11177701050558 |
|
- type: nauc_mrr_at_20_max |
|
value: 33.38822815064973 |
|
- type: nauc_mrr_at_3_diff1 |
|
value: 35.08068505968589 |
|
- type: nauc_mrr_at_3_max |
|
value: 32.10125944853496 |
|
- type: nauc_mrr_at_5_diff1 |
|
value: 34.78702330258393 |
|
- type: nauc_mrr_at_5_max |
|
value: 32.89738895858572 |
|
- type: nauc_ndcg_at_1000_diff1 |
|
value: 34.24563790011671 |
|
- type: nauc_ndcg_at_1000_max |
|
value: 35.1750183970367 |
|
- type: nauc_ndcg_at_100_diff1 |
|
value: 34.26540576121903 |
|
- type: nauc_ndcg_at_100_max |
|
value: 36.211723696019526 |
|
- type: nauc_ndcg_at_10_diff1 |
|
value: 33.363793757214985 |
|
- type: nauc_ndcg_at_10_max |
|
value: 35.724249117130285 |
|
- type: nauc_ndcg_at_1_diff1 |
|
value: 40.652132824871664 |
|
- type: nauc_ndcg_at_1_max |
|
value: 28.53606743237387 |
|
- type: nauc_ndcg_at_20_diff1 |
|
value: 33.82271561239704 |
|
- type: nauc_ndcg_at_20_max |
|
value: 36.31393467921569 |
|
- type: nauc_ndcg_at_3_diff1 |
|
value: 33.47602274641705 |
|
- type: nauc_ndcg_at_3_max |
|
value: 33.25855418368982 |
|
- type: nauc_ndcg_at_5_diff1 |
|
value: 32.982858998932784 |
|
- type: nauc_ndcg_at_5_max |
|
value: 34.683197330270694 |
|
- type: nauc_precision_at_1000_diff1 |
|
value: 20.695105766988465 |
|
- type: nauc_precision_at_1000_max |
|
value: 79.24780459990231 |
|
- type: nauc_precision_at_100_diff1 |
|
value: 32.1302666444128 |
|
- type: nauc_precision_at_100_max |
|
value: 59.829884850113594 |
|
- type: nauc_precision_at_10_diff1 |
|
value: 28.35768310864352 |
|
- type: nauc_precision_at_10_max |
|
value: 44.56445585989902 |
|
- type: nauc_precision_at_1_diff1 |
|
value: 40.652132824871664 |
|
- type: nauc_precision_at_1_max |
|
value: 28.53606743237387 |
|
- type: nauc_precision_at_20_diff1 |
|
value: 29.66063385017264 |
|
- type: nauc_precision_at_20_max |
|
value: 48.87458861994212 |
|
- type: nauc_precision_at_3_diff1 |
|
value: 29.054817586181176 |
|
- type: nauc_precision_at_3_max |
|
value: 36.488441946705876 |
|
- type: nauc_precision_at_5_diff1 |
|
value: 27.79863250059474 |
|
- type: nauc_precision_at_5_max |
|
value: 40.02591480379938 |
|
- type: nauc_recall_at_1000_diff1 |
|
value: 20.695105766987187 |
|
- type: nauc_recall_at_1000_max |
|
value: 79.24780459990147 |
|
- type: nauc_recall_at_100_diff1 |
|
value: 32.13026664441275 |
|
- type: nauc_recall_at_100_max |
|
value: 59.829884850113736 |
|
- type: nauc_recall_at_10_diff1 |
|
value: 28.357683108643496 |
|
- type: nauc_recall_at_10_max |
|
value: 44.56445585989908 |
|
- type: nauc_recall_at_1_diff1 |
|
value: 40.652132824871664 |
|
- type: nauc_recall_at_1_max |
|
value: 28.53606743237387 |
|
- type: nauc_recall_at_20_diff1 |
|
value: 29.660633850172687 |
|
- type: nauc_recall_at_20_max |
|
value: 48.87458861994213 |
|
- type: nauc_recall_at_3_diff1 |
|
value: 29.05481758618121 |
|
- type: nauc_recall_at_3_max |
|
value: 36.48844194670593 |
|
- type: nauc_recall_at_5_diff1 |
|
value: 27.798632500594728 |
|
- type: nauc_recall_at_5_max |
|
value: 40.025914803799395 |
|
- type: ndcg_at_1 |
|
value: 26.598 |
|
- type: ndcg_at_10 |
|
value: 43.902 |
|
- type: ndcg_at_100 |
|
value: 48.647 |
|
- type: ndcg_at_1000 |
|
value: 50.135 |
|
- type: ndcg_at_20 |
|
value: 45.794000000000004 |
|
- type: ndcg_at_3 |
|
value: 37.233 |
|
- type: ndcg_at_5 |
|
value: 40.47 |
|
- type: precision_at_1 |
|
value: 26.598 |
|
- type: precision_at_10 |
|
value: 6.334 |
|
- type: precision_at_100 |
|
value: 0.8619999999999999 |
|
- type: precision_at_1000 |
|
value: 0.098 |
|
- type: precision_at_20 |
|
value: 3.5360000000000005 |
|
- type: precision_at_3 |
|
value: 14.954 |
|
- type: precision_at_5 |
|
value: 10.552999999999999 |
|
- type: recall_at_1 |
|
value: 26.598 |
|
- type: recall_at_10 |
|
value: 63.342 |
|
- type: recall_at_100 |
|
value: 86.226 |
|
- type: recall_at_1000 |
|
value: 98.143 |
|
- type: recall_at_20 |
|
value: 70.72500000000001 |
|
- type: recall_at_3 |
|
value: 44.862 |
|
- type: recall_at_5 |
|
value: 52.763000000000005 |
|
- task: |
|
type: Classification |
|
dataset: |
|
type: mteb/amazon_reviews_multi |
|
name: MTEB AmazonReviewsClassification (fr) |
|
config: fr |
|
split: test |
|
revision: 1399c76144fd37290681b995c656ef9b2e06e26d |
|
metrics: |
|
- type: accuracy |
|
value: 45.504 |
|
- type: f1 |
|
value: 43.653559521530944 |
|
- type: f1_weighted |
|
value: 43.65355952153093 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: maastrichtlawtech/bsard |
|
name: MTEB BSARDRetrieval |
|
config: default |
|
split: test |
|
revision: 5effa1b9b5fa3b0f9e12523e6e43e5f86a6e6d59 |
|
metrics: |
|
- type: map_at_1 |
|
value: 6.757000000000001 |
|
- type: map_at_10 |
|
value: 10.569 |
|
- type: map_at_100 |
|
value: 11.641 |
|
- type: map_at_1000 |
|
value: 11.758000000000001 |
|
- type: map_at_20 |
|
value: 11.152 |
|
- type: map_at_3 |
|
value: 9.009 |
|
- type: map_at_5 |
|
value: 9.82 |
|
- type: mrr_at_1 |
|
value: 6.756756756756757 |
|
- type: mrr_at_10 |
|
value: 10.568604318604317 |
|
- type: mrr_at_100 |
|
value: 11.640572409499667 |
|
- type: mrr_at_1000 |
|
value: 11.757590970218725 |
|
- type: mrr_at_20 |
|
value: 11.152253605813977 |
|
- type: mrr_at_3 |
|
value: 9.00900900900901 |
|
- type: mrr_at_5 |
|
value: 9.819819819819822 |
|
- type: nauc_map_at_1000_diff1 |
|
value: 20.497861726027473 |
|
- type: nauc_map_at_1000_max |
|
value: 8.515289767591149 |
|
- type: nauc_map_at_100_diff1 |
|
value: 20.457264311409304 |
|
- type: nauc_map_at_100_max |
|
value: 8.478315371126714 |
|
- type: nauc_map_at_10_diff1 |
|
value: 22.122755317386826 |
|
- type: nauc_map_at_10_max |
|
value: 8.684832816651243 |
|
- type: nauc_map_at_1_diff1 |
|
value: 32.7324170733489 |
|
- type: nauc_map_at_1_max |
|
value: 17.632507133954086 |
|
- type: nauc_map_at_20_diff1 |
|
value: 21.309144510706552 |
|
- type: nauc_map_at_20_max |
|
value: 8.357569194331324 |
|
- type: nauc_map_at_3_diff1 |
|
value: 24.640888091380244 |
|
- type: nauc_map_at_3_max |
|
value: 8.512417316260153 |
|
- type: nauc_map_at_5_diff1 |
|
value: 23.696706451320555 |
|
- type: nauc_map_at_5_max |
|
value: 10.065581499162409 |
|
- type: nauc_mrr_at_1000_diff1 |
|
value: 20.497861726027473 |
|
- type: nauc_mrr_at_1000_max |
|
value: 8.515289767591149 |
|
- type: nauc_mrr_at_100_diff1 |
|
value: 20.457264311409304 |
|
- type: nauc_mrr_at_100_max |
|
value: 8.478315371126714 |
|
- type: nauc_mrr_at_10_diff1 |
|
value: 22.122755317386826 |
|
- type: nauc_mrr_at_10_max |
|
value: 8.684832816651243 |
|
- type: nauc_mrr_at_1_diff1 |
|
value: 32.7324170733489 |
|
- type: nauc_mrr_at_1_max |
|
value: 17.632507133954086 |
|
- type: nauc_mrr_at_20_diff1 |
|
value: 21.309144510706552 |
|
- type: nauc_mrr_at_20_max |
|
value: 8.357569194331324 |
|
- type: nauc_mrr_at_3_diff1 |
|
value: 24.640888091380244 |
|
- type: nauc_mrr_at_3_max |
|
value: 8.512417316260153 |
|
- type: nauc_mrr_at_5_diff1 |
|
value: 23.696706451320555 |
|
- type: nauc_mrr_at_5_max |
|
value: 10.065581499162409 |
|
- type: nauc_ndcg_at_1000_diff1 |
|
value: 13.492135744038377 |
|
- type: nauc_ndcg_at_1000_max |
|
value: 9.019754831261519 |
|
- type: nauc_ndcg_at_100_diff1 |
|
value: 12.386959698428296 |
|
- type: nauc_ndcg_at_100_max |
|
value: 8.140082932773288 |
|
- type: nauc_ndcg_at_10_diff1 |
|
value: 18.08185602779908 |
|
- type: nauc_ndcg_at_10_max |
|
value: 6.451070792965509 |
|
- type: nauc_ndcg_at_1_diff1 |
|
value: 32.7324170733489 |
|
- type: nauc_ndcg_at_1_max |
|
value: 17.632507133954086 |
|
- type: nauc_ndcg_at_20_diff1 |
|
value: 16.104176022358285 |
|
- type: nauc_ndcg_at_20_max |
|
value: 5.670070730016123 |
|
- type: nauc_ndcg_at_3_diff1 |
|
value: 22.51956353681352 |
|
- type: nauc_ndcg_at_3_max |
|
value: 6.154988622749747 |
|
- type: nauc_ndcg_at_5_diff1 |
|
value: 20.90624606199523 |
|
- type: nauc_ndcg_at_5_max |
|
value: 9.018795972091642 |
|
- type: nauc_precision_at_1000_diff1 |
|
value: -7.5156227166904666 |
|
- type: nauc_precision_at_1000_max |
|
value: 21.389191162087265 |
|
- type: nauc_precision_at_100_diff1 |
|
value: -3.485942444137334 |
|
- type: nauc_precision_at_100_max |
|
value: 11.604808001151612 |
|
- type: nauc_precision_at_10_diff1 |
|
value: 10.569691962167767 |
|
- type: nauc_precision_at_10_max |
|
value: 2.5249979762023176 |
|
- type: nauc_precision_at_1_diff1 |
|
value: 32.7324170733489 |
|
- type: nauc_precision_at_1_max |
|
value: 17.632507133954086 |
|
- type: nauc_precision_at_20_diff1 |
|
value: 6.993263678069275 |
|
- type: nauc_precision_at_20_max |
|
value: 1.212342757686577 |
|
- type: nauc_precision_at_3_diff1 |
|
value: 17.747702545254146 |
|
- type: nauc_precision_at_3_max |
|
value: 0.8595742156164364 |
|
- type: nauc_precision_at_5_diff1 |
|
value: 15.109328976502349 |
|
- type: nauc_precision_at_5_max |
|
value: 7.278141846526832 |
|
- type: nauc_recall_at_1000_diff1 |
|
value: -7.515622716690405 |
|
- type: nauc_recall_at_1000_max |
|
value: 21.389191162087183 |
|
- type: nauc_recall_at_100_diff1 |
|
value: -3.4859424441373585 |
|
- type: nauc_recall_at_100_max |
|
value: 11.604808001151582 |
|
- type: nauc_recall_at_10_diff1 |
|
value: 10.569691962167727 |
|
- type: nauc_recall_at_10_max |
|
value: 2.524997976202258 |
|
- type: nauc_recall_at_1_diff1 |
|
value: 32.7324170733489 |
|
- type: nauc_recall_at_1_max |
|
value: 17.632507133954086 |
|
- type: nauc_recall_at_20_diff1 |
|
value: 6.9932636780692325 |
|
- type: nauc_recall_at_20_max |
|
value: 1.2123427576865247 |
|
- type: nauc_recall_at_3_diff1 |
|
value: 17.74770254525415 |
|
- type: nauc_recall_at_3_max |
|
value: 0.8595742156164401 |
|
- type: nauc_recall_at_5_diff1 |
|
value: 15.109328976502375 |
|
- type: nauc_recall_at_5_max |
|
value: 7.278141846526856 |
|
- type: ndcg_at_1 |
|
value: 6.757000000000001 |
|
- type: ndcg_at_10 |
|
value: 13.147 |
|
- type: ndcg_at_100 |
|
value: 18.932 |
|
- type: ndcg_at_1000 |
|
value: 22.663 |
|
- type: ndcg_at_20 |
|
value: 15.222 |
|
- type: ndcg_at_3 |
|
value: 9.812999999999999 |
|
- type: ndcg_at_5 |
|
value: 11.286 |
|
- type: precision_at_1 |
|
value: 6.757000000000001 |
|
- type: precision_at_10 |
|
value: 2.162 |
|
- type: precision_at_100 |
|
value: 0.5 |
|
- type: precision_at_1000 |
|
value: 0.08099999999999999 |
|
- type: precision_at_20 |
|
value: 1.486 |
|
- type: precision_at_3 |
|
value: 4.054 |
|
- type: precision_at_5 |
|
value: 3.1530000000000005 |
|
- type: recall_at_1 |
|
value: 6.757000000000001 |
|
- type: recall_at_10 |
|
value: 21.622 |
|
- type: recall_at_100 |
|
value: 50.0 |
|
- type: recall_at_1000 |
|
value: 80.631 |
|
- type: recall_at_20 |
|
value: 29.73 |
|
- type: recall_at_3 |
|
value: 12.162 |
|
- type: recall_at_5 |
|
value: 15.766 |
|
- task: |
|
type: Clustering |
|
dataset: |
|
type: lyon-nlp/clustering-hal-s2s |
|
name: MTEB HALClusteringS2S |
|
config: default |
|
split: test |
|
revision: e06ebbbb123f8144bef1a5d18796f3dec9ae2915 |
|
metrics: |
|
- type: v_measure |
|
value: 24.858750161104126 |
|
- type: v_measures |
|
value: [0.2984717048033491, 0.2554963718476627, 0.27306743677619566, 0.2655317293951285, 0.22365142310852648] |
|
- task: |
|
type: Clustering |
|
dataset: |
|
type: mlsum |
|
name: MTEB MLSUMClusteringP2P |
|
config: fr |
|
split: test |
|
revision: b5d54f8f3b61ae17845046286940f03c6bc79bc7 |
|
metrics: |
|
- type: v_measure |
|
value: 39.196489283513955 |
|
- type: v_measures |
|
value: [0.397079645426394, 0.41814959265244056, 0.4075805000522318, 0.3777207448521023, 0.3499037023664506] |
|
- task: |
|
type: Clustering |
|
dataset: |
|
type: mlsum |
|
name: MTEB MLSUMClusteringS2S |
|
config: fr |
|
split: test |
|
revision: b5d54f8f3b61ae17845046286940f03c6bc79bc7 |
|
metrics: |
|
- type: v_measure |
|
value: 38.90242301200363 |
|
- type: v_measures |
|
value: [0.3879152638224075, 0.4155376524170288, 0.41644489647223826, 0.3720493791140543, 0.33885028730003314] |
|
- task: |
|
type: Classification |
|
dataset: |
|
type: mteb/mtop_domain |
|
name: MTEB MTOPDomainClassification (fr) |
|
config: fr |
|
split: test |
|
revision: d80d48c1eb48d3562165c59d59d0034df9fff0bf |
|
metrics: |
|
- type: accuracy |
|
value: 86.88380833072345 |
|
- type: f1 |
|
value: 86.96787213648228 |
|
- type: f1_weighted |
|
value: 86.83432508604707 |
|
- task: |
|
type: Classification |
|
dataset: |
|
type: mteb/mtop_intent |
|
name: MTEB MTOPIntentClassification (fr) |
|
config: fr |
|
split: test |
|
revision: ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba |
|
metrics: |
|
- type: accuracy |
|
value: 63.780144065142494 |
|
- type: f1 |
|
value: 46.070400220183394 |
|
- type: f1_weighted |
|
value: 66.2871618019472 |
|
- task: |
|
type: Classification |
|
dataset: |
|
type: mteb/masakhanews |
|
name: MTEB MasakhaNEWSClassification (fra) |
|
config: fra |
|
split: test |
|
revision: 18193f187b92da67168c655c9973a165ed9593dd |
|
metrics: |
|
- type: accuracy |
|
value: 73.64928909952606 |
|
- type: f1 |
|
value: 70.38554255346646 |
|
- type: f1_weighted |
|
value: 73.78534895009892 |
|
- task: |
|
type: Clustering |
|
dataset: |
|
type: masakhane/masakhanews |
|
name: MTEB MasakhaNEWSClusteringP2P (fra) |
|
config: fra |
|
split: test |
|
revision: 8ccc72e69e65f40c70e117d8b3c08306bb788b60 |
|
metrics: |
|
- type: v_measure |
|
value: 53.94667838037061 |
|
- type: v_measures |
|
value: [1.0, 0.02235188516574002, 0.3929431892497775, 0.5064698281428253, 0.7755690164601873] |
|
- task: |
|
type: Clustering |
|
dataset: |
|
type: masakhane/masakhanews |
|
name: MTEB MasakhaNEWSClusteringS2S (fra) |
|
config: fra |
|
split: test |
|
revision: 8ccc72e69e65f40c70e117d8b3c08306bb788b60 |
|
metrics: |
|
- type: v_measure |
|
value: 34.0804579102654 |
|
- type: v_measures |
|
value: [1.0, 0.03741154081338069, 0.3515971997960832, 0.027612964551341854, 0.2874011903524643] |
|
- task: |
|
type: Classification |
|
dataset: |
|
type: mteb/amazon_massive_intent |
|
name: MTEB MassiveIntentClassification (fr) |
|
config: fr |
|
split: test |
|
revision: 4672e20407010da34463acc759c162ca9734bca6 |
|
metrics: |
|
- type: accuracy |
|
value: 66.42905178211164 |
|
- type: f1 |
|
value: 64.35827544332014 |
|
- type: f1_weighted |
|
value: 65.57615486214955 |
|
- task: |
|
type: Classification |
|
dataset: |
|
type: mteb/amazon_massive_scenario |
|
name: MTEB MassiveScenarioClassification (fr) |
|
config: fr |
|
split: test |
|
revision: fad2c6e8459f9e1c45d9315f4953d921437d70f8 |
|
metrics: |
|
- type: accuracy |
|
value: 70.72293207800942 |
|
- type: f1 |
|
value: 70.02549388005589 |
|
- type: f1_weighted |
|
value: 70.47189927452128 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: jinaai/mintakaqa |
|
name: MTEB MintakaRetrieval (fr) |
|
config: fr |
|
split: test |
|
revision: efa78cc2f74bbcd21eff2261f9e13aebe40b814e |
|
metrics: |
|
- type: map_at_1 |
|
value: 19.41 |
|
- type: map_at_10 |
|
value: 28.254 |
|
- type: map_at_100 |
|
value: 29.355999999999998 |
|
- type: map_at_1000 |
|
value: 29.444 |
|
- type: map_at_20 |
|
value: 28.918 |
|
- type: map_at_3 |
|
value: 25.833000000000002 |
|
- type: map_at_5 |
|
value: 27.141 |
|
- type: mrr_at_1 |
|
value: 19.41031941031941 |
|
- type: mrr_at_10 |
|
value: 28.254309504309553 |
|
- type: mrr_at_100 |
|
value: 29.35551984998816 |
|
- type: mrr_at_1000 |
|
value: 29.44409433115412 |
|
- type: mrr_at_20 |
|
value: 28.91784141291608 |
|
- type: mrr_at_3 |
|
value: 25.832650832650856 |
|
- type: mrr_at_5 |
|
value: 27.141004641004695 |
|
- type: nauc_map_at_1000_diff1 |
|
value: 21.162510480664746 |
|
- type: nauc_map_at_1000_max |
|
value: 30.110199608609662 |
|
- type: nauc_map_at_100_diff1 |
|
value: 21.14297046777571 |
|
- type: nauc_map_at_100_max |
|
value: 30.122604363568612 |
|
- type: nauc_map_at_10_diff1 |
|
value: 21.199056431758258 |
|
- type: nauc_map_at_10_max |
|
value: 30.270246884757647 |
|
- type: nauc_map_at_1_diff1 |
|
value: 26.23217963475422 |
|
- type: nauc_map_at_1_max |
|
value: 25.259209130612533 |
|
- type: nauc_map_at_20_diff1 |
|
value: 21.13743075881012 |
|
- type: nauc_map_at_20_max |
|
value: 30.22788200140141 |
|
- type: nauc_map_at_3_diff1 |
|
value: 21.972972142629406 |
|
- type: nauc_map_at_3_max |
|
value: 29.75552354821982 |
|
- type: nauc_map_at_5_diff1 |
|
value: 21.79084317530715 |
|
- type: nauc_map_at_5_max |
|
value: 30.387209515342473 |
|
- type: nauc_mrr_at_1000_diff1 |
|
value: 21.162510480664746 |
|
- type: nauc_mrr_at_1000_max |
|
value: 30.110199608609662 |
|
- type: nauc_mrr_at_100_diff1 |
|
value: 21.14297046777571 |
|
- type: nauc_mrr_at_100_max |
|
value: 30.122604363568612 |
|
- type: nauc_mrr_at_10_diff1 |
|
value: 21.199056431758258 |
|
- type: nauc_mrr_at_10_max |
|
value: 30.270246884757647 |
|
- type: nauc_mrr_at_1_diff1 |
|
value: 26.23217963475422 |
|
- type: nauc_mrr_at_1_max |
|
value: 25.259209130612533 |
|
- type: nauc_mrr_at_20_diff1 |
|
value: 21.13743075881012 |
|
- type: nauc_mrr_at_20_max |
|
value: 30.22788200140141 |
|
- type: nauc_mrr_at_3_diff1 |
|
value: 21.972972142629406 |
|
- type: nauc_mrr_at_3_max |
|
value: 29.75552354821982 |
|
- type: nauc_mrr_at_5_diff1 |
|
value: 21.79084317530715 |
|
- type: nauc_mrr_at_5_max |
|
value: 30.387209515342473 |
|
- type: nauc_ndcg_at_1000_diff1 |
|
value: 19.513865750647934 |
|
- type: nauc_ndcg_at_1000_max |
|
value: 30.88832578481811 |
|
- type: nauc_ndcg_at_100_diff1 |
|
value: 18.80377362018204 |
|
- type: nauc_ndcg_at_100_max |
|
value: 30.952837388928288 |
|
- type: nauc_ndcg_at_10_diff1 |
|
value: 19.020409577228836 |
|
- type: nauc_ndcg_at_10_max |
|
value: 31.70346401198393 |
|
- type: nauc_ndcg_at_1_diff1 |
|
value: 26.23217963475422 |
|
- type: nauc_ndcg_at_1_max |
|
value: 25.259209130612533 |
|
- type: nauc_ndcg_at_20_diff1 |
|
value: 18.76950183960116 |
|
- type: nauc_ndcg_at_20_max |
|
value: 31.598953492190745 |
|
- type: nauc_ndcg_at_3_diff1 |
|
value: 20.824179655562357 |
|
- type: nauc_ndcg_at_3_max |
|
value: 31.0541305570042 |
|
- type: nauc_ndcg_at_5_diff1 |
|
value: 20.483978673834002 |
|
- type: nauc_ndcg_at_5_max |
|
value: 32.10721692420019 |
|
- type: nauc_precision_at_1000_diff1 |
|
value: 2.2196094973599374 |
|
- type: nauc_precision_at_1000_max |
|
value: 40.25789000420308 |
|
- type: nauc_precision_at_100_diff1 |
|
value: 8.63682048375218 |
|
- type: nauc_precision_at_100_max |
|
value: 32.32052516290328 |
|
- type: nauc_precision_at_10_diff1 |
|
value: 12.717058324435426 |
|
- type: nauc_precision_at_10_max |
|
value: 35.265251512978985 |
|
- type: nauc_precision_at_1_diff1 |
|
value: 26.23217963475422 |
|
- type: nauc_precision_at_1_max |
|
value: 25.259209130612533 |
|
- type: nauc_precision_at_20_diff1 |
|
value: 11.220279583941753 |
|
- type: nauc_precision_at_20_max |
|
value: 35.20392547315769 |
|
- type: nauc_precision_at_3_diff1 |
|
value: 17.863403819374867 |
|
- type: nauc_precision_at_3_max |
|
value: 34.42645261456197 |
|
- type: nauc_precision_at_5_diff1 |
|
value: 17.114444816553625 |
|
- type: nauc_precision_at_5_max |
|
value: 36.59616112935629 |
|
- type: nauc_recall_at_1000_diff1 |
|
value: 2.219609497359936 |
|
- type: nauc_recall_at_1000_max |
|
value: 40.257890004202366 |
|
- type: nauc_recall_at_100_diff1 |
|
value: 8.63682048375225 |
|
- type: nauc_recall_at_100_max |
|
value: 32.320525162903365 |
|
- type: nauc_recall_at_10_diff1 |
|
value: 12.71705832443547 |
|
- type: nauc_recall_at_10_max |
|
value: 35.26525151297903 |
|
- type: nauc_recall_at_1_diff1 |
|
value: 26.23217963475422 |
|
- type: nauc_recall_at_1_max |
|
value: 25.259209130612533 |
|
- type: nauc_recall_at_20_diff1 |
|
value: 11.220279583941677 |
|
- type: nauc_recall_at_20_max |
|
value: 35.203925473157646 |
|
- type: nauc_recall_at_3_diff1 |
|
value: 17.86340381937489 |
|
- type: nauc_recall_at_3_max |
|
value: 34.42645261456199 |
|
- type: nauc_recall_at_5_diff1 |
|
value: 17.11444481655362 |
|
- type: nauc_recall_at_5_max |
|
value: 36.59616112935631 |
|
- type: ndcg_at_1 |
|
value: 19.41 |
|
- type: ndcg_at_10 |
|
value: 32.956 |
|
- type: ndcg_at_100 |
|
value: 38.504 |
|
- type: ndcg_at_1000 |
|
value: 41.217 |
|
- type: ndcg_at_20 |
|
value: 35.35 |
|
- type: ndcg_at_3 |
|
value: 27.898 |
|
- type: ndcg_at_5 |
|
value: 30.249 |
|
- type: precision_at_1 |
|
value: 19.41 |
|
- type: precision_at_10 |
|
value: 4.795 |
|
- type: precision_at_100 |
|
value: 0.744 |
|
- type: precision_at_1000 |
|
value: 0.096 |
|
- type: precision_at_20 |
|
value: 2.869 |
|
- type: precision_at_3 |
|
value: 11.289 |
|
- type: precision_at_5 |
|
value: 7.912 |
|
- type: recall_at_1 |
|
value: 19.41 |
|
- type: recall_at_10 |
|
value: 47.952 |
|
- type: recall_at_100 |
|
value: 74.36500000000001 |
|
- type: recall_at_1000 |
|
value: 96.478 |
|
- type: recall_at_20 |
|
value: 57.371 |
|
- type: recall_at_3 |
|
value: 33.866 |
|
- type: recall_at_5 |
|
value: 39.558 |
|
- task: |
|
type: PairClassification |
|
dataset: |
|
type: GEM/opusparcus |
|
name: MTEB OpusparcusPC (fr) |
|
config: fr |
|
split: test |
|
revision: 9e9b1f8ef51616073f47f306f7f47dd91663f86a |
|
metrics: |
|
- type: cos_sim_accuracy |
|
value: 84.60490463215259 |
|
- type: cos_sim_ap |
|
value: 94.73385323002613 |
|
- type: cos_sim_f1 |
|
value: 89.28571428571428 |
|
- type: cos_sim_precision |
|
value: 82.83772302463891 |
|
- type: cos_sim_recall |
|
value: 96.8222442899702 |
|
- type: dot_accuracy |
|
value: 84.60490463215259 |
|
- type: dot_ap |
|
value: 94.73385323002613 |
|
- type: dot_f1 |
|
value: 89.28571428571428 |
|
- type: dot_precision |
|
value: 82.83772302463891 |
|
- type: dot_recall |
|
value: 96.8222442899702 |
|
- type: euclidean_accuracy |
|
value: 84.60490463215259 |
|
- type: euclidean_ap |
|
value: 94.73385323002613 |
|
- type: euclidean_f1 |
|
value: 89.28571428571428 |
|
- type: euclidean_precision |
|
value: 82.83772302463891 |
|
- type: euclidean_recall |
|
value: 96.8222442899702 |
|
- type: manhattan_accuracy |
|
value: 84.46866485013624 |
|
- type: manhattan_ap |
|
value: 94.6952667850496 |
|
- type: manhattan_f1 |
|
value: 89.20454545454546 |
|
- type: manhattan_precision |
|
value: 85.24886877828054 |
|
- type: manhattan_recall |
|
value: 93.54518371400199 |
|
- type: max_accuracy |
|
value: 84.60490463215259 |
|
- type: max_ap |
|
value: 94.73385323002613 |
|
- type: max_f1 |
|
value: 89.28571428571428 |
|
- task: |
|
type: PairClassification |
|
dataset: |
|
type: paws-x |
|
name: MTEB PawsX (fr) |
|
config: fr |
|
split: test |
|
revision: 8a04d940a42cd40658986fdd8e3da561533a3646 |
|
metrics: |
|
- type: cos_sim_accuracy |
|
value: 64.25 |
|
- type: cos_sim_ap |
|
value: 64.3383111759356 |
|
- type: cos_sim_f1 |
|
value: 63.03703703703704 |
|
- type: cos_sim_precision |
|
value: 47.35670562047857 |
|
- type: cos_sim_recall |
|
value: 94.24141749723145 |
|
- type: dot_accuracy |
|
value: 64.25 |
|
- type: dot_ap |
|
value: 64.33647413039195 |
|
- type: dot_f1 |
|
value: 63.03703703703704 |
|
- type: dot_precision |
|
value: 47.35670562047857 |
|
- type: dot_recall |
|
value: 94.24141749723145 |
|
- type: euclidean_accuracy |
|
value: 64.25 |
|
- type: euclidean_ap |
|
value: 64.33837256418407 |
|
- type: euclidean_f1 |
|
value: 63.03703703703704 |
|
- type: euclidean_precision |
|
value: 47.35670562047857 |
|
- type: euclidean_recall |
|
value: 94.24141749723145 |
|
- type: manhattan_accuracy |
|
value: 64.3 |
|
- type: manhattan_ap |
|
value: 64.32795078191543 |
|
- type: manhattan_f1 |
|
value: 62.935507783543365 |
|
- type: manhattan_precision |
|
value: 47.298050139275766 |
|
- type: manhattan_recall |
|
value: 94.01993355481729 |
|
- type: max_accuracy |
|
value: 64.3 |
|
- type: max_ap |
|
value: 64.33837256418407 |
|
- type: max_f1 |
|
value: 63.03703703703704 |
|
- task: |
|
type: STS |
|
dataset: |
|
type: Lajavaness/SICK-fr |
|
name: MTEB SICKFr |
|
config: default |
|
split: test |
|
revision: e077ab4cf4774a1e36d86d593b150422fafd8e8a |
|
metrics: |
|
- type: cos_sim_pearson |
|
value: 84.72950355896789 |
|
- type: cos_sim_spearman |
|
value: 79.4608290812696 |
|
- type: euclidean_pearson |
|
value: 81.32539142627735 |
|
- type: euclidean_spearman |
|
value: 79.46019403205177 |
|
- type: manhattan_pearson |
|
value: 81.35998006674681 |
|
- type: manhattan_spearman |
|
value: 79.41438354042496 |
|
- task: |
|
type: STS |
|
dataset: |
|
type: mteb/sts22-crosslingual-sts |
|
name: MTEB STS22 (fr) |
|
config: fr |
|
split: test |
|
revision: eea2b4fe26a775864c896887d910b76a8098ad3f |
|
metrics: |
|
- type: cos_sim_pearson |
|
value: 81.94721498063055 |
|
- type: cos_sim_spearman |
|
value: 83.27561639335909 |
|
- type: euclidean_pearson |
|
value: 80.74250472409508 |
|
- type: euclidean_spearman |
|
value: 83.27561639335909 |
|
- type: manhattan_pearson |
|
value: 80.55336080634422 |
|
- type: manhattan_spearman |
|
value: 83.46556509775091 |
|
- task: |
|
type: STS |
|
dataset: |
|
type: PhilipMay/stsb_multi_mt |
|
name: MTEB STSBenchmarkMultilingualSTS (fr) |
|
config: fr |
|
split: test |
|
revision: 93d57ef91790589e3ce9c365164337a8a78b7632 |
|
metrics: |
|
- type: cos_sim_pearson |
|
value: 86.42208373352562 |
|
- type: cos_sim_spearman |
|
value: 86.99991276887566 |
|
- type: euclidean_pearson |
|
value: 85.50325028600815 |
|
- type: euclidean_spearman |
|
value: 87.00166758198344 |
|
- type: manhattan_pearson |
|
value: 85.51048739822163 |
|
- type: manhattan_spearman |
|
value: 86.98373812309134 |
|
- task: |
|
type: Summarization |
|
dataset: |
|
type: lyon-nlp/summarization-summeval-fr-p2p |
|
name: MTEB SummEvalFr |
|
config: default |
|
split: test |
|
revision: b385812de6a9577b6f4d0f88c6a6e35395a94054 |
|
metrics: |
|
- type: cos_sim_pearson |
|
value: 31.911797754639164 |
|
- type: cos_sim_spearman |
|
value: 32.17186521965941 |
|
- type: dot_pearson |
|
value: 31.911796813216963 |
|
- type: dot_spearman |
|
value: 32.17186521965941 |
|
- task: |
|
type: Reranking |
|
dataset: |
|
type: lyon-nlp/mteb-fr-reranking-syntec-s2p |
|
name: MTEB SyntecReranking |
|
config: default |
|
split: test |
|
revision: daf0863838cd9e3ba50544cdce3ac2b338a1b0ad |
|
metrics: |
|
- type: map |
|
value: 86.04285714285714 |
|
- type: mrr |
|
value: 86.04285714285714 |
|
- type: nAUC_map_diff1 |
|
value: 67.26948440486838 |
|
- type: nAUC_map_max |
|
value: 1.8106095852919237 |
|
- type: nAUC_mrr_diff1 |
|
value: 67.26948440486838 |
|
- type: nAUC_mrr_max |
|
value: 1.8106095852919237 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: lyon-nlp/mteb-fr-retrieval-syntec-s2p |
|
name: MTEB SyntecRetrieval |
|
config: default |
|
split: test |
|
revision: 19661ccdca4dfc2d15122d776b61685f48c68ca9 |
|
metrics: |
|
- type: map_at_1 |
|
value: 72.0 |
|
- type: map_at_10 |
|
value: 81.294 |
|
- type: map_at_100 |
|
value: 81.428 |
|
- type: map_at_1000 |
|
value: 81.428 |
|
- type: map_at_20 |
|
value: 81.38499999999999 |
|
- type: map_at_3 |
|
value: 79.833 |
|
- type: map_at_5 |
|
value: 80.88300000000001 |
|
- type: mrr_at_1 |
|
value: 72.0 |
|
- type: mrr_at_10 |
|
value: 81.2940476190476 |
|
- type: mrr_at_100 |
|
value: 81.42843497082626 |
|
- type: mrr_at_1000 |
|
value: 81.42843497082626 |
|
- type: mrr_at_20 |
|
value: 81.3849567099567 |
|
- type: mrr_at_3 |
|
value: 79.83333333333334 |
|
- type: mrr_at_5 |
|
value: 80.88333333333333 |
|
- type: nauc_map_at_1000_diff1 |
|
value: 49.54228699135176 |
|
- type: nauc_map_at_1000_max |
|
value: 0.06067639126124744 |
|
- type: nauc_map_at_100_diff1 |
|
value: 49.54228699135176 |
|
- type: nauc_map_at_100_max |
|
value: 0.06067639126124744 |
|
- type: nauc_map_at_10_diff1 |
|
value: 49.53852081026105 |
|
- type: nauc_map_at_10_max |
|
value: 0.3053366025276881 |
|
- type: nauc_map_at_1_diff1 |
|
value: 46.31028693528697 |
|
- type: nauc_map_at_1_max |
|
value: -1.3144841269841376 |
|
- type: nauc_map_at_20_diff1 |
|
value: 49.506524393346865 |
|
- type: nauc_map_at_20_max |
|
value: -0.08620516816116486 |
|
- type: nauc_map_at_3_diff1 |
|
value: 51.19253694434259 |
|
- type: nauc_map_at_3_max |
|
value: 0.668963341320456 |
|
- type: nauc_map_at_5_diff1 |
|
value: 49.37737335974092 |
|
- type: nauc_map_at_5_max |
|
value: -0.33590080679527184 |
|
- type: nauc_mrr_at_1000_diff1 |
|
value: 49.54228699135176 |
|
- type: nauc_mrr_at_1000_max |
|
value: 0.06067639126124744 |
|
- type: nauc_mrr_at_100_diff1 |
|
value: 49.54228699135176 |
|
- type: nauc_mrr_at_100_max |
|
value: 0.06067639126124744 |
|
- type: nauc_mrr_at_10_diff1 |
|
value: 49.53852081026105 |
|
- type: nauc_mrr_at_10_max |
|
value: 0.3053366025276881 |
|
- type: nauc_mrr_at_1_diff1 |
|
value: 46.31028693528697 |
|
- type: nauc_mrr_at_1_max |
|
value: -1.3144841269841376 |
|
- type: nauc_mrr_at_20_diff1 |
|
value: 49.506524393346865 |
|
- type: nauc_mrr_at_20_max |
|
value: -0.08620516816116486 |
|
- type: nauc_mrr_at_3_diff1 |
|
value: 51.19253694434259 |
|
- type: nauc_mrr_at_3_max |
|
value: 0.668963341320456 |
|
- type: nauc_mrr_at_5_diff1 |
|
value: 49.37737335974092 |
|
- type: nauc_mrr_at_5_max |
|
value: -0.33590080679527184 |
|
- type: nauc_ndcg_at_1000_diff1 |
|
value: 49.6848382380357 |
|
- type: nauc_ndcg_at_1000_max |
|
value: 0.04870501937096382 |
|
- type: nauc_ndcg_at_100_diff1 |
|
value: 49.6848382380357 |
|
- type: nauc_ndcg_at_100_max |
|
value: 0.04870501937096382 |
|
- type: nauc_ndcg_at_10_diff1 |
|
value: 49.57645777272915 |
|
- type: nauc_ndcg_at_10_max |
|
value: 0.6430420679440534 |
|
- type: nauc_ndcg_at_1_diff1 |
|
value: 46.31028693528697 |
|
- type: nauc_ndcg_at_1_max |
|
value: -1.3144841269841376 |
|
- type: nauc_ndcg_at_20_diff1 |
|
value: 49.45017977018584 |
|
- type: nauc_ndcg_at_20_max |
|
value: -0.9049646537819854 |
|
- type: nauc_ndcg_at_3_diff1 |
|
value: 52.898658060430904 |
|
- type: nauc_ndcg_at_3_max |
|
value: 1.3070987858400047 |
|
- type: nauc_ndcg_at_5_diff1 |
|
value: 49.054456158711595 |
|
- type: nauc_ndcg_at_5_max |
|
value: -1.177736876794348 |
|
- type: nauc_precision_at_1000_diff1 |
|
value: nan |
|
- type: nauc_precision_at_1000_max |
|
value: nan |
|
- type: nauc_precision_at_100_diff1 |
|
value: nan |
|
- type: nauc_precision_at_100_max |
|
value: nan |
|
- type: nauc_precision_at_10_diff1 |
|
value: 45.611577964519334 |
|
- type: nauc_precision_at_10_max |
|
value: 7.936507936508234 |
|
- type: nauc_precision_at_1_diff1 |
|
value: 46.31028693528697 |
|
- type: nauc_precision_at_1_max |
|
value: -1.3144841269841376 |
|
- type: nauc_precision_at_20_diff1 |
|
value: 35.80765639589114 |
|
- type: nauc_precision_at_20_max |
|
value: -56.34920634920767 |
|
- type: nauc_precision_at_3_diff1 |
|
value: 61.56395891690006 |
|
- type: nauc_precision_at_3_max |
|
value: 4.509803921568394 |
|
- type: nauc_precision_at_5_diff1 |
|
value: 43.15592903828254 |
|
- type: nauc_precision_at_5_max |
|
value: -11.783380018673482 |
|
- type: nauc_recall_at_1000_diff1 |
|
value: nan |
|
- type: nauc_recall_at_1000_max |
|
value: nan |
|
- type: nauc_recall_at_100_diff1 |
|
value: nan |
|
- type: nauc_recall_at_100_max |
|
value: nan |
|
- type: nauc_recall_at_10_diff1 |
|
value: 45.61157796451899 |
|
- type: nauc_recall_at_10_max |
|
value: 7.9365079365084235 |
|
- type: nauc_recall_at_1_diff1 |
|
value: 46.31028693528697 |
|
- type: nauc_recall_at_1_max |
|
value: -1.3144841269841376 |
|
- type: nauc_recall_at_20_diff1 |
|
value: 35.80765639589109 |
|
- type: nauc_recall_at_20_max |
|
value: -56.34920634920657 |
|
- type: nauc_recall_at_3_diff1 |
|
value: 61.5639589169002 |
|
- type: nauc_recall_at_3_max |
|
value: 4.509803921568655 |
|
- type: nauc_recall_at_5_diff1 |
|
value: 43.15592903828185 |
|
- type: nauc_recall_at_5_max |
|
value: -11.783380018674132 |
|
- type: ndcg_at_1 |
|
value: 72.0 |
|
- type: ndcg_at_10 |
|
value: 85.39999999999999 |
|
- type: ndcg_at_100 |
|
value: 85.897 |
|
- type: ndcg_at_1000 |
|
value: 85.897 |
|
- type: ndcg_at_20 |
|
value: 85.679 |
|
- type: ndcg_at_3 |
|
value: 82.44 |
|
- type: ndcg_at_5 |
|
value: 84.418 |
|
- type: precision_at_1 |
|
value: 72.0 |
|
- type: precision_at_10 |
|
value: 9.8 |
|
- type: precision_at_100 |
|
value: 1.0 |
|
- type: precision_at_1000 |
|
value: 0.1 |
|
- type: precision_at_20 |
|
value: 4.95 |
|
- type: precision_at_3 |
|
value: 30.0 |
|
- type: precision_at_5 |
|
value: 19.0 |
|
- type: recall_at_1 |
|
value: 72.0 |
|
- type: recall_at_10 |
|
value: 98.0 |
|
- type: recall_at_100 |
|
value: 100.0 |
|
- type: recall_at_1000 |
|
value: 100.0 |
|
- type: recall_at_20 |
|
value: 99.0 |
|
- type: recall_at_3 |
|
value: 90.0 |
|
- type: recall_at_5 |
|
value: 95.0 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: jinaai/xpqa |
|
name: MTEB XPQARetrieval (fr) |
|
config: fr |
|
split: test |
|
revision: c99d599f0a6ab9b85b065da6f9d94f9cf731679f |
|
metrics: |
|
- type: map_at_1 |
|
value: 39.007999999999996 |
|
- type: map_at_10 |
|
value: 60.319 |
|
- type: map_at_100 |
|
value: 61.644 |
|
- type: map_at_1000 |
|
value: 61.712 |
|
- type: map_at_20 |
|
value: 61.053000000000004 |
|
- type: map_at_3 |
|
value: 53.942 |
|
- type: map_at_5 |
|
value: 58.132 |
|
- type: mrr_at_1 |
|
value: 60.747663551401864 |
|
- type: mrr_at_10 |
|
value: 68.10636404094345 |
|
- type: mrr_at_100 |
|
value: 68.57873500135119 |
|
- type: mrr_at_1000 |
|
value: 68.60183171580495 |
|
- type: mrr_at_20 |
|
value: 68.36478690417064 |
|
- type: mrr_at_3 |
|
value: 66.08811748998662 |
|
- type: mrr_at_5 |
|
value: 67.12950600801062 |
|
- type: nauc_map_at_1000_diff1 |
|
value: 46.633293504114434 |
|
- type: nauc_map_at_1000_max |
|
value: 49.64101214126472 |
|
- type: nauc_map_at_100_diff1 |
|
value: 46.591074735810764 |
|
- type: nauc_map_at_100_max |
|
value: 49.60435205919251 |
|
- type: nauc_map_at_10_diff1 |
|
value: 46.43486044009182 |
|
- type: nauc_map_at_10_max |
|
value: 49.222376322201065 |
|
- type: nauc_map_at_1_diff1 |
|
value: 54.05448738315762 |
|
- type: nauc_map_at_1_max |
|
value: 24.89423418246206 |
|
- type: nauc_map_at_20_diff1 |
|
value: 46.5131815367993 |
|
- type: nauc_map_at_20_max |
|
value: 49.59934686413147 |
|
- type: nauc_map_at_3_diff1 |
|
value: 48.9688624037045 |
|
- type: nauc_map_at_3_max |
|
value: 42.49957358403678 |
|
- type: nauc_map_at_5_diff1 |
|
value: 46.99183039261338 |
|
- type: nauc_map_at_5_max |
|
value: 47.53639129265315 |
|
- type: nauc_mrr_at_1000_diff1 |
|
value: 55.19161872624107 |
|
- type: nauc_mrr_at_1000_max |
|
value: 57.00358990989949 |
|
- type: nauc_mrr_at_100_diff1 |
|
value: 55.18922613003231 |
|
- type: nauc_mrr_at_100_max |
|
value: 56.99910514727928 |
|
- type: nauc_mrr_at_10_diff1 |
|
value: 55.030904237371224 |
|
- type: nauc_mrr_at_10_max |
|
value: 56.99207425207498 |
|
- type: nauc_mrr_at_1_diff1 |
|
value: 57.34582863547949 |
|
- type: nauc_mrr_at_1_max |
|
value: 56.98349812853321 |
|
- type: nauc_mrr_at_20_diff1 |
|
value: 55.0844531407365 |
|
- type: nauc_mrr_at_20_max |
|
value: 57.00264208604279 |
|
- type: nauc_mrr_at_3_diff1 |
|
value: 55.64735827895618 |
|
- type: nauc_mrr_at_3_max |
|
value: 57.29703659670222 |
|
- type: nauc_mrr_at_5_diff1 |
|
value: 55.38932070005733 |
|
- type: nauc_mrr_at_5_max |
|
value: 57.25407452051235 |
|
- type: nauc_ndcg_at_1000_diff1 |
|
value: 48.35929794131023 |
|
- type: nauc_ndcg_at_1000_max |
|
value: 52.522778834890325 |
|
- type: nauc_ndcg_at_100_diff1 |
|
value: 47.56861995950162 |
|
- type: nauc_ndcg_at_100_max |
|
value: 51.88529740719871 |
|
- type: nauc_ndcg_at_10_diff1 |
|
value: 46.87985909159253 |
|
- type: nauc_ndcg_at_10_max |
|
value: 51.38385177949589 |
|
- type: nauc_ndcg_at_1_diff1 |
|
value: 57.34582863547949 |
|
- type: nauc_ndcg_at_1_max |
|
value: 56.98349812853321 |
|
- type: nauc_ndcg_at_20_diff1 |
|
value: 47.02262106267654 |
|
- type: nauc_ndcg_at_20_max |
|
value: 51.98897867452814 |
|
- type: nauc_ndcg_at_3_diff1 |
|
value: 49.03406876232345 |
|
- type: nauc_ndcg_at_3_max |
|
value: 51.51024175391598 |
|
- type: nauc_ndcg_at_5_diff1 |
|
value: 47.91197516404686 |
|
- type: nauc_ndcg_at_5_max |
|
value: 49.81634328349405 |
|
- type: nauc_precision_at_1000_diff1 |
|
value: -17.71701381388244 |
|
- type: nauc_precision_at_1000_max |
|
value: 21.388531798131364 |
|
- type: nauc_precision_at_100_diff1 |
|
value: -15.539336210367946 |
|
- type: nauc_precision_at_100_max |
|
value: 24.698569356368356 |
|
- type: nauc_precision_at_10_diff1 |
|
value: -4.847564718245167 |
|
- type: nauc_precision_at_10_max |
|
value: 36.89232193362945 |
|
- type: nauc_precision_at_1_diff1 |
|
value: 57.34582863547949 |
|
- type: nauc_precision_at_1_max |
|
value: 56.98349812853321 |
|
- type: nauc_precision_at_20_diff1 |
|
value: -8.665616069723095 |
|
- type: nauc_precision_at_20_max |
|
value: 33.10610080847679 |
|
- type: nauc_precision_at_3_diff1 |
|
value: 11.05940284130611 |
|
- type: nauc_precision_at_3_max |
|
value: 46.70869419036014 |
|
- type: nauc_precision_at_5_diff1 |
|
value: 1.5834393191629197 |
|
- type: nauc_precision_at_5_max |
|
value: 41.862527913783865 |
|
- type: nauc_recall_at_1000_diff1 |
|
value: -10.569638499832076 |
|
- type: nauc_recall_at_1000_max |
|
value: 8.749844537815326 |
|
- type: nauc_recall_at_100_diff1 |
|
value: 27.487071443233262 |
|
- type: nauc_recall_at_100_max |
|
value: 35.4122293490153 |
|
- type: nauc_recall_at_10_diff1 |
|
value: 35.91432645476102 |
|
- type: nauc_recall_at_10_max |
|
value: 44.19204765665408 |
|
- type: nauc_recall_at_1_diff1 |
|
value: 54.05448738315762 |
|
- type: nauc_recall_at_1_max |
|
value: 24.89423418246206 |
|
- type: nauc_recall_at_20_diff1 |
|
value: 34.4589376419412 |
|
- type: nauc_recall_at_20_max |
|
value: 44.798804712797136 |
|
- type: nauc_recall_at_3_diff1 |
|
value: 45.60515874084114 |
|
- type: nauc_recall_at_3_max |
|
value: 39.109212749550764 |
|
- type: nauc_recall_at_5_diff1 |
|
value: 41.35313462606428 |
|
- type: nauc_recall_at_5_max |
|
value: 43.39841866434438 |
|
- type: ndcg_at_1 |
|
value: 60.748000000000005 |
|
- type: ndcg_at_10 |
|
value: 66.232 |
|
- type: ndcg_at_100 |
|
value: 70.745 |
|
- type: ndcg_at_1000 |
|
value: 71.87 |
|
- type: ndcg_at_20 |
|
value: 67.977 |
|
- type: ndcg_at_3 |
|
value: 61.06399999999999 |
|
- type: ndcg_at_5 |
|
value: 62.744 |
|
- type: precision_at_1 |
|
value: 60.748000000000005 |
|
- type: precision_at_10 |
|
value: 15.434000000000001 |
|
- type: precision_at_100 |
|
value: 1.924 |
|
- type: precision_at_1000 |
|
value: 0.207 |
|
- type: precision_at_20 |
|
value: 8.378 |
|
- type: precision_at_3 |
|
value: 37.161 |
|
- type: precision_at_5 |
|
value: 26.916 |
|
- type: recall_at_1 |
|
value: 39.007999999999996 |
|
- type: recall_at_10 |
|
value: 74.929 |
|
- type: recall_at_100 |
|
value: 92.508 |
|
- type: recall_at_1000 |
|
value: 99.8 |
|
- type: recall_at_20 |
|
value: 80.343 |
|
- type: recall_at_3 |
|
value: 58.367000000000004 |
|
- type: recall_at_5 |
|
value: 66.446 |
|
license: apache-2.0 |
|
language: |
|
- fr |
|
- en |
|
--- |
|
|
|
# [bilingual-embedding-large](https://huggingface.co/Lajavaness/bilingual-embedding-large) |
|
|
|
Bilingual-embedding is an embedding model for two languages: French and English. It is a specialized sentence-embedding model trained specifically for this language pair, built on [XLM-RoBERTa](https://huggingface.co/FacebookAI/xlm-roberta-large), a pre-trained multilingual language model. The model uses XLM-RoBERTa to encode English and French sentences into a 1024-dimensional vector space, facilitating a wide range of applications from semantic search to text clustering. The embeddings capture the nuanced meanings of English and French sentences, reflecting both the lexical and contextual layers of the language. |
|
|
|
|
|
## Full Model Architecture |
|
``` |
|
SentenceTransformer( |
|
(0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BilingualModel |
|
(1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True}) |
|
(2): Normalize() |
|
) |
|
``` |
|
|
|
## Training and Fine-tuning process |
|
### Stage 1: NLI Training |
|
- Dataset: [(SNLI+XNLI) for english+french] |
|
- Method: Training using Multiple Negatives Ranking Loss. This stage focused on improving the model's ability to discern and rank nuanced differences in sentence semantics. |
|
### Stage 3: Continued Fine-tuning for Semantic Textual Similarity on STS Benchmark |
|
- Dataset: [STSB-fr and en] |
|
- Method: Fine-tuning specifically for the semantic textual similarity benchmark using Siamese BERT-Networks configured with the 'sentence-transformers' library. |
|
### Stage 4: Advanced Augmentation Fine-tuning |
|
- Dataset: STSB augmented with generated [silver samples derived from gold samples](https://www.sbert.net/examples/training/data_augmentation/README.html) |
|
- Method: Employed an advanced strategy using [Augmented SBERT](https://arxiv.org/abs/2010.08240) with Pair Sampling Strategies, integrating both Cross-Encoder and Bi-Encoder models. This stage further refined the embeddings by enriching the training data dynamically, enhancing the model's robustness and accuracy. |
|
|
|
|
|
## Usage: |
|
|
|
Using this model becomes easy when you have [sentence-transformers](https://www.SBERT.net) installed: |
|
|
|
``` |
|
pip install -U sentence-transformers |
|
``` |
|
|
|
Then you can use the model like this: |
|
|
|
```python |
|
from sentence_transformers import SentenceTransformer |
|
|
|
sentences = ["Paris est une capitale de la France", "Paris is a capital of France"] |
|
|
|
model = SentenceTransformer('Lajavaness/bilingual-embedding-large', trust_remote_code=True) |
|
embeddings = model.encode(sentences) |
|
print(embeddings) |
|
|
|
``` |
|
|
|
|
|
|
|
|
|
|
|
## Evaluation |
|
|
|
TODO |
|
|
|
## Citation |
|
|
|
@article{conneau2019unsupervised, |
|
title={Unsupervised cross-lingual representation learning at scale}, |
|
author={Conneau, Alexis and Khandelwal, Kartikay and Goyal, Naman and Chaudhary, Vishrav and Wenzek, Guillaume and Guzm{\'a}n, Francisco and Grave, Edouard and Ott, Myle and Zettlemoyer, Luke and Stoyanov, Veselin}, |
|
journal={arXiv preprint arXiv:1911.02116}, |
|
year={2019} |
|
} |
|
|
|
@article{reimers2019sentence, |
|
title={Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks}, |
|
author={Reimers, Nils and Gurevych, Iryna}, |
|
journal={arXiv preprint arXiv:1908.10084}, |
|
year={2019} |
|
} |
|
|
|
@article{thakur2020augmented, |
|
title={Augmented SBERT: Data Augmentation Method for Improving Bi-Encoders for Pairwise Sentence Scoring Tasks}, |
|
author={Thakur, Nandan and Reimers, Nils and Daxenberger, Johannes and Gurevych, Iryna}, |
|
journal={arXiv e-prints}, |
|
pages={arXiv--2010}, |
|
year={2020} |