|
--- |
|
library_name: sentence-transformers |
|
pipeline_tag: sentence-similarity |
|
tags: |
|
- sentence-transformers |
|
- feature-extraction |
|
- sentence-similarity |
|
- transformers |
|
- sentence-embedding |
|
- mteb |
|
model-index: |
|
- name: bilingual-document-embedding |
|
results: |
|
- task: |
|
type: Clustering |
|
dataset: |
|
type: lyon-nlp/alloprof |
|
name: MTEB AlloProfClusteringP2P |
|
config: default |
|
split: test |
|
revision: 392ba3f5bcc8c51f578786c1fc3dae648662cb9b |
|
metrics: |
|
- type: v_measure |
|
value: 55.52298673909706 |
|
- type: v_measures |
|
value: [0.5198748380785404, 0.5562521099012603, 0.5322986254464575, 0.5722250987615152, 0.532932258758668] |
|
- task: |
|
type: Clustering |
|
dataset: |
|
type: lyon-nlp/alloprof |
|
name: MTEB AlloProfClusteringS2S |
|
config: default |
|
split: test |
|
revision: 392ba3f5bcc8c51f578786c1fc3dae648662cb9b |
|
metrics: |
|
- type: v_measure |
|
value: 35.802733348353094 |
|
- type: v_measures |
|
value: [0.37359796790048144, 0.36376421464272285, 0.37524966704915225, 0.3749296797757371, 0.36673700158106576] |
|
- task: |
|
type: Reranking |
|
dataset: |
|
type: lyon-nlp/mteb-fr-reranking-alloprof-s2p |
|
name: MTEB AlloprofReranking |
|
config: default |
|
split: test |
|
revision: 65393d0d7a08a10b4e348135e824f385d420b0fd |
|
metrics: |
|
- type: map |
|
value: 73.10088493122083 |
|
- type: mrr |
|
value: 74.33452929243086 |
|
- type: nAUC_map_diff1 |
|
value: 56.63750231223696 |
|
- type: nAUC_map_max |
|
value: 27.066268470355492 |
|
- type: nAUC_mrr_diff1 |
|
value: 55.33487252773409 |
|
- type: nAUC_mrr_max |
|
value: 27.328424865584367 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: lyon-nlp/alloprof |
|
name: MTEB AlloprofRetrieval |
|
config: default |
|
split: test |
|
revision: fcf295ea64c750f41fadbaa37b9b861558e1bfbd |
|
metrics: |
|
- type: map_at_1 |
|
value: 28.282 |
|
- type: map_at_10 |
|
value: 38.805 |
|
- type: map_at_100 |
|
value: 39.804 |
|
- type: map_at_1000 |
|
value: 39.859 |
|
- type: map_at_20 |
|
value: 39.428999999999995 |
|
- type: map_at_3 |
|
value: 35.838 |
|
- type: map_at_5 |
|
value: 37.537 |
|
- type: mrr_at_1 |
|
value: 28.281519861830745 |
|
- type: mrr_at_10 |
|
value: 38.805171752063 |
|
- type: mrr_at_100 |
|
value: 39.80444636265999 |
|
- type: mrr_at_1000 |
|
value: 39.858722309770464 |
|
- type: mrr_at_20 |
|
value: 39.42867574383368 |
|
- type: mrr_at_3 |
|
value: 35.83765112262528 |
|
- type: mrr_at_5 |
|
value: 37.53670120898111 |
|
- type: nauc_map_at_1000_diff1 |
|
value: 42.44333601195352 |
|
- type: nauc_map_at_1000_max |
|
value: 41.88361927698375 |
|
- type: nauc_map_at_100_diff1 |
|
value: 42.42746715522874 |
|
- type: nauc_map_at_100_max |
|
value: 41.913701611267015 |
|
- type: nauc_map_at_10_diff1 |
|
value: 42.25094726032311 |
|
- type: nauc_map_at_10_max |
|
value: 41.76772459035808 |
|
- type: nauc_map_at_1_diff1 |
|
value: 48.03355307282109 |
|
- type: nauc_map_at_1_max |
|
value: 38.73226070718987 |
|
- type: nauc_map_at_20_diff1 |
|
value: 42.3550770875435 |
|
- type: nauc_map_at_20_max |
|
value: 41.89957212013687 |
|
- type: nauc_map_at_3_diff1 |
|
value: 42.88695727955848 |
|
- type: nauc_map_at_3_max |
|
value: 40.81262402287836 |
|
- type: nauc_map_at_5_diff1 |
|
value: 42.34041989483334 |
|
- type: nauc_map_at_5_max |
|
value: 41.36458206255729 |
|
- type: nauc_mrr_at_1000_diff1 |
|
value: 42.44333601195352 |
|
- type: nauc_mrr_at_1000_max |
|
value: 41.88361927698375 |
|
- type: nauc_mrr_at_100_diff1 |
|
value: 42.42746715522874 |
|
- type: nauc_mrr_at_100_max |
|
value: 41.913701611267015 |
|
- type: nauc_mrr_at_10_diff1 |
|
value: 42.25094726032311 |
|
- type: nauc_mrr_at_10_max |
|
value: 41.76772459035808 |
|
- type: nauc_mrr_at_1_diff1 |
|
value: 48.03355307282109 |
|
- type: nauc_mrr_at_1_max |
|
value: 38.73226070718987 |
|
- type: nauc_mrr_at_20_diff1 |
|
value: 42.3550770875435 |
|
- type: nauc_mrr_at_20_max |
|
value: 41.89957212013687 |
|
- type: nauc_mrr_at_3_diff1 |
|
value: 42.88695727955848 |
|
- type: nauc_mrr_at_3_max |
|
value: 40.81262402287836 |
|
- type: nauc_mrr_at_5_diff1 |
|
value: 42.34041989483334 |
|
- type: nauc_mrr_at_5_max |
|
value: 41.36458206255729 |
|
- type: nauc_ndcg_at_1000_diff1 |
|
value: 41.35830258715452 |
|
- type: nauc_ndcg_at_1000_max |
|
value: 43.2765379475269 |
|
- type: nauc_ndcg_at_100_diff1 |
|
value: 40.95047094384412 |
|
- type: nauc_ndcg_at_100_max |
|
value: 44.293436979483594 |
|
- type: nauc_ndcg_at_10_diff1 |
|
value: 40.0359339979518 |
|
- type: nauc_ndcg_at_10_max |
|
value: 43.390909520520076 |
|
- type: nauc_ndcg_at_1_diff1 |
|
value: 48.03355307282109 |
|
- type: nauc_ndcg_at_1_max |
|
value: 38.73226070718987 |
|
- type: nauc_ndcg_at_20_diff1 |
|
value: 40.35056898575259 |
|
- type: nauc_ndcg_at_20_max |
|
value: 43.991764985610345 |
|
- type: nauc_ndcg_at_3_diff1 |
|
value: 41.40129960980627 |
|
- type: nauc_ndcg_at_3_max |
|
value: 41.41104483378663 |
|
- type: nauc_ndcg_at_5_diff1 |
|
value: 40.36007384476364 |
|
- type: nauc_ndcg_at_5_max |
|
value: 42.383481303106414 |
|
- type: nauc_precision_at_1000_diff1 |
|
value: 34.36657255351115 |
|
- type: nauc_precision_at_1000_max |
|
value: 74.91976431868189 |
|
- type: nauc_precision_at_100_diff1 |
|
value: 33.55702830592739 |
|
- type: nauc_precision_at_100_max |
|
value: 65.71347416493107 |
|
- type: nauc_precision_at_10_diff1 |
|
value: 32.521448032129 |
|
- type: nauc_precision_at_10_max |
|
value: 49.19930788953475 |
|
- type: nauc_precision_at_1_diff1 |
|
value: 48.03355307282109 |
|
- type: nauc_precision_at_1_max |
|
value: 38.73226070718987 |
|
- type: nauc_precision_at_20_diff1 |
|
value: 32.57892299703891 |
|
- type: nauc_precision_at_20_max |
|
value: 53.45967162017302 |
|
- type: nauc_precision_at_3_diff1 |
|
value: 37.249551957650795 |
|
- type: nauc_precision_at_3_max |
|
value: 43.08267504682664 |
|
- type: nauc_precision_at_5_diff1 |
|
value: 34.44393985129692 |
|
- type: nauc_precision_at_5_max |
|
value: 45.460096642832646 |
|
- type: nauc_recall_at_1000_diff1 |
|
value: 34.36657255350965 |
|
- type: nauc_recall_at_1000_max |
|
value: 74.91976431868211 |
|
- type: nauc_recall_at_100_diff1 |
|
value: 33.55702830592741 |
|
- type: nauc_recall_at_100_max |
|
value: 65.71347416493116 |
|
- type: nauc_recall_at_10_diff1 |
|
value: 32.52144803212901 |
|
- type: nauc_recall_at_10_max |
|
value: 49.199307889534715 |
|
- type: nauc_recall_at_1_diff1 |
|
value: 48.03355307282109 |
|
- type: nauc_recall_at_1_max |
|
value: 38.73226070718987 |
|
- type: nauc_recall_at_20_diff1 |
|
value: 32.57892299703892 |
|
- type: nauc_recall_at_20_max |
|
value: 53.45967162017302 |
|
- type: nauc_recall_at_3_diff1 |
|
value: 37.249551957650766 |
|
- type: nauc_recall_at_3_max |
|
value: 43.082675046826644 |
|
- type: nauc_recall_at_5_diff1 |
|
value: 34.4439398512969 |
|
- type: nauc_recall_at_5_max |
|
value: 45.460096642832674 |
|
- type: ndcg_at_1 |
|
value: 28.282 |
|
- type: ndcg_at_10 |
|
value: 44.421 |
|
- type: ndcg_at_100 |
|
value: 49.447 |
|
- type: ndcg_at_1000 |
|
value: 50.981 |
|
- type: ndcg_at_20 |
|
value: 46.671 |
|
- type: ndcg_at_3 |
|
value: 38.289 |
|
- type: ndcg_at_5 |
|
value: 41.349999999999994 |
|
- type: precision_at_1 |
|
value: 28.282 |
|
- type: precision_at_10 |
|
value: 6.231 |
|
- type: precision_at_100 |
|
value: 0.8619999999999999 |
|
- type: precision_at_1000 |
|
value: 0.098 |
|
- type: precision_at_20 |
|
value: 3.558 |
|
- type: precision_at_3 |
|
value: 15.126999999999999 |
|
- type: precision_at_5 |
|
value: 10.561 |
|
- type: recall_at_1 |
|
value: 28.282 |
|
- type: recall_at_10 |
|
value: 62.306 |
|
- type: recall_at_100 |
|
value: 86.226 |
|
- type: recall_at_1000 |
|
value: 98.489 |
|
- type: recall_at_20 |
|
value: 71.15700000000001 |
|
- type: recall_at_3 |
|
value: 45.379999999999995 |
|
- type: recall_at_5 |
|
value: 52.807 |
|
- task: |
|
type: Classification |
|
dataset: |
|
type: mteb/amazon_reviews_multi |
|
name: MTEB AmazonReviewsClassification (fr) |
|
config: fr |
|
split: test |
|
revision: 1399c76144fd37290681b995c656ef9b2e06e26d |
|
metrics: |
|
- type: accuracy |
|
value: 44.10999999999999 |
|
- type: f1 |
|
value: 42.00584553745547 |
|
- type: f1_weighted |
|
value: 42.005845537455485 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: maastrichtlawtech/bsard |
|
name: MTEB BSARDRetrieval |
|
config: default |
|
split: test |
|
revision: 5effa1b9b5fa3b0f9e12523e6e43e5f86a6e6d59 |
|
metrics: |
|
- type: map_at_1 |
|
value: 4.955 |
|
- type: map_at_10 |
|
value: 9.103 |
|
- type: map_at_100 |
|
value: 9.998999999999999 |
|
- type: map_at_1000 |
|
value: 10.136000000000001 |
|
- type: map_at_20 |
|
value: 9.554 |
|
- type: map_at_3 |
|
value: 7.4319999999999995 |
|
- type: map_at_5 |
|
value: 7.95 |
|
- type: mrr_at_1 |
|
value: 4.954954954954955 |
|
- type: mrr_at_10 |
|
value: 9.102852852852852 |
|
- type: mrr_at_100 |
|
value: 9.999215850941926 |
|
- type: mrr_at_1000 |
|
value: 10.13616308946331 |
|
- type: mrr_at_20 |
|
value: 9.554402632963003 |
|
- type: mrr_at_3 |
|
value: 7.432432432432434 |
|
- type: mrr_at_5 |
|
value: 7.950450450450451 |
|
- type: nauc_map_at_1000_diff1 |
|
value: 14.655819915811785 |
|
- type: nauc_map_at_1000_max |
|
value: 9.188182207979008 |
|
- type: nauc_map_at_100_diff1 |
|
value: 14.517637755979687 |
|
- type: nauc_map_at_100_max |
|
value: 9.060725563022503 |
|
- type: nauc_map_at_10_diff1 |
|
value: 15.776144582905358 |
|
- type: nauc_map_at_10_max |
|
value: 9.448668398689462 |
|
- type: nauc_map_at_1_diff1 |
|
value: 19.10921794840591 |
|
- type: nauc_map_at_1_max |
|
value: 4.060331068810239 |
|
- type: nauc_map_at_20_diff1 |
|
value: 15.061809327427353 |
|
- type: nauc_map_at_20_max |
|
value: 9.085953657690329 |
|
- type: nauc_map_at_3_diff1 |
|
value: 18.42793018906856 |
|
- type: nauc_map_at_3_max |
|
value: 10.10140103912974 |
|
- type: nauc_map_at_5_diff1 |
|
value: 17.407972669931233 |
|
- type: nauc_map_at_5_max |
|
value: 10.064885264376228 |
|
- type: nauc_mrr_at_1000_diff1 |
|
value: 14.655819915811785 |
|
- type: nauc_mrr_at_1000_max |
|
value: 9.188182207979008 |
|
- type: nauc_mrr_at_100_diff1 |
|
value: 14.517637755979687 |
|
- type: nauc_mrr_at_100_max |
|
value: 9.060725563022503 |
|
- type: nauc_mrr_at_10_diff1 |
|
value: 15.776144582905358 |
|
- type: nauc_mrr_at_10_max |
|
value: 9.448668398689462 |
|
- type: nauc_mrr_at_1_diff1 |
|
value: 19.10921794840591 |
|
- type: nauc_mrr_at_1_max |
|
value: 4.060331068810239 |
|
- type: nauc_mrr_at_20_diff1 |
|
value: 15.061809327427353 |
|
- type: nauc_mrr_at_20_max |
|
value: 9.085953657690329 |
|
- type: nauc_mrr_at_3_diff1 |
|
value: 18.42793018906856 |
|
- type: nauc_mrr_at_3_max |
|
value: 10.10140103912974 |
|
- type: nauc_mrr_at_5_diff1 |
|
value: 17.407972669931233 |
|
- type: nauc_mrr_at_5_max |
|
value: 10.064885264376228 |
|
- type: nauc_ndcg_at_1000_diff1 |
|
value: 11.940580725648152 |
|
- type: nauc_ndcg_at_1000_max |
|
value: 11.004283166102807 |
|
- type: nauc_ndcg_at_100_diff1 |
|
value: 10.009680762933215 |
|
- type: nauc_ndcg_at_100_max |
|
value: 8.444186642393188 |
|
- type: nauc_ndcg_at_10_diff1 |
|
value: 14.423251037136561 |
|
- type: nauc_ndcg_at_10_max |
|
value: 10.614014795363303 |
|
- type: nauc_ndcg_at_1_diff1 |
|
value: 19.10921794840591 |
|
- type: nauc_ndcg_at_1_max |
|
value: 4.060331068810239 |
|
- type: nauc_ndcg_at_20_diff1 |
|
value: 12.486198272876521 |
|
- type: nauc_ndcg_at_20_max |
|
value: 9.550225653436467 |
|
- type: nauc_ndcg_at_3_diff1 |
|
value: 18.813915768129757 |
|
- type: nauc_ndcg_at_3_max |
|
value: 11.865670858870484 |
|
- type: nauc_ndcg_at_5_diff1 |
|
value: 17.01715479783127 |
|
- type: nauc_ndcg_at_5_max |
|
value: 11.523181173967899 |
|
- type: nauc_precision_at_1000_diff1 |
|
value: 6.162580085242911 |
|
- type: nauc_precision_at_1000_max |
|
value: 21.74545120171883 |
|
- type: nauc_precision_at_100_diff1 |
|
value: 1.4492186570094137 |
|
- type: nauc_precision_at_100_max |
|
value: 5.320582161712451 |
|
- type: nauc_precision_at_10_diff1 |
|
value: 12.199838986983115 |
|
- type: nauc_precision_at_10_max |
|
value: 12.409471572004998 |
|
- type: nauc_precision_at_1_diff1 |
|
value: 19.10921794840591 |
|
- type: nauc_precision_at_1_max |
|
value: 4.060331068810239 |
|
- type: nauc_precision_at_20_diff1 |
|
value: 8.089525252638769 |
|
- type: nauc_precision_at_20_max |
|
value: 9.829600854870332 |
|
- type: nauc_precision_at_3_diff1 |
|
value: 19.71630962813128 |
|
- type: nauc_precision_at_3_max |
|
value: 15.560242379569136 |
|
- type: nauc_precision_at_5_diff1 |
|
value: 16.151579517326258 |
|
- type: nauc_precision_at_5_max |
|
value: 14.225120177799683 |
|
- type: nauc_recall_at_1000_diff1 |
|
value: 6.1625800852429595 |
|
- type: nauc_recall_at_1000_max |
|
value: 21.745451201718687 |
|
- type: nauc_recall_at_100_diff1 |
|
value: 1.4492186570093863 |
|
- type: nauc_recall_at_100_max |
|
value: 5.320582161712405 |
|
- type: nauc_recall_at_10_diff1 |
|
value: 12.199838986983083 |
|
- type: nauc_recall_at_10_max |
|
value: 12.409471572004962 |
|
- type: nauc_recall_at_1_diff1 |
|
value: 19.10921794840591 |
|
- type: nauc_recall_at_1_max |
|
value: 4.060331068810239 |
|
- type: nauc_recall_at_20_diff1 |
|
value: 8.089525252638692 |
|
- type: nauc_recall_at_20_max |
|
value: 9.829600854870273 |
|
- type: nauc_recall_at_3_diff1 |
|
value: 19.716309628131278 |
|
- type: nauc_recall_at_3_max |
|
value: 15.560242379569129 |
|
- type: nauc_recall_at_5_diff1 |
|
value: 16.151579517326265 |
|
- type: nauc_recall_at_5_max |
|
value: 14.225120177799697 |
|
- type: ndcg_at_1 |
|
value: 4.955 |
|
- type: ndcg_at_10 |
|
value: 12.005 |
|
- type: ndcg_at_100 |
|
value: 17.238 |
|
- type: ndcg_at_1000 |
|
value: 21.287 |
|
- type: ndcg_at_20 |
|
value: 13.691999999999998 |
|
- type: ndcg_at_3 |
|
value: 8.296000000000001 |
|
- type: ndcg_at_5 |
|
value: 9.225999999999999 |
|
- type: precision_at_1 |
|
value: 4.955 |
|
- type: precision_at_10 |
|
value: 2.162 |
|
- type: precision_at_100 |
|
value: 0.482 |
|
- type: precision_at_1000 |
|
value: 0.08099999999999999 |
|
- type: precision_at_20 |
|
value: 1.419 |
|
- type: precision_at_3 |
|
value: 3.604 |
|
- type: precision_at_5 |
|
value: 2.613 |
|
- type: recall_at_1 |
|
value: 4.955 |
|
- type: recall_at_10 |
|
value: 21.622 |
|
- type: recall_at_100 |
|
value: 48.198 |
|
- type: recall_at_1000 |
|
value: 81.081 |
|
- type: recall_at_20 |
|
value: 28.377999999999997 |
|
- type: recall_at_3 |
|
value: 10.811 |
|
- type: recall_at_5 |
|
value: 13.062999999999999 |
|
- task: |
|
type: Clustering |
|
dataset: |
|
type: lyon-nlp/clustering-hal-s2s |
|
name: MTEB HALClusteringS2S |
|
config: default |
|
split: test |
|
revision: e06ebbbb123f8144bef1a5d18796f3dec9ae2915 |
|
metrics: |
|
- type: v_measure |
|
value: 23.137623974622365 |
|
- type: v_measures |
|
value: [0.2802068838665942, 0.2565274984774815, 0.25245022445056786, 0.22595460950575297, 0.20177741591393913] |
|
- task: |
|
type: Clustering |
|
dataset: |
|
type: reciTAL/mlsum |
|
name: MTEB MLSUMClusteringP2P |
|
config: default |
|
split: test |
|
revision: b5d54f8f3b61ae17845046286940f03c6bc79bc7 |
|
metrics: |
|
- type: v_measure |
|
value: 40.31003279146524 |
|
- type: v_measures |
|
value: [0.3943651322813771, 0.4189000344922205, 0.4101443880670743, 0.3832149080991847, 0.37602613534689566] |
|
- task: |
|
type: Clustering |
|
dataset: |
|
type: reciTAL/mlsum |
|
name: MTEB MLSUMClusteringS2S |
|
config: default |
|
split: test |
|
revision: b5d54f8f3b61ae17845046286940f03c6bc79bc7 |
|
metrics: |
|
- type: v_measure |
|
value: 40.04524841336757 |
|
- type: v_measures |
|
value: [0.39835449199860185, 0.405905613221237, 0.40326782414397255, 0.40882879348632284, 0.3683302592759367] |
|
- task: |
|
type: Classification |
|
dataset: |
|
type: mteb/mtop_domain |
|
name: MTEB MTOPDomainClassification (fr) |
|
config: fr |
|
split: test |
|
revision: d80d48c1eb48d3562165c59d59d0034df9fff0bf |
|
metrics: |
|
- type: accuracy |
|
value: 87.82023175696837 |
|
- type: f1 |
|
value: 87.58287510797385 |
|
- type: f1_weighted |
|
value: 87.75645870762435 |
|
- task: |
|
type: Classification |
|
dataset: |
|
type: mteb/mtop_intent |
|
name: MTEB MTOPIntentClassification (fr) |
|
config: fr |
|
split: test |
|
revision: ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba |
|
metrics: |
|
- type: accuracy |
|
value: 58.628249295333546 |
|
- type: f1 |
|
value: 42.22070573172825 |
|
- type: f1_weighted |
|
value: 60.62087995743649 |
|
- task: |
|
type: Classification |
|
dataset: |
|
type: mteb/masakhanews |
|
name: MTEB MasakhaNEWSClassification (fra) |
|
config: fra |
|
split: test |
|
revision: 18193f187b92da67168c655c9973a165ed9593dd |
|
metrics: |
|
- type: accuracy |
|
value: 69.81042654028435 |
|
- type: f1 |
|
value: 66.05811881796396 |
|
- type: f1_weighted |
|
value: 70.34901566149948 |
|
- task: |
|
type: Clustering |
|
dataset: |
|
type: masakhane/masakhanews |
|
name: MTEB MasakhaNEWSClusteringP2P (fra) |
|
config: fra |
|
split: test |
|
revision: 8ccc72e69e65f40c70e117d8b3c08306bb788b60 |
|
metrics: |
|
- type: v_measure |
|
value: 45.02712178986078 |
|
- type: v_measures |
|
value: [1.0, 0.23955793240111928, 0.7158920010774062, 0.036391635653837, 0.25951452036067674] |
|
- task: |
|
type: Clustering |
|
dataset: |
|
type: masakhane/masakhanews |
|
name: MTEB MasakhaNEWSClusteringS2S (fra) |
|
config: fra |
|
split: test |
|
revision: 8ccc72e69e65f40c70e117d8b3c08306bb788b60 |
|
metrics: |
|
- type: v_measure |
|
value: 30.38607254306223 |
|
- type: v_measures |
|
value: [1.0, 0.01936507478006705, 0.19876372667844472, 0.17182595867380823, 0.12934886702079137] |
|
- task: |
|
type: Classification |
|
dataset: |
|
type: mteb/amazon_massive_intent |
|
name: MTEB MassiveIntentClassification (fr) |
|
config: fr |
|
split: test |
|
revision: 4672e20407010da34463acc759c162ca9734bca6 |
|
metrics: |
|
- type: accuracy |
|
value: 66.13651647612645 |
|
- type: f1 |
|
value: 64.42898347709598 |
|
- type: f1_weighted |
|
value: 65.01442547020224 |
|
- task: |
|
type: Classification |
|
dataset: |
|
type: mteb/amazon_massive_scenario |
|
name: MTEB MassiveScenarioClassification (fr) |
|
config: fr |
|
split: test |
|
revision: fad2c6e8459f9e1c45d9315f4953d921437d70f8 |
|
metrics: |
|
- type: accuracy |
|
value: 72.73705447209144 |
|
- type: f1 |
|
value: 72.09285609231057 |
|
- type: f1_weighted |
|
value: 72.34295244611339 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: jinaai/mintakaqa |
|
name: MTEB MintakaRetrieval (fr) |
|
config: fr |
|
split: test |
|
revision: efa78cc2f74bbcd21eff2261f9e13aebe40b814e |
|
metrics: |
|
- type: map_at_1 |
|
value: 13.677 |
|
- type: map_at_10 |
|
value: 21.044 |
|
- type: map_at_100 |
|
value: 22.012 |
|
- type: map_at_1000 |
|
value: 22.125 |
|
- type: map_at_20 |
|
value: 21.573999999999998 |
|
- type: map_at_3 |
|
value: 18.857 |
|
- type: map_at_5 |
|
value: 19.936999999999998 |
|
- type: mrr_at_1 |
|
value: 13.677313677313677 |
|
- type: mrr_at_10 |
|
value: 21.043933543933505 |
|
- type: mrr_at_100 |
|
value: 22.012160523798318 |
|
- type: mrr_at_1000 |
|
value: 22.124555014776913 |
|
- type: mrr_at_20 |
|
value: 21.574199922074904 |
|
- type: mrr_at_3 |
|
value: 18.857493857493825 |
|
- type: mrr_at_5 |
|
value: 19.93652743652738 |
|
- type: nauc_map_at_1000_diff1 |
|
value: 20.63087633823352 |
|
- type: nauc_map_at_1000_max |
|
value: 31.753246807516362 |
|
- type: nauc_map_at_100_diff1 |
|
value: 20.602874174259885 |
|
- type: nauc_map_at_100_max |
|
value: 31.74109681792161 |
|
- type: nauc_map_at_10_diff1 |
|
value: 20.82028964049537 |
|
- type: nauc_map_at_10_max |
|
value: 32.082751313883705 |
|
- type: nauc_map_at_1_diff1 |
|
value: 27.838566854973656 |
|
- type: nauc_map_at_1_max |
|
value: 32.0217755083183 |
|
- type: nauc_map_at_20_diff1 |
|
value: 20.685607874578192 |
|
- type: nauc_map_at_20_max |
|
value: 31.89765440964895 |
|
- type: nauc_map_at_3_diff1 |
|
value: 22.385335765437958 |
|
- type: nauc_map_at_3_max |
|
value: 32.47346568889047 |
|
- type: nauc_map_at_5_diff1 |
|
value: 21.173253596770003 |
|
- type: nauc_map_at_5_max |
|
value: 32.2528418460596 |
|
- type: nauc_mrr_at_1000_diff1 |
|
value: 20.63087633823352 |
|
- type: nauc_mrr_at_1000_max |
|
value: 31.753246807516362 |
|
- type: nauc_mrr_at_100_diff1 |
|
value: 20.602874174259885 |
|
- type: nauc_mrr_at_100_max |
|
value: 31.74109681792161 |
|
- type: nauc_mrr_at_10_diff1 |
|
value: 20.82028964049537 |
|
- type: nauc_mrr_at_10_max |
|
value: 32.082751313883705 |
|
- type: nauc_mrr_at_1_diff1 |
|
value: 27.838566854973656 |
|
- type: nauc_mrr_at_1_max |
|
value: 32.0217755083183 |
|
- type: nauc_mrr_at_20_diff1 |
|
value: 20.685607874578192 |
|
- type: nauc_mrr_at_20_max |
|
value: 31.89765440964895 |
|
- type: nauc_mrr_at_3_diff1 |
|
value: 22.385335765437958 |
|
- type: nauc_mrr_at_3_max |
|
value: 32.47346568889047 |
|
- type: nauc_mrr_at_5_diff1 |
|
value: 21.173253596770003 |
|
- type: nauc_mrr_at_5_max |
|
value: 32.2528418460596 |
|
- type: nauc_ndcg_at_1000_diff1 |
|
value: 18.08460876388022 |
|
- type: nauc_ndcg_at_1000_max |
|
value: 30.282810360048217 |
|
- type: nauc_ndcg_at_100_diff1 |
|
value: 17.119539175602068 |
|
- type: nauc_ndcg_at_100_max |
|
value: 29.66409825853174 |
|
- type: nauc_ndcg_at_10_diff1 |
|
value: 18.23254548133648 |
|
- type: nauc_ndcg_at_10_max |
|
value: 31.52995550586078 |
|
- type: nauc_ndcg_at_1_diff1 |
|
value: 27.838566854973656 |
|
- type: nauc_ndcg_at_1_max |
|
value: 32.0217755083183 |
|
- type: nauc_ndcg_at_20_diff1 |
|
value: 17.769003159911446 |
|
- type: nauc_ndcg_at_20_max |
|
value: 30.929703630445033 |
|
- type: nauc_ndcg_at_3_diff1 |
|
value: 20.96979719261237 |
|
- type: nauc_ndcg_at_3_max |
|
value: 32.363993132409526 |
|
- type: nauc_ndcg_at_5_diff1 |
|
value: 19.00106027591966 |
|
- type: nauc_ndcg_at_5_max |
|
value: 31.962682994281664 |
|
- type: nauc_precision_at_1000_diff1 |
|
value: -0.439767274118902 |
|
- type: nauc_precision_at_1000_max |
|
value: 12.247737195943136 |
|
- type: nauc_precision_at_100_diff1 |
|
value: 5.574224743755663 |
|
- type: nauc_precision_at_100_max |
|
value: 20.625486141114006 |
|
- type: nauc_precision_at_10_diff1 |
|
value: 12.116438700823444 |
|
- type: nauc_precision_at_10_max |
|
value: 30.027073824365324 |
|
- type: nauc_precision_at_1_diff1 |
|
value: 27.838566854973656 |
|
- type: nauc_precision_at_1_max |
|
value: 32.0217755083183 |
|
- type: nauc_precision_at_20_diff1 |
|
value: 10.528730914479825 |
|
- type: nauc_precision_at_20_max |
|
value: 28.101643683820228 |
|
- type: nauc_precision_at_3_diff1 |
|
value: 17.575083081784413 |
|
- type: nauc_precision_at_3_max |
|
value: 32.04257948042897 |
|
- type: nauc_precision_at_5_diff1 |
|
value: 13.87097676219356 |
|
- type: nauc_precision_at_5_max |
|
value: 31.186621554981798 |
|
- type: nauc_recall_at_1000_diff1 |
|
value: -0.4397672741187951 |
|
- type: nauc_recall_at_1000_max |
|
value: 12.247737195943454 |
|
- type: nauc_recall_at_100_diff1 |
|
value: 5.574224743755691 |
|
- type: nauc_recall_at_100_max |
|
value: 20.625486141114028 |
|
- type: nauc_recall_at_10_diff1 |
|
value: 12.116438700823482 |
|
- type: nauc_recall_at_10_max |
|
value: 30.027073824365335 |
|
- type: nauc_recall_at_1_diff1 |
|
value: 27.838566854973656 |
|
- type: nauc_recall_at_1_max |
|
value: 32.0217755083183 |
|
- type: nauc_recall_at_20_diff1 |
|
value: 10.528730914479794 |
|
- type: nauc_recall_at_20_max |
|
value: 28.101643683820228 |
|
- type: nauc_recall_at_3_diff1 |
|
value: 17.57508308178443 |
|
- type: nauc_recall_at_3_max |
|
value: 32.042579480429 |
|
- type: nauc_recall_at_5_diff1 |
|
value: 13.870976762193543 |
|
- type: nauc_recall_at_5_max |
|
value: 31.186621554981787 |
|
- type: ndcg_at_1 |
|
value: 13.677 |
|
- type: ndcg_at_10 |
|
value: 25.191000000000003 |
|
- type: ndcg_at_100 |
|
value: 30.379 |
|
- type: ndcg_at_1000 |
|
value: 33.961999999999996 |
|
- type: ndcg_at_20 |
|
value: 27.1 |
|
- type: ndcg_at_3 |
|
value: 20.546 |
|
- type: ndcg_at_5 |
|
value: 22.505 |
|
- type: precision_at_1 |
|
value: 13.677 |
|
- type: precision_at_10 |
|
value: 3.853 |
|
- type: precision_at_100 |
|
value: 0.639 |
|
- type: precision_at_1000 |
|
value: 0.093 |
|
- type: precision_at_20 |
|
value: 2.3009999999999997 |
|
- type: precision_at_3 |
|
value: 8.477 |
|
- type: precision_at_5 |
|
value: 6.0440000000000005 |
|
- type: recall_at_1 |
|
value: 13.677 |
|
- type: recall_at_10 |
|
value: 38.534 |
|
- type: recall_at_100 |
|
value: 63.922999999999995 |
|
- type: recall_at_1000 |
|
value: 93.407 |
|
- type: recall_at_20 |
|
value: 46.028000000000006 |
|
- type: recall_at_3 |
|
value: 25.430000000000003 |
|
- type: recall_at_5 |
|
value: 30.220999999999997 |
|
- task: |
|
type: PairClassification |
|
dataset: |
|
type: GEM/opusparcus |
|
name: MTEB OpusparcusPC (fr) |
|
config: fr |
|
split: test |
|
revision: 9e9b1f8ef51616073f47f306f7f47dd91663f86a |
|
metrics: |
|
- type: cos_sim_accuracy |
|
value: 82.9700272479564 |
|
- type: cos_sim_ap |
|
value: 93.15021785539084 |
|
- type: cos_sim_f1 |
|
value: 87.97316722568279 |
|
- type: cos_sim_precision |
|
value: 85.0 |
|
- type: cos_sim_recall |
|
value: 91.16186693147964 |
|
- type: dot_accuracy |
|
value: 82.9700272479564 |
|
- type: dot_ap |
|
value: 93.15021785539084 |
|
- type: dot_f1 |
|
value: 87.97316722568279 |
|
- type: dot_precision |
|
value: 85.0 |
|
- type: dot_recall |
|
value: 91.16186693147964 |
|
- type: euclidean_accuracy |
|
value: 82.9700272479564 |
|
- type: euclidean_ap |
|
value: 93.15015081441638 |
|
- type: euclidean_f1 |
|
value: 87.97316722568279 |
|
- type: euclidean_precision |
|
value: 85.0 |
|
- type: euclidean_recall |
|
value: 91.16186693147964 |
|
- type: manhattan_accuracy |
|
value: 82.56130790190735 |
|
- type: manhattan_ap |
|
value: 93.14590481820592 |
|
- type: manhattan_f1 |
|
value: 87.86729857819905 |
|
- type: manhattan_precision |
|
value: 84.04351767905711 |
|
- type: manhattan_recall |
|
value: 92.05561072492552 |
|
- type: max_accuracy |
|
value: 82.9700272479564 |
|
- type: max_ap |
|
value: 93.15021785539084 |
|
- type: max_f1 |
|
value: 87.97316722568279 |
|
- task: |
|
type: PairClassification |
|
dataset: |
|
type: google-research-datasets/paws-x |
|
name: MTEB PawsX (fr) |
|
config: fr |
|
split: test |
|
revision: 8a04d940a42cd40658986fdd8e3da561533a3646 |
|
metrics: |
|
- type: cos_sim_accuracy |
|
value: 64.14999999999999 |
|
- type: cos_sim_ap |
|
value: 63.43794001840604 |
|
- type: cos_sim_f1 |
|
value: 62.59187620889749 |
|
- type: cos_sim_precision |
|
value: 48.097502972651604 |
|
- type: cos_sim_recall |
|
value: 89.59025470653378 |
|
- type: dot_accuracy |
|
value: 64.14999999999999 |
|
- type: dot_ap |
|
value: 63.52400235031554 |
|
- type: dot_f1 |
|
value: 62.59187620889749 |
|
- type: dot_precision |
|
value: 48.097502972651604 |
|
- type: dot_recall |
|
value: 89.59025470653378 |
|
- type: euclidean_accuracy |
|
value: 64.14999999999999 |
|
- type: euclidean_ap |
|
value: 63.43794001840604 |
|
- type: euclidean_f1 |
|
value: 62.59187620889749 |
|
- type: euclidean_precision |
|
value: 48.097502972651604 |
|
- type: euclidean_recall |
|
value: 89.59025470653378 |
|
- type: manhattan_accuracy |
|
value: 64.2 |
|
- type: manhattan_ap |
|
value: 63.46163243480347 |
|
- type: manhattan_f1 |
|
value: 62.540021344717175 |
|
- type: manhattan_precision |
|
value: 46.069182389937104 |
|
- type: manhattan_recall |
|
value: 97.34219269102991 |
|
- type: max_accuracy |
|
value: 64.2 |
|
- type: max_ap |
|
value: 63.52400235031554 |
|
- type: max_f1 |
|
value: 62.59187620889749 |
|
- task: |
|
type: STS |
|
dataset: |
|
type: Lajavaness/SICK-fr |
|
name: MTEB SICKFr |
|
config: default |
|
split: test |
|
revision: e077ab4cf4774a1e36d86d593b150422fafd8e8a |
|
metrics: |
|
- type: cos_sim_pearson |
|
value: 85.12347242597652 |
|
- type: cos_sim_spearman |
|
value: 79.80580538857501 |
|
- type: euclidean_pearson |
|
value: 82.03127787921382 |
|
- type: euclidean_spearman |
|
value: 79.80580538857501 |
|
- type: manhattan_pearson |
|
value: 82.02795155003601 |
|
- type: manhattan_spearman |
|
value: 79.7808784011127 |
|
- task: |
|
type: STS |
|
dataset: |
|
type: mteb/sts22-crosslingual-sts |
|
name: MTEB STS22 (fr) |
|
config: fr |
|
split: test |
|
revision: de9d86b3b84231dc21f76c7b7af1f28e2f57f6e3 |
|
metrics: |
|
- type: cos_sim_pearson |
|
value: 82.34462624659417 |
|
- type: cos_sim_spearman |
|
value: 82.83867899462683 |
|
- type: euclidean_pearson |
|
value: 80.00679113308384 |
|
- type: euclidean_spearman |
|
value: 82.83867899462683 |
|
- type: manhattan_pearson |
|
value: 79.97582730301362 |
|
- type: manhattan_spearman |
|
value: 82.95718926500541 |
|
- task: |
|
type: STS |
|
dataset: |
|
type: mteb/stsb_multi_mt |
|
name: MTEB STSBenchmarkMultilingualSTS (fr) |
|
config: fr |
|
split: test |
|
revision: 29afa2569dcedaaa2fe6a3dcfebab33d28b82e8c |
|
metrics: |
|
- type: cos_sim_pearson |
|
value: 86.0897698618904 |
|
- type: cos_sim_spearman |
|
value: 86.58814894229229 |
|
- type: euclidean_pearson |
|
value: 85.53992615842806 |
|
- type: euclidean_spearman |
|
value: 86.58814894229229 |
|
- type: manhattan_pearson |
|
value: 85.4985023034774 |
|
- type: manhattan_spearman |
|
value: 86.50239881298486 |
|
- task: |
|
type: Summarization |
|
dataset: |
|
type: lyon-nlp/summarization-summeval-fr-p2p |
|
name: MTEB SummEvalFr |
|
config: default |
|
split: test |
|
revision: b385812de6a9577b6f4d0f88c6a6e35395a94054 |
|
metrics: |
|
- type: cos_sim_pearson |
|
value: 30.458145110977753 |
|
- type: cos_sim_spearman |
|
value: 31.624715940109265 |
|
- type: dot_pearson |
|
value: 30.458145236239915 |
|
- type: dot_spearman |
|
value: 31.624715940109265 |
|
- task: |
|
type: Reranking |
|
dataset: |
|
type: lyon-nlp/mteb-fr-reranking-syntec-s2p |
|
name: MTEB SyntecReranking |
|
config: default |
|
split: test |
|
revision: daf0863838cd9e3ba50544cdce3ac2b338a1b0ad |
|
metrics: |
|
- type: map |
|
value: 87.60277777777777 |
|
- type: mrr |
|
value: 87.60277777777777 |
|
- type: nAUC_map_diff1 |
|
value: 63.877496103879814 |
|
- type: nAUC_map_max |
|
value: -4.8943605546581725 |
|
- type: nAUC_mrr_diff1 |
|
value: 63.877496103879814 |
|
- type: nAUC_mrr_max |
|
value: -4.8943605546581725 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: lyon-nlp/mteb-fr-retrieval-syntec-s2p |
|
name: MTEB SyntecRetrieval |
|
config: default |
|
split: test |
|
revision: 19661ccdca4dfc2d15122d776b61685f48c68ca9 |
|
metrics: |
|
- type: map_at_1 |
|
value: 67.0 |
|
- type: map_at_10 |
|
value: 78.47800000000001 |
|
- type: map_at_100 |
|
value: 78.616 |
|
- type: map_at_1000 |
|
value: 78.616 |
|
- type: map_at_20 |
|
value: 78.52799999999999 |
|
- type: map_at_3 |
|
value: 77.833 |
|
- type: map_at_5 |
|
value: 78.033 |
|
- type: mrr_at_1 |
|
value: 67.0 |
|
- type: mrr_at_10 |
|
value: 78.47777777777777 |
|
- type: mrr_at_100 |
|
value: 78.61609758846066 |
|
- type: mrr_at_1000 |
|
value: 78.61609758846066 |
|
- type: mrr_at_20 |
|
value: 78.52777777777777 |
|
- type: mrr_at_3 |
|
value: 77.83333333333331 |
|
- type: mrr_at_5 |
|
value: 78.03333333333333 |
|
- type: nauc_map_at_1000_diff1 |
|
value: 54.76919250379753 |
|
- type: nauc_map_at_1000_max |
|
value: 24.03294042759147 |
|
- type: nauc_map_at_100_diff1 |
|
value: 54.76919250379753 |
|
- type: nauc_map_at_100_max |
|
value: 24.03294042759147 |
|
- type: nauc_map_at_10_diff1 |
|
value: 54.781660658782506 |
|
- type: nauc_map_at_10_max |
|
value: 24.45332707633837 |
|
- type: nauc_map_at_1_diff1 |
|
value: 54.48189466912695 |
|
- type: nauc_map_at_1_max |
|
value: 17.502666282947597 |
|
- type: nauc_map_at_20_diff1 |
|
value: 54.69518355408933 |
|
- type: nauc_map_at_20_max |
|
value: 24.285263763068183 |
|
- type: nauc_map_at_3_diff1 |
|
value: 54.98928575752318 |
|
- type: nauc_map_at_3_max |
|
value: 25.252117626643916 |
|
- type: nauc_map_at_5_diff1 |
|
value: 54.51750311747391 |
|
- type: nauc_map_at_5_max |
|
value: 25.141479081321766 |
|
- type: nauc_mrr_at_1000_diff1 |
|
value: 54.76919250379753 |
|
- type: nauc_mrr_at_1000_max |
|
value: 24.03294042759147 |
|
- type: nauc_mrr_at_100_diff1 |
|
value: 54.76919250379753 |
|
- type: nauc_mrr_at_100_max |
|
value: 24.03294042759147 |
|
- type: nauc_mrr_at_10_diff1 |
|
value: 54.781660658782506 |
|
- type: nauc_mrr_at_10_max |
|
value: 24.45332707633837 |
|
- type: nauc_mrr_at_1_diff1 |
|
value: 54.48189466912695 |
|
- type: nauc_mrr_at_1_max |
|
value: 17.502666282947597 |
|
- type: nauc_mrr_at_20_diff1 |
|
value: 54.69518355408933 |
|
- type: nauc_mrr_at_20_max |
|
value: 24.285263763068183 |
|
- type: nauc_mrr_at_3_diff1 |
|
value: 54.98928575752318 |
|
- type: nauc_mrr_at_3_max |
|
value: 25.252117626643916 |
|
- type: nauc_mrr_at_5_diff1 |
|
value: 54.51750311747391 |
|
- type: nauc_mrr_at_5_max |
|
value: 25.141479081321766 |
|
- type: nauc_ndcg_at_1000_diff1 |
|
value: 54.411394691026196 |
|
- type: nauc_ndcg_at_1000_max |
|
value: 25.003182969921014 |
|
- type: nauc_ndcg_at_100_diff1 |
|
value: 54.411394691026196 |
|
- type: nauc_ndcg_at_100_max |
|
value: 25.003182969921014 |
|
- type: nauc_ndcg_at_10_diff1 |
|
value: 53.97509194326736 |
|
- type: nauc_ndcg_at_10_max |
|
value: 27.51736442048005 |
|
- type: nauc_ndcg_at_1_diff1 |
|
value: 54.48189466912695 |
|
- type: nauc_ndcg_at_1_max |
|
value: 17.502666282947597 |
|
- type: nauc_ndcg_at_20_diff1 |
|
value: 53.46713794714154 |
|
- type: nauc_ndcg_at_20_max |
|
value: 26.601577957753005 |
|
- type: nauc_ndcg_at_3_diff1 |
|
value: 54.521393171396525 |
|
- type: nauc_ndcg_at_3_max |
|
value: 29.07380139412928 |
|
- type: nauc_ndcg_at_5_diff1 |
|
value: 53.42255297135452 |
|
- type: nauc_ndcg_at_5_max |
|
value: 28.91110004742623 |
|
- type: nauc_precision_at_1000_diff1 |
|
value: nan |
|
- type: nauc_precision_at_1000_max |
|
value: nan |
|
- type: nauc_precision_at_100_diff1 |
|
value: nan |
|
- type: nauc_precision_at_100_max |
|
value: nan |
|
- type: nauc_precision_at_10_diff1 |
|
value: 41.59663865546228 |
|
- type: nauc_precision_at_10_max |
|
value: 67.44864612511667 |
|
- type: nauc_precision_at_1_diff1 |
|
value: 54.48189466912695 |
|
- type: nauc_precision_at_1_max |
|
value: 17.502666282947597 |
|
- type: nauc_precision_at_20_diff1 |
|
value: 26.486150015561265 |
|
- type: nauc_precision_at_20_max |
|
value: 60.95549330843449 |
|
- type: nauc_precision_at_3_diff1 |
|
value: 50.78781512605074 |
|
- type: nauc_precision_at_3_max |
|
value: 55.48552754435131 |
|
- type: nauc_precision_at_5_diff1 |
|
value: 43.75750300120062 |
|
- type: nauc_precision_at_5_max |
|
value: 58.29665199413101 |
|
- type: nauc_recall_at_1000_diff1 |
|
value: nan |
|
- type: nauc_recall_at_1000_max |
|
value: nan |
|
- type: nauc_recall_at_100_diff1 |
|
value: nan |
|
- type: nauc_recall_at_100_max |
|
value: nan |
|
- type: nauc_recall_at_10_diff1 |
|
value: 41.59663865546242 |
|
- type: nauc_recall_at_10_max |
|
value: 67.44864612511677 |
|
- type: nauc_recall_at_1_diff1 |
|
value: 54.48189466912695 |
|
- type: nauc_recall_at_1_max |
|
value: 17.502666282947597 |
|
- type: nauc_recall_at_20_diff1 |
|
value: 26.486150015561737 |
|
- type: nauc_recall_at_20_max |
|
value: 60.95549330843472 |
|
- type: nauc_recall_at_3_diff1 |
|
value: 50.787815126050376 |
|
- type: nauc_recall_at_3_max |
|
value: 55.48552754435111 |
|
- type: nauc_recall_at_5_diff1 |
|
value: 43.75750300120054 |
|
- type: nauc_recall_at_5_max |
|
value: 58.29665199413113 |
|
- type: ndcg_at_1 |
|
value: 67.0 |
|
- type: ndcg_at_10 |
|
value: 82.864 |
|
- type: ndcg_at_100 |
|
value: 83.672 |
|
- type: ndcg_at_1000 |
|
value: 83.672 |
|
- type: ndcg_at_20 |
|
value: 83.092 |
|
- type: ndcg_at_3 |
|
value: 81.464 |
|
- type: ndcg_at_5 |
|
value: 81.851 |
|
- type: precision_at_1 |
|
value: 67.0 |
|
- type: precision_at_10 |
|
value: 9.6 |
|
- type: precision_at_100 |
|
value: 1.0 |
|
- type: precision_at_1000 |
|
value: 0.1 |
|
- type: precision_at_20 |
|
value: 4.8500000000000005 |
|
- type: precision_at_3 |
|
value: 30.667 |
|
- type: precision_at_5 |
|
value: 18.6 |
|
- type: recall_at_1 |
|
value: 67.0 |
|
- type: recall_at_10 |
|
value: 96.0 |
|
- type: recall_at_100 |
|
value: 100.0 |
|
- type: recall_at_1000 |
|
value: 100.0 |
|
- type: recall_at_20 |
|
value: 97.0 |
|
- type: recall_at_3 |
|
value: 92.0 |
|
- type: recall_at_5 |
|
value: 93.0 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: jinaai/xpqa |
|
name: MTEB XPQARetrieval (fr) |
|
config: fr |
|
split: test |
|
revision: c99d599f0a6ab9b85b065da6f9d94f9cf731679f |
|
metrics: |
|
- type: map_at_1 |
|
value: 40.038000000000004 |
|
- type: map_at_10 |
|
value: 62.409000000000006 |
|
- type: map_at_100 |
|
value: 63.63999999999999 |
|
- type: map_at_1000 |
|
value: 63.693 |
|
- type: map_at_20 |
|
value: 63.165000000000006 |
|
- type: map_at_3 |
|
value: 55.364999999999995 |
|
- type: map_at_5 |
|
value: 59.95399999999999 |
|
- type: mrr_at_1 |
|
value: 62.88384512683578 |
|
- type: mrr_at_10 |
|
value: 70.414944794117 |
|
- type: mrr_at_100 |
|
value: 70.85679259651413 |
|
- type: mrr_at_1000 |
|
value: 70.8680806615119 |
|
- type: mrr_at_20 |
|
value: 70.69824986774621 |
|
- type: mrr_at_3 |
|
value: 68.04628393413438 |
|
- type: mrr_at_5 |
|
value: 69.65509568313303 |
|
- type: nauc_map_at_1000_diff1 |
|
value: 47.58306966781138 |
|
- type: nauc_map_at_1000_max |
|
value: 49.99853404950863 |
|
- type: nauc_map_at_100_diff1 |
|
value: 47.5473544905194 |
|
- type: nauc_map_at_100_max |
|
value: 49.98683021023155 |
|
- type: nauc_map_at_10_diff1 |
|
value: 47.443327641163705 |
|
- type: nauc_map_at_10_max |
|
value: 49.31862257934493 |
|
- type: nauc_map_at_1_diff1 |
|
value: 55.93203426614159 |
|
- type: nauc_map_at_1_max |
|
value: 27.467436111704224 |
|
- type: nauc_map_at_20_diff1 |
|
value: 47.454162467793985 |
|
- type: nauc_map_at_20_max |
|
value: 49.715459382963765 |
|
- type: nauc_map_at_3_diff1 |
|
value: 48.910525378486874 |
|
- type: nauc_map_at_3_max |
|
value: 42.13319318718595 |
|
- type: nauc_map_at_5_diff1 |
|
value: 48.56545403298638 |
|
- type: nauc_map_at_5_max |
|
value: 47.311811085622445 |
|
- type: nauc_mrr_at_1000_diff1 |
|
value: 56.739822956274224 |
|
- type: nauc_mrr_at_1000_max |
|
value: 58.274212468278854 |
|
- type: nauc_mrr_at_100_diff1 |
|
value: 56.7308210328899 |
|
- type: nauc_mrr_at_100_max |
|
value: 58.27250671019899 |
|
- type: nauc_mrr_at_10_diff1 |
|
value: 56.647228471816405 |
|
- type: nauc_mrr_at_10_max |
|
value: 58.210342990657495 |
|
- type: nauc_mrr_at_1_diff1 |
|
value: 58.618266167104046 |
|
- type: nauc_mrr_at_1_max |
|
value: 58.55438607166539 |
|
- type: nauc_mrr_at_20_diff1 |
|
value: 56.63534799976597 |
|
- type: nauc_mrr_at_20_max |
|
value: 58.17181317797869 |
|
- type: nauc_mrr_at_3_diff1 |
|
value: 56.815531582264825 |
|
- type: nauc_mrr_at_3_max |
|
value: 58.32821204695344 |
|
- type: nauc_mrr_at_5_diff1 |
|
value: 56.79122022985127 |
|
- type: nauc_mrr_at_5_max |
|
value: 58.20366609452701 |
|
- type: nauc_ndcg_at_1000_diff1 |
|
value: 49.530062263932194 |
|
- type: nauc_ndcg_at_1000_max |
|
value: 53.473298956705925 |
|
- type: nauc_ndcg_at_100_diff1 |
|
value: 48.95703823297219 |
|
- type: nauc_ndcg_at_100_max |
|
value: 53.191721124797276 |
|
- type: nauc_ndcg_at_10_diff1 |
|
value: 47.98530786084638 |
|
- type: nauc_ndcg_at_10_max |
|
value: 51.155857323188016 |
|
- type: nauc_ndcg_at_1_diff1 |
|
value: 58.618266167104046 |
|
- type: nauc_ndcg_at_1_max |
|
value: 58.55438607166539 |
|
- type: nauc_ndcg_at_20_diff1 |
|
value: 47.95544792051313 |
|
- type: nauc_ndcg_at_20_max |
|
value: 51.751640167194054 |
|
- type: nauc_ndcg_at_3_diff1 |
|
value: 48.50900656884395 |
|
- type: nauc_ndcg_at_3_max |
|
value: 50.78667595293348 |
|
- type: nauc_ndcg_at_5_diff1 |
|
value: 49.496100926859654 |
|
- type: nauc_ndcg_at_5_max |
|
value: 49.089893886856835 |
|
- type: nauc_precision_at_1000_diff1 |
|
value: -19.085707327488784 |
|
- type: nauc_precision_at_1000_max |
|
value: 22.16522736611267 |
|
- type: nauc_precision_at_100_diff1 |
|
value: -16.92930793417545 |
|
- type: nauc_precision_at_100_max |
|
value: 26.119556898620655 |
|
- type: nauc_precision_at_10_diff1 |
|
value: -8.586758571265364 |
|
- type: nauc_precision_at_10_max |
|
value: 34.29909350105018 |
|
- type: nauc_precision_at_1_diff1 |
|
value: 58.618266167104046 |
|
- type: nauc_precision_at_1_max |
|
value: 58.55438607166539 |
|
- type: nauc_precision_at_20_diff1 |
|
value: -12.36545815755639 |
|
- type: nauc_precision_at_20_max |
|
value: 30.779202784243694 |
|
- type: nauc_precision_at_3_diff1 |
|
value: 7.173290556095678 |
|
- type: nauc_precision_at_3_max |
|
value: 43.244915594569356 |
|
- type: nauc_precision_at_5_diff1 |
|
value: -0.5308831428158323 |
|
- type: nauc_precision_at_5_max |
|
value: 39.78478615216909 |
|
- type: nauc_recall_at_1000_diff1 |
|
value: 44.67738158424653 |
|
- type: nauc_recall_at_1000_max |
|
value: 71.12276250795361 |
|
- type: nauc_recall_at_100_diff1 |
|
value: 30.071917991701135 |
|
- type: nauc_recall_at_100_max |
|
value: 42.226214389979326 |
|
- type: nauc_recall_at_10_diff1 |
|
value: 36.275167481806804 |
|
- type: nauc_recall_at_10_max |
|
value: 40.16796727800884 |
|
- type: nauc_recall_at_1_diff1 |
|
value: 55.93203426614159 |
|
- type: nauc_recall_at_1_max |
|
value: 27.467436111704224 |
|
- type: nauc_recall_at_20_diff1 |
|
value: 32.189427460851505 |
|
- type: nauc_recall_at_20_max |
|
value: 38.926081167758205 |
|
- type: nauc_recall_at_3_diff1 |
|
value: 43.959378195689894 |
|
- type: nauc_recall_at_3_max |
|
value: 36.441633750156335 |
|
- type: nauc_recall_at_5_diff1 |
|
value: 42.6274479464408 |
|
- type: nauc_recall_at_5_max |
|
value: 38.9902118898862 |
|
- type: ndcg_at_1 |
|
value: 62.88399999999999 |
|
- type: ndcg_at_10 |
|
value: 68.907 |
|
- type: ndcg_at_100 |
|
value: 72.896 |
|
- type: ndcg_at_1000 |
|
value: 73.721 |
|
- type: ndcg_at_20 |
|
value: 70.738 |
|
- type: ndcg_at_3 |
|
value: 62.731 |
|
- type: ndcg_at_5 |
|
value: 65.191 |
|
- type: precision_at_1 |
|
value: 62.88399999999999 |
|
- type: precision_at_10 |
|
value: 16.101 |
|
- type: precision_at_100 |
|
value: 1.951 |
|
- type: precision_at_1000 |
|
value: 0.20600000000000002 |
|
- type: precision_at_20 |
|
value: 8.705 |
|
- type: precision_at_3 |
|
value: 38.095 |
|
- type: precision_at_5 |
|
value: 27.904 |
|
- type: recall_at_1 |
|
value: 40.038000000000004 |
|
- type: recall_at_10 |
|
value: 79.237 |
|
- type: recall_at_100 |
|
value: 94.17699999999999 |
|
- type: recall_at_1000 |
|
value: 99.466 |
|
- type: recall_at_20 |
|
value: 85.027 |
|
- type: recall_at_3 |
|
value: 60.336 |
|
- type: recall_at_5 |
|
value: 70.122 |
|
license: apache-2.0 |
|
language: |
|
- fr |
|
metrics: |
|
- pearsonr |
|
- spearmanr |
|
--- |
|
|
|
# [bilingual-document-embedding](https://huggingface.co/Lajavaness/bilingual-document-embedding) |
|
|
|
bilingual-document-embedding is an embedding model for documents in two languages, French and English, with a context length of up to 8096 tokens. It is a specialized sentence-embedding model trained specifically for this language pair, leveraging the robust capabilities of [BGE M3](https://huggingface.co/BAAI/bge-m3), a pre-trained language model, and built on the [BGE M3](https://huggingface.co/BAAI/bge-m3) architecture. The model uses XLM-RoBERTa to encode English and French sentences into a 1024-dimensional vector space, facilitating a wide range of applications from semantic search to text clustering. The embeddings capture the nuanced meanings of English and French sentences, reflecting both the lexical and contextual layers of the language. |
|
|
|
|
|
## Full Model Architecture |
|
``` |
|
SentenceTransformer( |
|
(0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BilingualModel |
|
(1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True}) |
|
(2): Normalize() |
|
) |
|
``` |
|
|
|
## Training and Fine-tuning process |
|
### Stage 1: NLI Training |
|
- Dataset: [(SNLI+XNLI) for english+french] |
|
- Method: Training using Multiple Negatives Ranking Loss. This stage focused on improving the model's ability to discern and rank nuanced differences in sentence semantics. |
|
### Stage 3: Continued Fine-tuning for Semantic Textual Similarity on STS Benchmark |
|
- Dataset: [STSB-fr and en] |
|
- Method: Fine-tuning specifically for the semantic textual similarity benchmark using Siamese BERT-Networks configured with the 'sentence-transformers' library. |
|
### Stage 4: Advanced Augmentation Fine-tuning |
|
- Dataset: STSB, augmented with [silver samples generated from gold samples](https://www.sbert.net/examples/training/data_augmentation/README.html) |
|
- Method: Employed an advanced strategy using [Augmented SBERT](https://arxiv.org/abs/2010.08240) with Pair Sampling Strategies, integrating both Cross-Encoder and Bi-Encoder models. This stage further refined the embeddings by enriching the training data dynamically, enhancing the model's robustness and accuracy. |
|
|
|
|
|
## Usage: |
|
|
|
Using this model becomes easy when you have [sentence-transformers](https://www.SBERT.net) installed: |
|
|
|
``` |
|
pip install -U sentence-transformers |
|
``` |
|
|
|
Then you can use the model like this: |
|
|
|
```python |
|
from sentence_transformers import SentenceTransformer |
|
|
|
sentences = ["Paris est une capitale de la France", "Paris is a capital of France"] |
|
|
|
model = SentenceTransformer('Lajavaness/bilingual-document-embedding', trust_remote_code=True) |
|
embeddings = model.encode(sentences) |
|
print(embeddings) |
|
|
|
``` |
|
|
|
|
|
|
|
|
|
|
|
## Evaluation |
|
|
|
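TODO: add an evaluation summary. In the meantime, the MTEB benchmark results for this model are listed in the `model-index` metadata at the top of this model card. |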
|
|
|
## Citation |
|
@article{chen2024bge, |
|
title={Bge m3-embedding: Multi-lingual, multi-functionality, multi-granularity text embeddings through self-knowledge distillation}, |
|
author={Chen, Jianlv and Xiao, Shitao and Zhang, Peitian and Luo, Kun and Lian, Defu and Liu, Zheng}, |
|
journal={arXiv preprint arXiv:2402.03216}, |
|
year={2024} |
|
} |
|
|
|
@article{conneau2019unsupervised, |
|
title={Unsupervised cross-lingual representation learning at scale}, |
|
author={Conneau, Alexis and Khandelwal, Kartikay and Goyal, Naman and Chaudhary, Vishrav and Wenzek, Guillaume and Guzm{\'a}n, Francisco and Grave, Edouard and Ott, Myle and Zettlemoyer, Luke and Stoyanov, Veselin}, |
|
journal={arXiv preprint arXiv:1911.02116}, |
|
year={2019} |
|
} |
|
|
|
@article{reimers2019sentence, |
|
title={Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks}, |
|
author={Reimers, Nils and Gurevych, Iryna}, |
|
journal={arXiv preprint arXiv:1908.10084}, |
|
year={2019} |
|
} |
|
|
|
@article{thakur2020augmented, |
|
title={Augmented SBERT: Data Augmentation Method for Improving Bi-Encoders for Pairwise Sentence Scoring Tasks}, |
|
author={Thakur, Nandan and Reimers, Nils and Daxenberger, Johannes and Gurevych, Iryna}, |
|
journal={arXiv e-prints}, |
|
pages={arXiv--2010}, |
|
year={2020} |
|
|