Yoann Couble committed on
Commit
9dba123
1 Parent(s): 67c255f

initial copy of lajavaness repo

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 768,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
README.md CHANGED
@@ -1,3 +1,1601 @@
1
- ---
2
- license: apache-2.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: sentence-transformers
3
+ pipeline_tag: sentence-similarity
4
+ tags:
5
+ - sentence-transformers
6
+ - feature-extraction
7
+ - sentence-similarity
8
+ - transformers
9
+ - sentence-embedding
10
+ - mteb
11
+ - mteb
12
+ model-index:
13
+ - name: e433e634850d125d8b85bee76db3a3b6a0c3bf56
14
+ results:
15
+ - task:
16
+ type: Clustering
17
+ dataset:
18
+ type: lyon-nlp/alloprof
19
+ name: MTEB AlloProfClusteringP2P
20
+ config: default
21
+ split: test
22
+ revision: 392ba3f5bcc8c51f578786c1fc3dae648662cb9b
23
+ metrics:
24
+ - type: v_measure
25
+ value: 56.88600728743999
26
+ - type: v_measures
27
+ value: [0.5396081553520281, 0.6022872403200437, 0.5515205944691852, 0.5595772885785736, 0.5632413941951575]
28
+ - task:
29
+ type: Clustering
30
+ dataset:
31
+ type: lyon-nlp/alloprof
32
+ name: MTEB AlloProfClusteringS2S
33
+ config: default
34
+ split: test
35
+ revision: 392ba3f5bcc8c51f578786c1fc3dae648662cb9b
36
+ metrics:
37
+ - type: v_measure
38
+ value: 38.199527329051804
39
+ - type: v_measures
40
+ value: [0.42157254138936706, 0.36882298663461527, 0.3134327610337458, 0.40391031391690396, 0.3832775043562133]
41
+ - task:
42
+ type: Reranking
43
+ dataset:
44
+ type: lyon-nlp/mteb-fr-reranking-alloprof-s2p
45
+ name: MTEB AlloprofReranking
46
+ config: default
47
+ split: test
48
+ revision: 65393d0d7a08a10b4e348135e824f385d420b0fd
49
+ metrics:
50
+ - type: map
51
+ value: 68.73372257500206
52
+ - type: mrr
53
+ value: 70.07434479260904
54
+ - type: nAUC_map_diff1
55
+ value: 50.95933484071007
56
+ - type: nAUC_map_max
57
+ value: 13.75463910519138
58
+ - type: nAUC_mrr_diff1
59
+ value: 50.494303783447656
60
+ - type: nAUC_mrr_max
61
+ value: 14.460935217916187
62
+ - task:
63
+ type: Retrieval
64
+ dataset:
65
+ type: lyon-nlp/alloprof
66
+ name: MTEB AlloprofRetrieval
67
+ config: default
68
+ split: test
69
+ revision: fcf295ea64c750f41fadbaa37b9b861558e1bfbd
70
+ metrics:
71
+ - type: map_at_1
72
+ value: 21.675
73
+ - type: map_at_10
74
+ value: 32.274
75
+ - type: map_at_100
76
+ value: 33.316
77
+ - type: map_at_1000
78
+ value: 33.387
79
+ - type: map_at_20
80
+ value: 32.864
81
+ - type: map_at_3
82
+ value: 29.166999999999998
83
+ - type: map_at_5
84
+ value: 30.946
85
+ - type: mrr_at_1
86
+ value: 21.675302245250432
87
+ - type: mrr_at_10
88
+ value: 32.274309839076714
89
+ - type: mrr_at_100
90
+ value: 33.31571024590564
91
+ - type: mrr_at_1000
92
+ value: 33.3868130424392
93
+ - type: mrr_at_20
94
+ value: 32.863978562081925
95
+ - type: mrr_at_3
96
+ value: 29.16666666666669
97
+ - type: mrr_at_5
98
+ value: 30.94559585492234
99
+ - type: nauc_map_at_1000_diff1
100
+ value: 34.85808309940442
101
+ - type: nauc_map_at_1000_max
102
+ value: 31.058801579682825
103
+ - type: nauc_map_at_100_diff1
104
+ value: 34.842898344470846
105
+ - type: nauc_map_at_100_max
106
+ value: 31.077561464904342
107
+ - type: nauc_map_at_10_diff1
108
+ value: 34.6773118480208
109
+ - type: nauc_map_at_10_max
110
+ value: 30.8489850780642
111
+ - type: nauc_map_at_1_diff1
112
+ value: 40.65773695743684
113
+ - type: nauc_map_at_1_max
114
+ value: 28.766036921254617
115
+ - type: nauc_map_at_20_diff1
116
+ value: 34.73935242577166
117
+ - type: nauc_map_at_20_max
118
+ value: 31.03143938077287
119
+ - type: nauc_map_at_3_diff1
120
+ value: 35.12059625476991
121
+ - type: nauc_map_at_3_max
122
+ value: 30.48787855768291
123
+ - type: nauc_map_at_5_diff1
124
+ value: 34.73453235094986
125
+ - type: nauc_map_at_5_max
126
+ value: 30.3860304682398
127
+ - type: nauc_mrr_at_1000_diff1
128
+ value: 34.85808309940442
129
+ - type: nauc_mrr_at_1000_max
130
+ value: 31.058801579682825
131
+ - type: nauc_mrr_at_100_diff1
132
+ value: 34.842898344470846
133
+ - type: nauc_mrr_at_100_max
134
+ value: 31.077561464904342
135
+ - type: nauc_mrr_at_10_diff1
136
+ value: 34.6773118480208
137
+ - type: nauc_mrr_at_10_max
138
+ value: 30.8489850780642
139
+ - type: nauc_mrr_at_1_diff1
140
+ value: 40.65773695743684
141
+ - type: nauc_mrr_at_1_max
142
+ value: 28.766036921254617
143
+ - type: nauc_mrr_at_20_diff1
144
+ value: 34.73935242577166
145
+ - type: nauc_mrr_at_20_max
146
+ value: 31.03143938077287
147
+ - type: nauc_mrr_at_3_diff1
148
+ value: 35.12059625476991
149
+ - type: nauc_mrr_at_3_max
150
+ value: 30.48787855768291
151
+ - type: nauc_mrr_at_5_diff1
152
+ value: 34.73453235094986
153
+ - type: nauc_mrr_at_5_max
154
+ value: 30.3860304682398
155
+ - type: nauc_ndcg_at_1000_diff1
156
+ value: 34.04342467121623
157
+ - type: nauc_ndcg_at_1000_max
158
+ value: 32.311398352704686
159
+ - type: nauc_ndcg_at_100_diff1
160
+ value: 33.67278941726764
161
+ - type: nauc_ndcg_at_100_max
162
+ value: 33.0229606203184
163
+ - type: nauc_ndcg_at_10_diff1
164
+ value: 32.93808280492078
165
+ - type: nauc_ndcg_at_10_max
166
+ value: 32.07111775221638
167
+ - type: nauc_ndcg_at_1_diff1
168
+ value: 40.65773695743684
169
+ - type: nauc_ndcg_at_1_max
170
+ value: 28.766036921254617
171
+ - type: nauc_ndcg_at_20_diff1
172
+ value: 33.141323431064585
173
+ - type: nauc_ndcg_at_20_max
174
+ value: 32.76436962238286
175
+ - type: nauc_ndcg_at_3_diff1
176
+ value: 33.77769745974645
177
+ - type: nauc_ndcg_at_3_max
178
+ value: 31.072988073016912
179
+ - type: nauc_ndcg_at_5_diff1
180
+ value: 33.091582792245696
181
+ - type: nauc_ndcg_at_5_max
182
+ value: 30.92378976230745
183
+ - type: nauc_precision_at_1000_diff1
184
+ value: 33.74743287990321
185
+ - type: nauc_precision_at_1000_max
186
+ value: 60.08005213097628
187
+ - type: nauc_precision_at_100_diff1
188
+ value: 28.869275501873236
189
+ - type: nauc_precision_at_100_max
190
+ value: 46.35483380447927
191
+ - type: nauc_precision_at_10_diff1
192
+ value: 27.910043146581497
193
+ - type: nauc_precision_at_10_max
194
+ value: 36.07399824307888
195
+ - type: nauc_precision_at_1_diff1
196
+ value: 40.65773695743684
197
+ - type: nauc_precision_at_1_max
198
+ value: 28.766036921254617
199
+ - type: nauc_precision_at_20_diff1
200
+ value: 28.144265629196163
201
+ - type: nauc_precision_at_20_max
202
+ value: 39.60361579056115
203
+ - type: nauc_precision_at_3_diff1
204
+ value: 30.31893725671278
205
+ - type: nauc_precision_at_3_max
206
+ value: 32.63695126407254
207
+ - type: nauc_precision_at_5_diff1
208
+ value: 28.699678130380235
209
+ - type: nauc_precision_at_5_max
210
+ value: 32.37908851919098
211
+ - type: nauc_recall_at_1000_diff1
212
+ value: 33.74743287990342
213
+ - type: nauc_recall_at_1000_max
214
+ value: 60.080052130975346
215
+ - type: nauc_recall_at_100_diff1
216
+ value: 28.869275501873247
217
+ - type: nauc_recall_at_100_max
218
+ value: 46.35483380447917
219
+ - type: nauc_recall_at_10_diff1
220
+ value: 27.910043146581508
221
+ - type: nauc_recall_at_10_max
222
+ value: 36.07399824307888
223
+ - type: nauc_recall_at_1_diff1
224
+ value: 40.65773695743684
225
+ - type: nauc_recall_at_1_max
226
+ value: 28.766036921254617
227
+ - type: nauc_recall_at_20_diff1
228
+ value: 28.14426562919617
229
+ - type: nauc_recall_at_20_max
230
+ value: 39.60361579056118
231
+ - type: nauc_recall_at_3_diff1
232
+ value: 30.318937256712804
233
+ - type: nauc_recall_at_3_max
234
+ value: 32.63695126407256
235
+ - type: nauc_recall_at_5_diff1
236
+ value: 28.699678130380224
237
+ - type: nauc_recall_at_5_max
238
+ value: 32.37908851919102
239
+ - type: ndcg_at_1
240
+ value: 21.675
241
+ - type: ndcg_at_10
242
+ value: 38.06
243
+ - type: ndcg_at_100
244
+ value: 43.491
245
+ - type: ndcg_at_1000
246
+ value: 45.432
247
+ - type: ndcg_at_20
248
+ value: 40.217000000000006
249
+ - type: ndcg_at_3
250
+ value: 31.642
251
+ - type: ndcg_at_5
252
+ value: 34.837
253
+ - type: precision_at_1
254
+ value: 21.675
255
+ - type: precision_at_10
256
+ value: 5.652
257
+ - type: precision_at_100
258
+ value: 0.827
259
+ - type: precision_at_1000
260
+ value: 0.098
261
+ - type: precision_at_20
262
+ value: 3.253
263
+ - type: precision_at_3
264
+ value: 12.939
265
+ - type: precision_at_5
266
+ value: 9.309000000000001
267
+ - type: recall_at_1
268
+ value: 21.675
269
+ - type: recall_at_10
270
+ value: 56.52
271
+ - type: recall_at_100
272
+ value: 82.729
273
+ - type: recall_at_1000
274
+ value: 98.1
275
+ - type: recall_at_20
276
+ value: 65.069
277
+ - type: recall_at_3
278
+ value: 38.817
279
+ - type: recall_at_5
280
+ value: 46.546
281
+ - task:
282
+ type: Classification
283
+ dataset:
284
+ type: mteb/amazon_reviews_multi
285
+ name: MTEB AmazonReviewsClassification (fr)
286
+ config: fr
287
+ split: test
288
+ revision: 1399c76144fd37290681b995c656ef9b2e06e26d
289
+ metrics:
290
+ - type: accuracy
291
+ value: 43.51
292
+ - type: f1
293
+ value: 41.3284674671926
294
+ - type: f1_weighted
295
+ value: 41.3284674671926
296
+ - task:
297
+ type: Retrieval
298
+ dataset:
299
+ type: maastrichtlawtech/bsard
300
+ name: MTEB BSARDRetrieval
301
+ config: default
302
+ split: test
303
+ revision: 5effa1b9b5fa3b0f9e12523e6e43e5f86a6e6d59
304
+ metrics:
305
+ - type: map_at_1
306
+ value: 5.405
307
+ - type: map_at_10
308
+ value: 9.008
309
+ - type: map_at_100
310
+ value: 9.932
311
+ - type: map_at_1000
312
+ value: 10.042
313
+ - type: map_at_20
314
+ value: 9.389
315
+ - type: map_at_3
316
+ value: 7.883
317
+ - type: map_at_5
318
+ value: 8.626000000000001
319
+ - type: mrr_at_1
320
+ value: 5.405405405405405
321
+ - type: mrr_at_10
322
+ value: 9.007579007579007
323
+ - type: mrr_at_100
324
+ value: 9.931517094611667
325
+ - type: mrr_at_1000
326
+ value: 10.0416462267215
327
+ - type: mrr_at_20
328
+ value: 9.38869595990339
329
+ - type: mrr_at_3
330
+ value: 7.882882882882883
331
+ - type: mrr_at_5
332
+ value: 8.626126126126126
333
+ - type: nauc_map_at_1000_diff1
334
+ value: 23.53549434486455
335
+ - type: nauc_map_at_1000_max
336
+ value: 9.977010641647402
337
+ - type: nauc_map_at_100_diff1
338
+ value: 23.50007884241435
339
+ - type: nauc_map_at_100_max
340
+ value: 9.984274734441085
341
+ - type: nauc_map_at_10_diff1
342
+ value: 24.69444512826233
343
+ - type: nauc_map_at_10_max
344
+ value: 9.726162724771594
345
+ - type: nauc_map_at_1_diff1
346
+ value: 40.88188899137848
347
+ - type: nauc_map_at_1_max
348
+ value: 12.044739470755896
349
+ - type: nauc_map_at_20_diff1
350
+ value: 23.833757177107557
351
+ - type: nauc_map_at_20_max
352
+ value: 9.94328216894336
353
+ - type: nauc_map_at_3_diff1
354
+ value: 28.320570164876653
355
+ - type: nauc_map_at_3_max
356
+ value: 11.195397944839767
357
+ - type: nauc_map_at_5_diff1
358
+ value: 25.86894200735248
359
+ - type: nauc_map_at_5_max
360
+ value: 8.43950569758736
361
+ - type: nauc_mrr_at_1000_diff1
362
+ value: 23.53549434486455
363
+ - type: nauc_mrr_at_1000_max
364
+ value: 9.977010641647402
365
+ - type: nauc_mrr_at_100_diff1
366
+ value: 23.50007884241435
367
+ - type: nauc_mrr_at_100_max
368
+ value: 9.984274734441085
369
+ - type: nauc_mrr_at_10_diff1
370
+ value: 24.69444512826233
371
+ - type: nauc_mrr_at_10_max
372
+ value: 9.726162724771594
373
+ - type: nauc_mrr_at_1_diff1
374
+ value: 40.88188899137848
375
+ - type: nauc_mrr_at_1_max
376
+ value: 12.044739470755896
377
+ - type: nauc_mrr_at_20_diff1
378
+ value: 23.833757177107557
379
+ - type: nauc_mrr_at_20_max
380
+ value: 9.94328216894336
381
+ - type: nauc_mrr_at_3_diff1
382
+ value: 28.320570164876653
383
+ - type: nauc_mrr_at_3_max
384
+ value: 11.195397944839767
385
+ - type: nauc_mrr_at_5_diff1
386
+ value: 25.86894200735248
387
+ - type: nauc_mrr_at_5_max
388
+ value: 8.43950569758736
389
+ - type: nauc_ndcg_at_1000_diff1
390
+ value: 15.939402272339343
391
+ - type: nauc_ndcg_at_1000_max
392
+ value: 10.076089125537772
393
+ - type: nauc_ndcg_at_100_diff1
394
+ value: 16.12740122067642
395
+ - type: nauc_ndcg_at_100_max
396
+ value: 10.39935154464689
397
+ - type: nauc_ndcg_at_10_diff1
398
+ value: 20.455941061369295
399
+ - type: nauc_ndcg_at_10_max
400
+ value: 9.350349883274461
401
+ - type: nauc_ndcg_at_1_diff1
402
+ value: 40.88188899137848
403
+ - type: nauc_ndcg_at_1_max
404
+ value: 12.044739470755896
405
+ - type: nauc_ndcg_at_20_diff1
406
+ value: 18.267195122936364
407
+ - type: nauc_ndcg_at_20_max
408
+ value: 10.211299135510837
409
+ - type: nauc_ndcg_at_3_diff1
410
+ value: 26.453038443158267
411
+ - type: nauc_ndcg_at_3_max
412
+ value: 10.628723618231271
413
+ - type: nauc_ndcg_at_5_diff1
414
+ value: 22.815939702854084
415
+ - type: nauc_ndcg_at_5_max
416
+ value: 6.308794763068443
417
+ - type: nauc_precision_at_1000_diff1
418
+ value: -7.915540524594587
419
+ - type: nauc_precision_at_1000_max
420
+ value: 10.441250503021037
421
+ - type: nauc_precision_at_100_diff1
422
+ value: 2.7415108070462253
423
+ - type: nauc_precision_at_100_max
424
+ value: 11.957692005514204
425
+ - type: nauc_precision_at_10_diff1
426
+ value: 12.731449206012213
427
+ - type: nauc_precision_at_10_max
428
+ value: 9.218464561250887
429
+ - type: nauc_precision_at_1_diff1
430
+ value: 40.88188899137848
431
+ - type: nauc_precision_at_1_max
432
+ value: 12.044739470755896
433
+ - type: nauc_precision_at_20_diff1
434
+ value: 8.658189595700664
435
+ - type: nauc_precision_at_20_max
436
+ value: 11.571072137198621
437
+ - type: nauc_precision_at_3_diff1
438
+ value: 22.7637681983756
439
+ - type: nauc_precision_at_3_max
440
+ value: 9.361635703809425
441
+ - type: nauc_precision_at_5_diff1
442
+ value: 17.02002973192349
443
+ - type: nauc_precision_at_5_max
444
+ value: 1.8844406919262011
445
+ - type: nauc_recall_at_1000_diff1
446
+ value: -7.915540524594531
447
+ - type: nauc_recall_at_1000_max
448
+ value: 10.441250503021028
449
+ - type: nauc_recall_at_100_diff1
450
+ value: 2.741510807046166
451
+ - type: nauc_recall_at_100_max
452
+ value: 11.957692005514156
453
+ - type: nauc_recall_at_10_diff1
454
+ value: 12.731449206012224
455
+ - type: nauc_recall_at_10_max
456
+ value: 9.218464561250883
457
+ - type: nauc_recall_at_1_diff1
458
+ value: 40.88188899137848
459
+ - type: nauc_recall_at_1_max
460
+ value: 12.044739470755896
461
+ - type: nauc_recall_at_20_diff1
462
+ value: 8.65818959570063
463
+ - type: nauc_recall_at_20_max
464
+ value: 11.571072137198572
465
+ - type: nauc_recall_at_3_diff1
466
+ value: 22.763768198375587
467
+ - type: nauc_recall_at_3_max
468
+ value: 9.361635703809409
469
+ - type: nauc_recall_at_5_diff1
470
+ value: 17.02002973192351
471
+ - type: nauc_recall_at_5_max
472
+ value: 1.8844406919262173
473
+ - type: ndcg_at_1
474
+ value: 5.405
475
+ - type: ndcg_at_10
476
+ value: 11.045
477
+ - type: ndcg_at_100
478
+ value: 16.724
479
+ - type: ndcg_at_1000
480
+ value: 20.325
481
+ - type: ndcg_at_20
482
+ value: 12.42
483
+ - type: ndcg_at_3
484
+ value: 8.746
485
+ - type: ndcg_at_5
486
+ value: 10.065
487
+ - type: precision_at_1
488
+ value: 5.405
489
+ - type: precision_at_10
490
+ value: 1.757
491
+ - type: precision_at_100
492
+ value: 0.468
493
+ - type: precision_at_1000
494
+ value: 0.077
495
+ - type: precision_at_20
496
+ value: 1.149
497
+ - type: precision_at_3
498
+ value: 3.7539999999999996
499
+ - type: precision_at_5
500
+ value: 2.883
501
+ - type: recall_at_1
502
+ value: 5.405
503
+ - type: recall_at_10
504
+ value: 17.568
505
+ - type: recall_at_100
506
+ value: 46.847
507
+ - type: recall_at_1000
508
+ value: 76.577
509
+ - type: recall_at_20
510
+ value: 22.973
511
+ - type: recall_at_3
512
+ value: 11.261000000000001
513
+ - type: recall_at_5
514
+ value: 14.414
515
+ - task:
516
+ type: Clustering
517
+ dataset:
518
+ type: lyon-nlp/clustering-hal-s2s
519
+ name: MTEB HALClusteringS2S
520
+ config: default
521
+ split: test
522
+ revision: e06ebbbb123f8144bef1a5d18796f3dec9ae2915
523
+ metrics:
524
+ - type: v_measure
525
+ value: 24.495384349905265
526
+ - type: v_measures
527
+ value: [0.2850587858600384, 0.274086904447773, 0.2446866774990972, 0.26946100959565517, 0.24156528297396174]
528
+ - task:
529
+ type: Clustering
530
+ dataset:
531
+ type: reciTAL/mlsum
532
+ name: MTEB MLSUMClusteringP2P
533
+ config: default
534
+ split: test
535
+ revision: b5d54f8f3b61ae17845046286940f03c6bc79bc7
536
+ metrics:
537
+ - type: v_measure
538
+ value: 41.7878688793447
539
+ - type: v_measures
540
+ value: [0.4201324393825989, 0.4205306567437461, 0.4221300501395374, 0.4210735177933313, 0.38124298228695813]
541
+ - task:
542
+ type: Clustering
543
+ dataset:
544
+ type: reciTAL/mlsum
545
+ name: MTEB MLSUMClusteringS2S
546
+ config: default
547
+ split: test
548
+ revision: b5d54f8f3b61ae17845046286940f03c6bc79bc7
549
+ metrics:
550
+ - type: v_measure
551
+ value: 41.54533473611554
552
+ - type: v_measures
553
+ value: [0.3978917671338969, 0.42610299599987944, 0.4152131658150196, 0.40558711021249855, 0.38327501252308305]
554
+ - task:
555
+ type: Classification
556
+ dataset:
557
+ type: mteb/mtop_domain
558
+ name: MTEB MTOPDomainClassification (fr)
559
+ config: fr
560
+ split: test
561
+ revision: d80d48c1eb48d3562165c59d59d0034df9fff0bf
562
+ metrics:
563
+ - type: accuracy
564
+ value: 85.33041027247104
565
+ - type: f1
566
+ value: 85.4043088703478
567
+ - type: f1_weighted
568
+ value: 85.22086763441686
569
+ - task:
570
+ type: Classification
571
+ dataset:
572
+ type: mteb/mtop_intent
573
+ name: MTEB MTOPIntentClassification (fr)
574
+ config: fr
575
+ split: test
576
+ revision: ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba
577
+ metrics:
578
+ - type: accuracy
579
+ value: 59.01346695897275
580
+ - type: f1
581
+ value: 41.296845063208316
582
+ - type: f1_weighted
583
+ value: 61.793813202867696
584
+ - task:
585
+ type: Classification
586
+ dataset:
587
+ type: mteb/masakhanews
588
+ name: MTEB MasakhaNEWSClassification (fra)
589
+ config: fra
590
+ split: test
591
+ revision: 18193f187b92da67168c655c9973a165ed9593dd
592
+ metrics:
593
+ - type: accuracy
594
+ value: 72.60663507109004
595
+ - type: f1
596
+ value: 68.67522100429781
597
+ - type: f1_weighted
598
+ value: 72.75616093668002
599
+ - task:
600
+ type: Clustering
601
+ dataset:
602
+ type: masakhane/masakhanews
603
+ name: MTEB MasakhaNEWSClusteringP2P (fra)
604
+ config: fra
605
+ split: test
606
+ revision: 8ccc72e69e65f40c70e117d8b3c08306bb788b60
607
+ metrics:
608
+ - type: v_measure
609
+ value: 49.17691007381563
610
+ - type: v_measures
611
+ value: [1.0, 0.033833191750480725, 0.5707463198244268, 0.1318223737892885, 0.7224436183265853]
612
+ - task:
613
+ type: Clustering
614
+ dataset:
615
+ type: masakhane/masakhanews
616
+ name: MTEB MasakhaNEWSClusteringS2S (fra)
617
+ config: fra
618
+ split: test
619
+ revision: 8ccc72e69e65f40c70e117d8b3c08306bb788b60
620
+ metrics:
621
+ - type: v_measure
622
+ value: 26.9350763881635
623
+ - type: v_measures
624
+ value: [1.0, 0.0002883507347309009, 0.18259625098776155, 0.025306110065234755, 0.1385631076204479]
625
+ - task:
626
+ type: Classification
627
+ dataset:
628
+ type: mteb/amazon_massive_intent
629
+ name: MTEB MassiveIntentClassification (fr)
630
+ config: fr
631
+ split: test
632
+ revision: 4672e20407010da34463acc759c162ca9734bca6
633
+ metrics:
634
+ - type: accuracy
635
+ value: 65.1546738399462
636
+ - type: f1
637
+ value: 62.81367149102006
638
+ - type: f1_weighted
639
+ value: 64.45478181518959
640
+ - task:
641
+ type: Classification
642
+ dataset:
643
+ type: mteb/amazon_massive_scenario
644
+ name: MTEB MassiveScenarioClassification (fr)
645
+ config: fr
646
+ split: test
647
+ revision: fad2c6e8459f9e1c45d9315f4953d921437d70f8
648
+ metrics:
649
+ - type: accuracy
650
+ value: 69.94283792871553
651
+ - type: f1
652
+ value: 69.3387310036327
653
+ - type: f1_weighted
654
+ value: 69.77979200675047
655
+ - task:
656
+ type: Retrieval
657
+ dataset:
658
+ type: jinaai/mintakaqa
659
+ name: MTEB MintakaRetrieval (fr)
660
+ config: fr
661
+ split: test
662
+ revision: efa78cc2f74bbcd21eff2261f9e13aebe40b814e
663
+ metrics:
664
+ - type: map_at_1
665
+ value: 14.536999999999999
666
+ - type: map_at_10
667
+ value: 22.972
668
+ - type: map_at_100
669
+ value: 24.046
670
+ - type: map_at_1000
671
+ value: 24.15
672
+ - type: map_at_20
673
+ value: 23.56
674
+ - type: map_at_3
675
+ value: 20.639
676
+ - type: map_at_5
677
+ value: 21.886
678
+ - type: mrr_at_1
679
+ value: 14.537264537264537
680
+ - type: mrr_at_10
681
+ value: 22.97172172172171
682
+ - type: mrr_at_100
683
+ value: 24.04581030084757
684
+ - type: mrr_at_1000
685
+ value: 24.15012351833827
686
+ - type: mrr_at_20
687
+ value: 23.559920001131612
688
+ - type: mrr_at_3
689
+ value: 20.63882063882061
690
+ - type: mrr_at_5
691
+ value: 21.88574938574935
692
+ - type: nauc_map_at_1000_diff1
693
+ value: 25.172495501911456
694
+ - type: nauc_map_at_1000_max
695
+ value: 39.07442097828252
696
+ - type: nauc_map_at_100_diff1
697
+ value: 25.129142743145884
698
+ - type: nauc_map_at_100_max
699
+ value: 39.03725272182565
700
+ - type: nauc_map_at_10_diff1
701
+ value: 25.52237435145409
702
+ - type: nauc_map_at_10_max
703
+ value: 39.5761256079619
704
+ - type: nauc_map_at_1_diff1
705
+ value: 31.68506359690787
706
+ - type: nauc_map_at_1_max
707
+ value: 39.251552013635425
708
+ - type: nauc_map_at_20_diff1
709
+ value: 25.223544981725286
710
+ - type: nauc_map_at_20_max
711
+ value: 39.20307777977743
712
+ - type: nauc_map_at_3_diff1
713
+ value: 26.5913043939904
714
+ - type: nauc_map_at_3_max
715
+ value: 40.38909639557377
716
+ - type: nauc_map_at_5_diff1
717
+ value: 25.90291761511258
718
+ - type: nauc_map_at_5_max
719
+ value: 40.08746876057708
720
+ - type: nauc_mrr_at_1000_diff1
721
+ value: 25.172495501911456
722
+ - type: nauc_mrr_at_1000_max
723
+ value: 39.07442097828252
724
+ - type: nauc_mrr_at_100_diff1
725
+ value: 25.129142743145884
726
+ - type: nauc_mrr_at_100_max
727
+ value: 39.03725272182565
728
+ - type: nauc_mrr_at_10_diff1
729
+ value: 25.52237435145409
730
+ - type: nauc_mrr_at_10_max
731
+ value: 39.5761256079619
732
+ - type: nauc_mrr_at_1_diff1
733
+ value: 31.68506359690787
734
+ - type: nauc_mrr_at_1_max
735
+ value: 39.251552013635425
736
+ - type: nauc_mrr_at_20_diff1
737
+ value: 25.223544981725286
738
+ - type: nauc_mrr_at_20_max
739
+ value: 39.20307777977743
740
+ - type: nauc_mrr_at_3_diff1
741
+ value: 26.5913043939904
742
+ - type: nauc_mrr_at_3_max
743
+ value: 40.38909639557377
744
+ - type: nauc_mrr_at_5_diff1
745
+ value: 25.90291761511258
746
+ - type: nauc_mrr_at_5_max
747
+ value: 40.08746876057708
748
+ - type: nauc_ndcg_at_1000_diff1
749
+ value: 23.22275566961323
750
+ - type: nauc_ndcg_at_1000_max
751
+ value: 37.77760760027764
752
+ - type: nauc_ndcg_at_100_diff1
753
+ value: 21.715763741257927
754
+ - type: nauc_ndcg_at_100_max
755
+ value: 36.46541121995108
756
+ - type: nauc_ndcg_at_10_diff1
757
+ value: 23.278761630662373
758
+ - type: nauc_ndcg_at_10_max
759
+ value: 38.7930407055593
760
+ - type: nauc_ndcg_at_1_diff1
761
+ value: 31.68506359690787
762
+ - type: nauc_ndcg_at_1_max
763
+ value: 39.251552013635425
764
+ - type: nauc_ndcg_at_20_diff1
765
+ value: 22.247483519405314
766
+ - type: nauc_ndcg_at_20_max
767
+ value: 37.52699283756433
768
+ - type: nauc_ndcg_at_3_diff1
769
+ value: 25.285332146360567
770
+ - type: nauc_ndcg_at_3_max
771
+ value: 40.49755286945492
772
+ - type: nauc_ndcg_at_5_diff1
773
+ value: 24.188132420084607
774
+ - type: nauc_ndcg_at_5_max
775
+ value: 40.023420096094924
776
+ - type: nauc_precision_at_1000_diff1
777
+ value: 22.011383616462943
778
+ - type: nauc_precision_at_1000_max
779
+ value: 33.1171975223399
780
+ - type: nauc_precision_at_100_diff1
781
+ value: 8.869925191243802
782
+ - type: nauc_precision_at_100_max
783
+ value: 24.642097404720463
784
+ - type: nauc_precision_at_10_diff1
785
+ value: 17.74075352930919
786
+ - type: nauc_precision_at_10_max
787
+ value: 36.488352516736775
788
+ - type: nauc_precision_at_1_diff1
789
+ value: 31.68506359690787
790
+ - type: nauc_precision_at_1_max
791
+ value: 39.251552013635425
792
+ - type: nauc_precision_at_20_diff1
793
+ value: 14.092673370526898
794
+ - type: nauc_precision_at_20_max
795
+ value: 32.16083119966346
796
+ - type: nauc_precision_at_3_diff1
797
+ value: 22.16344389106631
798
+ - type: nauc_precision_at_3_max
799
+ value: 40.70883095791623
800
+ - type: nauc_precision_at_5_diff1
801
+ value: 20.119543069972256
802
+ - type: nauc_precision_at_5_max
803
+ value: 39.79763147435235
804
+ - type: nauc_recall_at_1000_diff1
805
+ value: 22.011383616462528
806
+ - type: nauc_recall_at_1000_max
807
+ value: 33.117197522340085
808
+ - type: nauc_recall_at_100_diff1
809
+ value: 8.869925191243775
810
+ - type: nauc_recall_at_100_max
811
+ value: 24.64209740472041
812
+ - type: nauc_recall_at_10_diff1
813
+ value: 17.740753529309178
814
+ - type: nauc_recall_at_10_max
815
+ value: 36.48835251673679
816
+ - type: nauc_recall_at_1_diff1
817
+ value: 31.68506359690787
818
+ - type: nauc_recall_at_1_max
819
+ value: 39.251552013635425
820
+ - type: nauc_recall_at_20_diff1
821
+ value: 14.092673370526915
822
+ - type: nauc_recall_at_20_max
823
+ value: 32.160831199663455
824
+ - type: nauc_recall_at_3_diff1
825
+ value: 22.163443891066322
826
+ - type: nauc_recall_at_3_max
827
+ value: 40.708830957916234
828
+ - type: nauc_recall_at_5_diff1
829
+ value: 20.119543069972217
830
+ - type: nauc_recall_at_5_max
831
+ value: 39.79763147435234
832
+ - type: ndcg_at_1
833
+ value: 14.536999999999999
834
+ - type: ndcg_at_10
835
+ value: 27.485
836
+ - type: ndcg_at_100
837
+ value: 33.206
838
+ - type: ndcg_at_1000
839
+ value: 36.382999999999996
840
+ - type: ndcg_at_20
841
+ value: 29.635
842
+ - type: ndcg_at_3
843
+ value: 22.597
844
+ - type: ndcg_at_5
845
+ value: 24.851
846
+ - type: precision_at_1
847
+ value: 14.536999999999999
848
+ - type: precision_at_10
849
+ value: 4.189
850
+ - type: precision_at_100
851
+ value: 0.698
852
+ - type: precision_at_1000
853
+ value: 0.096
854
+ - type: precision_at_20
855
+ value: 2.52
856
+ - type: precision_at_3
857
+ value: 9.419
858
+ - type: precision_at_5
859
+ value: 6.749
860
+ - type: recall_at_1
861
+ value: 14.536999999999999
862
+ - type: recall_at_10
863
+ value: 41.892
864
+ - type: recall_at_100
865
+ value: 69.779
866
+ - type: recall_at_1000
867
+ value: 95.61800000000001
868
+ - type: recall_at_20
869
+ value: 50.41
870
+ - type: recall_at_3
871
+ value: 28.255999999999997
872
+ - type: recall_at_5
873
+ value: 33.743
874
+ - task:
875
+ type: PairClassification
876
+ dataset:
877
+ type: GEM/opusparcus
878
+ name: MTEB OpusparcusPC (fr)
879
+ config: fr
880
+ split: test
881
+ revision: 9e9b1f8ef51616073f47f306f7f47dd91663f86a
882
+ metrics:
883
+ - type: cos_sim_accuracy
884
+ value: 81.74386920980926
885
+ - type: cos_sim_ap
886
+ value: 93.18281680904117
887
+ - type: cos_sim_f1
888
+ value: 87.37233054781802
889
+ - type: cos_sim_precision
890
+ value: 82.04010462074979
891
+ - type: cos_sim_recall
892
+ value: 93.44587884806356
893
+ - type: dot_accuracy
894
+ value: 81.74386920980926
895
+ - type: dot_ap
896
+ value: 93.18281680904117
897
+ - type: dot_f1
898
+ value: 87.37233054781802
899
+ - type: dot_precision
900
+ value: 82.04010462074979
901
+ - type: dot_recall
902
+ value: 93.44587884806356
903
+ - type: euclidean_accuracy
904
+ value: 81.74386920980926
905
+ - type: euclidean_ap
906
+ value: 93.18281680904117
907
+ - type: euclidean_f1
908
+ value: 87.37233054781802
909
+ - type: euclidean_precision
910
+ value: 82.04010462074979
911
+ - type: euclidean_recall
912
+ value: 93.44587884806356
913
+ - type: manhattan_accuracy
914
+ value: 81.74386920980926
915
+ - type: manhattan_ap
916
+ value: 93.17517480971131
917
+ - type: manhattan_f1
918
+ value: 87.37864077669903
919
+ - type: manhattan_precision
920
+ value: 81.74740484429066
921
+ - type: manhattan_recall
922
+ value: 93.84309831181727
923
+ - type: max_accuracy
924
+ value: 81.74386920980926
925
+ - type: max_ap
926
+ value: 93.18281680904117
927
+ - type: max_f1
928
+ value: 87.37864077669903
929
+ - task:
930
+ type: PairClassification
931
+ dataset:
932
+ type: google-research-datasets/paws-x
933
+ name: MTEB PawsX (fr)
934
+ config: fr
935
+ split: test
936
+ revision: 8a04d940a42cd40658986fdd8e3da561533a3646
937
+ metrics:
938
+ - type: cos_sim_accuracy
939
+ value: 61.1
940
+ - type: cos_sim_ap
941
+ value: 60.75603519868964
942
+ - type: cos_sim_f1
943
+ value: 62.78646780647509
944
+ - type: cos_sim_precision
945
+ value: 46.74972914409534
946
+ - type: cos_sim_recall
947
+ value: 95.5703211517165
948
+ - type: dot_accuracy
949
+ value: 61.1
950
+ - type: dot_ap
951
+ value: 60.74807680023078
952
+ - type: dot_f1
953
+ value: 62.78646780647509
954
+ - type: dot_precision
955
+ value: 46.74972914409534
956
+ - type: dot_recall
957
+ value: 95.5703211517165
958
+ - type: euclidean_accuracy
959
+ value: 61.1
960
+ - type: euclidean_ap
961
+ value: 60.756144387817734
962
+ - type: euclidean_f1
963
+ value: 62.78646780647509
964
+ - type: euclidean_precision
965
+ value: 46.74972914409534
966
+ - type: euclidean_recall
967
+ value: 95.5703211517165
968
+ - type: manhattan_accuracy
969
+ value: 61.150000000000006
970
+ - type: manhattan_ap
971
+ value: 60.685188544775116
972
+ - type: manhattan_f1
973
+ value: 62.7721335268505
974
+ - type: manhattan_precision
975
+ value: 46.6810577441986
976
+ - type: manhattan_recall
977
+ value: 95.79180509413068
978
+ - type: max_accuracy
979
+ value: 61.150000000000006
980
+ - type: max_ap
981
+ value: 60.756144387817734
982
+ - type: max_f1
983
+ value: 62.78646780647509
984
+ - task:
985
+ type: STS
986
+ dataset:
987
+ type: Lajavaness/SICK-fr
988
+ name: MTEB SICKFr
989
+ config: default
990
+ split: test
991
+ revision: e077ab4cf4774a1e36d86d593b150422fafd8e8a
992
+ metrics:
993
+ - type: cos_sim_pearson
994
+ value: 83.1543597030015
995
+ - type: cos_sim_spearman
996
+ value: 77.10092303546944
997
+ - type: euclidean_pearson
998
+ value: 80.27115846915481
999
+ - type: euclidean_spearman
1000
+ value: 77.10092516058822
1001
+ - type: manhattan_pearson
1002
+ value: 80.30090425968062
1003
+ - type: manhattan_spearman
1004
+ value: 77.09423647945061
1005
+ - task:
1006
+ type: STS
1007
+ dataset:
1008
+ type: mteb/sts22-crosslingual-sts
1009
+ name: MTEB STS22 (fr)
1010
+ config: fr
1011
+ split: test
1012
+ revision: de9d86b3b84231dc21f76c7b7af1f28e2f57f6e3
1013
+ metrics:
1014
+ - type: cos_sim_pearson
1015
+ value: 79.20797144286122
1016
+ - type: cos_sim_spearman
1017
+ value: 80.31452099282514
1018
+ - type: euclidean_pearson
1019
+ value: 78.43621396282957
1020
+ - type: euclidean_spearman
1021
+ value: 80.31452099282514
1022
+ - type: manhattan_pearson
1023
+ value: 78.29678738374866
1024
+ - type: manhattan_spearman
1025
+ value: 79.93185465249057
1026
+ - task:
1027
+ type: STS
1028
+ dataset:
1029
+ type: PhilipMay/stsb_multi_mt
1030
+ name: MTEB STSBenchmarkMultilingualSTS (fr)
1031
+ config: fr
1032
+ split: test
1033
+ revision: 29afa2569dcedaaa2fe6a3dcfebab33d28b82e8c
1034
+ metrics:
1035
+ - type: cos_sim_pearson
1036
+ value: 84.69215133897265
1037
+ - type: cos_sim_spearman
1038
+ value: 84.35617480959016
1039
+ - type: euclidean_pearson
1040
+ value: 83.85371663492563
1041
+ - type: euclidean_spearman
1042
+ value: 84.35617480959016
1043
+ - type: manhattan_pearson
1044
+ value: 83.85857789722276
1045
+ - type: manhattan_spearman
1046
+ value: 84.30794186513978
1047
+ - task:
1048
+ type: Summarization
1049
+ dataset:
1050
+ type: lyon-nlp/summarization-summeval-fr-p2p
1051
+ name: MTEB SummEvalFr
1052
+ config: default
1053
+ split: test
1054
+ revision: b385812de6a9577b6f4d0f88c6a6e35395a94054
1055
+ metrics:
1056
+ - type: cos_sim_pearson
1057
+ value: 29.187176809104393
1058
+ - type: cos_sim_spearman
1059
+ value: 29.65160679657583
1060
+ - type: dot_pearson
1061
+ value: 29.18717349611766
1062
+ - type: dot_spearman
1063
+ value: 29.65160679657583
1064
+ - task:
1065
+ type: Reranking
1066
+ dataset:
1067
+ type: lyon-nlp/mteb-fr-reranking-syntec-s2p
1068
+ name: MTEB SyntecReranking
1069
+ config: default
1070
+ split: test
1071
+ revision: daf0863838cd9e3ba50544cdce3ac2b338a1b0ad
1072
+ metrics:
1073
+ - type: map
1074
+ value: 82.76666666666667
1075
+ - type: mrr
1076
+ value: 82.76666666666667
1077
+ - type: nAUC_map_diff1
1078
+ value: 52.548913230162405
1079
+ - type: nAUC_map_max
1080
+ value: -2.824065935620183
1081
+ - type: nAUC_mrr_diff1
1082
+ value: 52.548913230162405
1083
+ - type: nAUC_mrr_max
1084
+ value: -2.824065935620183
1085
+ - task:
1086
+ type: Retrieval
1087
+ dataset:
1088
+ type: lyon-nlp/mteb-fr-retrieval-syntec-s2p
1089
+ name: MTEB SyntecRetrieval
1090
+ config: default
1091
+ split: test
1092
+ revision: 19661ccdca4dfc2d15122d776b61685f48c68ca9
1093
+ metrics:
1094
+ - type: map_at_1
1095
+ value: 57.99999999999999
1096
+ - type: map_at_10
1097
+ value: 72.356
1098
+ - type: map_at_100
1099
+ value: 72.625
1100
+ - type: map_at_1000
1101
+ value: 72.625
1102
+ - type: map_at_20
1103
+ value: 72.625
1104
+ - type: map_at_3
1105
+ value: 70.333
1106
+ - type: map_at_5
1107
+ value: 71.48299999999999
1108
+ - type: mrr_at_1
1109
+ value: 57.99999999999999
1110
+ - type: mrr_at_10
1111
+ value: 72.35634920634922
1112
+ - type: mrr_at_100
1113
+ value: 72.62532693914275
1114
+ - type: mrr_at_1000
1115
+ value: 72.62532693914275
1116
+ - type: mrr_at_20
1117
+ value: 72.62532693914275
1118
+ - type: mrr_at_3
1119
+ value: 70.33333333333333
1120
+ - type: mrr_at_5
1121
+ value: 71.48333333333333
1122
+ - type: nauc_map_at_1000_diff1
1123
+ value: 57.27081552588017
1124
+ - type: nauc_map_at_1000_max
1125
+ value: 13.401922890723771
1126
+ - type: nauc_map_at_100_diff1
1127
+ value: 57.27081552588017
1128
+ - type: nauc_map_at_100_max
1129
+ value: 13.401922890723771
1130
+ - type: nauc_map_at_10_diff1
1131
+ value: 57.39952453922188
1132
+ - type: nauc_map_at_10_max
1133
+ value: 14.093164837730344
1134
+ - type: nauc_map_at_1_diff1
1135
+ value: 57.23800679107291
1136
+ - type: nauc_map_at_1_max
1137
+ value: 11.039846765533865
1138
+ - type: nauc_map_at_20_diff1
1139
+ value: 57.27081552588017
1140
+ - type: nauc_map_at_20_max
1141
+ value: 13.401922890723771
1142
+ - type: nauc_map_at_3_diff1
1143
+ value: 58.14875247321224
1144
+ - type: nauc_map_at_3_max
1145
+ value: 14.538312305676238
1146
+ - type: nauc_map_at_5_diff1
1147
+ value: 57.34940275695991
1148
+ - type: nauc_map_at_5_max
1149
+ value: 13.675180459395065
1150
+ - type: nauc_mrr_at_1000_diff1
1151
+ value: 57.27081552588017
1152
+ - type: nauc_mrr_at_1000_max
1153
+ value: 13.401922890723771
1154
+ - type: nauc_mrr_at_100_diff1
1155
+ value: 57.27081552588017
1156
+ - type: nauc_mrr_at_100_max
1157
+ value: 13.401922890723771
1158
+ - type: nauc_mrr_at_10_diff1
1159
+ value: 57.39952453922188
1160
+ - type: nauc_mrr_at_10_max
1161
+ value: 14.093164837730344
1162
+ - type: nauc_mrr_at_1_diff1
1163
+ value: 57.23800679107291
1164
+ - type: nauc_mrr_at_1_max
1165
+ value: 11.039846765533865
1166
+ - type: nauc_mrr_at_20_diff1
1167
+ value: 57.27081552588017
1168
+ - type: nauc_mrr_at_20_max
1169
+ value: 13.401922890723771
1170
+ - type: nauc_mrr_at_3_diff1
1171
+ value: 58.14875247321224
1172
+ - type: nauc_mrr_at_3_max
1173
+ value: 14.538312305676238
1174
+ - type: nauc_mrr_at_5_diff1
1175
+ value: 57.34940275695991
1176
+ - type: nauc_mrr_at_5_max
1177
+ value: 13.675180459395065
1178
+ - type: nauc_ndcg_at_1000_diff1
1179
+ value: 57.38511684819052
1180
+ - type: nauc_ndcg_at_1000_max
1181
+ value: 13.993185568467656
1182
+ - type: nauc_ndcg_at_100_diff1
1183
+ value: 57.38511684819052
1184
+ - type: nauc_ndcg_at_100_max
1185
+ value: 13.993185568467656
1186
+ - type: nauc_ndcg_at_10_diff1
1187
+ value: 57.93396526410134
1188
+ - type: nauc_ndcg_at_10_max
1189
+ value: 17.16319020800824
1190
+ - type: nauc_ndcg_at_1_diff1
1191
+ value: 57.23800679107291
1192
+ - type: nauc_ndcg_at_1_max
1193
+ value: 11.039846765533865
1194
+ - type: nauc_ndcg_at_20_diff1
1195
+ value: 57.38511684819052
1196
+ - type: nauc_ndcg_at_20_max
1197
+ value: 13.993185568467656
1198
+ - type: nauc_ndcg_at_3_diff1
1199
+ value: 59.36410104940948
1200
+ - type: nauc_ndcg_at_3_max
1201
+ value: 17.128826753860732
1202
+ - type: nauc_ndcg_at_5_diff1
1203
+ value: 57.71094150714742
1204
+ - type: nauc_ndcg_at_5_max
1205
+ value: 15.62784584334318
1206
+ - type: nauc_precision_at_1000_diff1
1207
+ value: nan
1208
+ - type: nauc_precision_at_1000_max
1209
+ value: nan
1210
+ - type: nauc_precision_at_100_diff1
1211
+ value: nan
1212
+ - type: nauc_precision_at_100_max
1213
+ value: nan
1214
+ - type: nauc_precision_at_10_diff1
1215
+ value: 66.79505135387465
1216
+ - type: nauc_precision_at_10_max
1217
+ value: 70.47152194211033
1218
+ - type: nauc_precision_at_1_diff1
1219
+ value: 57.23800679107291
1220
+ - type: nauc_precision_at_1_max
1221
+ value: 11.039846765533865
1222
+ - type: nauc_precision_at_20_diff1
1223
+ value: 100.0
1224
+ - type: nauc_precision_at_20_max
1225
+ value: 100.0
1226
+ - type: nauc_precision_at_3_diff1
1227
+ value: 65.65896518060521
1228
+ - type: nauc_precision_at_3_max
1229
+ value: 30.198503091441538
1230
+ - type: nauc_precision_at_5_diff1
1231
+ value: 60.04201680672288
1232
+ - type: nauc_precision_at_5_max
1233
+ value: 29.000933706816145
1234
+ - type: nauc_recall_at_1000_diff1
1235
+ value: nan
1236
+ - type: nauc_recall_at_1000_max
1237
+ value: nan
1238
+ - type: nauc_recall_at_100_diff1
1239
+ value: nan
1240
+ - type: nauc_recall_at_100_max
1241
+ value: nan
1242
+ - type: nauc_recall_at_10_diff1
1243
+ value: 66.7950513538749
1244
+ - type: nauc_recall_at_10_max
1245
+ value: 70.47152194211012
1246
+ - type: nauc_recall_at_1_diff1
1247
+ value: 57.23800679107291
1248
+ - type: nauc_recall_at_1_max
1249
+ value: 11.039846765533865
1250
+ - type: nauc_recall_at_20_diff1
1251
+ value: nan
1252
+ - type: nauc_recall_at_20_max
1253
+ value: nan
1254
+ - type: nauc_recall_at_3_diff1
1255
+ value: 65.65896518060525
1256
+ - type: nauc_recall_at_3_max
1257
+ value: 30.19850309144154
1258
+ - type: nauc_recall_at_5_diff1
1259
+ value: 60.0420168067226
1260
+ - type: nauc_recall_at_5_max
1261
+ value: 29.000933706816
1262
+ - type: ndcg_at_1
1263
+ value: 57.99999999999999
1264
+ - type: ndcg_at_10
1265
+ value: 78.19800000000001
1266
+ - type: ndcg_at_100
1267
+ value: 79.199
1268
+ - type: ndcg_at_1000
1269
+ value: 79.199
1270
+ - type: ndcg_at_20
1271
+ value: 79.199
1272
+ - type: ndcg_at_3
1273
+ value: 74.119
1274
+ - type: ndcg_at_5
1275
+ value: 76.184
1276
+ - type: precision_at_1
1277
+ value: 57.99999999999999
1278
+ - type: precision_at_10
1279
+ value: 9.6
1280
+ - type: precision_at_100
1281
+ value: 1.0
1282
+ - type: precision_at_1000
1283
+ value: 0.1
1284
+ - type: precision_at_20
1285
+ value: 5.0
1286
+ - type: precision_at_3
1287
+ value: 28.333000000000002
1288
+ - type: precision_at_5
1289
+ value: 18.0
1290
+ - type: recall_at_1
1291
+ value: 57.99999999999999
1292
+ - type: recall_at_10
1293
+ value: 96.0
1294
+ - type: recall_at_100
1295
+ value: 100.0
1296
+ - type: recall_at_1000
1297
+ value: 100.0
1298
+ - type: recall_at_20
1299
+ value: 100.0
1300
+ - type: recall_at_3
1301
+ value: 85.0
1302
+ - type: recall_at_5
1303
+ value: 90.0
1304
+ - task:
1305
+ type: Retrieval
1306
+ dataset:
1307
+ type: jinaai/xpqa
1308
+ name: MTEB XPQARetrieval (fr)
1309
+ config: fr
1310
+ split: test
1311
+ revision: c99d599f0a6ab9b85b065da6f9d94f9cf731679f
1312
+ metrics:
1313
+ - type: map_at_1
1314
+ value: 35.256
1315
+ - type: map_at_10
1316
+ value: 54.071999999999996
1317
+ - type: map_at_100
1318
+ value: 55.435
1319
+ - type: map_at_1000
1320
+ value: 55.53
1321
+ - type: map_at_20
1322
+ value: 54.855
1323
+ - type: map_at_3
1324
+ value: 48.762
1325
+ - type: map_at_5
1326
+ value: 51.949999999999996
1327
+ - type: mrr_at_1
1328
+ value: 56.34178905206942
1329
+ - type: mrr_at_10
1330
+ value: 63.30843240723078
1331
+ - type: mrr_at_100
1332
+ value: 63.92076387626982
1333
+ - type: mrr_at_1000
1334
+ value: 63.9435076251571
1335
+ - type: mrr_at_20
1336
+ value: 63.64110365119446
1337
+ - type: mrr_at_3
1338
+ value: 61.526479750778805
1339
+ - type: mrr_at_5
1340
+ value: 62.38762794837559
1341
+ - type: nauc_map_at_1000_diff1
1342
+ value: 45.88957885553053
1343
+ - type: nauc_map_at_1000_max
1344
+ value: 52.59013482565773
1345
+ - type: nauc_map_at_100_diff1
1346
+ value: 45.84948517422948
1347
+ - type: nauc_map_at_100_max
1348
+ value: 52.55839985303019
1349
+ - type: nauc_map_at_10_diff1
1350
+ value: 45.763486819482196
1351
+ - type: nauc_map_at_10_max
1352
+ value: 52.09054118600712
1353
+ - type: nauc_map_at_1_diff1
1354
+ value: 55.521911317670835
1355
+ - type: nauc_map_at_1_max
1356
+ value: 34.68779817675579
1357
+ - type: nauc_map_at_20_diff1
1358
+ value: 45.757369615751884
1359
+ - type: nauc_map_at_20_max
1360
+ value: 52.44708031434436
1361
+ - type: nauc_map_at_3_diff1
1362
+ value: 47.798733616712056
1363
+ - type: nauc_map_at_3_max
1364
+ value: 46.87976781177451
1365
+ - type: nauc_map_at_5_diff1
1366
+ value: 46.215964363315884
1367
+ - type: nauc_map_at_5_max
1368
+ value: 50.5765276342371
1369
+ - type: nauc_mrr_at_1000_diff1
1370
+ value: 55.110400510640766
1371
+ - type: nauc_mrr_at_1000_max
1372
+ value: 62.66171179919574
1373
+ - type: nauc_mrr_at_100_diff1
1374
+ value: 55.10166012000449
1375
+ - type: nauc_mrr_at_100_max
1376
+ value: 62.66269343813773
1377
+ - type: nauc_mrr_at_10_diff1
1378
+ value: 55.087629594751256
1379
+ - type: nauc_mrr_at_10_max
1380
+ value: 62.69978067726044
1381
+ - type: nauc_mrr_at_1_diff1
1382
+ value: 57.446957773325956
1383
+ - type: nauc_mrr_at_1_max
1384
+ value: 63.22109004948565
1385
+ - type: nauc_mrr_at_20_diff1
1386
+ value: 55.067208283222016
1387
+ - type: nauc_mrr_at_20_max
1388
+ value: 62.66935664582939
1389
+ - type: nauc_mrr_at_3_diff1
1390
+ value: 55.18870023658262
1391
+ - type: nauc_mrr_at_3_max
1392
+ value: 62.597473549957996
1393
+ - type: nauc_mrr_at_5_diff1
1394
+ value: 54.87651100155316
1395
+ - type: nauc_mrr_at_5_max
1396
+ value: 62.72845534030979
1397
+ - type: nauc_ndcg_at_1000_diff1
1398
+ value: 47.81162759706491
1399
+ - type: nauc_ndcg_at_1000_max
1400
+ value: 56.26337910947683
1401
+ - type: nauc_ndcg_at_100_diff1
1402
+ value: 47.119077388160676
1403
+ - type: nauc_ndcg_at_100_max
1404
+ value: 55.82354642959063
1405
+ - type: nauc_ndcg_at_10_diff1
1406
+ value: 46.784535879466496
1407
+ - type: nauc_ndcg_at_10_max
1408
+ value: 54.63437116703429
1409
+ - type: nauc_ndcg_at_1_diff1
1410
+ value: 57.446957773325956
1411
+ - type: nauc_ndcg_at_1_max
1412
+ value: 63.22109004948565
1413
+ - type: nauc_ndcg_at_20_diff1
1414
+ value: 46.756211545478905
1415
+ - type: nauc_ndcg_at_20_max
1416
+ value: 55.228917899613826
1417
+ - type: nauc_ndcg_at_3_diff1
1418
+ value: 47.66168453462149
1419
+ - type: nauc_ndcg_at_3_max
1420
+ value: 54.39836405112981
1421
+ - type: nauc_ndcg_at_5_diff1
1422
+ value: 46.97491630908418
1423
+ - type: nauc_ndcg_at_5_max
1424
+ value: 53.284362953526184
1425
+ - type: nauc_precision_at_1000_diff1
1426
+ value: -14.959536048875451
1427
+ - type: nauc_precision_at_1000_max
1428
+ value: 19.740731727610537
1429
+ - type: nauc_precision_at_100_diff1
1430
+ value: -10.329364912432421
1431
+ - type: nauc_precision_at_100_max
1432
+ value: 27.80165890502952
1433
+ - type: nauc_precision_at_10_diff1
1434
+ value: 0.7865296687777561
1435
+ - type: nauc_precision_at_10_max
1436
+ value: 38.46291415400641
1437
+ - type: nauc_precision_at_1_diff1
1438
+ value: 57.446957773325956
1439
+ - type: nauc_precision_at_1_max
1440
+ value: 63.22109004948565
1441
+ - type: nauc_precision_at_20_diff1
1442
+ value: -2.2696079664009385
1443
+ - type: nauc_precision_at_20_max
1444
+ value: 35.38696590671127
1445
+ - type: nauc_precision_at_3_diff1
1446
+ value: 14.016444043719714
1447
+ - type: nauc_precision_at_3_max
1448
+ value: 46.68119169258843
1449
+ - type: nauc_precision_at_5_diff1
1450
+ value: 6.466134759646741
1451
+ - type: nauc_precision_at_5_max
1452
+ value: 43.245171983039256
1453
+ - type: nauc_recall_at_1000_diff1
1454
+ value: 10.588340380461794
1455
+ - type: nauc_recall_at_1000_max
1456
+ value: 45.913607560926515
1457
+ - type: nauc_recall_at_100_diff1
1458
+ value: 28.995302681864565
1459
+ - type: nauc_recall_at_100_max
1460
+ value: 42.67608149089844
1461
+ - type: nauc_recall_at_10_diff1
1462
+ value: 38.958724392572854
1463
+ - type: nauc_recall_at_10_max
1464
+ value: 47.455666375173315
1465
+ - type: nauc_recall_at_1_diff1
1466
+ value: 55.521911317670835
1467
+ - type: nauc_recall_at_1_max
1468
+ value: 34.68779817675579
1469
+ - type: nauc_recall_at_20_diff1
1470
+ value: 36.623788206732016
1471
+ - type: nauc_recall_at_20_max
1472
+ value: 46.654888587980174
1473
+ - type: nauc_recall_at_3_diff1
1474
+ value: 43.46749373705754
1475
+ - type: nauc_recall_at_3_max
1476
+ value: 42.55592784672105
1477
+ - type: nauc_recall_at_5_diff1
1478
+ value: 40.49018957054939
1479
+ - type: nauc_recall_at_5_max
1480
+ value: 46.86884862874594
1481
+ - type: ndcg_at_1
1482
+ value: 56.342000000000006
1483
+ - type: ndcg_at_10
1484
+ value: 60.01800000000001
1485
+ - type: ndcg_at_100
1486
+ value: 65.182
1487
+ - type: ndcg_at_1000
1488
+ value: 66.809
1489
+ - type: ndcg_at_20
1490
+ value: 61.982000000000006
1491
+ - type: ndcg_at_3
1492
+ value: 55.688
1493
+ - type: ndcg_at_5
1494
+ value: 56.607
1495
+ - type: precision_at_1
1496
+ value: 56.342000000000006
1497
+ - type: precision_at_10
1498
+ value: 14.005
1499
+ - type: precision_at_100
1500
+ value: 1.821
1501
+ - type: precision_at_1000
1502
+ value: 0.20500000000000002
1503
+ - type: precision_at_20
1504
+ value: 7.684
1505
+ - type: precision_at_3
1506
+ value: 34.089999999999996
1507
+ - type: precision_at_5
1508
+ value: 24.005000000000003
1509
+ - type: recall_at_1
1510
+ value: 35.256
1511
+ - type: recall_at_10
1512
+ value: 67.583
1513
+ - type: recall_at_100
1514
+ value: 88.74300000000001
1515
+ - type: recall_at_1000
1516
+ value: 99.163
1517
+ - type: recall_at_20
1518
+ value: 73.87
1519
+ - type: recall_at_3
1520
+ value: 53.371
1521
+ - type: recall_at_5
1522
+ value: 59.399
1523
+ license: apache-2.0
1524
+ ---
1525
+
1526
+ # [bilingual-embedding-base](https://huggingface.co/Lajavaness/bilingual-embedding-base)
1527
+
1528
+ Bilingual-embedding is an embedding model for the bilingual French–English pair. It is a specialized sentence-embedding model trained specifically for these two languages, leveraging the robust capabilities of [XLM-RoBERTa](https://huggingface.co/FacebookAI/xlm-roberta-base), a pre-trained multilingual language model. The model uses XLM-RoBERTa to encode English and French sentences into a 768-dimensional vector space, facilitating a wide range of applications from semantic search to text clustering. The embeddings capture the nuanced meanings of English and French sentences, reflecting both the lexical and contextual layers of the language.
1529
+
1530
+
1531
+ ## Full Model Architecture
1532
+ ```
1533
+ SentenceTransformer(
1534
+ (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BilingualModel
1535
+ (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
1536
+ (2): Normalize()
1537
+ )
1538
+ ```
1539
+
1540
+ ## Training and Fine-tuning process
1541
+ ### Stage 1: NLI Training
1542
+ - Dataset: [(SNLI+XNLI) for English+French]
1543
+ - Method: Training using Multiple Negatives Ranking Loss. This stage focused on improving the model's ability to discern and rank nuanced differences in sentence semantics.
1544
+ ### Stage 2: Continued Fine-tuning for Semantic Textual Similarity on STS Benchmark
1545
+ - Dataset: [STSB-fr and en]
1546
+ - Method: Fine-tuning specifically for the semantic textual similarity benchmark using Siamese BERT-Networks configured with the 'sentence-transformers' library.
1547
+ ### Stage 3: Advanced Augmentation Fine-tuning
1548
+ - Dataset: STSB with generated [silver samples from gold samples](https://www.sbert.net/examples/training/data_augmentation/README.html)
1549
+ - Method: Employed an advanced strategy using [Augmented SBERT](https://arxiv.org/abs/2010.08240) with Pair Sampling Strategies, integrating both Cross-Encoder and Bi-Encoder models. This stage further refined the embeddings by enriching the training data dynamically, enhancing the model's robustness and accuracy.
1550
+
1551
+
1552
+ ## Usage:
1553
+
1554
+ Using this model becomes easy when you have [sentence-transformers](https://www.SBERT.net) installed:
1555
+
1556
+ ```
1557
+ pip install -U sentence-transformers
1558
+ ```
1559
+
1560
+ Then you can use the model like this:
1561
+
1562
+ ```python
1563
+ from sentence_transformers import SentenceTransformer
1564
+
1565
+ sentences = ["Paris est une capitale de la France", "Paris is a capital of France"]
1566
+
1567
+ model = SentenceTransformer('Lajavaness/bilingual-embedding-base', trust_remote_code=True)
1568
+ embeddings = model.encode(sentences)
+ print(embeddings)
1569
+
1570
+ ```
1571
+
1572
+
1573
+
1574
+
1575
+
1576
+ ## Evaluation
1577
+
1578
+ TODO
1579
+
1580
+ ## Citation
1581
+
1582
+ @article{conneau2019unsupervised,
1583
+ title={Unsupervised cross-lingual representation learning at scale},
1584
+ author={Conneau, Alexis and Khandelwal, Kartikay and Goyal, Naman and Chaudhary, Vishrav and Wenzek, Guillaume and Guzm{\'a}n, Francisco and Grave, Edouard and Ott, Myle and Zettlemoyer, Luke and Stoyanov, Veselin},
1585
+ journal={arXiv preprint arXiv:1911.02116},
1586
+ year={2019}
1587
+ }
1588
+
1589
+ @article{reimers2019sentence,
1590
+ title={Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks},
1591
+ author={Nils Reimers and Iryna Gurevych},
1592
+ journal={https://arxiv.org/abs/1908.10084},
1593
+ year={2019}
1594
+ }
1595
+
1596
+ @article{thakur2020augmented,
1597
+ title={Augmented SBERT: Data Augmentation Method for Improving Bi-Encoders for Pairwise Sentence Scoring Tasks},
1598
+ author={Thakur, Nandan and Reimers, Nils and Daxenberger, Johannes and Gurevych, Iryna},
1599
+ journal={arXiv e-prints},
1600
+ pages={arXiv--2010},
1601
+ year={2020}
config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "dangvantuan/bilingual_impl",
3
+ "architectures": [
4
+ "BilingualModel"
5
+ ],
6
+ "model_type": "xlm-roberta",
7
+ "auto_map": {
8
+ "AutoConfig":"dangvantuan/bilingual_impl--config.BilingualConfig",
9
+ "AutoModel": "dangvantuan/bilingual_impl--modeling.BilingualModel",
10
+ "AutoModelForMaskedLM": "dangvantuan/bilingual_impl--modeling.BilingualForMaskedLM",
11
+ "AutoModelForMultipleChoice": "dangvantuan/bilingual_impl--modeling.BilingualForMultipleChoice",
12
+ "AutoModelForQuestionAnswering": "dangvantuan/bilingual_impl--modeling.BilingualForQuestionAnswering",
13
+ "AutoModelForSequenceClassification": "dangvantuan/bilingual_impl--modeling.BilingualForSequenceClassification",
14
+ "AutoModelForTokenClassification": "dangvantuan/bilingual_impl--modeling.BilingualForTokenClassification"
15
+ },
16
+ "attention_probs_dropout_prob": 0.1,
17
+ "classifier_dropout": null,
18
+ "bos_token_id": 0,
19
+ "eos_token_id": 2,
20
+ "hidden_act": "gelu",
21
+ "hidden_dropout_prob": 0.1,
22
+ "hidden_size": 768,
23
+ "initializer_range": 0.02,
24
+ "intermediate_size": 3072,
25
+ "layer_norm_eps": 1e-05,
26
+ "max_position_embeddings": 514,
27
+ "num_attention_heads": 12,
28
+ "num_hidden_layers": 12,
29
+ "output_past": true,
30
+ "pad_token_id": 1,
31
+ "position_embedding_type": "absolute",
32
+ "torch_dtype": "float16",
33
+ "transformers_version": "4.39.1",
34
+ "type_vocab_size": 1,
35
+ "use_cache": true,
36
+ "vocab_size": 250002
37
+ }
config_sentence_transformers.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "2.7.0",
4
+ "transformers": "4.38.2",
5
+ "pytorch": "2.2.1+cu121"
6
+ },
7
+ "prompts": {},
8
+ "default_prompt_name": null
9
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3316824a5c622bf962a0dd32bf6194f12fdfb167aa25a9494e86fd08f4c3eb0a
3
+ size 1112197096
modules.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Normalize",
18
+ "type": "sentence_transformers.models.Normalize"
19
+ }
20
+ ]
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 512,
3
+ "do_lower_case": false
4
+ }
sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
3
+ size 5069051
special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1cc44ad7faaeec47241864835473fd5403f2da94673f3f764a77ebcb0a803ec
3
+ size 17083009
tokenizer_config.json ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<pad>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "250001": {
36
+ "content": "<mask>",
37
+ "lstrip": true,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "<s>",
45
+ "clean_up_tokenization_spaces": true,
46
+ "cls_token": "<s>",
47
+ "eos_token": "</s>",
48
+ "mask_token": "<mask>",
49
+ "model_max_length": 512,
50
+ "pad_token": "<pad>",
51
+ "sep_token": "</s>",
52
+ "tokenizer_class": "XLMRobertaTokenizer",
53
+ "unk_token": "<unk>"
54
+ }