aspire committed on
Commit
4ec0bf2
1 Parent(s): 9388798

upload file

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +1 -0
  2. 1_Pooling/config.json +7 -0
  3. 2_Dense/config.json +6 -0
  4. 2_Dense/pytorch_model.bin +3 -0
  5. README.md +1187 -1
  6. config.json +31 -0
  7. img/matryoshka-small.gif +3 -0
  8. modules.json +20 -0
  9. pytorch_model.bin +3 -0
  10. result/acge_text_embedding_a10_bf16/AFQMC.json +20 -0
  11. result/acge_text_embedding_a10_bf16/ATEC.json +20 -0
  12. result/acge_text_embedding_a10_bf16/AmazonReviewsClassification.json +25 -0
  13. result/acge_text_embedding_a10_bf16/BQ.json +20 -0
  14. result/acge_text_embedding_a10_bf16/CLSClusteringP2P.json +10 -0
  15. result/acge_text_embedding_a10_bf16/CLSClusteringS2S.json +10 -0
  16. result/acge_text_embedding_a10_bf16/CMedQAv1.json +10 -0
  17. result/acge_text_embedding_a10_bf16/CMedQAv2.json +10 -0
  18. result/acge_text_embedding_a10_bf16/CmedqaRetrieval.json +38 -0
  19. result/acge_text_embedding_a10_bf16/Cmnli.json +49 -0
  20. result/acge_text_embedding_a10_bf16/CovidRetrieval.json +38 -0
  21. result/acge_text_embedding_a10_bf16/DuRetrieval.json +38 -0
  22. result/acge_text_embedding_a10_bf16/EcomRetrieval.json +38 -0
  23. result/acge_text_embedding_a10_bf16/IFlyTek.json +13 -0
  24. result/acge_text_embedding_a10_bf16/JDReview.json +15 -0
  25. result/acge_text_embedding_a10_bf16/LCQMC.json +20 -0
  26. result/acge_text_embedding_a10_bf16/MMarcoReranking.json +10 -0
  27. result/acge_text_embedding_a10_bf16/MMarcoRetrieval.json +38 -0
  28. result/acge_text_embedding_a10_bf16/MassiveIntentClassification.json +25 -0
  29. result/acge_text_embedding_a10_bf16/MassiveScenarioClassification.json +25 -0
  30. result/acge_text_embedding_a10_bf16/MedicalRetrieval.json +38 -0
  31. result/acge_text_embedding_a10_bf16/MultilingualSentiment.json +13 -0
  32. result/acge_text_embedding_a10_bf16/Ocnli.json +49 -0
  33. result/acge_text_embedding_a10_bf16/OnlineShopping.json +15 -0
  34. result/acge_text_embedding_a10_bf16/PAWSX.json +20 -0
  35. result/acge_text_embedding_a10_bf16/QBQTC.json +20 -0
  36. result/acge_text_embedding_a10_bf16/STS22.json +22 -0
  37. result/acge_text_embedding_a10_bf16/STSB.json +20 -0
  38. result/acge_text_embedding_a10_bf16/T2Reranking.json +10 -0
  39. result/acge_text_embedding_a10_bf16/T2Retrieval.json +38 -0
  40. result/acge_text_embedding_a10_bf16/TNews.json +13 -0
  41. result/acge_text_embedding_a10_bf16/ThuNewsClusteringP2P.json +10 -0
  42. result/acge_text_embedding_a10_bf16/ThuNewsClusteringS2S.json +10 -0
  43. result/acge_text_embedding_a10_bf16/VideoRetrieval.json +38 -0
  44. result/acge_text_embedding_a10_bf16/Waimai.json +15 -0
  45. result/acge_text_embedding_bf16/AFQMC.json +20 -0
  46. result/acge_text_embedding_bf16/ATEC.json +20 -0
  47. result/acge_text_embedding_bf16/AmazonReviewsClassification.json +25 -0
  48. result/acge_text_embedding_bf16/BQ.json +20 -0
  49. result/acge_text_embedding_bf16/CLSClusteringP2P.json +10 -0
  50. result/acge_text_embedding_bf16/CLSClusteringS2S.json +10 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ img/matryoshka-small.gif filter=lfs diff=lfs merge=lfs -text
1_Pooling/config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 1024,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false
7
+ }
2_Dense/config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "in_features": 1024,
3
+ "out_features": 1792,
4
+ "bias": true,
5
+ "activation_function": "torch.nn.modules.linear.Identity"
6
+ }
2_Dense/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9d3b5ed4c0c36109bb0f29fecd7450fb603d10dd58a0ee82811d1667ec83291
3
+ size 3674687
README.md CHANGED
@@ -1,3 +1,1189 @@
1
  ---
2
- license: apache-2.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ pipeline_tag: sentence-similarity
3
+ tags:
4
+ - mteb
5
+ - sentence-transformers
6
+ - feature-extraction
7
+ - sentence-similarity
8
+ model-index:
9
+ - name: acge_text_embedding
10
+ results:
11
+ - task:
12
+ type: STS
13
+ dataset:
14
+ type: C-MTEB/AFQMC
15
+ name: MTEB AFQMC
16
+ config: default
17
+ split: validation
18
+ revision: b44c3b011063adb25877c13823db83bb193913c4
19
+ metrics:
20
+ - type: cos_sim_pearson
21
+ value: 54.03219651150428
22
+ - type: cos_sim_spearman
23
+ value: 58.80567952355933
24
+ - type: euclidean_pearson
25
+ value: 57.47052075207808
26
+ - type: euclidean_spearman
27
+ value: 58.80429232297114
28
+ - type: manhattan_pearson
29
+ value: 57.46163912433917
30
+ - type: manhattan_spearman
31
+ value: 58.797778532121
32
+ - task:
33
+ type: STS
34
+ dataset:
35
+ type: C-MTEB/ATEC
36
+ name: MTEB ATEC
37
+ config: default
38
+ split: test
39
+ revision: 0f319b1142f28d00e055a6770f3f726ae9b7d865
40
+ metrics:
41
+ - type: cos_sim_pearson
42
+ value: 53.523171963746854
43
+ - type: cos_sim_spearman
44
+ value: 57.94610819724817
45
+ - type: euclidean_pearson
46
+ value: 61.16974418403869
47
+ - type: euclidean_spearman
48
+ value: 57.94681861980281
49
+ - type: manhattan_pearson
50
+ value: 61.167825359334515
51
+ - type: manhattan_spearman
52
+ value: 57.94540903298445
53
+ - task:
54
+ type: Classification
55
+ dataset:
56
+ type: mteb/amazon_reviews_multi
57
+ name: MTEB AmazonReviewsClassification (zh)
58
+ config: zh
59
+ split: test
60
+ revision: 1399c76144fd37290681b995c656ef9b2e06e26d
61
+ metrics:
62
+ - type: accuracy
63
+ value: 48.556
64
+ - type: f1
65
+ value: 46.61852566163211
66
+ - task:
67
+ type: STS
68
+ dataset:
69
+ type: C-MTEB/BQ
70
+ name: MTEB BQ
71
+ config: default
72
+ split: test
73
+ revision: e3dda5e115e487b39ec7e618c0c6a29137052a55
74
+ metrics:
75
+ - type: cos_sim_pearson
76
+ value: 68.26963267181252
77
+ - type: cos_sim_spearman
78
+ value: 70.36696156869363
79
+ - type: euclidean_pearson
80
+ value: 69.42591718370763
81
+ - type: euclidean_spearman
82
+ value: 70.3677583116469
83
+ - type: manhattan_pearson
84
+ value: 69.40127857737215
85
+ - type: manhattan_spearman
86
+ value: 70.34572662526428
87
+ - task:
88
+ type: Clustering
89
+ dataset:
90
+ type: C-MTEB/CLSClusteringP2P
91
+ name: MTEB CLSClusteringP2P
92
+ config: default
93
+ split: test
94
+ revision: 4b6227591c6c1a73bc76b1055f3b7f3588e72476
95
+ metrics:
96
+ - type: v_measure
97
+ value: 46.54685387179774
98
+ - task:
99
+ type: Clustering
100
+ dataset:
101
+ type: C-MTEB/CLSClusteringS2S
102
+ name: MTEB CLSClusteringS2S
103
+ config: default
104
+ split: test
105
+ revision: e458b3f5414b62b7f9f83499ac1f5497ae2e869f
106
+ metrics:
107
+ - type: v_measure
108
+ value: 44.45602575811581
109
+ - task:
110
+ type: Reranking
111
+ dataset:
112
+ type: C-MTEB/CMedQAv1-reranking
113
+ name: MTEB CMedQAv1
114
+ config: default
115
+ split: test
116
+ revision: 8d7f1e942507dac42dc58017c1a001c3717da7df
117
+ metrics:
118
+ - type: map
119
+ value: 88.4576468720639
120
+ - type: mrr
121
+ value: 90.90595238095237
122
+ - task:
123
+ type: Reranking
124
+ dataset:
125
+ type: C-MTEB/CMedQAv2-reranking
126
+ name: MTEB CMedQAv2
127
+ config: default
128
+ split: test
129
+ revision: 23d186750531a14a0357ca22cd92d712fd512ea0
130
+ metrics:
131
+ - type: map
132
+ value: 88.71413673867269
133
+ - type: mrr
134
+ value: 91.19265873015873
135
+ - task:
136
+ type: Retrieval
137
+ dataset:
138
+ type: C-MTEB/CmedqaRetrieval
139
+ name: MTEB CmedqaRetrieval
140
+ config: default
141
+ split: dev
142
+ revision: cd540c506dae1cf9e9a59c3e06f42030d54e7301
143
+ metrics:
144
+ - type: map_at_1
145
+ value: 26.825
146
+ - type: map_at_10
147
+ value: 39.959
148
+ - type: map_at_100
149
+ value: 41.861
150
+ - type: map_at_1000
151
+ value: 41.963
152
+ - type: map_at_3
153
+ value: 35.357
154
+ - type: map_at_5
155
+ value: 38.001000000000005
156
+ - type: mrr_at_1
157
+ value: 40.585
158
+ - type: mrr_at_10
159
+ value: 48.802
160
+ - type: mrr_at_100
161
+ value: 49.779
162
+ - type: mrr_at_1000
163
+ value: 49.819
164
+ - type: mrr_at_3
165
+ value: 46.095000000000006
166
+ - type: mrr_at_5
167
+ value: 47.678
168
+ - type: ndcg_at_1
169
+ value: 40.585
170
+ - type: ndcg_at_10
171
+ value: 46.758
172
+ - type: ndcg_at_100
173
+ value: 53.957
174
+ - type: ndcg_at_1000
175
+ value: 55.656000000000006
176
+ - type: ndcg_at_3
177
+ value: 40.961
178
+ - type: ndcg_at_5
179
+ value: 43.564
180
+ - type: precision_at_1
181
+ value: 40.585
182
+ - type: precision_at_10
183
+ value: 10.424999999999999
184
+ - type: precision_at_100
185
+ value: 1.625
186
+ - type: precision_at_1000
187
+ value: 0.184
188
+ - type: precision_at_3
189
+ value: 23.114
190
+ - type: precision_at_5
191
+ value: 17.024
192
+ - type: recall_at_1
193
+ value: 26.825
194
+ - type: recall_at_10
195
+ value: 57.909
196
+ - type: recall_at_100
197
+ value: 87.375
198
+ - type: recall_at_1000
199
+ value: 98.695
200
+ - type: recall_at_3
201
+ value: 40.754000000000005
202
+ - type: recall_at_5
203
+ value: 48.472
204
+ - task:
205
+ type: PairClassification
206
+ dataset:
207
+ type: C-MTEB/CMNLI
208
+ name: MTEB Cmnli
209
+ config: default
210
+ split: validation
211
+ revision: 41bc36f332156f7adc9e38f53777c959b2ae9766
212
+ metrics:
213
+ - type: cos_sim_accuracy
214
+ value: 83.4155141310884
215
+ - type: cos_sim_ap
216
+ value: 90.49006000181046
217
+ - type: cos_sim_f1
218
+ value: 84.28797826579125
219
+ - type: cos_sim_precision
220
+ value: 81.69848584595128
221
+ - type: cos_sim_recall
222
+ value: 87.04699555763385
223
+ - type: dot_accuracy
224
+ value: 83.40348767288035
225
+ - type: dot_ap
226
+ value: 90.50667776818787
227
+ - type: dot_f1
228
+ value: 84.31853669417802
229
+ - type: dot_precision
230
+ value: 80.61420345489442
231
+ - type: dot_recall
232
+ value: 88.379705400982
233
+ - type: euclidean_accuracy
234
+ value: 83.43956704750451
235
+ - type: euclidean_ap
236
+ value: 90.48869698176196
237
+ - type: euclidean_f1
238
+ value: 84.32616081540203
239
+ - type: euclidean_precision
240
+ value: 81.77026136613222
241
+ - type: euclidean_recall
242
+ value: 87.04699555763385
243
+ - type: manhattan_accuracy
244
+ value: 83.55983162958509
245
+ - type: manhattan_ap
246
+ value: 90.47972486190912
247
+ - type: manhattan_f1
248
+ value: 84.42325158946412
249
+ - type: manhattan_precision
250
+ value: 82.0569410726109
251
+ - type: manhattan_recall
252
+ value: 86.93009118541033
253
+ - type: max_accuracy
254
+ value: 83.55983162958509
255
+ - type: max_ap
256
+ value: 90.50667776818787
257
+ - type: max_f1
258
+ value: 84.42325158946412
259
+ - task:
260
+ type: Retrieval
261
+ dataset:
262
+ type: C-MTEB/CovidRetrieval
263
+ name: MTEB CovidRetrieval
264
+ config: default
265
+ split: dev
266
+ revision: 1271c7809071a13532e05f25fb53511ffce77117
267
+ metrics:
268
+ - type: map_at_1
269
+ value: 67.597
270
+ - type: map_at_10
271
+ value: 76.545
272
+ - type: map_at_100
273
+ value: 76.893
274
+ - type: map_at_1000
275
+ value: 76.897
276
+ - type: map_at_3
277
+ value: 74.807
278
+ - type: map_at_5
279
+ value: 75.895
280
+ - type: mrr_at_1
281
+ value: 67.861
282
+ - type: mrr_at_10
283
+ value: 76.545
284
+ - type: mrr_at_100
285
+ value: 76.893
286
+ - type: mrr_at_1000
287
+ value: 76.897
288
+ - type: mrr_at_3
289
+ value: 74.886
290
+ - type: mrr_at_5
291
+ value: 75.934
292
+ - type: ndcg_at_1
293
+ value: 67.861
294
+ - type: ndcg_at_10
295
+ value: 80.417
296
+ - type: ndcg_at_100
297
+ value: 81.928
298
+ - type: ndcg_at_1000
299
+ value: 82.038
300
+ - type: ndcg_at_3
301
+ value: 77.025
302
+ - type: ndcg_at_5
303
+ value: 78.94099999999999
304
+ - type: precision_at_1
305
+ value: 67.861
306
+ - type: precision_at_10
307
+ value: 9.336
308
+ - type: precision_at_100
309
+ value: 1.001
310
+ - type: precision_at_1000
311
+ value: 0.101
312
+ - type: precision_at_3
313
+ value: 27.959
314
+ - type: precision_at_5
315
+ value: 17.745
316
+ - type: recall_at_1
317
+ value: 67.597
318
+ - type: recall_at_10
319
+ value: 92.308
320
+ - type: recall_at_100
321
+ value: 99.05199999999999
322
+ - type: recall_at_1000
323
+ value: 99.895
324
+ - type: recall_at_3
325
+ value: 83.325
326
+ - type: recall_at_5
327
+ value: 87.908
328
+ - task:
329
+ type: Retrieval
330
+ dataset:
331
+ type: C-MTEB/DuRetrieval
332
+ name: MTEB DuRetrieval
333
+ config: default
334
+ split: dev
335
+ revision: a1a333e290fe30b10f3f56498e3a0d911a693ced
336
+ metrics:
337
+ - type: map_at_1
338
+ value: 25.574
339
+ - type: map_at_10
340
+ value: 78.493
341
+ - type: map_at_100
342
+ value: 81.384
343
+ - type: map_at_1000
344
+ value: 81.429
345
+ - type: map_at_3
346
+ value: 54.107000000000006
347
+ - type: map_at_5
348
+ value: 68.755
349
+ - type: mrr_at_1
350
+ value: 89.2
351
+ - type: mrr_at_10
352
+ value: 92.567
353
+ - type: mrr_at_100
354
+ value: 92.642
355
+ - type: mrr_at_1000
356
+ value: 92.646
357
+ - type: mrr_at_3
358
+ value: 92.258
359
+ - type: mrr_at_5
360
+ value: 92.458
361
+ - type: ndcg_at_1
362
+ value: 89.2
363
+ - type: ndcg_at_10
364
+ value: 86.084
365
+ - type: ndcg_at_100
366
+ value: 89.053
367
+ - type: ndcg_at_1000
368
+ value: 89.484
369
+ - type: ndcg_at_3
370
+ value: 84.898
371
+ - type: ndcg_at_5
372
+ value: 84.078
373
+ - type: precision_at_1
374
+ value: 89.2
375
+ - type: precision_at_10
376
+ value: 41.345
377
+ - type: precision_at_100
378
+ value: 4.779
379
+ - type: precision_at_1000
380
+ value: 0.488
381
+ - type: precision_at_3
382
+ value: 76.167
383
+ - type: precision_at_5
384
+ value: 64.7
385
+ - type: recall_at_1
386
+ value: 25.574
387
+ - type: recall_at_10
388
+ value: 87.153
389
+ - type: recall_at_100
390
+ value: 96.829
391
+ - type: recall_at_1000
392
+ value: 99.11999999999999
393
+ - type: recall_at_3
394
+ value: 56.421
395
+ - type: recall_at_5
396
+ value: 73.7
397
+ - task:
398
+ type: Retrieval
399
+ dataset:
400
+ type: C-MTEB/EcomRetrieval
401
+ name: MTEB EcomRetrieval
402
+ config: default
403
+ split: dev
404
+ revision: 687de13dc7294d6fd9be10c6945f9e8fec8166b9
405
+ metrics:
406
+ - type: map_at_1
407
+ value: 52.0
408
+ - type: map_at_10
409
+ value: 62.553000000000004
410
+ - type: map_at_100
411
+ value: 63.048
412
+ - type: map_at_1000
413
+ value: 63.065000000000005
414
+ - type: map_at_3
415
+ value: 60.233000000000004
416
+ - type: map_at_5
417
+ value: 61.712999999999994
418
+ - type: mrr_at_1
419
+ value: 52.0
420
+ - type: mrr_at_10
421
+ value: 62.553000000000004
422
+ - type: mrr_at_100
423
+ value: 63.048
424
+ - type: mrr_at_1000
425
+ value: 63.065000000000005
426
+ - type: mrr_at_3
427
+ value: 60.233000000000004
428
+ - type: mrr_at_5
429
+ value: 61.712999999999994
430
+ - type: ndcg_at_1
431
+ value: 52.0
432
+ - type: ndcg_at_10
433
+ value: 67.51599999999999
434
+ - type: ndcg_at_100
435
+ value: 69.896
436
+ - type: ndcg_at_1000
437
+ value: 70.281
438
+ - type: ndcg_at_3
439
+ value: 62.82600000000001
440
+ - type: ndcg_at_5
441
+ value: 65.498
442
+ - type: precision_at_1
443
+ value: 52.0
444
+ - type: precision_at_10
445
+ value: 8.3
446
+ - type: precision_at_100
447
+ value: 0.941
448
+ - type: precision_at_1000
449
+ value: 0.097
450
+ - type: precision_at_3
451
+ value: 23.433
452
+ - type: precision_at_5
453
+ value: 15.36
454
+ - type: recall_at_1
455
+ value: 52.0
456
+ - type: recall_at_10
457
+ value: 83.0
458
+ - type: recall_at_100
459
+ value: 94.1
460
+ - type: recall_at_1000
461
+ value: 97.0
462
+ - type: recall_at_3
463
+ value: 70.3
464
+ - type: recall_at_5
465
+ value: 76.8
466
+ - task:
467
+ type: Classification
468
+ dataset:
469
+ type: C-MTEB/IFlyTek-classification
470
+ name: MTEB IFlyTek
471
+ config: default
472
+ split: validation
473
+ revision: 421605374b29664c5fc098418fe20ada9bd55f8a
474
+ metrics:
475
+ - type: accuracy
476
+ value: 51.76606387071951
477
+ - type: f1
478
+ value: 40.25725744367441
479
+ - task:
480
+ type: Classification
481
+ dataset:
482
+ type: C-MTEB/JDReview-classification
483
+ name: MTEB JDReview
484
+ config: default
485
+ split: test
486
+ revision: b7c64bd89eb87f8ded463478346f76731f07bf8b
487
+ metrics:
488
+ - type: accuracy
489
+ value: 86.69793621013133
490
+ - type: ap
491
+ value: 55.46718958939327
492
+ - type: f1
493
+ value: 81.48228915952436
494
+ - task:
495
+ type: STS
496
+ dataset:
497
+ type: C-MTEB/LCQMC
498
+ name: MTEB LCQMC
499
+ config: default
500
+ split: test
501
+ revision: 17f9b096f80380fce5ed12a9be8be7784b337daf
502
+ metrics:
503
+ - type: cos_sim_pearson
504
+ value: 71.13755846688528
505
+ - type: cos_sim_spearman
506
+ value: 78.17322744116031
507
+ - type: euclidean_pearson
508
+ value: 77.48740502819294
509
+ - type: euclidean_spearman
510
+ value: 78.17553979551616
511
+ - type: manhattan_pearson
512
+ value: 77.47671561749276
513
+ - type: manhattan_spearman
514
+ value: 78.16780681181362
515
+ - task:
516
+ type: Reranking
517
+ dataset:
518
+ type: C-MTEB/Mmarco-reranking
519
+ name: MTEB MMarcoReranking
520
+ config: default
521
+ split: dev
522
+ revision: 8e0c766dbe9e16e1d221116a3f36795fbade07f6
523
+ metrics:
524
+ - type: map
525
+ value: 27.054392822906316
526
+ - type: mrr
527
+ value: 29.001190476190473
528
+ - task:
529
+ type: Retrieval
530
+ dataset:
531
+ type: C-MTEB/MMarcoRetrieval
532
+ name: MTEB MMarcoRetrieval
533
+ config: default
534
+ split: dev
535
+ revision: 539bbde593d947e2a124ba72651aafc09eb33fc2
536
+ metrics:
537
+ - type: map_at_1
538
+ value: 65.62599999999999
539
+ - type: map_at_10
540
+ value: 74.749
541
+ - type: map_at_100
542
+ value: 75.091
543
+ - type: map_at_1000
544
+ value: 75.103
545
+ - type: map_at_3
546
+ value: 73.007
547
+ - type: map_at_5
548
+ value: 74.124
549
+ - type: mrr_at_1
550
+ value: 67.894
551
+ - type: mrr_at_10
552
+ value: 75.374
553
+ - type: mrr_at_100
554
+ value: 75.67399999999999
555
+ - type: mrr_at_1000
556
+ value: 75.685
557
+ - type: mrr_at_3
558
+ value: 73.868
559
+ - type: mrr_at_5
560
+ value: 74.83
561
+ - type: ndcg_at_1
562
+ value: 67.894
563
+ - type: ndcg_at_10
564
+ value: 78.414
565
+ - type: ndcg_at_100
566
+ value: 79.947
567
+ - type: ndcg_at_1000
568
+ value: 80.265
569
+ - type: ndcg_at_3
570
+ value: 75.12
571
+ - type: ndcg_at_5
572
+ value: 76.999
573
+ - type: precision_at_1
574
+ value: 67.894
575
+ - type: precision_at_10
576
+ value: 9.47
577
+ - type: precision_at_100
578
+ value: 1.023
579
+ - type: precision_at_1000
580
+ value: 0.105
581
+ - type: precision_at_3
582
+ value: 28.333000000000002
583
+ - type: precision_at_5
584
+ value: 17.989
585
+ - type: recall_at_1
586
+ value: 65.62599999999999
587
+ - type: recall_at_10
588
+ value: 89.063
589
+ - type: recall_at_100
590
+ value: 95.99499999999999
591
+ - type: recall_at_1000
592
+ value: 98.455
593
+ - type: recall_at_3
594
+ value: 80.357
595
+ - type: recall_at_5
596
+ value: 84.824
597
+ - task:
598
+ type: Classification
599
+ dataset:
600
+ type: mteb/amazon_massive_intent
601
+ name: MTEB MassiveIntentClassification (zh-CN)
602
+ config: zh-CN
603
+ split: test
604
+ revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7
605
+ metrics:
606
+ - type: accuracy
607
+ value: 75.88433086751849
608
+ - type: f1
609
+ value: 73.06801290283882
610
+ - task:
611
+ type: Classification
612
+ dataset:
613
+ type: mteb/amazon_massive_scenario
614
+ name: MTEB MassiveScenarioClassification (zh-CN)
615
+ config: zh-CN
616
+ split: test
617
+ revision: 7d571f92784cd94a019292a1f45445077d0ef634
618
+ metrics:
619
+ - type: accuracy
620
+ value: 78.44317417619366
621
+ - type: f1
622
+ value: 78.1407925250533
623
+ - task:
624
+ type: Retrieval
625
+ dataset:
626
+ type: C-MTEB/MedicalRetrieval
627
+ name: MTEB MedicalRetrieval
628
+ config: default
629
+ split: dev
630
+ revision: 2039188fb5800a9803ba5048df7b76e6fb151fc6
631
+ metrics:
632
+ - type: map_at_1
633
+ value: 54.900000000000006
634
+ - type: map_at_10
635
+ value: 61.0
636
+ - type: map_at_100
637
+ value: 61.549
638
+ - type: map_at_1000
639
+ value: 61.590999999999994
640
+ - type: map_at_3
641
+ value: 59.516999999999996
642
+ - type: map_at_5
643
+ value: 60.267
644
+ - type: mrr_at_1
645
+ value: 55.1
646
+ - type: mrr_at_10
647
+ value: 61.1
648
+ - type: mrr_at_100
649
+ value: 61.649
650
+ - type: mrr_at_1000
651
+ value: 61.690999999999995
652
+ - type: mrr_at_3
653
+ value: 59.617
654
+ - type: mrr_at_5
655
+ value: 60.367000000000004
656
+ - type: ndcg_at_1
657
+ value: 54.900000000000006
658
+ - type: ndcg_at_10
659
+ value: 64.07000000000001
660
+ - type: ndcg_at_100
661
+ value: 66.981
662
+ - type: ndcg_at_1000
663
+ value: 68.207
664
+ - type: ndcg_at_3
665
+ value: 60.955999999999996
666
+ - type: ndcg_at_5
667
+ value: 62.31100000000001
668
+ - type: precision_at_1
669
+ value: 54.900000000000006
670
+ - type: precision_at_10
671
+ value: 7.380000000000001
672
+ - type: precision_at_100
673
+ value: 0.88
674
+ - type: precision_at_1000
675
+ value: 0.098
676
+ - type: precision_at_3
677
+ value: 21.7
678
+ - type: precision_at_5
679
+ value: 13.68
680
+ - type: recall_at_1
681
+ value: 54.900000000000006
682
+ - type: recall_at_10
683
+ value: 73.8
684
+ - type: recall_at_100
685
+ value: 88.0
686
+ - type: recall_at_1000
687
+ value: 97.8
688
+ - type: recall_at_3
689
+ value: 65.10000000000001
690
+ - type: recall_at_5
691
+ value: 68.4
692
+ - task:
693
+ type: Classification
694
+ dataset:
695
+ type: C-MTEB/MultilingualSentiment-classification
696
+ name: MTEB MultilingualSentiment
697
+ config: default
698
+ split: validation
699
+ revision: 46958b007a63fdbf239b7672c25d0bea67b5ea1a
700
+ metrics:
701
+ - type: accuracy
702
+ value: 77.56333333333333
703
+ - type: f1
704
+ value: 77.53666660124703
705
+ - task:
706
+ type: PairClassification
707
+ dataset:
708
+ type: C-MTEB/OCNLI
709
+ name: MTEB Ocnli
710
+ config: default
711
+ split: validation
712
+ revision: 66e76a618a34d6d565d5538088562851e6daa7ec
713
+ metrics:
714
+ - type: cos_sim_accuracy
715
+ value: 81.10449377368705
716
+ - type: cos_sim_ap
717
+ value: 85.16141108141811
718
+ - type: cos_sim_f1
719
+ value: 82.97771455666192
720
+ - type: cos_sim_precision
721
+ value: 75.30120481927712
722
+ - type: cos_sim_recall
723
+ value: 92.39704329461456
724
+ - type: dot_accuracy
725
+ value: 81.05035192203573
726
+ - type: dot_ap
727
+ value: 85.13568069803823
728
+ - type: dot_f1
729
+ value: 83.04038004750595
730
+ - type: dot_precision
731
+ value: 75.47495682210709
732
+ - type: dot_recall
733
+ value: 92.29144667370645
734
+ - type: euclidean_accuracy
735
+ value: 81.10449377368705
736
+ - type: euclidean_ap
737
+ value: 85.16341835376645
738
+ - type: euclidean_f1
739
+ value: 82.96860133206471
740
+ - type: euclidean_precision
741
+ value: 75.4978354978355
742
+ - type: euclidean_recall
743
+ value: 92.08025343189018
744
+ - type: manhattan_accuracy
745
+ value: 81.15863562533838
746
+ - type: manhattan_ap
747
+ value: 85.13388548299352
748
+ - type: manhattan_f1
749
+ value: 82.91048348492102
750
+ - type: manhattan_precision
751
+ value: 75.83187390542906
752
+ - type: manhattan_recall
753
+ value: 91.4466737064414
754
+ - type: max_accuracy
755
+ value: 81.15863562533838
756
+ - type: max_ap
757
+ value: 85.16341835376645
758
+ - type: max_f1
759
+ value: 83.04038004750595
760
+ - task:
761
+ type: Classification
762
+ dataset:
763
+ type: C-MTEB/OnlineShopping-classification
764
+ name: MTEB OnlineShopping
765
+ config: default
766
+ split: test
767
+ revision: e610f2ebd179a8fda30ae534c3878750a96db120
768
+ metrics:
769
+ - type: accuracy
770
+ value: 93.75
771
+ - type: ap
772
+ value: 91.8757063139003
773
+ - type: f1
774
+ value: 93.73901896028437
775
+ - task:
776
+ type: STS
777
+ dataset:
778
+ type: C-MTEB/PAWSX
779
+ name: MTEB PAWSX
780
+ config: default
781
+ split: test
782
+ revision: 9c6a90e430ac22b5779fb019a23e820b11a8b5e1
783
+ metrics:
784
+ - type: cos_sim_pearson
785
+ value: 39.15831534609524
786
+ - type: cos_sim_spearman
787
+ value: 45.4969633673045
788
+ - type: euclidean_pearson
789
+ value: 44.848515043386826
790
+ - type: euclidean_spearman
791
+ value: 45.50184060659851
792
+ - type: manhattan_pearson
793
+ value: 44.855618769134786
794
+ - type: manhattan_spearman
795
+ value: 45.521349632021
796
+ - task:
797
+ type: STS
798
+ dataset:
799
+ type: C-MTEB/QBQTC
800
+ name: MTEB QBQTC
801
+ config: default
802
+ split: test
803
+ revision: 790b0510dc52b1553e8c49f3d2afb48c0e5c48b7
804
+ metrics:
805
+ - type: cos_sim_pearson
806
+ value: 34.240063381471685
807
+ - type: cos_sim_spearman
808
+ value: 37.29810568951238
809
+ - type: euclidean_pearson
810
+ value: 35.114630288288694
811
+ - type: euclidean_spearman
812
+ value: 37.29224953963422
813
+ - type: manhattan_pearson
814
+ value: 35.07429582481541
815
+ - type: manhattan_spearman
816
+ value: 37.24006222876743
817
+ - task:
818
+ type: STS
819
+ dataset:
820
+ type: mteb/sts22-crosslingual-sts
821
+ name: MTEB STS22 (zh)
822
+ config: zh
823
+ split: test
824
+ revision: eea2b4fe26a775864c896887d910b76a8098ad3f
825
+ metrics:
826
+ - type: cos_sim_pearson
827
+ value: 61.839386292911634
828
+ - type: cos_sim_spearman
829
+ value: 67.05632097771566
830
+ - type: euclidean_pearson
831
+ value: 65.72031356075829
832
+ - type: euclidean_spearman
833
+ value: 67.05823973191457
834
+ - type: manhattan_pearson
835
+ value: 65.66073527177826
836
+ - type: manhattan_spearman
837
+ value: 67.04221791481658
838
+ - task:
839
+ type: STS
840
+ dataset:
841
+ type: C-MTEB/STSB
842
+ name: MTEB STSB
843
+ config: default
844
+ split: test
845
+ revision: 0cde68302b3541bb8b3c340dc0644b0b745b3dc0
846
+ metrics:
847
+ - type: cos_sim_pearson
848
+ value: 81.56195178204662
849
+ - type: cos_sim_spearman
850
+ value: 82.73033434099031
851
+ - type: euclidean_pearson
852
+ value: 82.49605254478311
853
+ - type: euclidean_spearman
854
+ value: 82.72004995354247
855
+ - type: manhattan_pearson
856
+ value: 82.48358662476731
857
+ - type: manhattan_spearman
858
+ value: 82.70676710419983
859
+ - task:
860
+ type: Reranking
861
+ dataset:
862
+ type: C-MTEB/T2Reranking
863
+ name: MTEB T2Reranking
864
+ config: default
865
+ split: dev
866
+ revision: 76631901a18387f85eaa53e5450019b87ad58ef9
867
+ metrics:
868
+ - type: map
869
+ value: 65.9012655137193
870
+ - type: mrr
871
+ value: 75.97216177150165
872
+ - task:
873
+ type: Retrieval
874
+ dataset:
875
+ type: C-MTEB/T2Retrieval
876
+ name: MTEB T2Retrieval
877
+ config: default
878
+ split: dev
879
+ revision: 8731a845f1bf500a4f111cf1070785c793d10e64
880
+ metrics:
881
+ - type: map_at_1
882
+ value: 27.057
883
+ - type: map_at_10
884
+ value: 75.29299999999999
885
+ - type: map_at_100
886
+ value: 79.098
887
+ - type: map_at_1000
888
+ value: 79.172
889
+ - type: map_at_3
890
+ value: 53.049
891
+ - type: map_at_5
892
+ value: 65.103
893
+ - type: mrr_at_1
894
+ value: 88.822
895
+ - type: mrr_at_10
896
+ value: 91.721
897
+ - type: mrr_at_100
898
+ value: 91.814
899
+ - type: mrr_at_1000
900
+ value: 91.818
901
+ - type: mrr_at_3
902
+ value: 91.213
903
+ - type: mrr_at_5
904
+ value: 91.544
905
+ - type: ndcg_at_1
906
+ value: 88.822
907
+ - type: ndcg_at_10
908
+ value: 83.269
909
+ - type: ndcg_at_100
910
+ value: 87.259
911
+ - type: ndcg_at_1000
912
+ value: 87.938
913
+ - type: ndcg_at_3
914
+ value: 84.678
915
+ - type: ndcg_at_5
916
+ value: 83.231
917
+ - type: precision_at_1
918
+ value: 88.822
919
+ - type: precision_at_10
920
+ value: 41.297
921
+ - type: precision_at_100
922
+ value: 4.994
923
+ - type: precision_at_1000
924
+ value: 0.515
925
+ - type: precision_at_3
926
+ value: 73.933
927
+ - type: precision_at_5
928
+ value: 61.885
929
+ - type: recall_at_1
930
+ value: 27.057
931
+ - type: recall_at_10
932
+ value: 82.33200000000001
933
+ - type: recall_at_100
934
+ value: 95.065
935
+ - type: recall_at_1000
936
+ value: 98.466
937
+ - type: recall_at_3
938
+ value: 54.872
939
+ - type: recall_at_5
940
+ value: 68.814
941
+ - task:
942
+ type: Classification
943
+ dataset:
944
+ type: C-MTEB/TNews-classification
945
+ name: MTEB TNews
946
+ config: default
947
+ split: validation
948
+ revision: 317f262bf1e6126357bbe89e875451e4b0938fe4
949
+ metrics:
950
+ - type: accuracy
951
+ value: 53.690000000000005
952
+ - type: f1
953
+ value: 51.87306088948137
954
+ - task:
955
+ type: Clustering
956
+ dataset:
957
+ type: C-MTEB/ThuNewsClusteringP2P
958
+ name: MTEB ThuNewsClusteringP2P
959
+ config: default
960
+ split: test
961
+ revision: 5798586b105c0434e4f0fe5e767abe619442cf93
962
+ metrics:
963
+ - type: v_measure
964
+ value: 73.76590442198115
965
+ - task:
966
+ type: Clustering
967
+ dataset:
968
+ type: C-MTEB/ThuNewsClusteringS2S
969
+ name: MTEB ThuNewsClusteringS2S
970
+ config: default
971
+ split: test
972
+ revision: 8a8b2caeda43f39e13c4bc5bea0f8a667896e10d
973
+ metrics:
974
+ - type: v_measure
975
+ value: 68.61875345658028
976
+ - task:
977
+ type: Retrieval
978
+ dataset:
979
+ type: C-MTEB/VideoRetrieval
980
+ name: MTEB VideoRetrieval
981
+ config: default
982
+ split: dev
983
+ revision: 58c2597a5943a2ba48f4668c3b90d796283c5639
984
+ metrics:
985
+ - type: map_at_1
986
+ value: 59.4
987
+ - type: map_at_10
988
+ value: 69.19
989
+ - type: map_at_100
990
+ value: 69.711
991
+ - type: map_at_1000
992
+ value: 69.72699999999999
993
+ - type: map_at_3
994
+ value: 67.717
995
+ - type: map_at_5
996
+ value: 68.742
997
+ - type: mrr_at_1
998
+ value: 59.4
999
+ - type: mrr_at_10
1000
+ value: 69.19
1001
+ - type: mrr_at_100
1002
+ value: 69.711
1003
+ - type: mrr_at_1000
1004
+ value: 69.72699999999999
1005
+ - type: mrr_at_3
1006
+ value: 67.717
1007
+ - type: mrr_at_5
1008
+ value: 68.742
1009
+ - type: ndcg_at_1
1010
+ value: 59.4
1011
+ - type: ndcg_at_10
1012
+ value: 73.28099999999999
1013
+ - type: ndcg_at_100
1014
+ value: 75.575
1015
+ - type: ndcg_at_1000
1016
+ value: 75.971
1017
+ - type: ndcg_at_3
1018
+ value: 70.339
1019
+ - type: ndcg_at_5
1020
+ value: 72.16799999999999
1021
+ - type: precision_at_1
1022
+ value: 59.4
1023
+ - type: precision_at_10
1024
+ value: 8.58
1025
+ - type: precision_at_100
1026
+ value: 0.96
1027
+ - type: precision_at_1000
1028
+ value: 0.099
1029
+ - type: precision_at_3
1030
+ value: 25.967000000000002
1031
+ - type: precision_at_5
1032
+ value: 16.46
1033
+ - type: recall_at_1
1034
+ value: 59.4
1035
+ - type: recall_at_10
1036
+ value: 85.8
1037
+ - type: recall_at_100
1038
+ value: 96.0
1039
+ - type: recall_at_1000
1040
+ value: 99.1
1041
+ - type: recall_at_3
1042
+ value: 77.9
1043
+ - type: recall_at_5
1044
+ value: 82.3
1045
+ - task:
1046
+ type: Classification
1047
+ dataset:
1048
+ type: C-MTEB/waimai-classification
1049
+ name: MTEB Waimai
1050
+ config: default
1051
+ split: test
1052
+ revision: 339287def212450dcaa9df8c22bf93e9980c7023
1053
+ metrics:
1054
+ - type: accuracy
1055
+ value: 88.56000000000002
1056
+ - type: ap
1057
+ value: 73.62152033132061
1058
+ - type: f1
1059
+ value: 87.0916916405758
1060
  ---
1061
+ ## acge model
1062
+
1063
+ acge是一个通用的文本编码模型,是一个可变长度的向量化模型,使用了[Matryoshka Representation Learning](https://arxiv.org/abs/2205.13147),如图所示:
1064
+
1065
+ ![matryoshka-small](./img/matryoshka-small.gif)
1066
+
1067
+ 建议使用的维度为1024或者1792
1068
+
1069
+
1070
+ | Model Name | Model Size (GB) | Dimension | Sequence Length | Language | Need instruction for retrieval? |
1071
+ |:------------------:|:---------------:|:---------:|:---------------:|:--------:|:-------------------------------:|
1072
+ | acge-text-embedding | 0.65 | [1024, 1792] | 1024 | Chinese | NO |
1073
+
1074
+
1075
+ ## Metric
1076
+
1077
+ #### C-MTEB leaderboard (Chinese)
1078
+
1079
+ 由于数据的随机性、显卡型号以及推理所用数据类型的不同,每次推理的结果会略有差异。我总共测试了4次,分别使用不同的显卡(A10、A100)和不同的数据类型,测试结果放在了result文件夹中,并选取其中精度最低的一次作为最终的精度结果。
1080
+
1081
+ | Model Name | GPU | tensor-type | Model Size (GB) | Dimension | Sequence Length | Average (35) | Classification (9) | Clustering (4) | Pair Classification (2) | Reranking (4) | Retrieval (8) | STS (8) |
1082
+ |:------------------:|:---------------:|:---------:|:---------------:|:------------:|:------------------:|:--------------:|:-----------------------:|:-------------:|:-------------:|:-------:|:-------:|:-------:|
1083
+ | acge_text_embedding | NVIDIA TESLA A10 | bfloat16 | 0.65 | 1792 | 1024 | 68.91 | 72.76 | 58.22 | 87.82 | 67.67 | 72.48 | 62.24 |
1084
+ | acge_text_embedding | NVIDIA TESLA A100 | bfloat16 | 0.65 | 1792 | 1024 | 68.91 | 72.77 | 58.35 | 87.82 | 67.53 | 72.48 | 62.24 |
1085
+ | acge_text_embedding | NVIDIA TESLA A100 | float16 | 0.65 | 1792 | 1024 | 68.99 | 72.76 | 58.68 | 87.84 | 67.89 | 72.49 | 62.24 |
1086
+ | acge_text_embedding | NVIDIA TESLA A100 | float32 | 0.65 | 1792 | 1024 | 68.98 | 72.76 | 58.58 | 87.83 | 67.91 | 72.49 | 62.24 |
1087
+
1088
+ #### Reproduce our results
1089
+
1090
+ **C-MTEB:**
1091
+
1092
+ ```python
1093
import argparse
import functools
from typing import List, Dict

from C_MTEB.tasks import *  # wildcard kept ahead of the explicit imports below so they win on any name clash
import numpy as np
import torch
from mteb import MTEB, DRESModel
from sentence_transformers import SentenceTransformer
1100
+
1101
+
1102
class RetrievalModel(DRESModel):
    """Adapter that exposes a sentence encoder through the query/corpus
    retrieval interface (``encode_queries`` / ``encode_corpus``).

    Texts are passed to the encoder unchanged: no instruction prefix is
    prepended to queries or documents.
    """

    def __init__(self, encoder, **kwargs):
        """Store the encoder; extra keyword arguments are accepted and ignored.

        NOTE(review): the base-class ``__init__`` is intentionally not called,
        exactly as in the original script — confirm DRESModel needs no state
        of its own for the methods used during evaluation.
        """
        self.encoder = encoder

    def encode_queries(self, queries: List[str], **kwargs) -> np.ndarray:
        """Embed the query strings as-is and return the encoder output."""
        return self._do_encode([str(q) for q in queries])

    def encode_corpus(self, corpus: List[Dict[str, str]], **kwargs) -> np.ndarray:
        """Embed documents as ``"<title> <text>"``.

        ``title`` is optional (treated as empty when missing); surrounding
        whitespace is stripped so title-less documents do not start with a
        blank.
        """
        input_texts = ['{} {}'.format(doc.get('title', ''), doc['text']).strip() for doc in corpus]
        return self._do_encode(input_texts)

    @torch.no_grad()
    def _do_encode(self, input_texts: List[str]) -> np.ndarray:
        """Run the wrapped encoder with normalized, NumPy-converted output."""
        return self.encoder.encode(
            sentences=input_texts,
            batch_size=512,
            normalize_embeddings=True,
            convert_to_numpy=True,
        )
1123
+
1124
+
1125
def get_args():
    """Parse the evaluation script's command-line options from ``sys.argv``.

    Returns the ``argparse.Namespace`` holding ``model_name_or_path``,
    ``task_type``, ``pooling_method``, ``output_dir`` and ``max_len``.
    """
    # (flag, add_argument keyword arguments) — registered in this order.
    option_table = (
        ('--model_name_or_path', dict(default="acge_text_embedding", type=str)),
        ('--task_type', dict(default=None, type=str)),
        ('--pooling_method', dict(default='cls', type=str)),
        ('--output_dir', dict(default='zh_results', type=str, help='output directory')),
        ('--max_len', dict(default=1024, type=int, help='max length')),
    )

    cli = argparse.ArgumentParser()
    for flag, spec in option_table:
        cli.add_argument(flag, **spec)
    return cli.parse_args()
1134
+
1135
+
1136
if __name__ == '__main__':
    args = get_args()

    # Load the model in half precision and make every encode() call return
    # normalized embeddings by default.
    encoder = SentenceTransformer(args.model_name_or_path).half()
    encoder.encode = functools.partial(encoder.encode, normalize_embeddings=True)
    encoder.max_seq_length = int(args.max_len)

    # Retrieval tasks are evaluated through the RetrievalModel adapter; every
    # other task receives the bare encoder.
    TASKS_WITH_PROMPTS = ["T2Retrieval", "MMarcoRetrieval", "DuRetrieval", "CovidRetrieval", "CmedqaRetrieval",
                          "EcomRetrieval", "MedicalRetrieval", "VideoRetrieval"]

    selected = MTEB(task_types=args.task_type, task_langs=['zh', 'zh-CN'])
    task_names = [t.description["name"] for t in selected.tasks]

    # One MTEB run per task; existing result files are not overwritten.
    for task in task_names:
        evaluation = MTEB(tasks=[task], task_langs=['zh', 'zh-CN'])
        model_for_task = RetrievalModel(encoder) if task in TASKS_WITH_PROMPTS else encoder
        evaluation.run(model_for_task, output_folder=args.output_dir, overwrite_results=False)
1152
+
1153
+
1154
+ ```
1155
+
1156
+
1157
+ ## Usage
1158
+
1159
+ #### acge 中文系列模型
1160
+
1161
+ 在sentence-transformers库中的使用方法:
1162
+
1163
+ ```python
1164
from sentence_transformers import SentenceTransformer

# Load the model and show the maximum sequence length it is configured with.
model = SentenceTransformer('acge_text_embedding')
print(model.max_seq_length)

sentences = ["数据1", "数据2"]

# Both sides are encoded with normalize_embeddings=True, so the matrix
# product below is a pairwise similarity matrix of normalized vectors.
embeddings_1 = model.encode(sentences, normalize_embeddings=True)
embeddings_2 = model.encode(sentences, normalize_embeddings=True)
similarity = embeddings_1 @ embeddings_2.T
print(similarity)
1173
+ ```
1174
+ 在sentence-transformers库中的使用方法,选取不同的维度:
1175
+
1176
+ ```python
1177
import torch
from sentence_transformers import SentenceTransformer

sentences = ["数据1", "数据2"]
model = SentenceTransformer('acge_text_embedding')

# Ask for a torch.Tensor: encode() returns a NumPy array by default, which
# torch.nn.functional.normalize below would reject.
# Normalization is deferred until after truncation, because a truncated
# vector is no longer unit-length.
embeddings = model.encode(sentences, normalize_embeddings=False, convert_to_tensor=True)

matryoshka_dim = 1024
embeddings = embeddings[..., :matryoshka_dim]  # Shrink the embedding dimensions
embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1)
print(embeddings.shape)
# => torch.Size([2, 1024])
1188
+
1189
+ ```
config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertModel"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "directionality": "bidi",
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 1024,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 4096,
14
+ "layer_norm_eps": 1e-12,
15
+ "max_position_embeddings": 1024,
16
+ "model_type": "bert",
17
+ "num_attention_heads": 16,
18
+ "num_hidden_layers": 24,
19
+ "pad_token_id": 0,
20
+ "pooler_fc_size": 768,
21
+ "pooler_num_attention_heads": 12,
22
+ "pooler_num_fc_layers": 3,
23
+ "pooler_size_per_head": 128,
24
+ "pooler_type": "first_token_transform",
25
+ "position_embedding_type": "absolute",
26
+ "torch_dtype": "bfloat16",
27
+ "transformers_version": "4.28.0",
28
+ "type_vocab_size": 2,
29
+ "use_cache": true,
30
+ "vocab_size": 21128
31
+ }
img/matryoshka-small.gif ADDED

Git LFS Details

  • SHA256: cbd65f32e8a57b09c6b0866d95facec81cf8a01e064caeb57018e65cfc12dba8
  • Pointer size: 132 Bytes
  • Size of remote file: 3.01 MB
modules.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Dense",
18
+ "type": "sentence_transformers.models.Dense"
19
+ }
20
+ ]
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6791d0a1cee50f2d5d3f5a1092ef7e72750ad64bf48830d9deb017e8584c2941
3
+ size 652228333
result/acge_text_embedding_a10_bf16/AFQMC.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_revision": null,
3
+ "mteb_dataset_name": "AFQMC",
4
+ "mteb_version": "1.1.1",
5
+ "validation": {
6
+ "cos_sim": {
7
+ "pearson": 0.5403313115451808,
8
+ "spearman": 0.5880630275663453
9
+ },
10
+ "euclidean": {
11
+ "pearson": 0.5746843921090328,
12
+ "spearman": 0.5880155813915751
13
+ },
14
+ "evaluation_time": 2.86,
15
+ "manhattan": {
16
+ "pearson": 0.5745948298354011,
17
+ "spearman": 0.5879717540772654
18
+ }
19
+ }
20
+ }
result/acge_text_embedding_a10_bf16/ATEC.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_revision": null,
3
+ "mteb_dataset_name": "ATEC",
4
+ "mteb_version": "1.1.1",
5
+ "test": {
6
+ "cos_sim": {
7
+ "pearson": 0.5352343534213784,
8
+ "spearman": 0.5794595170176331
9
+ },
10
+ "euclidean": {
11
+ "pearson": 0.6116662835482146,
12
+ "spearman": 0.5794590216330153
13
+ },
14
+ "evaluation_time": 15.75,
15
+ "manhattan": {
16
+ "pearson": 0.6116423136556863,
17
+ "spearman": 0.5794474364498415
18
+ }
19
+ }
20
+ }
result/acge_text_embedding_a10_bf16/AmazonReviewsClassification.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_revision": "1399c76144fd37290681b995c656ef9b2e06e26d",
3
+ "mteb_dataset_name": "AmazonReviewsClassification",
4
+ "mteb_version": "1.1.1",
5
+ "test": {
6
+ "evaluation_time": 16.78,
7
+ "zh": {
8
+ "accuracy": 0.48536,
9
+ "accuracy_stderr": 0.016551688735594317,
10
+ "f1": 0.4659652987641795,
11
+ "f1_stderr": 0.016438251092234995,
12
+ "main_score": 0.48536
13
+ }
14
+ },
15
+ "validation": {
16
+ "evaluation_time": 22.66,
17
+ "zh": {
18
+ "accuracy": 0.4768,
19
+ "accuracy_stderr": 0.01374394412095741,
20
+ "f1": 0.4575978790615031,
21
+ "f1_stderr": 0.011966200426813917,
22
+ "main_score": 0.4768
23
+ }
24
+ }
25
+ }
result/acge_text_embedding_a10_bf16/BQ.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_revision": null,
3
+ "mteb_dataset_name": "BQ",
4
+ "mteb_version": "1.1.1",
5
+ "test": {
6
+ "cos_sim": {
7
+ "pearson": 0.6827376047375847,
8
+ "spearman": 0.7037250413109453
9
+ },
10
+ "euclidean": {
11
+ "pearson": 0.6942852768280962,
12
+ "spearman": 0.7037187366396407
13
+ },
14
+ "evaluation_time": 7.01,
15
+ "manhattan": {
16
+ "pearson": 0.694038790612488,
17
+ "spearman": 0.7034866071976907
18
+ }
19
+ }
20
+ }
result/acge_text_embedding_a10_bf16/CLSClusteringP2P.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_revision": null,
3
+ "mteb_dataset_name": "CLSClusteringP2P",
4
+ "mteb_version": "1.1.1",
5
+ "test": {
6
+ "evaluation_time": 408.54,
7
+ "v_measure": 0.46838444815258395,
8
+ "v_measure_std": 0.009198056440364623
9
+ }
10
+ }
result/acge_text_embedding_a10_bf16/CLSClusteringS2S.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_revision": null,
3
+ "mteb_dataset_name": "CLSClusteringS2S",
4
+ "mteb_version": "1.1.1",
5
+ "test": {
6
+ "evaluation_time": 55.48,
7
+ "v_measure": 0.44259095471511933,
8
+ "v_measure_std": 0.013162258362441878
9
+ }
10
+ }
result/acge_text_embedding_a10_bf16/CMedQAv1.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_revision": null,
3
+ "mteb_dataset_name": "CMedQAv1",
4
+ "mteb_version": "1.1.1",
5
+ "test": {
6
+ "evaluation_time": 177.38,
7
+ "map": 0.8850276515595047,
8
+ "mrr": 0.9091761904761906
9
+ }
10
+ }
result/acge_text_embedding_a10_bf16/CMedQAv2.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_revision": null,
3
+ "mteb_dataset_name": "CMedQAv2",
4
+ "mteb_version": "1.1.1",
5
+ "test": {
6
+ "evaluation_time": 166.23,
7
+ "map": 0.887392598559833,
8
+ "mrr": 0.9120765873015874
9
+ }
10
+ }
result/acge_text_embedding_a10_bf16/CmedqaRetrieval.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_revision": null,
3
+ "mteb_dataset_name": "CmedqaRetrieval",
4
+ "mteb_version": "1.1.1",
5
+ "validation": {
6
+ "evaluation_time": 642.4,
7
+ "map_at_1": 0.26912,
8
+ "map_at_10": 0.4002,
9
+ "map_at_100": 0.41919,
10
+ "map_at_1000": 0.42021,
11
+ "map_at_3": 0.35429,
12
+ "map_at_5": 0.38055,
13
+ "mrr_at_1": 0.40685,
14
+ "mrr_at_10": 0.48849,
15
+ "mrr_at_100": 0.4983,
16
+ "mrr_at_1000": 0.4987,
17
+ "mrr_at_3": 0.46166,
18
+ "mrr_at_5": 0.47737,
19
+ "ndcg_at_1": 0.40685,
20
+ "ndcg_at_10": 0.46798,
21
+ "ndcg_at_100": 0.53998,
22
+ "ndcg_at_1000": 0.557,
23
+ "ndcg_at_3": 0.41035,
24
+ "ndcg_at_5": 0.4361,
25
+ "precision_at_1": 0.40685,
26
+ "precision_at_10": 0.1043,
27
+ "precision_at_100": 0.01625,
28
+ "precision_at_1000": 0.00184,
29
+ "precision_at_3": 0.23139,
30
+ "precision_at_5": 0.17024,
31
+ "recall_at_1": 0.26912,
32
+ "recall_at_10": 0.5789,
33
+ "recall_at_100": 0.87374,
34
+ "recall_at_1000": 0.98721,
35
+ "recall_at_3": 0.40825,
36
+ "recall_at_5": 0.48491
37
+ }
38
+ }
result/acge_text_embedding_a10_bf16/Cmnli.json ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_revision": null,
3
+ "mteb_dataset_name": "Cmnli",
4
+ "mteb_version": "1.1.1",
5
+ "validation": {
6
+ "cos_sim": {
7
+ "accuracy": 0.834155141310884,
8
+ "accuracy_threshold": 0.655905544757843,
9
+ "ap": 0.9049057754163061,
10
+ "f1": 0.8427460263780859,
11
+ "f1_threshold": 0.645372748374939,
12
+ "precision": 0.8136700043535046,
13
+ "recall": 0.8739770867430442
14
+ },
15
+ "dot": {
16
+ "accuracy": 0.8342754058929646,
17
+ "accuracy_threshold": 0.6557313203811646,
18
+ "ap": 0.9050212054081632,
19
+ "f1": 0.8431416424255985,
20
+ "f1_threshold": 0.6397272348403931,
21
+ "precision": 0.8084102124007724,
22
+ "recall": 0.8809913490764555
23
+ },
24
+ "euclidean": {
25
+ "accuracy": 0.8345159350571256,
26
+ "accuracy_threshold": 0.8281130194664001,
27
+ "ap": 0.9048986952292054,
28
+ "f1": 0.8430311231393776,
29
+ "f1_threshold": 0.8416151404380798,
30
+ "precision": 0.8142016989762579,
31
+ "recall": 0.8739770867430442
32
+ },
33
+ "evaluation_time": 6.59,
34
+ "manhattan": {
35
+ "accuracy": 0.8353577871316897,
36
+ "accuracy_threshold": 28.042278289794922,
37
+ "ap": 0.9048090090749703,
38
+ "f1": 0.8442074208555542,
39
+ "f1_threshold": 28.250762939453125,
40
+ "precision": 0.8201058201058201,
41
+ "recall": 0.8697685293429974
42
+ },
43
+ "max": {
44
+ "accuracy": 0.8353577871316897,
45
+ "ap": 0.9050212054081632,
46
+ "f1": 0.8442074208555542
47
+ }
48
+ }
49
+ }
result/acge_text_embedding_a10_bf16/CovidRetrieval.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_revision": null,
3
+ "mteb_dataset_name": "CovidRetrieval",
4
+ "mteb_version": "1.1.1",
5
+ "validation": {
6
+ "evaluation_time": 675.01,
7
+ "map_at_1": 0.67597,
8
+ "map_at_10": 0.76531,
9
+ "map_at_100": 0.7688,
10
+ "map_at_1000": 0.76884,
11
+ "map_at_3": 0.74772,
12
+ "map_at_5": 0.75881,
13
+ "mrr_at_1": 0.67861,
14
+ "mrr_at_10": 0.7654,
15
+ "mrr_at_100": 0.76888,
16
+ "mrr_at_1000": 0.76893,
17
+ "mrr_at_3": 0.74886,
18
+ "mrr_at_5": 0.75929,
19
+ "ndcg_at_1": 0.67861,
20
+ "ndcg_at_10": 0.80405,
21
+ "ndcg_at_100": 0.81918,
22
+ "ndcg_at_1000": 0.82027,
23
+ "ndcg_at_3": 0.76972,
24
+ "ndcg_at_5": 0.78929,
25
+ "precision_at_1": 0.67861,
26
+ "precision_at_10": 0.09336,
27
+ "precision_at_100": 0.01001,
28
+ "precision_at_1000": 0.00101,
29
+ "precision_at_3": 0.27924,
30
+ "precision_at_5": 0.17745,
31
+ "recall_at_1": 0.67597,
32
+ "recall_at_10": 0.92308,
33
+ "recall_at_100": 0.99052,
34
+ "recall_at_1000": 0.99895,
35
+ "recall_at_3": 0.83219,
36
+ "recall_at_5": 0.87908
37
+ }
38
+ }
result/acge_text_embedding_a10_bf16/DuRetrieval.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_revision": null,
3
+ "mteb_dataset_name": "DuRetrieval",
4
+ "mteb_version": "1.1.1",
5
+ "validation": {
6
+ "evaluation_time": 669.37,
7
+ "map_at_1": 0.25574,
8
+ "map_at_10": 0.7849,
9
+ "map_at_100": 0.81378,
10
+ "map_at_1000": 0.81423,
11
+ "map_at_3": 0.54115,
12
+ "map_at_5": 0.68734,
13
+ "mrr_at_1": 0.892,
14
+ "mrr_at_10": 0.92573,
15
+ "mrr_at_100": 0.92649,
16
+ "mrr_at_1000": 0.92653,
17
+ "mrr_at_3": 0.92275,
18
+ "mrr_at_5": 0.92465,
19
+ "ndcg_at_1": 0.892,
20
+ "ndcg_at_10": 0.86086,
21
+ "ndcg_at_100": 0.89052,
22
+ "ndcg_at_1000": 0.89483,
23
+ "ndcg_at_3": 0.84913,
24
+ "ndcg_at_5": 0.8406,
25
+ "precision_at_1": 0.892,
26
+ "precision_at_10": 0.4135,
27
+ "precision_at_100": 0.04779,
28
+ "precision_at_1000": 0.00488,
29
+ "precision_at_3": 0.76183,
30
+ "precision_at_5": 0.6468,
31
+ "recall_at_1": 0.25574,
32
+ "recall_at_10": 0.87154,
33
+ "recall_at_100": 0.96829,
34
+ "recall_at_1000": 0.9912,
35
+ "recall_at_3": 0.56446,
36
+ "recall_at_5": 0.73658
37
+ }
38
+ }
result/acge_text_embedding_a10_bf16/EcomRetrieval.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_revision": null,
3
+ "mteb_dataset_name": "EcomRetrieval",
4
+ "mteb_version": "1.1.1",
5
+ "validation": {
6
+ "evaluation_time": 65.33,
7
+ "map_at_1": 0.521,
8
+ "map_at_10": 0.62619,
9
+ "map_at_100": 0.63114,
10
+ "map_at_1000": 0.63131,
11
+ "map_at_3": 0.60267,
12
+ "map_at_5": 0.61792,
13
+ "mrr_at_1": 0.521,
14
+ "mrr_at_10": 0.62619,
15
+ "mrr_at_100": 0.63114,
16
+ "mrr_at_1000": 0.63131,
17
+ "mrr_at_3": 0.60267,
18
+ "mrr_at_5": 0.61792,
19
+ "ndcg_at_1": 0.521,
20
+ "ndcg_at_10": 0.67566,
21
+ "ndcg_at_100": 0.69947,
22
+ "ndcg_at_1000": 0.70333,
23
+ "ndcg_at_3": 0.62826,
24
+ "ndcg_at_5": 0.6558,
25
+ "precision_at_1": 0.521,
26
+ "precision_at_10": 0.083,
27
+ "precision_at_100": 0.00941,
28
+ "precision_at_1000": 0.00097,
29
+ "precision_at_3": 0.234,
30
+ "precision_at_5": 0.1538,
31
+ "recall_at_1": 0.521,
32
+ "recall_at_10": 0.83,
33
+ "recall_at_100": 0.941,
34
+ "recall_at_1000": 0.97,
35
+ "recall_at_3": 0.702,
36
+ "recall_at_5": 0.769
37
+ }
38
+ }
result/acge_text_embedding_a10_bf16/IFlyTek.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_revision": null,
3
+ "mteb_dataset_name": "IFlyTek",
4
+ "mteb_version": "1.1.1",
5
+ "validation": {
6
+ "accuracy": 0.5176606387071951,
7
+ "accuracy_stderr": 0.0035230544493296385,
8
+ "evaluation_time": 149.96,
9
+ "f1": 0.40270170518699155,
10
+ "f1_stderr": 0.0017349315887891484,
11
+ "main_score": 0.5176606387071951
12
+ }
13
+ }
result/acge_text_embedding_a10_bf16/JDReview.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_revision": null,
3
+ "mteb_dataset_name": "JDReview",
4
+ "mteb_version": "1.1.1",
5
+ "test": {
6
+ "accuracy": 0.8669793621013133,
7
+ "accuracy_stderr": 0.009170243459549659,
8
+ "ap": 0.5546718958939327,
9
+ "ap_stderr": 0.013907979413741858,
10
+ "evaluation_time": 9.64,
11
+ "f1": 0.8148228915952436,
12
+ "f1_stderr": 0.008989126268531128,
13
+ "main_score": 0.8669793621013133
14
+ }
15
+ }
result/acge_text_embedding_a10_bf16/LCQMC.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_revision": null,
3
+ "mteb_dataset_name": "LCQMC",
4
+ "mteb_version": "1.1.1",
5
+ "test": {
6
+ "cos_sim": {
7
+ "pearson": 0.7113799238905196,
8
+ "spearman": 0.7817304786177175
9
+ },
10
+ "euclidean": {
11
+ "pearson": 0.7748642718370318,
12
+ "spearman": 0.7817554866337617
13
+ },
14
+ "evaluation_time": 7.69,
15
+ "manhattan": {
16
+ "pearson": 0.7747501828376532,
17
+ "spearman": 0.7816650098418465
18
+ }
19
+ }
20
+ }
result/acge_text_embedding_a10_bf16/MMarcoReranking.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_revision": null,
3
+ "mteb_dataset_name": "MMarcoReranking",
4
+ "mteb_version": "1.1.1",
5
+ "validation": {
6
+ "evaluation_time": 204.7,
7
+ "map": 0.2758238117043332,
8
+ "mrr": 0.27842460317460316
9
+ }
10
+ }
result/acge_text_embedding_a10_bf16/MMarcoRetrieval.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_revision": null,
3
+ "mteb_dataset_name": "MMarcoRetrieval",
4
+ "mteb_version": "1.1.1",
5
+ "validation": {
6
+ "evaluation_time": 241.64,
7
+ "map_at_1": 0.65654,
8
+ "map_at_10": 0.74765,
9
+ "map_at_100": 0.75108,
10
+ "map_at_1000": 0.7512,
11
+ "map_at_3": 0.73022,
12
+ "map_at_5": 0.74134,
13
+ "mrr_at_1": 0.67923,
14
+ "mrr_at_10": 0.75395,
15
+ "mrr_at_100": 0.75694,
16
+ "mrr_at_1000": 0.75705,
17
+ "mrr_at_3": 0.73885,
18
+ "mrr_at_5": 0.74845,
19
+ "ndcg_at_1": 0.67923,
20
+ "ndcg_at_10": 0.78425,
21
+ "ndcg_at_100": 0.79966,
22
+ "ndcg_at_1000": 0.80278,
23
+ "ndcg_at_3": 0.75128,
24
+ "ndcg_at_5": 0.76999,
25
+ "precision_at_1": 0.67923,
26
+ "precision_at_10": 0.09468,
27
+ "precision_at_100": 0.01023,
28
+ "precision_at_1000": 0.00105,
29
+ "precision_at_3": 0.28329,
30
+ "precision_at_5": 0.1798,
31
+ "recall_at_1": 0.65654,
32
+ "recall_at_10": 0.89056,
33
+ "recall_at_100": 0.96037,
34
+ "recall_at_1000": 0.98455,
35
+ "recall_at_3": 0.80343,
36
+ "recall_at_5": 0.84789
37
+ }
38
+ }
result/acge_text_embedding_a10_bf16/MassiveIntentClassification.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_revision": "31efe3c427b0bae9c22cbb560b8f15491cc6bed7",
3
+ "mteb_dataset_name": "MassiveIntentClassification",
4
+ "mteb_version": "1.1.1",
5
+ "test": {
6
+ "evaluation_time": 12.49,
7
+ "zh-CN": {
8
+ "accuracy": 0.7585743106926698,
9
+ "accuracy_stderr": 0.015209391900334438,
10
+ "f1": 0.7303701614932926,
11
+ "f1_stderr": 0.012723690389832107,
12
+ "main_score": 0.7585743106926698
13
+ }
14
+ },
15
+ "validation": {
16
+ "evaluation_time": 12.27,
17
+ "zh-CN": {
18
+ "accuracy": 0.7619773733398917,
19
+ "accuracy_stderr": 0.014512576249971336,
20
+ "f1": 0.7244896179461546,
21
+ "f1_stderr": 0.011384890091815123,
22
+ "main_score": 0.7619773733398917
23
+ }
24
+ }
25
+ }
result/acge_text_embedding_a10_bf16/MassiveScenarioClassification.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_revision": "7d571f92784cd94a019292a1f45445077d0ef634",
3
+ "mteb_dataset_name": "MassiveScenarioClassification",
4
+ "mteb_version": "1.1.1",
5
+ "test": {
6
+ "evaluation_time": 8.85,
7
+ "zh-CN": {
8
+ "accuracy": 0.7842299932750505,
9
+ "accuracy_stderr": 0.014823175563716382,
10
+ "f1": 0.7810417897185625,
11
+ "f1_stderr": 0.013286373780853102,
12
+ "main_score": 0.7842299932750505
13
+ }
14
+ },
15
+ "validation": {
16
+ "evaluation_time": 8.38,
17
+ "zh-CN": {
18
+ "accuracy": 0.7782095425479587,
19
+ "accuracy_stderr": 0.014618885958749248,
20
+ "f1": 0.7723375008626536,
21
+ "f1_stderr": 0.015591423260094068,
22
+ "main_score": 0.7782095425479587
23
+ }
24
+ }
25
+ }
result/acge_text_embedding_a10_bf16/MedicalRetrieval.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_revision": null,
3
+ "mteb_dataset_name": "MedicalRetrieval",
4
+ "mteb_version": "1.1.1",
5
+ "validation": {
6
+ "evaluation_time": 236.29,
7
+ "map_at_1": 0.549,
8
+ "map_at_10": 0.61024,
9
+ "map_at_100": 0.61585,
10
+ "map_at_1000": 0.61625,
11
+ "map_at_3": 0.59533,
12
+ "map_at_5": 0.60333,
13
+ "mrr_at_1": 0.551,
14
+ "mrr_at_10": 0.61124,
15
+ "mrr_at_100": 0.61686,
16
+ "mrr_at_1000": 0.61726,
17
+ "mrr_at_3": 0.59633,
18
+ "mrr_at_5": 0.60433,
19
+ "ndcg_at_1": 0.549,
20
+ "ndcg_at_10": 0.6407,
21
+ "ndcg_at_100": 0.67041,
22
+ "ndcg_at_1000": 0.68237,
23
+ "ndcg_at_3": 0.60969,
24
+ "ndcg_at_5": 0.62411,
25
+ "precision_at_1": 0.549,
26
+ "precision_at_10": 0.0737,
27
+ "precision_at_100": 0.00882,
28
+ "precision_at_1000": 0.00098,
29
+ "precision_at_3": 0.217,
30
+ "precision_at_5": 0.1372,
31
+ "recall_at_1": 0.549,
32
+ "recall_at_10": 0.737,
33
+ "recall_at_100": 0.882,
34
+ "recall_at_1000": 0.978,
35
+ "recall_at_3": 0.651,
36
+ "recall_at_5": 0.686
37
+ }
38
+ }
result/acge_text_embedding_a10_bf16/MultilingualSentiment.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_revision": null,
3
+ "mteb_dataset_name": "MultilingualSentiment",
4
+ "mteb_version": "1.1.1",
5
+ "validation": {
6
+ "accuracy": 0.7757666666666666,
7
+ "accuracy_stderr": 0.004494564618637846,
8
+ "evaluation_time": 19.48,
9
+ "f1": 0.7754926687781014,
10
+ "f1_stderr": 0.006051542426096796,
11
+ "main_score": 0.7757666666666666
12
+ }
13
+ }
result/acge_text_embedding_a10_bf16/Ocnli.json ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_revision": null,
3
+ "mteb_dataset_name": "Ocnli",
4
+ "mteb_version": "1.1.1",
5
+ "validation": {
6
+ "cos_sim": {
7
+ "accuracy": 0.8110449377368706,
8
+ "accuracy_threshold": 0.5930440425872803,
9
+ "ap": 0.8516262845422288,
10
+ "f1": 0.8297771455666192,
11
+ "f1_threshold": 0.5647917985916138,
12
+ "precision": 0.7530120481927711,
13
+ "recall": 0.9239704329461457
14
+ },
15
+ "dot": {
16
+ "accuracy": 0.8110449377368706,
17
+ "accuracy_threshold": 0.5982565879821777,
18
+ "ap": 0.8511448381714821,
19
+ "f1": 0.8299190090519295,
20
+ "f1_threshold": 0.5676579475402832,
21
+ "precision": 0.7560763888888888,
22
+ "recall": 0.9197465681098205
23
+ },
24
+ "euclidean": {
25
+ "accuracy": 0.8105035192203573,
26
+ "accuracy_threshold": 0.8990259170532227,
27
+ "ap": 0.8516821622996872,
28
+ "f1": 0.8300094966761632,
29
+ "f1_threshold": 0.9317486882209778,
30
+ "precision": 0.7540983606557377,
31
+ "recall": 0.9229144667370645
32
+ },
33
+ "evaluation_time": 1.5,
34
+ "manhattan": {
35
+ "accuracy": 0.8115863562533838,
36
+ "accuracy_threshold": 30.2585506439209,
37
+ "ap": 0.8514921863839019,
38
+ "f1": 0.8291048348492102,
39
+ "f1_threshold": 31.238189697265625,
40
+ "precision": 0.7583187390542907,
41
+ "recall": 0.914466737064414
42
+ },
43
+ "max": {
44
+ "accuracy": 0.8115863562533838,
45
+ "ap": 0.8516821622996872,
46
+ "f1": 0.8300094966761632
47
+ }
48
+ }
49
+ }
result/acge_text_embedding_a10_bf16/OnlineShopping.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_revision": null,
3
+ "mteb_dataset_name": "OnlineShopping",
4
+ "mteb_version": "1.1.1",
5
+ "test": {
6
+ "accuracy": 0.9374,
7
+ "accuracy_stderr": 0.005499090833946969,
8
+ "ap": 0.918742935725318,
9
+ "ap_stderr": 0.0038520346274983235,
10
+ "evaluation_time": 13.75,
11
+ "f1": 0.937291967799952,
12
+ "f1_stderr": 0.005456903309479222,
13
+ "main_score": 0.9374
14
+ }
15
+ }
result/acge_text_embedding_a10_bf16/PAWSX.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_revision": null,
3
+ "mteb_dataset_name": "PAWSX",
4
+ "mteb_version": "1.1.1",
5
+ "test": {
6
+ "cos_sim": {
7
+ "pearson": 0.39153309719954255,
8
+ "spearman": 0.4549104095046596
9
+ },
10
+ "euclidean": {
11
+ "pearson": 0.4484341925744787,
12
+ "spearman": 0.4549591820681975
13
+ },
14
+ "evaluation_time": 3.2,
15
+ "manhattan": {
16
+ "pearson": 0.44849970344307794,
17
+ "spearman": 0.45511943477696504
18
+ }
19
+ }
20
+ }
result/acge_text_embedding_a10_bf16/QBQTC.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_revision": null,
3
+ "mteb_dataset_name": "QBQTC",
4
+ "mteb_version": "1.1.1",
5
+ "test": {
6
+ "cos_sim": {
7
+ "pearson": 0.34238065972733855,
8
+ "spearman": 0.3729994521083957
9
+ },
10
+ "euclidean": {
11
+ "pearson": 0.35117285683586885,
12
+ "spearman": 0.37296421136185903
13
+ },
14
+ "evaluation_time": 4.28,
15
+ "manhattan": {
16
+ "pearson": 0.35077612993620544,
17
+ "spearman": 0.3724321404636873
18
+ }
19
+ }
20
+ }
result/acge_text_embedding_a10_bf16/STS22.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_revision": "6d1ba47164174a496b7fa5d3569dae26a6813b80",
3
+ "mteb_dataset_name": "STS22",
4
+ "mteb_version": "1.1.1",
5
+ "test": {
6
+ "evaluation_time": 19.94,
7
+ "zh": {
8
+ "cos_sim": {
9
+ "pearson": 0.6181084429804533,
10
+ "spearman": 0.6706680826468155
11
+ },
12
+ "euclidean": {
13
+ "pearson": 0.6568769357174774,
14
+ "spearman": 0.6703679315323132
15
+ },
16
+ "manhattan": {
17
+ "pearson": 0.6562894558510642,
18
+ "spearman": 0.6703727613483745
19
+ }
20
+ }
21
+ }
22
+ }
result/acge_text_embedding_a10_bf16/STSB.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_revision": null,
3
+ "mteb_dataset_name": "STSB",
4
+ "mteb_version": "1.1.1",
5
+ "test": {
6
+ "cos_sim": {
7
+ "pearson": 0.8156386995511361,
8
+ "spearman": 0.8273686622131105
9
+ },
10
+ "euclidean": {
11
+ "pearson": 0.824948141156291,
12
+ "spearman": 0.8272976398817485
13
+ },
14
+ "evaluation_time": 1.18,
15
+ "manhattan": {
16
+ "pearson": 0.8248268721046512,
17
+ "spearman": 0.8271507072385981
18
+ }
19
+ }
20
+ }
result/acge_text_embedding_a10_bf16/T2Reranking.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_revision": null,
3
+ "mteb_dataset_name": "T2Reranking",
4
+ "mteb_version": "1.1.1",
5
+ "validation": {
6
+ "evaluation_time": 1202.17,
7
+ "map": 0.6585592968571603,
8
+ "mrr": 0.7596156946190799
9
+ }
10
+ }
result/acge_text_embedding_a10_bf16/T2Retrieval.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_revision": null,
3
+ "mteb_dataset_name": "T2Retrieval",
4
+ "mteb_version": "1.1.1",
5
+ "validation": {
6
+ "evaluation_time": 1827.95,
7
+ "map_at_1": 0.27061,
8
+ "map_at_10": 0.75299,
9
+ "map_at_100": 0.79106,
10
+ "map_at_1000": 0.79179,
11
+ "map_at_3": 0.53062,
12
+ "map_at_5": 0.65117,
13
+ "mrr_at_1": 0.88817,
14
+ "mrr_at_10": 0.91719,
15
+ "mrr_at_100": 0.91812,
16
+ "mrr_at_1000": 0.91815,
17
+ "mrr_at_3": 0.91209,
18
+ "mrr_at_5": 0.91546,
19
+ "ndcg_at_1": 0.88817,
20
+ "ndcg_at_10": 0.83269,
21
+ "ndcg_at_100": 0.8726,
22
+ "ndcg_at_1000": 0.87942,
23
+ "ndcg_at_3": 0.84688,
24
+ "ndcg_at_5": 0.83242,
25
+ "precision_at_1": 0.88817,
26
+ "precision_at_10": 0.41293,
27
+ "precision_at_100": 0.04994,
28
+ "precision_at_1000": 0.00515,
29
+ "precision_at_3": 0.73945,
30
+ "precision_at_5": 0.6189,
31
+ "recall_at_1": 0.27061,
32
+ "recall_at_10": 0.8232,
33
+ "recall_at_100": 0.95049,
34
+ "recall_at_1000": 0.98466,
35
+ "recall_at_3": 0.54877,
36
+ "recall_at_5": 0.6883
37
+ }
38
+ }
result/acge_text_embedding_a10_bf16/TNews.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_revision": null,
3
+ "mteb_dataset_name": "TNews",
4
+ "mteb_version": "1.1.1",
5
+ "validation": {
6
+ "accuracy": 0.53697,
7
+ "accuracy_stderr": 0.003216846281686451,
8
+ "evaluation_time": 15.68,
9
+ "f1": 0.5187777083293563,
10
+ "f1_stderr": 0.004223883600638354,
11
+ "main_score": 0.53697
12
+ }
13
+ }
result/acge_text_embedding_a10_bf16/ThuNewsClusteringP2P.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_revision": null,
3
+ "mteb_dataset_name": "ThuNewsClusteringP2P",
4
+ "mteb_version": "1.1.1",
5
+ "test": {
6
+ "evaluation_time": 210.99,
7
+ "v_measure": 0.7348804805577354,
8
+ "v_measure_std": 0.022460628082213604
9
+ }
10
+ }
result/acge_text_embedding_a10_bf16/ThuNewsClusteringS2S.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_revision": null,
3
+ "mteb_dataset_name": "ThuNewsClusteringS2S",
4
+ "mteb_version": "1.1.1",
5
+ "test": {
6
+ "evaluation_time": 55.95,
7
+ "v_measure": 0.6828918056606984,
8
+ "v_measure_std": 0.02042687239840596
9
+ }
10
+ }
result/acge_text_embedding_a10_bf16/VideoRetrieval.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_revision": null,
3
+ "mteb_dataset_name": "VideoRetrieval",
4
+ "mteb_version": "1.1.1",
5
+ "validation": {
6
+ "evaluation_time": 65.38,
7
+ "map_at_1": 0.593,
8
+ "map_at_10": 0.69131,
9
+ "map_at_100": 0.6965,
10
+ "map_at_1000": 0.69667,
11
+ "map_at_3": 0.67667,
12
+ "map_at_5": 0.68687,
13
+ "mrr_at_1": 0.593,
14
+ "mrr_at_10": 0.69131,
15
+ "mrr_at_100": 0.6965,
16
+ "mrr_at_1000": 0.69667,
17
+ "mrr_at_3": 0.67667,
18
+ "mrr_at_5": 0.68687,
19
+ "ndcg_at_1": 0.593,
20
+ "ndcg_at_10": 0.73236,
21
+ "ndcg_at_100": 0.75514,
22
+ "ndcg_at_1000": 0.75924,
23
+ "ndcg_at_3": 0.70302,
24
+ "ndcg_at_5": 0.72127,
25
+ "precision_at_1": 0.593,
26
+ "precision_at_10": 0.0858,
27
+ "precision_at_100": 0.00959,
28
+ "precision_at_1000": 0.00099,
29
+ "precision_at_3": 0.25967,
30
+ "precision_at_5": 0.1646,
31
+ "recall_at_1": 0.593,
32
+ "recall_at_10": 0.858,
33
+ "recall_at_100": 0.959,
34
+ "recall_at_1000": 0.991,
35
+ "recall_at_3": 0.779,
36
+ "recall_at_5": 0.823
37
+ }
38
+ }
result/acge_text_embedding_a10_bf16/Waimai.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_revision": null,
3
+ "mteb_dataset_name": "Waimai",
4
+ "mteb_version": "1.1.1",
5
+ "test": {
6
+ "accuracy": 0.8855999999999999,
7
+ "accuracy_stderr": 0.00542586398650022,
8
+ "ap": 0.7361808496992722,
9
+ "ap_stderr": 0.011074642485384353,
10
+ "evaluation_time": 8.39,
11
+ "f1": 0.8709134854653389,
12
+ "f1_stderr": 0.0046763453419527284,
13
+ "main_score": 0.8855999999999999
14
+ }
15
+ }
result/acge_text_embedding_bf16/AFQMC.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_revision": "b44c3b011063adb25877c13823db83bb193913c4",
3
+ "mteb_dataset_name": "AFQMC",
4
+ "mteb_version": "1.1.2",
5
+ "validation": {
6
+ "cos_sim": {
7
+ "pearson": 0.5403219651150428,
8
+ "spearman": 0.5880567952355933
9
+ },
10
+ "euclidean": {
11
+ "pearson": 0.5747052075207808,
12
+ "spearman": 0.5880429232297114
13
+ },
14
+ "evaluation_time": 2.53,
15
+ "manhattan": {
16
+ "pearson": 0.5746163912433917,
17
+ "spearman": 0.58797778532121
18
+ }
19
+ }
20
+ }
result/acge_text_embedding_bf16/ATEC.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_revision": "0f319b1142f28d00e055a6770f3f726ae9b7d865",
3
+ "mteb_dataset_name": "ATEC",
4
+ "mteb_version": "1.1.2",
5
+ "test": {
6
+ "cos_sim": {
7
+ "pearson": 0.5352317196374685,
8
+ "spearman": 0.5794610819724817
9
+ },
10
+ "euclidean": {
11
+ "pearson": 0.6116974418403869,
12
+ "spearman": 0.5794681861980281
13
+ },
14
+ "evaluation_time": 11.13,
15
+ "manhattan": {
16
+ "pearson": 0.6116782535933452,
17
+ "spearman": 0.5794540903298445
18
+ }
19
+ }
20
+ }
result/acge_text_embedding_bf16/AmazonReviewsClassification.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_revision": "1399c76144fd37290681b995c656ef9b2e06e26d",
3
+ "mteb_dataset_name": "AmazonReviewsClassification",
4
+ "mteb_version": "1.1.2",
5
+ "test": {
6
+ "evaluation_time": 49.61,
7
+ "zh": {
8
+ "accuracy": 0.48556,
9
+ "accuracy_stderr": 0.016404828557470493,
10
+ "f1": 0.4661852566163211,
11
+ "f1_stderr": 0.016210688869191376,
12
+ "main_score": 0.48556
13
+ }
14
+ },
15
+ "validation": {
16
+ "evaluation_time": 32.97,
17
+ "zh": {
18
+ "accuracy": 0.4767600000000001,
19
+ "accuracy_stderr": 0.01377383025886408,
20
+ "f1": 0.45757305049914343,
21
+ "f1_stderr": 0.011974246680296244,
22
+ "main_score": 0.4767600000000001
23
+ }
24
+ }
25
+ }
result/acge_text_embedding_bf16/BQ.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_revision": "e3dda5e115e487b39ec7e618c0c6a29137052a55",
3
+ "mteb_dataset_name": "BQ",
4
+ "mteb_version": "1.1.2",
5
+ "test": {
6
+ "cos_sim": {
7
+ "pearson": 0.6826963267181252,
8
+ "spearman": 0.7036696156869363
9
+ },
10
+ "euclidean": {
11
+ "pearson": 0.6942591718370763,
12
+ "spearman": 0.703677583116469
13
+ },
14
+ "evaluation_time": 5.46,
15
+ "manhattan": {
16
+ "pearson": 0.6940127857737215,
17
+ "spearman": 0.7034572662526427
18
+ }
19
+ }
20
+ }
result/acge_text_embedding_bf16/CLSClusteringP2P.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_revision": "4b6227591c6c1a73bc76b1055f3b7f3588e72476",
3
+ "mteb_dataset_name": "CLSClusteringP2P",
4
+ "mteb_version": "1.1.2",
5
+ "test": {
6
+ "evaluation_time": 184.69,
7
+ "v_measure": 0.46546853871797733,
8
+ "v_measure_std": 0.014569739407015586
9
+ }
10
+ }
result/acge_text_embedding_bf16/CLSClusteringS2S.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_revision": "e458b3f5414b62b7f9f83499ac1f5497ae2e869f",
3
+ "mteb_dataset_name": "CLSClusteringS2S",
4
+ "mteb_version": "1.1.2",
5
+ "test": {
6
+ "evaluation_time": 110.77,
7
+ "v_measure": 0.4445602575811581,
8
+ "v_measure_std": 0.012328026830488758
9
+ }
10
+ }