Muennighoff commited on
Commit
039f451
1 Parent(s): 62a5cc0

Add MTEB evaluation

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. beir.json → evaluation/beir/beir.json +0 -0
  2. evaluation/mteb/AmazonCounterfactualClassification.json +16 -0
  3. evaluation/mteb/AmazonPolarityClassification.json +14 -0
  4. evaluation/mteb/AmazonReviewsClassification.json +14 -0
  5. evaluation/mteb/ArguAna.json +37 -0
  6. evaluation/mteb/ArxivClusteringP2P.json +9 -0
  7. evaluation/mteb/ArxivClusteringS2S.json +9 -0
  8. evaluation/mteb/AskUbuntuDupQuestions.json +9 -0
  9. evaluation/mteb/BIOSSES.json +17 -0
  10. evaluation/mteb/Banking77Classification.json +12 -0
  11. evaluation/mteb/BiorxivClusteringP2P.json +9 -0
  12. evaluation/mteb/BiorxivClusteringS2S.json +9 -0
  13. evaluation/mteb/CQADupstackAndroidRetrieval.json +37 -0
  14. evaluation/mteb/CQADupstackEnglishRetrieval.json +37 -0
  15. evaluation/mteb/CQADupstackGamingRetrieval.json +37 -0
  16. evaluation/mteb/CQADupstackGisRetrieval.json +37 -0
  17. evaluation/mteb/CQADupstackMathematicaRetrieval.json +37 -0
  18. evaluation/mteb/CQADupstackPhysicsRetrieval.json +37 -0
  19. evaluation/mteb/CQADupstackProgrammersRetrieval.json +37 -0
  20. evaluation/mteb/CQADupstackRetrieval.json +13 -0
  21. evaluation/mteb/CQADupstackStatsRetrieval.json +37 -0
  22. evaluation/mteb/CQADupstackTexRetrieval.json +37 -0
  23. evaluation/mteb/CQADupstackUnixRetrieval.json +37 -0
  24. evaluation/mteb/CQADupstackWebmastersRetrieval.json +37 -0
  25. evaluation/mteb/CQADupstackWordpressRetrieval.json +37 -0
  26. evaluation/mteb/ClimateFEVER.json +37 -0
  27. evaluation/mteb/DBPedia.json +37 -0
  28. evaluation/mteb/EmotionClassification.json +12 -0
  29. evaluation/mteb/FEVER.json +37 -0
  30. evaluation/mteb/FiQA2018.json +37 -0
  31. evaluation/mteb/HotpotQA.json +37 -0
  32. evaluation/mteb/ImdbClassification.json +14 -0
  33. evaluation/mteb/MSMARCO.json +37 -0
  34. evaluation/mteb/MTOPDomainClassification.json +14 -0
  35. evaluation/mteb/MTOPIntentClassification.json +14 -0
  36. evaluation/mteb/MassiveIntentClassification.json +14 -0
  37. evaluation/mteb/MassiveScenarioClassification.json +14 -0
  38. evaluation/mteb/MedrxivClusteringP2P.json +9 -0
  39. evaluation/mteb/MedrxivClusteringS2S.json +9 -0
  40. evaluation/mteb/MindSmallReranking.json +9 -0
  41. evaluation/mteb/NFCorpus.json +37 -0
  42. evaluation/mteb/NQ.json +37 -0
  43. evaluation/mteb/QuoraRetrieval.json +37 -0
  44. evaluation/mteb/RedditClustering.json +9 -0
  45. evaluation/mteb/RedditClusteringP2P.json +9 -0
  46. evaluation/mteb/SCIDOCS.json +37 -0
  47. evaluation/mteb/SICK-R.json +17 -0
  48. evaluation/mteb/STS12.json +19 -0
  49. evaluation/mteb/STS13.json +19 -0
  50. evaluation/mteb/STS14.json +19 -0
beir.json → evaluation/beir/beir.json RENAMED
File without changes
evaluation/mteb/AmazonCounterfactualClassification.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "test": {
3
+ "en": {
4
+ "accuracy": 0.6922388059701492,
5
+ "accuracy_stderr": 0.05577698009153047,
6
+ "ap": 0.3204724673950256,
7
+ "ap_stderr": 0.030758638728823475,
8
+ "f1": 0.6325719825770428,
9
+ "f1_stderr": 0.04382866443453507,
10
+ "main_score": 0.3204724673950256
11
+ },
12
+ "evaluation_time": 234.06
13
+ },
14
+ "dataset_version": null,
15
+ "mteb_version": "0.0.2"
16
+ }
evaluation/mteb/AmazonPolarityClassification.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "test": {
3
+ "accuracy": 0.7126109999999999,
4
+ "accuracy_stderr": 0.0371266464483395,
5
+ "ap": 0.6616336378255403,
6
+ "ap_stderr": 0.038282983973039475,
7
+ "evaluation_time": 330535.51,
8
+ "f1": 0.7089719145825304,
9
+ "f1_stderr": 0.03949505400938336,
10
+ "main_score": 0.7126109999999999
11
+ },
12
+ "dataset_version": null,
13
+ "mteb_version": "0.0.2"
14
+ }
evaluation/mteb/AmazonReviewsClassification.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "test": {
3
+ "en": {
4
+ "accuracy": 0.39191999999999994,
5
+ "accuracy_stderr": 0.023273538622220733,
6
+ "f1": 0.38580766731113825,
7
+ "f1_stderr": 0.018793905233795604,
8
+ "main_score": 0.39191999999999994
9
+ },
10
+ "evaluation_time": 2060.82
11
+ },
12
+ "dataset_version": null,
13
+ "mteb_version": "0.0.2"
14
+ }
evaluation/mteb/ArguAna.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_version": null,
3
+ "mteb_version": "0.0.2",
4
+ "test": {
5
+ "evaluation_time": 477.21,
6
+ "map_at_1": 0.27312,
7
+ "map_at_10": 0.4262,
8
+ "map_at_100": 0.43707,
9
+ "map_at_1000": 0.43715,
10
+ "map_at_3": 0.37624,
11
+ "map_at_5": 0.40498,
12
+ "mrr_at_1": 0.27667,
13
+ "mrr_at_10": 0.42737,
14
+ "mrr_at_100": 0.43823,
15
+ "mrr_at_1000": 0.43831,
16
+ "mrr_at_3": 0.37743,
17
+ "mrr_at_5": 0.40616,
18
+ "ndcg_at_1": 0.27312,
19
+ "ndcg_at_10": 0.51375,
20
+ "ndcg_at_100": 0.55778,
21
+ "ndcg_at_1000": 0.55966,
22
+ "ndcg_at_3": 0.41087,
23
+ "ndcg_at_5": 0.46269,
24
+ "precision_at_1": 0.27312,
25
+ "precision_at_10": 0.07945,
26
+ "precision_at_100": 0.00982,
27
+ "precision_at_1000": 0.001,
28
+ "precision_at_3": 0.17046,
29
+ "precision_at_5": 0.12745,
30
+ "recall_at_1": 0.27312,
31
+ "recall_at_10": 0.79445,
32
+ "recall_at_100": 0.98151,
33
+ "recall_at_1000": 0.99573,
34
+ "recall_at_3": 0.51138,
35
+ "recall_at_5": 0.63727
36
+ }
37
+ }
evaluation/mteb/ArxivClusteringP2P.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "test": {
3
+ "evaluation_time": 122260.97,
4
+ "v_measure": 0.4559037428592033,
5
+ "v_measure_std": 0.13565379029776853
6
+ },
7
+ "dataset_version": null,
8
+ "mteb_version": "0.0.2"
9
+ }
evaluation/mteb/ArxivClusteringS2S.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "test": {
3
+ "evaluation_time": 15111.26,
4
+ "v_measure": 0.3886371701986363,
5
+ "v_measure_std": 0.13693041258907623
6
+ },
7
+ "dataset_version": null,
8
+ "mteb_version": "0.0.2"
9
+ }
evaluation/mteb/AskUbuntuDupQuestions.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "test": {
3
+ "evaluation_time": 129.15,
4
+ "map": 0.6162556869142777,
5
+ "mrr": 0.7583256386580486
6
+ },
7
+ "dataset_version": null,
8
+ "mteb_version": "0.0.2"
9
+ }
evaluation/mteb/BIOSSES.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "test": {
3
+ "cos_sim": {
4
+ "pearson": 0.8996074355094802,
5
+ "spearman": 0.862501580394454
6
+ },
7
+ "euclidean": {
8
+ "pearson": 0.8218427440380462,
9
+ "spearman": 0.8014760935017947
10
+ },
11
+ "evaluation_time": 13.54,
12
+ "manhattan": {
13
+ "pearson": 0.8224621578156392,
14
+ "spearman": 0.8000363016590163
15
+ }
16
+ }
17
+ }
evaluation/mteb/Banking77Classification.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "test": {
3
+ "accuracy": 0.8449350649350649,
4
+ "accuracy_stderr": 0.005092747481478234,
5
+ "evaluation_time": 568.26,
6
+ "f1": 0.8442493432337361,
7
+ "f1_stderr": 0.005291617674599615,
8
+ "main_score": 0.8449350649350649
9
+ },
10
+ "dataset_version": null,
11
+ "mteb_version": "0.0.2"
12
+ }
evaluation/mteb/BiorxivClusteringP2P.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "test": {
3
+ "evaluation_time": 14349.06,
4
+ "v_measure": 0.3655145972298938,
5
+ "v_measure_std": 0.01180536429604858
6
+ },
7
+ "dataset_version": null,
8
+ "mteb_version": "0.0.2"
9
+ }
evaluation/mteb/BiorxivClusteringS2S.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "test": {
3
+ "evaluation_time": 1826.36,
4
+ "v_measure": 0.3369901851846774,
5
+ "v_measure_std": 0.011822688018810827
6
+ },
7
+ "dataset_version": null,
8
+ "mteb_version": "0.0.2"
9
+ }
evaluation/mteb/CQADupstackAndroidRetrieval.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_version": null,
3
+ "mteb_version": "0.0.2",
4
+ "test": {
5
+ "evaluation_time": 2419.91,
6
+ "map_at_1": 0.30499,
7
+ "map_at_10": 0.41208,
8
+ "map_at_100": 0.42638,
9
+ "map_at_1000": 0.42754,
10
+ "map_at_3": 0.37506,
11
+ "map_at_5": 0.39422,
12
+ "mrr_at_1": 0.37339,
13
+ "mrr_at_10": 0.47051,
14
+ "mrr_at_100": 0.47745,
15
+ "mrr_at_1000": 0.47786,
16
+ "mrr_at_3": 0.44087,
17
+ "mrr_at_5": 0.45711,
18
+ "ndcg_at_1": 0.37339,
19
+ "ndcg_at_10": 0.47666,
20
+ "ndcg_at_100": 0.52994,
21
+ "ndcg_at_1000": 0.54929,
22
+ "ndcg_at_3": 0.41982,
23
+ "ndcg_at_5": 0.4442,
24
+ "precision_at_1": 0.37339,
25
+ "precision_at_10": 0.09127,
26
+ "precision_at_100": 0.01475,
27
+ "precision_at_1000": 0.00194,
28
+ "precision_at_3": 0.20076,
29
+ "precision_at_5": 0.14449,
30
+ "recall_at_1": 0.30499,
31
+ "recall_at_10": 0.60328,
32
+ "recall_at_100": 0.82579,
33
+ "recall_at_1000": 0.95074,
34
+ "recall_at_3": 0.4417,
35
+ "recall_at_5": 0.5094
36
+ }
37
+ }
evaluation/mteb/CQADupstackEnglishRetrieval.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_version": null,
3
+ "mteb_version": "0.0.2",
4
+ "test": {
5
+ "evaluation_time": 3724.38,
6
+ "map_at_1": 0.30613,
7
+ "map_at_10": 0.40781,
8
+ "map_at_100": 0.42018,
9
+ "map_at_1000": 0.42133,
10
+ "map_at_3": 0.37816,
11
+ "map_at_5": 0.39389,
12
+ "mrr_at_1": 0.38408,
13
+ "mrr_at_10": 0.46631,
14
+ "mrr_at_100": 0.47332,
15
+ "mrr_at_1000": 0.47368,
16
+ "mrr_at_3": 0.44384,
17
+ "mrr_at_5": 0.45661,
18
+ "ndcg_at_1": 0.38408,
19
+ "ndcg_at_10": 0.4638,
20
+ "ndcg_at_100": 0.5081,
21
+ "ndcg_at_1000": 0.52663,
22
+ "ndcg_at_3": 0.4218,
23
+ "ndcg_at_5": 0.43974,
24
+ "precision_at_1": 0.38408,
25
+ "precision_at_10": 0.08656,
26
+ "precision_at_100": 0.01386,
27
+ "precision_at_1000": 0.00184,
28
+ "precision_at_3": 0.20276,
29
+ "precision_at_5": 0.14242,
30
+ "recall_at_1": 0.30613,
31
+ "recall_at_10": 0.5644,
32
+ "recall_at_100": 0.75044,
33
+ "recall_at_1000": 0.86426,
34
+ "recall_at_3": 0.43766,
35
+ "recall_at_5": 0.48998
36
+ }
37
+ }
evaluation/mteb/CQADupstackGamingRetrieval.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_version": null,
3
+ "mteb_version": "0.0.2",
4
+ "test": {
5
+ "evaluation_time": 3971.77,
6
+ "map_at_1": 0.37371,
7
+ "map_at_10": 0.49718,
8
+ "map_at_100": 0.50737,
9
+ "map_at_1000": 0.5079,
10
+ "map_at_3": 0.46231,
11
+ "map_at_5": 0.48329,
12
+ "mrr_at_1": 0.42884,
13
+ "mrr_at_10": 0.53176,
14
+ "mrr_at_100": 0.53817,
15
+ "mrr_at_1000": 0.53845,
16
+ "mrr_at_3": 0.50199,
17
+ "mrr_at_5": 0.5213,
18
+ "ndcg_at_1": 0.42884,
19
+ "ndcg_at_10": 0.55826,
20
+ "ndcg_at_100": 0.5993,
21
+ "ndcg_at_1000": 0.61013,
22
+ "ndcg_at_3": 0.49764,
23
+ "ndcg_at_5": 0.53026,
24
+ "precision_at_1": 0.42884,
25
+ "precision_at_10": 0.09047,
26
+ "precision_at_100": 0.01212,
27
+ "precision_at_1000": 0.00135,
28
+ "precision_at_3": 0.22132,
29
+ "precision_at_5": 0.15524,
30
+ "recall_at_1": 0.37371,
31
+ "recall_at_10": 0.70482,
32
+ "recall_at_100": 0.88425,
33
+ "recall_at_1000": 0.96034,
34
+ "recall_at_3": 0.5443,
35
+ "recall_at_5": 0.62328
36
+ }
37
+ }
evaluation/mteb/CQADupstackGisRetrieval.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_version": null,
3
+ "mteb_version": "0.0.2",
4
+ "test": {
5
+ "evaluation_time": 5450.64,
6
+ "map_at_1": 0.22876,
7
+ "map_at_10": 0.31715,
8
+ "map_at_100": 0.32847,
9
+ "map_at_1000": 0.32922,
10
+ "map_at_3": 0.2905,
11
+ "map_at_5": 0.30396,
12
+ "mrr_at_1": 0.2452,
13
+ "mrr_at_10": 0.33497,
14
+ "mrr_at_100": 0.34455,
15
+ "mrr_at_1000": 0.3451,
16
+ "mrr_at_3": 0.30791,
17
+ "mrr_at_5": 0.32175,
18
+ "ndcg_at_1": 0.2452,
19
+ "ndcg_at_10": 0.3695,
20
+ "ndcg_at_100": 0.42238,
21
+ "ndcg_at_1000": 0.44148,
22
+ "ndcg_at_3": 0.31435,
23
+ "ndcg_at_5": 0.33839,
24
+ "precision_at_1": 0.2452,
25
+ "precision_at_10": 0.05932,
26
+ "precision_at_100": 0.00901,
27
+ "precision_at_1000": 0.0011,
28
+ "precision_at_3": 0.13446,
29
+ "precision_at_5": 0.09469,
30
+ "recall_at_1": 0.22876,
31
+ "recall_at_10": 0.5138,
32
+ "recall_at_100": 0.75311,
33
+ "recall_at_1000": 0.89718,
34
+ "recall_at_3": 0.3626,
35
+ "recall_at_5": 0.42249
36
+ }
37
+ }
evaluation/mteb/CQADupstackMathematicaRetrieval.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_version": null,
3
+ "mteb_version": "0.0.2",
4
+ "test": {
5
+ "evaluation_time": 2931.62,
6
+ "map_at_1": 0.14984,
7
+ "map_at_10": 0.23457,
8
+ "map_at_100": 0.24723,
9
+ "map_at_1000": 0.24846,
10
+ "map_at_3": 0.20873,
11
+ "map_at_5": 0.22357,
12
+ "mrr_at_1": 0.18159,
13
+ "mrr_at_10": 0.27431,
14
+ "mrr_at_100": 0.28449,
15
+ "mrr_at_1000": 0.2852,
16
+ "mrr_at_3": 0.24979,
17
+ "mrr_at_5": 0.26447,
18
+ "ndcg_at_1": 0.18159,
19
+ "ndcg_at_10": 0.28628,
20
+ "ndcg_at_100": 0.34741,
21
+ "ndcg_at_1000": 0.37516,
22
+ "ndcg_at_3": 0.23902,
23
+ "ndcg_at_5": 0.26294,
24
+ "precision_at_1": 0.18159,
25
+ "precision_at_10": 0.05485,
26
+ "precision_at_100": 0.00985,
27
+ "precision_at_1000": 0.00136,
28
+ "precision_at_3": 0.11774,
29
+ "precision_at_5": 0.08731,
30
+ "recall_at_1": 0.14984,
31
+ "recall_at_10": 0.40198,
32
+ "recall_at_100": 0.67115,
33
+ "recall_at_1000": 0.86497,
34
+ "recall_at_3": 0.27639,
35
+ "recall_at_5": 0.33595
36
+ }
37
+ }
evaluation/mteb/CQADupstackPhysicsRetrieval.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_version": null,
3
+ "mteb_version": "0.0.2",
4
+ "test": {
5
+ "evaluation_time": 5338.65,
6
+ "map_at_1": 0.29067,
7
+ "map_at_10": 0.39457,
8
+ "map_at_100": 0.4083,
9
+ "map_at_1000": 0.4094,
10
+ "map_at_3": 0.35995,
11
+ "map_at_5": 0.38159,
12
+ "mrr_at_1": 0.34937,
13
+ "mrr_at_10": 0.44755,
14
+ "mrr_at_100": 0.45549,
15
+ "mrr_at_1000": 0.45589,
16
+ "mrr_at_3": 0.41947,
17
+ "mrr_at_5": 0.43733,
18
+ "ndcg_at_1": 0.34937,
19
+ "ndcg_at_10": 0.45573,
20
+ "ndcg_at_100": 0.51267,
21
+ "ndcg_at_1000": 0.53184,
22
+ "ndcg_at_3": 0.39962,
23
+ "ndcg_at_5": 0.4302,
24
+ "precision_at_1": 0.34937,
25
+ "precision_at_10": 0.08296,
26
+ "precision_at_100": 0.0132,
27
+ "precision_at_1000": 0.00167,
28
+ "precision_at_3": 0.188,
29
+ "precision_at_5": 0.13763,
30
+ "recall_at_1": 0.29067,
31
+ "recall_at_10": 0.58298,
32
+ "recall_at_100": 0.82251,
33
+ "recall_at_1000": 0.94476,
34
+ "recall_at_3": 0.42984,
35
+ "recall_at_5": 0.50658
36
+ }
37
+ }
evaluation/mteb/CQADupstackProgrammersRetrieval.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_version": null,
3
+ "mteb_version": "0.0.2",
4
+ "test": {
5
+ "evaluation_time": 4979.86,
6
+ "map_at_1": 0.25986,
7
+ "map_at_10": 0.35746,
8
+ "map_at_100": 0.37067,
9
+ "map_at_1000": 0.37191,
10
+ "map_at_3": 0.32599,
11
+ "map_at_5": 0.34239,
12
+ "mrr_at_1": 0.31735,
13
+ "mrr_at_10": 0.40515,
14
+ "mrr_at_100": 0.41459,
15
+ "mrr_at_1000": 0.41516,
16
+ "mrr_at_3": 0.37938,
17
+ "mrr_at_5": 0.3925,
18
+ "ndcg_at_1": 0.31735,
19
+ "ndcg_at_10": 0.41484,
20
+ "ndcg_at_100": 0.47047,
21
+ "ndcg_at_1000": 0.49427,
22
+ "ndcg_at_3": 0.36255,
23
+ "ndcg_at_5": 0.38375,
24
+ "precision_at_1": 0.31735,
25
+ "precision_at_10": 0.0766,
26
+ "precision_at_100": 0.01234,
27
+ "precision_at_1000": 0.0016,
28
+ "precision_at_3": 0.17428,
29
+ "precision_at_5": 0.12329,
30
+ "recall_at_1": 0.25986,
31
+ "recall_at_10": 0.53761,
32
+ "recall_at_100": 0.77149,
33
+ "recall_at_1000": 0.93342,
34
+ "recall_at_3": 0.39068,
35
+ "recall_at_5": 0.44693
36
+ }
37
+ }
evaluation/mteb/CQADupstackRetrieval.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_version": null,
3
+ "mteb_version": "0.0.2",
4
+ "test": {
5
+ "evaluation_time": 7822.83,
6
+ "ndcg_at_1": 0.2821458333333333,
7
+ "ndcg_at_3": 0.3325866666666667,
8
+ "ndcg_at_5": 0.35453333333333326,
9
+ "ndcg_at_10": 0.381195,
10
+ "ndcg_at_100": 0.43441,
11
+ "ndcg_at_1000": 0.45826083333333334
12
+ }
13
+ }
evaluation/mteb/CQADupstackStatsRetrieval.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_version": null,
3
+ "mteb_version": "0.0.2",
4
+ "test": {
5
+ "evaluation_time": 6837.54,
6
+ "map_at_1": 0.22082,
7
+ "map_at_10": 0.29216,
8
+ "map_at_100": 0.30163,
9
+ "map_at_1000": 0.30269,
10
+ "map_at_3": 0.26942,
11
+ "map_at_5": 0.28236,
12
+ "mrr_at_1": 0.24847,
13
+ "mrr_at_10": 0.31919,
14
+ "mrr_at_100": 0.32817,
15
+ "mrr_at_1000": 0.32897,
16
+ "mrr_at_3": 0.29831,
17
+ "mrr_at_5": 0.3102,
18
+ "ndcg_at_1": 0.24847,
19
+ "ndcg_at_10": 0.334,
20
+ "ndcg_at_100": 0.38354,
21
+ "ndcg_at_1000": 0.41045,
22
+ "ndcg_at_3": 0.29236,
23
+ "ndcg_at_5": 0.31258,
24
+ "precision_at_1": 0.24847,
25
+ "precision_at_10": 0.05353,
26
+ "precision_at_100": 0.00853,
27
+ "precision_at_1000": 0.00116,
28
+ "precision_at_3": 0.12679,
29
+ "precision_at_5": 0.08988,
30
+ "recall_at_1": 0.22082,
31
+ "recall_at_10": 0.43505,
32
+ "recall_at_100": 0.66454,
33
+ "recall_at_1000": 0.86378,
34
+ "recall_at_3": 0.32163,
35
+ "recall_at_5": 0.3706
36
+ }
37
+ }
evaluation/mteb/CQADupstackTexRetrieval.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_version": null,
3
+ "mteb_version": "0.0.2",
4
+ "test": {
5
+ "evaluation_time": 11622.05,
6
+ "map_at_1": 0.1554,
7
+ "map_at_10": 0.22362,
8
+ "map_at_100": 0.23435,
9
+ "map_at_1000": 0.23564,
10
+ "map_at_3": 0.20143,
11
+ "map_at_5": 0.21324,
12
+ "mrr_at_1": 0.18892,
13
+ "mrr_at_10": 0.25943,
14
+ "mrr_at_100": 0.26883,
15
+ "mrr_at_1000": 0.26969,
16
+ "mrr_at_3": 0.23727,
17
+ "mrr_at_5": 0.24923,
18
+ "ndcg_at_1": 0.18892,
19
+ "ndcg_at_10": 0.26811,
20
+ "ndcg_at_100": 0.32066,
21
+ "ndcg_at_1000": 0.35166,
22
+ "ndcg_at_3": 0.22706,
23
+ "ndcg_at_5": 0.24508,
24
+ "precision_at_1": 0.18892,
25
+ "precision_at_10": 0.04942,
26
+ "precision_at_100": 0.00878,
27
+ "precision_at_1000": 0.00131,
28
+ "precision_at_3": 0.10748,
29
+ "precision_at_5": 0.07784,
30
+ "recall_at_1": 0.1554,
31
+ "recall_at_10": 0.36743,
32
+ "recall_at_100": 0.60525,
33
+ "recall_at_1000": 0.82576,
34
+ "recall_at_3": 0.25252,
35
+ "recall_at_5": 0.29872
36
+ }
37
+ }
evaluation/mteb/CQADupstackUnixRetrieval.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_version": null,
3
+ "mteb_version": "0.0.2",
4
+ "test": {
5
+ "evaluation_time": 7165.48,
6
+ "map_at_1": 0.24453,
7
+ "map_at_10": 0.33363,
8
+ "map_at_100": 0.34579,
9
+ "map_at_1000": 0.34686,
10
+ "map_at_3": 0.30583,
11
+ "map_at_5": 0.32118,
12
+ "mrr_at_1": 0.28918,
13
+ "mrr_at_10": 0.37675,
14
+ "mrr_at_100": 0.38567,
15
+ "mrr_at_1000": 0.38632,
16
+ "mrr_at_3": 0.35261,
17
+ "mrr_at_5": 0.36576,
18
+ "ndcg_at_1": 0.28918,
19
+ "ndcg_at_10": 0.38736,
20
+ "ndcg_at_100": 0.44261,
21
+ "ndcg_at_1000": 0.4672,
22
+ "ndcg_at_3": 0.3381,
23
+ "ndcg_at_5": 0.36009,
24
+ "precision_at_1": 0.28918,
25
+ "precision_at_10": 0.06586,
26
+ "precision_at_100": 0.01047,
27
+ "precision_at_1000": 0.00137,
28
+ "precision_at_3": 0.15361,
29
+ "precision_at_5": 0.10858,
30
+ "recall_at_1": 0.24453,
31
+ "recall_at_10": 0.50886,
32
+ "recall_at_100": 0.7503,
33
+ "recall_at_1000": 0.92123,
34
+ "recall_at_3": 0.37138,
35
+ "recall_at_5": 0.42865
36
+ }
37
+ }
evaluation/mteb/CQADupstackWebmastersRetrieval.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_version": null,
3
+ "mteb_version": "0.0.2",
4
+ "test": {
5
+ "evaluation_time": 2264.66,
6
+ "map_at_1": 0.2457,
7
+ "map_at_10": 0.33672,
8
+ "map_at_100": 0.35244,
9
+ "map_at_1000": 0.35467,
10
+ "map_at_3": 0.30712,
11
+ "map_at_5": 0.32383,
12
+ "mrr_at_1": 0.29644,
13
+ "mrr_at_10": 0.38344,
14
+ "mrr_at_100": 0.39219,
15
+ "mrr_at_1000": 0.39282,
16
+ "mrr_at_3": 0.35771,
17
+ "mrr_at_5": 0.37273,
18
+ "ndcg_at_1": 0.29644,
19
+ "ndcg_at_10": 0.39567,
20
+ "ndcg_at_100": 0.45097,
21
+ "ndcg_at_1000": 0.47923,
22
+ "ndcg_at_3": 0.34768,
23
+ "ndcg_at_5": 0.37122,
24
+ "precision_at_1": 0.29644,
25
+ "precision_at_10": 0.07589,
26
+ "precision_at_100": 0.01478,
27
+ "precision_at_1000": 0.00235,
28
+ "precision_at_3": 0.16337,
29
+ "precision_at_5": 0.12055,
30
+ "recall_at_1": 0.2457,
31
+ "recall_at_10": 0.51009,
32
+ "recall_at_100": 0.75423,
33
+ "recall_at_1000": 0.93671,
34
+ "recall_at_3": 0.36926,
35
+ "recall_at_5": 0.43245
36
+ }
37
+ }
evaluation/mteb/CQADupstackWordpressRetrieval.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_version": null,
3
+ "mteb_version": "0.0.2",
4
+ "test": {
5
+ "evaluation_time": 7608.65,
6
+ "map_at_1": 0.21356,
7
+ "map_at_10": 0.27904,
8
+ "map_at_100": 0.28938,
9
+ "map_at_1000": 0.29036,
10
+ "map_at_3": 0.25726,
11
+ "map_at_5": 0.26935,
12
+ "mrr_at_1": 0.22551,
13
+ "mrr_at_10": 0.29259,
14
+ "mrr_at_100": 0.30272,
15
+ "mrr_at_1000": 0.30348,
16
+ "mrr_at_3": 0.27295,
17
+ "mrr_at_5": 0.28358,
18
+ "ndcg_at_1": 0.22551,
19
+ "ndcg_at_10": 0.31817,
20
+ "ndcg_at_100": 0.37164,
21
+ "ndcg_at_1000": 0.3982,
22
+ "ndcg_at_3": 0.27596,
23
+ "ndcg_at_5": 0.29568,
24
+ "precision_at_1": 0.22551,
25
+ "precision_at_10": 0.04917,
26
+ "precision_at_100": 0.00828,
27
+ "precision_at_1000": 0.00114,
28
+ "precision_at_3": 0.11583,
29
+ "precision_at_5": 0.08133,
30
+ "recall_at_1": 0.21356,
31
+ "recall_at_10": 0.42489,
32
+ "recall_at_100": 0.67128,
33
+ "recall_at_1000": 0.87441,
34
+ "recall_at_3": 0.31165,
35
+ "recall_at_5": 0.35853
36
+ }
37
+ }
evaluation/mteb/ClimateFEVER.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_version": null,
3
+ "mteb_version": "0.0.2",
4
+ "test": {
5
+ "evaluation_time": 64667.21,
6
+ "map_at_1": 0.12306,
7
+ "map_at_10": 0.21523,
8
+ "map_at_100": 0.23358,
9
+ "map_at_1000": 0.23541,
10
+ "map_at_3": 0.17809,
11
+ "map_at_5": 0.19631,
12
+ "mrr_at_1": 0.27948,
13
+ "mrr_at_10": 0.40355,
14
+ "mrr_at_100": 0.41166,
15
+ "mrr_at_1000": 0.41203,
16
+ "mrr_at_3": 0.36819,
17
+ "mrr_at_5": 0.38959,
18
+ "ndcg_at_1": 0.27948,
19
+ "ndcg_at_10": 0.30462,
20
+ "ndcg_at_100": 0.37473,
21
+ "ndcg_at_1000": 0.40718,
22
+ "ndcg_at_3": 0.24646,
23
+ "ndcg_at_5": 0.26642,
24
+ "precision_at_1": 0.27948,
25
+ "precision_at_10": 0.09648,
26
+ "precision_at_100": 0.01724,
27
+ "precision_at_1000": 0.00232,
28
+ "precision_at_3": 0.1848,
29
+ "precision_at_5": 0.14293,
30
+ "recall_at_1": 0.12306,
31
+ "recall_at_10": 0.37181,
32
+ "recall_at_100": 0.61148,
33
+ "recall_at_1000": 0.79401,
34
+ "recall_at_3": 0.22883,
35
+ "recall_at_5": 0.2859
36
+ }
37
+ }
evaluation/mteb/DBPedia.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_version": null,
3
+ "mteb_version": "0.0.2",
4
+ "test": {
5
+ "evaluation_time": 43004.49,
6
+ "map_at_1": 0.09357,
7
+ "map_at_10": 0.18849,
8
+ "map_at_100": 0.25369,
9
+ "map_at_1000": 0.2695,
10
+ "map_at_3": 0.13625,
11
+ "map_at_5": 0.15957,
12
+ "mrr_at_1": 0.6775,
13
+ "mrr_at_10": 0.74734,
14
+ "mrr_at_100": 0.751,
15
+ "mrr_at_1000": 0.75109,
16
+ "mrr_at_3": 0.73542,
17
+ "mrr_at_5": 0.74167,
18
+ "ndcg_at_1": 0.55375,
19
+ "ndcg_at_10": 0.39874,
20
+ "ndcg_at_100": 0.43098,
21
+ "ndcg_at_1000": 0.50692,
22
+ "ndcg_at_3": 0.44856,
23
+ "ndcg_at_5": 0.42139,
24
+ "precision_at_1": 0.6775,
25
+ "precision_at_10": 0.311,
26
+ "precision_at_100": 0.09303,
27
+ "precision_at_1000": 0.02006,
28
+ "precision_at_3": 0.4825,
29
+ "precision_at_5": 0.4095,
30
+ "recall_at_1": 0.09357,
31
+ "recall_at_10": 0.23832,
32
+ "recall_at_100": 0.47906,
33
+ "recall_at_1000": 0.71309,
34
+ "recall_at_3": 0.14512,
35
+ "recall_at_5": 0.183
36
+ }
37
+ }
evaluation/mteb/EmotionClassification.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "test": {
3
+ "accuracy": 0.49655000000000005,
4
+ "accuracy_stderr": 0.023384236143179877,
5
+ "evaluation_time": 408.2,
6
+ "f1": 0.45519761909389506,
7
+ "f1_stderr": 0.017373883846959346,
8
+ "main_score": 0.49655000000000005
9
+ },
10
+ "dataset_version": null,
11
+ "mteb_version": "0.0.2"
12
+ }
evaluation/mteb/FEVER.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_version": null,
3
+ "mteb_version": "0.0.2",
4
+ "test": {
5
+ "evaluation_time": 65632.31,
6
+ "map_at_1": 0.6274,
7
+ "map_at_10": 0.7307,
8
+ "map_at_100": 0.73398,
9
+ "map_at_1000": 0.7341,
10
+ "map_at_3": 0.71338,
11
+ "map_at_5": 0.72423,
12
+ "mrr_at_1": 0.67777,
13
+ "mrr_at_10": 0.77873,
14
+ "mrr_at_100": 0.78091,
15
+ "mrr_at_1000": 0.78094,
16
+ "mrr_at_3": 0.76375,
17
+ "mrr_at_5": 0.77316,
18
+ "ndcg_at_1": 0.67777,
19
+ "ndcg_at_10": 0.7824,
20
+ "ndcg_at_100": 0.79557,
21
+ "ndcg_at_1000": 0.79814,
22
+ "ndcg_at_3": 0.75125,
23
+ "ndcg_at_5": 0.76834,
24
+ "precision_at_1": 0.67777,
25
+ "precision_at_10": 0.09832,
26
+ "precision_at_100": 0.01061,
27
+ "precision_at_1000": 0.0011,
28
+ "precision_at_3": 0.29433,
29
+ "precision_at_5": 0.18665,
30
+ "recall_at_1": 0.6274,
31
+ "recall_at_10": 0.89505,
32
+ "recall_at_100": 0.95102,
33
+ "recall_at_1000": 0.96825,
34
+ "recall_at_3": 0.81028,
35
+ "recall_at_5": 0.85281
36
+ }
37
+ }
evaluation/mteb/FiQA2018.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_version": null,
3
+ "mteb_version": "0.0.2",
4
+ "test": {
5
+ "evaluation_time": 951.67,
6
+ "map_at_1": 0.18467,
7
+ "map_at_10": 0.30021,
8
+ "map_at_100": 0.31739,
9
+ "map_at_1000": 0.31934,
10
+ "map_at_3": 0.26003,
11
+ "map_at_5": 0.28338,
12
+ "mrr_at_1": 0.3534,
13
+ "mrr_at_10": 0.44109,
14
+ "mrr_at_100": 0.44993,
15
+ "mrr_at_1000": 0.45042,
16
+ "mrr_at_3": 0.41667,
17
+ "mrr_at_5": 0.4314,
18
+ "ndcg_at_1": 0.3534,
19
+ "ndcg_at_10": 0.37202,
20
+ "ndcg_at_100": 0.43853,
21
+ "ndcg_at_1000": 0.47235,
22
+ "ndcg_at_3": 0.335,
23
+ "ndcg_at_5": 0.34985,
24
+ "precision_at_1": 0.3534,
25
+ "precision_at_10": 0.10247,
26
+ "precision_at_100": 0.01715,
27
+ "precision_at_1000": 0.00232,
28
+ "precision_at_3": 0.22222,
29
+ "precision_at_5": 0.16574,
30
+ "recall_at_1": 0.18467,
31
+ "recall_at_10": 0.44081,
32
+ "recall_at_100": 0.68722,
33
+ "recall_at_1000": 0.89087,
34
+ "recall_at_3": 0.30567,
35
+ "recall_at_5": 0.36982
36
+ }
37
+ }
evaluation/mteb/HotpotQA.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_version": null,
3
+ "mteb_version": "0.0.2",
4
+ "test": {
5
+ "evaluation_time": 46237.87,
6
+ "map_at_1": 0.35726,
7
+ "map_at_10": 0.50207,
8
+ "map_at_100": 0.51055,
9
+ "map_at_1000": 0.51128,
10
+ "map_at_3": 0.47576,
11
+ "map_at_5": 0.49172,
12
+ "mrr_at_1": 0.71452,
13
+ "mrr_at_10": 0.77419,
14
+ "mrr_at_100": 0.77711,
15
+ "mrr_at_1000": 0.77723,
16
+ "mrr_at_3": 0.76394,
17
+ "mrr_at_5": 0.77001,
18
+ "ndcg_at_1": 0.71452,
19
+ "ndcg_at_10": 0.59261,
20
+ "ndcg_at_100": 0.62424,
21
+ "ndcg_at_1000": 0.63951,
22
+ "ndcg_at_3": 0.55327,
23
+ "ndcg_at_5": 0.57417,
24
+ "precision_at_1": 0.71452,
25
+ "precision_at_10": 0.12061,
26
+ "precision_at_100": 0.01455,
27
+ "precision_at_1000": 0.00166,
28
+ "precision_at_3": 0.3436,
29
+ "precision_at_5": 0.22266,
30
+ "recall_at_1": 0.35726,
31
+ "recall_at_10": 0.60304,
32
+ "recall_at_100": 0.72755,
33
+ "recall_at_1000": 0.82978,
34
+ "recall_at_3": 0.5154,
35
+ "recall_at_5": 0.55665
36
+ }
37
+ }
evaluation/mteb/ImdbClassification.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "test": {
3
+ "accuracy": 0.666376,
4
+ "accuracy_stderr": 0.03435007633179292,
5
+ "ap": 0.6148938261286748,
6
+ "ap_stderr": 0.028266712412564548,
7
+ "evaluation_time": 38609.7,
8
+ "f1": 0.6635089269264965,
9
+ "f1_stderr": 0.03526614235815782,
10
+ "main_score": 0.666376
11
+ },
12
+ "dataset_version": null,
13
+ "mteb_version": "0.0.2"
14
+ }
evaluation/mteb/MSMARCO.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_version": null,
3
+ "mteb_version": "0.0.2",
4
+ "validation": {
5
+ "evaluation_time": 82887.84,
6
+ "map_at_1": 0.20842,
7
+ "map_at_10": 0.32992,
8
+ "map_at_100": 0.34236,
9
+ "map_at_1000": 0.34286,
10
+ "map_at_3": 0.29049,
11
+ "map_at_5": 0.31392,
12
+ "mrr_at_1": 0.21375,
13
+ "mrr_at_10": 0.33581,
14
+ "mrr_at_100": 0.3476,
15
+ "mrr_at_1000": 0.34803,
16
+ "mrr_at_3": 0.29704,
17
+ "mrr_at_5": 0.32015,
18
+ "ndcg_at_1": 0.21375,
19
+ "ndcg_at_10": 0.39905,
20
+ "ndcg_at_100": 0.45843,
21
+ "ndcg_at_1000": 0.47084,
22
+ "ndcg_at_3": 0.31919,
23
+ "ndcg_at_5": 0.36107,
24
+ "precision_at_1": 0.21375,
25
+ "precision_at_10": 0.06393,
26
+ "precision_at_100": 0.00935,
27
+ "precision_at_1000": 0.00104,
28
+ "precision_at_3": 0.13663,
29
+ "precision_at_5": 0.10324,
30
+ "recall_at_1": 0.20842,
31
+ "recall_at_10": 0.6117,
32
+ "recall_at_100": 0.88518,
33
+ "recall_at_1000": 0.97993,
34
+ "recall_at_3": 0.39571,
35
+ "recall_at_5": 0.49654
36
+ }
37
+ }
evaluation/mteb/MTOPDomainClassification.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "test": {
3
+ "en": {
4
+ "accuracy": 0.9346557227542178,
5
+ "accuracy_stderr": 0.0072510589372134955,
6
+ "f1": 0.9287345917772146,
7
+ "f1_stderr": 0.0082627627462556,
8
+ "main_score": 0.9346557227542178
9
+ },
10
+ "evaluation_time": 475.45
11
+ },
12
+ "dataset_version": null,
13
+ "mteb_version": "0.0.2"
14
+ }
evaluation/mteb/MTOPIntentClassification.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "test": {
3
+ "en": {
4
+ "accuracy": 0.7242134062927497,
5
+ "accuracy_stderr": 0.012571448106282981,
6
+ "f1": 0.5503624810959269,
7
+ "f1_stderr": 0.011737816802681789,
8
+ "main_score": 0.7242134062927497
9
+ },
10
+ "evaluation_time": 612.9
11
+ },
12
+ "dataset_version": null,
13
+ "mteb_version": "0.0.2"
14
+ }
evaluation/mteb/MassiveIntentClassification.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "test": {
3
+ "en": {
4
+ "accuracy": 0.703866845998655,
5
+ "accuracy_stderr": 0.00456616151893328,
6
+ "f1": 0.6896745198729209,
7
+ "f1_stderr": 0.006294250065644286,
8
+ "main_score": 0.703866845998655
9
+ },
10
+ "evaluation_time": 387.35
11
+ },
12
+ "dataset_version": null,
13
+ "mteb_version": "0.0.2"
14
+ }
evaluation/mteb/MassiveScenarioClassification.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "test": {
3
+ "en": {
4
+ "accuracy": 0.7627774041694687,
5
+ "accuracy_stderr": 0.014020552186569309,
6
+ "f1": 0.7672936190462792,
7
+ "f1_stderr": 0.01203156406861017,
8
+ "main_score": 0.7627774041694687
9
+ },
10
+ "evaluation_time": 333.83
11
+ },
12
+ "dataset_version": null,
13
+ "mteb_version": "0.0.2"
14
+ }
evaluation/mteb/MedrxivClusteringP2P.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "test": {
3
+ "evaluation_time": 7211.78,
4
+ "v_measure": 0.3151174592577334,
5
+ "v_measure_std": 0.012772027882021399
6
+ },
7
+ "dataset_version": null,
8
+ "mteb_version": "0.0.2"
9
+ }
evaluation/mteb/MedrxivClusteringS2S.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "test": {
3
+ "evaluation_time": 1053.59,
4
+ "v_measure": 0.28764235987575365,
5
+ "v_measure_std": 0.011424256185368544
6
+ },
7
+ "dataset_version": null,
8
+ "mteb_version": "0.0.2"
9
+ }
evaluation/mteb/MindSmallReranking.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_version": null,
3
+ "mteb_version": "0.0.2",
4
+ "test": {
5
+ "evaluation_time": 72941.42,
6
+ "map": 0.32293531363866007,
7
+ "mrr": 0.33536774455851687
8
+ }
9
+ }
evaluation/mteb/NFCorpus.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_version": null,
3
+ "mteb_version": "0.0.2",
4
+ "test": {
5
+ "evaluation_time": 166.79,
6
+ "map_at_1": 0.05702,
7
+ "map_at_10": 0.13642,
8
+ "map_at_100": 0.17503,
9
+ "map_at_1000": 0.19126,
10
+ "map_at_3": 0.09748,
11
+ "map_at_5": 0.11642,
12
+ "mrr_at_1": 0.4582,
13
+ "mrr_at_10": 0.54821,
14
+ "mrr_at_100": 0.55422,
15
+ "mrr_at_1000": 0.55453,
16
+ "mrr_at_3": 0.52374,
17
+ "mrr_at_5": 0.53937,
18
+ "ndcg_at_1": 0.44272,
19
+ "ndcg_at_10": 0.36213,
20
+ "ndcg_at_100": 0.33829,
21
+ "ndcg_at_1000": 0.42557,
22
+ "ndcg_at_3": 0.40814,
23
+ "ndcg_at_5": 0.39562,
24
+ "precision_at_1": 0.45511,
25
+ "precision_at_10": 0.27214,
26
+ "precision_at_100": 0.08941,
27
+ "precision_at_1000": 0.02187,
28
+ "precision_at_3": 0.37874,
29
+ "precision_at_5": 0.34489,
30
+ "recall_at_1": 0.05702,
31
+ "recall_at_10": 0.17638,
32
+ "recall_at_100": 0.34419,
33
+ "recall_at_1000": 0.6641,
34
+ "recall_at_3": 0.10914,
35
+ "recall_at_5": 0.14032
36
+ }
37
+ }
evaluation/mteb/NQ.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_version": null,
3
+ "mteb_version": "0.0.2",
4
+ "test": {
5
+ "evaluation_time": 31038.1,
6
+ "map_at_1": 0.30567,
7
+ "map_at_10": 0.4501,
8
+ "map_at_100": 0.46091,
9
+ "map_at_1000": 0.46126,
10
+ "map_at_3": 0.40897,
11
+ "map_at_5": 0.43301,
12
+ "mrr_at_1": 0.3456,
13
+ "mrr_at_10": 0.47725,
14
+ "mrr_at_100": 0.48548,
15
+ "mrr_at_1000": 0.48572,
16
+ "mrr_at_3": 0.44361,
17
+ "mrr_at_5": 0.46351,
18
+ "ndcg_at_1": 0.34531,
19
+ "ndcg_at_10": 0.5241,
20
+ "ndcg_at_100": 0.56999,
21
+ "ndcg_at_1000": 0.57831,
22
+ "ndcg_at_3": 0.44734,
23
+ "ndcg_at_5": 0.48701,
24
+ "precision_at_1": 0.34531,
25
+ "precision_at_10": 0.08612,
26
+ "precision_at_100": 0.01118,
27
+ "precision_at_1000": 0.0012,
28
+ "precision_at_3": 0.20307,
29
+ "precision_at_5": 0.14519,
30
+ "recall_at_1": 0.30567,
31
+ "recall_at_10": 0.72238,
32
+ "recall_at_100": 0.92154,
33
+ "recall_at_1000": 0.98375,
34
+ "recall_at_3": 0.52438,
35
+ "recall_at_5": 0.61517
36
+ }
37
+ }
evaluation/mteb/QuoraRetrieval.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_version": null,
3
+ "mteb_version": "0.0.2",
4
+ "test": {
5
+ "evaluation_time": 1922.86,
6
+ "map_at_1": 0.6598,
7
+ "map_at_10": 0.80056,
8
+ "map_at_100": 0.80763,
9
+ "map_at_1000": 0.80786,
10
+ "map_at_3": 0.76848,
11
+ "map_at_5": 0.78854,
12
+ "mrr_at_1": 0.7586,
13
+ "mrr_at_10": 0.83397,
14
+ "mrr_at_100": 0.83555,
15
+ "mrr_at_1000": 0.83557,
16
+ "mrr_at_3": 0.82033,
17
+ "mrr_at_5": 0.8297,
18
+ "ndcg_at_1": 0.7588,
19
+ "ndcg_at_10": 0.84581,
20
+ "ndcg_at_100": 0.86151,
21
+ "ndcg_at_1000": 0.86315,
22
+ "ndcg_at_3": 0.80902,
23
+ "ndcg_at_5": 0.82953,
24
+ "precision_at_1": 0.7588,
25
+ "precision_at_10": 0.12986,
26
+ "precision_at_100": 0.01511,
27
+ "precision_at_1000": 0.00156,
28
+ "precision_at_3": 0.35383,
29
+ "precision_at_5": 0.23556,
30
+ "recall_at_1": 0.6598,
31
+ "recall_at_10": 0.93716,
32
+ "recall_at_100": 0.99218,
33
+ "recall_at_1000": 0.9997,
34
+ "recall_at_3": 0.83551,
35
+ "recall_at_5": 0.88998
36
+ }
37
+ }
evaluation/mteb/RedditClustering.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "test": {
3
+ "evaluation_time": 14260.04,
4
+ "v_measure": 0.4045148482612238,
5
+ "v_measure_std": 0.10798668357466681
6
+ },
7
+ "dataset_version": null,
8
+ "mteb_version": "0.0.2"
9
+ }
evaluation/mteb/RedditClusteringP2P.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_version": null,
3
+ "mteb_version": "0.0.2",
4
+ "test": {
5
+ "evaluation_time": 57925.8,
6
+ "v_measure": 0.5574949067303913,
7
+ "v_measure_std": 0.12781562321045892
8
+ }
9
+ }
evaluation/mteb/SCIDOCS.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_version": null,
3
+ "mteb_version": "0.0.2",
4
+ "test": {
5
+ "evaluation_time": 626.47,
6
+ "map_at_1": 0.04903,
7
+ "map_at_10": 0.11926,
8
+ "map_at_100": 0.13917,
9
+ "map_at_1000": 0.14215,
10
+ "map_at_3": 0.088,
11
+ "map_at_5": 0.10361,
12
+ "mrr_at_1": 0.241,
13
+ "mrr_at_10": 0.34482,
14
+ "mrr_at_100": 0.35566,
15
+ "mrr_at_1000": 0.35619,
16
+ "mrr_at_3": 0.31433,
17
+ "mrr_at_5": 0.33243,
18
+ "ndcg_at_1": 0.241,
19
+ "ndcg_at_10": 0.19873,
20
+ "ndcg_at_100": 0.27606,
21
+ "ndcg_at_1000": 0.32811,
22
+ "ndcg_at_3": 0.19498,
23
+ "ndcg_at_5": 0.16813,
24
+ "precision_at_1": 0.241,
25
+ "precision_at_10": 0.1008,
26
+ "precision_at_100": 0.02122,
27
+ "precision_at_1000": 0.00337,
28
+ "precision_at_3": 0.182,
29
+ "precision_at_5": 0.1462,
30
+ "recall_at_1": 0.04903,
31
+ "recall_at_10": 0.20438,
32
+ "recall_at_100": 0.43043,
33
+ "recall_at_1000": 0.6841,
34
+ "recall_at_3": 0.11068,
35
+ "recall_at_5": 0.14818
36
+ }
37
+ }
evaluation/mteb/SICK-R.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "test": {
3
+ "cos_sim": {
4
+ "pearson": 0.7858086597995997,
5
+ "spearman": 0.6963214182814992
6
+ },
7
+ "euclidean": {
8
+ "pearson": 0.7276175489042691,
9
+ "spearman": 0.6784965161872971
10
+ },
11
+ "evaluation_time": 199.7,
12
+ "manhattan": {
13
+ "pearson": 0.7273812689782593,
14
+ "spearman": 0.6783610439531278
15
+ }
16
+ }
17
+ }
evaluation/mteb/STS12.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "test": {
3
+ "cos_sim": {
4
+ "pearson": 0.7513970861325007,
5
+ "spearman": 0.675020551515597
6
+ },
7
+ "euclidean": {
8
+ "pearson": 0.6633415412418275,
9
+ "spearman": 0.6682145056673268
10
+ },
11
+ "evaluation_time": 100.4,
12
+ "manhattan": {
13
+ "pearson": 0.6655489484006415,
14
+ "spearman": 0.6695147433279057
15
+ }
16
+ },
17
+ "dataset_version": null,
18
+ "mteb_version": "0.0.2"
19
+ }
evaluation/mteb/STS13.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "test": {
3
+ "cos_sim": {
4
+ "pearson": 0.7885850536483447,
5
+ "spearman": 0.7916333501772059
6
+ },
7
+ "euclidean": {
8
+ "pearson": 0.7274090561408476,
9
+ "spearman": 0.7357374448302961
10
+ },
11
+ "evaluation_time": 43.53,
12
+ "manhattan": {
13
+ "pearson": 0.7292980654233225,
14
+ "spearman": 0.7372777155112589
15
+ }
16
+ },
17
+ "dataset_version": null,
18
+ "mteb_version": "0.0.2"
19
+ }
evaluation/mteb/STS14.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "test": {
3
+ "cos_sim": {
4
+ "pearson": 0.7951125593897028,
5
+ "spearman": 0.7446048326701329
6
+ },
7
+ "euclidean": {
8
+ "pearson": 0.7087726087052986,
9
+ "spearman": 0.677721470654411
10
+ },
11
+ "evaluation_time": 107.32,
12
+ "manhattan": {
13
+ "pearson": 0.7105892792135637,
14
+ "spearman": 0.6793472619779036
15
+ }
16
+ },
17
+ "dataset_version": null,
18
+ "mteb_version": "0.0.2"
19
+ }