Muennighoff
commited on
Commit
•
039f451
1
Parent(s):
62a5cc0
Add MTEB evaluation
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- beir.json → evaluation/beir/beir.json +0 -0
- evaluation/mteb/AmazonCounterfactualClassification.json +16 -0
- evaluation/mteb/AmazonPolarityClassification.json +14 -0
- evaluation/mteb/AmazonReviewsClassification.json +14 -0
- evaluation/mteb/ArguAna.json +37 -0
- evaluation/mteb/ArxivClusteringP2P.json +9 -0
- evaluation/mteb/ArxivClusteringS2S.json +9 -0
- evaluation/mteb/AskUbuntuDupQuestions.json +9 -0
- evaluation/mteb/BIOSSES.json +17 -0
- evaluation/mteb/Banking77Classification.json +12 -0
- evaluation/mteb/BiorxivClusteringP2P.json +9 -0
- evaluation/mteb/BiorxivClusteringS2S.json +9 -0
- evaluation/mteb/CQADupstackAndroidRetrieval.json +37 -0
- evaluation/mteb/CQADupstackEnglishRetrieval.json +37 -0
- evaluation/mteb/CQADupstackGamingRetrieval.json +37 -0
- evaluation/mteb/CQADupstackGisRetrieval.json +37 -0
- evaluation/mteb/CQADupstackMathematicaRetrieval.json +37 -0
- evaluation/mteb/CQADupstackPhysicsRetrieval.json +37 -0
- evaluation/mteb/CQADupstackProgrammersRetrieval.json +37 -0
- evaluation/mteb/CQADupstackRetrieval.json +13 -0
- evaluation/mteb/CQADupstackStatsRetrieval.json +37 -0
- evaluation/mteb/CQADupstackTexRetrieval.json +37 -0
- evaluation/mteb/CQADupstackUnixRetrieval.json +37 -0
- evaluation/mteb/CQADupstackWebmastersRetrieval.json +37 -0
- evaluation/mteb/CQADupstackWordpressRetrieval.json +37 -0
- evaluation/mteb/ClimateFEVER.json +37 -0
- evaluation/mteb/DBPedia.json +37 -0
- evaluation/mteb/EmotionClassification.json +12 -0
- evaluation/mteb/FEVER.json +37 -0
- evaluation/mteb/FiQA2018.json +37 -0
- evaluation/mteb/HotpotQA.json +37 -0
- evaluation/mteb/ImdbClassification.json +14 -0
- evaluation/mteb/MSMARCO.json +37 -0
- evaluation/mteb/MTOPDomainClassification.json +14 -0
- evaluation/mteb/MTOPIntentClassification.json +14 -0
- evaluation/mteb/MassiveIntentClassification.json +14 -0
- evaluation/mteb/MassiveScenarioClassification.json +14 -0
- evaluation/mteb/MedrxivClusteringP2P.json +9 -0
- evaluation/mteb/MedrxivClusteringS2S.json +9 -0
- evaluation/mteb/MindSmallReranking.json +9 -0
- evaluation/mteb/NFCorpus.json +37 -0
- evaluation/mteb/NQ.json +37 -0
- evaluation/mteb/QuoraRetrieval.json +37 -0
- evaluation/mteb/RedditClustering.json +9 -0
- evaluation/mteb/RedditClusteringP2P.json +9 -0
- evaluation/mteb/SCIDOCS.json +37 -0
- evaluation/mteb/SICK-R.json +17 -0
- evaluation/mteb/STS12.json +19 -0
- evaluation/mteb/STS13.json +19 -0
- evaluation/mteb/STS14.json +19 -0
beir.json → evaluation/beir/beir.json
RENAMED
File without changes
|
evaluation/mteb/AmazonCounterfactualClassification.json
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"test": {
|
3 |
+
"en": {
|
4 |
+
"accuracy": 0.6922388059701492,
|
5 |
+
"accuracy_stderr": 0.05577698009153047,
|
6 |
+
"ap": 0.3204724673950256,
|
7 |
+
"ap_stderr": 0.030758638728823475,
|
8 |
+
"f1": 0.6325719825770428,
|
9 |
+
"f1_stderr": 0.04382866443453507,
|
10 |
+
"main_score": 0.3204724673950256
|
11 |
+
},
|
12 |
+
"evaluation_time": 234.06
|
13 |
+
},
|
14 |
+
"dataset_version": null,
|
15 |
+
"mteb_version": "0.0.2"
|
16 |
+
}
|
evaluation/mteb/AmazonPolarityClassification.json
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"test": {
|
3 |
+
"accuracy": 0.7126109999999999,
|
4 |
+
"accuracy_stderr": 0.0371266464483395,
|
5 |
+
"ap": 0.6616336378255403,
|
6 |
+
"ap_stderr": 0.038282983973039475,
|
7 |
+
"evaluation_time": 330535.51,
|
8 |
+
"f1": 0.7089719145825304,
|
9 |
+
"f1_stderr": 0.03949505400938336,
|
10 |
+
"main_score": 0.7126109999999999
|
11 |
+
},
|
12 |
+
"dataset_version": null,
|
13 |
+
"mteb_version": "0.0.2"
|
14 |
+
}
|
evaluation/mteb/AmazonReviewsClassification.json
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"test": {
|
3 |
+
"en": {
|
4 |
+
"accuracy": 0.39191999999999994,
|
5 |
+
"accuracy_stderr": 0.023273538622220733,
|
6 |
+
"f1": 0.38580766731113825,
|
7 |
+
"f1_stderr": 0.018793905233795604,
|
8 |
+
"main_score": 0.39191999999999994
|
9 |
+
},
|
10 |
+
"evaluation_time": 2060.82
|
11 |
+
},
|
12 |
+
"dataset_version": null,
|
13 |
+
"mteb_version": "0.0.2"
|
14 |
+
}
|
evaluation/mteb/ArguAna.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_version": null,
|
3 |
+
"mteb_version": "0.0.2",
|
4 |
+
"test": {
|
5 |
+
"evaluation_time": 477.21,
|
6 |
+
"map_at_1": 0.27312,
|
7 |
+
"map_at_10": 0.4262,
|
8 |
+
"map_at_100": 0.43707,
|
9 |
+
"map_at_1000": 0.43715,
|
10 |
+
"map_at_3": 0.37624,
|
11 |
+
"map_at_5": 0.40498,
|
12 |
+
"mrr_at_1": 0.27667,
|
13 |
+
"mrr_at_10": 0.42737,
|
14 |
+
"mrr_at_100": 0.43823,
|
15 |
+
"mrr_at_1000": 0.43831,
|
16 |
+
"mrr_at_3": 0.37743,
|
17 |
+
"mrr_at_5": 0.40616,
|
18 |
+
"ndcg_at_1": 0.27312,
|
19 |
+
"ndcg_at_10": 0.51375,
|
20 |
+
"ndcg_at_100": 0.55778,
|
21 |
+
"ndcg_at_1000": 0.55966,
|
22 |
+
"ndcg_at_3": 0.41087,
|
23 |
+
"ndcg_at_5": 0.46269,
|
24 |
+
"precision_at_1": 0.27312,
|
25 |
+
"precision_at_10": 0.07945,
|
26 |
+
"precision_at_100": 0.00982,
|
27 |
+
"precision_at_1000": 0.001,
|
28 |
+
"precision_at_3": 0.17046,
|
29 |
+
"precision_at_5": 0.12745,
|
30 |
+
"recall_at_1": 0.27312,
|
31 |
+
"recall_at_10": 0.79445,
|
32 |
+
"recall_at_100": 0.98151,
|
33 |
+
"recall_at_1000": 0.99573,
|
34 |
+
"recall_at_3": 0.51138,
|
35 |
+
"recall_at_5": 0.63727
|
36 |
+
}
|
37 |
+
}
|
evaluation/mteb/ArxivClusteringP2P.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"test": {
|
3 |
+
"evaluation_time": 122260.97,
|
4 |
+
"v_measure": 0.4559037428592033,
|
5 |
+
"v_measure_std": 0.13565379029776853
|
6 |
+
},
|
7 |
+
"dataset_version": null,
|
8 |
+
"mteb_version": "0.0.2"
|
9 |
+
}
|
evaluation/mteb/ArxivClusteringS2S.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"test": {
|
3 |
+
"evaluation_time": 15111.26,
|
4 |
+
"v_measure": 0.3886371701986363,
|
5 |
+
"v_measure_std": 0.13693041258907623
|
6 |
+
},
|
7 |
+
"dataset_version": null,
|
8 |
+
"mteb_version": "0.0.2"
|
9 |
+
}
|
evaluation/mteb/AskUbuntuDupQuestions.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"test": {
|
3 |
+
"evaluation_time": 129.15,
|
4 |
+
"map": 0.6162556869142777,
|
5 |
+
"mrr": 0.7583256386580486
|
6 |
+
},
|
7 |
+
"dataset_version": null,
|
8 |
+
"mteb_version": "0.0.2"
|
9 |
+
}
|
evaluation/mteb/BIOSSES.json
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"test": {
|
3 |
+
"cos_sim": {
|
4 |
+
"pearson": 0.8996074355094802,
|
5 |
+
"spearman": 0.862501580394454
|
6 |
+
},
|
7 |
+
"euclidean": {
|
8 |
+
"pearson": 0.8218427440380462,
|
9 |
+
"spearman": 0.8014760935017947
|
10 |
+
},
|
11 |
+
"evaluation_time": 13.54,
|
12 |
+
"manhattan": {
|
13 |
+
"pearson": 0.8224621578156392,
|
14 |
+
"spearman": 0.8000363016590163
|
15 |
+
}
|
16 |
+
}
|
17 |
+
}
|
evaluation/mteb/Banking77Classification.json
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"test": {
|
3 |
+
"accuracy": 0.8449350649350649,
|
4 |
+
"accuracy_stderr": 0.005092747481478234,
|
5 |
+
"evaluation_time": 568.26,
|
6 |
+
"f1": 0.8442493432337361,
|
7 |
+
"f1_stderr": 0.005291617674599615,
|
8 |
+
"main_score": 0.8449350649350649
|
9 |
+
},
|
10 |
+
"dataset_version": null,
|
11 |
+
"mteb_version": "0.0.2"
|
12 |
+
}
|
evaluation/mteb/BiorxivClusteringP2P.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"test": {
|
3 |
+
"evaluation_time": 14349.06,
|
4 |
+
"v_measure": 0.3655145972298938,
|
5 |
+
"v_measure_std": 0.01180536429604858
|
6 |
+
},
|
7 |
+
"dataset_version": null,
|
8 |
+
"mteb_version": "0.0.2"
|
9 |
+
}
|
evaluation/mteb/BiorxivClusteringS2S.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"test": {
|
3 |
+
"evaluation_time": 1826.36,
|
4 |
+
"v_measure": 0.3369901851846774,
|
5 |
+
"v_measure_std": 0.011822688018810827
|
6 |
+
},
|
7 |
+
"dataset_version": null,
|
8 |
+
"mteb_version": "0.0.2"
|
9 |
+
}
|
evaluation/mteb/CQADupstackAndroidRetrieval.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_version": null,
|
3 |
+
"mteb_version": "0.0.2",
|
4 |
+
"test": {
|
5 |
+
"evaluation_time": 2419.91,
|
6 |
+
"map_at_1": 0.30499,
|
7 |
+
"map_at_10": 0.41208,
|
8 |
+
"map_at_100": 0.42638,
|
9 |
+
"map_at_1000": 0.42754,
|
10 |
+
"map_at_3": 0.37506,
|
11 |
+
"map_at_5": 0.39422,
|
12 |
+
"mrr_at_1": 0.37339,
|
13 |
+
"mrr_at_10": 0.47051,
|
14 |
+
"mrr_at_100": 0.47745,
|
15 |
+
"mrr_at_1000": 0.47786,
|
16 |
+
"mrr_at_3": 0.44087,
|
17 |
+
"mrr_at_5": 0.45711,
|
18 |
+
"ndcg_at_1": 0.37339,
|
19 |
+
"ndcg_at_10": 0.47666,
|
20 |
+
"ndcg_at_100": 0.52994,
|
21 |
+
"ndcg_at_1000": 0.54929,
|
22 |
+
"ndcg_at_3": 0.41982,
|
23 |
+
"ndcg_at_5": 0.4442,
|
24 |
+
"precision_at_1": 0.37339,
|
25 |
+
"precision_at_10": 0.09127,
|
26 |
+
"precision_at_100": 0.01475,
|
27 |
+
"precision_at_1000": 0.00194,
|
28 |
+
"precision_at_3": 0.20076,
|
29 |
+
"precision_at_5": 0.14449,
|
30 |
+
"recall_at_1": 0.30499,
|
31 |
+
"recall_at_10": 0.60328,
|
32 |
+
"recall_at_100": 0.82579,
|
33 |
+
"recall_at_1000": 0.95074,
|
34 |
+
"recall_at_3": 0.4417,
|
35 |
+
"recall_at_5": 0.5094
|
36 |
+
}
|
37 |
+
}
|
evaluation/mteb/CQADupstackEnglishRetrieval.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_version": null,
|
3 |
+
"mteb_version": "0.0.2",
|
4 |
+
"test": {
|
5 |
+
"evaluation_time": 3724.38,
|
6 |
+
"map_at_1": 0.30613,
|
7 |
+
"map_at_10": 0.40781,
|
8 |
+
"map_at_100": 0.42018,
|
9 |
+
"map_at_1000": 0.42133,
|
10 |
+
"map_at_3": 0.37816,
|
11 |
+
"map_at_5": 0.39389,
|
12 |
+
"mrr_at_1": 0.38408,
|
13 |
+
"mrr_at_10": 0.46631,
|
14 |
+
"mrr_at_100": 0.47332,
|
15 |
+
"mrr_at_1000": 0.47368,
|
16 |
+
"mrr_at_3": 0.44384,
|
17 |
+
"mrr_at_5": 0.45661,
|
18 |
+
"ndcg_at_1": 0.38408,
|
19 |
+
"ndcg_at_10": 0.4638,
|
20 |
+
"ndcg_at_100": 0.5081,
|
21 |
+
"ndcg_at_1000": 0.52663,
|
22 |
+
"ndcg_at_3": 0.4218,
|
23 |
+
"ndcg_at_5": 0.43974,
|
24 |
+
"precision_at_1": 0.38408,
|
25 |
+
"precision_at_10": 0.08656,
|
26 |
+
"precision_at_100": 0.01386,
|
27 |
+
"precision_at_1000": 0.00184,
|
28 |
+
"precision_at_3": 0.20276,
|
29 |
+
"precision_at_5": 0.14242,
|
30 |
+
"recall_at_1": 0.30613,
|
31 |
+
"recall_at_10": 0.5644,
|
32 |
+
"recall_at_100": 0.75044,
|
33 |
+
"recall_at_1000": 0.86426,
|
34 |
+
"recall_at_3": 0.43766,
|
35 |
+
"recall_at_5": 0.48998
|
36 |
+
}
|
37 |
+
}
|
evaluation/mteb/CQADupstackGamingRetrieval.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_version": null,
|
3 |
+
"mteb_version": "0.0.2",
|
4 |
+
"test": {
|
5 |
+
"evaluation_time": 3971.77,
|
6 |
+
"map_at_1": 0.37371,
|
7 |
+
"map_at_10": 0.49718,
|
8 |
+
"map_at_100": 0.50737,
|
9 |
+
"map_at_1000": 0.5079,
|
10 |
+
"map_at_3": 0.46231,
|
11 |
+
"map_at_5": 0.48329,
|
12 |
+
"mrr_at_1": 0.42884,
|
13 |
+
"mrr_at_10": 0.53176,
|
14 |
+
"mrr_at_100": 0.53817,
|
15 |
+
"mrr_at_1000": 0.53845,
|
16 |
+
"mrr_at_3": 0.50199,
|
17 |
+
"mrr_at_5": 0.5213,
|
18 |
+
"ndcg_at_1": 0.42884,
|
19 |
+
"ndcg_at_10": 0.55826,
|
20 |
+
"ndcg_at_100": 0.5993,
|
21 |
+
"ndcg_at_1000": 0.61013,
|
22 |
+
"ndcg_at_3": 0.49764,
|
23 |
+
"ndcg_at_5": 0.53026,
|
24 |
+
"precision_at_1": 0.42884,
|
25 |
+
"precision_at_10": 0.09047,
|
26 |
+
"precision_at_100": 0.01212,
|
27 |
+
"precision_at_1000": 0.00135,
|
28 |
+
"precision_at_3": 0.22132,
|
29 |
+
"precision_at_5": 0.15524,
|
30 |
+
"recall_at_1": 0.37371,
|
31 |
+
"recall_at_10": 0.70482,
|
32 |
+
"recall_at_100": 0.88425,
|
33 |
+
"recall_at_1000": 0.96034,
|
34 |
+
"recall_at_3": 0.5443,
|
35 |
+
"recall_at_5": 0.62328
|
36 |
+
}
|
37 |
+
}
|
evaluation/mteb/CQADupstackGisRetrieval.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_version": null,
|
3 |
+
"mteb_version": "0.0.2",
|
4 |
+
"test": {
|
5 |
+
"evaluation_time": 5450.64,
|
6 |
+
"map_at_1": 0.22876,
|
7 |
+
"map_at_10": 0.31715,
|
8 |
+
"map_at_100": 0.32847,
|
9 |
+
"map_at_1000": 0.32922,
|
10 |
+
"map_at_3": 0.2905,
|
11 |
+
"map_at_5": 0.30396,
|
12 |
+
"mrr_at_1": 0.2452,
|
13 |
+
"mrr_at_10": 0.33497,
|
14 |
+
"mrr_at_100": 0.34455,
|
15 |
+
"mrr_at_1000": 0.3451,
|
16 |
+
"mrr_at_3": 0.30791,
|
17 |
+
"mrr_at_5": 0.32175,
|
18 |
+
"ndcg_at_1": 0.2452,
|
19 |
+
"ndcg_at_10": 0.3695,
|
20 |
+
"ndcg_at_100": 0.42238,
|
21 |
+
"ndcg_at_1000": 0.44148,
|
22 |
+
"ndcg_at_3": 0.31435,
|
23 |
+
"ndcg_at_5": 0.33839,
|
24 |
+
"precision_at_1": 0.2452,
|
25 |
+
"precision_at_10": 0.05932,
|
26 |
+
"precision_at_100": 0.00901,
|
27 |
+
"precision_at_1000": 0.0011,
|
28 |
+
"precision_at_3": 0.13446,
|
29 |
+
"precision_at_5": 0.09469,
|
30 |
+
"recall_at_1": 0.22876,
|
31 |
+
"recall_at_10": 0.5138,
|
32 |
+
"recall_at_100": 0.75311,
|
33 |
+
"recall_at_1000": 0.89718,
|
34 |
+
"recall_at_3": 0.3626,
|
35 |
+
"recall_at_5": 0.42249
|
36 |
+
}
|
37 |
+
}
|
evaluation/mteb/CQADupstackMathematicaRetrieval.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_version": null,
|
3 |
+
"mteb_version": "0.0.2",
|
4 |
+
"test": {
|
5 |
+
"evaluation_time": 2931.62,
|
6 |
+
"map_at_1": 0.14984,
|
7 |
+
"map_at_10": 0.23457,
|
8 |
+
"map_at_100": 0.24723,
|
9 |
+
"map_at_1000": 0.24846,
|
10 |
+
"map_at_3": 0.20873,
|
11 |
+
"map_at_5": 0.22357,
|
12 |
+
"mrr_at_1": 0.18159,
|
13 |
+
"mrr_at_10": 0.27431,
|
14 |
+
"mrr_at_100": 0.28449,
|
15 |
+
"mrr_at_1000": 0.2852,
|
16 |
+
"mrr_at_3": 0.24979,
|
17 |
+
"mrr_at_5": 0.26447,
|
18 |
+
"ndcg_at_1": 0.18159,
|
19 |
+
"ndcg_at_10": 0.28628,
|
20 |
+
"ndcg_at_100": 0.34741,
|
21 |
+
"ndcg_at_1000": 0.37516,
|
22 |
+
"ndcg_at_3": 0.23902,
|
23 |
+
"ndcg_at_5": 0.26294,
|
24 |
+
"precision_at_1": 0.18159,
|
25 |
+
"precision_at_10": 0.05485,
|
26 |
+
"precision_at_100": 0.00985,
|
27 |
+
"precision_at_1000": 0.00136,
|
28 |
+
"precision_at_3": 0.11774,
|
29 |
+
"precision_at_5": 0.08731,
|
30 |
+
"recall_at_1": 0.14984,
|
31 |
+
"recall_at_10": 0.40198,
|
32 |
+
"recall_at_100": 0.67115,
|
33 |
+
"recall_at_1000": 0.86497,
|
34 |
+
"recall_at_3": 0.27639,
|
35 |
+
"recall_at_5": 0.33595
|
36 |
+
}
|
37 |
+
}
|
evaluation/mteb/CQADupstackPhysicsRetrieval.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_version": null,
|
3 |
+
"mteb_version": "0.0.2",
|
4 |
+
"test": {
|
5 |
+
"evaluation_time": 5338.65,
|
6 |
+
"map_at_1": 0.29067,
|
7 |
+
"map_at_10": 0.39457,
|
8 |
+
"map_at_100": 0.4083,
|
9 |
+
"map_at_1000": 0.4094,
|
10 |
+
"map_at_3": 0.35995,
|
11 |
+
"map_at_5": 0.38159,
|
12 |
+
"mrr_at_1": 0.34937,
|
13 |
+
"mrr_at_10": 0.44755,
|
14 |
+
"mrr_at_100": 0.45549,
|
15 |
+
"mrr_at_1000": 0.45589,
|
16 |
+
"mrr_at_3": 0.41947,
|
17 |
+
"mrr_at_5": 0.43733,
|
18 |
+
"ndcg_at_1": 0.34937,
|
19 |
+
"ndcg_at_10": 0.45573,
|
20 |
+
"ndcg_at_100": 0.51267,
|
21 |
+
"ndcg_at_1000": 0.53184,
|
22 |
+
"ndcg_at_3": 0.39962,
|
23 |
+
"ndcg_at_5": 0.4302,
|
24 |
+
"precision_at_1": 0.34937,
|
25 |
+
"precision_at_10": 0.08296,
|
26 |
+
"precision_at_100": 0.0132,
|
27 |
+
"precision_at_1000": 0.00167,
|
28 |
+
"precision_at_3": 0.188,
|
29 |
+
"precision_at_5": 0.13763,
|
30 |
+
"recall_at_1": 0.29067,
|
31 |
+
"recall_at_10": 0.58298,
|
32 |
+
"recall_at_100": 0.82251,
|
33 |
+
"recall_at_1000": 0.94476,
|
34 |
+
"recall_at_3": 0.42984,
|
35 |
+
"recall_at_5": 0.50658
|
36 |
+
}
|
37 |
+
}
|
evaluation/mteb/CQADupstackProgrammersRetrieval.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_version": null,
|
3 |
+
"mteb_version": "0.0.2",
|
4 |
+
"test": {
|
5 |
+
"evaluation_time": 4979.86,
|
6 |
+
"map_at_1": 0.25986,
|
7 |
+
"map_at_10": 0.35746,
|
8 |
+
"map_at_100": 0.37067,
|
9 |
+
"map_at_1000": 0.37191,
|
10 |
+
"map_at_3": 0.32599,
|
11 |
+
"map_at_5": 0.34239,
|
12 |
+
"mrr_at_1": 0.31735,
|
13 |
+
"mrr_at_10": 0.40515,
|
14 |
+
"mrr_at_100": 0.41459,
|
15 |
+
"mrr_at_1000": 0.41516,
|
16 |
+
"mrr_at_3": 0.37938,
|
17 |
+
"mrr_at_5": 0.3925,
|
18 |
+
"ndcg_at_1": 0.31735,
|
19 |
+
"ndcg_at_10": 0.41484,
|
20 |
+
"ndcg_at_100": 0.47047,
|
21 |
+
"ndcg_at_1000": 0.49427,
|
22 |
+
"ndcg_at_3": 0.36255,
|
23 |
+
"ndcg_at_5": 0.38375,
|
24 |
+
"precision_at_1": 0.31735,
|
25 |
+
"precision_at_10": 0.0766,
|
26 |
+
"precision_at_100": 0.01234,
|
27 |
+
"precision_at_1000": 0.0016,
|
28 |
+
"precision_at_3": 0.17428,
|
29 |
+
"precision_at_5": 0.12329,
|
30 |
+
"recall_at_1": 0.25986,
|
31 |
+
"recall_at_10": 0.53761,
|
32 |
+
"recall_at_100": 0.77149,
|
33 |
+
"recall_at_1000": 0.93342,
|
34 |
+
"recall_at_3": 0.39068,
|
35 |
+
"recall_at_5": 0.44693
|
36 |
+
}
|
37 |
+
}
|
evaluation/mteb/CQADupstackRetrieval.json
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_version": null,
|
3 |
+
"mteb_version": "0.0.2",
|
4 |
+
"test": {
|
5 |
+
"evaluation_time": 7822.83,
|
6 |
+
"ndcg_at_1": 0.2821458333333333,
|
7 |
+
"ndcg_at_3": 0.3325866666666667,
|
8 |
+
"ndcg_at_5": 0.35453333333333326,
|
9 |
+
"ndcg_at_10": 0.381195,
|
10 |
+
"ndcg_at_100": 0.43441,
|
11 |
+
"ndcg_at_1000": 0.45826083333333334
|
12 |
+
}
|
13 |
+
}
|
evaluation/mteb/CQADupstackStatsRetrieval.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_version": null,
|
3 |
+
"mteb_version": "0.0.2",
|
4 |
+
"test": {
|
5 |
+
"evaluation_time": 6837.54,
|
6 |
+
"map_at_1": 0.22082,
|
7 |
+
"map_at_10": 0.29216,
|
8 |
+
"map_at_100": 0.30163,
|
9 |
+
"map_at_1000": 0.30269,
|
10 |
+
"map_at_3": 0.26942,
|
11 |
+
"map_at_5": 0.28236,
|
12 |
+
"mrr_at_1": 0.24847,
|
13 |
+
"mrr_at_10": 0.31919,
|
14 |
+
"mrr_at_100": 0.32817,
|
15 |
+
"mrr_at_1000": 0.32897,
|
16 |
+
"mrr_at_3": 0.29831,
|
17 |
+
"mrr_at_5": 0.3102,
|
18 |
+
"ndcg_at_1": 0.24847,
|
19 |
+
"ndcg_at_10": 0.334,
|
20 |
+
"ndcg_at_100": 0.38354,
|
21 |
+
"ndcg_at_1000": 0.41045,
|
22 |
+
"ndcg_at_3": 0.29236,
|
23 |
+
"ndcg_at_5": 0.31258,
|
24 |
+
"precision_at_1": 0.24847,
|
25 |
+
"precision_at_10": 0.05353,
|
26 |
+
"precision_at_100": 0.00853,
|
27 |
+
"precision_at_1000": 0.00116,
|
28 |
+
"precision_at_3": 0.12679,
|
29 |
+
"precision_at_5": 0.08988,
|
30 |
+
"recall_at_1": 0.22082,
|
31 |
+
"recall_at_10": 0.43505,
|
32 |
+
"recall_at_100": 0.66454,
|
33 |
+
"recall_at_1000": 0.86378,
|
34 |
+
"recall_at_3": 0.32163,
|
35 |
+
"recall_at_5": 0.3706
|
36 |
+
}
|
37 |
+
}
|
evaluation/mteb/CQADupstackTexRetrieval.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_version": null,
|
3 |
+
"mteb_version": "0.0.2",
|
4 |
+
"test": {
|
5 |
+
"evaluation_time": 11622.05,
|
6 |
+
"map_at_1": 0.1554,
|
7 |
+
"map_at_10": 0.22362,
|
8 |
+
"map_at_100": 0.23435,
|
9 |
+
"map_at_1000": 0.23564,
|
10 |
+
"map_at_3": 0.20143,
|
11 |
+
"map_at_5": 0.21324,
|
12 |
+
"mrr_at_1": 0.18892,
|
13 |
+
"mrr_at_10": 0.25943,
|
14 |
+
"mrr_at_100": 0.26883,
|
15 |
+
"mrr_at_1000": 0.26969,
|
16 |
+
"mrr_at_3": 0.23727,
|
17 |
+
"mrr_at_5": 0.24923,
|
18 |
+
"ndcg_at_1": 0.18892,
|
19 |
+
"ndcg_at_10": 0.26811,
|
20 |
+
"ndcg_at_100": 0.32066,
|
21 |
+
"ndcg_at_1000": 0.35166,
|
22 |
+
"ndcg_at_3": 0.22706,
|
23 |
+
"ndcg_at_5": 0.24508,
|
24 |
+
"precision_at_1": 0.18892,
|
25 |
+
"precision_at_10": 0.04942,
|
26 |
+
"precision_at_100": 0.00878,
|
27 |
+
"precision_at_1000": 0.00131,
|
28 |
+
"precision_at_3": 0.10748,
|
29 |
+
"precision_at_5": 0.07784,
|
30 |
+
"recall_at_1": 0.1554,
|
31 |
+
"recall_at_10": 0.36743,
|
32 |
+
"recall_at_100": 0.60525,
|
33 |
+
"recall_at_1000": 0.82576,
|
34 |
+
"recall_at_3": 0.25252,
|
35 |
+
"recall_at_5": 0.29872
|
36 |
+
}
|
37 |
+
}
|
evaluation/mteb/CQADupstackUnixRetrieval.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_version": null,
|
3 |
+
"mteb_version": "0.0.2",
|
4 |
+
"test": {
|
5 |
+
"evaluation_time": 7165.48,
|
6 |
+
"map_at_1": 0.24453,
|
7 |
+
"map_at_10": 0.33363,
|
8 |
+
"map_at_100": 0.34579,
|
9 |
+
"map_at_1000": 0.34686,
|
10 |
+
"map_at_3": 0.30583,
|
11 |
+
"map_at_5": 0.32118,
|
12 |
+
"mrr_at_1": 0.28918,
|
13 |
+
"mrr_at_10": 0.37675,
|
14 |
+
"mrr_at_100": 0.38567,
|
15 |
+
"mrr_at_1000": 0.38632,
|
16 |
+
"mrr_at_3": 0.35261,
|
17 |
+
"mrr_at_5": 0.36576,
|
18 |
+
"ndcg_at_1": 0.28918,
|
19 |
+
"ndcg_at_10": 0.38736,
|
20 |
+
"ndcg_at_100": 0.44261,
|
21 |
+
"ndcg_at_1000": 0.4672,
|
22 |
+
"ndcg_at_3": 0.3381,
|
23 |
+
"ndcg_at_5": 0.36009,
|
24 |
+
"precision_at_1": 0.28918,
|
25 |
+
"precision_at_10": 0.06586,
|
26 |
+
"precision_at_100": 0.01047,
|
27 |
+
"precision_at_1000": 0.00137,
|
28 |
+
"precision_at_3": 0.15361,
|
29 |
+
"precision_at_5": 0.10858,
|
30 |
+
"recall_at_1": 0.24453,
|
31 |
+
"recall_at_10": 0.50886,
|
32 |
+
"recall_at_100": 0.7503,
|
33 |
+
"recall_at_1000": 0.92123,
|
34 |
+
"recall_at_3": 0.37138,
|
35 |
+
"recall_at_5": 0.42865
|
36 |
+
}
|
37 |
+
}
|
evaluation/mteb/CQADupstackWebmastersRetrieval.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_version": null,
|
3 |
+
"mteb_version": "0.0.2",
|
4 |
+
"test": {
|
5 |
+
"evaluation_time": 2264.66,
|
6 |
+
"map_at_1": 0.2457,
|
7 |
+
"map_at_10": 0.33672,
|
8 |
+
"map_at_100": 0.35244,
|
9 |
+
"map_at_1000": 0.35467,
|
10 |
+
"map_at_3": 0.30712,
|
11 |
+
"map_at_5": 0.32383,
|
12 |
+
"mrr_at_1": 0.29644,
|
13 |
+
"mrr_at_10": 0.38344,
|
14 |
+
"mrr_at_100": 0.39219,
|
15 |
+
"mrr_at_1000": 0.39282,
|
16 |
+
"mrr_at_3": 0.35771,
|
17 |
+
"mrr_at_5": 0.37273,
|
18 |
+
"ndcg_at_1": 0.29644,
|
19 |
+
"ndcg_at_10": 0.39567,
|
20 |
+
"ndcg_at_100": 0.45097,
|
21 |
+
"ndcg_at_1000": 0.47923,
|
22 |
+
"ndcg_at_3": 0.34768,
|
23 |
+
"ndcg_at_5": 0.37122,
|
24 |
+
"precision_at_1": 0.29644,
|
25 |
+
"precision_at_10": 0.07589,
|
26 |
+
"precision_at_100": 0.01478,
|
27 |
+
"precision_at_1000": 0.00235,
|
28 |
+
"precision_at_3": 0.16337,
|
29 |
+
"precision_at_5": 0.12055,
|
30 |
+
"recall_at_1": 0.2457,
|
31 |
+
"recall_at_10": 0.51009,
|
32 |
+
"recall_at_100": 0.75423,
|
33 |
+
"recall_at_1000": 0.93671,
|
34 |
+
"recall_at_3": 0.36926,
|
35 |
+
"recall_at_5": 0.43245
|
36 |
+
}
|
37 |
+
}
|
evaluation/mteb/CQADupstackWordpressRetrieval.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_version": null,
|
3 |
+
"mteb_version": "0.0.2",
|
4 |
+
"test": {
|
5 |
+
"evaluation_time": 7608.65,
|
6 |
+
"map_at_1": 0.21356,
|
7 |
+
"map_at_10": 0.27904,
|
8 |
+
"map_at_100": 0.28938,
|
9 |
+
"map_at_1000": 0.29036,
|
10 |
+
"map_at_3": 0.25726,
|
11 |
+
"map_at_5": 0.26935,
|
12 |
+
"mrr_at_1": 0.22551,
|
13 |
+
"mrr_at_10": 0.29259,
|
14 |
+
"mrr_at_100": 0.30272,
|
15 |
+
"mrr_at_1000": 0.30348,
|
16 |
+
"mrr_at_3": 0.27295,
|
17 |
+
"mrr_at_5": 0.28358,
|
18 |
+
"ndcg_at_1": 0.22551,
|
19 |
+
"ndcg_at_10": 0.31817,
|
20 |
+
"ndcg_at_100": 0.37164,
|
21 |
+
"ndcg_at_1000": 0.3982,
|
22 |
+
"ndcg_at_3": 0.27596,
|
23 |
+
"ndcg_at_5": 0.29568,
|
24 |
+
"precision_at_1": 0.22551,
|
25 |
+
"precision_at_10": 0.04917,
|
26 |
+
"precision_at_100": 0.00828,
|
27 |
+
"precision_at_1000": 0.00114,
|
28 |
+
"precision_at_3": 0.11583,
|
29 |
+
"precision_at_5": 0.08133,
|
30 |
+
"recall_at_1": 0.21356,
|
31 |
+
"recall_at_10": 0.42489,
|
32 |
+
"recall_at_100": 0.67128,
|
33 |
+
"recall_at_1000": 0.87441,
|
34 |
+
"recall_at_3": 0.31165,
|
35 |
+
"recall_at_5": 0.35853
|
36 |
+
}
|
37 |
+
}
|
evaluation/mteb/ClimateFEVER.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_version": null,
|
3 |
+
"mteb_version": "0.0.2",
|
4 |
+
"test": {
|
5 |
+
"evaluation_time": 64667.21,
|
6 |
+
"map_at_1": 0.12306,
|
7 |
+
"map_at_10": 0.21523,
|
8 |
+
"map_at_100": 0.23358,
|
9 |
+
"map_at_1000": 0.23541,
|
10 |
+
"map_at_3": 0.17809,
|
11 |
+
"map_at_5": 0.19631,
|
12 |
+
"mrr_at_1": 0.27948,
|
13 |
+
"mrr_at_10": 0.40355,
|
14 |
+
"mrr_at_100": 0.41166,
|
15 |
+
"mrr_at_1000": 0.41203,
|
16 |
+
"mrr_at_3": 0.36819,
|
17 |
+
"mrr_at_5": 0.38959,
|
18 |
+
"ndcg_at_1": 0.27948,
|
19 |
+
"ndcg_at_10": 0.30462,
|
20 |
+
"ndcg_at_100": 0.37473,
|
21 |
+
"ndcg_at_1000": 0.40718,
|
22 |
+
"ndcg_at_3": 0.24646,
|
23 |
+
"ndcg_at_5": 0.26642,
|
24 |
+
"precision_at_1": 0.27948,
|
25 |
+
"precision_at_10": 0.09648,
|
26 |
+
"precision_at_100": 0.01724,
|
27 |
+
"precision_at_1000": 0.00232,
|
28 |
+
"precision_at_3": 0.1848,
|
29 |
+
"precision_at_5": 0.14293,
|
30 |
+
"recall_at_1": 0.12306,
|
31 |
+
"recall_at_10": 0.37181,
|
32 |
+
"recall_at_100": 0.61148,
|
33 |
+
"recall_at_1000": 0.79401,
|
34 |
+
"recall_at_3": 0.22883,
|
35 |
+
"recall_at_5": 0.2859
|
36 |
+
}
|
37 |
+
}
|
evaluation/mteb/DBPedia.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_version": null,
|
3 |
+
"mteb_version": "0.0.2",
|
4 |
+
"test": {
|
5 |
+
"evaluation_time": 43004.49,
|
6 |
+
"map_at_1": 0.09357,
|
7 |
+
"map_at_10": 0.18849,
|
8 |
+
"map_at_100": 0.25369,
|
9 |
+
"map_at_1000": 0.2695,
|
10 |
+
"map_at_3": 0.13625,
|
11 |
+
"map_at_5": 0.15957,
|
12 |
+
"mrr_at_1": 0.6775,
|
13 |
+
"mrr_at_10": 0.74734,
|
14 |
+
"mrr_at_100": 0.751,
|
15 |
+
"mrr_at_1000": 0.75109,
|
16 |
+
"mrr_at_3": 0.73542,
|
17 |
+
"mrr_at_5": 0.74167,
|
18 |
+
"ndcg_at_1": 0.55375,
|
19 |
+
"ndcg_at_10": 0.39874,
|
20 |
+
"ndcg_at_100": 0.43098,
|
21 |
+
"ndcg_at_1000": 0.50692,
|
22 |
+
"ndcg_at_3": 0.44856,
|
23 |
+
"ndcg_at_5": 0.42139,
|
24 |
+
"precision_at_1": 0.6775,
|
25 |
+
"precision_at_10": 0.311,
|
26 |
+
"precision_at_100": 0.09303,
|
27 |
+
"precision_at_1000": 0.02006,
|
28 |
+
"precision_at_3": 0.4825,
|
29 |
+
"precision_at_5": 0.4095,
|
30 |
+
"recall_at_1": 0.09357,
|
31 |
+
"recall_at_10": 0.23832,
|
32 |
+
"recall_at_100": 0.47906,
|
33 |
+
"recall_at_1000": 0.71309,
|
34 |
+
"recall_at_3": 0.14512,
|
35 |
+
"recall_at_5": 0.183
|
36 |
+
}
|
37 |
+
}
|
evaluation/mteb/EmotionClassification.json
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"test": {
|
3 |
+
"accuracy": 0.49655000000000005,
|
4 |
+
"accuracy_stderr": 0.023384236143179877,
|
5 |
+
"evaluation_time": 408.2,
|
6 |
+
"f1": 0.45519761909389506,
|
7 |
+
"f1_stderr": 0.017373883846959346,
|
8 |
+
"main_score": 0.49655000000000005
|
9 |
+
},
|
10 |
+
"dataset_version": null,
|
11 |
+
"mteb_version": "0.0.2"
|
12 |
+
}
|
evaluation/mteb/FEVER.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_version": null,
|
3 |
+
"mteb_version": "0.0.2",
|
4 |
+
"test": {
|
5 |
+
"evaluation_time": 65632.31,
|
6 |
+
"map_at_1": 0.6274,
|
7 |
+
"map_at_10": 0.7307,
|
8 |
+
"map_at_100": 0.73398,
|
9 |
+
"map_at_1000": 0.7341,
|
10 |
+
"map_at_3": 0.71338,
|
11 |
+
"map_at_5": 0.72423,
|
12 |
+
"mrr_at_1": 0.67777,
|
13 |
+
"mrr_at_10": 0.77873,
|
14 |
+
"mrr_at_100": 0.78091,
|
15 |
+
"mrr_at_1000": 0.78094,
|
16 |
+
"mrr_at_3": 0.76375,
|
17 |
+
"mrr_at_5": 0.77316,
|
18 |
+
"ndcg_at_1": 0.67777,
|
19 |
+
"ndcg_at_10": 0.7824,
|
20 |
+
"ndcg_at_100": 0.79557,
|
21 |
+
"ndcg_at_1000": 0.79814,
|
22 |
+
"ndcg_at_3": 0.75125,
|
23 |
+
"ndcg_at_5": 0.76834,
|
24 |
+
"precision_at_1": 0.67777,
|
25 |
+
"precision_at_10": 0.09832,
|
26 |
+
"precision_at_100": 0.01061,
|
27 |
+
"precision_at_1000": 0.0011,
|
28 |
+
"precision_at_3": 0.29433,
|
29 |
+
"precision_at_5": 0.18665,
|
30 |
+
"recall_at_1": 0.6274,
|
31 |
+
"recall_at_10": 0.89505,
|
32 |
+
"recall_at_100": 0.95102,
|
33 |
+
"recall_at_1000": 0.96825,
|
34 |
+
"recall_at_3": 0.81028,
|
35 |
+
"recall_at_5": 0.85281
|
36 |
+
}
|
37 |
+
}
|
evaluation/mteb/FiQA2018.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_version": null,
|
3 |
+
"mteb_version": "0.0.2",
|
4 |
+
"test": {
|
5 |
+
"evaluation_time": 951.67,
|
6 |
+
"map_at_1": 0.18467,
|
7 |
+
"map_at_10": 0.30021,
|
8 |
+
"map_at_100": 0.31739,
|
9 |
+
"map_at_1000": 0.31934,
|
10 |
+
"map_at_3": 0.26003,
|
11 |
+
"map_at_5": 0.28338,
|
12 |
+
"mrr_at_1": 0.3534,
|
13 |
+
"mrr_at_10": 0.44109,
|
14 |
+
"mrr_at_100": 0.44993,
|
15 |
+
"mrr_at_1000": 0.45042,
|
16 |
+
"mrr_at_3": 0.41667,
|
17 |
+
"mrr_at_5": 0.4314,
|
18 |
+
"ndcg_at_1": 0.3534,
|
19 |
+
"ndcg_at_10": 0.37202,
|
20 |
+
"ndcg_at_100": 0.43853,
|
21 |
+
"ndcg_at_1000": 0.47235,
|
22 |
+
"ndcg_at_3": 0.335,
|
23 |
+
"ndcg_at_5": 0.34985,
|
24 |
+
"precision_at_1": 0.3534,
|
25 |
+
"precision_at_10": 0.10247,
|
26 |
+
"precision_at_100": 0.01715,
|
27 |
+
"precision_at_1000": 0.00232,
|
28 |
+
"precision_at_3": 0.22222,
|
29 |
+
"precision_at_5": 0.16574,
|
30 |
+
"recall_at_1": 0.18467,
|
31 |
+
"recall_at_10": 0.44081,
|
32 |
+
"recall_at_100": 0.68722,
|
33 |
+
"recall_at_1000": 0.89087,
|
34 |
+
"recall_at_3": 0.30567,
|
35 |
+
"recall_at_5": 0.36982
|
36 |
+
}
|
37 |
+
}
|
evaluation/mteb/HotpotQA.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_version": null,
|
3 |
+
"mteb_version": "0.0.2",
|
4 |
+
"test": {
|
5 |
+
"evaluation_time": 46237.87,
|
6 |
+
"map_at_1": 0.35726,
|
7 |
+
"map_at_10": 0.50207,
|
8 |
+
"map_at_100": 0.51055,
|
9 |
+
"map_at_1000": 0.51128,
|
10 |
+
"map_at_3": 0.47576,
|
11 |
+
"map_at_5": 0.49172,
|
12 |
+
"mrr_at_1": 0.71452,
|
13 |
+
"mrr_at_10": 0.77419,
|
14 |
+
"mrr_at_100": 0.77711,
|
15 |
+
"mrr_at_1000": 0.77723,
|
16 |
+
"mrr_at_3": 0.76394,
|
17 |
+
"mrr_at_5": 0.77001,
|
18 |
+
"ndcg_at_1": 0.71452,
|
19 |
+
"ndcg_at_10": 0.59261,
|
20 |
+
"ndcg_at_100": 0.62424,
|
21 |
+
"ndcg_at_1000": 0.63951,
|
22 |
+
"ndcg_at_3": 0.55327,
|
23 |
+
"ndcg_at_5": 0.57417,
|
24 |
+
"precision_at_1": 0.71452,
|
25 |
+
"precision_at_10": 0.12061,
|
26 |
+
"precision_at_100": 0.01455,
|
27 |
+
"precision_at_1000": 0.00166,
|
28 |
+
"precision_at_3": 0.3436,
|
29 |
+
"precision_at_5": 0.22266,
|
30 |
+
"recall_at_1": 0.35726,
|
31 |
+
"recall_at_10": 0.60304,
|
32 |
+
"recall_at_100": 0.72755,
|
33 |
+
"recall_at_1000": 0.82978,
|
34 |
+
"recall_at_3": 0.5154,
|
35 |
+
"recall_at_5": 0.55665
|
36 |
+
}
|
37 |
+
}
|
evaluation/mteb/ImdbClassification.json
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"test": {
|
3 |
+
"accuracy": 0.666376,
|
4 |
+
"accuracy_stderr": 0.03435007633179292,
|
5 |
+
"ap": 0.6148938261286748,
|
6 |
+
"ap_stderr": 0.028266712412564548,
|
7 |
+
"evaluation_time": 38609.7,
|
8 |
+
"f1": 0.6635089269264965,
|
9 |
+
"f1_stderr": 0.03526614235815782,
|
10 |
+
"main_score": 0.666376
|
11 |
+
},
|
12 |
+
"dataset_version": null,
|
13 |
+
"mteb_version": "0.0.2"
|
14 |
+
}
|
evaluation/mteb/MSMARCO.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_version": null,
|
3 |
+
"mteb_version": "0.0.2",
|
4 |
+
"validation": {
|
5 |
+
"evaluation_time": 82887.84,
|
6 |
+
"map_at_1": 0.20842,
|
7 |
+
"map_at_10": 0.32992,
|
8 |
+
"map_at_100": 0.34236,
|
9 |
+
"map_at_1000": 0.34286,
|
10 |
+
"map_at_3": 0.29049,
|
11 |
+
"map_at_5": 0.31392,
|
12 |
+
"mrr_at_1": 0.21375,
|
13 |
+
"mrr_at_10": 0.33581,
|
14 |
+
"mrr_at_100": 0.3476,
|
15 |
+
"mrr_at_1000": 0.34803,
|
16 |
+
"mrr_at_3": 0.29704,
|
17 |
+
"mrr_at_5": 0.32015,
|
18 |
+
"ndcg_at_1": 0.21375,
|
19 |
+
"ndcg_at_10": 0.39905,
|
20 |
+
"ndcg_at_100": 0.45843,
|
21 |
+
"ndcg_at_1000": 0.47084,
|
22 |
+
"ndcg_at_3": 0.31919,
|
23 |
+
"ndcg_at_5": 0.36107,
|
24 |
+
"precision_at_1": 0.21375,
|
25 |
+
"precision_at_10": 0.06393,
|
26 |
+
"precision_at_100": 0.00935,
|
27 |
+
"precision_at_1000": 0.00104,
|
28 |
+
"precision_at_3": 0.13663,
|
29 |
+
"precision_at_5": 0.10324,
|
30 |
+
"recall_at_1": 0.20842,
|
31 |
+
"recall_at_10": 0.6117,
|
32 |
+
"recall_at_100": 0.88518,
|
33 |
+
"recall_at_1000": 0.97993,
|
34 |
+
"recall_at_3": 0.39571,
|
35 |
+
"recall_at_5": 0.49654
|
36 |
+
}
|
37 |
+
}
|
evaluation/mteb/MTOPDomainClassification.json
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"test": {
|
3 |
+
"en": {
|
4 |
+
"accuracy": 0.9346557227542178,
|
5 |
+
"accuracy_stderr": 0.0072510589372134955,
|
6 |
+
"f1": 0.9287345917772146,
|
7 |
+
"f1_stderr": 0.0082627627462556,
|
8 |
+
"main_score": 0.9346557227542178
|
9 |
+
},
|
10 |
+
"evaluation_time": 475.45
|
11 |
+
},
|
12 |
+
"dataset_version": null,
|
13 |
+
"mteb_version": "0.0.2"
|
14 |
+
}
|
evaluation/mteb/MTOPIntentClassification.json
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"test": {
|
3 |
+
"en": {
|
4 |
+
"accuracy": 0.7242134062927497,
|
5 |
+
"accuracy_stderr": 0.012571448106282981,
|
6 |
+
"f1": 0.5503624810959269,
|
7 |
+
"f1_stderr": 0.011737816802681789,
|
8 |
+
"main_score": 0.7242134062927497
|
9 |
+
},
|
10 |
+
"evaluation_time": 612.9
|
11 |
+
},
|
12 |
+
"dataset_version": null,
|
13 |
+
"mteb_version": "0.0.2"
|
14 |
+
}
|
evaluation/mteb/MassiveIntentClassification.json
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"test": {
|
3 |
+
"en": {
|
4 |
+
"accuracy": 0.703866845998655,
|
5 |
+
"accuracy_stderr": 0.00456616151893328,
|
6 |
+
"f1": 0.6896745198729209,
|
7 |
+
"f1_stderr": 0.006294250065644286,
|
8 |
+
"main_score": 0.703866845998655
|
9 |
+
},
|
10 |
+
"evaluation_time": 387.35
|
11 |
+
},
|
12 |
+
"dataset_version": null,
|
13 |
+
"mteb_version": "0.0.2"
|
14 |
+
}
|
evaluation/mteb/MassiveScenarioClassification.json
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"test": {
|
3 |
+
"en": {
|
4 |
+
"accuracy": 0.7627774041694687,
|
5 |
+
"accuracy_stderr": 0.014020552186569309,
|
6 |
+
"f1": 0.7672936190462792,
|
7 |
+
"f1_stderr": 0.01203156406861017,
|
8 |
+
"main_score": 0.7627774041694687
|
9 |
+
},
|
10 |
+
"evaluation_time": 333.83
|
11 |
+
},
|
12 |
+
"dataset_version": null,
|
13 |
+
"mteb_version": "0.0.2"
|
14 |
+
}
|
evaluation/mteb/MedrxivClusteringP2P.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"test": {
|
3 |
+
"evaluation_time": 7211.78,
|
4 |
+
"v_measure": 0.3151174592577334,
|
5 |
+
"v_measure_std": 0.012772027882021399
|
6 |
+
},
|
7 |
+
"dataset_version": null,
|
8 |
+
"mteb_version": "0.0.2"
|
9 |
+
}
|
evaluation/mteb/MedrxivClusteringS2S.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"test": {
|
3 |
+
"evaluation_time": 1053.59,
|
4 |
+
"v_measure": 0.28764235987575365,
|
5 |
+
"v_measure_std": 0.011424256185368544
|
6 |
+
},
|
7 |
+
"dataset_version": null,
|
8 |
+
"mteb_version": "0.0.2"
|
9 |
+
}
|
evaluation/mteb/MindSmallReranking.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|