diff --git a/eval/beir.json b/evaluation/beir/beir.json similarity index 100% rename from eval/beir.json rename to evaluation/beir/beir.json diff --git a/evaluation/mteb/AmazonCounterfactualClassification.json b/evaluation/mteb/AmazonCounterfactualClassification.json new file mode 100644 index 0000000000000000000000000000000000000000..f40eadeeaa69de5f06b0b69df97bb6648c7d3440 --- /dev/null +++ b/evaluation/mteb/AmazonCounterfactualClassification.json @@ -0,0 +1,82 @@ +{ + "test": { + "de": { + "accuracy": 0.5688436830835117, + "accuracy_stderr": 0.029093897773991517, + "ap": 0.7267279104379771, + "ap_stderr": 0.010217580709985108, + "f1": 0.5444984024378641, + "f1_stderr": 0.02389292222743725, + "main_score": 0.7267279104379771 + }, + "en": { + "accuracy": 0.6123880597014926, + "accuracy_stderr": 0.031379155023926214, + "ap": 0.25854431650388643, + "ap_stderr": 0.016981404964613887, + "f1": 0.557518627628186, + "f1_stderr": 0.024487163893960967, + "main_score": 0.25854431650388643 + }, + "en-ext": { + "accuracy": 0.5827586206896551, + "accuracy_stderr": 0.048062843086995724, + "ap": 0.14067357642500386, + "ap_stderr": 0.013693676529546861, + "f1": 0.4817231851869133, + "f1_stderr": 0.030208933707168584, + "main_score": 0.14067357642500386 + }, + "evaluation_time": 52.9, + "ja": { + "accuracy": 0.5464668094218414, + "accuracy_stderr": 0.04850756984311426, + "ap": 0.11776694555054965, + "ap_stderr": 0.006424556406403228, + "f1": 0.44526622834078766, + "f1_stderr": 0.029061809459351582, + "main_score": 0.11776694555054965 + } + }, + "validation": { + "de": { + "accuracy": 0.5918454935622317, + "accuracy_stderr": 0.024350314081552426, + "ap": 0.7372017677238578, + "ap_stderr": 0.010683816932118138, + "f1": 0.5642882005866563, + "f1_stderr": 0.02007457192846663, + "main_score": 0.7372017677238578 + }, + "en": { + "accuracy": 0.6113432835820894, + "accuracy_stderr": 0.038501677223608914, + "ap": 0.2227104848944061, + "ap_stderr": 0.01397442904182502, + "f1": 0.5393378654542855, + "f1_stderr": 0.027447873327993155, + "main_score": 0.2227104848944061 + }, + "en-ext": { + "accuracy": 0.5848348348348348, + "accuracy_stderr": 0.043238290090035825, + "ap": 0.13520924912279636, + "ap_stderr": 0.011096096922529254, + "f1": 0.4794399892152111, + "f1_stderr": 0.02584441348504658, + "main_score": 0.13520924912279636 + }, + "evaluation_time": 43.78, + "ja": { + "accuracy": 0.5523605150214592, + "accuracy_stderr": 0.04902106769175921, + "ap": 0.11348392156508444, + "ap_stderr": 0.012428212574320868, + "f1": 0.4441089123442944, + "f1_stderr": 0.03136061225772234, + "main_score": 0.11348392156508444 + } + }, + "dataset_version": null, + "mteb_version": "0.0.2" +} \ No newline at end of file diff --git a/evaluation/mteb/AmazonPolarityClassification.json b/evaluation/mteb/AmazonPolarityClassification.json new file mode 100644 index 0000000000000000000000000000000000000000..76d187ded5686e327f07d4dcc8239a075279a60c --- /dev/null +++ b/evaluation/mteb/AmazonPolarityClassification.json @@ -0,0 +1,14 @@ +{ + "test": { + "accuracy": 0.65401225, + "accuracy_stderr": 0.03587243183229288, + "ap": 0.6022809958678552, + "ap_stderr": 0.028477219496347024, + "evaluation_time": 12198.12, + "f1": 0.650251824898292, + "f1_stderr": 0.037356425365799015, + "main_score": 0.65401225 + }, + "dataset_version": null, + "mteb_version": "0.0.2" +} \ No newline at end of file diff --git a/evaluation/mteb/AmazonReviewsClassification.json b/evaluation/mteb/AmazonReviewsClassification.json new file mode 100644 index 0000000000000000000000000000000000000000..c7821f56718345759cb2e53d9a6da544930bc43f --- /dev/null +++ b/evaluation/mteb/AmazonReviewsClassification.json @@ -0,0 +1,94 @@ +{ + "test": { + "de": { + "accuracy": 0.2479, + "accuracy_stderr": 0.01405709785126361, + "f1": 0.245833598854121, + "f1_stderr": 0.015461604594068135, + "main_score": 0.2479 + }, + "en": { + "accuracy": 0.31165999999999994, + "accuracy_stderr": 0.018214291092436177, + "f1": 0.30908870050167436, + "f1_stderr": 0.01830210891332752, + "main_score": 0.31165999999999994 + }, + "es": { + "accuracy": 0.26643999999999995, + "accuracy_stderr": 0.011960869533608332, + "f1": 0.2639012792213563, + "f1_stderr": 0.013800540988456476, + "main_score": 0.26643999999999995 + }, + "evaluation_time": 735.82, + "fr": { + "accuracy": 0.26386000000000004, + "accuracy_stderr": 0.017606828220892035, + "f1": 0.2627686779145487, + "f1_stderr": 0.01750120871837706, + "main_score": 0.26386000000000004 + }, + "ja": { + "accuracy": 0.22078000000000003, + "accuracy_stderr": 0.011609117106825998, + "f1": 0.21797960290226842, + "f1_stderr": 0.011959444775888735, + "main_score": 0.22078000000000003 + }, + "zh": { + "accuracy": 0.24274, + "accuracy_stderr": 0.007931733732293337, + "f1": 0.23887054434822627, + "f1_stderr": 0.008252486720387382, + "main_score": 0.24274 + } + }, + "validation": { + "de": { + "accuracy": 0.25168, + "accuracy_stderr": 0.015328457195686715, + "f1": 0.2502088133771333, + "f1_stderr": 0.01650287553492419, + "main_score": 0.25168 + }, + "en": { + "accuracy": 0.32438, + "accuracy_stderr": 0.022893920590410018, + "f1": 0.3212565428623893, + "f1_stderr": 0.021985479345985583, + "main_score": 0.32438 + }, + "es": { + "accuracy": 0.26704, + "accuracy_stderr": 0.012879068289282413, + "f1": 0.2641578465769073, + "f1_stderr": 0.01402375418117518, + "main_score": 0.26704 + }, + "evaluation_time": 740.71, + "fr": { + "accuracy": 0.26348, + "accuracy_stderr": 0.017907584985139675, + "f1": 0.26220101963133713, + "f1_stderr": 0.01807845581838548, + "main_score": 0.26348 + }, + "ja": { + "accuracy": 0.21562, + "accuracy_stderr": 0.011403666077187636, + "f1": 0.21286585397745714, + "f1_stderr": 0.011890703738952032, + "main_score": 0.21562 + }, + "zh": { + "accuracy": 0.23962, + "accuracy_stderr": 0.012780125195004927, + "f1": 0.23540552974934634, + "f1_stderr": 0.012407243106519977, + "main_score": 0.23962 + } + }, + "dataset_version": null, + "mteb_version": "0.0.2" +} \ No newline at end of file diff --git a/evaluation/mteb/ArguAna.json b/evaluation/mteb/ArguAna.json new file mode 100644 index 0000000000000000000000000000000000000000..19e566c968cf6e9cc2210ac441bb827cd048ba07 --- /dev/null +++ b/evaluation/mteb/ArguAna.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 76.89, + "map_at_1": 0.22404, + "map_at_10": 0.36845, + "map_at_100": 0.37945, + "map_at_1000": 0.37966, + "map_at_3": 0.3178, + "map_at_5": 0.34608, + "mrr_at_1": 0.22902, + "mrr_at_10": 0.37034, + "mrr_at_100": 0.38134, + "mrr_at_1000": 0.38155, + "mrr_at_3": 0.31935, + "mrr_at_5": 0.34812, + "ndcg_at_1": 0.22404, + "ndcg_at_10": 0.45425, + "ndcg_at_100": 0.50354, + "ndcg_at_1000": 0.50874, + "ndcg_at_3": 0.3497, + "ndcg_at_5": 0.40081, + "precision_at_1": 0.22404, + "precision_at_10": 0.07304, + "precision_at_100": 0.00951, + "precision_at_1000": 0.00099, + "precision_at_3": 0.14746, + "precision_at_5": 0.11337, + "recall_at_1": 0.22404, + "recall_at_10": 0.73044, + "recall_at_100": 0.95092, + "recall_at_1000": 0.99075, + "recall_at_3": 0.44239, + "recall_at_5": 0.56686 + } +} \ No newline at end of file diff --git a/evaluation/mteb/ArxivClusteringP2P.json b/evaluation/mteb/ArxivClusteringP2P.json new file mode 100644 index 0000000000000000000000000000000000000000..1b5ffb7537ccfdaa9eeb42ca80481b1a0be43a2a --- /dev/null +++ b/evaluation/mteb/ArxivClusteringP2P.json @@ -0,0 +1,9 @@ +{ + "test": { + "evaluation_time": 4524.41, + "v_measure": 0.3970858340673288, + "v_measure_std": 0.13892512061332984 + }, + "dataset_version": null, + "mteb_version": "0.0.2" +} \ No newline at end of file diff --git a/evaluation/mteb/ArxivClusteringS2S.json b/evaluation/mteb/ArxivClusteringS2S.json new file mode 100644 index 0000000000000000000000000000000000000000..1849df48a3b32324cd3761421e30267a075288d7 --- /dev/null +++ b/evaluation/mteb/ArxivClusteringS2S.json @@ -0,0 +1,9 @@ +{ + "test": { + "evaluation_time": 570.69, + "v_measure": 0.2824284771372105, + "v_measure_std": 0.14515433452032064 + }, + "dataset_version": null, + "mteb_version": "0.0.2" +} \ No newline at end of file diff --git a/evaluation/mteb/AskUbuntuDupQuestions.json b/evaluation/mteb/AskUbuntuDupQuestions.json new file mode 100644 index 0000000000000000000000000000000000000000..ee4081d75a3a3e9e22ec55fc165a6a460cf09220 --- /dev/null +++ b/evaluation/mteb/AskUbuntuDupQuestions.json @@ -0,0 +1,9 @@ +{ + "test": { + "evaluation_time": 7.06, + "map": 0.5583700395192394, + "mrr": 0.7038913072154069 + }, + "dataset_version": null, + "mteb_version": "0.0.2" +} \ No newline at end of file diff --git a/evaluation/mteb/BIOSSES.json b/evaluation/mteb/BIOSSES.json new file mode 100644 index 0000000000000000000000000000000000000000..6d5d456f1dddf7a96476e56da0adc701d15004ed --- /dev/null +++ b/evaluation/mteb/BIOSSES.json @@ -0,0 +1,19 @@ +{ + "test": { + "cos_sim": { + "pearson": 0.7925366801756224, + "spearman": 0.7520954502580506 + }, + "euclidean": { + "pearson": 0.7879900722991617, + "spearman": 0.7779996549607588 + }, + "evaluation_time": 2.65, + "manhattan": { + "pearson": 0.7818408109480399, + "spearman": 0.7685958262303105 + } + }, + "dataset_version": null, + "mteb_version": "0.0.2" +} \ No newline at end of file diff --git a/evaluation/mteb/Banking77Classification.json b/evaluation/mteb/Banking77Classification.json new file mode 100644 index 0000000000000000000000000000000000000000..a022497f9197ebba663d2efe0b6bf8d639694101 --- /dev/null +++ b/evaluation/mteb/Banking77Classification.json @@ -0,0 +1,12 @@ +{ + "test": { + "accuracy": 0.7770454545454545, + "accuracy_stderr": 0.007521053263962387, + "evaluation_time": 42.98, + "f1": 0.7769290001138031, + "f1_stderr": 0.007473720123531678, + "main_score": 0.7770454545454545 + }, + "dataset_version": null, + "mteb_version": "0.0.2" +} \ No newline at end of file diff --git a/evaluation/mteb/BiorxivClusteringP2P.json b/evaluation/mteb/BiorxivClusteringP2P.json new file mode 100644 index 0000000000000000000000000000000000000000..be27e1bb6f23058c965758898682024073015f37 --- /dev/null +++ b/evaluation/mteb/BiorxivClusteringP2P.json @@ -0,0 +1,9 @@ +{ + "test": { + "evaluation_time": 553.9, + "v_measure": 0.33632603955439844, + "v_measure_std": 0.010408302575675535 + }, + "dataset_version": null, + "mteb_version": "0.0.2" +} \ No newline at end of file diff --git a/evaluation/mteb/BiorxivClusteringS2S.json b/evaluation/mteb/BiorxivClusteringS2S.json new file mode 100644 index 0000000000000000000000000000000000000000..f758730f3f6372bfc3aaa444096f6d56ae53470c --- /dev/null +++ b/evaluation/mteb/BiorxivClusteringS2S.json @@ -0,0 +1,9 @@ +{ + "test": { + "evaluation_time": 70.35, + "v_measure": 0.27038042665369927, + "v_measure_std": 0.007785436192603769 + }, + "dataset_version": null, + "mteb_version": "0.0.2" +} \ No newline at end of file diff --git a/evaluation/mteb/CQADupstackAndroidRetrieval.json b/evaluation/mteb/CQADupstackAndroidRetrieval.json new file mode 100644 index 0000000000000000000000000000000000000000..b9eee058c4409d2845fa1f25cb9a20742b5895cf --- /dev/null +++ b/evaluation/mteb/CQADupstackAndroidRetrieval.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 64.44, + "map_at_1": 0.22139, + "map_at_10": 0.28839, + "map_at_100": 0.30023, + "map_at_1000": 0.30153, + "map_at_3": 0.26521, + "map_at_5": 0.27775, + "mrr_at_1": 0.26466, + "mrr_at_10": 0.33495, + "mrr_at_100": 0.34417, + "mrr_at_1000": 0.34485, + "mrr_at_3": 0.31402, + "mrr_at_5": 0.32496, + "ndcg_at_1": 0.26466, + "ndcg_at_10": 0.33372, + "ndcg_at_100": 0.387, + "ndcg_at_1000": 0.41696, + "ndcg_at_3": 0.29443, + "ndcg_at_5": 0.31121, + "precision_at_1": 0.26466, + "precision_at_10": 0.06037, + "precision_at_100": 0.01067, + "precision_at_1000": 0.00162, + "precision_at_3": 0.13782, + "precision_at_5": 0.09757, + "recall_at_1": 0.22139, + "recall_at_10": 0.4239, + "recall_at_100": 0.65427, + "recall_at_1000": 0.86049, + "recall_at_3": 0.31127, + "recall_at_5": 0.35718 + } +} \ No newline at end of file diff --git a/evaluation/mteb/CQADupstackEnglishRetrieval.json b/evaluation/mteb/CQADupstackEnglishRetrieval.json new file mode 100644 index 0000000000000000000000000000000000000000..e4f91b15ca12c4831ec5eb104051b9d27dcc4b09 --- /dev/null +++ b/evaluation/mteb/CQADupstackEnglishRetrieval.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 95.23, + "map_at_1": 0.20652, + "map_at_10": 0.27558, + "map_at_100": 0.28473, + "map_at_1000": 0.28577, + "map_at_3": 0.25402, + "map_at_5": 0.2668, + "mrr_at_1": 0.25223, + "mrr_at_10": 0.31966, + "mrr_at_100": 0.32664, + "mrr_at_1000": 0.32724, + "mrr_at_3": 0.30074, + "mrr_at_5": 0.31249, + "ndcg_at_1": 0.25223, + "ndcg_at_10": 0.31694, + "ndcg_at_100": 0.35662, + "ndcg_at_1000": 0.38092, + "ndcg_at_3": 0.28294, + "ndcg_at_5": 0.30049, + "precision_at_1": 0.25223, + "precision_at_10": 0.05777, + "precision_at_100": 0.00973, + "precision_at_1000": 0.0014, + "precision_at_3": 0.13397, + "precision_at_5": 0.09605, + "recall_at_1": 0.20652, + "recall_at_10": 0.39368, + "recall_at_100": 0.56485, + "recall_at_1000": 0.73292, + "recall_at_3": 0.2983, + "recall_at_5": 0.3443 + } +} \ No newline at end of file diff --git a/evaluation/mteb/CQADupstackGamingRetrieval.json b/evaluation/mteb/CQADupstackGamingRetrieval.json new file mode 100644 index 0000000000000000000000000000000000000000..7b8465cb5356369f0f8899fecf9dfce64e8a76f3 --- /dev/null +++ b/evaluation/mteb/CQADupstackGamingRetrieval.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 104.86, + "map_at_1": 0.2518, + "map_at_10": 0.34579, + "map_at_100": 0.3559, + "map_at_1000": 0.3568, + "map_at_3": 0.31736, + "map_at_5": 0.33479, + "mrr_at_1": 0.29467, + "mrr_at_10": 0.37967, + "mrr_at_100": 0.388, + "mrr_at_1000": 0.38858, + "mrr_at_3": 0.35465, + "mrr_at_5": 0.37057, + "ndcg_at_1": 0.29467, + "ndcg_at_10": 0.39796, + "ndcg_at_100": 0.44531, + "ndcg_at_1000": 0.46666, + "ndcg_at_3": 0.34676, + "ndcg_at_5": 0.37468, + "precision_at_1": 0.29467, + "precision_at_10": 0.06602, + "precision_at_100": 0.0099, + "precision_at_1000": 0.00124, + "precision_at_3": 0.15569, + "precision_at_5": 0.11172, + "recall_at_1": 0.2518, + "recall_at_10": 0.52269, + "recall_at_100": 0.73574, + "recall_at_1000": 0.89141, + "recall_at_3": 0.38522, + "recall_at_5": 0.45323 + } +} \ No newline at end of file diff --git a/evaluation/mteb/CQADupstackGisRetrieval.json b/evaluation/mteb/CQADupstackGisRetrieval.json new file mode 100644 index 0000000000000000000000000000000000000000..895fc7f951f172a3c19189cce2305f3e1eed00e2 --- /dev/null +++ b/evaluation/mteb/CQADupstackGisRetrieval.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 141.33, + "map_at_1": 0.16303, + "map_at_10": 0.21629, + "map_at_100": 0.22388, + "map_at_1000": 0.22489, + "map_at_3": 0.19608, + "map_at_5": 0.20774, + "mrr_at_1": 0.1774, + "mrr_at_10": 0.23214, + "mrr_at_100": 0.2397, + "mrr_at_1000": 0.24054, + "mrr_at_3": 0.21243, + "mrr_at_5": 0.22322, + "ndcg_at_1": 0.1774, + "ndcg_at_10": 0.25113, + "ndcg_at_100": 0.29288, + "ndcg_at_1000": 0.32204, + "ndcg_at_3": 0.21111, + "ndcg_at_5": 0.23062, + "precision_at_1": 0.1774, + "precision_at_10": 0.03955, + "precision_at_100": 0.00644, + "precision_at_1000": 0.00093, + "precision_at_3": 0.08851, + "precision_at_5": 0.06418, + "recall_at_1": 0.16303, + "recall_at_10": 0.34487, + "recall_at_100": 0.54414, + "recall_at_1000": 0.77158, + "recall_at_3": 0.23733, + "recall_at_5": 0.28381 + } +} \ No newline at end of file diff --git a/evaluation/mteb/CQADupstackMathematicaRetrieval.json b/evaluation/mteb/CQADupstackMathematicaRetrieval.json new file mode 100644 index 0000000000000000000000000000000000000000..aa5fc220a338b6aa9432a5d84a430f955e71159b --- /dev/null +++ b/evaluation/mteb/CQADupstackMathematicaRetrieval.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 78.71, + "map_at_1": 0.10133, + "map_at_10": 0.15666, + "map_at_100": 0.16592, + "map_at_1000": 0.16734, + "map_at_3": 0.13625, + "map_at_5": 0.14721, + "mrr_at_1": 0.12562, + "mrr_at_10": 0.18487, + "mrr_at_100": 0.19391, + "mrr_at_1000": 0.19487, + "mrr_at_3": 0.16418, + "mrr_at_5": 0.176, + "ndcg_at_1": 0.12562, + "ndcg_at_10": 0.1943, + "ndcg_at_100": 0.24546, + "ndcg_at_1000": 0.28193, + "ndcg_at_3": 0.1551, + "ndcg_at_5": 0.17322, + "precision_at_1": 0.12562, + "precision_at_10": 0.03794, + "precision_at_100": 0.0074, + "precision_at_1000": 0.00122, + "precision_at_3": 0.07546, + "precision_at_5": 0.05721, + "recall_at_1": 0.10133, + "recall_at_10": 0.28262, + "recall_at_100": 0.51743, + "recall_at_1000": 0.78075, + "recall_at_3": 0.17634, + "recall_at_5": 0.22129 + } +} \ No newline at end of file diff --git a/evaluation/mteb/CQADupstackPhysicsRetrieval.json b/evaluation/mteb/CQADupstackPhysicsRetrieval.json new file mode 100644 index 0000000000000000000000000000000000000000..ad7e6766a73c6acfe06a4408fd2cc2a7d9a64ffc --- /dev/null +++ b/evaluation/mteb/CQADupstackPhysicsRetrieval.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 136.6, + "map_at_1": 0.19992, + "map_at_10": 0.27347, + "map_at_100": 0.28582, + "map_at_1000": 0.28716, + "map_at_3": 0.24907, + "map_at_5": 0.261, + "mrr_at_1": 0.23773, + "mrr_at_10": 0.31647, + "mrr_at_100": 0.32639, + "mrr_at_1000": 0.32706, + "mrr_at_3": 0.29195, + "mrr_at_5": 0.30484, + "ndcg_at_1": 0.23773, + "ndcg_at_10": 0.32322, + "ndcg_at_100": 0.37996, + "ndcg_at_1000": 0.40819, + "ndcg_at_3": 0.27876, + "ndcg_at_5": 0.29664, + "precision_at_1": 0.23773, + "precision_at_10": 0.05977, + "precision_at_100": 0.01055, + "precision_at_1000": 0.0015, + "precision_at_3": 0.13122, + "precision_at_5": 0.09451, + "recall_at_1": 0.19992, + "recall_at_10": 0.43106, + "recall_at_100": 0.67264, + "recall_at_1000": 0.86386, + "recall_at_3": 0.30392, + "recall_at_5": 0.34911 + } +} \ No newline at end of file diff --git a/evaluation/mteb/CQADupstackProgrammersRetrieval.json b/evaluation/mteb/CQADupstackProgrammersRetrieval.json new file mode 100644 index 0000000000000000000000000000000000000000..44aa04647c74e841072d3b0a147c02754fc696e6 --- /dev/null +++ b/evaluation/mteb/CQADupstackProgrammersRetrieval.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 130.5, + "map_at_1": 0.17896, + "map_at_10": 0.24644, + "map_at_100": 0.2579, + "map_at_1000": 0.25914, + "map_at_3": 0.22694, + "map_at_5": 0.2369, + "mrr_at_1": 0.21347, + "mrr_at_10": 0.28594, + "mrr_at_100": 0.29544, + "mrr_at_1000": 0.29621, + "mrr_at_3": 0.26807, + "mrr_at_5": 0.27669, + "ndcg_at_1": 0.21347, + "ndcg_at_10": 0.28833, + "ndcg_at_100": 0.34272, + "ndcg_at_1000": 0.37355, + "ndcg_at_3": 0.25373, + "ndcg_at_5": 0.26756, + "precision_at_1": 0.21347, + "precision_at_10": 0.05217, + "precision_at_100": 0.00954, + "precision_at_1000": 0.00139, + "precision_at_3": 0.11948, + "precision_at_5": 0.08425, + "recall_at_1": 0.17896, + "recall_at_10": 0.37291, + "recall_at_100": 0.61138, + "recall_at_1000": 0.83212, + "recall_at_3": 0.27706, + "recall_at_5": 0.31234 + } +} \ No newline at end of file diff --git a/evaluation/mteb/CQADupstackRetrieval.json b/evaluation/mteb/CQADupstackRetrieval.json new file mode 100644 index 0000000000000000000000000000000000000000..e7605bf4d5e3e4ea8262d953cd48fcf80956db74 --- /dev/null +++ b/evaluation/mteb/CQADupstackRetrieval.json @@ -0,0 +1 @@ +{"dataset_version": null, "mteb_version": "0.0.2", "test": {"evaluation_time": 62.83, "map_at_1": 0.17195166666666667, "map_at_10": 0.23329083333333334, "map_at_100": 0.2430308333333333, "map_at_1000": 0.24422416666666666, "map_at_3": 0.21327416666666665, "map_at_5": 0.22419999999999998, "mrr_at_1": 0.19999916666666667, "mrr_at_10": 0.26390166666666665, "mrr_at_100": 0.27231, "mrr_at_1000": 0.27308333333333334, "mrr_at_3": 0.244675, "mrr_at_5": 0.25541083333333336, "ndcg_at_1": 0.19999916666666667, "ndcg_at_10": 0.27248666666666665, "ndcg_at_100": 0.3200258333333334, "ndcg_at_1000": 0.34946499999999997, "ndcg_at_3": 0.2358566666666667, "ndcg_at_5": 0.2526341666666666, "precision_at_1": 0.19999916666666667, "precision_at_10": 0.04772166666666666, "precision_at_100": 0.00847, "precision_at_1000": 0.0012741666666666667, "precision_at_3": 0.10756166666666668, "precision_at_5": 0.07725416666666667, "recall_at_1": 0.17195166666666667, "recall_at_10": 0.35990833333333344, "recall_at_100": 0.57468, "recall_at_1000": 0.7882366666666667, "recall_at_3": 0.25898499999999997, "recall_at_5": 0.30084333333333335}} \ No newline at end of file diff --git a/evaluation/mteb/CQADupstackStatsRetrieval.json b/evaluation/mteb/CQADupstackStatsRetrieval.json new file mode 100644 index 0000000000000000000000000000000000000000..b9f799ff77f1129dfc3754598a1e5e335d1830df --- /dev/null +++ b/evaluation/mteb/CQADupstackStatsRetrieval.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 177.3, + "map_at_1": 0.16779, + "map_at_10": 0.21557, + "map_at_100": 0.22338, + "map_at_1000": 0.22421, + "map_at_3": 0.19939, + "map_at_5": 0.20903, + "mrr_at_1": 0.18405, + "mrr_at_10": 0.23435, + "mrr_at_100": 0.24179, + "mrr_at_1000": 0.2425, + "mrr_at_3": 0.21907, + "mrr_at_5": 0.22781, + "ndcg_at_1": 0.18405, + "ndcg_at_10": 0.24515, + "ndcg_at_100": 0.28721, + "ndcg_at_1000": 0.3126, + "ndcg_at_3": 0.21508, + "ndcg_at_5": 0.2301, + "precision_at_1": 0.18405, + "precision_at_10": 0.03834, + "precision_at_100": 0.00641, + "precision_at_1000": 0.00093, + "precision_at_3": 0.09151, + "precision_at_5": 0.06503, + "recall_at_1": 0.16779, + "recall_at_10": 0.3173, + "recall_at_100": 0.51673, + "recall_at_1000": 0.71176, + "recall_at_3": 0.23518, + "recall_at_5": 0.27231 + } +} \ No newline at end of file diff --git a/evaluation/mteb/CQADupstackTexRetrieval.json b/evaluation/mteb/CQADupstackTexRetrieval.json new file mode 100644 index 0000000000000000000000000000000000000000..34542d0dd584c5dde43c0495c2a069e4a49541c9 --- /dev/null +++ b/evaluation/mteb/CQADupstackTexRetrieval.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 300.62, + "map_at_1": 0.09279, + "map_at_10": 0.13822, + "map_at_100": 0.14533, + "map_at_1000": 0.1465, + "map_at_3": 0.12396, + "map_at_5": 0.13214, + "mrr_at_1": 0.11149, + "mrr_at_10": 0.16139, + "mrr_at_100": 0.16872, + "mrr_at_1000": 0.16964, + "mrr_at_3": 0.14613, + "mrr_at_5": 0.15486, + "ndcg_at_1": 0.11149, + "ndcg_at_10": 0.1682, + "ndcg_at_100": 0.2073, + "ndcg_at_1000": 0.23894, + "ndcg_at_3": 0.1411, + "ndcg_at_5": 0.15404, + "precision_at_1": 0.11149, + "precision_at_10": 0.03063, + "precision_at_100": 0.00587, + "precision_at_1000": 0.001, + "precision_at_3": 0.06699, + "precision_at_5": 0.04928, + "recall_at_1": 0.09279, + "recall_at_10": 0.23745, + "recall_at_100": 0.41873, + "recall_at_1000": 0.64982, + "recall_at_3": 0.16152, + "recall_at_5": 0.19409 + } +} \ No newline at end of file diff --git a/evaluation/mteb/CQADupstackUnixRetrieval.json b/evaluation/mteb/CQADupstackUnixRetrieval.json new file mode 100644 index 0000000000000000000000000000000000000000..4d03b244a3286f08ec27023a7055e92cc22aaa92 --- /dev/null +++ b/evaluation/mteb/CQADupstackUnixRetrieval.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 185.1, + "map_at_1": 0.1636, + "map_at_10": 0.21927, + "map_at_100": 0.22889, + "map_at_1000": 0.22994, + "map_at_3": 0.20433, + "map_at_5": 0.21337, + "mrr_at_1": 0.1875, + "mrr_at_10": 0.24859, + "mrr_at_100": 0.25747, + "mrr_at_1000": 0.25829, + "mrr_at_3": 0.23383, + "mrr_at_5": 0.24297, + "ndcg_at_1": 0.1875, + "ndcg_at_10": 0.25372, + "ndcg_at_100": 0.30343, + "ndcg_at_1000": 0.33286, + "ndcg_at_3": 0.22627, + "ndcg_at_5": 0.2404, + "precision_at_1": 0.1875, + "precision_at_10": 0.04142, + "precision_at_100": 0.00738, + "precision_at_1000": 0.00111, + "precision_at_3": 0.10261, + "precision_at_5": 0.07164, + "recall_at_1": 0.1636, + "recall_at_10": 0.32949, + "recall_at_100": 0.55552, + "recall_at_1000": 0.77099, + "recall_at_3": 0.25538, + "recall_at_5": 0.29008 + } +} \ No newline at end of file diff --git a/evaluation/mteb/CQADupstackWebmastersRetrieval.json b/evaluation/mteb/CQADupstackWebmastersRetrieval.json new file mode 100644 index 0000000000000000000000000000000000000000..f91a0ae469c5e0f98d89430a69f3483734ea563d --- /dev/null +++ b/evaluation/mteb/CQADupstackWebmastersRetrieval.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 62.83, + "map_at_1": 0.1739, + "map_at_10": 0.23058, + "map_at_100": 0.24445, + "map_at_1000": 0.24638, + "map_at_3": 0.21037, + "map_at_5": 0.21966, + "mrr_at_1": 0.1996, + "mrr_at_10": 0.26301, + "mrr_at_100": 0.27297, + "mrr_at_1000": 0.27375, + "mrr_at_3": 0.24341, + "mrr_at_5": 0.25339, + "ndcg_at_1": 0.1996, + "ndcg_at_10": 0.27249, + "ndcg_at_100": 0.32997, + "ndcg_at_1000": 0.36359, + "ndcg_at_3": 0.23519, + "ndcg_at_5": 0.24915, + "precision_at_1": 0.1996, + "precision_at_10": 0.05356, + "precision_at_100": 0.01198, + "precision_at_1000": 0.00204, + "precision_at_3": 0.10738, + "precision_at_5": 0.07905, + "recall_at_1": 0.1739, + "recall_at_10": 0.35255, + "recall_at_100": 0.61351, + "recall_at_1000": 0.84395, + "recall_at_3": 0.25194, + "recall_at_5": 0.28546 + } +} \ No newline at end of file diff --git a/evaluation/mteb/CQADupstackWordpressRetrieval.json b/evaluation/mteb/CQADupstackWordpressRetrieval.json new file mode 100644 index 0000000000000000000000000000000000000000..7dc3064eb1664a31b756f5bce957611a22c18504 --- /dev/null +++ b/evaluation/mteb/CQADupstackWordpressRetrieval.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 194.26, + "map_at_1": 0.14239, + "map_at_10": 0.19323, + "map_at_100": 0.19994, + "map_at_1000": 0.20103, + "map_at_3": 0.17631, + "map_at_5": 0.18401, + "mrr_at_1": 0.15157, + "mrr_at_10": 0.20578, + "mrr_at_100": 0.21252, + "mrr_at_1000": 0.21347, + "mrr_at_3": 0.18762, + "mrr_at_5": 0.19713, + "ndcg_at_1": 0.15157, + "ndcg_at_10": 0.22468, + "ndcg_at_100": 0.26245, + "ndcg_at_1000": 0.29534, + "ndcg_at_3": 0.18981, + "ndcg_at_5": 0.2035, + "precision_at_1": 0.15157, + "precision_at_10": 0.03512, + "precision_at_100": 0.00577, + "precision_at_1000": 0.00091, + "precision_at_3": 0.0801, + "precision_at_5": 0.05656, + "recall_at_1": 0.14239, + "recall_at_10": 0.31038, + "recall_at_100": 0.49122, + "recall_at_1000": 0.74919, + "recall_at_3": 0.21436, + "recall_at_5": 0.24692 + } +} \ No newline at end of file diff --git a/evaluation/mteb/ClimateFEVER.json b/evaluation/mteb/ClimateFEVER.json new file mode 100644 index 0000000000000000000000000000000000000000..4ea137ee0d276e578e1a8c40b90f8c4824b6288f --- /dev/null +++ b/evaluation/mteb/ClimateFEVER.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 3210.96, + "map_at_1": 0.08828, + "map_at_10": 0.14982, + "map_at_100": 0.16495, + "map_at_1000": 0.16658, + "map_at_3": 0.12366, + "map_at_5": 0.13655, + "mrr_at_1": 0.19088, + "mrr_at_10": 0.2929, + "mrr_at_100": 0.30291, + "mrr_at_1000": 0.30342, + "mrr_at_3": 0.25907, + "mrr_at_5": 0.27841, + "ndcg_at_1": 0.19088, + "ndcg_at_10": 0.21858, + "ndcg_at_100": 0.28324, + "ndcg_at_1000": 0.31561, + "ndcg_at_3": 0.17175, + "ndcg_at_5": 0.18869, + "precision_at_1": 0.19088, + "precision_at_10": 0.06919, + "precision_at_100": 0.01376, + "precision_at_1000": 0.00197, + "precision_at_3": 0.12704, + "precision_at_5": 0.09993, + "recall_at_1": 0.08828, + "recall_at_10": 0.27381, + "recall_at_100": 0.5, + "recall_at_1000": 0.68355, + "recall_at_3": 0.16118, + "recall_at_5": 0.20587 + } +} \ No newline at end of file diff --git a/evaluation/mteb/DBPedia.json b/evaluation/mteb/DBPedia.json new file mode 100644 index 0000000000000000000000000000000000000000..a5b11a612a9eb3a201d36d0796e4acc3a7b1f051 --- /dev/null +++ b/evaluation/mteb/DBPedia.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 2312.27, + "map_at_1": 0.05586, + "map_at_10": 0.1004, + "map_at_100": 0.1255, + "map_at_1000": 0.13124, + "map_at_3": 0.0775, + "map_at_5": 0.08836, + "mrr_at_1": 0.4225, + "mrr_at_10": 0.51206, + "mrr_at_100": 0.51818, + "mrr_at_1000": 0.51855, + "mrr_at_3": 0.48875, + "mrr_at_5": 0.50488, + "ndcg_at_1": 0.3225, + "ndcg_at_10": 0.22718, + "ndcg_at_100": 0.24359, + "ndcg_at_1000": 0.29232, + "ndcg_at_3": 0.25974, + "ndcg_at_5": 0.24292, + "precision_at_1": 0.4225, + "precision_at_10": 0.1775, + "precision_at_100": 0.05032, + "precision_at_1000": 0.01117, + "precision_at_3": 0.28833, + "precision_at_5": 0.2425, + "recall_at_1": 0.05586, + "recall_at_10": 0.1416, + "recall_at_100": 0.28051, + "recall_at_1000": 0.45157, + "recall_at_3": 0.08758, + "recall_at_5": 0.10976 + } +} \ No newline at end of file diff --git a/evaluation/mteb/EmotionClassification.json b/evaluation/mteb/EmotionClassification.json new file mode 100644 index 0000000000000000000000000000000000000000..0d2eb0a8d679b96c039711749559104ef0ce5c16 --- /dev/null +++ b/evaluation/mteb/EmotionClassification.json @@ -0,0 +1,20 @@ +{ + "test": { + "accuracy": 0.39075000000000004, + "accuracy_stderr": 0.02011000994529839, + "evaluation_time": 21.65, + "f1": 0.3501420354708222, + "f1_stderr": 0.015595291440010818, + "main_score": 0.39075000000000004 + }, + "validation": { + "accuracy": 0.40695, + "accuracy_stderr": 0.03307298746711582, + "evaluation_time": 24.95, + "f1": 0.3668185447111504, + "f1_stderr": 0.02395586698305724, + "main_score": 0.40695 + }, + "dataset_version": null, + "mteb_version": "0.0.2" +} \ No newline at end of file diff --git a/evaluation/mteb/FEVER.json b/evaluation/mteb/FEVER.json new file mode 100644 index 0000000000000000000000000000000000000000..76a9b3f6c418eb925807862cee6113f9e330bd4d --- /dev/null +++ b/evaluation/mteb/FEVER.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 4114.33, + "map_at_1": 0.4352, + "map_at_10": 0.54368, + "map_at_100": 0.54918, + "map_at_1000": 0.54942, + "map_at_3": 0.51712, + "map_at_5": 0.53336, + "mrr_at_1": 0.46955, + "mrr_at_10": 0.58219, + "mrr_at_100": 0.58735, + "mrr_at_1000": 0.58753, + "mrr_at_3": 0.55518, + "mrr_at_5": 0.57191, + "ndcg_at_1": 0.46955, + "ndcg_at_10": 0.6045, + "ndcg_at_100": 0.63047, + "ndcg_at_1000": 0.63713, + "ndcg_at_3": 0.55233, + "ndcg_at_5": 0.58072, + "precision_at_1": 0.46955, + "precision_at_10": 0.08267, + "precision_at_100": 0.00962, + "precision_at_1000": 0.00103, + "precision_at_3": 0.22327, + "precision_at_5": 0.14941, + "recall_at_1": 0.4352, + "recall_at_10": 0.75632, + "recall_at_100": 0.87416, + "recall_at_1000": 0.92557, + "recall_at_3": 0.61597, + "recall_at_5": 0.68518 + } +} \ No newline at end of file diff --git a/evaluation/mteb/FiQA2018.json b/evaluation/mteb/FiQA2018.json new file mode 100644 index 0000000000000000000000000000000000000000..04f966a0dd3f2d5adff5acc5691ae1377bf16d14 --- /dev/null +++ b/evaluation/mteb/FiQA2018.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 127.8, + "map_at_1": 0.09549, + "map_at_10": 0.15762, + "map_at_100": 0.17142, + "map_at_1000": 0.17329, + "map_at_3": 0.13575, + "map_at_5": 0.14754, + "mrr_at_1": 0.19753, + "mrr_at_10": 0.26568, + "mrr_at_100": 0.27606, + "mrr_at_1000": 0.2768, + "mrr_at_3": 0.24203, + "mrr_at_5": 0.25669, + "ndcg_at_1": 0.19753, + "ndcg_at_10": 0.21118, + "ndcg_at_100": 0.27308, + "ndcg_at_1000": 0.31304, + "ndcg_at_3": 0.18319, + "ndcg_at_5": 0.19414, + "precision_at_1": 0.19753, + "precision_at_10": 0.0608, + "precision_at_100": 0.01204, + "precision_at_1000": 0.00192, + "precision_at_3": 0.12191, + "precision_at_5": 0.09383, + "recall_at_1": 0.09549, + "recall_at_10": 0.26131, + "recall_at_100": 0.50545, + "recall_at_1000": 0.74968, + "recall_at_3": 0.16951, + "recall_at_5": 0.2095 + } +} \ No newline at end of file diff --git a/evaluation/mteb/HotpotQA.json b/evaluation/mteb/HotpotQA.json new file mode 100644 index 0000000000000000000000000000000000000000..643e7a8362e4508feb5c8b39b3c9d0b16052bb69 --- /dev/null +++ b/evaluation/mteb/HotpotQA.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 3659.36, + "map_at_1": 0.25544, + "map_at_10": 0.3262, + "map_at_100": 0.33275, + "map_at_1000": 0.33344, + "map_at_3": 0.30851, + "map_at_5": 0.31869, + "mrr_at_1": 0.51087, + "mrr_at_10": 0.57704, + "mrr_at_100": 0.58175, + "mrr_at_1000": 0.58207, + "mrr_at_3": 0.56106, + "mrr_at_5": 0.57074, + "ndcg_at_1": 0.51087, + "ndcg_at_10": 0.40876, + "ndcg_at_100": 0.43762, + "ndcg_at_1000": 0.45423, + "ndcg_at_3": 0.3765, + "ndcg_at_5": 0.39305, + "precision_at_1": 0.51087, + "precision_at_10": 0.08304, + "precision_at_100": 0.01059, + "precision_at_1000": 0.00128, + "precision_at_3": 0.22876, + "precision_at_5": 0.15033, + "recall_at_1": 0.25544, + "recall_at_10": 0.41519, + "recall_at_100": 0.52957, + "recall_at_1000": 0.64132, + "recall_at_3": 0.34315, + "recall_at_5": 0.37583 + } +} \ No newline at end of file diff --git a/evaluation/mteb/ImdbClassification.json b/evaluation/mteb/ImdbClassification.json new file mode 100644 index 0000000000000000000000000000000000000000..56aa316256645083468127ea77ac9393b1617c4e --- /dev/null +++ b/evaluation/mteb/ImdbClassification.json @@ -0,0 +1,14 @@ +{ + "test": { + "accuracy": 0.586696, + "accuracy_stderr": 0.04366294612139682, + "ap": 0.553644880984279, + "ap_stderr": 0.02927763434288163, + "evaluation_time": 1493.91, + "f1": 0.5807942097405652, + "f1_stderr": 0.046729535979483976, + "main_score": 0.586696 + }, + "dataset_version": null, + "mteb_version": "0.0.2" +} \ No newline at end of file diff --git a/evaluation/mteb/MSMARCO.json b/evaluation/mteb/MSMARCO.json new file mode 100644 index 0000000000000000000000000000000000000000..d7ed999e1bb472cd5d32cca0a8d3e18cd73c32f8 --- /dev/null +++ b/evaluation/mteb/MSMARCO.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "validation": { + "evaluation_time": 6441.95, + "map_at_1": 0.14442, + "map_at_10": 0.22932, + "map_at_100": 0.24132, + "map_at_1000": 0.24213, + "map_at_3": 0.20002, + "map_at_5": 0.21636, + "mrr_at_1": 0.14842, + "mrr_at_10": 0.23416, + "mrr_at_100": 0.24594, + "mrr_at_1000": 0.24669, + "mrr_at_3": 0.20494, + "mrr_at_5": 0.2214, + "ndcg_at_1": 0.14842, + "ndcg_at_10": 0.27975, + "ndcg_at_100": 0.34143, + "ndcg_at_1000": 0.3637, + "ndcg_at_3": 0.21944, + "ndcg_at_5": 0.24881, + "precision_at_1": 0.14842, + "precision_at_10": 0.04537, + "precision_at_100": 0.00767, + "precision_at_1000": 0.00096, + "precision_at_3": 0.09322, + "precision_at_5": 0.07074, + "recall_at_1": 0.14442, + "recall_at_10": 0.43557, + "recall_at_100": 0.72904, + "recall_at_1000": 0.90407, + "recall_at_3": 0.27088, + "recall_at_5": 0.34144 + } +} \ No newline at end of file diff --git a/evaluation/mteb/MTOPDomainClassification.json b/evaluation/mteb/MTOPDomainClassification.json new file mode 100644 index 0000000000000000000000000000000000000000..298e4844b294b4686748a54872046f8fecf40634 --- /dev/null +++ b/evaluation/mteb/MTOPDomainClassification.json @@ -0,0 +1,94 @@ +{ + "test": { + "de": { + "accuracy": 0.6273034657650043, + "accuracy_stderr": 0.026750019843962445, + "f1": 0.6078623915840713, + "f1_stderr": 0.024545709453124315, + "main_score": 0.6273034657650043 + }, + "en": { + "accuracy": 0.8695622435020519, + "accuracy_stderr": 0.005091511188494251, + "f1": 0.8658363130708494, + "f1_stderr": 0.0051871184408422396, + "main_score": 0.8695622435020519 + }, + "es": { + "accuracy": 0.6754503002001334, + "accuracy_stderr": 0.018521804912221235, + "f1": 0.6534879794116112, + "f1_stderr": 0.017925402376902783, + "main_score": 0.6754503002001334 + }, + "evaluation_time": 184.8, + "fr": { + "accuracy": 0.653523332289383, + "accuracy_stderr": 0.021385772427901146, + "f1": 0.6299400188244665, + "f1_stderr": 0.019299664587033034, + "main_score": 0.653523332289383 + }, + "hi": { + "accuracy": 0.45371100752958055, + "accuracy_stderr": 0.00950289876773436, + "f1": 0.4426285860740745, + "f1_stderr": 0.008239449950704894, + "main_score": 0.45371100752958055 + }, + "th": { + "accuracy": 0.5527667269439421, + "accuracy_stderr": 0.026744140679562094, + "f1": 0.5328388179869588, + "f1_stderr": 0.024911722678940297, + "main_score": 0.5527667269439421 + } + }, + "validation": { + "de": { + "accuracy": 0.623030303030303, + "accuracy_stderr": 0.018007006691627983, + "f1": 0.6090030451296102, + "f1_stderr": 0.016252335030927607, + "main_score": 0.623030303030303 + }, + "en": { + "accuracy": 0.8647874720357942, + "accuracy_stderr": 0.008134813907732269, + "f1": 0.8643221005490525, + "f1_stderr": 0.007924300687945415, + "main_score": 0.8647874720357942 + }, + "es": { + "accuracy": 0.6768172888015718, + "accuracy_stderr": 0.017930410630693398, + "f1": 0.6614808491907962, + "f1_stderr": 0.01724199330369825, + "main_score": 0.6768172888015718 + }, + "evaluation_time": 129.45, + "fr": { + "accuracy": 0.6497780596068484, + "accuracy_stderr": 0.025761089702497523, + "f1": 0.6384542819967916, + "f1_stderr": 0.02439328627444013, + "main_score": 0.6497780596068484 + }, + "hi": { + "accuracy": 0.44483101391650104, + "accuracy_stderr": 0.010657528607643997, + "f1": 0.441310151844022, + "f1_stderr": 0.007308843847135203, + "main_score": 0.44483101391650104 + }, + "th": { + "accuracy": 0.5390783961699582, + "accuracy_stderr": 0.021304608019150205, + "f1": 0.5230342245719538, + "f1_stderr": 0.0192316272968326, + "main_score": 0.5390783961699582 + } + }, + "dataset_version": null, + "mteb_version": "0.0.2" +} \ No newline at end of file diff --git a/evaluation/mteb/MTOPIntentClassification.json b/evaluation/mteb/MTOPIntentClassification.json new file mode 100644 index 0000000000000000000000000000000000000000..b0a36628c68e9e8642bcfee7a165dc969343b871 --- /dev/null +++ b/evaluation/mteb/MTOPIntentClassification.json @@ -0,0 +1,94 @@ +{ + "test": { + "de": { + "accuracy": 0.4956043956043956, + "accuracy_stderr": 0.013804089435250997, + "f1": 0.32863336734985976, + "f1_stderr": 0.010106922158537998, + "main_score": 0.4956043956043956 + }, + "en": { + "accuracy": 0.6225262197902417, + "accuracy_stderr": 0.011612339032627523, + "f1": 0.43440840371488526, + "f1_stderr": 0.007521212698716522, + "main_score": 0.6225262197902417 + }, + "es": { + "accuracy": 0.4993995997331555, + "accuracy_stderr": 0.017333218216550737, + "f1": 0.34726671876888127, + "f1_stderr": 0.009821507962106622, + "main_score": 0.4993995997331555 + }, + "evaluation_time": 336.12, + "fr": { + "accuracy": 0.46329470717193855, + "accuracy_stderr": 0.021175134055927164, + "f1": 0.3232527361598279, + "f1_stderr": 0.015670594458593846, + "main_score": 0.46329470717193855 + }, + "hi": { + "accuracy": 0.3220867694514163, + "accuracy_stderr": 0.007943432090835067, + "f1": 0.21321851228151392, + "f1_stderr": 0.006969308099265944, + "main_score": 0.3220867694514163 + }, + "th": { + "accuracy": 0.43627486437613017, + "accuracy_stderr": 0.013057059666123479, + "f1": 0.27048729223475076, + "f1_stderr": 0.007192319024223997, + "main_score": 0.43627486437613017 + } + }, + "validation": { + "de": { + "accuracy": 0.4886501377410468, + "accuracy_stderr": 0.015115810673577453, + "f1": 0.29305729988528817, + "f1_stderr": 0.013274418090447234, + "main_score": 0.4886501377410468 + }, + "en": { + "accuracy": 0.6184787472035794, + "accuracy_stderr": 0.01880584337530569, + "f1": 0.43633936644556937, + "f1_stderr": 0.012308022004906816, + "main_score": 0.6184787472035794 + }, + "es": { + "accuracy": 0.4988212180746562, + "accuracy_stderr": 0.018919199418584817, + "f1": 0.31473396095509043, + "f1_stderr": 0.012676416625486455, + "main_score": 0.4988212180746562 + }, + "evaluation_time": 282.16, + "fr": { + "accuracy": 0.46119213696892836, + "accuracy_stderr": 0.021845344865718933, + "f1": 0.28044706940385955, + "f1_stderr": 0.010116190733840541, + "main_score": 0.46119213696892836 + }, + "hi": { + "accuracy": 0.3071570576540755, + "accuracy_stderr": 0.008319669784761797, + "f1": 0.19812640558669523, + "f1_stderr": 0.0072837780604048784, + "main_score": 0.3071570576540755 + }, + "th": { + "accuracy": 0.42160383004189106, + "accuracy_stderr": 0.010170908236196702, + "f1": 0.26845354880486205, + "f1_stderr": 0.010016659553097792, + "main_score": 0.42160383004189106 + } + }, + "dataset_version": null, + "mteb_version": "0.0.2" +} \ No newline at end of file diff --git a/evaluation/mteb/MassiveIntentClassification.json b/evaluation/mteb/MassiveIntentClassification.json new file mode 100644 index 0000000000000000000000000000000000000000..f641400b3e38feaf79e5bd3c2f198526f45993ae --- /dev/null +++ b/evaluation/mteb/MassiveIntentClassification.json @@ -0,0 +1,724 @@ +{ + "test": { + "af": { + "accuracy": 0.4054808338937458, + "accuracy_stderr": 0.014447259403752678, + "f1": 0.39490307545239717, + "f1_stderr": 0.009212717959217511, + "main_score": 0.4054808338937458 + }, + "am": { + "accuracy": 0.2418291862811029, + "accuracy_stderr": 0.012904896103629838, + "f1": 0.23437620034727474, + "f1_stderr": 0.009361920647479921, + "main_score": 0.2418291862811029 + }, + "ar": { + "accuracy": 0.30134498991257563, + "accuracy_stderr": 0.014810775844007782, + "f1": 0.28787175191531283, + "f1_stderr": 0.012174439394246464, + "main_score": 0.30134498991257563 + }, + "az": { + "accuracy": 0.35884330867518494, + "accuracy_stderr": 0.012717802447884442, + "f1": 0.36264500398782124, + "f1_stderr": 0.010633859333577812, + "main_score": 0.35884330867518494 + }, + "bn": { + "accuracy": 0.2917283120376597, + "accuracy_stderr": 0.011891186572931844, + "f1": 0.278101616531901, + "f1_stderr": 0.010627299989024891, + "main_score": 0.2917283120376597 + }, + "cy": { + "accuracy": 0.41788836583725625, + "accuracy_stderr": 0.015348362273085753, + "f1": 0.39714131810548015, + "f1_stderr": 0.009372174520376655, + "main_score": 0.41788836583725625 + }, + "da": { + "accuracy": 0.44176193678547404, + "accuracy_stderr": 0.00924625540944034, + "f1": 0.4219249982655229, + "f1_stderr": 0.008618307806061426, + "main_score": 0.44176193678547404 + }, + "de": { + "accuracy": 0.4207464694014795, + "accuracy_stderr": 0.006478416557311502, + "f1": 0.39441882591831623, + "f1_stderr": 0.008645545638557534, + "main_score": 0.4207464694014795 + }, + "el": { + "accuracy": 0.362542030934768, + "accuracy_stderr": 0.012041979440806665, + "f1": 0.3446592715936761, + "f1_stderr": 0.009835042341951889, + "main_score": 0.362542030934768 + }, + "en": { + "accuracy": 0.6140887693342301, + "accuracy_stderr": 0.015504655249298095, + "f1": 0.5979854802683996, + "f1_stderr": 0.01208669884455989, + "main_score": 0.6140887693342301 + }, + "es": { + "accuracy": 0.42679892400806996, + "accuracy_stderr": 0.015347073096769526, + "f1": 0.4204801248338172, + "f1_stderr": 0.010741334091620194, + "main_score": 0.42679892400806996 + }, + "evaluation_time": 2374.32, + "fa": { + "accuracy": 0.3559179556153329, + "accuracy_stderr": 0.009888477522743777, + "f1": 0.34045862930486165, + "f1_stderr": 0.0084840956345157, + "main_score": 0.3559179556153329 + }, + "fi": { + "accuracy": 0.40036987222595827, + "accuracy_stderr": 0.013803399246107682, + "f1": 0.3811770343936278, + "f1_stderr": 0.01028587503250668, + "main_score": 0.40036987222595827 + }, + "fr": { + "accuracy": 0.4343981170141224, + "accuracy_stderr": 0.012246472905114743, + "f1": 0.42708438898786494, + "f1_stderr": 0.009512925256512221, + "main_score": 0.4343981170141224 + }, + "he": { + "accuracy": 0.3159381304640215, + "accuracy_stderr": 0.014973549858983267, + "f1": 0.2998550522450782, + "f1_stderr": 0.011727646762679908, + "main_score": 0.3159381304640215 + }, + "hi": { + "accuracy": 0.27044384667114996, + "accuracy_stderr": 0.007563774725762617, + "f1": 0.27313059184832666, + "f1_stderr": 0.008056028697421164, + "main_score": 0.27044384667114996 + }, + "hu": { + "accuracy": 0.38453261600538, + "accuracy_stderr": 0.01598445677659242, + "f1": 0.37309189326110437, + "f1_stderr": 0.010195896901809987, + "main_score": 0.38453261600538 + }, + "hy": { + "accuracy": 0.2797915265635508, + "accuracy_stderr": 0.015095564553015866, + "f1": 0.27430939684346445, + "f1_stderr": 0.011012889120934774, + "main_score": 0.2797915265635508 + }, + "id": { + "accuracy": 0.4397108271687963, + "accuracy_stderr": 0.017542356737953325, + "f1": 0.43405857056887615, + "f1_stderr": 0.010528016012937845, + "main_score": 0.4397108271687963 + }, + "is": { + "accuracy": 0.40302622730329524, + "accuracy_stderr": 0.012582306948623233, + "f1": 0.39108052180520747, + "f1_stderr": 0.009992598454055583, + "main_score": 0.40302622730329524 + }, + "it": { + "accuracy": 0.45474108944182917, + "accuracy_stderr": 0.01340459754818702, + "f1": 0.4585950328241134, + "f1_stderr": 0.010236110188058374, + "main_score": 0.45474108944182917 + }, + "ja": { + "accuracy": 0.4560860793544048, + "accuracy_stderr": 0.010590674677296958, + "f1": 0.4394920708216737, + "f1_stderr": 0.010641769554373246, + "main_score": 0.4560860793544048 + }, + "jv": { + "accuracy": 0.386684599865501, + "accuracy_stderr": 0.012145936599746584, + "f1": 0.37699003401885905, + "f1_stderr": 0.00943165160039381, + "main_score": 0.386684599865501 + }, + "ka": { + "accuracy": 0.25652320107599197, + "accuracy_stderr": 0.008190163090884097, + "f1": 0.25279084273189584, + "f1_stderr": 0.00943958002987095, + "main_score": 0.25652320107599197 + }, + "km": { + "accuracy": 0.28295225285810355, + "accuracy_stderr": 0.011478062668929577, + "f1": 0.2664582563877155, + "f1_stderr": 0.007309349177116762, + "main_score": 0.28295225285810355 + }, + "kn": { + "accuracy": 0.23480161398789506, + "accuracy_stderr": 0.013124898182906054, + "f1": 0.22275241866506734, + "f1_stderr": 0.009107560102876623, + "main_score": 0.23480161398789506 + }, + "ko": { + "accuracy": 0.3655682582380632, + "accuracy_stderr": 0.0138514372378647, + "f1": 0.3600475317106361, + "f1_stderr": 0.012525556508818685, + "main_score": 0.3655682582380632 + }, + "lv": { + "accuracy": 0.4184936112979153, + "accuracy_stderr": 0.01503834658744963, + "f1": 0.4138932672359119, + "f1_stderr": 0.009112866608871466, + "main_score": 0.4184936112979153 + }, + "ml": { + "accuracy": 0.2490921318090114, + "accuracy_stderr": 0.011318911223656808, + "f1": 0.23968687483768808, + "f1_stderr": 0.009623239538185528, + "main_score": 0.2490921318090114 + }, + "mn": { + "accuracy": 0.2986213853396099, + "accuracy_stderr": 0.013066535467207235, + "f1": 0.2997715207525541, + "f1_stderr": 0.012860089685643984, + "main_score": 0.2986213853396099 + }, + "ms": { + "accuracy": 0.4242098184263618, + "accuracy_stderr": 0.013890887562095782, + "f1": 0.4150877432664628, + "f1_stderr": 0.012091881563068337, + "main_score": 0.4242098184263618 + }, + "my": { + "accuracy": 0.25131136516476127, + "accuracy_stderr": 0.0075325439284173994, + "f1": 0.23938932214086775, + "f1_stderr": 0.006306918181473474, + "main_score": 0.25131136516476127 + }, + "nb": { + "accuracy": 0.3981506388702084, + "accuracy_stderr": 0.013968199431054802, + "f1": 0.3880958658779166, + "f1_stderr": 0.011576475125850125, + "main_score": 0.3981506388702084 + }, + "nl": { + "accuracy": 0.4362138533960995, + "accuracy_stderr": 0.01079982721922183, + "f1": 0.4201386842914633, + "f1_stderr": 0.009992268819898372, + "main_score": 0.4362138533960995 + }, + "pl": { + "accuracy": 0.4219569603227976, + "accuracy_stderr": 0.01868909945354249, + "f1": 0.4000556559825827, + "f1_stderr": 0.012543479799886282, + "main_score": 0.4219569603227976 + }, + "pt": { + "accuracy": 0.4520847343644923, + "accuracy_stderr": 0.01543976511380644, + "f1": 0.44241150050290506, + "f1_stderr": 0.008326917982409131, + "main_score": 0.4520847343644923 + }, + "ro": { + "accuracy": 0.4180901143241426, + "accuracy_stderr": 0.016105657510711916, + "f1": 0.40474074848670083, + "f1_stderr": 0.015837216995188204, + "main_score": 0.4180901143241426 + }, + "ru": { + "accuracy": 0.3596839273705447, + "accuracy_stderr": 0.019516291427541597, + "f1": 0.35095456843621, + "f1_stderr": 0.012440228318941022, + "main_score": 0.3596839273705447 + }, + "sl": { + "accuracy": 0.40605245460659045, + "accuracy_stderr": 0.01602119555635458, + "f1": 0.39302383051500134, + "f1_stderr": 0.0108929281580567, + "main_score": 0.40605245460659045 + }, + "sq": { + "accuracy": 0.42757229320780094, + "accuracy_stderr": 0.013061126335718017, + "f1": 0.41537639314973884, + "f1_stderr": 0.011214750874227073, + "main_score": 0.42757229320780094 + }, + "sv": { + "accuracy": 0.42347007397444514, + "accuracy_stderr": 0.01443766953082292, + "f1": 0.41043660179486263, + "f1_stderr": 0.009119701786380115, + "main_score": 0.42347007397444514 + }, + "sw": { + "accuracy": 0.4112306657700067, + "accuracy_stderr": 0.016030226034380948, + "f1": 0.39712940473289027, + "f1_stderr": 0.01079211644663987, + "main_score": 0.4112306657700067 + }, + "ta": { + "accuracy": 0.24603227975790182, + "accuracy_stderr": 0.007348449965253495, + "f1": 0.23969236788828607, + "f1_stderr": 0.00842584449511441, + "main_score": 0.24603227975790182 + }, + "te": { + "accuracy": 0.2503698722259583, + "accuracy_stderr": 0.010974450116174157, + "f1": 0.2437196123281459, + "f1_stderr": 0.007768870065899431, + "main_score": 0.2503698722259583 + }, + "th": { + "accuracy": 0.35400134498991254, + "accuracy_stderr": 0.012277223814879825, + "f1": 0.35063600413688034, + "f1_stderr": 0.008031998429326455, + "main_score": 0.35400134498991254 + }, + "tl": { + "accuracy": 0.4119031607262945, + "accuracy_stderr": 0.014317194926727485, + "f1": 0.4024043230427301, + "f1_stderr": 0.009308382803276337, + "main_score": 0.4119031607262945 + }, + "tr": { + "accuracy": 0.3640551445864156, + "accuracy_stderr": 0.01158473822441319, + "f1": 0.3603844992856558, + "f1_stderr": 0.011004967374166683, + "main_score": 0.3640551445864156 + }, + "ur": { + "accuracy": 0.25934767989240076, + "accuracy_stderr": 0.011556400737346494, + "f1": 0.252074457023531, + "f1_stderr": 0.00828687176833062, + "main_score": 0.25934767989240076 + }, + "vi": { + "accuracy": 0.38799596503026224, + "accuracy_stderr": 0.012823731186170102, + "f1": 0.37160233794673125, + "f1_stderr": 0.013799233781790802, + "main_score": 0.38799596503026224 + }, + "zh-CN": { + "accuracy": 0.4624411566913248, + "accuracy_stderr": 0.01869309179104032, + "f1": 0.44367480561291905, + "f1_stderr": 0.01471127926363261, + "main_score": 0.4624411566913248 + }, + "zh-TW": { + "accuracy": 0.4230665770006724, + "accuracy_stderr": 0.015603332261143462, + "f1": 0.41964222328351397, + "f1_stderr": 0.013651788714198228, + "main_score": 0.4230665770006724 + } + }, + "validation": { + "af": { + "accuracy": 0.4180029513034924, + "accuracy_stderr": 0.010921152256864068, + "f1": 0.408564524920107, + "f1_stderr": 0.011580488915745207, + "main_score": 0.4180029513034924 + }, + "am": { + "accuracy": 0.22936546974913924, + "accuracy_stderr": 0.011915475401965652, + "f1": 0.22443454994948162, + "f1_stderr": 0.01293675359583084, + "main_score": 0.22936546974913924 + }, + "ar": { + "accuracy": 0.2941957697983276, + "accuracy_stderr": 0.013140667522280231, + "f1": 0.278025426878666, + "f1_stderr": 0.01146329357734503, + "main_score": 0.2941957697983276 + }, + "az": { + "accuracy": 0.3528283325135268, + "accuracy_stderr": 0.012346286276438762, + "f1": 0.3586288453850816, + "f1_stderr": 0.013126667211852334, + "main_score": 0.3528283325135268 + }, + "bn": { + "accuracy": 0.29242498770290204, + "accuracy_stderr": 0.012778098967926376, + "f1": 0.2743238187163509, + "f1_stderr": 0.007196357124921039, + "main_score": 0.29242498770290204 + }, + "cy": { + "accuracy": 0.4091982292179046, + "accuracy_stderr": 0.01750643809327536, + "f1": 0.3968229515847022, + "f1_stderr": 0.011600617097504318, + "main_score": 0.4091982292179046 + }, + "da": { + "accuracy": 0.4363994097393015, + "accuracy_stderr": 0.013211089954710703, + "f1": 0.41855164392134825, + "f1_stderr": 0.012090836177572879, + "main_score": 0.4363994097393015 + }, + "de": { + "accuracy": 0.4300049188391539, + "accuracy_stderr": 0.01189667797960137, + "f1": 0.40793611600487506, + "f1_stderr": 0.014239149206748571, + "main_score": 0.4300049188391539 + }, + "el": { + "accuracy": 0.3712739793408756, + "accuracy_stderr": 0.011689463686097046, + "f1": 0.3550955737747622, + "f1_stderr": 0.01024985130519696, + "main_score": 0.3712739793408756 + }, + "en": { + "accuracy": 0.6291687161829808, + "accuracy_stderr": 0.01686014188369135, + "f1": 0.6127498027362954, + "f1_stderr": 0.012061423584454146, + "main_score": 0.6291687161829808 + }, + "es": { + "accuracy": 0.43580914904082635, + "accuracy_stderr": 0.012224082295727747, + "f1": 0.42917002190317477, + "f1_stderr": 0.011405640175485206, + "main_score": 0.43580914904082635 + }, + "evaluation_time": 1881.4, + "fa": { + "accuracy": 0.3547466797835711, + "accuracy_stderr": 0.010601895712032338, + "f1": 0.33945762420706, + "f1_stderr": 0.009267694956350405, + "main_score": 0.3547466797835711 + }, + "fi": { + "accuracy": 0.40078701426463353, + "accuracy_stderr": 0.011417217998563597, + "f1": 0.3883449448052677, + "f1_stderr": 0.012122410583794865, + "main_score": 0.40078701426463353 + }, + "fr": { + "accuracy": 0.4429414658140679, + "accuracy_stderr": 0.015256729395188778, + "f1": 0.43394278610572457, + "f1_stderr": 0.014586034300029853, + "main_score": 0.4429414658140679 + }, + "he": { + "accuracy": 0.311460895228726, + "accuracy_stderr": 0.014779126801429157, + "f1": 0.2995793979884509, + "f1_stderr": 0.013615081888042758, + "main_score": 0.311460895228726 + }, + "hi": { + "accuracy": 0.25961633054599115, + "accuracy_stderr": 0.007878745912513887, + "f1": 0.25982460372695954, + "f1_stderr": 0.0071203448616418506, + "main_score": 0.25961633054599115 + }, + "hu": { + "accuracy": 0.3748155435317265, + "accuracy_stderr": 0.011499989267418064, + "f1": 0.36610577802929695, + "f1_stderr": 0.012185955975190215, + "main_score": 0.3748155435317265 + }, + "hy": { + "accuracy": 0.2815543531726513, + "accuracy_stderr": 0.013230122952822734, + "f1": 0.2770068958000932, + "f1_stderr": 0.01277048573808052, + "main_score": 0.2815543531726513 + }, + "id": { + "accuracy": 0.4424495818986719, + "accuracy_stderr": 0.011069574031270856, + "f1": 0.43510898494968553, + "f1_stderr": 0.0076463739176352115, + "main_score": 0.4424495818986719 + }, + "is": { + "accuracy": 0.40157402852926705, + "accuracy_stderr": 0.015005603921624816, + "f1": 0.3876853823428391, + "f1_stderr": 0.01201970040174986, + "main_score": 0.40157402852926705 + }, + "it": { + "accuracy": 0.456714215445155, + "accuracy_stderr": 0.015036288695625667, + "f1": 0.463502133111645, + "f1_stderr": 0.015187785561573016, + "main_score": 0.456714215445155 + }, + "ja": { + "accuracy": 0.4479094933595672, + "accuracy_stderr": 0.014653187990596124, + "f1": 0.4280973013012505, + "f1_stderr": 0.01655707191595756, + "main_score": 0.4479094933595672 + }, + "jv": { + "accuracy": 0.384505656665027, + "accuracy_stderr": 0.012104842420943921, + "f1": 0.380167978724446, + "f1_stderr": 0.008009104534033291, + "main_score": 0.384505656665027 + }, + "ka": { + "accuracy": 0.24835218888342353, + "accuracy_stderr": 0.013181112319925865, + "f1": 0.24709500138710574, + "f1_stderr": 0.01059101293402143, + "main_score": 0.24835218888342353 + }, + "km": { + "accuracy": 0.2742252828332513, + "accuracy_stderr": 0.011534127186569546, + "f1": 0.2602068523353439, + "f1_stderr": 0.012575174825235999, + "main_score": 0.2742252828332513 + }, + "kn": { + "accuracy": 0.2259222823413674, + "accuracy_stderr": 0.014722127495659583, + "f1": 0.21716530479138849, + "f1_stderr": 0.010961787165686507, + "main_score": 0.2259222823413674 + }, + "ko": { + "accuracy": 0.3673389080177078, + "accuracy_stderr": 0.013767126952905771, + "f1": 0.37221618799085243, + "f1_stderr": 0.014709606757115768, + "main_score": 0.3673389080177078 + }, + "lv": { + "accuracy": 0.4103295622233153, + "accuracy_stderr": 0.016045974090390387, + "f1": 0.40406596723582255, + "f1_stderr": 0.0069132258926177265, + "main_score": 0.4103295622233153 + }, + "ml": { + "accuracy": 0.24200688637481554, + "accuracy_stderr": 0.011347496474072452, + "f1": 0.23514331789309012, + "f1_stderr": 0.01204913680015678, + "main_score": 0.24200688637481554 + }, + "mn": { + "accuracy": 0.29070339399901624, + "accuracy_stderr": 0.01309926671195602, + "f1": 0.29527156314146025, + "f1_stderr": 0.010723196469553449, + "main_score": 0.29070339399901624 + }, + "ms": { + "accuracy": 0.42287260206591243, + "accuracy_stderr": 0.019597793502122884, + "f1": 0.41872839411817814, + "f1_stderr": 0.014555274766542817, + "main_score": 0.42287260206591243 + }, + "my": { + "accuracy": 0.24195769798327596, + "accuracy_stderr": 0.00958467079934785, + "f1": 0.23386230455157248, + "f1_stderr": 0.010002828286172527, + "main_score": 0.24195769798327596 + }, + "nb": { + "accuracy": 0.40196753566158383, + "accuracy_stderr": 0.014139930174690087, + "f1": 0.3918493283614314, + "f1_stderr": 0.013552558677777658, + "main_score": 0.40196753566158383 + }, + "nl": { + "accuracy": 0.4228726020659125, + "accuracy_stderr": 0.016822214166599285, + "f1": 0.4111008537872992, + "f1_stderr": 0.01409045918592202, + "main_score": 0.4228726020659125 + }, + "pl": { + "accuracy": 0.4145597638957206, + "accuracy_stderr": 0.020333079371071323, + "f1": 0.39761508941215074, + "f1_stderr": 0.015094892952504711, + "main_score": 0.4145597638957206 + }, + "pt": { + "accuracy": 0.45209050664043293, + "accuracy_stderr": 0.01835254418074865, + "f1": 0.4487810416996396, + "f1_stderr": 0.01196627715625292, + "main_score": 0.45209050664043293 + }, + "ro": { + "accuracy": 0.41559272011805215, + "accuracy_stderr": 0.008131571624077545, + "f1": 0.40039259678785666, + "f1_stderr": 0.00515152110757374, + "main_score": 0.41559272011805215 + }, + "ru": { + "accuracy": 0.35622233152975896, + "accuracy_stderr": 0.021799726881596905, + "f1": 0.34781156799018975, + "f1_stderr": 0.015093828682005378, + "main_score": 0.35622233152975896 + }, + "sl": { + "accuracy": 0.4014264633546484, + "accuracy_stderr": 0.012750044779015257, + "f1": 0.3891333558812916, + "f1_stderr": 0.011224536409706653, + "main_score": 0.4014264633546484 + }, + "sq": { + "accuracy": 0.43580914904082635, + "accuracy_stderr": 0.0145750441613665, + "f1": 0.4232383304393984, + "f1_stderr": 0.009598219357928728, + "main_score": 0.43580914904082635 + }, + "sv": { + "accuracy": 0.43433349729463844, + "accuracy_stderr": 0.012787089799130798, + "f1": 0.4239334259272507, + "f1_stderr": 0.010594787809411465, + "main_score": 0.43433349729463844 + }, + "sw": { + "accuracy": 0.4067879980324644, + "accuracy_stderr": 0.01684801143160469, + "f1": 0.3915460778697146, + "f1_stderr": 0.012876417288175717, + "main_score": 0.4067879980324644 + }, + "ta": { + "accuracy": 0.2374815543531726, + "accuracy_stderr": 0.007291836875634601, + "f1": 0.23306972516495983, + "f1_stderr": 0.006823162268817619, + "main_score": 0.2374815543531726 + }, + "te": { + "accuracy": 0.24535169699950812, + "accuracy_stderr": 0.010303295016847668, + "f1": 0.23951706003507978, + "f1_stderr": 0.012099393447774153, + "main_score": 0.24535169699950812 + }, + "th": { + "accuracy": 0.3489424495818987, + "accuracy_stderr": 0.011552886222641766, + "f1": 0.33677383997436106, + "f1_stderr": 0.006869691313087882, + "main_score": 0.3489424495818987 + }, + "tl": { + "accuracy": 0.4083620265617315, + "accuracy_stderr": 0.014225228406157971, + "f1": 0.3990401121375912, + "f1_stderr": 0.014254629179738524, + "main_score": 0.4083620265617315 + }, + "tr": { + "accuracy": 0.361829808165273, + "accuracy_stderr": 0.018463935842145066, + "f1": 0.3560425832290258, + "f1_stderr": 0.015144371708336025, + "main_score": 0.361829808165273 + }, + "ur": { + "accuracy": 0.25853418593212, + "accuracy_stderr": 0.011733674034744004, + "f1": 0.25059229515932524, + "f1_stderr": 0.010461858886336843, + "main_score": 0.25853418593212 + }, + "vi": { + "accuracy": 0.38180029513034924, + "accuracy_stderr": 0.014392627591048972, + "f1": 0.35555792018834453, + "f1_stderr": 0.014687028346239177, + "main_score": 0.38180029513034924 + }, + "zh-CN": { + "accuracy": 0.46173143138219375, + "accuracy_stderr": 0.01566177233818183, + "f1": 0.45269945997397354, + "f1_stderr": 0.012845282312811623, + "main_score": 0.46173143138219375 + }, + "zh-TW": { + "accuracy": 0.4192326610919824, + "accuracy_stderr": 0.01305866106572301, + "f1": 0.42394738901751217, + "f1_stderr": 0.012561702136094713, + "main_score": 0.4192326610919824 + } + }, + "dataset_version": null, + "mteb_version": "0.0.2" +} \ No newline at end of file diff --git a/evaluation/mteb/MassiveScenarioClassification.json b/evaluation/mteb/MassiveScenarioClassification.json new file mode 100644 index 0000000000000000000000000000000000000000..aece42b663e3851d494eb8ea2fe5a2e45f318423 --- /dev/null +++ b/evaluation/mteb/MassiveScenarioClassification.json @@ -0,0 +1,724 @@ +{ + "test": { + "af": { + "accuracy": 0.43248150638870203, + "accuracy_stderr": 0.01914237726930163, + "f1": 0.40924230769590786, + "f1_stderr": 0.017594605316604825, + "main_score": 0.43248150638870203 + }, + "am": { + "accuracy": 0.2530262273032952, + "accuracy_stderr": 0.0171025727714938, + "f1": 0.24937105830264067, + "f1_stderr": 0.014061996186754674, + "main_score": 0.2530262273032952 + }, + "ar": { + "accuracy": 0.3207128446536651, + "accuracy_stderr": 0.013430382612278666, + "f1": 0.3180245816594883, + "f1_stderr": 0.01399700671221373, + "main_score": 0.3207128446536651 + }, + "az": { + "accuracy": 0.3668123739071957, + "accuracy_stderr": 0.017293355231396697, + "f1": 0.3637219042508338, + "f1_stderr": 0.013273690138493005, + "main_score": 0.3668123739071957 + }, + "bn": { + "accuracy": 0.2956624075319435, + "accuracy_stderr": 0.01956084843583552, + "f1": 0.2838604205636276, + "f1_stderr": 0.01649525524185898, + "main_score": 0.2956624075319435 + }, + "cy": { + "accuracy": 0.421049092131809, + "accuracy_stderr": 0.01842488912674414, + "f1": 0.38926150886991295, + "f1_stderr": 0.013736606444137845, + "main_score": 0.421049092131809 + }, + "da": { + "accuracy": 0.4544384667114997, + "accuracy_stderr": 0.018828807973369888, + "f1": 0.42578252395460003, + "f1_stderr": 0.01878751493281785, + "main_score": 0.4544384667114997 + }, + "de": { + "accuracy": 0.43211163416274373, + "accuracy_stderr": 0.015219386970045799, + "f1": 0.41044658583047894, + "f1_stderr": 0.012995583154456956, + "main_score": 0.43211163416274373 + }, + "el": { + "accuracy": 0.3650302622730329, + "accuracy_stderr": 0.014415725865475081, + "f1": 0.3449785095312759, + "f1_stderr": 0.015562024871571323, + "main_score": 0.3650302622730329 + }, + "en": { + "accuracy": 0.6973772696704774, + "accuracy_stderr": 0.011262031912799892, + "f1": 0.6921759502909044, + "f1_stderr": 0.013799487672182479, + "main_score": 0.6973772696704774 + }, + "es": { + "accuracy": 0.44078681909885675, + "accuracy_stderr": 0.018595701586506804, + "f1": 0.4305914426901129, + "f1_stderr": 0.016625685820030444, + "main_score": 0.44078681909885675 + }, + "evaluation_time": 1815.94, + "fa": { + "accuracy": 0.32612642905178213, + "accuracy_stderr": 0.012812264412745333, + "f1": 0.3202463177462754, + "f1_stderr": 0.012798802292501087, + "main_score": 0.32612642905178213 + }, + "fi": { + "accuracy": 0.40356422326832553, + "accuracy_stderr": 0.014078935277749945, + "f1": 0.3813642481807678, + "f1_stderr": 0.012692369056549692, + "main_score": 0.40356422326832553 + }, + "fr": { + "accuracy": 0.4506724949562878, + "accuracy_stderr": 0.015178175408214766, + "f1": 0.4319827608343738, + "f1_stderr": 0.014020710010605711, + "main_score": 0.4506724949562878 + }, + "he": { + "accuracy": 0.3217888365837256, + "accuracy_stderr": 0.02418084644697299, + "f1": 0.29979761884698775, + "f1_stderr": 0.020294571210800923, + "main_score": 0.3217888365837256 + }, + "hi": { + "accuracy": 0.26903160726294556, + "accuracy_stderr": 0.02497685106922395, + "f1": 0.25833010434083364, + "f1_stderr": 0.021329106099270956, + "main_score": 0.26903160726294556 + }, + "hu": { + "accuracy": 0.4037995965030262, + "accuracy_stderr": 0.020340860350433913, + "f1": 0.37931343552928826, + "f1_stderr": 0.01617066924214891, + "main_score": 0.4037995965030262 + }, + "hy": { + "accuracy": 0.28375924680564896, + "accuracy_stderr": 0.02244735882940363, + "f1": 0.2696255693013172, + "f1_stderr": 0.017311893310356832, + "main_score": 0.28375924680564896 + }, + "id": { + "accuracy": 0.44361129791526566, + "accuracy_stderr": 0.022926086894172665, + "f1": 0.4354445012295126, + "f1_stderr": 0.020284381381570574, + "main_score": 0.44361129791526566 + }, + "is": { + "accuracy": 0.39290517821116344, + "accuracy_stderr": 0.02370210834656748, + "f1": 0.3726982052174147, + "f1_stderr": 0.019988763380559, + "main_score": 0.39290517821116344 + }, + "it": { + "accuracy": 0.46469401479488903, + "accuracy_stderr": 0.018658554046491128, + "f1": 0.44060986162841564, + "f1_stderr": 0.017892736302635378, + "main_score": 0.46469401479488903 + }, + "ja": { + "accuracy": 0.46257565568258235, + "accuracy_stderr": 0.018244361807715694, + "f1": 0.4562513945675882, + "f1_stderr": 0.016350188403047698, + "main_score": 0.46257565568258235 + }, + "jv": { + "accuracy": 0.41126429051782115, + "accuracy_stderr": 0.018576843941362883, + "f1": 0.3954392378396527, + "f1_stderr": 0.011255653042251292, + "main_score": 0.41126429051782115 + }, + "ka": { + "accuracy": 0.24727639542703428, + "accuracy_stderr": 0.018326690606661058, + "f1": 0.23337743140804484, + "f1_stderr": 0.011940429119171217, + "main_score": 0.24727639542703428 + }, + "km": { + "accuracy": 0.2974108944182918, + "accuracy_stderr": 0.013625492982156541, + "f1": 0.2757087619008375, + "f1_stderr": 0.01055451354659993, + "main_score": 0.2974108944182918 + }, + "kn": { + "accuracy": 0.23850033624747816, + "accuracy_stderr": 0.014455552445217143, + "f1": 0.2286733484540032, + "f1_stderr": 0.010992305614270776, + "main_score": 0.23850033624747816 + }, + "ko": { + "accuracy": 0.3656691324815064, + "accuracy_stderr": 0.020280629432627, + "f1": 0.35504081677134564, + "f1_stderr": 0.019299833634584138, + "main_score": 0.3656691324815064 + }, + "lv": { + "accuracy": 0.40928043039677203, + "accuracy_stderr": 0.017879262085215602, + "f1": 0.3910858913121125, + "f1_stderr": 0.015622608383011384, + "main_score": 0.40928043039677203 + }, + "ml": { + "accuracy": 0.25527908540685945, + "accuracy_stderr": 0.012492668582100332, + "f1": 0.25333391622280477, + "f1_stderr": 0.011431795353486644, + "main_score": 0.25527908540685945 + }, + "mn": { + "accuracy": 0.29105581708137185, + "accuracy_stderr": 0.02289852732480194, + "f1": 0.28478235012692815, + "f1_stderr": 0.0211390543174164, + "main_score": 0.29105581708137185 + }, + "ms": { + "accuracy": 0.43786146603900467, + "accuracy_stderr": 0.02361491677556193, + "f1": 0.41964014392626703, + "f1_stderr": 0.016224233488107753, + "main_score": 0.43786146603900467 + }, + "my": { + "accuracy": 0.27269670477471414, + "accuracy_stderr": 0.017084548735816784, + "f1": 0.26228386764141853, + "f1_stderr": 0.01770505820877428, + "main_score": 0.27269670477471414 + }, + "nb": { + "accuracy": 0.3901815736381977, + "accuracy_stderr": 0.02356766226099208, + "f1": 0.37641949339321856, + "f1_stderr": 0.018189340920191487, + "main_score": 0.3901815736381977 + }, + "nl": { + "accuracy": 0.4535978480161399, + "accuracy_stderr": 0.016327230257174263, + "f1": 0.426851176096831, + "f1_stderr": 0.009526114688499471, + "main_score": 0.4535978480161399 + }, + "pl": { + "accuracy": 0.41893073301950234, + "accuracy_stderr": 0.020686982211902172, + "f1": 0.4088871064261502, + "f1_stderr": 0.019791742479992352, + "main_score": 0.41893073301950234 + }, + "pt": { + "accuracy": 0.45901143241425685, + "accuracy_stderr": 0.017537457772563485, + "f1": 0.44496942353920543, + "f1_stderr": 0.016580297609253208, + "main_score": 0.45901143241425685 + }, + "ro": { + "accuracy": 0.44115669132481505, + "accuracy_stderr": 0.017138728900302158, + "f1": 0.41953945105870616, + "f1_stderr": 0.01663750637309216, + "main_score": 0.44115669132481505 + }, + "ru": { + "accuracy": 0.3276395427034297, + "accuracy_stderr": 0.01520582329589761, + "f1": 0.31436372571600935, + "f1_stderr": 0.016822070079219324, + "main_score": 0.3276395427034297 + }, + "sl": { + "accuracy": 0.40504371217215873, + "accuracy_stderr": 0.01737927871109968, + "f1": 0.39322752749628165, + "f1_stderr": 0.016021377230910933, + "main_score": 0.40504371217215873 + }, + "sq": { + "accuracy": 0.4251849361129792, + "accuracy_stderr": 0.02703155777439191, + "f1": 0.41413929711846303, + "f1_stderr": 0.02203846614787482, + "main_score": 0.4251849361129792 + }, + "sv": { + "accuracy": 0.42293207800941496, + "accuracy_stderr": 0.02634507038010069, + "f1": 0.4050409536806683, + "f1_stderr": 0.021882375504727304, + "main_score": 0.42293207800941496 + }, + "sw": { + "accuracy": 0.42999327505043705, + "accuracy_stderr": 0.015164574873190428, + "f1": 0.4104541622497327, + "f1_stderr": 0.01416304033082228, + "main_score": 0.42999327505043705 + }, + "ta": { + "accuracy": 0.2832548755884331, + "accuracy_stderr": 0.017499864243874726, + "f1": 0.2727684199556187, + "f1_stderr": 0.017340547403638454, + "main_score": 0.2832548755884331 + }, + "te": { + "accuracy": 0.26593813046402154, + "accuracy_stderr": 0.021809193915635242, + "f1": 0.25483878616197586, + "f1_stderr": 0.019449647389494947, + "main_score": 0.26593813046402154 + }, + "th": { + "accuracy": 0.36788836583725626, + "accuracy_stderr": 0.01545089176597426, + "f1": 0.34603932909177687, + "f1_stderr": 0.016869984806312827, + "main_score": 0.36788836583725626 + }, + "tl": { + "accuracy": 0.425689307330195, + "accuracy_stderr": 0.015430059348496856, + "f1": 0.40924469309079825, + "f1_stderr": 0.008776200992571783, + "main_score": 0.425689307330195 + }, + "tr": { + "accuracy": 0.37094821788836585, + "accuracy_stderr": 0.022152967877636806, + "f1": 0.3794962882285716, + "f1_stderr": 0.0210046248379818, + "main_score": 0.37094821788836585 + }, + "ur": { + "accuracy": 0.2883658372562206, + "accuracy_stderr": 0.02184377077895051, + "f1": 0.2780655865551234, + "f1_stderr": 0.021979806560091308, + "main_score": 0.2883658372562206 + }, + "vi": { + "accuracy": 0.37357094821788833, + "accuracy_stderr": 0.015912003760378605, + "f1": 0.3750791896103816, + "f1_stderr": 0.013705906683792032, + "main_score": 0.37357094821788833 + }, + "zh-CN": { + "accuracy": 0.4937794216543375, + "accuracy_stderr": 0.014582638723526031, + "f1": 0.4720421153697707, + "f1_stderr": 0.014185512249352985, + "main_score": 0.4937794216543375 + }, + "zh-TW": { + "accuracy": 0.44421654337592476, + "accuracy_stderr": 0.027457834005907886, + "f1": 0.4434741861198931, + "f1_stderr": 0.02234941824008831, + "main_score": 0.44421654337592476 + } + }, + "validation": { + "af": { + "accuracy": 0.42297097884899165, + "accuracy_stderr": 0.01949058592896654, + "f1": 0.41228412552668264, + "f1_stderr": 0.02072529581458811, + "main_score": 0.42297097884899165 + }, + "am": { + "accuracy": 0.2450565666502705, + "accuracy_stderr": 0.008479256952982387, + "f1": 0.24806662079898306, + "f1_stderr": 0.008780275014937335, + "main_score": 0.2450565666502705 + }, + "ar": { + "accuracy": 0.3055582882439744, + "accuracy_stderr": 0.017670617309841773, + "f1": 0.3069995212499811, + "f1_stderr": 0.015362354242331443, + "main_score": 0.3055582882439744 + }, + "az": { + "accuracy": 0.3621249385145106, + "accuracy_stderr": 0.012152220847254775, + "f1": 0.36357765091456506, + "f1_stderr": 0.010139140710942176, + "main_score": 0.3621249385145106 + }, + "bn": { + "accuracy": 0.2971470732907034, + "accuracy_stderr": 0.022069587561896618, + "f1": 0.2882207128496783, + "f1_stderr": 0.01683479482879926, + "main_score": 0.2971470732907034 + }, + "cy": { + "accuracy": 0.4136251844564683, + "accuracy_stderr": 0.024426828453818297, + "f1": 0.39296855913661843, + "f1_stderr": 0.020973943849242817, + "main_score": 0.4136251844564683 + }, + "da": { + "accuracy": 0.43807181505164783, + "accuracy_stderr": 0.017869947001923518, + "f1": 0.4206737421170841, + "f1_stderr": 0.016872412523955146, + "main_score": 0.43807181505164783 + }, + "de": { + "accuracy": 0.43133300541072306, + "accuracy_stderr": 0.020020883880127186, + "f1": 0.416915841608262, + "f1_stderr": 0.021672651279716317, + "main_score": 0.43133300541072306 + }, + "el": { + "accuracy": 0.3577471716674865, + "accuracy_stderr": 0.01701527354158656, + "f1": 0.34927355878305144, + "f1_stderr": 0.016442978728160182, + "main_score": 0.3577471716674865 + }, + "en": { + "accuracy": 0.7017707820954255, + "accuracy_stderr": 0.016159041704017264, + "f1": 0.6966771799036044, + "f1_stderr": 0.01608645681525308, + "main_score": 0.7017707820954255 + }, + "es": { + "accuracy": 0.43846532218396456, + "accuracy_stderr": 0.02347394558102388, + "f1": 0.4343598552554334, + "f1_stderr": 0.0181860533015881, + "main_score": 0.43846532218396456 + }, + "evaluation_time": 1332.6, + "fa": { + "accuracy": 0.33084112149532713, + "accuracy_stderr": 0.011130497227832424, + "f1": 0.33401019078365096, + "f1_stderr": 0.013062402784378667, + "main_score": 0.33084112149532713 + }, + "fi": { + "accuracy": 0.39316281357599603, + "accuracy_stderr": 0.01045771116792211, + "f1": 0.3785817393037779, + "f1_stderr": 0.013819345424261865, + "main_score": 0.39316281357599603 + }, + "fr": { + "accuracy": 0.4451549434333497, + "accuracy_stderr": 0.015595125043219043, + "f1": 0.4346460544394509, + "f1_stderr": 0.013779384653720472, + "main_score": 0.4451549434333497 + }, + "he": { + "accuracy": 0.3149532710280374, + "accuracy_stderr": 0.023701506980689756, + "f1": 0.3011898277187477, + "f1_stderr": 0.02093714853708861, + "main_score": 0.3149532710280374 + }, + "hi": { + "accuracy": 0.26099360550909984, + "accuracy_stderr": 0.020137465573030665, + "f1": 0.2551702530489754, + "f1_stderr": 0.016256501108798407, + "main_score": 0.26099360550909984 + }, + "hu": { + "accuracy": 0.3777668470241023, + "accuracy_stderr": 0.01415737274034313, + "f1": 0.3636064864884589, + "f1_stderr": 0.009160637137956562, + "main_score": 0.3777668470241023 + }, + "hy": { + "accuracy": 0.28558780127889816, + "accuracy_stderr": 0.01877837310408312, + "f1": 0.2772579956450185, + "f1_stderr": 0.015424825177141353, + "main_score": 0.28558780127889816 + }, + "id": { + "accuracy": 0.4339399901623217, + "accuracy_stderr": 0.014721716629155198, + "f1": 0.4297916006449869, + "f1_stderr": 0.01354829266304612, + "main_score": 0.4339399901623217 + }, + "is": { + "accuracy": 0.3939498278406296, + "accuracy_stderr": 0.019989438522709065, + "f1": 0.37779417546607796, + "f1_stderr": 0.015353564518631574, + "main_score": 0.3939498278406296 + }, + "it": { + "accuracy": 0.45327102803738323, + "accuracy_stderr": 0.01699848617376043, + "f1": 0.44153892466033684, + "f1_stderr": 0.01882228199455719, + "main_score": 0.45327102803738323 + }, + "ja": { + "accuracy": 0.4578455484505656, + "accuracy_stderr": 0.020281847759727148, + "f1": 0.45729859048271465, + "f1_stderr": 0.01661016283738532, + "main_score": 0.4578455484505656 + }, + "jv": { + "accuracy": 0.39758976881455976, + "accuracy_stderr": 0.01707347422003101, + "f1": 0.39358025541686337, + "f1_stderr": 0.014149347886283038, + "main_score": 0.39758976881455976 + }, + "ka": { + "accuracy": 0.2424003935071323, + "accuracy_stderr": 0.019907387368651303, + "f1": 0.23256193506176298, + "f1_stderr": 0.015099183200210618, + "main_score": 0.2424003935071323 + }, + "km": { + "accuracy": 0.2993605509099853, + "accuracy_stderr": 0.013532083217551187, + "f1": 0.2809200454959333, + "f1_stderr": 0.009393641610664295, + "main_score": 0.2993605509099853 + }, + "kn": { + "accuracy": 0.23177570093457942, + "accuracy_stderr": 0.018427996145927942, + "f1": 0.22821426633751796, + "f1_stderr": 0.017679796969382704, + "main_score": 0.23177570093457942 + }, + "ko": { + "accuracy": 0.3586817511067388, + "accuracy_stderr": 0.021898282999062484, + "f1": 0.35579471530107626, + "f1_stderr": 0.019292875338196964, + "main_score": 0.3586817511067388 + }, + "lv": { + "accuracy": 0.4004426955238564, + "accuracy_stderr": 0.014973725234459603, + "f1": 0.387872887807314, + "f1_stderr": 0.015108580814192384, + "main_score": 0.4004426955238564 + }, + "ml": { + "accuracy": 0.24756517461878996, + "accuracy_stderr": 0.012515618686269558, + "f1": 0.24899937888159857, + "f1_stderr": 0.012555787929717375, + "main_score": 0.24756517461878996 + }, + "mn": { + "accuracy": 0.2843580914904083, + "accuracy_stderr": 0.019142375289924282, + "f1": 0.2816633850287075, + "f1_stderr": 0.01723360155398601, + "main_score": 0.2843580914904083 + }, + "ms": { + "accuracy": 0.43320216428922764, + "accuracy_stderr": 0.02159818527886489, + "f1": 0.4249245664682754, + "f1_stderr": 0.014510663777917886, + "main_score": 0.43320216428922764 + }, + "my": { + "accuracy": 0.26463354648302995, + "accuracy_stderr": 0.019322963284725016, + "f1": 0.2608329884167839, + "f1_stderr": 0.02115978672739665, + "main_score": 0.26463354648302995 + }, + "nb": { + "accuracy": 0.3869650762420069, + "accuracy_stderr": 0.024525284668151657, + "f1": 0.3814300772740455, + "f1_stderr": 0.020457575337737684, + "main_score": 0.3869650762420069 + }, + "nl": { + "accuracy": 0.4398425971470733, + "accuracy_stderr": 0.01625719128512154, + "f1": 0.42260609210168926, + "f1_stderr": 0.011560492888814633, + "main_score": 0.4398425971470733 + }, + "pl": { + "accuracy": 0.40083620265617326, + "accuracy_stderr": 0.022349561833830214, + "f1": 0.3991485290358368, + "f1_stderr": 0.01999404587448531, + "main_score": 0.40083620265617326 + }, + "pt": { + "accuracy": 0.45031972454500735, + "accuracy_stderr": 0.01750477954059943, + "f1": 0.4437398935443329, + "f1_stderr": 0.018517944459345348, + "main_score": 0.45031972454500735 + }, + "ro": { + "accuracy": 0.43812100344318744, + "accuracy_stderr": 0.021415938790220652, + "f1": 0.42362645820764067, + "f1_stderr": 0.018731210126253738, + "main_score": 0.43812100344318744 + }, + "ru": { + "accuracy": 0.33271028037383177, + "accuracy_stderr": 0.022300521711360505, + "f1": 0.32468290003396405, + "f1_stderr": 0.021982683919350685, + "main_score": 0.33271028037383177 + }, + "sl": { + "accuracy": 0.3973930152484014, + "accuracy_stderr": 0.018751618725414546, + "f1": 0.392100156522409, + "f1_stderr": 0.014838827675853575, + "main_score": 0.3973930152484014 + }, + "sq": { + "accuracy": 0.4194786030496803, + "accuracy_stderr": 0.02354660171070067, + "f1": 0.4169388707660075, + "f1_stderr": 0.017217415251225807, + "main_score": 0.4194786030496803 + }, + "sv": { + "accuracy": 0.42543039842597147, + "accuracy_stderr": 0.021562756889860635, + "f1": 0.4177387745760652, + "f1_stderr": 0.019505579966050932, + "main_score": 0.42543039842597147 + }, + "sw": { + "accuracy": 0.4136251844564683, + "accuracy_stderr": 0.012985083231251377, + "f1": 0.40084113718397746, + "f1_stderr": 0.00942359858438975, + "main_score": 0.4136251844564683 + }, + "ta": { + "accuracy": 0.2627643876045253, + "accuracy_stderr": 0.014139930174690075, + "f1": 0.25844837599995923, + "f1_stderr": 0.012773160336082992, + "main_score": 0.2627643876045253 + }, + "te": { + "accuracy": 0.2584358091490408, + "accuracy_stderr": 0.016944240742103198, + "f1": 0.2514435908623849, + "f1_stderr": 0.012722578409134445, + "main_score": 0.2584358091490408 + }, + "th": { + "accuracy": 0.35833743236596166, + "accuracy_stderr": 0.013268562919427823, + "f1": 0.3419105153327198, + "f1_stderr": 0.01289016362222908, + "main_score": 0.35833743236596166 + }, + "tl": { + "accuracy": 0.4170191834727004, + "accuracy_stderr": 0.018806183012845677, + "f1": 0.409812860893571, + "f1_stderr": 0.014175082125879828, + "main_score": 0.4170191834727004 + }, + "tr": { + "accuracy": 0.3648303000491884, + "accuracy_stderr": 0.024139475649706935, + "f1": 0.3753352910025087, + "f1_stderr": 0.020068175204176857, + "main_score": 0.3648303000491884 + }, + "ur": { + "accuracy": 0.27668470241023124, + "accuracy_stderr": 0.016621382716288085, + "f1": 0.2703604403224913, + "f1_stderr": 0.018845793770781712, + "main_score": 0.27668470241023124 + }, + "vi": { + "accuracy": 0.3775700934579439, + "accuracy_stderr": 0.017434075812833565, + "f1": 0.37914946223283774, + "f1_stderr": 0.016214172379444072, + "main_score": 0.3775700934579439 + }, + "zh-CN": { + "accuracy": 0.48888342351205105, + "accuracy_stderr": 0.014925171646575468, + "f1": 0.4748460692222126, + "f1_stderr": 0.017161662351747747, + "main_score": 0.48888342351205105 + }, + "zh-TW": { + "accuracy": 0.43866207575012295, + "accuracy_stderr": 0.02551147284957334, + "f1": 0.4429342869790666, + "f1_stderr": 0.020015637537278582, + "main_score": 0.43866207575012295 + } + }, + "dataset_version": null, + "mteb_version": "0.0.2" +} \ No newline at end of file diff --git a/evaluation/mteb/MedrxivClusteringP2P.json b/evaluation/mteb/MedrxivClusteringP2P.json new file mode 100644 index 0000000000000000000000000000000000000000..9df9e0653b57846ec476fa6c69bc20a0d38b12ac --- /dev/null +++ b/evaluation/mteb/MedrxivClusteringP2P.json @@ -0,0 +1,9 @@ +{ + "test": { + "evaluation_time": 288.36, + "v_measure": 0.31374938993074253, + "v_measure_std": 0.011604811334004275 + }, + "dataset_version": null, + "mteb_version": "0.0.2" +} \ No newline at end of file diff --git a/evaluation/mteb/MedrxivClusteringS2S.json b/evaluation/mteb/MedrxivClusteringS2S.json new file mode 100644 index 0000000000000000000000000000000000000000..c2ae2b74c8510701b1dd3baa77858391986bbb4a --- /dev/null +++ b/evaluation/mteb/MedrxivClusteringS2S.json @@ -0,0 +1,9 @@ +{ + "test": { + "evaluation_time": 42.83, + "v_measure": 0.26871455379644094, + "v_measure_std": 0.015129306688366255 + }, + "dataset_version": null, + "mteb_version": "0.0.2" +} \ No newline at end of file diff --git a/evaluation/mteb/MindSmallReranking.json b/evaluation/mteb/MindSmallReranking.json new file mode 100644 index 0000000000000000000000000000000000000000..d7b70c382a224e29c15351a0de60fdf5eab5ece2 --- /dev/null +++ b/evaluation/mteb/MindSmallReranking.json @@ -0,0 +1,9 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 1925.88, + "map": 0.30402396942935334, + "mrr": 0.3142600938803256 + } +} \ No newline at end of file diff --git a/evaluation/mteb/NFCorpus.json b/evaluation/mteb/NFCorpus.json new file mode 100644 index 0000000000000000000000000000000000000000..8409a273fb08fba38231d65c81ecf5f22536e96a --- /dev/null +++ b/evaluation/mteb/NFCorpus.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 69.17, + "map_at_1": 0.03774, + "map_at_10": 0.07615, + "map_at_100": 0.09574, + "map_at_1000": 0.10711, + "map_at_3": 0.05754, + "map_at_5": 0.06666, + "mrr_at_1": 0.33127, + "mrr_at_10": 0.40351, + "mrr_at_100": 0.41144, + "mrr_at_1000": 0.41202, + "mrr_at_3": 0.38029, + "mrr_at_5": 0.3919, + "ndcg_at_1": 0.31579, + "ndcg_at_10": 0.22792, + "ndcg_at_100": 0.21699, + "ndcg_at_1000": 0.30893, + "ndcg_at_3": 0.26829, + "ndcg_at_5": 0.25119, + "precision_at_1": 0.33127, + "precision_at_10": 0.16718, + "precision_at_100": 0.05709, + "precision_at_1000": 0.01836, + "precision_at_3": 0.24768, + "precision_at_5": 0.213, + "recall_at_1": 0.03774, + "recall_at_10": 0.10303, + "recall_at_100": 0.23013, + "recall_at_1000": 0.54865, + "recall_at_3": 0.06554, + "recall_at_5": 0.08087 + } +} \ No newline at end of file diff --git a/evaluation/mteb/NQ.json b/evaluation/mteb/NQ.json new file mode 100644 index 0000000000000000000000000000000000000000..f601d19511b034c1077345b3bded291e5be8d0ad --- /dev/null +++ b/evaluation/mteb/NQ.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 1782.69, + "map_at_1": 0.15621, + "map_at_10": 0.24519, + "map_at_100": 0.25586, + "map_at_1000": 0.25662, + "map_at_3": 0.21619, + "map_at_5": 0.23232, + "mrr_at_1": 0.17497, + "mrr_at_10": 0.26301, + "mrr_at_100": 0.27235, + "mrr_at_1000": 0.27297, + "mrr_at_3": 0.23561, + "mrr_at_5": 0.25111, + "ndcg_at_1": 0.17497, + "ndcg_at_10": 0.29725, + "ndcg_at_100": 0.34824, + "ndcg_at_1000": 0.36907, + "ndcg_at_3": 0.23946, + "ndcg_at_5": 0.26739, + "precision_at_1": 0.17497, + "precision_at_10": 0.05217, + "precision_at_100": 0.0081, + "precision_at_1000": 0.00101, + "precision_at_3": 0.11114, + "precision_at_5": 0.08285, + "recall_at_1": 0.15621, + "recall_at_10": 0.43999, + "recall_at_100": 0.67183, + "recall_at_1000": 0.83174, + "recall_at_3": 0.2872, + "recall_at_5": 0.35154 + } +} \ No newline at end of file diff --git a/evaluation/mteb/QuoraRetrieval.json b/evaluation/mteb/QuoraRetrieval.json new file mode 100644 index 0000000000000000000000000000000000000000..69ad0c81c880466d95021a540f7c0e0b07973858 --- /dev/null +++ b/evaluation/mteb/QuoraRetrieval.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 296.55, + "map_at_1": 0.54717, + "map_at_10": 0.67514, + "map_at_100": 0.68484, + "map_at_1000": 0.68523, + "map_at_3": 0.64169, + "map_at_5": 0.66054, + "mrr_at_1": 0.6246, + "mrr_at_10": 0.71503, + "mrr_at_100": 0.71915, + "mrr_at_1000": 0.71923, + "mrr_at_3": 0.69468, + "mrr_at_5": 0.70677, + "ndcg_at_1": 0.6248, + "ndcg_at_10": 0.7298, + "ndcg_at_100": 0.76023, + "ndcg_at_1000": 0.76512, + "ndcg_at_3": 0.68138, + "ndcg_at_5": 0.70458, + "precision_at_1": 0.6248, + "precision_at_10": 0.11373, + "precision_at_100": 0.01437, + "precision_at_1000": 0.00154, + "precision_at_3": 0.29623, + "precision_at_5": 0.19918, + "recall_at_1": 0.54717, + "recall_at_10": 0.84745, + "recall_at_100": 0.96528, + "recall_at_1000": 0.9939, + "recall_at_3": 0.71606, + "recall_at_5": 0.77511 + } +} \ No newline at end of file diff --git a/evaluation/mteb/RedditClustering.json b/evaluation/mteb/RedditClustering.json new file mode 100644 index 0000000000000000000000000000000000000000..5d6246e067678817d90fbb8e8fd8b4504016e8fe --- /dev/null +++ b/evaluation/mteb/RedditClustering.json @@ -0,0 +1,14 @@ +{ + "test": { + "evaluation_time": 483.06, + "v_measure": 0.4023390747226228, + "v_measure_std": 0.05592188317124693 + }, + "validation": { + "evaluation_time": 486.87, + "v_measure": 0.4023390747226228, + "v_measure_std": 0.05592188317124693 + }, + "dataset_version": null, + "mteb_version": "0.0.2" +} \ No newline at end of file diff --git a/evaluation/mteb/RedditClusteringP2P.json b/evaluation/mteb/RedditClusteringP2P.json new file mode 100644 index 0000000000000000000000000000000000000000..f560198042112963ca383c61f8c22351764ba09e --- /dev/null +++ b/evaluation/mteb/RedditClusteringP2P.json @@ -0,0 +1,9 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 1471.4, + "v_measure": 0.49090518272935624, + "v_measure_std": 0.11128243444352012 + } +} \ No newline at end of file diff --git a/evaluation/mteb/SCIDOCS.json b/evaluation/mteb/SCIDOCS.json new file mode 100644 index 0000000000000000000000000000000000000000..9e4c6d339856d3a872881d33392cea82d63f85c0 --- /dev/null +++ b/evaluation/mteb/SCIDOCS.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 102.16, + "map_at_1": 0.03028, + "map_at_10": 0.06968, + "map_at_100": 0.082, + "map_at_1000": 0.08432, + "map_at_3": 0.05307, + "map_at_5": 0.06099, + "mrr_at_1": 0.148, + "mrr_at_10": 0.22425, + "mrr_at_100": 0.23577, + "mrr_at_1000": 0.2367, + "mrr_at_3": 0.20233, + "mrr_at_5": 0.21318, + "ndcg_at_1": 0.148, + "ndcg_at_10": 0.12206, + "ndcg_at_100": 0.17799, + "ndcg_at_1000": 0.22891, + "ndcg_at_3": 0.12128, + "ndcg_at_5": 0.10212, + "precision_at_1": 0.148, + "precision_at_10": 0.0617, + "precision_at_100": 0.01428, + "precision_at_1000": 0.00266, + "precision_at_3": 0.11333, + "precision_at_5": 0.0874, + "recall_at_1": 0.03028, + "recall_at_10": 0.12522, + "recall_at_100": 0.28975, + "recall_at_1000": 0.54038, + "recall_at_3": 0.06913, + "recall_at_5": 0.08883 + } +} \ No newline at end of file diff --git a/evaluation/mteb/SGPT-125M-weightedmean-msmarco-specb-bitfit_results.csv b/evaluation/mteb/SGPT-125M-weightedmean-msmarco-specb-bitfit_results.csv new file mode 100644 index 0000000000000000000000000000000000000000..5702669a3588b4ff0a3c943ae5a89613c0990bda --- /dev/null +++ b/evaluation/mteb/SGPT-125M-weightedmean-msmarco-specb-bitfit_results.csv @@ -0,0 +1,210 @@ +model,task,dataset,language,metric,value +SGPT-125M-weightedmean-msmarco-specb-bitfit,BitextMining,BUCC,,f1, +SGPT-125M-weightedmean-msmarco-specb-bitfit,BitextMining,Tatoeba,,f1, +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,AmazonCounterfactualClassification,en,accuracy,0.6123880597014926 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,AmazonCounterfactualClassification,de,accuracy,0.5688436830835117 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,AmazonCounterfactualClassification,en-ext,accuracy,0.5827586206896551 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,AmazonCounterfactualClassification,ja,accuracy,0.5464668094218414 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,AmazonPolarityClassification,en,accuracy,0.65401225 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,AmazonReviewsClassification,en,accuracy,0.31165999999999994 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,AmazonReviewsClassification,de,accuracy,0.2479 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,AmazonReviewsClassification,es,accuracy,0.26643999999999995 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,AmazonReviewsClassification,fr,accuracy,0.26386000000000004 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,AmazonReviewsClassification,ja,accuracy,0.22078000000000003 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,AmazonReviewsClassification,zh,accuracy,0.24274 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,Banking77Classification,en,accuracy,0.7770454545454545 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,EmotionClassification,en,accuracy,0.39075000000000004 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,ImdbClassification,en,accuracy,0.586696 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,af,accuracy,0.4054808338937458 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,am,accuracy,0.2418291862811029 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,ar,accuracy,0.30134498991257563 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,az,accuracy,0.35884330867518494 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,bn,accuracy,0.2917283120376597 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,cy,accuracy,0.41788836583725625 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,da,accuracy,0.44176193678547404 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,de,accuracy,0.4207464694014795 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,el,accuracy,0.362542030934768 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,en,accuracy,0.6140887693342301 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,es,accuracy,0.42679892400806996 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,fa,accuracy,0.3559179556153329 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,fi,accuracy,0.40036987222595827 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,fr,accuracy,0.4343981170141224 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,he,accuracy,0.3159381304640215 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,hi,accuracy,0.27044384667114996 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,hu,accuracy,0.38453261600538 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,hy,accuracy,0.2797915265635508 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,id,accuracy,0.4397108271687963 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,is,accuracy,0.40302622730329524 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,it,accuracy,0.45474108944182917 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,ja,accuracy,0.4560860793544048 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,jv,accuracy,0.386684599865501 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,ka,accuracy,0.25652320107599197 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,km,accuracy,0.28295225285810355 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,kn,accuracy,0.23480161398789506 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,ko,accuracy,0.3655682582380632 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,lv,accuracy,0.4184936112979153 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,ml,accuracy,0.2490921318090114 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,mn,accuracy,0.2986213853396099 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,ms,accuracy,0.4242098184263618 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,my,accuracy,0.25131136516476127 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,nb,accuracy,0.3981506388702084 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,nl,accuracy,0.4362138533960995 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,pl,accuracy,0.4219569603227976 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,pt,accuracy,0.4520847343644923 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,ro,accuracy,0.4180901143241426 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,ru,accuracy,0.3596839273705447 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,sl,accuracy,0.40605245460659045 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,sq,accuracy,0.42757229320780094 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,sv,accuracy,0.42347007397444514 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,sw,accuracy,0.4112306657700067 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,ta,accuracy,0.24603227975790182 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,te,accuracy,0.2503698722259583 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,th,accuracy,0.35400134498991254 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,tl,accuracy,0.4119031607262945 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,tr,accuracy,0.3640551445864156 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,ur,accuracy,0.25934767989240076 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,vi,accuracy,0.38799596503026224 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,zh-CN,accuracy,0.4624411566913248 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,zh-TW,accuracy,0.4230665770006724 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,af,accuracy,0.43248150638870203 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,am,accuracy,0.2530262273032952 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,ar,accuracy,0.3207128446536651 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,az,accuracy,0.3668123739071957 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,bn,accuracy,0.2956624075319435 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,cy,accuracy,0.421049092131809 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,da,accuracy,0.4544384667114997 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,de,accuracy,0.43211163416274373 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,el,accuracy,0.3650302622730329 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,en,accuracy,0.6973772696704774 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,es,accuracy,0.44078681909885675 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,fa,accuracy,0.32612642905178213 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,fi,accuracy,0.40356422326832553 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,fr,accuracy,0.4506724949562878 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,he,accuracy,0.3217888365837256 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,hi,accuracy,0.26903160726294556 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,hu,accuracy,0.4037995965030262 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,hy,accuracy,0.28375924680564896 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,id,accuracy,0.44361129791526566 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,is,accuracy,0.39290517821116344 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,it,accuracy,0.46469401479488903 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,ja,accuracy,0.46257565568258235 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,jv,accuracy,0.41126429051782115 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,ka,accuracy,0.24727639542703428 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,km,accuracy,0.2974108944182918 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,kn,accuracy,0.23850033624747816 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,ko,accuracy,0.3656691324815064 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,lv,accuracy,0.40928043039677203 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,ml,accuracy,0.25527908540685945 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,mn,accuracy,0.29105581708137185 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,ms,accuracy,0.43786146603900467 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,my,accuracy,0.27269670477471414 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,nb,accuracy,0.3901815736381977 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,nl,accuracy,0.4535978480161399 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,pl,accuracy,0.41893073301950234 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,pt,accuracy,0.45901143241425685 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,ro,accuracy,0.44115669132481505 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,ru,accuracy,0.3276395427034297 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,sl,accuracy,0.40504371217215873 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,sq,accuracy,0.4251849361129792 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,sv,accuracy,0.42293207800941496 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,sw,accuracy,0.42999327505043705 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,ta,accuracy,0.2832548755884331 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,te,accuracy,0.26593813046402154 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,th,accuracy,0.36788836583725626 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,tl,accuracy,0.425689307330195 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,tr,accuracy,0.37094821788836585 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,ur,accuracy,0.2883658372562206 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,vi,accuracy,0.37357094821788833 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,zh-CN,accuracy,0.4937794216543375 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,zh-TW,accuracy,0.44421654337592476 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MTOPDomainClassification,en,accuracy,0.8695622435020519 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MTOPDomainClassification,de,accuracy,0.6273034657650043 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MTOPDomainClassification,es,accuracy,0.6754503002001334 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MTOPDomainClassification,fr,accuracy,0.653523332289383 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MTOPDomainClassification,hi,accuracy,0.45371100752958055 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MTOPDomainClassification,th,accuracy,0.5527667269439421 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MTOPIntentClassification,en,accuracy,0.6225262197902417 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MTOPIntentClassification,de,accuracy,0.4956043956043956 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MTOPIntentClassification,es,accuracy,0.4993995997331555 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MTOPIntentClassification,fr,accuracy,0.46329470717193855 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MTOPIntentClassification,hi,accuracy,0.3220867694514163 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,MTOPIntentClassification,th,accuracy,0.43627486437613017 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,ToxicConversationsClassification,en,accuracy,0.6265799999999999 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,TweetSentimentExtractionClassification,en,accuracy,0.5240803621958121 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Clustering,ArxivClusteringP2P,en,v_measure,0.3970858340673288 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Clustering,ArxivClusteringS2S,en,v_measure,0.2824284771372105 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Clustering,BiorxivClusteringP2P,en,v_measure,0.33632603955439844 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Clustering,BiorxivClusteringS2S,en,v_measure,0.27038042665369927 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Clustering,MedrxivClusteringP2P,en,v_measure,0.31374938993074253 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Clustering,MedrxivClusteringS2S,en,v_measure,0.26871455379644094 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Clustering,RedditClustering,en,v_measure,0.4023390747226228 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Clustering,RedditClusteringP2P,en,v_measure,0.49090518272935624 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Clustering,StackExchangeClustering,en,v_measure,0.5274481093815175 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Clustering,StackExchangeClusteringP2P,en,v_measure,0.3265999453562101 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Clustering,TwentyNewsgroupsClustering,en,v_measure,0.3212697126747911 +SGPT-125M-weightedmean-msmarco-specb-bitfit,PairClassification,SprintDuplicateQuestions,en,ap,0.8988577913120002 +SGPT-125M-weightedmean-msmarco-specb-bitfit,PairClassification,TwitterSemEval2015,en,ap,0.5474680676121269 +SGPT-125M-weightedmean-msmarco-specb-bitfit,PairClassification,TwitterURLCorpus,en,ap,0.8105760818661524 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Reranking,AskUbuntuDupQuestions,en,map,0.5583700395192394 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Reranking,MindSmallReranking,en,map,0.30402396942935334 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Reranking,SciDocsRR,en,map,0.7133941904192648 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Reranking,StackOverflowDupQuestions,en,map,0.44744984645554653 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Retrieval,ArguAna,en,ndcg_at_10,0.45425 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Retrieval,ClimateFEVER,en,ndcg_at_10,0.21858 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Retrieval,CQADupstackRetrieval,en,ndcg_at_10,0.27248666666666665 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Retrieval,DBPedia,en,ndcg_at_10,0.22718 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Retrieval,FEVER,en,ndcg_at_10,0.6045 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Retrieval,FiQA2018,en,ndcg_at_10,0.21118 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Retrieval,HotpotQA,en,ndcg_at_10,0.40876 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Retrieval,MSMARCO,en,ndcg_at_10,0.27975 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Retrieval,NFCorpus,en,ndcg_at_10,0.22792 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Retrieval,NQ,en,ndcg_at_10,0.29725 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Retrieval,QuoraRetrieval,en,ndcg_at_10,0.7298 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Retrieval,SCIDOCS,en,ndcg_at_10,0.12206 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Retrieval,SciFact,en,ndcg_at_10,0.56899 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Retrieval,Touche2020,en,ndcg_at_10,0.22972 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Retrieval,TRECCOVID,en,ndcg_at_10,0.70302 +SGPT-125M-weightedmean-msmarco-specb-bitfit,STS,BIOSSES,en,cosine_spearman,0.7520954502580506 +SGPT-125M-weightedmean-msmarco-specb-bitfit,STS,SICK-R,en,cosine_spearman,0.6592910683118656 +SGPT-125M-weightedmean-msmarco-specb-bitfit,STS,STS12,en,cosine_spearman,0.6652980061546658 +SGPT-125M-weightedmean-msmarco-specb-bitfit,STS,STS13,en,cosine_spearman,0.7616628863742361 +SGPT-125M-weightedmean-msmarco-specb-bitfit,STS,STS14,en,cosine_spearman,0.6904572664009687 +SGPT-125M-weightedmean-msmarco-specb-bitfit,STS,STS15,en,cosine_spearman,0.7923677712825851 +SGPT-125M-weightedmean-msmarco-specb-bitfit,STS,STS16,en,cosine_spearman,0.7606792422441928 +SGPT-125M-weightedmean-msmarco-specb-bitfit,STS,STS17,ko-ko,cosine_spearman,0.5238601027550566 +SGPT-125M-weightedmean-msmarco-specb-bitfit,STS,STS17,ar-ar,cosine_spearman,0.5561674586076298 +SGPT-125M-weightedmean-msmarco-specb-bitfit,STS,STS17,en-ar,cosine_spearman,0.08209569244801064 +SGPT-125M-weightedmean-msmarco-specb-bitfit,STS,STS17,en-de,cosine_spearman,0.3018181775929109 +SGPT-125M-weightedmean-msmarco-specb-bitfit,STS,STS17,en-en,cosine_spearman,0.8495398260629698 +SGPT-125M-weightedmean-msmarco-specb-bitfit,STS,STS17,en-tr,cosine_spearman,0.010393399782021343 +SGPT-125M-weightedmean-msmarco-specb-bitfit,STS,STS17,es-en,cosine_spearman,0.28776666666659906 +SGPT-125M-weightedmean-msmarco-specb-bitfit,STS,STS17,es-es,cosine_spearman,0.7188444295144646 +SGPT-125M-weightedmean-msmarco-specb-bitfit,STS,STS17,fr-en,cosine_spearman,0.26339466714066445 +SGPT-125M-weightedmean-msmarco-specb-bitfit,STS,STS17,it-en,cosine_spearman,0.20729929404589678 +SGPT-125M-weightedmean-msmarco-specb-bitfit,STS,STS17,nl-en,cosine_spearman,0.2505342961279355 +SGPT-125M-weightedmean-msmarco-specb-bitfit,STS,STS22,en,cosine_spearman,0.6566183708171826 +SGPT-125M-weightedmean-msmarco-specb-bitfit,STS,STS22,de,cosine_spearman,0.21987647321429005 +SGPT-125M-weightedmean-msmarco-specb-bitfit,STS,STS22,es,cosine_spearman,0.49811823238907665 +SGPT-125M-weightedmean-msmarco-specb-bitfit,STS,STS22,pl,cosine_spearman,0.23308439517991938 +SGPT-125M-weightedmean-msmarco-specb-bitfit,STS,STS22,tr,cosine_spearman,0.3466348380997687 +SGPT-125M-weightedmean-msmarco-specb-bitfit,STS,STS22,ar,cosine_spearman,0.2818922448944151 +SGPT-125M-weightedmean-msmarco-specb-bitfit,STS,STS22,ru,cosine_spearman,0.09068119621940965 +SGPT-125M-weightedmean-msmarco-specb-bitfit,STS,STS22,zh,cosine_spearman,0.2548511383289232 +SGPT-125M-weightedmean-msmarco-specb-bitfit,STS,STS22,fr,cosine_spearman,0.6766493409568727 +SGPT-125M-weightedmean-msmarco-specb-bitfit,STS,STS22,de-en,cosine_spearman,0.5307135629778896 +SGPT-125M-weightedmean-msmarco-specb-bitfit,STS,STS22,es-en,cosine_spearman,0.49512539047677256 +SGPT-125M-weightedmean-msmarco-specb-bitfit,STS,STS22,it,cosine_spearman,0.4824795739512037 +SGPT-125M-weightedmean-msmarco-specb-bitfit,STS,STS22,pl-en,cosine_spearman,0.36798674894178013 +SGPT-125M-weightedmean-msmarco-specb-bitfit,STS,STS22,zh-en,cosine_spearman,0.2868170719697501 +SGPT-125M-weightedmean-msmarco-specb-bitfit,STS,STS22,es-it,cosine_spearman,0.45782560880405704 +SGPT-125M-weightedmean-msmarco-specb-bitfit,STS,STS22,de-fr,cosine_spearman,0.3296920218281008 +SGPT-125M-weightedmean-msmarco-specb-bitfit,STS,STS22,de-pl,cosine_spearman,0.20447284723752715 +SGPT-125M-weightedmean-msmarco-specb-bitfit,STS,STS22,fr-pl,cosine_spearman,0.6197797868009122 +SGPT-125M-weightedmean-msmarco-specb-bitfit,STS,STSBenchmark,en,cosine_spearman,0.7533716094627373 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Summarization,SummEval,en,cosine_spearman,0.2890145030911965 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Classification,average,en,accuracy,0.6072305523949799 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Clustering,average,en,v_measure,0.3579315223640289 +SGPT-125M-weightedmean-msmarco-specb-bitfit,PairClassification,average,en,ap,0.7523006469300931 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Reranking,average,en,map,0.505809511455851 +SGPT-125M-weightedmean-msmarco-specb-bitfit,Retrieval,average,en,ndcg_at_10,0.37036311111111125 +SGPT-125M-weightedmean-msmarco-specb-bitfit,STS,average,en,cosine_spearman,0.7341381497369455 diff --git a/evaluation/mteb/SICK-R.json b/evaluation/mteb/SICK-R.json new file mode 100644 index 0000000000000000000000000000000000000000..a612d3794387c3a02b6fb98305505e1c54f39ef1 --- /dev/null +++ b/evaluation/mteb/SICK-R.json @@ -0,0 +1,17 @@ +{ + "test": { + "cos_sim": { + "pearson": 0.7662983928119752, + "spearman": 0.6592910683118656 + }, + "euclidean": { + "pearson": 0.7110290039690963, + "spearman": 0.6480076622426653 + }, + "evaluation_time": 6.26, + "manhattan": { + "pearson": 0.708944726230188, + "spearman": 0.6475082576033987 + } + } +} \ No newline at end of file diff --git a/evaluation/mteb/STS12.json b/evaluation/mteb/STS12.json new file mode 100644 index 0000000000000000000000000000000000000000..2647297eb22dd7db49acfbcee1d6427a6bedf2b1 --- /dev/null +++ b/evaluation/mteb/STS12.json @@ -0,0 +1,19 @@ +{ + "test": { + "cos_sim": { + "pearson": 0.7442679147085554, + "spearman": 0.6652980061546658 + }, + "euclidean": { + "pearson": 0.7487039477408763, + "spearman": 0.7063397666902785 + }, + "evaluation_time": 5.89, + "manhattan": { + "pearson": 0.7497015137513088, + "spearman": 0.7075951355434326 + } + }, + "dataset_version": null, + "mteb_version": "0.0.2" +} \ No newline at end of file diff --git a/evaluation/mteb/STS13.json b/evaluation/mteb/STS13.json new file mode 100644 index 0000000000000000000000000000000000000000..03e3ec1f041d1bc4a5038e639e30650072fdb87b --- /dev/null +++ b/evaluation/mteb/STS13.json @@ -0,0 +1,19 @@ +{ + "test": { + "cos_sim": { + "pearson": 0.7562472426599542, + "spearman": 0.7616628863742361 + }, + "euclidean": { + "pearson": 0.763297128081315, + "spearman": 0.7719385151966562 + }, + "evaluation_time": 3.17, + "manhattan": { + "pearson": 0.7650363291423257, + "spearman": 0.7737081896355399 + } + }, + "dataset_version": null, + "mteb_version": "0.0.2" +} \ No newline at end of file diff --git a/evaluation/mteb/STS14.json b/evaluation/mteb/STS14.json new file mode 100644 index 0000000000000000000000000000000000000000..f5c9868eede5d1737218f8591a7fad78810d1d58 --- /dev/null +++ b/evaluation/mteb/STS14.json @@ -0,0 +1,19 @@ +{ + "test": { + "cos_sim": { + "pearson": 0.7448227705407036, + "spearman": 0.6904572664009687 + }, + "euclidean": { + "pearson": 0.7176138185714849, + "spearman": 0.6893415452043307 + }, + "evaluation_time": 5.93, + "manhattan": { + "pearson": 0.7168010915543306, + "spearman": 0.6899176321262805 + } + }, + "dataset_version": null, + "mteb_version": "0.0.2" +} \ No newline at end of file diff --git a/evaluation/mteb/STS15.json b/evaluation/mteb/STS15.json new file mode 100644 index 0000000000000000000000000000000000000000..36dffbb94f4f225cbcf41a556ca96ecd800b7bb9 --- /dev/null +++ b/evaluation/mteb/STS15.json @@ -0,0 +1,19 @@ +{ + "test": { + "cos_sim": { + "pearson": 0.781566527175902, + "spearman": 0.7923677712825851 + }, + "euclidean": { + "pearson": 0.7629138438696417, + "spearman": 0.7720108266215374 + }, + "evaluation_time": 5.26, + "manhattan": { + "pearson": 0.7627464935799118, + "spearman": 0.7715286174478099 + } + }, + "dataset_version": null, + "mteb_version": "0.0.2" +} \ No newline at end of file diff --git a/evaluation/mteb/STS16.json b/evaluation/mteb/STS16.json new file mode 100644 index 0000000000000000000000000000000000000000..d5bc5d9d8fc0b18af40477e0f926a4b15832ad28 --- /dev/null +++ b/evaluation/mteb/STS16.json @@ -0,0 +1,19 @@ +{ + "test": { + "cos_sim": { + "pearson": 0.75068454465977, + "spearman": 0.7606792422441928 + }, + "euclidean": { + "pearson": 0.7064605440627698, + "spearman": 0.7021776051117844 + }, + "evaluation_time": 3.58, + "manhattan": { + "pearson": 0.7032479295054919, + "spearman": 0.6989782458638528 + } + }, + "dataset_version": null, + "mteb_version": "0.0.2" +} \ No newline at end of file diff --git a/evaluation/mteb/STS17.json b/evaluation/mteb/STS17.json new file mode 100644 index 0000000000000000000000000000000000000000..120dedd80232038357eea15f4123d692e717d9de --- /dev/null +++ b/evaluation/mteb/STS17.json @@ -0,0 +1,161 @@ +{ + "test": { + "ar-ar": { + "cos_sim": { + "pearson": 0.5554431928210687, + "spearman": 0.5561674586076298 + }, + "euclidean": { + "pearson": 0.5807442713714088, + "spearman": 0.5574066216931719 + }, + "manhattan": { + "pearson": 0.5784021675638542, + "spearman": 0.5520365812536853 + } + }, + "en-ar": { + "cos_sim": { + "pearson": 0.11378463868809098, + "spearman": 0.08209569244801064 + }, + "euclidean": { + "pearson": 0.0107041700730406, + "spearman": -0.02205219710893189 + }, + "manhattan": { + "pearson": -0.007671300251104268, + "spearman": -0.03430645020535567 + } + }, + "en-de": { + "cos_sim": { + "pearson": 0.32714035609290126, + "spearman": 0.3018181775929109 + }, + "euclidean": { + "pearson": 0.2557368595910298, + "spearman": 0.23316649115731378 + }, + "manhattan": { + "pearson": 0.24144200325329615, + "spearman": 0.21646215463384572 + } + }, + "en-en": { + "cos_sim": { + "pearson": 0.8336340470799157, + "spearman": 0.8495398260629698 + }, + "euclidean": { + "pearson": 0.8069876969911645, + "spearman": 0.8097451731130427 + }, + "manhattan": { + "pearson": 0.8065869354146945, + "spearman": 0.808540858718528 + } + }, + "en-tr": { + "cos_sim": { + "pearson": 0.019200044163754912, + "spearman": 0.010393399782021343 + }, + "euclidean": { + "pearson": -0.011376003191297994, + "spearman": -0.018947106671763914 + }, + "manhattan": { + "pearson": -0.038362564474484336, + "spearman": -0.04242750882792888 + } + }, + "es-en": { + "cos_sim": { + "pearson": 0.26561262451099577, + "spearman": 0.28776666666659906 + }, + "euclidean": { + "pearson": 0.14640410196999087, + "spearman": 0.1610557011701786 + }, + "manhattan": { + "pearson": 0.15019405495911273, + "spearman": 0.1537192083104197 + } + }, + "es-es": { + "cos_sim": { + "pearson": 0.6975442020014331, + "spearman": 0.7188444295144646 + }, + "euclidean": { + "pearson": 0.7384934185952773, + "spearman": 0.7326911108021089 + }, + "manhattan": { + "pearson": 0.7404354196954575, + "spearman": 0.7337650787943871 + } + }, + "evaluation_time": 15.15, + "fr-en": { + "cos_sim": { + "pearson": 0.2770511842301491, + "spearman": 0.26339466714066445 + }, + "euclidean": { + "pearson": 0.09323158236506385, + "spearman": 0.0732083231520273 + }, + "manhattan": { + "pearson": 0.07807399527573071, + "spearman": 0.05525546663067112 + } + }, + "it-en": { + "cos_sim": { + "pearson": 0.24226521799447692, + "spearman": 0.20729929404589678 + }, + "euclidean": { + "pearson": 0.06753378617205011, + "spearman": 0.06281654679029505 + }, + "manhattan": { + "pearson": 0.07087180250449322, + "spearman": 0.0641611659259516 + } + }, + "ko-ko": { + "cos_sim": { + "pearson": 0.39433272899394367, + "spearman": 0.5238601027550566 + }, + "euclidean": { + "pearson": 0.46409999048857453, + "spearman": 0.5100333465175934 + }, + "manhattan": { + "pearson": 0.4655753533133655, + "spearman": 0.5107550440519388 + } + }, + "nl-en": { + "cos_sim": { + "pearson": 0.2913141236406123, + "spearman": 0.2505342961279355 + }, + "euclidean": { + "pearson": 0.10657141303961999, + "spearman": 0.09712124819778453 + }, + "manhattan": { + "pearson": 0.12481782693315688, + "spearman": 0.11287958480905973 + } + } + }, + "dataset_version": null, + "mteb_version": "0.0.2" +} \ No newline at end of file diff --git a/evaluation/mteb/STS22.json b/evaluation/mteb/STS22.json new file mode 100644 index 0000000000000000000000000000000000000000..f9af257f45a7a43efc057f3a293342c58ad01c2d --- /dev/null +++ b/evaluation/mteb/STS22.json @@ -0,0 +1,259 @@ +{ + "test": { + "ar": { + "cos_sim": { + "pearson": 0.09134927430889528, + "spearman": 0.2818922448944151 + }, + "euclidean": { + "pearson": 0.1986814169549051, + "spearman": 0.2751958864494863 + }, + "manhattan": { + "pearson": 0.2180949221238945, + "spearman": 0.2825217200494078 + } + }, + "de": { + "cos_sim": { + "pearson": 0.1926519187000913, + "spearman": 0.21987647321429005 + }, + "euclidean": { + "pearson": 0.17850618752342948, + "spearman": 0.2286669392885474 + }, + "manhattan": { + "pearson": 0.1816183594260708, + "spearman": 0.23637510352837907 + } + }, + "de-en": { + "cos_sim": { + "pearson": 0.49378654125882004, + "spearman": 0.5307135629778896 + }, + "euclidean": { + "pearson": 0.49292014167110915, + "spearman": 0.5054523702399645 + }, + "manhattan": { + "pearson": 0.5126576414126853, + "spearman": 0.5197908640319361 + } + }, + "de-fr": { + "cos_sim": { + "pearson": 0.3512956772546032, + "spearman": 0.3296920218281008 + }, + "euclidean": { + "pearson": 0.3423140384382136, + "spearman": 0.3219303153191447 + }, + "manhattan": { + "pearson": 0.3418946827660064, + "spearman": 0.3488706570973238 + } + }, + "de-pl": { + "cos_sim": { + "pearson": 0.30507667380509634, + "spearman": 0.20447284723752715 + }, + "euclidean": { + "pearson": 0.29662041381794474, + "spearman": 0.20939990379746756 + }, + "manhattan": { + "pearson": 0.32511208050632806, + "spearman": 0.23773047901712493 + } + }, + "en": { + "cos_sim": { + "pearson": 0.6404750650962878, + "spearman": 0.6566183708171826 + }, + "euclidean": { + "pearson": 0.6690887604405887, + "spearman": 0.6689814072484552 + }, + "manhattan": { + "pearson": 0.6731627110509089, + "spearman": 0.6701048176165322 + } + }, + "es": { + "cos_sim": { + "pearson": 0.34221261828226934, + "spearman": 0.49811823238907665 + }, + "euclidean": { + "pearson": 0.4450394399762147, + "spearman": 0.5095918449507287 + }, + "manhattan": { + "pearson": 0.4583191034038624, + "spearman": 0.5019040986611795 + } + }, + "es-en": { + "cos_sim": { + "pearson": 0.4492565239256213, + "spearman": 0.49512539047677256 + }, + "euclidean": { + "pearson": 0.4879346518897415, + "spearman": 0.5147957870101565 + }, + "manhattan": { + "pearson": 0.49513145538980435, + "spearman": 0.5189520789318917 + } + }, + "es-it": { + "cos_sim": { + "pearson": 0.3854682179114309, + "spearman": 0.45782560880405704 + }, + "euclidean": { + "pearson": 0.46496857002368486, + "spearman": 0.4821270426410012 + }, + "manhattan": { + "pearson": 0.4687183911937405, + "spearman": 0.4755698777385152 + } + }, + "evaluation_time": 69.57, + "fr": { + "cos_sim": { + "pearson": 0.544745185734135, + "spearman": 0.6766493409568727 + }, + "euclidean": { + "pearson": 0.6013580336797049, + "spearman": 0.6612319300814538 + }, + "manhattan": { + "pearson": 0.6081621036870816, + "spearman": 0.6570010026716765 + } + }, + "fr-pl": { + "cos_sim": { + "pearson": 0.7110820459712155, + "spearman": 0.6197797868009122 + }, + "euclidean": { + "pearson": 0.6030910689156633, + "spearman": 0.6197797868009122 + }, + "manhattan": { + "pearson": 0.6634051769640379, + "spearman": 0.6197797868009122 + } + }, + "it": { + "cos_sim": { + "pearson": 0.4524169032111187, + "spearman": 0.4824795739512037 + }, + "euclidean": { + "pearson": 0.49227194943998964, + "spearman": 0.49641024420428087 + }, + "manhattan": { + "pearson": 0.4949788773297025, + "spearman": 0.49940515338096303 + } + }, + "pl": { + "cos_sim": { + "pearson": 0.03620381732096531, + "spearman": 0.23308439517991938 + }, + "euclidean": { + "pearson": 0.00965453312113125, + "spearman": 0.24235967620790316 + }, + "manhattan": { + "pearson": 0.014408922275701605, + "spearman": 0.25161920137046095 + } + }, + "pl-en": { + "cos_sim": { + "pearson": 0.3642138324083909, + "spearman": 0.36798674894178013 + }, + "euclidean": { + "pearson": 0.27760612942610086, + "spearman": 0.29140966500287624 + }, + "manhattan": { + "pearson": 0.28456674031350115, + "spearman": 0.2746356370924497 + } + }, + "ru": { + "cos_sim": { + "pearson": 0.03638648294235208, + "spearman": 0.09068119621940965 + }, + "euclidean": { + "pearson": 0.008123129118737714, + "spearman": 0.09173672890166147 + }, + "manhattan": { + "pearson": 0.007545188998226579, + "spearman": 0.08431719541986525 + } + }, + "tr": { + "cos_sim": { + "pearson": 0.16694896287262667, + "spearman": 0.3466348380997687 + }, + "euclidean": { + "pearson": 0.29415825529188605, + "spearman": 0.3833011033170646 + }, + "manhattan": { + "pearson": 0.3123273195263394, + "spearman": 0.39100557857557955 + } + }, + "zh": { + "cos_sim": { + "pearson": 0.029720915749084316, + "spearman": 0.2548511383289232 + }, + "euclidean": { + "pearson": 0.12751569670148918, + "spearman": 0.24940721642439287 + }, + "manhattan": { + "pearson": 0.14310238482989826, + "spearman": 0.2469821216148647 + } + }, + "zh-en": { + "cos_sim": { + "pearson": 0.2655350664089358, + "spearman": 0.2868170719697501 + }, + "euclidean": { + "pearson": 0.12613577889195138, + "spearman": 0.13589493311702933 + }, + "manhattan": { + "pearson": 0.11640157427420958, + "spearman": 0.10345223941212416 + } + } + }, + "dataset_version": null, + "mteb_version": "0.0.2" +} \ No newline at end of file diff --git a/evaluation/mteb/STSBenchmark.json b/evaluation/mteb/STSBenchmark.json new file mode 100644 index 0000000000000000000000000000000000000000..f3ff526fd23d2cfd6f94acf67c45bbbb2ebf1213 --- /dev/null +++ b/evaluation/mteb/STSBenchmark.json @@ -0,0 +1,34 @@ +{ + "test": { + "cos_sim": { + "pearson": 0.7653032504460737, + "spearman": 0.7533716094627373 + }, + "euclidean": { + "pearson": 0.6964662673290599, + "spearman": 0.6730188896368857 + }, + "evaluation_time": 1.37, + "manhattan": { + "pearson": 0.6945096082050808, + "spearman": 0.670718727259371 + } + }, + "validation": { + "cos_sim": { + "pearson": 0.805759061953967, + "spearman": 0.8094604064820345 + }, + "euclidean": { + "pearson": 0.766103829800008, + "spearman": 0.7712739354933887 + }, + "evaluation_time": 3.63, + "manhattan": { + "pearson": 0.7650423590936141, + "spearman": 0.7695655357394099 + } + }, + "dataset_version": null, + "mteb_version": "0.0.2" +} \ No newline at end of file diff --git a/evaluation/mteb/SciDocsRR.json b/evaluation/mteb/SciDocsRR.json new file mode 100644 index 0000000000000000000000000000000000000000..ff2d9d3704f416d88a9f345504341172c0a8d25a --- /dev/null +++ b/evaluation/mteb/SciDocsRR.json @@ -0,0 +1,14 @@ +{ + "test": { + "evaluation_time": 88.78, + "map": 0.7133941904192648, + "mrr": 0.8973766429648782 + }, + "validation": { + "evaluation_time": 86.05, + "map": 0.7152125832536065, + "mrr": 0.9032372021068328 + }, + "dataset_version": null, + "mteb_version": "0.0.2" +} \ No newline at end of file diff --git a/evaluation/mteb/SciFact.json b/evaluation/mteb/SciFact.json new file mode 100644 index 0000000000000000000000000000000000000000..11a7663294e77d7207c5cc3cdc33d5159f9e8cd7 --- /dev/null +++ b/evaluation/mteb/SciFact.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 102.89, + "map_at_1": 0.43333, + "map_at_10": 0.52364, + "map_at_100": 0.53184, + "map_at_1000": 0.53234, + "map_at_3": 0.49832, + "map_at_5": 0.51244, + "mrr_at_1": 0.45333, + "mrr_at_10": 0.53455, + "mrr_at_100": 0.54191, + "mrr_at_1000": 0.54235, + "mrr_at_3": 0.51556, + "mrr_at_5": 0.52622, + "ndcg_at_1": 0.45333, + "ndcg_at_10": 0.56899, + "ndcg_at_100": 0.60702, + "ndcg_at_1000": 0.62046, + "ndcg_at_3": 0.52451, + "ndcg_at_5": 0.54534, + "precision_at_1": 0.45333, + "precision_at_10": 0.078, + "precision_at_100": 0.00987, + "precision_at_1000": 0.0011, + "precision_at_3": 0.20778, + "precision_at_5": 0.13867, + "recall_at_1": 0.43333, + "recall_at_10": 0.697, + "recall_at_100": 0.869, + "recall_at_1000": 0.976, + "recall_at_3": 0.57817, + "recall_at_5": 0.62828 + } +} \ No newline at end of file diff --git a/evaluation/mteb/SprintDuplicateQuestions.json b/evaluation/mteb/SprintDuplicateQuestions.json new file mode 100644 index 0000000000000000000000000000000000000000..01e9afae1e53a97ee933f0fbbaff729ff82b667b --- /dev/null +++ b/evaluation/mteb/SprintDuplicateQuestions.json @@ -0,0 +1,92 @@ +{ + "test": { + "cos_sim": { + "accuracy": 0.997, + "accuracy_threshold": 0.7497431039810181, + "ap": 0.8988577913120002, + "f1": 0.8462694041061593, + "f1_threshold": 0.7372856140136719, + "precision": 0.847542627883651, + "recall": 0.845 + }, + "dot": { + "accuracy": 0.9924752475247525, + "accuracy_threshold": 1300.7469482421875, + "ap": 0.5681855467290009, + "f1": 0.5608412618928393, + "f1_threshold": 1153.91796875, + "precision": 0.5616850551654965, + "recall": 0.56 + }, + "euclidean": { + "accuracy": 0.9970594059405941, + "accuracy_threshold": 26.410518646240234, + "ap": 0.9012451226491524, + "f1": 0.8444211629125197, + "f1_threshold": 26.42844581604004, + "precision": 0.8866886688668867, + "recall": 0.806 + }, + "evaluation_time": 11.33, + "manhattan": { + "accuracy": 0.9971287128712871, + "accuracy_threshold": 548.025634765625, + "ap": 0.9067590584183216, + "f1": 0.8485436893203884, + "f1_threshold": 597.0555419921875, + "precision": 0.8245283018867925, + "recall": 0.874 + }, + "max": { + "accuracy": 0.9971287128712871, + "ap": 0.9067590584183216, + "f1": 0.8485436893203884 + } + }, + "validation": { + "cos_sim": { + "accuracy": 0.9967227722772277, + "accuracy_threshold": 0.7613695859909058, + "ap": 0.8883123214507284, + "f1": 0.8282619907168643, + "f1_threshold": 0.7497333884239197, + "precision": 0.8551650692225772, + "recall": 0.803 + }, + "dot": { + "accuracy": 0.9923267326732673, + "accuracy_threshold": 1290.875244140625, + "ap": 0.5531250999744529, + "f1": 0.56, + "f1_threshold": 1160.09130859375, + "precision": 0.5671794871794872, + "recall": 0.553 + }, + "euclidean": { + "accuracy": 0.997, + "accuracy_threshold": 26.642988204956055, + "ap": 0.8946082919005356, + "f1": 0.8430864836872086, + "f1_threshold": 27.082069396972656, + "precision": 0.874328678839957, + "recall": 0.814 + }, + "evaluation_time": 13.04, + "manhattan": { + "accuracy": 0.9970693069306931, + "accuracy_threshold": 571.5331420898438, + "ap": 0.9011046374915657, + "f1": 0.849440488301119, + "f1_threshold": 579.8192138671875, + "precision": 0.8643892339544513, + "recall": 0.835 + }, + "max": { + "accuracy": 0.9970693069306931, + "ap": 0.9011046374915657, + "f1": 0.849440488301119 + } + }, + "dataset_version": null, + "mteb_version": "0.0.2" +} \ No newline at end of file diff --git a/evaluation/mteb/StackExchangeClustering.json b/evaluation/mteb/StackExchangeClustering.json new file mode 100644 index 0000000000000000000000000000000000000000..3e0c98f82a472c5b42fdc53e174555ccf5938384 --- /dev/null +++ b/evaluation/mteb/StackExchangeClustering.json @@ -0,0 +1,14 @@ +{ + "test": { + "evaluation_time": 337.8, + "v_measure": 0.5274481093815175, + "v_measure_std": 0.04315360617635268 + }, + "validation": { + "evaluation_time": 266.06, + "v_measure": 0.4878126733436697, + "v_measure_std": 0.03856213527967481 + }, + "dataset_version": null, + "mteb_version": "0.0.2" +} \ No newline at end of file diff --git a/evaluation/mteb/StackExchangeClusteringP2P.json b/evaluation/mteb/StackExchangeClusteringP2P.json new file mode 100644 index 0000000000000000000000000000000000000000..a718d89ad82943c70667f88e274f2b23c62d69cb --- /dev/null +++ b/evaluation/mteb/StackExchangeClusteringP2P.json @@ -0,0 +1,9 @@ +{ + "test": { + "evaluation_time": 470.78, + "v_measure": 0.3265999453562101, + "v_measure_std": 0.015712961835047078 + }, + "dataset_version": null, + "mteb_version": "0.0.2" +} \ No newline at end of file diff --git a/evaluation/mteb/StackOverflowDupQuestions.json b/evaluation/mteb/StackOverflowDupQuestions.json new file mode 100644 index 0000000000000000000000000000000000000000..bfb915dad80e4d9db16e9cfb5b73824356ed742e --- /dev/null +++ b/evaluation/mteb/StackOverflowDupQuestions.json @@ -0,0 +1,14 @@ +{ + "test": { + "evaluation_time": 47.72, + "map": 0.44744984645554653, + "mrr": 0.45333879764026824 + }, + "validation": { + "evaluation_time": 45.26, + "map": 0.44744984645554653, + "mrr": 0.45333879764026824 + }, + "dataset_version": null, + "mteb_version": "0.0.2" +} \ No newline at end of file diff --git a/evaluation/mteb/SummEval.json b/evaluation/mteb/SummEval.json new file mode 100644 index 0000000000000000000000000000000000000000..3a9cdd545f0bb8bfc83a3a45e1843758ac1dba95 --- /dev/null +++ b/evaluation/mteb/SummEval.json @@ -0,0 +1,15 @@ +{ + "test": { + "cos_sim": { + "pearson": 0.295961822471627, + "spearman": 0.2890145030911965 + }, + "dot": { + "pearson": 0.2917474339962901, + "spearman": 0.2736297597081396 + }, + "evaluation_time": 39.55 + }, + "dataset_version": null, + "mteb_version": "0.0.2" +} \ No newline at end of file diff --git a/evaluation/mteb/TRECCOVID.json b/evaluation/mteb/TRECCOVID.json new file mode 100644 index 0000000000000000000000000000000000000000..26d882ffd498093164acc81367855d889b6b2c1d --- /dev/null +++ b/evaluation/mteb/TRECCOVID.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 178.94, + "map_at_1": 0.00241, + "map_at_10": 0.01672, + "map_at_100": 0.07859, + "map_at_1000": 0.17616, + "map_at_3": 0.00631, + "map_at_5": 0.00968, + "mrr_at_1": 0.9, + "mrr_at_10": 0.92952, + "mrr_at_100": 0.93036, + "mrr_at_1000": 0.93036, + "mrr_at_3": 0.92667, + "mrr_at_5": 0.92667, + "ndcg_at_1": 0.83, + "ndcg_at_10": 0.70302, + "ndcg_at_100": 0.48149, + "ndcg_at_1000": 0.40709, + "ndcg_at_3": 0.79173, + "ndcg_at_5": 0.75347, + "precision_at_1": 0.9, + "precision_at_10": 0.726, + "precision_at_100": 0.4846, + "precision_at_1000": 0.18094, + "precision_at_3": 0.84, + "precision_at_5": 0.788, + "recall_at_1": 0.00241, + "recall_at_10": 0.01814, + "recall_at_100": 0.11141, + "recall_at_1000": 0.37709, + "recall_at_3": 0.00647, + "recall_at_5": 0.01015 + } +} \ No newline at end of file diff --git a/evaluation/mteb/Touche2020.json b/evaluation/mteb/Touche2020.json new file mode 100644 index 0000000000000000000000000000000000000000..00c976abeaf655e833b82b4134995e2e8e483343 --- /dev/null +++ b/evaluation/mteb/Touche2020.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 344.64, + "map_at_1": 0.02782, + "map_at_10": 0.0906, + "map_at_100": 0.14571, + "map_at_1000": 0.16007, + "map_at_3": 0.05037, + "map_at_5": 0.0663, + "mrr_at_1": 0.34694, + "mrr_at_10": 0.48243, + "mrr_at_100": 0.49065, + "mrr_at_1000": 0.49065, + "mrr_at_3": 0.44898, + "mrr_at_5": 0.46429, + "ndcg_at_1": 0.31633, + "ndcg_at_10": 0.22972, + "ndcg_at_100": 0.34777, + "ndcg_at_1000": 0.45639, + "ndcg_at_3": 0.26398, + "ndcg_at_5": 0.24418, + "precision_at_1": 0.34694, + "precision_at_10": 0.19796, + "precision_at_100": 0.07224, + "precision_at_1000": 0.01445, + "precision_at_3": 0.26531, + "precision_at_5": 0.23265, + "recall_at_1": 0.02782, + "recall_at_10": 0.14841, + "recall_at_100": 0.4486, + "recall_at_1000": 0.78227, + "recall_at_3": 0.05959, + "recall_at_5": 0.08969 + } +} \ No newline at end of file diff --git a/evaluation/mteb/ToxicConversationsClassification.json b/evaluation/mteb/ToxicConversationsClassification.json new file mode 100644 index 0000000000000000000000000000000000000000..e90f990c2742fca07140a60810f6d84d3829f1d2 --- /dev/null +++ b/evaluation/mteb/ToxicConversationsClassification.json @@ -0,0 +1,14 @@ +{ + "test": { + "accuracy": 0.6265799999999999, + "accuracy_stderr": 0.03391476374678143, + "ap": 0.1096353161716344, + "ap_stderr": 0.007857292626948172, + "evaluation_time": 1110.08, + "f1": 0.48294226423442643, + "f1_stderr": 0.018159281905209768, + "main_score": 0.1096353161716344 + }, + "dataset_version": null, + "mteb_version": "0.0.2" +} \ No newline at end of file diff --git a/evaluation/mteb/TweetSentimentExtractionClassification.json b/evaluation/mteb/TweetSentimentExtractionClassification.json new file mode 100644 index 0000000000000000000000000000000000000000..012b7a04d28a29e1131f1f1342872defb686759c --- /dev/null +++ b/evaluation/mteb/TweetSentimentExtractionClassification.json @@ -0,0 +1,12 @@ +{ + "test": { + "accuracy": 0.5240803621958121, + "accuracy_stderr": 0.018047541749608313, + "evaluation_time": 36.03, + "f1": 0.5261009636022186, + "f1_stderr": 0.01720012960423703, + "main_score": 0.5240803621958121 + }, + "dataset_version": null, + "mteb_version": "0.0.2" +} \ No newline at end of file diff --git a/evaluation/mteb/TwentyNewsgroupsClustering.json b/evaluation/mteb/TwentyNewsgroupsClustering.json new file mode 100644 index 0000000000000000000000000000000000000000..d7b8918487514b42f787cfbcb923eb47b74c7c48 --- /dev/null +++ b/evaluation/mteb/TwentyNewsgroupsClustering.json @@ -0,0 +1,9 @@ +{ + "test": { + "evaluation_time": 34.69, + "v_measure": 0.3212697126747911, + "v_measure_std": 0.016166107661741317 + }, + "dataset_version": null, + "mteb_version": "0.0.2" +} \ No newline at end of file diff --git a/evaluation/mteb/TwitterSemEval2015.json b/evaluation/mteb/TwitterSemEval2015.json new file mode 100644 index 0000000000000000000000000000000000000000..8e7a1d6fd7c39b26db8aea4d8f7adcc3c43ecae0 --- /dev/null +++ b/evaluation/mteb/TwitterSemEval2015.json @@ -0,0 +1,48 @@ +{ + "test": { + "cos_sim": { + "accuracy": 0.8069976753889253, + "accuracy_threshold": 0.760696530342102, + "ap": 0.5474680676121269, + "f1": 0.5318923998590391, + "f1_threshold": 0.6627046465873718, + "precision": 0.47935634130849036, + "recall": 0.5973614775725594 + }, + "dot": { + "accuracy": 0.7933480360016689, + "accuracy_threshold": 1049.509033203125, + "ap": 0.48469021289336267, + "f1": 0.5048010973936901, + "f1_threshold": 762.2753295898438, + "precision": 0.4206084051345173, + "recall": 0.6311345646437995 + }, + "euclidean": { + "accuracy": 0.7978780473266973, + "accuracy_threshold": 22.804443359375, + "ap": 0.5025832725516481, + "f1": 0.49655838666827684, + "f1_threshold": 28.18334197998047, + "precision": 0.4578044978846582, + "recall": 0.5424802110817942 + }, + "evaluation_time": 12.23, + "manhattan": { + "accuracy": 0.797699231090183, + "accuracy_threshold": 473.0945129394531, + "ap": 0.49898924857143634, + "f1": 0.4933043378734119, + "f1_threshold": 604.353759765625, + "precision": 0.43561754598746716, + "recall": 0.5686015831134564 + }, + "max": { + "accuracy": 0.8069976753889253, + "ap": 0.5474680676121269, + "f1": 0.5318923998590391 + } + }, + "dataset_version": null, + "mteb_version": "0.0.2" +} \ No newline at end of file diff --git a/evaluation/mteb/TwitterURLCorpus.json b/evaluation/mteb/TwitterURLCorpus.json new file mode 100644 index 0000000000000000000000000000000000000000..62fb0bd23264d5432b496fe0612204bc47b1320b --- /dev/null +++ b/evaluation/mteb/TwitterURLCorpus.json @@ -0,0 +1,48 @@ +{ + "test": { + "cos_sim": { + "accuracy": 0.8690573213800598, + "accuracy_threshold": 0.6966493129730225, + "ap": 0.8105760818661524, + "f1": 0.7364688856729379, + "f1_threshold": 0.6471728086471558, + "precision": 0.6946491946491946, + "recall": 0.7836464428703419 + }, + "dot": { + "accuracy": 0.8380680715644041, + "accuracy_threshold": 934.6768798828125, + "ap": 0.724977400594746, + "f1": 0.6868460650173216, + "f1_threshold": 830.3577270507812, + "precision": 0.6295464750785811, + "recall": 0.7556205728364644 + }, + "euclidean": { + "accuracy": 0.8597430822369697, + "accuracy_threshold": 27.972591400146484, + "ap": 0.7886101740829327, + "f1": 0.7107960824663695, + "f1_threshold": 29.554906845092773, + "precision": 0.7036897306270279, + "recall": 0.718047428395442 + }, + "evaluation_time": 38.51, + "manhattan": { + "accuracy": 0.8594132029339854, + "accuracy_threshold": 588.2883911132812, + "ap": 0.7877876711171923, + "f1": 0.7107869075515911, + "f1_threshold": 626.1203002929688, + "precision": 0.6980697847067557, + "recall": 0.7239759778256852 + }, + "max": { + "accuracy": 0.8690573213800598, + "ap": 0.8105760818661524, + "f1": 0.7364688856729379 + } + }, + "dataset_version": null, + "mteb_version": "0.0.2" +} \ No newline at end of file