---
# Leaderboard configuration ("MTEB Leaderboard").
#
# Layout:
#   version - version string of this configuration file.
#   config  - repository identifiers and the leaderboard display name.
#   tasks   - per task type: icon, metric key, and a description of the metric.
#   boards  - per board: display metadata plus the task names it contains,
#             grouped by task type.
#
# NOTE(review): this file was reconstructed from a whitespace-collapsed copy.
# The nesting of `tasks` and `boards` as top-level keys (siblings of `config`)
# is inferred from the key grouping - confirm against the code that loads it.
version: "0.0.1"

config:
  REPO_ID: "mteb/leaderboard"
  RESULTS_REPO: mteb/results
  LEADERBOARD_NAME: "MTEB Leaderboard"

# One entry per task type. `metric` is the metric key used for that task type;
# `metric_description` is human-readable text (Markdown links are used in some).
tasks:
  BitextMining:
    icon: "🎌"
    metric: f1
    metric_description: "[F1](https://huggingface.co/spaces/evaluate-metric/f1)"
  Classification:
    icon: "❤️"
    metric: accuracy
    metric_description: "[Accuracy](https://huggingface.co/spaces/evaluate-metric/accuracy)"
  Clustering:
    icon: "✨"
    metric: v_measure
    metric_description: "Validity Measure (v_measure)"
  PairClassification:
    icon: "🎭"
    metric: cos_sim_ap
    metric_description: "Average Precision based on Cosine Similarities (cos_sim_ap)"
  Reranking:
    icon: "🥈"
    metric: map
    metric_description: "Mean Average Precision (MAP)"
  Retrieval:
    icon: "🔎"
    metric: ndcg_at_10
    metric_description: "Normalized Discounted Cumulative Gain @ k (ndcg_at_10)"
  STS:
    icon: "🤖"
    metric: cos_sim_spearman
    metric_description: "Spearman correlation based on cosine similarity"
  Summarization:
    icon: "📜"
    metric: cos_sim_spearman
    metric_description: "Spearman correlation based on cosine similarity"

# One entry per board. Every board carries the same set of keys:
#   title, language_long, has_overall, acronym, icon, special_icons, credits,
#   tasks (mapping of task type -> list of task names).
# `special_icons`, when not null, maps a task type to a board-specific icon.
boards:
  en:
    title: English
    language_long: "English"
    has_overall: true
    acronym: null
    icon: null
    special_icons: null
    credits: null
    tasks:
      Classification:
        - AmazonCounterfactualClassification (en)
        - AmazonPolarityClassification
        - AmazonReviewsClassification (en)
        - Banking77Classification
        - EmotionClassification
        - ImdbClassification
        - MassiveIntentClassification (en)
        - MassiveScenarioClassification (en)
        - MTOPDomainClassification (en)
        - MTOPIntentClassification (en)
        - ToxicConversationsClassification
        - TweetSentimentExtractionClassification
      Clustering:
        - ArxivClusteringP2P
        - ArxivClusteringS2S
        - BiorxivClusteringP2P
        - BiorxivClusteringS2S
        - MedrxivClusteringP2P
        - MedrxivClusteringS2S
        - RedditClustering
        - RedditClusteringP2P
        - StackExchangeClustering
        - StackExchangeClusteringP2P
        - TwentyNewsgroupsClustering
      PairClassification:
        - SprintDuplicateQuestions
        - TwitterSemEval2015
        - TwitterURLCorpus
      Reranking:
        - AskUbuntuDupQuestions
        - MindSmallReranking
        - SciDocsRR
        - StackOverflowDupQuestions
      Retrieval:
        - ArguAna
        - ClimateFEVER
        - CQADupstackRetrieval
        - DBPedia
        - FEVER
        - FiQA2018
        - HotpotQA
        - MSMARCO
        - NFCorpus
        - NQ
        - QuoraRetrieval
        - SCIDOCS
        - SciFact
        - Touche2020
        - TRECCOVID
      STS:
        - BIOSSES
        - SICK-R
        - STS12
        - STS13
        - STS14
        - STS15
        - STS16
        - STS17 (en-en)
        - STS22 (en)
        - STSBenchmark
      Summarization:
        - SummEval

  en-x:
    title: "English-X"
    # NOTE(review): the leading space is present in the source value and is
    # kept unchanged; confirm whether it is intentional.
    language_long: " 117 (Pairs of: English & other language)"
    has_overall: false
    acronym: null
    icon: null
    special_icons: null
    credits: null
    tasks:
      BitextMining:
        - BUCC (de-en)
        - BUCC (fr-en)
        - BUCC (ru-en)
        - BUCC (zh-en)
        - Tatoeba (afr-eng)
        - Tatoeba (amh-eng)
        - Tatoeba (ang-eng)
        - Tatoeba (ara-eng)
        - Tatoeba (arq-eng)
        - Tatoeba (arz-eng)
        - Tatoeba (ast-eng)
        - Tatoeba (awa-eng)
        - Tatoeba (aze-eng)
        - Tatoeba (bel-eng)
        - Tatoeba (ben-eng)
        - Tatoeba (ber-eng)
        - Tatoeba (bos-eng)
        - Tatoeba (bre-eng)
        - Tatoeba (bul-eng)
        - Tatoeba (cat-eng)
        - Tatoeba (cbk-eng)
        - Tatoeba (ceb-eng)
        - Tatoeba (ces-eng)
        - Tatoeba (cha-eng)
        - Tatoeba (cmn-eng)
        - Tatoeba (cor-eng)
        - Tatoeba (csb-eng)
        - Tatoeba (cym-eng)
        - Tatoeba (dan-eng)
        - Tatoeba (deu-eng)
        - Tatoeba (dsb-eng)
        - Tatoeba (dtp-eng)
        - Tatoeba (ell-eng)
        - Tatoeba (epo-eng)
        - Tatoeba (est-eng)
        - Tatoeba (eus-eng)
        - Tatoeba (fao-eng)
        - Tatoeba (fin-eng)
        - Tatoeba (fra-eng)
        - Tatoeba (fry-eng)
        - Tatoeba (gla-eng)
        - Tatoeba (gle-eng)
        - Tatoeba (glg-eng)
        - Tatoeba (gsw-eng)
        - Tatoeba (heb-eng)
        - Tatoeba (hin-eng)
        - Tatoeba (hrv-eng)
        - Tatoeba (hsb-eng)
        - Tatoeba (hun-eng)
        - Tatoeba (hye-eng)
        - Tatoeba (ido-eng)
        - Tatoeba (ile-eng)
        - Tatoeba (ina-eng)
        - Tatoeba (ind-eng)
        - Tatoeba (isl-eng)
        - Tatoeba (ita-eng)
        - Tatoeba (jav-eng)
        - Tatoeba (jpn-eng)
        - Tatoeba (kab-eng)
        - Tatoeba (kat-eng)
        - Tatoeba (kaz-eng)
        - Tatoeba (khm-eng)
        - Tatoeba (kor-eng)
        - Tatoeba (kur-eng)
        - Tatoeba (kzj-eng)
        - Tatoeba (lat-eng)
        - Tatoeba (lfn-eng)
        - Tatoeba (lit-eng)
        - Tatoeba (lvs-eng)
        - Tatoeba (mal-eng)
        - Tatoeba (mar-eng)
        - Tatoeba (max-eng)
        - Tatoeba (mhr-eng)
        - Tatoeba (mkd-eng)
        - Tatoeba (mon-eng)
        - Tatoeba (nds-eng)
        - Tatoeba (nld-eng)
        - Tatoeba (nno-eng)
        - Tatoeba (nob-eng)
        - Tatoeba (nov-eng)
        - Tatoeba (oci-eng)
        - Tatoeba (orv-eng)
        - Tatoeba (pam-eng)
        - Tatoeba (pes-eng)
        - Tatoeba (pms-eng)
        - Tatoeba (pol-eng)
        - Tatoeba (por-eng)
        - Tatoeba (ron-eng)
        - Tatoeba (rus-eng)
        - Tatoeba (slk-eng)
        - Tatoeba (slv-eng)
        - Tatoeba (spa-eng)
        - Tatoeba (sqi-eng)
        - Tatoeba (srp-eng)
        - Tatoeba (swe-eng)
        - Tatoeba (swg-eng)
        - Tatoeba (swh-eng)
        - Tatoeba (tam-eng)
        - Tatoeba (tat-eng)
        - Tatoeba (tel-eng)
        - Tatoeba (tgl-eng)
        - Tatoeba (tha-eng)
        - Tatoeba (tuk-eng)
        - Tatoeba (tur-eng)
        - Tatoeba (tzl-eng)
        - Tatoeba (uig-eng)
        - Tatoeba (ukr-eng)
        - Tatoeba (urd-eng)
        - Tatoeba (uzb-eng)
        - Tatoeba (vie-eng)
        - Tatoeba (war-eng)
        - Tatoeba (wuu-eng)
        - Tatoeba (xho-eng)
        - Tatoeba (yid-eng)
        - Tatoeba (yue-eng)
        - Tatoeba (zsm-eng)

  zh:
    title: Chinese
    language_long: Chinese
    has_overall: true
    acronym: C-MTEB
    icon: "🇨🇳"
    special_icons:
      Classification: "🧡"
    credits: "[FlagEmbedding](https://github.com/FlagOpen/FlagEmbedding)"
    tasks:
      Classification:
        - AmazonReviewsClassification (zh)
        - IFlyTek
        - JDReview
        - MassiveIntentClassification (zh-CN)
        - MassiveScenarioClassification (zh-CN)
        - MultilingualSentiment
        - OnlineShopping
        - TNews
        - Waimai
      Clustering:
        - CLSClusteringP2P
        - CLSClusteringS2S
        - ThuNewsClusteringP2P
        - ThuNewsClusteringS2S
      PairClassification:
        - Cmnli
        - Ocnli
      Reranking:
        - CMedQAv1
        - CMedQAv2
        - MMarcoReranking
        - T2Reranking
      Retrieval:
        - CmedqaRetrieval
        - CovidRetrieval
        - DuRetrieval
        - EcomRetrieval
        - MedicalRetrieval
        - MMarcoRetrieval
        - T2Retrieval
        - VideoRetrieval
      STS:
        - AFQMC
        - ATEC
        - BQ
        - LCQMC
        - PAWSX
        - QBQTC
        - STS22 (zh)
        - STSB

  da:
    title: Danish
    language_long: Danish
    has_overall: false
    acronym: null
    icon: "🇩🇰"
    special_icons:
      Classification: "🤍"
    credits: "[Kenneth Enevoldsen](https://github.com/KennethEnevoldsen), [scandinavian-embedding-benchmark](https://kennethenevoldsen.github.io/scandinavian-embedding-benchmark/)"
    tasks:
      BitextMining:
        - BornholmBitextMining
      Classification:
        - AngryTweetsClassification
        - DanishPoliticalCommentsClassification
        - DKHateClassification
        - LccSentimentClassification
        - MassiveIntentClassification (da)
        - MassiveScenarioClassification (da)
        - NordicLangClassification
        - ScalaDaClassification

  fr:
    title: French
    language_long: "French"
    has_overall: true
    acronym: "F-MTEB"
    icon: "🇫🇷"
    special_icons:
      Classification: "💙"
    credits: "[Lyon-NLP](https://github.com/Lyon-NLP): [Gabriel Sequeira](https://github.com/GabrielSequeira), [Imene Kerboua](https://github.com/imenelydiaker), [Wissam Siblini](https://github.com/wissam-sib), [Mathieu Ciancone](https://github.com/MathieuCiancone), [Marion Schaeffer](https://github.com/schmarion)"
    tasks:
      Classification:
        - AmazonReviewsClassification (fr)
        - MasakhaNEWSClassification (fra)
        - MassiveIntentClassification (fr)
        - MassiveScenarioClassification (fr)
        - MTOPDomainClassification (fr)
        - MTOPIntentClassification (fr)
      Clustering:
        - AlloProfClusteringP2P
        - AlloProfClusteringS2S
        - HALClusteringS2S
        - MLSUMClusteringP2P
        - MLSUMClusteringS2S
        - MasakhaNEWSClusteringP2P (fra)
        - MasakhaNEWSClusteringS2S (fra)
      PairClassification:
        - OpusparcusPC (fr)
        - PawsX (fr)
      Reranking:
        - AlloprofReranking
        - SyntecReranking
      Retrieval:
        - AlloprofRetrieval
        - BSARDRetrieval
        - MintakaRetrieval (fr)
        - SyntecRetrieval
        - XPQARetrieval (fr)
      STS:
        - STS22 (fr)
        - STSBenchmarkMultilingualSTS (fr)
        - SICKFr
      Summarization:
        - SummEvalFr

  # The key is quoted on purpose: a bare `no` is read as boolean false by
  # YAML 1.1 parsers.
  'no':
    title: Norwegian
    language_long: "Norwegian Bokmål"
    has_overall: false
    acronym: null
    icon: "🇳🇴"
    special_icons:
      Classification: "💙"
    credits: "[Kenneth Enevoldsen](https://github.com/KennethEnevoldsen), [scandinavian-embedding-benchmark](https://kennethenevoldsen.github.io/scandinavian-embedding-benchmark/)"
    tasks:
      # The anchor is kept unchanged: no alias to it is visible in this
      # section, but one may exist elsewhere.
      Classification: &id001
        - NoRecClassification
        - NordicLangClassification
        - NorwegianParliament
        - MassiveIntentClassification (nb)
        - MassiveScenarioClassification (nb)
        - ScalaNbClassification

  law:
    title: Law
    language_long: "English, German, Chinese"
    has_overall: false
    acronym: null
    icon: "⚖️"
    special_icons: null
    credits: "[Voyage AI](https://www.voyageai.com/)"
    tasks:
      Retrieval:
        - AILACasedocs
        - AILAStatutes
        - GerDaLIRSmall
        - LeCaRDv2
        - LegalBenchConsumerContractsQA
        - LegalBenchCorporateLobbying
        - LegalQuAD
        - LegalSummarization

  pl:
    title: Polish
    language_long: Polish
    has_overall: true
    acronym: null
    icon: "🇵🇱"
    special_icons:
      Classification: "🤍"
    credits: "[Rafał Poświata](https://github.com/rafalposwiata)"
    tasks:
      Classification:
        - AllegroReviews
        - CBD
        - MassiveIntentClassification (pl)
        - MassiveScenarioClassification (pl)
        - PAC
        - PolEmo2.0-IN
        - PolEmo2.0-OUT
      Clustering:
        - 8TagsClustering
      PairClassification:
        - CDSC-E
        - PPC
        - PSC
        - SICK-E-PL
      Retrieval:
        - ArguAna-PL
        - DBPedia-PL
        - FiQA-PL
        - HotpotQA-PL
        - MSMARCO-PL
        - NFCorpus-PL
        - NQ-PL
        - Quora-PL
        - SCIDOCS-PL
        - SciFact-PL
        - TRECCOVID-PL
      STS:
        - CDSC-R
        - SICK-R-PL
        - STS22 (pl)

  se:
    title: Swedish
    language_long: Swedish
    has_overall: false
    acronym: null
    icon: "🇸🇪"
    special_icons:
      Classification: "💛"
    credits: "[Kenneth Enevoldsen](https://github.com/KennethEnevoldsen), [scandinavian-embedding-benchmark](https://kennethenevoldsen.github.io/scandinavian-embedding-benchmark/)"
    tasks:
      # Swedish (sv) task set. This list previously duplicated the Norwegian
      # Bokmål (nb) tasks of the 'no' board.
      # NOTE(review): task names follow the Scandinavian Embedding Benchmark
      # naming - confirm they match the names used in the results repository.
      Classification:
        - DalajClassification
        - MassiveIntentClassification (sv)
        - MassiveScenarioClassification (sv)
        - NordicLangClassification
        - ScalaSvClassification
        - SweRecClassification

  other-cls:
    title: "Other Languages"
    language_long: "47 (Only languages not included in the other tabs)"
    has_overall: false
    acronym: null
    icon: null
    special_icons:
      Classification: "💜💚💙"
    credits: null
    tasks:
      Classification:
        - AmazonCounterfactualClassification (de)
        - AmazonCounterfactualClassification (ja)
        - AmazonReviewsClassification (de)
        - AmazonReviewsClassification (es)
        - AmazonReviewsClassification (fr)
        - AmazonReviewsClassification (ja)
        - AmazonReviewsClassification (zh)
        - MTOPDomainClassification (de)
        - MTOPDomainClassification (es)
        - MTOPDomainClassification (fr)
        - MTOPDomainClassification (hi)
        - MTOPDomainClassification (th)
        - MTOPIntentClassification (de)
        - MTOPIntentClassification (es)
        - MTOPIntentClassification (fr)
        - MTOPIntentClassification (hi)
        - MTOPIntentClassification (th)
        - MassiveIntentClassification (af)
        - MassiveIntentClassification (am)
        - MassiveIntentClassification (ar)
        - MassiveIntentClassification (az)
        - MassiveIntentClassification (bn)
        - MassiveIntentClassification (cy)
        - MassiveIntentClassification (de)
        - MassiveIntentClassification (el)
        - MassiveIntentClassification (es)
        - MassiveIntentClassification (fa)
        - MassiveIntentClassification (fi)
        - MassiveIntentClassification (fr)
        - MassiveIntentClassification (he)
        - MassiveIntentClassification (hi)
        - MassiveIntentClassification (hu)
        - MassiveIntentClassification (hy)
        - MassiveIntentClassification (id)
        - MassiveIntentClassification (is)
        - MassiveIntentClassification (it)
        - MassiveIntentClassification (ja)
        - MassiveIntentClassification (jv)
        - MassiveIntentClassification (ka)
        - MassiveIntentClassification (km)
        - MassiveIntentClassification (kn)
        - MassiveIntentClassification (ko)
        - MassiveIntentClassification (lv)
        - MassiveIntentClassification (ml)
        - MassiveIntentClassification (mn)
        - MassiveIntentClassification (ms)
        - MassiveIntentClassification (my)
        - MassiveIntentClassification (nl)
        - MassiveIntentClassification (pt)
        - MassiveIntentClassification (ro)
        - MassiveIntentClassification (ru)
        - MassiveIntentClassification (sl)
        - MassiveIntentClassification (sq)
        - MassiveIntentClassification (sw)
        - MassiveIntentClassification (ta)
        - MassiveIntentClassification (te)
        - MassiveIntentClassification (th)
        - MassiveIntentClassification (tl)
        - MassiveIntentClassification (tr)
        - MassiveIntentClassification (ur)
        - MassiveIntentClassification (vi)
        - MassiveIntentClassification (zh-TW)
        - MassiveScenarioClassification (af)
        - MassiveScenarioClassification (am)
        - MassiveScenarioClassification (ar)
        - MassiveScenarioClassification (az)
        - MassiveScenarioClassification (bn)
        - MassiveScenarioClassification (cy)
        - MassiveScenarioClassification (de)
        - MassiveScenarioClassification (el)
        - MassiveScenarioClassification (es)
        - MassiveScenarioClassification (fa)
        - MassiveScenarioClassification (fi)
        - MassiveScenarioClassification (fr)
        - MassiveScenarioClassification (he)
        - MassiveScenarioClassification (hi)
        - MassiveScenarioClassification (hu)
        - MassiveScenarioClassification (hy)
        - MassiveScenarioClassification (id)
        - MassiveScenarioClassification (is)
        - MassiveScenarioClassification (it)
        - MassiveScenarioClassification (ja)
        - MassiveScenarioClassification (jv)
        - MassiveScenarioClassification (ka)
        - MassiveScenarioClassification (km)
        - MassiveScenarioClassification (kn)
        - MassiveScenarioClassification (ko)
        - MassiveScenarioClassification (lv)
        - MassiveScenarioClassification (ml)
        - MassiveScenarioClassification (mn)
        - MassiveScenarioClassification (ms)
        - MassiveScenarioClassification (my)
        - MassiveScenarioClassification (nl)
        - MassiveScenarioClassification (pt)
        - MassiveScenarioClassification (ro)
        - MassiveScenarioClassification (ru)
        - MassiveScenarioClassification (sl)
        - MassiveScenarioClassification (sq)
        - MassiveScenarioClassification (sw)
        - MassiveScenarioClassification (ta)
        - MassiveScenarioClassification (te)
        - MassiveScenarioClassification (th)
        - MassiveScenarioClassification (tl)
        - MassiveScenarioClassification (tr)
        - MassiveScenarioClassification (ur)
        - MassiveScenarioClassification (vi)
        - MassiveScenarioClassification (zh-TW)

  other-sts:
    title: Other
    language_long: "Arabic, Chinese, Dutch, English, French, German, Italian, Korean, Polish, Russian, Spanish (Only language combos not included in the other tabs)"
    has_overall: false
    acronym: null
    icon: null
    special_icons:
      STS: "👽"
    credits: null
    tasks:
      STS:
        - STS17 (ar-ar)
        - STS17 (en-ar)
        - STS17 (en-de)
        - STS17 (en-tr)
        - STS17 (es-en)
        - STS17 (es-es)
        - STS17 (fr-en)
        - STS17 (it-en)
        - STS17 (ko-ko)
        - STS17 (nl-en)
        - STS22 (ar)
        - STS22 (de)
        - STS22 (de-en)
        - STS22 (de-fr)
        - STS22 (de-pl)
        - STS22 (es)
        - STS22 (es-en)
        - STS22 (es-it)
        - STS22 (fr)
        - STS22 (fr-pl)
        - STS22 (it)
        - STS22 (pl)
        - STS22 (pl-en)
        - STS22 (ru)
        - STS22 (tr)
        - STS22 (zh-en)
        - STSBenchmark