diff --git "a/app.py" "b/app.py"
--- "a/app.py"
+++ "b/app.py"
@@ -5,324 +5,20 @@
 import re
 from datasets import load_dataset
 import gradio as gr
-from huggingface_hub import HfApi, hf_hub_download
+from huggingface_hub import hf_hub_download
 from huggingface_hub.repocard import metadata_load
 import pandas as pd
 from tqdm.autonotebook import tqdm
 from utils.model_size import get_model_parameters_memory
+from envs import LEADERBOARD_CONFIG, MODEL_META, REPO_ID, RESULTS_REPO, API
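+# NOTE: LEADERBOARD_CONFIG, MODEL_META, REPO_ID, RESULTS_REPO and API are assumed to be
+# loaded/constructed once in envs.py; they replace the hard-coded task lists and model
+# metadata dictionaries that this commit removes below.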
"TweetSentimentExtractionClassification", -] - -TASK_LIST_CLASSIFICATION_DA = [ - "AngryTweetsClassification", - "DanishPoliticalCommentsClassification", - "DKHateClassification", - "LccSentimentClassification", - "MassiveIntentClassification (da)", - "MassiveScenarioClassification (da)", - "NordicLangClassification", - "ScalaDaClassification", -] - -TASK_LIST_CLASSIFICATION_FR = [ - "AmazonReviewsClassification (fr)", - "MasakhaNEWSClassification (fra)", - "MassiveIntentClassification (fr)", - "MassiveScenarioClassification (fr)", - "MTOPDomainClassification (fr)", - "MTOPIntentClassification (fr)", -] - -TASK_LIST_CLASSIFICATION_NB = [ - "NoRecClassification", - "NordicLangClassification", - "NorwegianParliament", - "MassiveIntentClassification (nb)", - "MassiveScenarioClassification (nb)", - "ScalaNbClassification", -] +TASKS_CONFIG = LEADERBOARD_CONFIG["tasks"] +BOARDS_CONFIG = LEADERBOARD_CONFIG["boards"] -TASK_LIST_CLASSIFICATION_PL = [ - "AllegroReviews", - "CBD", - "MassiveIntentClassification (pl)", - "MassiveScenarioClassification (pl)", - "PAC", - "PolEmo2.0-IN", - "PolEmo2.0-OUT", -] - -TASK_LIST_CLASSIFICATION_SV = [ - "DalajClassification", - "MassiveIntentClassification (sv)", - "MassiveScenarioClassification (sv)", - "NordicLangClassification", - "ScalaSvClassification", - "SweRecClassification", -] - -TASK_LIST_CLASSIFICATION_ZH = [ - "AmazonReviewsClassification (zh)", - "IFlyTek", - "JDReview", - "MassiveIntentClassification (zh-CN)", - "MassiveScenarioClassification (zh-CN)", - "MultilingualSentiment", - "OnlineShopping", - "TNews", - "Waimai", -] +TASKS = list(TASKS_CONFIG.keys()) -TASK_LIST_CLASSIFICATION_OTHER = ['AmazonCounterfactualClassification (de)', 'AmazonCounterfactualClassification (ja)', 'AmazonReviewsClassification (de)', 'AmazonReviewsClassification (es)', 'AmazonReviewsClassification (fr)', 'AmazonReviewsClassification (ja)', 'AmazonReviewsClassification (zh)', 'MTOPDomainClassification (de)', 'MTOPDomainClassification (es)', 'MTOPDomainClassification (fr)', 'MTOPDomainClassification (hi)', 'MTOPDomainClassification (th)', 'MTOPIntentClassification (de)', 'MTOPIntentClassification (es)', 'MTOPIntentClassification (fr)', 'MTOPIntentClassification (hi)', 'MTOPIntentClassification (th)', 'MassiveIntentClassification (af)', 'MassiveIntentClassification (am)', 'MassiveIntentClassification (ar)', 'MassiveIntentClassification (az)', 'MassiveIntentClassification (bn)', 'MassiveIntentClassification (cy)', 'MassiveIntentClassification (de)', 'MassiveIntentClassification (el)', 'MassiveIntentClassification (es)', 'MassiveIntentClassification (fa)', 'MassiveIntentClassification (fi)', 'MassiveIntentClassification (fr)', 'MassiveIntentClassification (he)', 'MassiveIntentClassification (hi)', 'MassiveIntentClassification (hu)', 'MassiveIntentClassification (hy)', 'MassiveIntentClassification (id)', 'MassiveIntentClassification (is)', 'MassiveIntentClassification (it)', 'MassiveIntentClassification (ja)', 'MassiveIntentClassification (jv)', 'MassiveIntentClassification (ka)', 'MassiveIntentClassification (km)', 'MassiveIntentClassification (kn)', 'MassiveIntentClassification (ko)', 'MassiveIntentClassification (lv)', 'MassiveIntentClassification (ml)', 'MassiveIntentClassification (mn)', 'MassiveIntentClassification (ms)', 'MassiveIntentClassification (my)', 'MassiveIntentClassification (nl)', 'MassiveIntentClassification (pt)', 'MassiveIntentClassification (ro)', 'MassiveIntentClassification (ru)', 'MassiveIntentClassification (sl)', 'MassiveIntentClassification 
(sq)', 'MassiveIntentClassification (sw)', 'MassiveIntentClassification (ta)', 'MassiveIntentClassification (te)', 'MassiveIntentClassification (th)', 'MassiveIntentClassification (tl)', 'MassiveIntentClassification (tr)', 'MassiveIntentClassification (ur)', 'MassiveIntentClassification (vi)', 'MassiveIntentClassification (zh-TW)', 'MassiveScenarioClassification (af)', 'MassiveScenarioClassification (am)', 'MassiveScenarioClassification (ar)', 'MassiveScenarioClassification (az)', 'MassiveScenarioClassification (bn)', 'MassiveScenarioClassification (cy)', 'MassiveScenarioClassification (de)', 'MassiveScenarioClassification (el)', 'MassiveScenarioClassification (es)', 'MassiveScenarioClassification (fa)', 'MassiveScenarioClassification (fi)', 'MassiveScenarioClassification (fr)', 'MassiveScenarioClassification (he)', 'MassiveScenarioClassification (hi)', 'MassiveScenarioClassification (hu)', 'MassiveScenarioClassification (hy)', 'MassiveScenarioClassification (id)', 'MassiveScenarioClassification (is)', 'MassiveScenarioClassification (it)', 'MassiveScenarioClassification (ja)', 'MassiveScenarioClassification (jv)', 'MassiveScenarioClassification (ka)', 'MassiveScenarioClassification (km)', 'MassiveScenarioClassification (kn)', 'MassiveScenarioClassification (ko)', 'MassiveScenarioClassification (lv)', 'MassiveScenarioClassification (ml)', 'MassiveScenarioClassification (mn)', 'MassiveScenarioClassification (ms)', 'MassiveScenarioClassification (my)', 'MassiveScenarioClassification (nl)', 'MassiveScenarioClassification (pt)', 'MassiveScenarioClassification (ro)', 'MassiveScenarioClassification (ru)', 'MassiveScenarioClassification (sl)', 'MassiveScenarioClassification (sq)', 'MassiveScenarioClassification (sw)', 'MassiveScenarioClassification (ta)', 'MassiveScenarioClassification (te)', 'MassiveScenarioClassification (th)', 'MassiveScenarioClassification (tl)', 'MassiveScenarioClassification (tr)', 'MassiveScenarioClassification (ur)', 'MassiveScenarioClassification (vi)', 'MassiveScenarioClassification (zh-TW)'] - -TASK_LIST_CLUSTERING = [ - "ArxivClusteringP2P", - "ArxivClusteringS2S", - "BiorxivClusteringP2P", - "BiorxivClusteringS2S", - "MedrxivClusteringP2P", - "MedrxivClusteringS2S", - "RedditClustering", - "RedditClusteringP2P", - "StackExchangeClustering", - "StackExchangeClusteringP2P", - "TwentyNewsgroupsClustering", -] - - -TASK_LIST_CLUSTERING_DE = [ - "BlurbsClusteringP2P", - "BlurbsClusteringS2S", - "TenKGnadClusteringP2P", - "TenKGnadClusteringS2S", -] - -TASK_LIST_CLUSTERING_FR = [ - "AlloProfClusteringP2P", - "AlloProfClusteringS2S", - "HALClusteringS2S", - "MLSUMClusteringP2P", - "MLSUMClusteringS2S", - "MasakhaNEWSClusteringP2P (fra)", - "MasakhaNEWSClusteringS2S (fra)", -] - -TASK_LIST_CLUSTERING_PL = [ - "8TagsClustering", -] - -TASK_LIST_CLUSTERING_ZH = [ - "CLSClusteringP2P", - "CLSClusteringS2S", - "ThuNewsClusteringP2P", - "ThuNewsClusteringS2S", -] - -TASK_LIST_PAIR_CLASSIFICATION = [ - "SprintDuplicateQuestions", - "TwitterSemEval2015", - "TwitterURLCorpus", -] - -TASK_LIST_PAIR_CLASSIFICATION_FR = [ - "OpusparcusPC (fr)", - "PawsX (fr)", -] - -TASK_LIST_PAIR_CLASSIFICATION_PL = [ - "CDSC-E", - "PPC", - "PSC", - "SICK-E-PL", -] - -TASK_LIST_PAIR_CLASSIFICATION_ZH = [ - "Cmnli", - "Ocnli", -] - -TASK_LIST_RERANKING = [ - "AskUbuntuDupQuestions", - "MindSmallReranking", - "SciDocsRR", - "StackOverflowDupQuestions", -] - -TASK_LIST_RERANKING_FR = [ - "AlloprofReranking", - "SyntecReranking", -] - -TASK_LIST_RERANKING_ZH = [ - "CMedQAv1", - "CMedQAv2", - 
"MMarcoReranking", - "T2Reranking", -] - -TASK_LIST_RETRIEVAL = [ - "ArguAna", - "ClimateFEVER", - "CQADupstackRetrieval", - "DBPedia", - "FEVER", - "FiQA2018", - "HotpotQA", - "MSMARCO", - "NFCorpus", - "NQ", - "QuoraRetrieval", - "SCIDOCS", - "SciFact", - "Touche2020", - "TRECCOVID", -] - -TASK_LIST_RETRIEVAL_FR = [ - "AlloprofRetrieval", - "BSARDRetrieval", - "MintakaRetrieval (fr)", -# "MultiLongDocRetrieval", - "SyntecRetrieval", - "XPQARetrieval (fr)", -] - -TASK_LIST_RETRIEVAL_LAW = [ - "AILACasedocs", - "AILAStatutes", - "GerDaLIRSmall", - "LeCaRDv2", - "LegalBenchConsumerContractsQA", - "LegalBenchCorporateLobbying", - "LegalQuAD", - "LegalSummarization", -] - -TASK_LIST_RETRIEVAL_PL = [ - "ArguAna-PL", - "DBPedia-PL", - "FiQA-PL", - "HotpotQA-PL", - "MSMARCO-PL", - "NFCorpus-PL", - "NQ-PL", - "Quora-PL", - "SCIDOCS-PL", - "SciFact-PL", - "TRECCOVID-PL", -] - -TASK_LIST_RETRIEVAL_ZH = [ - "CmedqaRetrieval", - "CovidRetrieval", - "DuRetrieval", - "EcomRetrieval", - "MedicalRetrieval", - "MMarcoRetrieval", - "T2Retrieval", - "VideoRetrieval", -] - -TASK_LIST_RETRIEVAL_NORM = TASK_LIST_RETRIEVAL + [ - "CQADupstackAndroidRetrieval", - "CQADupstackEnglishRetrieval", - "CQADupstackGamingRetrieval", - "CQADupstackGisRetrieval", - "CQADupstackMathematicaRetrieval", - "CQADupstackPhysicsRetrieval", - "CQADupstackProgrammersRetrieval", - "CQADupstackStatsRetrieval", - "CQADupstackTexRetrieval", - "CQADupstackUnixRetrieval", - "CQADupstackWebmastersRetrieval", - "CQADupstackWordpressRetrieval" -] - -TASK_LIST_STS = [ - "BIOSSES", - "SICK-R", - "STS12", - "STS13", - "STS14", - "STS15", - "STS16", - "STS17 (en-en)", - "STS22 (en)", - "STSBenchmark", -] - -TASK_LIST_STS_FR = [ - "STS22 (fr)", - "STSBenchmarkMultilingualSTS (fr)", - "SICKFr", -] - -TASK_LIST_STS_PL = [ - "CDSC-R", - "SICK-R-PL", - "STS22 (pl)", -] - -TASK_LIST_STS_ZH = [ - "AFQMC", - "ATEC", - "BQ", - "LCQMC", - "PAWSX", - "QBQTC", - "STS22 (zh)", - "STSB", -] - -TASK_LIST_STS_OTHER = ["STS17 (ar-ar)", "STS17 (en-ar)", "STS17 (en-de)", "STS17 (en-tr)", "STS17 (es-en)", "STS17 (es-es)", "STS17 (fr-en)", "STS17 (it-en)", "STS17 (ko-ko)", "STS17 (nl-en)", "STS22 (ar)", "STS22 (de)", "STS22 (de-en)", "STS22 (de-fr)", "STS22 (de-pl)", "STS22 (es)", "STS22 (es-en)", "STS22 (es-it)", "STS22 (fr)", "STS22 (fr-pl)", "STS22 (it)", "STS22 (pl)", "STS22 (pl-en)", "STS22 (ru)", "STS22 (tr)", "STS22 (zh-en)", "STSBenchmark",] - -TASK_LIST_SUMMARIZATION = ["SummEval",] - -TASK_LIST_SUMMARIZATION_FR = ["SummEvalFr"] - -TASK_LIST_EN = TASK_LIST_CLASSIFICATION + TASK_LIST_CLUSTERING + TASK_LIST_PAIR_CLASSIFICATION + TASK_LIST_RERANKING + TASK_LIST_RETRIEVAL + TASK_LIST_STS + TASK_LIST_SUMMARIZATION -TASK_LIST_FR = TASK_LIST_CLASSIFICATION_FR + TASK_LIST_CLUSTERING_FR + TASK_LIST_PAIR_CLASSIFICATION_FR + TASK_LIST_RERANKING_FR + TASK_LIST_RETRIEVAL_FR + TASK_LIST_STS_FR + TASK_LIST_SUMMARIZATION_FR -TASK_LIST_PL = TASK_LIST_CLASSIFICATION_PL + TASK_LIST_CLUSTERING_PL + TASK_LIST_PAIR_CLASSIFICATION_PL + TASK_LIST_RETRIEVAL_PL + TASK_LIST_STS_PL -TASK_LIST_ZH = TASK_LIST_CLASSIFICATION_ZH + TASK_LIST_CLUSTERING_ZH + TASK_LIST_PAIR_CLASSIFICATION_ZH + TASK_LIST_RERANKING_ZH + TASK_LIST_RETRIEVAL_ZH + TASK_LIST_STS_ZH - -TASK_TO_METRIC = { - "BitextMining": "f1", - "Clustering": "v_measure", - "Classification": "accuracy", - "PairClassification": "cos_sim_ap", - "Reranking": "map", - "Retrieval": "ndcg_at_10", - "STS": "cos_sim_spearman", - "Summarization": "cos_sim_spearman", -} +TASK_TO_METRIC = {k:v["metric"] for k,v in TASKS_CONFIG.items()} def 
make_clickable_model(model_name, link=None): if link is None: @@ -332,893 +28,29 @@ def make_clickable_model(model_name, link=None): f'{model_name.split("/")[-1]}' ) -# Models without metadata, thus we cannot fetch their results naturally -EXTERNAL_MODELS = [ - "Baichuan-text-embedding", - "Cohere-embed-english-v3.0", - "Cohere-embed-multilingual-v3.0", - "Cohere-embed-multilingual-light-v3.0", - "DanskBERT", - "LASER2", - "LLM2Vec-Llama-supervised", - "LLM2Vec-Llama-unsupervised", - "LLM2Vec-Mistral-supervised", - "LLM2Vec-Mistral-unsupervised", - "LLM2Vec-Sheared-Llama-supervised", - "LLM2Vec-Sheared-Llama-unsupervised", - "LaBSE", - "OpenSearch-text-hybrid", - "all-MiniLM-L12-v2", - "all-MiniLM-L6-v2", - "all-mpnet-base-v2", - "allenai-specter", - "bert-base-10lang-cased", - "bert-base-15lang-cased", - "bert-base-25lang-cased", - "bert-base-multilingual-cased", - "bert-base-multilingual-uncased", - "bert-base-swedish-cased", - "bert-base-uncased", - "bge-base-zh-v1.5", - "bge-large-en-v1.5", - "bge-large-zh-v1.5", - "bge-large-zh-noinstruct", - "bge-small-zh-v1.5", - "contriever-base-msmarco", - "cross-en-de-roberta-sentence-transformer", - "dfm-encoder-large-v1", - "dfm-sentence-encoder-large-1", - "distiluse-base-multilingual-cased-v2", - "e5-base", - "e5-large", - "e5-mistral-7b-instruct", - "e5-small", - "electra-small-nordic", - "electra-small-swedish-cased-discriminator", - "flaubert_base_cased", - "flaubert_base_uncased", - "flaubert_large_cased", - "gbert-base", - "gbert-large", - "gelectra-base", - "gelectra-large", - "glove.6B.300d", - "google-gecko.text-embedding-preview-0409", - "google-gecko-256.text-embedding-preview-0409", - "gottbert-base", - "gtr-t5-base", - "gtr-t5-large", - "gtr-t5-xl", - "gtr-t5-xxl", - "herbert-base-retrieval-v2", - "komninos", - "luotuo-bert-medium", - "m3e-base", - "m3e-large", - "mistral-embed", - "msmarco-bert-co-condensor", - "multi-qa-MiniLM-L6-cos-v1", - "multilingual-e5-base", - "multilingual-e5-large", - "multilingual-e5-small", - "nb-bert-base", - "nb-bert-large", - "nomic-embed-text-v1.5-64", - "nomic-embed-text-v1.5-128", - "nomic-embed-text-v1.5-256", - "nomic-embed-text-v1.5-512", - "norbert3-base", - "norbert3-large", - "paraphrase-multilingual-MiniLM-L12-v2", - "paraphrase-multilingual-mpnet-base-v2", - "sentence-bert-swedish-cased", - "sentence-camembert-base", - "sentence-camembert-large", - "sentence-croissant-llm-base", - "sentence-t5-base", - "sentence-t5-large", - "sentence-t5-xl", - "sentence-t5-xxl", - "silver-retriever-base-v1", - "sup-simcse-bert-base-uncased", - "st-polish-paraphrase-from-distilroberta", - "st-polish-paraphrase-from-mpnet", - "text2vec-base-chinese", - "text2vec-base-multilingual", - "text2vec-large-chinese", - "text-embedding-3-small", - "text-embedding-3-large", - "text-embedding-3-large-256", - "text-embedding-ada-002", - "text-similarity-ada-001", - "text-similarity-babbage-001", - "text-similarity-curie-001", - "text-similarity-davinci-001", - "text-search-ada-doc-001", - "text-search-ada-001", - "text-search-babbage-001", - "text-search-curie-001", - "text-search-davinci-001", - "titan-embed-text-v1", - "udever-bloom-1b1", - "udever-bloom-560m", - "universal-sentence-encoder-multilingual-3", - "universal-sentence-encoder-multilingual-large-3", - "unsup-simcse-bert-base-uncased", - "use-cmlm-multilingual", - "voyage-2", - "voyage-code-2", - "voyage-large-2-instruct", - "voyage-law-2", - "voyage-lite-01-instruct", - "voyage-lite-02-instruct", - "xlm-roberta-base", - "xlm-roberta-large", -] - 
-EXTERNAL_MODEL_TO_LINK = { - "Baichuan-text-embedding": "https://platform.baichuan-ai.com/docs/text-Embedding", - "Cohere-embed-english-v3.0": "https://huggingface.co/Cohere/Cohere-embed-english-v3.0", - "Cohere-embed-multilingual-v3.0": "https://huggingface.co/Cohere/Cohere-embed-multilingual-v3.0", - "Cohere-embed-multilingual-light-v3.0": "https://huggingface.co/Cohere/Cohere-embed-multilingual-light-v3.0", - "DanskBERT": "https://huggingface.co/vesteinn/DanskBERT", - "LASER2": "https://github.com/facebookresearch/LASER", - "LLM2Vec-Llama-supervised": "https://huggingface.co/McGill-NLP/LLM2Vec-Llama-2-7b-chat-hf-mntp-supervised", - "LLM2Vec-Llama-unsupervised": "https://huggingface.co/McGill-NLP/LLM2Vec-Llama-2-7b-chat-hf-mntp", - "LLM2Vec-Mistral-supervised": "https://huggingface.co/McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-supervised", - "LLM2Vec-Mistral-unsupervised": "https://huggingface.co/McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp", - "LLM2Vec-Sheared-Llama-supervised": "https://huggingface.co/McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-supervised", - "LLM2Vec-Sheared-Llama-unsupervised": "https://huggingface.co/McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp", - "LaBSE": "https://huggingface.co/sentence-transformers/LaBSE", - "OpenSearch-text-hybrid": "https://help.aliyun.com/zh/open-search/vector-search-edition/hybrid-retrieval", - "allenai-specter": "https://huggingface.co/sentence-transformers/allenai-specter", - "allenai-specter": "https://huggingface.co/sentence-transformers/allenai-specter", - "all-MiniLM-L12-v2": "https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2", - "all-MiniLM-L6-v2": "https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2", - "all-mpnet-base-v2": "https://huggingface.co/sentence-transformers/all-mpnet-base-v2", - "bert-base-10lang-cased": "https://huggingface.co/Geotrend/bert-base-10lang-cased", - "bert-base-15lang-cased": "https://huggingface.co/Geotrend/bert-base-15lang-cased", - "bert-base-25lang-cased": "https://huggingface.co/Geotrend/bert-base-25lang-cased", - "bert-base-multilingual-cased": "https://huggingface.co/google-bert/bert-base-multilingual-cased", - "bert-base-multilingual-uncased": "https://huggingface.co/google-bert/bert-base-multilingual-uncased", - "bert-base-swedish-cased": "https://huggingface.co/KB/bert-base-swedish-cased", - "bert-base-uncased": "https://huggingface.co/bert-base-uncased", - "bge-base-zh-v1.5": "https://huggingface.co/BAAI/bge-base-zh-v1.5", - "bge-large-en-v1.5": "https://huggingface.co/BAAI/bge-large-en-v1.5", - "bge-large-zh-v1.5": "https://huggingface.co/BAAI/bge-large-zh-v1.5", - "bge-large-zh-noinstruct": "https://huggingface.co/BAAI/bge-large-zh-noinstruct", - "bge-small-zh-v1.5": "https://huggingface.co/BAAI/bge-small-zh-v1.5", - "camembert-base": "https://huggingface.co/almanach/camembert-base", - "camembert-large": "https://huggingface.co/almanach/camembert-large", - "contriever-base-msmarco": "https://huggingface.co/nthakur/contriever-base-msmarco", - "cross-en-de-roberta-sentence-transformer": "https://huggingface.co/T-Systems-onsite/cross-en-de-roberta-sentence-transformer", - "distilbert-base-25lang-cased": "https://huggingface.co/Geotrend/distilbert-base-25lang-cased", - "distilbert-base-en-fr-cased": "https://huggingface.co/Geotrend/distilbert-base-en-fr-cased", - "distilbert-base-en-fr-es-pt-it-cased": "https://huggingface.co/Geotrend/distilbert-base-en-fr-es-pt-it-cased", - "distilbert-base-fr-cased": "https://huggingface.co/Geotrend/distilbert-base-fr-cased", - 
"distilbert-base-uncased": "https://huggingface.co/distilbert-base-uncased", - "distiluse-base-multilingual-cased-v2": "https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2", - "dfm-encoder-large-v1": "https://huggingface.co/chcaa/dfm-encoder-large-v1", - "dfm-sentence-encoder-large-1": "https://huggingface.co/chcaa/dfm-encoder-large-v1", - "e5-base": "https://huggingface.co/intfloat/e5-base", - "e5-large": "https://huggingface.co/intfloat/e5-large", - "e5-mistral-7b-instruct": "https://huggingface.co/intfloat/e5-mistral-7b-instruct", - "e5-small": "https://huggingface.co/intfloat/e5-small", - "electra-small-nordic": "https://huggingface.co/jonfd/electra-small-nordic", - "electra-small-swedish-cased-discriminator": "https://huggingface.co/KBLab/electra-small-swedish-cased-discriminator", - "flaubert_base_cased": "https://huggingface.co/flaubert/flaubert_base_cased", - "flaubert_base_uncased": "https://huggingface.co/flaubert/flaubert_base_uncased", - "flaubert_large_cased": "https://huggingface.co/flaubert/flaubert_large_cased", - "gbert-base": "https://huggingface.co/deepset/gbert-base", - "gbert-large": "https://huggingface.co/deepset/gbert-large", - "gelectra-base": "https://huggingface.co/deepset/gelectra-base", - "gelectra-large": "https://huggingface.co/deepset/gelectra-large", - "glove.6B.300d": "https://huggingface.co/sentence-transformers/average_word_embeddings_glove.6B.300d", - "google-gecko.text-embedding-preview-0409": "https://cloud.google.com/vertex-ai/generative-ai/docs/embeddings/get-text-embeddings#latest_models", - "google-gecko-256.text-embedding-preview-0409": "https://cloud.google.com/vertex-ai/generative-ai/docs/embeddings/get-text-embeddings#latest_models", - "gottbert-base": "https://huggingface.co/uklfr/gottbert-base", - "gtr-t5-base": "https://huggingface.co/sentence-transformers/gtr-t5-base", - "gtr-t5-large": "https://huggingface.co/sentence-transformers/gtr-t5-large", - "gtr-t5-xl": "https://huggingface.co/sentence-transformers/gtr-t5-xl", - "gtr-t5-xxl": "https://huggingface.co/sentence-transformers/gtr-t5-xxl", - "herbert-base-retrieval-v2": "https://huggingface.co/ipipan/herbert-base-retrieval-v2", - "komninos": "https://huggingface.co/sentence-transformers/average_word_embeddings_komninos", - "luotuo-bert-medium": "https://huggingface.co/silk-road/luotuo-bert-medium", - "m3e-base": "https://huggingface.co/moka-ai/m3e-base", - "m3e-large": "https://huggingface.co/moka-ai/m3e-large", - "mistral-embed": "https://docs.mistral.ai/guides/embeddings", - "msmarco-bert-co-condensor": "https://huggingface.co/sentence-transformers/msmarco-bert-co-condensor", - "multi-qa-MiniLM-L6-cos-v1": "https://huggingface.co/sentence-transformers/multi-qa-MiniLM-L6-cos-v1", - "multilingual-e5-base": "https://huggingface.co/intfloat/multilingual-e5-base", - "multilingual-e5-large": "https://huggingface.co/intfloat/multilingual-e5-large", - "multilingual-e5-small": "https://huggingface.co/intfloat/multilingual-e5-small", - "nb-bert-base": "https://huggingface.co/NbAiLab/nb-bert-base", - "nb-bert-large": "https://huggingface.co/NbAiLab/nb-bert-large", - "nomic-embed-text-v1.5-64": "https://huggingface.co/nomic-ai/nomic-embed-text-v1.5", - "nomic-embed-text-v1.5-128": "https://huggingface.co/nomic-ai/nomic-embed-text-v1.5", - "nomic-embed-text-v1.5-256": "https://huggingface.co/nomic-ai/nomic-embed-text-v1.5", - "nomic-embed-text-v1.5-512": "https://huggingface.co/nomic-ai/nomic-embed-text-v1.5", - "norbert3-base": 
"https://huggingface.co/ltg/norbert3-base", - "norbert3-large": "https://huggingface.co/ltg/norbert3-large", - "paraphrase-multilingual-mpnet-base-v2": "https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2", - "paraphrase-multilingual-MiniLM-L12-v2": "https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", - "sentence-camembert-base": "https://huggingface.co/dangvantuan/sentence-camembert-base", - "sentence-camembert-large": "https://huggingface.co/dangvantuan/sentence-camembert-large", - "sentence-croissant-llm-base": "https://huggingface.co/Wissam42/sentence-croissant-llm-base", - "sentence-bert-swedish-cased": "https://huggingface.co/KBLab/sentence-bert-swedish-cased", - "sentence-t5-base": "https://huggingface.co/sentence-transformers/sentence-t5-base", - "sentence-t5-large": "https://huggingface.co/sentence-transformers/sentence-t5-large", - "sentence-t5-xl": "https://huggingface.co/sentence-transformers/sentence-t5-xl", - "sentence-t5-xxl": "https://huggingface.co/sentence-transformers/sentence-t5-xxl", - "silver-retriever-base-v1": "https://huggingface.co/ipipan/silver-retriever-base-v1", - "sup-simcse-bert-base-uncased": "https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased", - "st-polish-paraphrase-from-distilroberta": "https://huggingface.co/sdadas/st-polish-paraphrase-from-distilroberta", - "st-polish-paraphrase-from-mpnet": "https://huggingface.co/sdadas/st-polish-paraphrase-from-mpnet", - "text2vec-base-chinese": "https://huggingface.co/shibing624/text2vec-base-chinese", - "text2vec-large-chinese": "https://huggingface.co/GanymedeNil/text2vec-large-chinese", - "text-embedding-3-small": "https://openai.com/blog/new-embedding-models-and-api-updates", - "text-embedding-3-large": "https://openai.com/blog/new-embedding-models-and-api-updates", - "text-embedding-3-large-256": "https://openai.com/blog/new-embedding-models-and-api-updates", - "text-embedding-ada-002": "https://openai.com/blog/new-and-improved-embedding-model", - "text-similarity-ada-001": "https://openai.com/blog/introducing-text-and-code-embeddings", - "text-similarity-babbage-001": "https://openai.com/blog/introducing-text-and-code-embeddings", - "text-similarity-curie-001": "https://openai.com/blog/introducing-text-and-code-embeddings", - "text-similarity-davinci-001": "https://openai.com/blog/introducing-text-and-code-embeddings", - "text-search-ada-doc-001": "https://openai.com/blog/introducing-text-and-code-embeddings", - "text-search-ada-query-001": "https://openai.com/blog/introducing-text-and-code-embeddings", - "text-search-ada-001": "https://openai.com/blog/introducing-text-and-code-embeddings", - "text-search-curie-001": "https://openai.com/blog/introducing-text-and-code-embeddings", - "text-search-babbage-001": "https://openai.com/blog/introducing-text-and-code-embeddings", - "text-search-davinci-001": "https://openai.com/blog/introducing-text-and-code-embeddings", - "titan-embed-text-v1": "https://docs.aws.amazon.com/bedrock/latest/userguide/embeddings.html", - "udever-bloom-1b1": "https://huggingface.co/izhx/udever-bloom-1b1", - "udever-bloom-560m": "https://huggingface.co/izhx/udever-bloom-560m", - "universal-sentence-encoder-multilingual-3": "https://huggingface.co/vprelovac/universal-sentence-encoder-multilingual-3", - "universal-sentence-encoder-multilingual-large-3": "https://huggingface.co/vprelovac/universal-sentence-encoder-multilingual-large-3", - "unsup-simcse-bert-base-uncased": 
"https://huggingface.co/princeton-nlp/unsup-simcse-bert-base-uncased", - "use-cmlm-multilingual": "https://huggingface.co/sentence-transformers/use-cmlm-multilingual", - "voyage-2": "https://docs.voyageai.com/embeddings/", - "voyage-code-2": "https://docs.voyageai.com/embeddings/", - "voyage-large-2-instruct": "https://docs.voyageai.com/embeddings/", - "voyage-law-2": "https://docs.voyageai.com/embeddings/", - "voyage-lite-01-instruct": "https://docs.voyageai.com/embeddings/", - "voyage-lite-02-instruct": "https://docs.voyageai.com/embeddings/", - "xlm-roberta-base": "https://huggingface.co/xlm-roberta-base", - "xlm-roberta-large": "https://huggingface.co/xlm-roberta-large", -} - -EXTERNAL_MODEL_TO_DIM = { - "Baichuan-text-embedding": 1024, - "Cohere-embed-english-v3.0": 1024, - "Cohere-embed-multilingual-v3.0": 1024, - "Cohere-embed-multilingual-light-v3.0": 384, - "DanskBERT": 768, - "LASER2": 1024, - "LLM2Vec-Llama-supervised": 4096, - "LLM2Vec-Llama-unsupervised": 4096, - "LLM2Vec-Mistral-supervised": 4096, - "LLM2Vec-Mistral-unsupervised": 4096, - "LLM2Vec-Sheared-Llama-supervised": 2048, - "LLM2Vec-Sheared-Llama-unsupervised": 2048, - "LaBSE": 768, - "all-MiniLM-L12-v2": 384, - "all-MiniLM-L6-v2": 384, - "all-mpnet-base-v2": 768, - "allenai-specter": 768, - "bert-base-10lang-cased": 768, - "bert-base-15lang-cased": 768, - "bert-base-25lang-cased": 768, - "bert-base-multilingual-cased": 768, - "bert-base-multilingual-uncased": 768, - "bert-base-swedish-cased": 768, - "bert-base-uncased": 768, - "bge-base-zh-v1.5": 768, - "bge-large-en-v1.5": 1024, - "bge-large-zh-v1.5": 1024, - "bge-large-zh-noinstruct": 1024, - "bge-small-zh-v1.5": 512, - "camembert-base": 512, - "camembert-large": 768, - "contriever-base-msmarco": 768, - "cross-en-de-roberta-sentence-transformer": 768, - "distilbert-base-25lang-cased": 768, - "distilbert-base-en-fr-cased": 768, - "distilbert-base-en-fr-es-pt-it-cased": 768, - "distilbert-base-fr-cased": 768, - "distilbert-base-uncased": 768, - "distiluse-base-multilingual-cased-v2": 512, - "dfm-encoder-large-v1": 1024, - "dfm-sentence-encoder-large-1": 1024, - "e5-base": 768, - "e5-large": 1024, - "e5-mistral-7b-instruct": 4096, - "e5-small": 384, - "electra-small-nordic": 256, - "electra-small-swedish-cased-discriminator": 256, - "flaubert_base_cased": 768, - "flaubert_base_uncased": 768, - "flaubert_large_cased": 1024, - "luotuo-bert-medium": 768, - "gbert-base": 768, - "gbert-large": 1024, - "gelectra-base": 768, - "gelectra-large": 1024, - "glove.6B.300d": 300, - "google-gecko.text-embedding-preview-0409": 768, - "google-gecko-256.text-embedding-preview-0409": 256, - "gottbert-base": 768, - "gtr-t5-base": 768, - "gtr-t5-large": 768, - "gtr-t5-xl": 768, - "gtr-t5-xxl": 768, - "herbert-base-retrieval-v2": 768, - "komninos": 300, - "m3e-base": 768, - "m3e-large": 768, - "mistral-embed": 1024, - "msmarco-bert-co-condensor": 768, - "multi-qa-MiniLM-L6-cos-v1": 384, - "multilingual-e5-base": 768, - "multilingual-e5-small": 384, - "multilingual-e5-large": 1024, - "nb-bert-base": 768, - "nb-bert-large": 1024, - "nomic-embed-text-v1.5-64": 64, - "nomic-embed-text-v1.5-128": 128, - "nomic-embed-text-v1.5-256": 256, - "nomic-embed-text-v1.5-512": 512, - "norbert3-base": 768, - "norbert3-large": 1024, - "OpenSearch-text-hybrid": 1792, - "paraphrase-multilingual-MiniLM-L12-v2": 384, - "paraphrase-multilingual-mpnet-base-v2": 768, - "sentence-camembert-base": 768, - "sentence-camembert-large": 1024, - "sentence-croissant-llm-base": 2048, - "sentence-bert-swedish-cased": 768, 
- "sentence-t5-base": 768, - "sentence-t5-large": 768, - "sentence-t5-xl": 768, - "sentence-t5-xxl": 768, - "silver-retriever-base-v1": 768, - "sup-simcse-bert-base-uncased": 768, - "st-polish-paraphrase-from-distilroberta": 768, - "st-polish-paraphrase-from-mpnet": 768, - "text2vec-base-chinese": 768, - "text2vec-large-chinese": 1024, - "text-embedding-3-large": 3072, - "text-embedding-3-large-256": 256, - "text-embedding-3-small": 1536, - "text-embedding-ada-002": 1536, - "text-similarity-ada-001": 1024, - "text-similarity-babbage-001": 2048, - "text-similarity-curie-001": 4096, - "text-similarity-davinci-001": 12288, - "text-search-ada-doc-001": 1024, - "text-search-ada-query-001": 1024, - "text-search-ada-001": 1024, - "text-search-babbage-001": 2048, - "text-search-curie-001": 4096, - "text-search-davinci-001": 12288, - "titan-embed-text-v1": 1536, - "udever-bloom-1b1": 1536, - "udever-bloom-560m": 1024, - "universal-sentence-encoder-multilingual-3": 512, - "universal-sentence-encoder-multilingual-large-3": 512, - "unsup-simcse-bert-base-uncased": 768, - "use-cmlm-multilingual": 768, - "voyage-2": 1024, - "voyage-code-2": 1536, - "voyage-large-2-instruct": 1024, - "voyage-law-2": 1024, - "voyage-lite-01-instruct": 1024, - "voyage-lite-02-instruct": 1024, - "xlm-roberta-base": 768, - "xlm-roberta-large": 1024, -} - -EXTERNAL_MODEL_TO_SEQLEN = { - "Baichuan-text-embedding": 512, - "Cohere-embed-english-v3.0": 512, - "Cohere-embed-multilingual-v3.0": 512, - "Cohere-embed-multilingual-light-v3.0": 512, - "DanskBERT": 514, - "LASER2": "N/A", - "LLM2Vec-Llama-supervised": 4096, - "LLM2Vec-Llama-unsupervised": 4096, - "LLM2Vec-Mistral-supervised": 32768, - "LLM2Vec-Mistral-unsupervised": 32768, - "LLM2Vec-Sheared-Llama-supervised": 4096, - "LLM2Vec-Sheared-Llama-unsupervised": 4096, - "LaBSE": 512, - "all-MiniLM-L12-v2": 512, - "all-MiniLM-L6-v2": 512, - "all-mpnet-base-v2": 514, - "allenai-specter": 512, - "bert-base-10lang-cased": 512, - "bert-base-15lang-cased": 512, - "bert-base-25lang-cased": 512, - "bert-base-multilingual-cased": 512, - "bert-base-multilingual-uncased": 512, - "bert-base-swedish-cased": 512, - "bert-base-uncased": 512, - "bge-base-zh-v1.5": 512, - "bge-large-en-v1.5": 512, - "bge-large-zh-v1.5": 512, - "bge-large-zh-noinstruct": 512, - "bge-small-zh-v1.5": 512, - "camembert-base": 512, - "camembert-large": 512, - "contriever-base-msmarco": 512, - "cross-en-de-roberta-sentence-transformer": 514, - "distilbert-base-25lang-cased": 512, - "distilbert-base-en-fr-cased": 512, - "distilbert-base-en-fr-es-pt-it-cased": 512, - "distilbert-base-fr-cased": 512, - "distilbert-base-uncased": 512, - "dfm-encoder-large-v1": 512, - "dfm-sentence-encoder-large-1": 512, - "distiluse-base-multilingual-cased-v2": 512, - "e5-base": 512, - "e5-large": 512, - "e5-mistral-7b-instruct": 32768, - "e5-small": 512, - "electra-small-nordic": 512, - "electra-small-swedish-cased-discriminator": 512, - "flaubert_base_cased": 512, - "flaubert_base_uncased": 512, - "flaubert_large_cased": 512, - "gbert-base": 512, - "gbert-large": 512, - "gelectra-base": 512, - "gelectra-large": 512, - "google-gecko.text-embedding-preview-0409": 2048, - "google-gecko-256.text-embedding-preview-0409": 2048, - "gottbert-base": 512, - "glove.6B.300d": "N/A", - "gtr-t5-base": 512, - "gtr-t5-large": 512, - "gtr-t5-xl": 512, - "gtr-t5-xxl": 512, - "herbert-base-retrieval-v2": 514, - "komninos": "N/A", - "luotuo-bert-medium": 512, - "m3e-base": 512, - "m3e-large": 512, -# "mistral-embed": "?", - "msmarco-bert-co-condensor": 
512, - "multi-qa-MiniLM-L6-cos-v1": 512, - "multilingual-e5-base": 514, - "multilingual-e5-large": 514, - "multilingual-e5-small": 512, - "nb-bert-base": 512, - "nb-bert-large": 512, - "nomic-embed-text-v1.5-64": 8192, - "nomic-embed-text-v1.5-128": 8192, - "nomic-embed-text-v1.5-256": 8192, - "nomic-embed-text-v1.5-512": 8192, - "norbert3-base": 512, - "norbert3-large": 512, - "OpenSearch-text-hybrid": 512, - "paraphrase-multilingual-MiniLM-L12-v2": 512, - "paraphrase-multilingual-mpnet-base-v2": 514, - "sentence-camembert-base": 512, - "sentence-camembert-large": 512, - "sentence-croissant-llm-base": 2048, - "sentence-bert-swedish-cased": 512, - "sentence-t5-base": 512, - "sentence-t5-large": 512, - "sentence-t5-xl": 512, - "sentence-t5-xxl": 512, - "silver-retriever-base-v1": 514, - "sup-simcse-bert-base-uncased": 512, - "st-polish-paraphrase-from-distilroberta": 514, - "st-polish-paraphrase-from-mpnet": 514, - "text2vec-base-chinese": 512, - "text2vec-large-chinese": 512, - "text-embedding-3-large": 8191, - "text-embedding-3-large-256": 8191, - "text-embedding-3-small": 8191, - "text-embedding-ada-002": 8191, - "text-similarity-ada-001": 2046, - "text-similarity-babbage-001": 2046, - "text-similarity-curie-001": 2046, - "text-similarity-davinci-001": 2046, - "text-search-ada-doc-001": 2046, - "text-search-ada-query-001": 2046, - "text-search-ada-001": 2046, - "text-search-babbage-001": 2046, - "text-search-curie-001": 2046, - "text-search-davinci-001": 2046, - "titan-embed-text-v1": 8000, - "udever-bloom-1b1": 2048, - "udever-bloom-560m": 2048, - "universal-sentence-encoder-multilingual-3": 512, - "universal-sentence-encoder-multilingual-large-3": 512, - "use-cmlm-multilingual": 512, - "unsup-simcse-bert-base-uncased": 512, - "voyage-2": 1024, - "voyage-code-2": 16000, - "voyage-large-2-instruct": 16000, - "voyage-law-2": 4000, - "voyage-lite-01-instruct": 4000, - "voyage-lite-02-instruct": 4000, - "xlm-roberta-base": 514, - "xlm-roberta-large": 514, -} - -EXTERNAL_MODEL_TO_SIZE = { - "DanskBERT": 125, - "LASER2": 43, - "LLM2Vec-Llama-supervised": 6607, - "LLM2Vec-Llama-unsupervised": 6607, - "LLM2Vec-Mistral-supervised": 7111, - "LLM2Vec-Mistral-unsupervised": 7111, - "LLM2Vec-Sheared-Llama-supervised": 1280, - "LLM2Vec-Sheared-Llama-unsupervised": 1280, - "LaBSE": 471, - "allenai-specter": 110, - "all-MiniLM-L12-v2": 33, - "all-MiniLM-L6-v2": 23, - "all-mpnet-base-v2": 110, - "bert-base-10lang-cased": 138, - "bert-base-15lang-cased": 138, - "bert-base-25lang-cased": 138, - "bert-base-multilingual-cased": 179, - "bert-base-multilingual-uncased": 168, - "bert-base-uncased": 110, - "bert-base-swedish-cased": 125, - "bge-base-zh-v1.5": 102, - "bge-large-zh-v1.5": 326, - "bge-large-zh-noinstruct": 326, - "bge-small-zh-v1.5": 24, - "camembert-base": 111, - "camembert-large": 338, - "cross-en-de-roberta-sentence-transformer": 278, - "contriever-base-msmarco": 110, - "distilbert-base-25lang-cased": 110, - "distilbert-base-en-fr-cased": 110, - "distilbert-base-en-fr-es-pt-it-cased": 110, - "distilbert-base-fr-cased": 110, - "distilbert-base-uncased": 110, - "distiluse-base-multilingual-cased-v2": 135, - "dfm-encoder-large-v1": 355, - "dfm-sentence-encoder-large-1": 355, - "e5-base": 110, - "e5-large": 335, - "e5-mistral-7b-instruct": 7111, - "e5-small": 33, - "electra-small-nordic": 23, - "electra-small-swedish-cased-discriminator": 16, - "flaubert_base_cased": 138, - "flaubert_base_uncased": 138, - "flaubert_large_cased": 372, - "gbert-base": 110, - "gbert-large": 337, - "gelectra-base": 
110, - "gelectra-large": 335, - "glove.6B.300d": 120, - "google-gecko.text-embedding-preview-0409": 1200, - "google-gecko-256.text-embedding-preview-0409": 1200, - "gottbert-base": 127, - "gtr-t5-base": 110, - "gtr-t5-large": 168, - "gtr-t5-xl": 1240, - "gtr-t5-xxl": 4865, - "herbert-base-retrieval-v2": 125, - "komninos": 134, - "luotuo-bert-medium": 328, - "m3e-base": 102, - "m3e-large": 102, - "msmarco-bert-co-condensor": 110, - "multi-qa-MiniLM-L6-cos-v1": 23, - "multilingual-e5-base": 278, - "multilingual-e5-small": 118, - "multilingual-e5-large": 560, - "nb-bert-base": 179, - "nb-bert-large": 355, - "nomic-embed-text-v1.5-64": 138, - "nomic-embed-text-v1.5-128": 138, - "nomic-embed-text-v1.5-256": 138, - "nomic-embed-text-v1.5-512": 138, - "norbert3-base": 131, - "norbert3-large": 368, - "paraphrase-multilingual-mpnet-base-v2": 278, - "paraphrase-multilingual-MiniLM-L12-v2": 118, - "sentence-camembert-base": 110, - "sentence-camembert-large": 337, - "sentence-croissant-llm-base": 1280, - "sentence-bert-swedish-cased": 125, - "sentence-t5-base": 110, - "sentence-t5-large": 168, - "sentence-t5-xl": 1240, - "sentence-t5-xxl": 4865, - "silver-retriever-base-v1": 125, - "sup-simcse-bert-base-uncased": 110, - "st-polish-paraphrase-from-distilroberta": 125, - "st-polish-paraphrase-from-mpnet": 125, - "text2vec-base-chinese": 102, - "text2vec-large-chinese": 326, - "unsup-simcse-bert-base-uncased": 110, - "use-cmlm-multilingual": 472, - #"voyage-law-2": 1220, - "voyage-lite-02-instruct": 1220, - "xlm-roberta-base": 279, - "xlm-roberta-large": 560, -} +EXTERNAL_MODELS = {k for k,v in MODEL_META["models_meta"].items() if v.get("is_external", False)} +EXTERNAL_MODEL_TO_LINK = {k: v["link"] for k,v in MODEL_META["models_meta"].items() if v.get("link", False)} +EXTERNAL_MODEL_TO_DIM = {k: v["dim"] for k,v in MODEL_META["models_meta"].items() if v.get("dim", False)} +EXTERNAL_MODEL_TO_SEQLEN = {k: v["seq_len"] for k,v in MODEL_META["models_meta"].items() if v.get("seq_len", False)} +EXTERNAL_MODEL_TO_SIZE = {k: v["size"] for k,v in MODEL_META["models_meta"].items() if v.get("size", False)} +PROPRIETARY_MODELS = {k for k,v in MODEL_META["models_meta"].items() if v.get("is_proprietary", False)} +SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS = {k for k,v in MODEL_META["models_meta"].items() if v.get("is_sentence_transformers_compatible", False)} +MODELS_TO_SKIP = MODEL_META["models_to_skip"] PROPRIETARY_MODELS = { - "Baichuan-text-embedding", - "Cohere-embed-english-v3.0", - "Cohere-embed-multilingual-v3.0", - "Cohere-embed-multilingual-light-v3.0", - "OpenSearch-text-hybrid", - "mistral-embed", - "text-embedding-3-small", - "text-embedding-3-large", - "text-embedding-3-large-256", - "text-embedding-ada-002", - "text-similarity-ada-001", - "text-similarity-babbage-001", - "text-similarity-curie-001", - "text-similarity-davinci-001", - "text-search-ada-doc-001", - "text-search-ada-query-001", - "text-search-ada-001", - "text-search-curie-001", - "text-search-babbage-001", - "text-search-davinci-001", - "titan-embed-text-v1", - "voyage-2", - "voyage-code-2", - "voyage-law-2", - "voyage-lite-01-instruct", - "voyage-lite-02-instruct", - "google-gecko.text-embedding-preview-0409", - "google-gecko-256.text-embedding-preview-0409", -} - -PROPRIETARY_MODELS = { - make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, "https://huggingface.co/spaces/mteb/leaderboard")) + make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, f"https://huggingface.co/spaces/{REPO_ID}")) for model in 
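+# Assumed MODEL_META shape, inferred from the comprehensions below ("link", "dim",
+# "seq_len", "size" and the is_* boolean flags are the keys actually read here; the
+# example values match the removed dictionaries above):
+# {"models_meta": {"LaBSE": {"link": "...", "dim": 768, "seq_len": 512, "size": 471,
+#                            "is_external": True, "is_proprietary": False,
+#                            "is_sentence_transformers_compatible": True}, ...},
+#  "models_to_skip": {...}}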
+EXTERNAL_MODELS = {k for k, v in MODEL_META["models_meta"].items() if v.get("is_external", False)}
+EXTERNAL_MODEL_TO_LINK = {k: v["link"] for k, v in MODEL_META["models_meta"].items() if v.get("link", False)}
+EXTERNAL_MODEL_TO_DIM = {k: v["dim"] for k, v in MODEL_META["models_meta"].items() if v.get("dim", False)}
+EXTERNAL_MODEL_TO_SEQLEN = {k: v["seq_len"] for k, v in MODEL_META["models_meta"].items() if v.get("seq_len", False)}
+EXTERNAL_MODEL_TO_SIZE = {k: v["size"] for k, v in MODEL_META["models_meta"].items() if v.get("size", False)}
+PROPRIETARY_MODELS = {k for k, v in MODEL_META["models_meta"].items() if v.get("is_proprietary", False)}
+SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS = {k for k, v in MODEL_META["models_meta"].items() if v.get("is_sentence_transformers_compatible", False)}
+MODELS_TO_SKIP = MODEL_META["models_to_skip"]

 PROPRIETARY_MODELS = {
-    "Baichuan-text-embedding",
-    "Cohere-embed-english-v3.0",
-    "Cohere-embed-multilingual-v3.0",
-    "Cohere-embed-multilingual-light-v3.0",
-    "OpenSearch-text-hybrid",
-    "mistral-embed",
-    "text-embedding-3-small",
-    "text-embedding-3-large",
-    "text-embedding-3-large-256",
-    "text-embedding-ada-002",
-    "text-similarity-ada-001",
-    "text-similarity-babbage-001",
-    "text-similarity-curie-001",
-    "text-similarity-davinci-001",
-    "text-search-ada-doc-001",
-    "text-search-ada-query-001",
-    "text-search-ada-001",
-    "text-search-curie-001",
-    "text-search-babbage-001",
-    "text-search-davinci-001",
-    "titan-embed-text-v1",
-    "voyage-2",
-    "voyage-code-2",
-    "voyage-law-2",
-    "voyage-lite-01-instruct",
-    "voyage-lite-02-instruct",
-    "google-gecko.text-embedding-preview-0409",
-    "google-gecko-256.text-embedding-preview-0409",
-}
-
-PROPRIETARY_MODELS = {
-    make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, "https://huggingface.co/spaces/mteb/leaderboard"))
+    make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, f"https://huggingface.co/spaces/{REPO_ID}"))
     for model in PROPRIETARY_MODELS
 }

 SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS = {
-    "allenai-specter",
-    "allenai-specter",
-    "all-MiniLM-L12-v2",
-    "all-MiniLM-L6-v2",
-    "all-mpnet-base-v2",
-    "bert-base-10lang-cased",
-    "bert-base-15lang-cased",
-    "bert-base-25lang-cased",
-    "bert-base-multilingual-cased",
-    "bert-base-multilingual-uncased",
-    "bert-base-swedish-cased",
-    "bert-base-uncased",
-    "bge-base-zh-v1.5",
-    "bge-large-zh-v1.5",
-    "bge-large-zh-noinstruct",
-    "bge-small-zh-v1.5",
-    "camembert-base",
-    "camembert-large",
-    "contriever-base-msmarco",
-    "cross-en-de-roberta-sentence-transformer",
-    "DanskBERT",
-    "distilbert-base-25lang-cased",
-    "distilbert-base-en-fr-cased",
-    "distilbert-base-en-fr-es-pt-it-cased",
-    "distilbert-base-fr-cased",
-    "distilbert-base-uncased",
-    "distiluse-base-multilingual-cased-v2",
-    "dfm-encoder-large-v1",
-    "dfm-sentence-encoder-large-1",
-    "e5-base",
-    "e5-large",
-    "e5-mistral-7b-instruct",
-    "e5-small",
-    "electra-small-nordic",
-    "electra-small-swedish-cased-discriminator",
-    "flaubert_base_cased",
-    "flaubert_base_uncased",
-    "flaubert_large_cased",
-    "gbert-base",
-    "gbert-large",
-    "gelectra-base",
-    "gelectra-large",
-    "glove.6B.300d",
-    "gottbert-base",
-    "gtr-t5-base",
-    "gtr-t5-large",
-    "gtr-t5-xl",
-    "gtr-t5-xxl",
-    "herbert-base-retrieval-v2",
-    "komninos",
-    "luotuo-bert-medium",
-    "LaBSE",
-    "m3e-base",
-    "m3e-large",
-    "msmarco-bert-co-condensor",
-    "multi-qa-MiniLM-L6-cos-v1",
-    "multilingual-e5-base",
-    "multilingual-e5-large",
-    "multilingual-e5-small",
-    "nb-bert-base",
-    "nb-bert-large",
-    "nomic-embed-text-v1.5-64",
-    "nomic-embed-text-v1.5-128",
-    "nomic-embed-text-v1.5-256",
-    "nomic-embed-text-v1.5-512",
-    "norbert3-base",
-    "norbert3-large",
-    "paraphrase-multilingual-mpnet-base-v2",
-    "paraphrase-multilingual-MiniLM-L12-v2",
-    "sentence-camembert-base",
-    "sentence-camembert-large",
-    "sentence-croissant-llm-base",
-    "sentence-bert-swedish-cased",
-    "sentence-t5-base",
-    "sentence-t5-large",
-    "sentence-t5-xl",
-    "sentence-t5-xxl",
-    "silver-retriever-base-v1",
-    "sup-simcse-bert-base-uncased",
-    "st-polish-paraphrase-from-distilroberta",
-    "st-polish-paraphrase-from-mpnet",
-    "text2vec-base-chinese",
-    "text2vec-large-chinese",
-    "udever-bloom-1b1",
-    "udever-bloom-560m",
-    "universal-sentence-encoder-multilingual-3",
-    "universal-sentence-encoder-multilingual-large-3",
-    "unsup-simcse-bert-base-uncased",
-    "use-cmlm-multilingual",
-    "xlm-roberta-base",
-    "xlm-roberta-large",
-}
-SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS = {
-    make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, "https://huggingface.co/spaces/mteb/leaderboard"))
+    make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, f"https://huggingface.co/spaces/{REPO_ID}"))
     for model in SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS
 }

-MODELS_TO_SKIP = {
-    "baseplate/instructor-large-1", # Duplicate
-    "radames/e5-large", # Duplicate
-    "gentlebowl/instructor-large-safetensors", # Duplicate
-    "Consensus/instructor-base", # Duplicate
-    "GovCompete/instructor-xl", # Duplicate
-    "GovCompete/e5-large-v2", # Duplicate
-    "t12e/instructor-base", # Duplicate
-    "michaelfeil/ct2fast-e5-large-v2",
-    "michaelfeil/ct2fast-e5-large",
-    "michaelfeil/ct2fast-e5-small-v2",
-    "newsrx/instructor-xl-newsrx",
-    "newsrx/instructor-large-newsrx",
-    "fresha/e5-large-v2-endpoint",
-    "ggrn/e5-small-v2",
-    "michaelfeil/ct2fast-e5-small",
-    "jncraton/e5-small-v2-ct2-int8",
-    "anttip/ct2fast-e5-small-v2-hfie",
-    "newsrx/instructor-large",
-    "newsrx/instructor-xl",
-    "dmlls/all-mpnet-base-v2",
-    "cgldo/semanticClone",
-    "Malmuk1/e5-large-v2_Sharded",
-    "jncraton/gte-small-ct2-int8",
-    "Einas/einas_ashkar",
-    "gruber/e5-small-v2-ggml",
-    "jncraton/bge-small-en-ct2-int8",
-    "vectoriseai/bge-small-en",
-    "recipe/embeddings",
-    "dhairya0907/thenlper-get-large",
-    "Narsil/bge-base-en",
-    "kozistr/fused-large-en",
-    "sionic-ai/sionic-ai-v2", # Wait for https://huggingface.co/sionic-ai/sionic-ai-v2/discussions/1
-    "sionic-ai/sionic-ai-v1", # Wait for https://huggingface.co/sionic-ai/sionic-ai-v2/discussions/1
-    "BAAI/bge-large-en", # Deprecated in favor of v1.5
-    "BAAI/bge-base-en", # Deprecated in favor of v1.5
-    "BAAI/bge-small-en", # Deprecated in favor of v1.5
-    "d0rj/e5-large-en-ru",
-    "d0rj/e5-base-en-ru",
-    "d0rj/e5-small-en-ru",
-    "aident-ai/bge-base-en-onnx",
-    "barisaydin/bge-base-en",
-    "barisaydin/gte-large",
-    "barisaydin/gte-base",
-    "barisaydin/gte-small",
-    "barisaydin/bge-small-en",
-    "odunola/e5-base-v2",
-    "goldenrooster/multilingual-e5-large",
-    "davidpeer/gte-small",
-    "barisaydin/bge-large-en",
-    "jamesgpt1/english-large-v1",
-    "vectoriseai/bge-large-en-v1.5",
-    "vectoriseai/bge-base-en-v1.5",
-    "vectoriseai/instructor-large",
-    "vectoriseai/instructor-base",
-    "vectoriseai/gte-large",
-    "vectoriseai/gte-base",
-    "vectoriseai/e5-large-v2",
-    "vectoriseai/bge-small-en-v1.5",
-    "vectoriseai/e5-base-v2",
-    "vectoriseai/e5-large",
-    "vectoriseai/multilingual-e5-large",
-    "vectoriseai/gte-small",
-    "vectoriseai/ember-v1",
-    "vectoriseai/e5-base",
-    "vectoriseai/e5-small-v2",
-    "michaelfeil/ct2fast-bge-large-en-v1.5",
-    "michaelfeil/ct2fast-bge-large-en-v1.5",
-    "michaelfeil/ct2fast-bge-base-en-v1.5",
-    "michaelfeil/ct2fast-gte-large",
-    "michaelfeil/ct2fast-gte-base",
-    "michaelfeil/ct2fast-bge-small-en-v1.5",
-    "rizki/bgr-tf",
-    "ef-zulla/e5-multi-sml-torch",
-    "cherubhao/yogamodel",
-    "morgendigital/multilingual-e5-large-quantized",
-    "jncraton/gte-tiny-ct2-int8",
-    "Research2NLP/electrical_stella",
-    "Intel/bge-base-en-v1.5-sts-int8-static",
-    "Intel/bge-base-en-v1.5-sts-int8-dynamic",
-    "Intel/bge-base-en-v1.5-sst2",
-    "Intel/bge-base-en-v1.5-sst2-int8-static",
-    "Intel/bge-base-en-v1.5-sst2-int8-dynamic",
-    "Intel/bge-small-en-v1.5-sst2",
-    "Intel/bge-small-en-v1.5-sst2-int8-dynamic",
-    "Intel/bge-small-en-v1.5-sst2-int8-static",
-    "binqiangliu/EmbeddingModlebgelargeENv1.5",
-    "DecisionOptimizationSystem/DeepFeatEmbeddingLargeContext",
-    "woody72/multilingual-e5-base",
-    "Severian/embed",
-    "Frazic/udever-bloom-3b-sentence",
-    "jamesgpt1/zzz",
-    "karrar-alwaili/UAE-Large-V1",
-    "odunola/UAE-Large-VI",
-    "shubham-bgi/UAE-Large",
-    "retrainai/instructor-xl",
-    "weakit-v/bge-base-en-v1.5-onnx",
-    "ieasybooks/multilingual-e5-large-onnx",
-    "gizmo-ai/Cohere-embed-multilingual-v3.0",
-    "jingyeom/korean_embedding_model",
-    "barisaydin/text2vec-base-multilingual",
-    "mlx-community/multilingual-e5-large-mlx",
-    "mlx-community/multilingual-e5-base-mlx",
-    "mlx-community/multilingual-e5-small-mlx",
-    "maiyad/multilingual-e5-small",
-    "khoa-klaytn/bge-base-en-v1.5-angle",
-    "khoa-klaytn/bge-small-en-v1.5-angle",
-    "mixamrepijey/instructor-small",
-    "mixamrepijey/instructor-models",
-    "lsf1000/bge-evaluation", # Empty
-    "giulio98/placeholder", # Empty
-    "Severian/nomic", # Copy
-    "atian-chapters/Chapters-SFR-Embedding-Mistral", # Copy
-    "rlsChapters/Chapters-SFR-Embedding-Mistral", # Copy
-    "TitanML/jina-v2-base-en-embed", # Copy
-    "MaziyarPanahi/GritLM-8x7B-GGUF", # GGUF variant
-    "Geolumina/instructor-xl", # Duplicate
-    "krilecy/e5-mistral-7b-instruct",
-    "beademiguelperez/sentence-transformers-multilingual-e5-small",
-    "arcdev/SFR-Embedding-Mistral",
-    "arcdev/e5-mistral-7b-instruct",
-    "Koat/gte-tiny",
-    "SmartComponents/bge-micro-v2",
-    "ildodeltaRule/multilingual-e5-large",
-    "hsikchi/dump",
-    "McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-supervised",
-    "McGill-NLP/LLM2Vec-Llama-2-7b-chat-hf-mntp-supervised",
-    "McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-supervised",
-    "McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-unsup-simcse",
-    "McGill-NLP/LLM2Vec-Llama-2-7b-chat-hf-mntp-unsup-simcse",
-    "McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-unsup-simcse",
-    "jncraton/GIST-small-Embedding-v0-ct2-int8",
-    "jncraton/stella-base-en-v2-ct2-int8",
-    "lightbird-ai/nomic",
-    "jamesdborin/jina-v2-base-en-embed",
-    "iampanda/Test",
-}
-
+TASK_TO_TASK_TYPE = {task_category: [] for task_category in TASKS}
+for board in BOARDS_CONFIG.values():
+    for task_category, task_list in board["tasks"].items():
+        TASK_TO_TASK_TYPE[task_category].extend(task_list)

 def add_lang(examples):
     if not(examples["eval_language"]):
@@ -1231,22 +63,14 @@ def norm(names): return set([name.split(" ")[0] for name in names])

 def add_task(examples):
     # Could be added to the dataset loading script instead
-    if examples["mteb_dataset_name"] in norm(TASK_LIST_CLASSIFICATION + TASK_LIST_CLASSIFICATION_DA + TASK_LIST_CLASSIFICATION_FR + TASK_LIST_CLASSIFICATION_NB + TASK_LIST_CLASSIFICATION_PL + TASK_LIST_CLASSIFICATION_SV + TASK_LIST_CLASSIFICATION_ZH):
-        examples["mteb_task"] = "Classification"
-    elif examples["mteb_dataset_name"] in norm(TASK_LIST_CLUSTERING + TASK_LIST_CLUSTERING_DE + TASK_LIST_CLUSTERING_FR + TASK_LIST_CLUSTERING_PL + TASK_LIST_CLUSTERING_ZH):
-        examples["mteb_task"] = "Clustering"
-    elif examples["mteb_dataset_name"] in norm(TASK_LIST_PAIR_CLASSIFICATION + TASK_LIST_PAIR_CLASSIFICATION_FR + TASK_LIST_PAIR_CLASSIFICATION_PL + TASK_LIST_PAIR_CLASSIFICATION_ZH):
-        examples["mteb_task"] = "PairClassification"
-    elif examples["mteb_dataset_name"] in norm(TASK_LIST_RERANKING + TASK_LIST_RERANKING_FR + TASK_LIST_RERANKING_ZH):
-        examples["mteb_task"] = "Reranking"
-    elif examples["mteb_dataset_name"] in norm(TASK_LIST_RETRIEVAL_NORM + TASK_LIST_RETRIEVAL_FR + TASK_LIST_RETRIEVAL_PL + TASK_LIST_RETRIEVAL_ZH + TASK_LIST_RETRIEVAL_LAW):
-        examples["mteb_task"] = "Retrieval"
-    elif examples["mteb_dataset_name"] in norm(TASK_LIST_STS + TASK_LIST_STS_FR + TASK_LIST_STS_PL + TASK_LIST_STS_ZH):
-        examples["mteb_task"] = "STS"
-    elif examples["mteb_dataset_name"] in norm(TASK_LIST_SUMMARIZATION + TASK_LIST_SUMMARIZATION_FR):
-        examples["mteb_task"] = "Summarization"
-    elif examples["mteb_dataset_name"] in norm(TASK_LIST_BITEXT_MINING + TASK_LIST_BITEXT_MINING_DA):
-        examples["mteb_task"] = "BitextMining"
+    task_name = examples["mteb_dataset_name"]
+    task_type = None
+    for task_category, task_list in TASK_TO_TASK_TYPE.items():
+        if task_name in norm(task_list):
+            task_type = task_category
+            break
+    if task_type is not None:
+        examples["mteb_task"] = task_type
     else:
         print("WARNING: Task not found for dataset", examples["mteb_dataset_name"])
         examples["mteb_task"] = "Unknown"
@@ -1268,12 +92,12 @@
 else:
     pbar = tqdm(models_to_run, desc="Fetching external model results")
     for model in pbar:
         pbar.set_description(f"Fetching external model results for {model!r}")
-        ds = load_dataset("mteb/results", model, trust_remote_code=True)
+        ds = load_dataset(RESULTS_REPO, model, trust_remote_code=True)
         # For local debugging:
         #, download_mode='force_redownload', verification_mode="no_checks")
         ds = ds.map(add_lang)
         ds = ds.map(add_task)
-        base_dict = {"Model": make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, "https://huggingface.co/spaces/mteb/leaderboard"))}
+        base_dict = {"Model": make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, f"https://huggingface.co/spaces/{REPO_ID}"))}
        # For now only one metric per task - Could add more metrics later on
        for task, metric in TASK_TO_METRIC.items():
            ds_dict = ds.filter(lambda x: (x["mteb_task"] == task) and (x["metric"] == metric))["test"].to_dict()
@@ -1328,7 +152,7 @@ def add_rank(df):
     return df

 def get_mteb_data(tasks=["Clustering"], langs=[], datasets=[], fillna=True, add_emb_dim=True, task_to_metric=TASK_TO_METRIC, rank=True):
-    api = HfApi()
+    api = API
     models = api.list_models(filter="mteb")
     # Initialize list of models that we cannot fetch metadata from
     df_list = []
@@ -1407,19 +231,11 @@ def get_mteb_data(tasks=["Clustering"], langs=[], datasets=[], fillna=True, add_
     df.fillna("", inplace=True)
     return df

-def get_mteb_average():
-    global DATA_OVERALL, DATA_CLASSIFICATION_EN, DATA_CLUSTERING, DATA_PAIR_CLASSIFICATION, DATA_RERANKING, DATA_RETRIEVAL, DATA_STS_EN, DATA_SUMMARIZATION
+def get_mteb_average(task_dict: dict):
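+    # task_dict maps a task category to its dataset list, e.g. (shape assumed from the
+    # board configs built above): {"Classification": ["Banking77Classification", ...], ...}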
+    all_tasks = reduce(lambda x, y: x + y, task_dict.values())
     DATA_OVERALL = get_mteb_data(
-        tasks=[
-            "Classification",
-            "Clustering",
-            "PairClassification",
-            "Reranking",
-            "Retrieval",
-            "STS",
-            "Summarization",
-        ],
-        datasets=TASK_LIST_CLASSIFICATION + TASK_LIST_CLUSTERING + TASK_LIST_PAIR_CLASSIFICATION + TASK_LIST_RERANKING + TASK_LIST_RETRIEVAL + TASK_LIST_STS + TASK_LIST_SUMMARIZATION,
+        tasks=list(task_dict.keys()),
+        datasets=all_tasks,
         fillna=False,
         add_emb_dim=True,
         rank=False,
     )
@@ -1427,280 +243,60 @@ def get_mteb_average():
     # Debugging:
     # DATA_OVERALL.to_csv("overall.csv")

-    DATA_OVERALL.insert(1, f"Average ({len(TASK_LIST_EN)} datasets)", DATA_OVERALL[TASK_LIST_EN].mean(axis=1, skipna=False))
-    DATA_OVERALL.insert(2, f"Classification Average ({len(TASK_LIST_CLASSIFICATION)} datasets)", DATA_OVERALL[TASK_LIST_CLASSIFICATION].mean(axis=1, skipna=False))
-    DATA_OVERALL.insert(3, f"Clustering Average ({len(TASK_LIST_CLUSTERING)} datasets)", DATA_OVERALL[TASK_LIST_CLUSTERING].mean(axis=1, skipna=False))
-    DATA_OVERALL.insert(4, f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION)} datasets)", DATA_OVERALL[TASK_LIST_PAIR_CLASSIFICATION].mean(axis=1, skipna=False))
-    DATA_OVERALL.insert(5, f"Reranking Average ({len(TASK_LIST_RERANKING)} datasets)", DATA_OVERALL[TASK_LIST_RERANKING].mean(axis=1, skipna=False))
-    DATA_OVERALL.insert(6, f"Retrieval Average ({len(TASK_LIST_RETRIEVAL)} datasets)", DATA_OVERALL[TASK_LIST_RETRIEVAL].mean(axis=1, skipna=False))
-    DATA_OVERALL.insert(7, f"STS Average ({len(TASK_LIST_STS)} datasets)", DATA_OVERALL[TASK_LIST_STS].mean(axis=1, skipna=False))
-    DATA_OVERALL.insert(8, f"Summarization Average ({len(TASK_LIST_SUMMARIZATION)} dataset)", DATA_OVERALL[TASK_LIST_SUMMARIZATION].mean(axis=1, skipna=False))
-    DATA_OVERALL.sort_values(f"Average ({len(TASK_LIST_EN)} datasets)", ascending=False, inplace=True)
+    DATA_OVERALL.insert(1, f"Average ({len(all_tasks)} datasets)", DATA_OVERALL[all_tasks].mean(axis=1, skipna=False))
+    for i, (task_category, task_category_list) in enumerate(task_dict.items()):
+        DATA_OVERALL.insert(i+2, f"{task_category} Average ({len(task_category_list)} datasets)", DATA_OVERALL[task_category_list].mean(axis=1, skipna=False))
+    DATA_OVERALL.sort_values(f"Average ({len(all_tasks)} datasets)", ascending=False, inplace=True)
     # Start ranking from 1
     DATA_OVERALL.insert(0, "Rank", list(range(1, len(DATA_OVERALL) + 1)))

     DATA_OVERALL = DATA_OVERALL.round(2)

-    DATA_CLASSIFICATION_EN = add_rank(DATA_OVERALL[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_CLASSIFICATION])
-    # Only keep rows with at least one score in addition to the "Model" & rank column
-    DATA_CLASSIFICATION_EN = DATA_CLASSIFICATION_EN[DATA_CLASSIFICATION_EN.iloc[:, 4:].ne("").any(axis=1)]
-
-    DATA_CLUSTERING = add_rank(DATA_OVERALL[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_CLUSTERING])
-    DATA_CLUSTERING = DATA_CLUSTERING[DATA_CLUSTERING.iloc[:, 4:].ne("").any(axis=1)]
-
-    DATA_PAIR_CLASSIFICATION = add_rank(DATA_OVERALL[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_PAIR_CLASSIFICATION])
-    DATA_PAIR_CLASSIFICATION = DATA_PAIR_CLASSIFICATION[DATA_PAIR_CLASSIFICATION.iloc[:, 4:].ne("").any(axis=1)]
-
-    DATA_RERANKING = add_rank(DATA_OVERALL[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_RERANKING])
-    DATA_RERANKING = DATA_RERANKING[DATA_RERANKING.iloc[:, 4:].ne("").any(axis=1)]
-
-    DATA_RETRIEVAL = add_rank(DATA_OVERALL[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_RETRIEVAL])
-    DATA_RETRIEVAL = DATA_RETRIEVAL[DATA_RETRIEVAL.iloc[:, 4:].ne("").any(axis=1)]
-
-    DATA_STS_EN = add_rank(DATA_OVERALL[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_STS])
-    DATA_STS_EN = DATA_STS_EN[DATA_STS_EN.iloc[:, 4:].ne("").any(axis=1)]
-
-    DATA_SUMMARIZATION = add_rank(DATA_OVERALL[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_SUMMARIZATION])
-    DATA_SUMMARIZATION = DATA_SUMMARIZATION[DATA_SUMMARIZATION.iloc[:, 1:].ne("").any(axis=1)]
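+    # Build one ranked table per task category; these replace the former module-level
+    # DATA_CLASSIFICATION_EN / DATA_CLUSTERING / ... / DATA_SUMMARIZATION frames removed above.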
@@ -1427,280 +243,60 @@ def get_mteb_average():
     # Debugging:
     # DATA_OVERALL.to_csv("overall.csv")

-    DATA_OVERALL.insert(1, f"Average ({len(TASK_LIST_EN)} datasets)", DATA_OVERALL[TASK_LIST_EN].mean(axis=1, skipna=False))
-    DATA_OVERALL.insert(2, f"Classification Average ({len(TASK_LIST_CLASSIFICATION)} datasets)", DATA_OVERALL[TASK_LIST_CLASSIFICATION].mean(axis=1, skipna=False))
-    DATA_OVERALL.insert(3, f"Clustering Average ({len(TASK_LIST_CLUSTERING)} datasets)", DATA_OVERALL[TASK_LIST_CLUSTERING].mean(axis=1, skipna=False))
-    DATA_OVERALL.insert(4, f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION)} datasets)", DATA_OVERALL[TASK_LIST_PAIR_CLASSIFICATION].mean(axis=1, skipna=False))
-    DATA_OVERALL.insert(5, f"Reranking Average ({len(TASK_LIST_RERANKING)} datasets)", DATA_OVERALL[TASK_LIST_RERANKING].mean(axis=1, skipna=False))
-    DATA_OVERALL.insert(6, f"Retrieval Average ({len(TASK_LIST_RETRIEVAL)} datasets)", DATA_OVERALL[TASK_LIST_RETRIEVAL].mean(axis=1, skipna=False))
-    DATA_OVERALL.insert(7, f"STS Average ({len(TASK_LIST_STS)} datasets)", DATA_OVERALL[TASK_LIST_STS].mean(axis=1, skipna=False))
-    DATA_OVERALL.insert(8, f"Summarization Average ({len(TASK_LIST_SUMMARIZATION)} dataset)", DATA_OVERALL[TASK_LIST_SUMMARIZATION].mean(axis=1, skipna=False))
-    DATA_OVERALL.sort_values(f"Average ({len(TASK_LIST_EN)} datasets)", ascending=False, inplace=True)
+    DATA_OVERALL.insert(1, f"Average ({len(all_tasks)} datasets)", DATA_OVERALL[all_tasks].mean(axis=1, skipna=False))
+    for i, (task_category, task_category_list) in enumerate(task_dict.items()):
+        DATA_OVERALL.insert(i+2, f"{task_category} Average ({len(task_category_list)} datasets)", DATA_OVERALL[task_category_list].mean(axis=1, skipna=False))
+    DATA_OVERALL.sort_values(f"Average ({len(all_tasks)} datasets)", ascending=False, inplace=True)
     # Start ranking from 1
     DATA_OVERALL.insert(0, "Rank", list(range(1, len(DATA_OVERALL) + 1)))

     DATA_OVERALL = DATA_OVERALL.round(2)

-    DATA_CLASSIFICATION_EN = add_rank(DATA_OVERALL[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_CLASSIFICATION])
-    # Only keep rows with at least one score in addition to the "Model" & rank column
-    DATA_CLASSIFICATION_EN = DATA_CLASSIFICATION_EN[DATA_CLASSIFICATION_EN.iloc[:, 4:].ne("").any(axis=1)]
-
-    DATA_CLUSTERING = add_rank(DATA_OVERALL[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_CLUSTERING])
-    DATA_CLUSTERING = DATA_CLUSTERING[DATA_CLUSTERING.iloc[:, 4:].ne("").any(axis=1)]
-
-    DATA_PAIR_CLASSIFICATION = add_rank(DATA_OVERALL[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_PAIR_CLASSIFICATION])
-    DATA_PAIR_CLASSIFICATION = DATA_PAIR_CLASSIFICATION[DATA_PAIR_CLASSIFICATION.iloc[:, 4:].ne("").any(axis=1)]
-
-    DATA_RERANKING = add_rank(DATA_OVERALL[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_RERANKING])
-    DATA_RERANKING = DATA_RERANKING[DATA_RERANKING.iloc[:, 4:].ne("").any(axis=1)]
-
-    DATA_RETRIEVAL = add_rank(DATA_OVERALL[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_RETRIEVAL])
-    DATA_RETRIEVAL = DATA_RETRIEVAL[DATA_RETRIEVAL.iloc[:, 4:].ne("").any(axis=1)]
-
-    DATA_STS_EN = add_rank(DATA_OVERALL[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_STS])
-    DATA_STS_EN = DATA_STS_EN[DATA_STS_EN.iloc[:, 4:].ne("").any(axis=1)]
-
-    DATA_SUMMARIZATION = add_rank(DATA_OVERALL[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_SUMMARIZATION])
-    DATA_SUMMARIZATION = DATA_SUMMARIZATION[DATA_SUMMARIZATION.iloc[:, 1:].ne("").any(axis=1)]
+    DATA_TASKS = {}
+    for task_category, task_category_list in task_dict.items():
+        DATA_TASKS[task_category] = add_rank(DATA_OVERALL[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + task_category_list])
+        DATA_TASKS[task_category] = DATA_TASKS[task_category][DATA_TASKS[task_category].iloc[:, 4:].ne("").any(axis=1)]
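Every per-category table is now built by the same two lines: select the metadata columns plus the category's datasets, then keep only rows with at least one real score (the first four columns hold rank and model metadata once add_rank has run, hence the check from column 4 onward). A self-contained illustration of the `.iloc[:, 4:].ne("").any(axis=1)` idiom, on a toy frame with invented model names:

import pandas as pd

df = pd.DataFrame({
    "Rank": [1, 2],
    "Model": ["model-a", "model-b"],
    "Model Size (Million Parameters)": [110, 350],
    "Memory Usage (GB, fp32)": [0.41, 1.30],
    "TaskX": [55.2, ""],   # model-b has no score on either task
    "TaskY": [61.0, ""],
})

# True for every row where at least one score column (index 4 onwards) is non-empty
mask = df.iloc[:, 4:].ne("").any(axis=1)
print(df[mask]["Model"].tolist())  # ['model-a']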
f"Average ({len(TASK_LIST_ZH)} datasets)", f"Classification Average ({len(TASK_LIST_CLASSIFICATION_ZH)} datasets)", f"Clustering Average ({len(TASK_LIST_CLUSTERING_ZH)} datasets)", f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION_ZH)} datasets)", f"Reranking Average ({len(TASK_LIST_RERANKING_ZH)} datasets)", f"Retrieval Average ({len(TASK_LIST_RETRIEVAL_ZH)} datasets)", f"STS Average ({len(TASK_LIST_STS_ZH)} datasets)"]] - DATA_OVERALL_ZH = DATA_OVERALL_ZH[DATA_OVERALL_ZH.iloc[:, 5:].ne("").any(axis=1)] - - return DATA_OVERALL_ZH - -def get_mteb_average_fr(): - global DATA_OVERALL_FR, DATA_CLASSIFICATION_FR, DATA_CLUSTERING_FR, DATA_PAIR_CLASSIFICATION_FR, DATA_RERANKING_FR, DATA_RETRIEVAL_FR, DATA_STS_FR, DATA_SUMMARIZATION_FR - DATA_OVERALL_FR = get_mteb_data( - tasks=[ - "Classification", - "Clustering", - "PairClassification", - "Reranking", - "Retrieval", - "STS", - "Summarization" - ], - datasets=TASK_LIST_CLASSIFICATION_FR + TASK_LIST_CLUSTERING_FR + TASK_LIST_PAIR_CLASSIFICATION_FR + TASK_LIST_RERANKING_FR + TASK_LIST_RETRIEVAL_FR + TASK_LIST_STS_FR + TASK_LIST_SUMMARIZATION_FR, - fillna=False, - add_emb_dim=True, - rank=False, - ) - # Debugging: - # DATA_OVERALL_FR.to_csv("overall.csv") - - DATA_OVERALL_FR.insert(1, f"Average ({len(TASK_LIST_FR)} datasets)", DATA_OVERALL_FR[TASK_LIST_FR].mean(axis=1, skipna=False)) - DATA_OVERALL_FR.insert(2, f"Classification Average ({len(TASK_LIST_CLASSIFICATION_FR)} datasets)", DATA_OVERALL_FR[TASK_LIST_CLASSIFICATION_FR].mean(axis=1, skipna=False)) - DATA_OVERALL_FR.insert(3, f"Clustering Average ({len(TASK_LIST_CLUSTERING_FR)} datasets)", DATA_OVERALL_FR[TASK_LIST_CLUSTERING_FR].mean(axis=1, skipna=False)) - DATA_OVERALL_FR.insert(4, f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION_FR)} datasets)", DATA_OVERALL_FR[TASK_LIST_PAIR_CLASSIFICATION_FR].mean(axis=1, skipna=False)) - DATA_OVERALL_FR.insert(5, f"Reranking Average ({len(TASK_LIST_RERANKING_FR)} datasets)", DATA_OVERALL_FR[TASK_LIST_RERANKING_FR].mean(axis=1, skipna=False)) - DATA_OVERALL_FR.insert(6, f"Retrieval Average ({len(TASK_LIST_RETRIEVAL_FR)} datasets)", DATA_OVERALL_FR[TASK_LIST_RETRIEVAL_FR].mean(axis=1, skipna=False)) - DATA_OVERALL_FR.insert(7, f"STS Average ({len(TASK_LIST_STS_FR)} datasets)", DATA_OVERALL_FR[TASK_LIST_STS_FR].mean(axis=1, skipna=False)) - DATA_OVERALL_FR.insert(8, f"Summarization Average ({len(TASK_LIST_SUMMARIZATION_FR)} dataset)", DATA_OVERALL_FR[TASK_LIST_SUMMARIZATION_FR].mean(axis=1, skipna=False)) - DATA_OVERALL_FR.sort_values(f"Average ({len(TASK_LIST_FR)} datasets)", ascending=False, inplace=True) - # Start ranking from 1 - DATA_OVERALL_FR.insert(0, "Rank", list(range(1, len(DATA_OVERALL_FR) + 1))) - DATA_OVERALL_FR = DATA_OVERALL_FR.round(2) - - DATA_CLASSIFICATION_FR = add_rank(DATA_OVERALL_FR[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_CLASSIFICATION_FR]) - DATA_CLASSIFICATION_FR = DATA_CLASSIFICATION_FR[DATA_CLASSIFICATION_FR.iloc[:, 4:].ne("").any(axis=1)] - - DATA_CLUSTERING_FR = add_rank(DATA_OVERALL_FR[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_CLUSTERING_FR]) - DATA_CLUSTERING_FR = DATA_CLUSTERING_FR[DATA_CLUSTERING_FR.iloc[:, 4:].ne("").any(axis=1)] - - DATA_PAIR_CLASSIFICATION_FR = add_rank(DATA_OVERALL_FR[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_PAIR_CLASSIFICATION_FR]) - DATA_PAIR_CLASSIFICATION_FR = DATA_PAIR_CLASSIFICATION_FR[DATA_PAIR_CLASSIFICATION_FR.iloc[:, 
4:].ne("").any(axis=1)] - - DATA_RERANKING_FR = add_rank(DATA_OVERALL_FR[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_RERANKING_FR]) - DATA_RERANKING_FR = DATA_RERANKING_FR[DATA_RERANKING_FR.iloc[:, 4:].ne("").any(axis=1)] - - DATA_RETRIEVAL_FR = add_rank(DATA_OVERALL_FR[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_RETRIEVAL_FR]) - DATA_RETRIEVAL_FR = DATA_RETRIEVAL_FR[DATA_RETRIEVAL_FR.iloc[:, 4:].ne("").any(axis=1)] - - DATA_STS_FR = add_rank(DATA_OVERALL_FR[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_STS_FR]) - DATA_STS_FR = DATA_STS_FR[DATA_STS_FR.iloc[:, 4:].ne("").any(axis=1)] - - DATA_SUMMARIZATION_FR = add_rank(DATA_OVERALL_FR[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_SUMMARIZATION_FR]) - DATA_SUMMARIZATION_FR = DATA_SUMMARIZATION_FR[DATA_SUMMARIZATION_FR.iloc[:, 1:].ne("").any(axis=1)] - - # Fill NaN after averaging - DATA_OVERALL_FR.fillna("", inplace=True) - - DATA_OVERALL_FR = DATA_OVERALL_FR[["Rank", "Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)", "Embedding Dimensions", "Max Tokens", f"Average ({len(TASK_LIST_FR)} datasets)", f"Classification Average ({len(TASK_LIST_CLASSIFICATION_FR)} datasets)", f"Clustering Average ({len(TASK_LIST_CLUSTERING_FR)} datasets)", f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION_FR)} datasets)", f"Reranking Average ({len(TASK_LIST_RERANKING_FR)} datasets)", f"Retrieval Average ({len(TASK_LIST_RETRIEVAL_FR)} datasets)", f"STS Average ({len(TASK_LIST_STS_FR)} datasets)", f"Summarization Average ({len(TASK_LIST_SUMMARIZATION_FR)} dataset)"]] - DATA_OVERALL_FR = DATA_OVERALL_FR[DATA_OVERALL_FR.iloc[:, 5:].ne("").any(axis=1)] - - return DATA_OVERALL_FR - -def get_mteb_average_pl(): - global DATA_OVERALL_PL, DATA_CLASSIFICATION_PL, DATA_CLUSTERING_PL, DATA_PAIR_CLASSIFICATION_PL, DATA_RETRIEVAL_PL, DATA_STS_PL - DATA_OVERALL_PL = get_mteb_data( - tasks=[ - "Classification", - "Clustering", - "PairClassification", - "Retrieval", - "STS", - ], - datasets=TASK_LIST_CLASSIFICATION_PL + TASK_LIST_CLUSTERING_PL + TASK_LIST_PAIR_CLASSIFICATION_PL + TASK_LIST_RETRIEVAL_PL + TASK_LIST_STS_PL, - fillna=False, - add_emb_dim=True, - rank=False, - ) - # Debugging: - # DATA_OVERALL_PL.to_csv("overall.csv") - - DATA_OVERALL_PL.insert(1, f"Average ({len(TASK_LIST_PL)} datasets)", DATA_OVERALL_PL[TASK_LIST_PL].mean(axis=1, skipna=False)) - DATA_OVERALL_PL.insert(2, f"Classification Average ({len(TASK_LIST_CLASSIFICATION_PL)} datasets)", DATA_OVERALL_PL[TASK_LIST_CLASSIFICATION_PL].mean(axis=1, skipna=False)) - DATA_OVERALL_PL.insert(3, f"Clustering Average ({len(TASK_LIST_CLUSTERING_PL)} datasets)", DATA_OVERALL_PL[TASK_LIST_CLUSTERING_PL].mean(axis=1, skipna=False)) - DATA_OVERALL_PL.insert(4, f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION_PL)} datasets)", DATA_OVERALL_PL[TASK_LIST_PAIR_CLASSIFICATION_PL].mean(axis=1, skipna=False)) - DATA_OVERALL_PL.insert(5, f"Retrieval Average ({len(TASK_LIST_RETRIEVAL_PL)} datasets)", DATA_OVERALL_PL[TASK_LIST_RETRIEVAL_PL].mean(axis=1, skipna=False)) - DATA_OVERALL_PL.insert(6, f"STS Average ({len(TASK_LIST_STS_PL)} datasets)", DATA_OVERALL_PL[TASK_LIST_STS_PL].mean(axis=1, skipna=False)) - DATA_OVERALL_PL.sort_values(f"Average ({len(TASK_LIST_PL)} datasets)", ascending=False, inplace=True) - # Start ranking from 1 - DATA_OVERALL_PL.insert(0, "Rank", list(range(1, len(DATA_OVERALL_PL) + 1))) + DATA_OVERALL = 
DATA_OVERALL[data_overall_rows] + DATA_OVERALL = DATA_OVERALL[DATA_OVERALL.iloc[:, 5:].ne("").any(axis=1)] - DATA_OVERALL_PL = DATA_OVERALL_PL.round(2) + return DATA_OVERALL, DATA_TASKS - DATA_CLASSIFICATION_PL = add_rank(DATA_OVERALL_PL[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_CLASSIFICATION_PL]) - # Only keep rows with at least one score in addition to the "Model" & rank column - DATA_CLASSIFICATION_PL = DATA_CLASSIFICATION_PL[DATA_CLASSIFICATION_PL.iloc[:, 4:].ne("").any(axis=1)] - - DATA_CLUSTERING_PL = add_rank(DATA_OVERALL_PL[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_CLUSTERING_PL]) - DATA_CLUSTERING_PL = DATA_CLUSTERING_PL[DATA_CLUSTERING_PL.iloc[:, 4:].ne("").any(axis=1)] - - DATA_PAIR_CLASSIFICATION_PL = add_rank(DATA_OVERALL_PL[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_PAIR_CLASSIFICATION_PL]) - DATA_PAIR_CLASSIFICATION_PL = DATA_PAIR_CLASSIFICATION_PL[DATA_PAIR_CLASSIFICATION_PL.iloc[:, 4:].ne("").any(axis=1)] - - DATA_RETRIEVAL_PL = add_rank(DATA_OVERALL_PL[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_RETRIEVAL_PL]) - DATA_RETRIEVAL_PL = DATA_RETRIEVAL_PL[DATA_RETRIEVAL_PL.iloc[:, 4:].ne("").any(axis=1)] - - DATA_STS_PL = add_rank(DATA_OVERALL_PL[["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_STS_PL]) - DATA_STS_PL = DATA_STS_PL[DATA_STS_PL.iloc[:, 4:].ne("").any(axis=1)] - - # Fill NaN after averaging - DATA_OVERALL_PL.fillna("", inplace=True) - - DATA_OVERALL_PL = DATA_OVERALL_PL[["Rank", "Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)", "Embedding Dimensions", "Max Tokens", f"Average ({len(TASK_LIST_PL)} datasets)", f"Classification Average ({len(TASK_LIST_CLASSIFICATION_PL)} datasets)", f"Clustering Average ({len(TASK_LIST_CLUSTERING_PL)} datasets)", f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION_PL)} datasets)", f"Retrieval Average ({len(TASK_LIST_RETRIEVAL_PL)} datasets)", f"STS Average ({len(TASK_LIST_STS_PL)} datasets)"]] - DATA_OVERALL_PL = DATA_OVERALL_PL[DATA_OVERALL_PL.iloc[:, 5:].ne("").any(axis=1)] - - return DATA_OVERALL_PL - -get_mteb_average() -get_mteb_average_fr() -get_mteb_average_pl() -get_mteb_average_zh() -DATA_BITEXT_MINING = get_mteb_data(["BitextMining"], [], TASK_LIST_BITEXT_MINING)[["Rank", "Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)", "Average"] + TASK_LIST_BITEXT_MINING] -DATA_BITEXT_MINING_DA = get_mteb_data(["BitextMining"], [], TASK_LIST_BITEXT_MINING_DA)[["Rank", "Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)"] + TASK_LIST_BITEXT_MINING_DA] -DATA_CLASSIFICATION_DA = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_DA)[["Rank", "Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)", "Average"] + TASK_LIST_CLASSIFICATION_DA] -DATA_CLASSIFICATION_NB = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_NB)[["Rank", "Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)", "Average"] + TASK_LIST_CLASSIFICATION_NB] -DATA_CLASSIFICATION_SV = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_SV)[["Rank", "Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)", "Average"] + TASK_LIST_CLASSIFICATION_SV] -DATA_CLASSIFICATION_OTHER = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_OTHER)[["Rank", "Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)", "Average"] 
+ TASK_LIST_CLASSIFICATION_OTHER] -DATA_CLUSTERING_DE = get_mteb_data(["Clustering"], [], TASK_LIST_CLUSTERING_DE)[["Rank", "Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)", "Average"] + TASK_LIST_CLUSTERING_DE] -DATA_STS_OTHER = get_mteb_data(["STS"], [], TASK_LIST_STS_OTHER)[["Rank", "Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)", "Average"] + TASK_LIST_STS_OTHER] -DATA_RETRIEVAL_LAW = get_mteb_data(["Retrieval"], [], TASK_LIST_RETRIEVAL_LAW)[["Rank", "Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)", "Average"] + TASK_LIST_RETRIEVAL_LAW] +boards_data = {} +all_data_tasks = [] +for board, board_config in BOARDS_CONFIG.items(): + boards_data[board] = { + "data_overall": None, + "data_tasks": {} + } + if board_config["has_overall"]: + data_overall, data_tasks = get_mteb_average(board_config["tasks"]) + boards_data[board]["data_overall"] = data_overall + boards_data[board]["data_tasks"] = data_tasks + all_data_tasks.extend(data_tasks.values()) + else: + for task_category, task_category_list in board_config["tasks"].items(): + columns = ["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)", "Embedding Dimensions", "Max Tokens"] + if len(task_category_list) > 1: + columns.append("Average") + print(board, task_category, task_category_list) + data_task_category = get_mteb_data([task_category], [], task_category_list)[columns + task_category_list] + boards_data[board]["data_tasks"][task_category] = data_task_category + all_data_tasks.append(data_task_category) # Exact, add all non-nan integer values for every dataset NUM_SCORES = 0 DATASETS = [] MODELS = [] # LANGUAGES = [] -for d in [ - DATA_BITEXT_MINING, - DATA_BITEXT_MINING_DA, - DATA_CLASSIFICATION_EN, - DATA_CLASSIFICATION_DA, - DATA_CLASSIFICATION_FR, - DATA_CLASSIFICATION_NB, - DATA_CLASSIFICATION_PL, - DATA_CLASSIFICATION_SV, - DATA_CLASSIFICATION_ZH, - DATA_CLASSIFICATION_OTHER, - DATA_CLUSTERING, - DATA_CLUSTERING_DE, - DATA_CLUSTERING_FR, - DATA_CLUSTERING_PL, - DATA_CLUSTERING_ZH, - DATA_PAIR_CLASSIFICATION, - DATA_PAIR_CLASSIFICATION_FR, - DATA_PAIR_CLASSIFICATION_PL, - DATA_PAIR_CLASSIFICATION_ZH, - DATA_RERANKING, - DATA_RERANKING_FR, - DATA_RERANKING_ZH, - DATA_RETRIEVAL, - DATA_RETRIEVAL_FR, - DATA_RETRIEVAL_PL, - DATA_RETRIEVAL_ZH, - DATA_RETRIEVAL_LAW, - DATA_STS_EN, - DATA_STS_FR, - DATA_STS_PL, - DATA_STS_ZH, - DATA_STS_OTHER, - DATA_SUMMARIZATION, - DATA_SUMMARIZATION_FR, -]: +for d in all_data_tasks: # NUM_SCORES += d.iloc[:, 1:].apply(lambda x: sum([1 for y in x if isinstance(y, float) and not np.isnan(y)]), axis=1).sum() cols_to_ignore = 4 if "Average" in d.columns else 3 # Count number of scores including only non-nan floats & excluding the rank column @@ -1746,319 +342,44 @@ Each inner tab can have the following keys: - refresh: The function to refresh the leaderboard """ -chinese_credits = "[FlagEmbedding](https://github.com/FlagOpen/FlagEmbedding)" -french_credits = "[Lyon-NLP](https://github.com/Lyon-NLP): [Gabriel Sequeira](https://github.com/GabrielSequeira), [Imene Kerboua](https://github.com/imenelydiaker), [Wissam Siblini](https://github.com/wissam-sib), [Mathieu Ciancone](https://github.com/MathieuCiancone), [Marion Schaeffer](https://github.com/schmarion)" -danish_credits = "[Kenneth Enevoldsen](https://github.com/KennethEnevoldsen), [scandinavian-embedding-benchmark](https://kennethenevoldsen.github.io/scandinavian-embedding-benchmark/)" -norwegian_credits = "[Kenneth Enevoldsen](https://github.com/KennethEnevoldsen), 
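All of the per-board module-level tables are now produced by one loop over BOARDS_CONFIG. The config file itself is not part of this diff; based on the keys the loop reads (title, language_long, has_overall, tasks, and the optional acronym, icon, credits and special_icons used further below), one entry plausibly looks like this sketch, with all values invented for illustration:

# Hypothetical shape of BOARDS_CONFIG entries, inferred from how the loop uses them.
BOARDS_CONFIG = {
    "zh": {
        "title": "Chinese",
        "language_long": "Chinese",
        "acronym": "C-MTEB",
        "icon": "\U0001F1E8\U0001F1F3",            # optional board icon
        "credits": "[FlagEmbedding](https://github.com/FlagOpen/FlagEmbedding)",
        "has_overall": True,                        # True -> build an averaged Overall tab
        "special_icons": {"Classification": "\U0001F9E1"},  # optional per-task override
        "tasks": {                                  # task category -> dataset columns
            "Classification": ["IFlyTek", "JDReview"],
            "STS": ["ATEC", "BQ"],
        },
    },
    "law": {
        "title": "Law",
        "language_long": "English, German, Chinese",
        "credits": "[Voyage AI](https://www.voyageai.com/)",
        "has_overall": False,                       # False -> per-category tables only
        "tasks": {
            "Retrieval": ["AILACasedocs", "AILAStatutes"],
        },
    },
}

Boards with has_overall set to False skip get_mteb_average entirely and get one table per task category, with an "Average" column added only when the category has more than one dataset.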
@@ -1746,319 +342,44 @@ Each inner tab can have the following keys:
 - refresh: The function to refresh the leaderboard
 """

-chinese_credits = "[FlagEmbedding](https://github.com/FlagOpen/FlagEmbedding)"
-french_credits = "[Lyon-NLP](https://github.com/Lyon-NLP): [Gabriel Sequeira](https://github.com/GabrielSequeira), [Imene Kerboua](https://github.com/imenelydiaker), [Wissam Siblini](https://github.com/wissam-sib), [Mathieu Ciancone](https://github.com/MathieuCiancone), [Marion Schaeffer](https://github.com/schmarion)"
-danish_credits = "[Kenneth Enevoldsen](https://github.com/KennethEnevoldsen), [scandinavian-embedding-benchmark](https://kennethenevoldsen.github.io/scandinavian-embedding-benchmark/)"
-norwegian_credits = "[Kenneth Enevoldsen](https://github.com/KennethEnevoldsen), [scandinavian-embedding-benchmark](https://kennethenevoldsen.github.io/scandinavian-embedding-benchmark/)"
-polish_credits = "[Rafał Poświata](https://github.com/rafalposwiata)"
-
 data = {
-    "Overall": {
-        "metric": "Various, refer to task tabs",
-        "data": [
-            {
-                "language": "English",
-                "description": "**Overall MTEB English leaderboard** 🔮",
-                "data": DATA_OVERALL,
-                "refresh": get_mteb_average,
-            },
-            {
-                "language": "Chinese",
-                "data": DATA_OVERALL_ZH,
-                "description": "**Overall MTEB Chinese leaderboard (C-MTEB)** 🔮🇨🇳",
-                "credits": chinese_credits,
-                "refresh": get_mteb_average_zh,
-            },
-            {
-                "language": "French",
-                "data": DATA_OVERALL_FR,
-                "description": "**Overall MTEB French leaderboard (F-MTEB)** 🔮🇫🇷",
-                "credits": french_credits,
-                "refresh": get_mteb_average_fr,
-            },
-            {
-                "language": "Polish",
-                "data": DATA_OVERALL_PL,
-                "description": "**Overall MTEB Polish leaderboard** 🔮🇵🇱",
-                "refresh": get_mteb_average_pl,
-            },
-        ]
-    },
-    "Bitext Mining": {
-        "metric": "[F1](https://huggingface.co/spaces/evaluate-metric/f1)",
-        "data": [
-            {
-                "language": "English-X",
-                "language_long": "117 (Pairs of: English & other language)",
-                "description": "**Bitext Mining English-X Leaderboard** 🎌",
-                "data": DATA_BITEXT_MINING,
-                "refresh": partial(get_mteb_data, tasks=["BitextMining"], datasets=TASK_LIST_BITEXT_MINING),
-            },
-            {
-                "language": "Danish",
-                "language_long": "Danish & Bornholmsk (Danish Dialect)",
-                "description": "**Bitext Mining Danish Leaderboard** 🎌🇩🇰",
-                "credits": danish_credits,
-                "data": DATA_BITEXT_MINING_DA,
-                "refresh": partial(get_mteb_data, tasks=["BitextMining"], datasets=TASK_LIST_BITEXT_MINING_DA),
-            }
-        ]
-    },
-    "Classification": {
-        "metric": "[Accuracy](https://huggingface.co/spaces/evaluate-metric/accuracy)",
-        "data": [
-            {
-                "language": "English",
-                "description": "**Classification English Leaderboard** ❤️",
-                "data": DATA_CLASSIFICATION_EN,
-                "refresh": partial(get_mteb_data, tasks=["Classification"], langs=["en"])
-            },
-            {
-                "language": "Chinese",
-                "description": "**Classification Chinese Leaderboard** 🧡🇨🇳",
-                "credits": chinese_credits,
-                "data": DATA_CLASSIFICATION_ZH,
-                "refresh": partial(get_mteb_data, tasks=["Classification"], datasets=TASK_LIST_CLASSIFICATION_ZH)
-            },
-            {
-                "language": "Danish",
-                "description": "**Classification Danish Leaderboard** 🤍🇩🇰",
-                "credits": danish_credits,
-                "data": DATA_CLASSIFICATION_DA,
-                "refresh": partial(get_mteb_data, tasks=["Classification"], datasets=TASK_LIST_CLASSIFICATION_DA)
-            },
-            {
-                "language": "French",
-                "description": "**Classification French Leaderboard** 💙🇫🇷",
-                "credits": french_credits,
-                "data": DATA_CLASSIFICATION_FR,
-                "refresh": partial(get_mteb_data, tasks=["Classification"], datasets=TASK_LIST_CLASSIFICATION_FR)
-            },
-            {
-                "language": "Norwegian",
-                "language_long": "Norwegian Bokmål",
-                "description": "**Classification Norwegian Leaderboard** 💙🇳🇴",
-                "credits": norwegian_credits,
-                "data": DATA_CLASSIFICATION_NB,
-                "refresh": partial(get_mteb_data, tasks=["Classification"], datasets=TASK_LIST_CLASSIFICATION_NB)
-            },
-            {
-                "language": "Polish",
-                "description": "**Classification Polish Leaderboard** 🤍🇵🇱",
-                "credits": polish_credits,
-                "data": DATA_CLASSIFICATION_PL,
-                "refresh": partial(get_mteb_data, tasks=["Classification"], datasets=TASK_LIST_CLASSIFICATION_PL)
-            },
-            {
-                "language": "Swedish",
-                "description": "**Classification Swedish Leaderboard** 💛🇸🇪",
-                "credits": norwegian_credits,
-                "data": DATA_CLASSIFICATION_SV,
-                "refresh": partial(get_mteb_data, tasks=["Classification"], datasets=TASK_LIST_CLASSIFICATION_SV)
-            },
-            {
-                "language": "Other",
-                "language_long": "47 (Only languages not included in the other tabs)",
-                "description": "**Classification Other Languages Leaderboard** 💜💚💙",
-                "data": DATA_CLASSIFICATION_OTHER,
-                "refresh": partial(get_mteb_data, tasks=["Classification"], datasets=TASK_LIST_CLASSIFICATION_OTHER)
-            }
-        ]
-    },
-    "Clustering": {
-        "metric": "Validity Measure (v_measure)",
-        "data": [
-            {
-                "language": "English",
-                "description": "**Clustering Leaderboard** ✨",
-                "data": DATA_CLUSTERING,
-                "refresh": partial(get_mteb_data, tasks=["Clustering"], datasets=TASK_LIST_CLUSTERING)
-            },
-            {
-                "language": "Chinese",
-                "description": "**Clustering Chinese Leaderboard** ✨🇨🇳",
-                "credits": chinese_credits,
-                "data": DATA_CLUSTERING_ZH,
-                "refresh": partial(get_mteb_data, tasks=["Clustering"], datasets=TASK_LIST_CLUSTERING_ZH)
-            },
-            {
-                "language": "French",
-                "description": "**Clustering French Leaderboard** ✨🇫🇷",
-                "credits": french_credits,
-                "data": DATA_CLUSTERING_FR,
-                "refresh": partial(get_mteb_data, tasks=["Clustering"], datasets=TASK_LIST_CLUSTERING_FR)
-            },
-            {
-                "language": "German",
-                "description": "**Clustering German Leaderboard** ✨🇩🇪",
-                "credits": "[Silvan](https://github.com/slvnwhrl)",
-                "data": DATA_CLUSTERING_DE,
-                "refresh": partial(get_mteb_data, tasks=["Clustering"], datasets=TASK_LIST_CLUSTERING_DE)
-            },
-            {
-                "language": "Polish",
-                "description": "**Clustering Polish Leaderboard** ✨🇵🇱",
-                "credits": polish_credits,
-                "data": DATA_CLUSTERING_PL,
-                "refresh": partial(get_mteb_data, tasks=["Clustering"], datasets=TASK_LIST_CLUSTERING_PL)
-            },
-        ]
-    },
-    "Pair Classification": {
-        "metric": "Average Precision based on Cosine Similarities (cos_sim_ap)",
-        "data": [
-            {
-                "language": "English",
-                "description": "**Pair Classification English Leaderboard** 🎭",
-                "data": DATA_PAIR_CLASSIFICATION,
-                "refresh": partial(get_mteb_data, tasks=["PairClassification"], datasets=TASK_LIST_PAIR_CLASSIFICATION)
-            },
-            {
-                "language": "Chinese",
-                "description": "**Pair Classification Chinese Leaderboard** 🎭🇨🇳",
-                "credits": chinese_credits,
-                "data": DATA_PAIR_CLASSIFICATION_ZH,
-                "refresh": partial(get_mteb_data, tasks=["PairClassification"], datasets=TASK_LIST_PAIR_CLASSIFICATION_ZH)
-            },
-            {
-                "language": "French",
-                "description": "**Pair Classification French Leaderboard** 🎭🇫🇷",
-                "credits": french_credits,
-                "data": DATA_PAIR_CLASSIFICATION_FR,
-                "refresh": partial(get_mteb_data, tasks=["PairClassification"], datasets=TASK_LIST_PAIR_CLASSIFICATION_FR)
-            },
-            {
-                "language": "Polish",
-                "description": "**Pair Classification Polish Leaderboard** 🎭🇵🇱",
-                "credits": polish_credits,
-                "data": DATA_PAIR_CLASSIFICATION_PL,
-                "refresh": partial(get_mteb_data, tasks=["PairClassification"], datasets=TASK_LIST_PAIR_CLASSIFICATION_PL)
-            },
-        ]
-    },
-    "Reranking": {
-        "metric": "Mean Average Precision (MAP)",
-        "data": [
-            {
-                "language": "English",
-                "description": "**Reranking English Leaderboard** 🥈",
-                "data": DATA_RERANKING,
-                "refresh": partial(get_mteb_data, tasks=["Reranking"], datasets=TASK_LIST_RERANKING)
-            },
-            {
-                "language": "Chinese",
-                "description": "**Reranking Chinese Leaderboard** 🥈🇨🇳",
-                "credits": chinese_credits,
-                "data": DATA_RERANKING_ZH,
-                "refresh": partial(get_mteb_data, tasks=["Reranking"], datasets=TASK_LIST_RERANKING_ZH)
-            },
-            {
-                "language": "French",
-                "description": "**Reranking French Leaderboard** 🥈🇫🇷",
-                "credits": french_credits,
-                "data": DATA_RERANKING_FR,
-                "refresh": partial(get_mteb_data, tasks=["Reranking"], datasets=TASK_LIST_RERANKING_FR)
-            }
-        ]
-    },
-    "Retrieval": {
-        "metric": "Normalized Discounted Cumulative Gain @ k (ndcg_at_10)",
-        "data": [
-            {
-                "language": "English",
-                "description": "**Retrieval English Leaderboard** 🔎",
-                "data": DATA_RETRIEVAL,
-                "refresh": partial(get_mteb_data, tasks=["Retrieval"], datasets=TASK_LIST_RETRIEVAL)
-            },
-            {
-                "language": "Chinese",
-                "description": "**Retrieval Chinese Leaderboard** 🔎🇨🇳",
-                "credits": chinese_credits,
-                "data": DATA_RETRIEVAL_ZH,
-                "refresh": partial(get_mteb_data, tasks=["Retrieval"], datasets=TASK_LIST_RETRIEVAL_ZH)
-            },
-            {
-                "language": "French",
-                "description": "**Retrieval French Leaderboard** 🔎🇫🇷",
-                "credits": french_credits,
-                "data": DATA_RETRIEVAL_FR,
-                "refresh": partial(get_mteb_data, tasks=["Retrieval"], datasets=TASK_LIST_RETRIEVAL_FR)
-            },
-            {
-                "language": "Law",
-                "language_long": "English, German, Chinese",
-                "description": "**Retrieval Law Leaderboard** 🔎⚖️",
-                "credits": "[Voyage AI](https://www.voyageai.com/)",
-                "data": DATA_RETRIEVAL_LAW,
-                "refresh": partial(get_mteb_data, tasks=["Retrieval"], datasets=TASK_LIST_RETRIEVAL_LAW)
-            },
-            {
-                "language": "Polish",
-                "description": "**Retrieval Polish Leaderboard** 🔎🇵🇱",
-                "credits": polish_credits,
-                "data": DATA_RETRIEVAL_PL,
-                "refresh": partial(get_mteb_data, tasks=["Retrieval"], datasets=TASK_LIST_RETRIEVAL_PL)
-            }
-        ]
-    },
-    "STS": {
-        "metric": "Spearman correlation based on cosine similarity",
-        "data": [
-            {
-                "language": "English",
-                "description": "**STS English Leaderboard** 🤖",
-                "data": DATA_STS_EN,
-                "refresh": partial(get_mteb_data, tasks=["STS"], datasets=TASK_LIST_STS)
-            },
-            {
-                "language": "Chinese",
-                "description": "**STS Chinese Leaderboard** 🤖🇨🇳",
-                "credits": chinese_credits,
-                "data": DATA_STS_ZH,
-                "refresh": partial(get_mteb_data, tasks=["STS"], datasets=TASK_LIST_STS_ZH)
-            },
-            {
-                "language": "French",
-                "description": "**STS French Leaderboard** 🤖🇫🇷",
-                "credits": french_credits,
-                "data": DATA_STS_FR,
-                "refresh": partial(get_mteb_data, tasks=["STS"], datasets=TASK_LIST_STS_FR)
-            },
-            {
-                "language": "Polish",
-                "description": "**STS Polish Leaderboard** 🤖🇵🇱",
-                "credits": polish_credits,
-                "data": DATA_STS_PL,
-                "refresh": partial(get_mteb_data, tasks=["STS"], datasets=TASK_LIST_STS_PL)
-            },
-            {
-                "language": "Other",
-                "language_long": "Arabic, Chinese, Dutch, English, French, German, Italian, Korean, Polish, Russian, Spanish (Only language combos not included in the other tabs)",
-                "description": "**STS Other Leaderboard** 👽",
-                "data": DATA_STS_OTHER,
-                "refresh": partial(get_mteb_data, tasks=["STS"], datasets=TASK_LIST_STS_OTHER)
-            },
-        ]
-    },
-    "Summarization": {
-        "metric": "Spearman correlation based on cosine similarity",
-        "data": [
-            {
-                "language": "English",
-                "description": "**Summarization Leaderboard** 📜",
-                "data": DATA_SUMMARIZATION,
-                "refresh": partial(get_mteb_data, tasks=TASK_LIST_SUMMARIZATION)
-            },
-            {
-                "language": "French",
-                "description": "**Summarization Leaderboard** 📜",
-                "credits": french_credits,
-                "data": DATA_SUMMARIZATION_FR,
-                "refresh": partial(get_mteb_data, tasks=TASK_LIST_SUMMARIZATION_FR)
-            }
-        ]
-    }
+    "Overall": {"metric": "Various, refer to task tabs", "data": []}
 }

+for task in TASKS:
+    data[task] = {"metric": TASKS_CONFIG[task]["metric_description"], "data": []}
+
+for board, board_config in BOARDS_CONFIG.items():
+    board_pretty_name = f"{board_config['title']} leaderboard"
+    acronym = board_config.get("acronym", None)
+    board_icon = board_config.get("icon", None)
+    if board_icon is None:
+        board_icon = ""
+    credits = board_config.get("credits", None)
+
+    if board_config["has_overall"]:
+        overall_pretty_name = board_pretty_name
+        if acronym is not None:
+            overall_pretty_name += f" ({board_config['acronym']})"
+        data["Overall"]["data"].append({
+            "language": board_config["title"],
+            "language_long": board_config["language_long"],
+            "description": f"**Overall MTEB {overall_pretty_name}** 🔮{board_icon}",
+            "data": boards_data[board]["data_overall"],
+            "refresh": partial(get_mteb_average, board_config["tasks"]),
+            "credits": credits,
+        })
+    for task_category, task_category_list in board_config["tasks"].items():
+        task_icon = TASKS_CONFIG[task_category]['icon']
+        if "special_icons" in board_config and isinstance(board_config["special_icons"], dict):
+            task_icon = board_config["special_icons"].get(task_category, task_icon)
+        data[task_category]["data"].append({
+            "language": board_config["title"],
+            "language_long": board_config["language_long"],
+            "description": f"**{task_category} {board_pretty_name}** {task_icon}{board_icon}",
+            "data": boards_data[board]["data_tasks"][task_category],
+            "refresh": partial(get_mteb_data, [task_category], [], task_category_list),
+            "credits": credits,
+        })

 dataframes = []
 full_dataframes = []
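Every tab's "refresh" entry, both in the removed hand-written dict and in the new loop, is a functools.partial that freezes the fetch function's arguments so the UI can call it later with no arguments. A minimal illustration of the pattern (stand-in function and invented dataset names; the real get_mteb_data takes more keyword arguments):

from functools import partial

def get_data(tasks, langs, datasets):
    # stand-in for get_mteb_data(...)
    return f"fetched {tasks} over {datasets}"

# freeze the arguments now, call with none later -- what the "refresh" entries do
refresh = partial(get_data, ["Retrieval"], [], ["AILACasedocs", "AILAStatutes"])
print(refresh())  # fetched ['Retrieval'] over ['AILACasedocs', 'AILAStatutes']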
@@ -2144,6 +465,7 @@ def filter_data(search_query, model_types, model_sizes, *full_dataframes):
         output_dataframes.append(df)
     return output_dataframes

+
 with gr.Blocks(css=css) as block:

     # Store the current task and language for updating the URL. This is a bit hacky, but it works
@@ -2207,7 +529,7 @@ with gr.Blocks(css=css) as block:
             - **Metric:** {metric}
             - **Languages:** {item['language_long'] if 'language_long' in item else item['language']}
-            {"- **Credits:** " + item['credits'] if "credits" in item else ''}
+            {"- **Credits:** " + item['credits'] if ("credits" in item and item["credits"] is not None) else ''}
             """)

         with gr.Row():
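The final hunk tightens the credits bullet in the tab description: entries built by the new loop always carry a "credits" key, but its value is None for boards without one, so testing key membership alone is no longer enough. A toy reproduction of the difference (the item dict here is invented to mimic the shape the loop produces):

item = {"language": "Law", "credits": None}

# Old check tested only key presence, so it would evaluate "str" + None -> TypeError:
# line = "- **Credits:** " + item["credits"] if "credits" in item else ""

# New check also requires a non-None value:
line = "- **Credits:** " + item["credits"] if ("credits" in item and item["credits"] is not None) else ""
print(repr(line))  # ''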