Spaces:
Running
Running
orionweller
committed on
Commit
·
cf7ddc6
1
Parent(s):
807cc67
add instruction following
Browse files
- EXTERNAL_MODEL_RESULTS.json +0 -0
- app.py +114 -2
EXTERNAL_MODEL_RESULTS.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
app.py
CHANGED
@@ -226,6 +226,12 @@ TASK_LIST_RETRIEVAL_LAW = [
|
|
226 |
"LegalSummarization",
|
227 |
]
|
228 |
|
|
|
|
|
|
|
|
|
|
|
|
|
229 |
TASK_LIST_RETRIEVAL_PL = [
|
230 |
"ArguAna-PL",
|
231 |
"DBPedia-PL",
|
@@ -322,6 +328,7 @@ TASK_TO_METRIC = {
|
|
322 |
"Retrieval": "ndcg_at_10",
|
323 |
"STS": "cos_sim_spearman",
|
324 |
"Summarization": "cos_sim_spearman",
|
|
|
325 |
}
|
326 |
|
327 |
def make_clickable_model(model_name, link=None):
|
@@ -339,6 +346,8 @@ EXTERNAL_MODELS = [
|
|
339 |
"Cohere-embed-multilingual-v3.0",
|
340 |
"Cohere-embed-multilingual-light-v3.0",
|
341 |
"DanskBERT",
|
|
|
|
|
342 |
"LASER2",
|
343 |
"LLM2Vec-Llama-supervised",
|
344 |
"LLM2Vec-Llama-unsupervised",
|
@@ -364,17 +373,22 @@ EXTERNAL_MODELS = [
|
|
364 |
"bge-large-zh-v1.5",
|
365 |
"bge-large-zh-noinstruct",
|
366 |
"bge-small-zh-v1.5",
|
|
|
367 |
"contriever-base-msmarco",
|
368 |
"cross-en-de-roberta-sentence-transformer",
|
369 |
"dfm-encoder-large-v1",
|
370 |
"dfm-sentence-encoder-large-1",
|
371 |
"distiluse-base-multilingual-cased-v2",
|
372 |
"e5-base",
|
|
|
373 |
"e5-large",
|
|
|
374 |
"e5-mistral-7b-instruct",
|
375 |
"e5-small",
|
376 |
"electra-small-nordic",
|
377 |
"electra-small-swedish-cased-discriminator",
|
|
|
|
|
378 |
"flaubert_base_cased",
|
379 |
"flaubert_base_uncased",
|
380 |
"flaubert_large_cased",
|
@@ -391,11 +405,18 @@ EXTERNAL_MODELS = [
|
|
391 |
"gtr-t5-xl",
|
392 |
"gtr-t5-xxl",
|
393 |
"herbert-base-retrieval-v2",
|
|
|
|
|
394 |
"komninos",
|
|
|
395 |
"luotuo-bert-medium",
|
396 |
"m3e-base",
|
397 |
"m3e-large",
|
|
|
398 |
"mistral-embed",
|
|
|
|
|
|
|
399 |
"msmarco-bert-co-condensor",
|
400 |
"multi-qa-MiniLM-L6-cos-v1",
|
401 |
"multilingual-e5-base",
|
@@ -423,6 +444,8 @@ EXTERNAL_MODELS = [
|
|
423 |
"sup-simcse-bert-base-uncased",
|
424 |
"st-polish-paraphrase-from-distilroberta",
|
425 |
"st-polish-paraphrase-from-mpnet",
|
|
|
|
|
426 |
"text2vec-base-chinese",
|
427 |
"text2vec-base-multilingual",
|
428 |
"text2vec-large-chinese",
|
@@ -470,6 +493,8 @@ EXTERNAL_MODEL_TO_LINK = {
|
|
470 |
"LLM2Vec-Sheared-Llama-supervised": "https://huggingface.co/McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-supervised",
|
471 |
"LLM2Vec-Sheared-Llama-unsupervised": "https://huggingface.co/McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp",
|
472 |
"LaBSE": "https://huggingface.co/sentence-transformers/LaBSE",
|
|
|
|
|
473 |
"OpenSearch-text-hybrid": "https://help.aliyun.com/zh/open-search/vector-search-edition/hybrid-retrieval",
|
474 |
"allenai-specter": "https://huggingface.co/sentence-transformers/allenai-specter",
|
475 |
"allenai-specter": "https://huggingface.co/sentence-transformers/allenai-specter",
|
@@ -488,6 +513,7 @@ EXTERNAL_MODEL_TO_LINK = {
|
|
488 |
"bge-large-zh-v1.5": "https://huggingface.co/BAAI/bge-large-zh-v1.5",
|
489 |
"bge-large-zh-noinstruct": "https://huggingface.co/BAAI/bge-large-zh-noinstruct",
|
490 |
"bge-small-zh-v1.5": "https://huggingface.co/BAAI/bge-small-zh-v1.5",
|
|
|
491 |
"camembert-base": "https://huggingface.co/almanach/camembert-base",
|
492 |
"camembert-large": "https://huggingface.co/almanach/camembert-large",
|
493 |
"contriever-base-msmarco": "https://huggingface.co/nthakur/contriever-base-msmarco",
|
@@ -501,11 +527,15 @@ EXTERNAL_MODEL_TO_LINK = {
|
|
501 |
"dfm-encoder-large-v1": "https://huggingface.co/chcaa/dfm-encoder-large-v1",
|
502 |
"dfm-sentence-encoder-large-1": "https://huggingface.co/chcaa/dfm-encoder-large-v1",
|
503 |
"e5-base": "https://huggingface.co/intfloat/e5-base",
|
|
|
504 |
"e5-large": "https://huggingface.co/intfloat/e5-large",
|
|
|
505 |
"e5-mistral-7b-instruct": "https://huggingface.co/intfloat/e5-mistral-7b-instruct",
|
506 |
"e5-small": "https://huggingface.co/intfloat/e5-small",
|
507 |
"electra-small-nordic": "https://huggingface.co/jonfd/electra-small-nordic",
|
508 |
"electra-small-swedish-cased-discriminator": "https://huggingface.co/KBLab/electra-small-swedish-cased-discriminator",
|
|
|
|
|
509 |
"flaubert_base_cased": "https://huggingface.co/flaubert/flaubert_base_cased",
|
510 |
"flaubert_base_uncased": "https://huggingface.co/flaubert/flaubert_base_uncased",
|
511 |
"flaubert_large_cased": "https://huggingface.co/flaubert/flaubert_large_cased",
|
@@ -522,11 +552,18 @@ EXTERNAL_MODEL_TO_LINK = {
|
|
522 |
"gtr-t5-xl": "https://huggingface.co/sentence-transformers/gtr-t5-xl",
|
523 |
"gtr-t5-xxl": "https://huggingface.co/sentence-transformers/gtr-t5-xxl",
|
524 |
"herbert-base-retrieval-v2": "https://huggingface.co/ipipan/herbert-base-retrieval-v2",
|
|
|
|
|
525 |
"komninos": "https://huggingface.co/sentence-transformers/average_word_embeddings_komninos",
|
|
|
526 |
"luotuo-bert-medium": "https://huggingface.co/silk-road/luotuo-bert-medium",
|
527 |
"m3e-base": "https://huggingface.co/moka-ai/m3e-base",
|
528 |
"m3e-large": "https://huggingface.co/moka-ai/m3e-large",
|
|
|
529 |
"mistral-embed": "https://docs.mistral.ai/guides/embeddings",
|
|
|
|
|
|
|
530 |
"msmarco-bert-co-condensor": "https://huggingface.co/sentence-transformers/msmarco-bert-co-condensor",
|
531 |
"multi-qa-MiniLM-L6-cos-v1": "https://huggingface.co/sentence-transformers/multi-qa-MiniLM-L6-cos-v1",
|
532 |
"multilingual-e5-base": "https://huggingface.co/intfloat/multilingual-e5-base",
|
@@ -554,6 +591,8 @@ EXTERNAL_MODEL_TO_LINK = {
|
|
554 |
"sup-simcse-bert-base-uncased": "https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased",
|
555 |
"st-polish-paraphrase-from-distilroberta": "https://huggingface.co/sdadas/st-polish-paraphrase-from-distilroberta",
|
556 |
"st-polish-paraphrase-from-mpnet": "https://huggingface.co/sdadas/st-polish-paraphrase-from-mpnet",
|
|
|
|
|
557 |
"text2vec-base-chinese": "https://huggingface.co/shibing624/text2vec-base-chinese",
|
558 |
"text2vec-large-chinese": "https://huggingface.co/GanymedeNil/text2vec-large-chinese",
|
559 |
"text-embedding-3-small": "https://openai.com/blog/new-embedding-models-and-api-updates",
|
@@ -593,6 +632,8 @@ EXTERNAL_MODEL_TO_DIM = {
|
|
593 |
"Cohere-embed-multilingual-v3.0": 1024,
|
594 |
"Cohere-embed-multilingual-light-v3.0": 384,
|
595 |
"DanskBERT": 768,
|
|
|
|
|
596 |
"LASER2": 1024,
|
597 |
"LLM2Vec-Llama-supervised": 4096,
|
598 |
"LLM2Vec-Llama-unsupervised": 4096,
|
@@ -617,6 +658,7 @@ EXTERNAL_MODEL_TO_DIM = {
|
|
617 |
"bge-large-zh-v1.5": 1024,
|
618 |
"bge-large-zh-noinstruct": 1024,
|
619 |
"bge-small-zh-v1.5": 512,
|
|
|
620 |
"camembert-base": 512,
|
621 |
"camembert-large": 768,
|
622 |
"contriever-base-msmarco": 768,
|
@@ -630,11 +672,15 @@ EXTERNAL_MODEL_TO_DIM = {
|
|
630 |
"dfm-encoder-large-v1": 1024,
|
631 |
"dfm-sentence-encoder-large-1": 1024,
|
632 |
"e5-base": 768,
|
|
|
633 |
"e5-large": 1024,
|
|
|
634 |
"e5-mistral-7b-instruct": 4096,
|
635 |
"e5-small": 384,
|
636 |
"electra-small-nordic": 256,
|
637 |
"electra-small-swedish-cased-discriminator": 256,
|
|
|
|
|
638 |
"flaubert_base_cased": 768,
|
639 |
"flaubert_base_uncased": 768,
|
640 |
"flaubert_large_cased": 1024,
|
@@ -652,10 +698,17 @@ EXTERNAL_MODEL_TO_DIM = {
|
|
652 |
"gtr-t5-xl": 768,
|
653 |
"gtr-t5-xxl": 768,
|
654 |
"herbert-base-retrieval-v2": 768,
|
|
|
|
|
655 |
"komninos": 300,
|
|
|
656 |
"m3e-base": 768,
|
657 |
"m3e-large": 768,
|
|
|
658 |
"mistral-embed": 1024,
|
|
|
|
|
|
|
659 |
"msmarco-bert-co-condensor": 768,
|
660 |
"multi-qa-MiniLM-L6-cos-v1": 384,
|
661 |
"multilingual-e5-base": 768,
|
@@ -684,6 +737,8 @@ EXTERNAL_MODEL_TO_DIM = {
|
|
684 |
"sup-simcse-bert-base-uncased": 768,
|
685 |
"st-polish-paraphrase-from-distilroberta": 768,
|
686 |
"st-polish-paraphrase-from-mpnet": 768,
|
|
|
|
|
687 |
"text2vec-base-chinese": 768,
|
688 |
"text2vec-large-chinese": 1024,
|
689 |
"text-embedding-3-large": 3072,
|
@@ -723,6 +778,8 @@ EXTERNAL_MODEL_TO_SEQLEN = {
|
|
723 |
"Cohere-embed-multilingual-v3.0": 512,
|
724 |
"Cohere-embed-multilingual-light-v3.0": 512,
|
725 |
"DanskBERT": 514,
|
|
|
|
|
726 |
"LASER2": "N/A",
|
727 |
"LLM2Vec-Llama-supervised": 4096,
|
728 |
"LLM2Vec-Llama-unsupervised": 4096,
|
@@ -760,11 +817,15 @@ EXTERNAL_MODEL_TO_SEQLEN = {
|
|
760 |
"dfm-sentence-encoder-large-1": 512,
|
761 |
"distiluse-base-multilingual-cased-v2": 512,
|
762 |
"e5-base": 512,
|
|
|
763 |
"e5-large": 512,
|
|
|
764 |
"e5-mistral-7b-instruct": 32768,
|
765 |
"e5-small": 512,
|
766 |
"electra-small-nordic": 512,
|
767 |
"electra-small-swedish-cased-discriminator": 512,
|
|
|
|
|
768 |
"flaubert_base_cased": 512,
|
769 |
"flaubert_base_uncased": 512,
|
770 |
"flaubert_large_cased": 512,
|
@@ -781,11 +842,18 @@ EXTERNAL_MODEL_TO_SEQLEN = {
|
|
781 |
"gtr-t5-xl": 512,
|
782 |
"gtr-t5-xxl": 512,
|
783 |
"herbert-base-retrieval-v2": 514,
|
|
|
|
|
784 |
"komninos": "N/A",
|
|
|
785 |
"luotuo-bert-medium": 512,
|
786 |
"m3e-base": 512,
|
787 |
"m3e-large": 512,
|
|
|
788 |
# "mistral-embed": "?",
|
|
|
|
|
|
|
789 |
"msmarco-bert-co-condensor": 512,
|
790 |
"multi-qa-MiniLM-L6-cos-v1": 512,
|
791 |
"multilingual-e5-base": 514,
|
@@ -814,6 +882,8 @@ EXTERNAL_MODEL_TO_SEQLEN = {
|
|
814 |
"sup-simcse-bert-base-uncased": 512,
|
815 |
"st-polish-paraphrase-from-distilroberta": 514,
|
816 |
"st-polish-paraphrase-from-mpnet": 514,
|
|
|
|
|
817 |
"text2vec-base-chinese": 512,
|
818 |
"text2vec-large-chinese": 512,
|
819 |
"text-embedding-3-large": 8191,
|
@@ -849,6 +919,8 @@ EXTERNAL_MODEL_TO_SEQLEN = {
|
|
849 |
|
850 |
EXTERNAL_MODEL_TO_SIZE = {
|
851 |
"DanskBERT": 125,
|
|
|
|
|
852 |
"LASER2": 43,
|
853 |
"LLM2Vec-Llama-supervised": 6607,
|
854 |
"LLM2Vec-Llama-unsupervised": 6607,
|
@@ -872,6 +944,7 @@ EXTERNAL_MODEL_TO_SIZE = {
|
|
872 |
"bge-large-zh-v1.5": 326,
|
873 |
"bge-large-zh-noinstruct": 326,
|
874 |
"bge-small-zh-v1.5": 24,
|
|
|
875 |
"camembert-base": 111,
|
876 |
"camembert-large": 338,
|
877 |
"cross-en-de-roberta-sentence-transformer": 278,
|
@@ -885,11 +958,15 @@ EXTERNAL_MODEL_TO_SIZE = {
|
|
885 |
"dfm-encoder-large-v1": 355,
|
886 |
"dfm-sentence-encoder-large-1": 355,
|
887 |
"e5-base": 110,
|
|
|
888 |
"e5-large": 335,
|
|
|
889 |
"e5-mistral-7b-instruct": 7111,
|
890 |
"e5-small": 33,
|
891 |
"electra-small-nordic": 23,
|
892 |
"electra-small-swedish-cased-discriminator": 16,
|
|
|
|
|
893 |
"flaubert_base_cased": 138,
|
894 |
"flaubert_base_uncased": 138,
|
895 |
"flaubert_large_cased": 372,
|
@@ -906,11 +983,18 @@ EXTERNAL_MODEL_TO_SIZE = {
|
|
906 |
"gtr-t5-xl": 1240,
|
907 |
"gtr-t5-xxl": 4865,
|
908 |
"herbert-base-retrieval-v2": 125,
|
|
|
|
|
909 |
"komninos": 134,
|
|
|
910 |
"luotuo-bert-medium": 328,
|
911 |
"m3e-base": 102,
|
912 |
"m3e-large": 102,
|
|
|
913 |
"msmarco-bert-co-condensor": 110,
|
|
|
|
|
|
|
914 |
"multi-qa-MiniLM-L6-cos-v1": 23,
|
915 |
"multilingual-e5-base": 278,
|
916 |
"multilingual-e5-small": 118,
|
@@ -936,7 +1020,9 @@ EXTERNAL_MODEL_TO_SIZE = {
|
|
936 |
"silver-retriever-base-v1": 125,
|
937 |
"sup-simcse-bert-base-uncased": 110,
|
938 |
"st-polish-paraphrase-from-distilroberta": 125,
|
939 |
-
"st-polish-paraphrase-from-mpnet": 125,
|
|
|
|
|
940 |
"text2vec-base-chinese": 102,
|
941 |
"text2vec-large-chinese": 326,
|
942 |
"unsup-simcse-bert-base-uncased": 110,
|
@@ -1014,7 +1100,9 @@ SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS = {
|
|
1014 |
"dfm-encoder-large-v1",
|
1015 |
"dfm-sentence-encoder-large-1",
|
1016 |
"e5-base",
|
|
|
1017 |
"e5-large",
|
|
|
1018 |
"e5-mistral-7b-instruct",
|
1019 |
"e5-small",
|
1020 |
"electra-small-nordic",
|
@@ -1065,6 +1153,7 @@ SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS = {
|
|
1065 |
"sup-simcse-bert-base-uncased",
|
1066 |
"st-polish-paraphrase-from-distilroberta",
|
1067 |
"st-polish-paraphrase-from-mpnet",
|
|
|
1068 |
"text2vec-base-chinese",
|
1069 |
"text2vec-large-chinese",
|
1070 |
"udever-bloom-1b1",
|
@@ -1247,6 +1336,8 @@ def add_task(examples):
|
|
1247 |
examples["mteb_task"] = "Summarization"
|
1248 |
elif examples["mteb_dataset_name"] in norm(TASK_LIST_BITEXT_MINING + TASK_LIST_BITEXT_MINING_DA):
|
1249 |
examples["mteb_task"] = "BitextMining"
|
|
|
|
|
1250 |
else:
|
1251 |
print("WARNING: Task not found for dataset", examples["mteb_dataset_name"])
|
1252 |
examples["mteb_task"] = "Unknown"
|
@@ -1333,7 +1424,13 @@ def get_mteb_data(tasks=["Clustering"], langs=[], datasets=[], fillna=True, add_
|
|
1333 |
# Initialize list to models that we cannot fetch metadata from
|
1334 |
df_list = []
|
1335 |
for model in EXTERNAL_MODEL_RESULTS:
|
1336 |
-
results_list = [
|
|
|
|
|
|
|
|
|
|
|
|
|
1337 |
if len(datasets) > 0:
|
1338 |
res = {k: v for d in results_list for k, v in d.items() if (k == "Model") or any([x in k for x in datasets])}
|
1339 |
elif langs:
|
@@ -1659,6 +1756,7 @@ DATA_CLASSIFICATION_OTHER = get_mteb_data(["Classification"], [], TASK_LIST_CLAS
|
|
1659 |
DATA_CLUSTERING_DE = get_mteb_data(["Clustering"], [], TASK_LIST_CLUSTERING_DE)[["Rank", "Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)", "Average"] + TASK_LIST_CLUSTERING_DE]
|
1660 |
DATA_STS_OTHER = get_mteb_data(["STS"], [], TASK_LIST_STS_OTHER)[["Rank", "Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)", "Average"] + TASK_LIST_STS_OTHER]
|
1661 |
DATA_RETRIEVAL_LAW = get_mteb_data(["Retrieval"], [], TASK_LIST_RETRIEVAL_LAW)[["Rank", "Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)", "Average"] + TASK_LIST_RETRIEVAL_LAW]
|
|
|
1662 |
|
1663 |
# Exact, add all non-nan integer values for every dataset
|
1664 |
NUM_SCORES = 0
|
@@ -1693,6 +1791,7 @@ for d in [
|
|
1693 |
DATA_RETRIEVAL_PL,
|
1694 |
DATA_RETRIEVAL_ZH,
|
1695 |
DATA_RETRIEVAL_LAW,
|
|
|
1696 |
DATA_STS_EN,
|
1697 |
DATA_STS_FR,
|
1698 |
DATA_STS_PL,
|
@@ -1751,6 +1850,7 @@ french_credits = "[Lyon-NLP](https://github.com/Lyon-NLP): [Gabriel Sequeira](ht
|
|
1751 |
danish_credits = "[Kenneth Enevoldsen](https://github.com/KennethEnevoldsen), [scandinavian-embedding-benchmark](https://kennethenevoldsen.github.io/scandinavian-embedding-benchmark/)"
|
1752 |
norwegian_credits = "[Kenneth Enevoldsen](https://github.com/KennethEnevoldsen), [scandinavian-embedding-benchmark](https://kennethenevoldsen.github.io/scandinavian-embedding-benchmark/)"
|
1753 |
polish_credits = "[Rafał Poświata](https://github.com/rafalposwiata)"
|
|
|
1754 |
|
1755 |
data = {
|
1756 |
"Overall": {
|
@@ -2057,6 +2157,18 @@ data = {
|
|
2057 |
"refresh": partial(get_mteb_data, tasks=TASK_LIST_SUMMARIZATION_FR)
|
2058 |
}
|
2059 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2060 |
}
|
2061 |
}
|
2062 |
|
|
|
226 |
"LegalSummarization",
|
227 |
]
|
228 |
|
229 |
+
TASK_LIST_RETRIEVAL_INSTRUCTIONS = [
|
230 |
+
"Robust04InstructionRetrieval",
|
231 |
+
"News21InstructionRetrieval",
|
232 |
+
"Core17InstructionRetrieval",
|
233 |
+
]
|
234 |
+
|
235 |
TASK_LIST_RETRIEVAL_PL = [
|
236 |
"ArguAna-PL",
|
237 |
"DBPedia-PL",
|
|
|
328 |
"Retrieval": "ndcg_at_10",
|
329 |
"STS": "cos_sim_spearman",
|
330 |
"Summarization": "cos_sim_spearman",
|
331 |
+
"InstructionRetrieval": "p-MRR",
|
332 |
}
|
333 |
|
334 |
def make_clickable_model(model_name, link=None):
|
|
|
346 |
"Cohere-embed-multilingual-v3.0",
|
347 |
"Cohere-embed-multilingual-light-v3.0",
|
348 |
"DanskBERT",
|
349 |
+
"FollowIR-7B",
|
350 |
+
"GritLM-7B",
|
351 |
"LASER2",
|
352 |
"LLM2Vec-Llama-supervised",
|
353 |
"LLM2Vec-Llama-unsupervised",
|
|
|
373 |
"bge-large-zh-v1.5",
|
374 |
"bge-large-zh-noinstruct",
|
375 |
"bge-small-zh-v1.5",
|
376 |
+
"bm25",
|
377 |
"contriever-base-msmarco",
|
378 |
"cross-en-de-roberta-sentence-transformer",
|
379 |
"dfm-encoder-large-v1",
|
380 |
"dfm-sentence-encoder-large-1",
|
381 |
"distiluse-base-multilingual-cased-v2",
|
382 |
"e5-base",
|
383 |
+
"e5-base-v2",
|
384 |
"e5-large",
|
385 |
+
"e5-large-v2",
|
386 |
"e5-mistral-7b-instruct",
|
387 |
"e5-small",
|
388 |
"electra-small-nordic",
|
389 |
"electra-small-swedish-cased-discriminator",
|
390 |
+
"flan-t5-base",
|
391 |
+
"flan-t5-large",
|
392 |
"flaubert_base_cased",
|
393 |
"flaubert_base_uncased",
|
394 |
"flaubert_large_cased",
|
|
|
405 |
"gtr-t5-xl",
|
406 |
"gtr-t5-xxl",
|
407 |
"herbert-base-retrieval-v2",
|
408 |
+
"instructor-base",
|
409 |
+
"instructor-xl",
|
410 |
"komninos",
|
411 |
+
"llama-2-7b-chat",
|
412 |
"luotuo-bert-medium",
|
413 |
"m3e-base",
|
414 |
"m3e-large",
|
415 |
+
"mistral-7b-instruct-v0.2",
|
416 |
"mistral-embed",
|
417 |
+
"monobert-large-msmarco",
|
418 |
+
"monot5-3b-msmarco-10k",
|
419 |
+
"monot5-base-msmarco-10k",
|
420 |
"msmarco-bert-co-condensor",
|
421 |
"multi-qa-MiniLM-L6-cos-v1",
|
422 |
"multilingual-e5-base",
|
|
|
444 |
"sup-simcse-bert-base-uncased",
|
445 |
"st-polish-paraphrase-from-distilroberta",
|
446 |
"st-polish-paraphrase-from-mpnet",
|
447 |
+
"tart-dual-contriever-msmarco",
|
448 |
+
"tart-full-flan-t5-xl",
|
449 |
"text2vec-base-chinese",
|
450 |
"text2vec-base-multilingual",
|
451 |
"text2vec-large-chinese",
|
|
|
493 |
"LLM2Vec-Sheared-Llama-supervised": "https://huggingface.co/McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-supervised",
|
494 |
"LLM2Vec-Sheared-Llama-unsupervised": "https://huggingface.co/McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp",
|
495 |
"LaBSE": "https://huggingface.co/sentence-transformers/LaBSE",
|
496 |
+
"FollowIR-7B": "https://huggingface.co/jhu-clsp/FollowIR-7B",
|
497 |
+
"GritLM-7B": "https://huggingface.co/GritLM/GritLM-7B",
|
498 |
"OpenSearch-text-hybrid": "https://help.aliyun.com/zh/open-search/vector-search-edition/hybrid-retrieval",
|
499 |
"allenai-specter": "https://huggingface.co/sentence-transformers/allenai-specter",
|
500 |
"allenai-specter": "https://huggingface.co/sentence-transformers/allenai-specter",
|
|
|
513 |
"bge-large-zh-v1.5": "https://huggingface.co/BAAI/bge-large-zh-v1.5",
|
514 |
"bge-large-zh-noinstruct": "https://huggingface.co/BAAI/bge-large-zh-noinstruct",
|
515 |
"bge-small-zh-v1.5": "https://huggingface.co/BAAI/bge-small-zh-v1.5",
|
516 |
+
"bm25": "https://en.wikipedia.org/wiki/Okapi_BM25",
|
517 |
"camembert-base": "https://huggingface.co/almanach/camembert-base",
|
518 |
"camembert-large": "https://huggingface.co/almanach/camembert-large",
|
519 |
"contriever-base-msmarco": "https://huggingface.co/nthakur/contriever-base-msmarco",
|
|
|
527 |
"dfm-encoder-large-v1": "https://huggingface.co/chcaa/dfm-encoder-large-v1",
|
528 |
"dfm-sentence-encoder-large-1": "https://huggingface.co/chcaa/dfm-encoder-large-v1",
|
529 |
"e5-base": "https://huggingface.co/intfloat/e5-base",
|
530 |
+
"e5-base-v2": "https://huggingface.co/intfloat/e5-base-v2",
|
531 |
"e5-large": "https://huggingface.co/intfloat/e5-large",
|
532 |
+
"e5-large-v2": "https://huggingface.co/intfloat/e5-large-v2",
|
533 |
"e5-mistral-7b-instruct": "https://huggingface.co/intfloat/e5-mistral-7b-instruct",
|
534 |
"e5-small": "https://huggingface.co/intfloat/e5-small",
|
535 |
"electra-small-nordic": "https://huggingface.co/jonfd/electra-small-nordic",
|
536 |
"electra-small-swedish-cased-discriminator": "https://huggingface.co/KBLab/electra-small-swedish-cased-discriminator",
|
537 |
+
"flan-t5-base": "https://huggingface.co/google/flan-t5-base",
|
538 |
+
"flan-t5-large": "https://huggingface.co/google/flan-t5-large",
|
539 |
"flaubert_base_cased": "https://huggingface.co/flaubert/flaubert_base_cased",
|
540 |
"flaubert_base_uncased": "https://huggingface.co/flaubert/flaubert_base_uncased",
|
541 |
"flaubert_large_cased": "https://huggingface.co/flaubert/flaubert_large_cased",
|
|
|
552 |
"gtr-t5-xl": "https://huggingface.co/sentence-transformers/gtr-t5-xl",
|
553 |
"gtr-t5-xxl": "https://huggingface.co/sentence-transformers/gtr-t5-xxl",
|
554 |
"herbert-base-retrieval-v2": "https://huggingface.co/ipipan/herbert-base-retrieval-v2",
|
555 |
+
"instructor-base": "https://huggingface.co/hkunlp/instructor-base",
|
556 |
+
"instructor-xl": "https://huggingface.co/hkunlp/instructor-xl",
|
557 |
"komninos": "https://huggingface.co/sentence-transformers/average_word_embeddings_komninos",
|
558 |
+
"llama-2-7b-chat": "https://huggingface.co/meta-llama/Llama-2-7b-chat-hf",
|
559 |
"luotuo-bert-medium": "https://huggingface.co/silk-road/luotuo-bert-medium",
|
560 |
"m3e-base": "https://huggingface.co/moka-ai/m3e-base",
|
561 |
"m3e-large": "https://huggingface.co/moka-ai/m3e-large",
|
562 |
+
"mistral-7b-instruct-v0.2": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2",
|
563 |
"mistral-embed": "https://docs.mistral.ai/guides/embeddings",
|
564 |
+
"monobert-large-msmarco": "https://huggingface.co/castorini/monobert-large-msmarco",
|
565 |
+
"monot5-3b-msmarco-10k": "https://huggingface.co/castorini/monot5-3b-msmarco-10k",
|
566 |
+
"monot5-base-msmarco-10k": "https://huggingface.co/castorini/monot5-base-msmarco-10k",
|
567 |
"msmarco-bert-co-condensor": "https://huggingface.co/sentence-transformers/msmarco-bert-co-condensor",
|
568 |
"multi-qa-MiniLM-L6-cos-v1": "https://huggingface.co/sentence-transformers/multi-qa-MiniLM-L6-cos-v1",
|
569 |
"multilingual-e5-base": "https://huggingface.co/intfloat/multilingual-e5-base",
|
|
|
591 |
"sup-simcse-bert-base-uncased": "https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased",
|
592 |
"st-polish-paraphrase-from-distilroberta": "https://huggingface.co/sdadas/st-polish-paraphrase-from-distilroberta",
|
593 |
"st-polish-paraphrase-from-mpnet": "https://huggingface.co/sdadas/st-polish-paraphrase-from-mpnet",
|
594 |
+
"tart-dual-contriever-msmarco": "https://huggingface.co/orionweller/tart-dual-contriever-msmarco",
|
595 |
+
"tart-full-flan-t5-xl": "https://huggingface.co/facebook/tart-full-flan-t5-xl",
|
596 |
"text2vec-base-chinese": "https://huggingface.co/shibing624/text2vec-base-chinese",
|
597 |
"text2vec-large-chinese": "https://huggingface.co/GanymedeNil/text2vec-large-chinese",
|
598 |
"text-embedding-3-small": "https://openai.com/blog/new-embedding-models-and-api-updates",
|
|
|
632 |
"Cohere-embed-multilingual-v3.0": 1024,
|
633 |
"Cohere-embed-multilingual-light-v3.0": 384,
|
634 |
"DanskBERT": 768,
|
635 |
+
"FollowIR-7B": -1,
|
636 |
+
"GritLM-7B": 4096,
|
637 |
"LASER2": 1024,
|
638 |
"LLM2Vec-Llama-supervised": 4096,
|
639 |
"LLM2Vec-Llama-unsupervised": 4096,
|
|
|
658 |
"bge-large-zh-v1.5": 1024,
|
659 |
"bge-large-zh-noinstruct": 1024,
|
660 |
"bge-small-zh-v1.5": 512,
|
661 |
+
"bm25": -1,
|
662 |
"camembert-base": 512,
|
663 |
"camembert-large": 768,
|
664 |
"contriever-base-msmarco": 768,
|
|
|
672 |
"dfm-encoder-large-v1": 1024,
|
673 |
"dfm-sentence-encoder-large-1": 1024,
|
674 |
"e5-base": 768,
|
675 |
+
"e5-base-v2": 768,
|
676 |
"e5-large": 1024,
|
677 |
+
"e5-large-v2": 1024,
|
678 |
"e5-mistral-7b-instruct": 4096,
|
679 |
"e5-small": 384,
|
680 |
"electra-small-nordic": 256,
|
681 |
"electra-small-swedish-cased-discriminator": 256,
|
682 |
+
"flan-t5-base": -1,
|
683 |
+
"flan-t5-large": -1,
|
684 |
"flaubert_base_cased": 768,
|
685 |
"flaubert_base_uncased": 768,
|
686 |
"flaubert_large_cased": 1024,
|
|
|
698 |
"gtr-t5-xl": 768,
|
699 |
"gtr-t5-xxl": 768,
|
700 |
"herbert-base-retrieval-v2": 768,
|
701 |
+
"instructor-base": 768,
|
702 |
+
"instructor-xl": 768,
|
703 |
"komninos": 300,
|
704 |
+
"llama-2-7b-chat": -1,
|
705 |
"m3e-base": 768,
|
706 |
"m3e-large": 768,
|
707 |
+
"mistral-7b-instruct-v0.2": -1,
|
708 |
"mistral-embed": 1024,
|
709 |
+
"monobert-large-msmarco": -1,
|
710 |
+
"monot5-3b-msmarco-10k": -1,
|
711 |
+
"monot5-base-msmarco-10k": -1,
|
712 |
"msmarco-bert-co-condensor": 768,
|
713 |
"multi-qa-MiniLM-L6-cos-v1": 384,
|
714 |
"multilingual-e5-base": 768,
|
|
|
737 |
"sup-simcse-bert-base-uncased": 768,
|
738 |
"st-polish-paraphrase-from-distilroberta": 768,
|
739 |
"st-polish-paraphrase-from-mpnet": 768,
|
740 |
+
"tart-dual-contriever-msmarco": 768,
|
741 |
+
"tart-full-flan-t5-xl": -1,
|
742 |
"text2vec-base-chinese": 768,
|
743 |
"text2vec-large-chinese": 1024,
|
744 |
"text-embedding-3-large": 3072,
|
|
|
778 |
"Cohere-embed-multilingual-v3.0": 512,
|
779 |
"Cohere-embed-multilingual-light-v3.0": 512,
|
780 |
"DanskBERT": 514,
|
781 |
+
"FollowIR-7B": 32768,
|
782 |
+
"GritLM-7B": 32768,
|
783 |
"LASER2": "N/A",
|
784 |
"LLM2Vec-Llama-supervised": 4096,
|
785 |
"LLM2Vec-Llama-unsupervised": 4096,
|
|
|
817 |
"dfm-sentence-encoder-large-1": 512,
|
818 |
"distiluse-base-multilingual-cased-v2": 512,
|
819 |
"e5-base": 512,
|
820 |
+
"e5-base-v2": 512,
|
821 |
"e5-large": 512,
|
822 |
+
"e5-large-v2": 512,
|
823 |
"e5-mistral-7b-instruct": 32768,
|
824 |
"e5-small": 512,
|
825 |
"electra-small-nordic": 512,
|
826 |
"electra-small-swedish-cased-discriminator": 512,
|
827 |
+
"flan-t5-base": 512,
|
828 |
+
"flan-t5-large": 512,
|
829 |
"flaubert_base_cased": 512,
|
830 |
"flaubert_base_uncased": 512,
|
831 |
"flaubert_large_cased": 512,
|
|
|
842 |
"gtr-t5-xl": 512,
|
843 |
"gtr-t5-xxl": 512,
|
844 |
"herbert-base-retrieval-v2": 514,
|
845 |
+
"instructor-base": 512,
|
846 |
+
"instructor-xl": 512,
|
847 |
"komninos": "N/A",
|
848 |
+
"llama-2-7b-chat": 4096,
|
849 |
"luotuo-bert-medium": 512,
|
850 |
"m3e-base": 512,
|
851 |
"m3e-large": 512,
|
852 |
+
"mistral-7b-instruct-v0.2": 32768,
|
853 |
# "mistral-embed": "?",
|
854 |
+
"monobert-large-msmarco": 512,
|
855 |
+
"monot5-3b-msmarco-10k": 512,
|
856 |
+
"monot5-base-msmarco-10k": 512,
|
857 |
"msmarco-bert-co-condensor": 512,
|
858 |
"multi-qa-MiniLM-L6-cos-v1": 512,
|
859 |
"multilingual-e5-base": 514,
|
|
|
882 |
"sup-simcse-bert-base-uncased": 512,
|
883 |
"st-polish-paraphrase-from-distilroberta": 514,
|
884 |
"st-polish-paraphrase-from-mpnet": 514,
|
885 |
+
"tart-dual-contriever-msmarco": 512,
|
886 |
+
"tart-full-flan-t5-xl": 512,
|
887 |
"text2vec-base-chinese": 512,
|
888 |
"text2vec-large-chinese": 512,
|
889 |
"text-embedding-3-large": 8191,
|
|
|
919 |
|
920 |
EXTERNAL_MODEL_TO_SIZE = {
|
921 |
"DanskBERT": 125,
|
922 |
+
"FollowIR-7B": 7242,
|
923 |
+
"GritLM-7B": 7242,
|
924 |
"LASER2": 43,
|
925 |
"LLM2Vec-Llama-supervised": 6607,
|
926 |
"LLM2Vec-Llama-unsupervised": 6607,
|
|
|
944 |
"bge-large-zh-v1.5": 326,
|
945 |
"bge-large-zh-noinstruct": 326,
|
946 |
"bge-small-zh-v1.5": 24,
|
947 |
+
"bm25": 0,
|
948 |
"camembert-base": 111,
|
949 |
"camembert-large": 338,
|
950 |
"cross-en-de-roberta-sentence-transformer": 278,
|
|
|
958 |
"dfm-encoder-large-v1": 355,
|
959 |
"dfm-sentence-encoder-large-1": 355,
|
960 |
"e5-base": 110,
|
961 |
+
"e5-base-v2": 110,
|
962 |
"e5-large": 335,
|
963 |
+
"e5-large-v2": 335,
|
964 |
"e5-mistral-7b-instruct": 7111,
|
965 |
"e5-small": 33,
|
966 |
"electra-small-nordic": 23,
|
967 |
"electra-small-swedish-cased-discriminator": 16,
|
968 |
+
"flan-t5-base": 220,
|
969 |
+
"flan-t5-large": 770,
|
970 |
"flaubert_base_cased": 138,
|
971 |
"flaubert_base_uncased": 138,
|
972 |
"flaubert_large_cased": 372,
|
|
|
983 |
"gtr-t5-xl": 1240,
|
984 |
"gtr-t5-xxl": 4865,
|
985 |
"herbert-base-retrieval-v2": 125,
|
986 |
+
"instructor-base": 110,
|
987 |
+
"instructor-xl": 1241,
|
988 |
"komninos": 134,
|
989 |
+
"llama-2-7b-chat": 7000,
|
990 |
"luotuo-bert-medium": 328,
|
991 |
"m3e-base": 102,
|
992 |
"m3e-large": 102,
|
993 |
+
"mistral-7b-instruct-v0.2": 7111,
|
994 |
"msmarco-bert-co-condensor": 110,
|
995 |
+
"monobert-large-msmarco": 335,
|
996 |
+
"monot5-3b-msmarco-10k": 2480,
|
997 |
+
"monot5-base-msmarco-10k": 220,
|
998 |
"multi-qa-MiniLM-L6-cos-v1": 23,
|
999 |
"multilingual-e5-base": 278,
|
1000 |
"multilingual-e5-small": 118,
|
|
|
1020 |
"silver-retriever-base-v1": 125,
|
1021 |
"sup-simcse-bert-base-uncased": 110,
|
1022 |
"st-polish-paraphrase-from-distilroberta": 125,
|
1023 |
+
"st-polish-paraphrase-from-mpnet": 125,
|
1024 |
+
"tart-dual-contriever-msmarco": 110,
|
1025 |
+
"tart-full-flan-t5-xl": 2480,
|
1026 |
"text2vec-base-chinese": 102,
|
1027 |
"text2vec-large-chinese": 326,
|
1028 |
"unsup-simcse-bert-base-uncased": 110,
|
|
|
1100 |
"dfm-encoder-large-v1",
|
1101 |
"dfm-sentence-encoder-large-1",
|
1102 |
"e5-base",
|
1103 |
+
"e5-base-v2",
|
1104 |
"e5-large",
|
1105 |
+
"e5-large-v2",
|
1106 |
"e5-mistral-7b-instruct",
|
1107 |
"e5-small",
|
1108 |
"electra-small-nordic",
|
|
|
1153 |
"sup-simcse-bert-base-uncased",
|
1154 |
"st-polish-paraphrase-from-distilroberta",
|
1155 |
"st-polish-paraphrase-from-mpnet",
|
1156 |
+
"tart-dual-contriever-msmarco",
|
1157 |
"text2vec-base-chinese",
|
1158 |
"text2vec-large-chinese",
|
1159 |
"udever-bloom-1b1",
|
|
|
1336 |
examples["mteb_task"] = "Summarization"
|
1337 |
elif examples["mteb_dataset_name"] in norm(TASK_LIST_BITEXT_MINING + TASK_LIST_BITEXT_MINING_DA):
|
1338 |
examples["mteb_task"] = "BitextMining"
|
1339 |
+
elif examples["mteb_dataset_name"] in norm(TASK_LIST_RETRIEVAL_INSTRUCTIONS):
|
1340 |
+
examples["mteb_task"] = "InstructionRetrieval"
|
1341 |
else:
|
1342 |
print("WARNING: Task not found for dataset", examples["mteb_dataset_name"])
|
1343 |
examples["mteb_task"] = "Unknown"
|
|
|
1424 |
# Initialize list to models that we cannot fetch metadata from
|
1425 |
df_list = []
|
1426 |
for model in EXTERNAL_MODEL_RESULTS:
|
1427 |
+
results_list = []
|
1428 |
+
for task in tasks:
|
1429 |
+
# Not all models have results for InstructionRetrieval or other newly added tasks
|
1430 |
+
if task not in EXTERNAL_MODEL_RESULTS[model]:
|
1431 |
+
continue
|
1432 |
+
results_list += EXTERNAL_MODEL_RESULTS[model][task][task_to_metric[task]]
|
1433 |
+
|
1434 |
if len(datasets) > 0:
|
1435 |
res = {k: v for d in results_list for k, v in d.items() if (k == "Model") or any([x in k for x in datasets])}
|
1436 |
elif langs:
|
|
|
1756 |
DATA_CLUSTERING_DE = get_mteb_data(["Clustering"], [], TASK_LIST_CLUSTERING_DE)[["Rank", "Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)", "Average"] + TASK_LIST_CLUSTERING_DE]
|
1757 |
DATA_STS_OTHER = get_mteb_data(["STS"], [], TASK_LIST_STS_OTHER)[["Rank", "Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)", "Average"] + TASK_LIST_STS_OTHER]
|
1758 |
DATA_RETRIEVAL_LAW = get_mteb_data(["Retrieval"], [], TASK_LIST_RETRIEVAL_LAW)[["Rank", "Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)", "Average"] + TASK_LIST_RETRIEVAL_LAW]
|
1759 |
+
DATA_RETRIEVAL_INSTRUCTIONS = get_mteb_data(["InstructionRetrieval"], [], TASK_LIST_RETRIEVAL_INSTRUCTIONS)[["Rank", "Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)", "Average"] + TASK_LIST_RETRIEVAL_INSTRUCTIONS]
|
1760 |
|
1761 |
# Exact, add all non-nan integer values for every dataset
|
1762 |
NUM_SCORES = 0
|
|
|
1791 |
DATA_RETRIEVAL_PL,
|
1792 |
DATA_RETRIEVAL_ZH,
|
1793 |
DATA_RETRIEVAL_LAW,
|
1794 |
+
DATA_RETRIEVAL_INSTRUCTIONS,
|
1795 |
DATA_STS_EN,
|
1796 |
DATA_STS_FR,
|
1797 |
DATA_STS_PL,
|
|
|
1850 |
danish_credits = "[Kenneth Enevoldsen](https://github.com/KennethEnevoldsen), [scandinavian-embedding-benchmark](https://kennethenevoldsen.github.io/scandinavian-embedding-benchmark/)"
|
1851 |
norwegian_credits = "[Kenneth Enevoldsen](https://github.com/KennethEnevoldsen), [scandinavian-embedding-benchmark](https://kennethenevoldsen.github.io/scandinavian-embedding-benchmark/)"
|
1852 |
polish_credits = "[Rafał Poświata](https://github.com/rafalposwiata)"
|
1853 |
+
instruction_credits = "[Orion Weller, FollowIR paper](https://arxiv.org/abs/2403.15246)"
|
1854 |
|
1855 |
data = {
|
1856 |
"Overall": {
|
|
|
2157 |
"refresh": partial(get_mteb_data, tasks=TASK_LIST_SUMMARIZATION_FR)
|
2158 |
}
|
2159 |
]
|
2160 |
+
},
|
2161 |
+
"Retrieval w/Instructions": {
|
2162 |
+
"metric": "paired mean reciprocal rank (p-MRR)",
|
2163 |
+
"data": [
|
2164 |
+
{
|
2165 |
+
"language": "English",
|
2166 |
+
"description": "**Retrieval with Instructions Leaderboard** 🔎📋",
|
2167 |
+
"credits": instruction_credits,
|
2168 |
+
"data": DATA_RETRIEVAL_INSTRUCTIONS,
|
2169 |
+
"refresh": partial(get_mteb_data, tasks=TASK_LIST_RETRIEVAL_INSTRUCTIONS)
|
2170 |
+
}
|
2171 |
+
]
|
2172 |
}
|
2173 |
}
|
2174 |
|