Tom Aarsen committed on
Commit
0ebd4b8
2 Parent(s): 5bd316f 97c35aa

Merge branch 'main' into model_size_parameters

Browse files
Files changed (2) hide show
  1. EXTERNAL_MODEL_RESULTS.json +0 -0
  2. app.py +7 -1
EXTERNAL_MODEL_RESULTS.json CHANGED
The diff for this file is too large to render. See raw diff
 
app.py CHANGED
@@ -442,6 +442,7 @@ EXTERNAL_MODELS = [
442
  "use-cmlm-multilingual",
443
  "voyage-2",
444
  "voyage-code-2",
 
445
  "voyage-lite-01-instruct",
446
  "voyage-lite-02-instruct",
447
  "xlm-roberta-base",
@@ -565,6 +566,7 @@ EXTERNAL_MODEL_TO_LINK = {
565
  "use-cmlm-multilingual": "https://huggingface.co/sentence-transformers/use-cmlm-multilingual",
566
  "voyage-2": "https://docs.voyageai.com/embeddings/",
567
  "voyage-code-2": "https://docs.voyageai.com/embeddings/",
 
568
  "voyage-lite-01-instruct": "https://docs.voyageai.com/embeddings/",
569
  "voyage-lite-02-instruct": "https://docs.voyageai.com/embeddings/",
570
  "xlm-roberta-base": "https://huggingface.co/xlm-roberta-base",
@@ -687,6 +689,7 @@ EXTERNAL_MODEL_TO_DIM = {
687
  "use-cmlm-multilingual": 768,
688
  "voyage-2": 1024,
689
  "voyage-code-2": 1536,
 
690
  "voyage-lite-01-instruct": 1024,
691
  "voyage-lite-02-instruct": 1024,
692
  "xlm-roberta-base": 768,
@@ -809,6 +812,7 @@ EXTERNAL_MODEL_TO_SEQLEN = {
809
  "unsup-simcse-bert-base-uncased": 512,
810
  "voyage-2": 1024,
811
  "voyage-code-2": 16000,
 
812
  "voyage-lite-01-instruct": 4000,
813
  "voyage-lite-02-instruct": 4000,
814
  "xlm-roberta-base": 514,
@@ -901,6 +905,7 @@ EXTERNAL_MODEL_TO_SIZE = {
901
  "text2vec-large-chinese": 326,
902
  "unsup-simcse-bert-base-uncased": 110,
903
  "use-cmlm-multilingual": 472,
 
904
  "voyage-lite-02-instruct": 1220,
905
  "xlm-roberta-base": 279,
906
  "xlm-roberta-large": 560,
@@ -930,6 +935,7 @@ PROPRIETARY_MODELS = {
930
  "titan-embed-text-v1",
931
  "voyage-2",
932
  "voyage-code-2",
 
933
  "voyage-lite-01-instruct",
934
  "voyage-lite-02-instruct",
935
  "google-gecko.text-embedding-preview-0409",
@@ -2094,7 +2100,7 @@ with gr.Blocks(css=css) as block:
2094
  language_per_task = gr.JSON(value=dict(), visible=False)
2095
 
2096
  gr.Markdown(f"""
2097
- Massive Text Embedding Benchmark (MTEB) Leaderboard. To submit, refer to the <a href="https://github.com/embeddings-benchmark/mteb#leaderboard" target="_blank" style="text-decoration: underline">MTEB GitHub repository</a> 🤗 Refer to the [MTEB paper](https://arxiv.org/abs/2210.07316) for details on metrics, tasks and models.
2098
  """)
2099
 
2100
  with gr.Row():
 
442
  "use-cmlm-multilingual",
443
  "voyage-2",
444
  "voyage-code-2",
445
+ "voyage-law-2",
446
  "voyage-lite-01-instruct",
447
  "voyage-lite-02-instruct",
448
  "xlm-roberta-base",
 
566
  "use-cmlm-multilingual": "https://huggingface.co/sentence-transformers/use-cmlm-multilingual",
567
  "voyage-2": "https://docs.voyageai.com/embeddings/",
568
  "voyage-code-2": "https://docs.voyageai.com/embeddings/",
569
+ "voyage-law-2": "https://docs.voyageai.com/embeddings/",
570
  "voyage-lite-01-instruct": "https://docs.voyageai.com/embeddings/",
571
  "voyage-lite-02-instruct": "https://docs.voyageai.com/embeddings/",
572
  "xlm-roberta-base": "https://huggingface.co/xlm-roberta-base",
 
689
  "use-cmlm-multilingual": 768,
690
  "voyage-2": 1024,
691
  "voyage-code-2": 1536,
692
+ "voyage-law-2": 1024,
693
  "voyage-lite-01-instruct": 1024,
694
  "voyage-lite-02-instruct": 1024,
695
  "xlm-roberta-base": 768,
 
812
  "unsup-simcse-bert-base-uncased": 512,
813
  "voyage-2": 1024,
814
  "voyage-code-2": 16000,
815
+ "voyage-law-2": 4000,
816
  "voyage-lite-01-instruct": 4000,
817
  "voyage-lite-02-instruct": 4000,
818
  "xlm-roberta-base": 514,
 
905
  "text2vec-large-chinese": 326,
906
  "unsup-simcse-bert-base-uncased": 110,
907
  "use-cmlm-multilingual": 472,
908
+ "voyage-law-2": 1220,
909
  "voyage-lite-02-instruct": 1220,
910
  "xlm-roberta-base": 279,
911
  "xlm-roberta-large": 560,
 
935
  "titan-embed-text-v1",
936
  "voyage-2",
937
  "voyage-code-2",
938
+ "voyage-law-2",
939
  "voyage-lite-01-instruct",
940
  "voyage-lite-02-instruct",
941
  "google-gecko.text-embedding-preview-0409",
 
2100
  language_per_task = gr.JSON(value=dict(), visible=False)
2101
 
2102
  gr.Markdown(f"""
2103
+ Massive Text Embedding Benchmark (MTEB) Leaderboard. To submit, refer to the <a href="https://github.com/embeddings-benchmark/mteb/blob/main/docs/adding_a_model.md" target="_blank" style="text-decoration: underline">MTEB GitHub repository</a> 🤗 Refer to the [MTEB paper](https://arxiv.org/abs/2210.07316) for details on metrics, tasks and models.
2104
  """)
2105
 
2106
  with gr.Row():