Tom Aarsen committed on
Commit
c1f5045
1 Parent(s): 2ad8c60

Add Nomic matryoshka model

Browse files
Files changed (1) hide show
  1. app.py +22 -0
app.py CHANGED
@@ -320,6 +320,10 @@ EXTERNAL_MODELS = [
320
  "multilingual-e5-small",
321
  "nb-bert-base",
322
  "nb-bert-large",
 
 
 
 
323
  "norbert3-base",
324
  "norbert3-large",
325
  "paraphrase-multilingual-MiniLM-L12-v2",
@@ -403,6 +407,10 @@ EXTERNAL_MODEL_TO_LINK = {
403
  "multilingual-e5-small": "https://huggingface.co/intfloat/multilingual-e5-small",
404
  "nb-bert-base": "https://huggingface.co/NbAiLab/nb-bert-base",
405
  "nb-bert-large": "https://huggingface.co/NbAiLab/nb-bert-large",
 
 
 
 
406
  "norbert3-base": "https://huggingface.co/ltg/norbert3-base",
407
  "norbert3-large": "https://huggingface.co/ltg/norbert3-large",
408
  "paraphrase-multilingual-mpnet-base-v2": "https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
@@ -486,6 +494,10 @@ EXTERNAL_MODEL_TO_DIM = {
486
  "multilingual-e5-large": 1024,
487
  "nb-bert-base": 768,
488
  "nb-bert-large": 1024,
 
 
 
 
489
  "norbert3-base": 768,
490
  "norbert3-large": 1024,
491
  "paraphrase-multilingual-MiniLM-L12-v2": 384,
@@ -569,6 +581,10 @@ EXTERNAL_MODEL_TO_SEQLEN = {
569
  "multilingual-e5-small": 512,
570
  "nb-bert-base": 512,
571
  "nb-bert-large": 512,
 
 
 
 
572
  "norbert3-base": 512,
573
  "norbert3-large": 512,
574
  "paraphrase-multilingual-MiniLM-L12-v2": 512,
@@ -651,6 +667,10 @@ EXTERNAL_MODEL_TO_SIZE = {
651
  "multilingual-e5-large": 2.24,
652
  "nb-bert-base": 0.71,
653
  "nb-bert-large": 1.42,
 
 
 
 
654
  "norbert3-base": 0.52,
655
  "norbert3-large": 1.47,
656
  "paraphrase-multilingual-mpnet-base-v2": 1.11,
@@ -781,6 +801,8 @@ MODELS_TO_SKIP = {
781
  "mixamrepijey/instructor-small",
782
  "mixamrepijey/instructor-models",
783
  "lsf1000/bge-evaluation", # Empty
 
 
784
  }
785
 
786
  EXTERNAL_MODEL_RESULTS = {model: {k: {v: []} for k, v in TASK_TO_METRIC.items()} for model in EXTERNAL_MODELS}
 
320
  "multilingual-e5-small",
321
  "nb-bert-base",
322
  "nb-bert-large",
323
+ "nomic-embed-text-v1.5-64",
324
+ "nomic-embed-text-v1.5-128",
325
+ "nomic-embed-text-v1.5-256",
326
+ "nomic-embed-text-v1.5-512",
327
  "norbert3-base",
328
  "norbert3-large",
329
  "paraphrase-multilingual-MiniLM-L12-v2",
 
407
  "multilingual-e5-small": "https://huggingface.co/intfloat/multilingual-e5-small",
408
  "nb-bert-base": "https://huggingface.co/NbAiLab/nb-bert-base",
409
  "nb-bert-large": "https://huggingface.co/NbAiLab/nb-bert-large",
410
+ "nomic-embed-text-v1.5-64": "https://huggingface.co/nomic-ai/nomic-embed-text-v1.5",
411
+ "nomic-embed-text-v1.5-128": "https://huggingface.co/nomic-ai/nomic-embed-text-v1.5",
412
+ "nomic-embed-text-v1.5-256": "https://huggingface.co/nomic-ai/nomic-embed-text-v1.5",
413
+ "nomic-embed-text-v1.5-512": "https://huggingface.co/nomic-ai/nomic-embed-text-v1.5",
414
  "norbert3-base": "https://huggingface.co/ltg/norbert3-base",
415
  "norbert3-large": "https://huggingface.co/ltg/norbert3-large",
416
  "paraphrase-multilingual-mpnet-base-v2": "https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
 
494
  "multilingual-e5-large": 1024,
495
  "nb-bert-base": 768,
496
  "nb-bert-large": 1024,
497
+ "nomic-embed-text-v1.5-64": 64,
498
+ "nomic-embed-text-v1.5-128": 128,
499
+ "nomic-embed-text-v1.5-256": 256,
500
+ "nomic-embed-text-v1.5-512": 512,
501
  "norbert3-base": 768,
502
  "norbert3-large": 1024,
503
  "paraphrase-multilingual-MiniLM-L12-v2": 384,
 
581
  "multilingual-e5-small": 512,
582
  "nb-bert-base": 512,
583
  "nb-bert-large": 512,
584
+ "nomic-embed-text-v1.5-64": 8192,
585
+ "nomic-embed-text-v1.5-128": 8192,
586
+ "nomic-embed-text-v1.5-256": 8192,
587
+ "nomic-embed-text-v1.5-512": 8192,
588
  "norbert3-base": 512,
589
  "norbert3-large": 512,
590
  "paraphrase-multilingual-MiniLM-L12-v2": 512,
 
667
  "multilingual-e5-large": 2.24,
668
  "nb-bert-base": 0.71,
669
  "nb-bert-large": 1.42,
670
+ "nomic-embed-text-v1.5-64": 0.55,
671
+ "nomic-embed-text-v1.5-128": 0.55,
672
+ "nomic-embed-text-v1.5-256": 0.55,
673
+ "nomic-embed-text-v1.5-512": 0.55,
674
  "norbert3-base": 0.52,
675
  "norbert3-large": 1.47,
676
  "paraphrase-multilingual-mpnet-base-v2": 1.11,
 
801
  "mixamrepijey/instructor-small",
802
  "mixamrepijey/instructor-models",
803
  "lsf1000/bge-evaluation", # Empty
804
+ "giulio98/placeholder", # Empty
805
+ "Severian/nomic", # Copy
806
  }
807
 
808
  EXTERNAL_MODEL_RESULTS = {model: {k: {v: []} for k, v in TASK_TO_METRIC.items()} for model in EXTERNAL_MODELS}