Tom Aarsen committed on
Commit
a8ba8f1
1 Parent(s): 0ebd4b8

Move globals around slightly

Browse files
Files changed (1) hide show
  1. app.py +98 -97
app.py CHANGED
@@ -946,6 +946,104 @@ PROPRIETARY_MODELS = {
946
  for model in PROPRIETARY_MODELS
947
  }
948
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
949
  MODELS_TO_SKIP = {
950
  "baseplate/instructor-large-1", # Duplicate
951
  "radames/e5-large", # Duplicate
@@ -1070,103 +1168,6 @@ MODELS_TO_SKIP = {
1070
  "Koat/gte-tiny",
1071
  }
1072
 
1073
- SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS = {
1074
- "allenai-specter",
1075
- "allenai-specter",
1076
- "all-MiniLM-L12-v2",
1077
- "all-MiniLM-L6-v2",
1078
- "all-mpnet-base-v2",
1079
- "bert-base-10lang-cased",
1080
- "bert-base-15lang-cased",
1081
- "bert-base-25lang-cased",
1082
- "bert-base-multilingual-cased",
1083
- "bert-base-multilingual-uncased",
1084
- "bert-base-swedish-cased",
1085
- "bert-base-uncased",
1086
- "bge-base-zh-v1.5",
1087
- "bge-large-zh-v1.5",
1088
- "bge-large-zh-noinstruct",
1089
- "bge-small-zh-v1.5",
1090
- "camembert-base",
1091
- "camembert-large",
1092
- "contriever-base-msmarco",
1093
- "cross-en-de-roberta-sentence-transformer",
1094
- "DanskBERT",
1095
- "distilbert-base-25lang-cased",
1096
- "distilbert-base-en-fr-cased",
1097
- "distilbert-base-en-fr-es-pt-it-cased",
1098
- "distilbert-base-fr-cased",
1099
- "distilbert-base-uncased",
1100
- "distiluse-base-multilingual-cased-v2",
1101
- "dfm-encoder-large-v1",
1102
- "dfm-sentence-encoder-large-1",
1103
- "e5-base",
1104
- "e5-large",
1105
- "e5-mistral-7b-instruct",
1106
- "e5-small",
1107
- "electra-small-nordic",
1108
- "electra-small-swedish-cased-discriminator",
1109
- "flaubert_base_cased",
1110
- "flaubert_base_uncased",
1111
- "flaubert_large_cased",
1112
- "gbert-base",
1113
- "gbert-large",
1114
- "gelectra-base",
1115
- "gelectra-large",
1116
- "glove.6B.300d",
1117
- "gottbert-base",
1118
- "gtr-t5-base",
1119
- "gtr-t5-large",
1120
- "gtr-t5-xl",
1121
- "gtr-t5-xxl",
1122
- "herbert-base-retrieval-v2",
1123
- "komninos",
1124
- "luotuo-bert-medium",
1125
- "LaBSE",
1126
- "m3e-base",
1127
- "m3e-large",
1128
- "msmarco-bert-co-condensor",
1129
- "multi-qa-MiniLM-L6-cos-v1",
1130
- "multilingual-e5-base",
1131
- "multilingual-e5-large",
1132
- "multilingual-e5-small",
1133
- "nb-bert-base",
1134
- "nb-bert-large",
1135
- "nomic-embed-text-v1.5-64",
1136
- "nomic-embed-text-v1.5-128",
1137
- "nomic-embed-text-v1.5-256",
1138
- "nomic-embed-text-v1.5-512",
1139
- "norbert3-base",
1140
- "norbert3-large",
1141
- "paraphrase-multilingual-mpnet-base-v2",
1142
- "paraphrase-multilingual-MiniLM-L12-v2",
1143
- "sentence-camembert-base",
1144
- "sentence-camembert-large",
1145
- "sentence-croissant-llm-base",
1146
- "sentence-bert-swedish-cased",
1147
- "sentence-t5-base",
1148
- "sentence-t5-large",
1149
- "sentence-t5-xl",
1150
- "sentence-t5-xxl",
1151
- "silver-retriever-base-v1",
1152
- "sup-simcse-bert-base-uncased",
1153
- "st-polish-paraphrase-from-distilroberta",
1154
- "st-polish-paraphrase-from-mpnet",
1155
- "text2vec-base-chinese",
1156
- "text2vec-large-chinese",
1157
- "udever-bloom-1b1",
1158
- "udever-bloom-560m",
1159
- "universal-sentence-encoder-multilingual-3",
1160
- "universal-sentence-encoder-multilingual-large-3",
1161
- "unsup-simcse-bert-base-uncased",
1162
- "use-cmlm-multilingual",
1163
- "xlm-roberta-base",
1164
- "xlm-roberta-large",
1165
- }
1166
- SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS = {
1167
- make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, "https://huggingface.co/spaces/mteb/leaderboard"))
1168
- for model in SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS
1169
- }
1170
 
1171
  def add_lang(examples):
1172
  if not(examples["eval_language"]):
 
946
  for model in PROPRIETARY_MODELS
947
  }
948
 
949
+ SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS = {
950
+ "allenai-specter",
951
+ "allenai-specter",
952
+ "all-MiniLM-L12-v2",
953
+ "all-MiniLM-L6-v2",
954
+ "all-mpnet-base-v2",
955
+ "bert-base-10lang-cased",
956
+ "bert-base-15lang-cased",
957
+ "bert-base-25lang-cased",
958
+ "bert-base-multilingual-cased",
959
+ "bert-base-multilingual-uncased",
960
+ "bert-base-swedish-cased",
961
+ "bert-base-uncased",
962
+ "bge-base-zh-v1.5",
963
+ "bge-large-zh-v1.5",
964
+ "bge-large-zh-noinstruct",
965
+ "bge-small-zh-v1.5",
966
+ "camembert-base",
967
+ "camembert-large",
968
+ "contriever-base-msmarco",
969
+ "cross-en-de-roberta-sentence-transformer",
970
+ "DanskBERT",
971
+ "distilbert-base-25lang-cased",
972
+ "distilbert-base-en-fr-cased",
973
+ "distilbert-base-en-fr-es-pt-it-cased",
974
+ "distilbert-base-fr-cased",
975
+ "distilbert-base-uncased",
976
+ "distiluse-base-multilingual-cased-v2",
977
+ "dfm-encoder-large-v1",
978
+ "dfm-sentence-encoder-large-1",
979
+ "e5-base",
980
+ "e5-large",
981
+ "e5-mistral-7b-instruct",
982
+ "e5-small",
983
+ "electra-small-nordic",
984
+ "electra-small-swedish-cased-discriminator",
985
+ "flaubert_base_cased",
986
+ "flaubert_base_uncased",
987
+ "flaubert_large_cased",
988
+ "gbert-base",
989
+ "gbert-large",
990
+ "gelectra-base",
991
+ "gelectra-large",
992
+ "glove.6B.300d",
993
+ "gottbert-base",
994
+ "gtr-t5-base",
995
+ "gtr-t5-large",
996
+ "gtr-t5-xl",
997
+ "gtr-t5-xxl",
998
+ "herbert-base-retrieval-v2",
999
+ "komninos",
1000
+ "luotuo-bert-medium",
1001
+ "LaBSE",
1002
+ "m3e-base",
1003
+ "m3e-large",
1004
+ "msmarco-bert-co-condensor",
1005
+ "multi-qa-MiniLM-L6-cos-v1",
1006
+ "multilingual-e5-base",
1007
+ "multilingual-e5-large",
1008
+ "multilingual-e5-small",
1009
+ "nb-bert-base",
1010
+ "nb-bert-large",
1011
+ "nomic-embed-text-v1.5-64",
1012
+ "nomic-embed-text-v1.5-128",
1013
+ "nomic-embed-text-v1.5-256",
1014
+ "nomic-embed-text-v1.5-512",
1015
+ "norbert3-base",
1016
+ "norbert3-large",
1017
+ "paraphrase-multilingual-mpnet-base-v2",
1018
+ "paraphrase-multilingual-MiniLM-L12-v2",
1019
+ "sentence-camembert-base",
1020
+ "sentence-camembert-large",
1021
+ "sentence-croissant-llm-base",
1022
+ "sentence-bert-swedish-cased",
1023
+ "sentence-t5-base",
1024
+ "sentence-t5-large",
1025
+ "sentence-t5-xl",
1026
+ "sentence-t5-xxl",
1027
+ "silver-retriever-base-v1",
1028
+ "sup-simcse-bert-base-uncased",
1029
+ "st-polish-paraphrase-from-distilroberta",
1030
+ "st-polish-paraphrase-from-mpnet",
1031
+ "text2vec-base-chinese",
1032
+ "text2vec-large-chinese",
1033
+ "udever-bloom-1b1",
1034
+ "udever-bloom-560m",
1035
+ "universal-sentence-encoder-multilingual-3",
1036
+ "universal-sentence-encoder-multilingual-large-3",
1037
+ "unsup-simcse-bert-base-uncased",
1038
+ "use-cmlm-multilingual",
1039
+ "xlm-roberta-base",
1040
+ "xlm-roberta-large",
1041
+ }
1042
+ SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS = {
1043
+ make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, "https://huggingface.co/spaces/mteb/leaderboard"))
1044
+ for model in SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS
1045
+ }
1046
+
1047
  MODELS_TO_SKIP = {
1048
  "baseplate/instructor-large-1", # Duplicate
1049
  "radames/e5-large", # Duplicate
 
1168
  "Koat/gte-tiny",
1169
  }
1170
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1171
 
1172
  def add_lang(examples):
1173
  if not(examples["eval_language"]):