Add new external models (used for German clustering)

#9
by slvnwhrl - opened
Files changed (1) hide show
  1. app.py +25 -0
app.py CHANGED
@@ -151,6 +151,12 @@ EXTERNAL_MODELS = [
151
  "allenai-specter",
152
  "bert-base-uncased",
153
  "contriever-base-msmarco",
 
 
 
 
 
 
154
  "glove.6B.300d",
155
  "gtr-t5-base",
156
  "gtr-t5-large",
@@ -175,6 +181,8 @@ EXTERNAL_MODELS = [
175
  "text-search-curie-001",
176
  "text-search-davinci-001",
177
  "unsup-simcse-bert-base-uncased",
 
 
178
  ]
179
  EXTERNAL_MODEL_TO_LINK = {
180
  "LASER2": "https://github.com/facebookresearch/LASER",
@@ -215,6 +223,14 @@ EXTERNAL_MODEL_TO_LINK = {
215
  "all-mpnet-base-v2": "https://huggingface.co/sentence-transformers/all-mpnet-base-v2",
216
  "paraphrase-multilingual-mpnet-base-v2": "https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
217
  "paraphrase-multilingual-MiniLM-L12-v2": "https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
 
 
 
 
 
 
 
 
218
  }
219
 
220
  EXTERNAL_MODEL_TO_DIM = {
@@ -226,6 +242,12 @@ EXTERNAL_MODEL_TO_DIM = {
226
  "allenai-specter": 768,
227
  "bert-base-uncased": 768,
228
  "contriever-base-msmarco": 768,
 
 
 
 
 
 
229
  "glove.6B.300d": 300,
230
  "gtr-t5-base": 768,
231
  "gtr-t5-large": 768,
@@ -256,6 +278,9 @@ EXTERNAL_MODEL_TO_DIM = {
256
  "text-search-davinci-001": 12288,
257
 
258
  "unsup-simcse-bert-base-uncased": 768,
 
 
 
259
  }
260
 
261
  MODELS_TO_SKIP = {
 
151
  "allenai-specter",
152
  "bert-base-uncased",
153
  "contriever-base-msmarco",
154
+ "cross-en-de-roberta-sentence-transformer",
155
+ "gbert-base",
156
+ "gbert-large",
157
+ "gelectra-base",
158
+ "gelectra-large",
159
+ "gottbert-base",
160
  "glove.6B.300d",
161
  "gtr-t5-base",
162
  "gtr-t5-large",
 
181
  "text-search-curie-001",
182
  "text-search-davinci-001",
183
  "unsup-simcse-bert-base-uncased",
184
+ "use-cmlm-multilingual",
185
+ "xlm-roberta-large",
186
  ]
187
  EXTERNAL_MODEL_TO_LINK = {
188
  "LASER2": "https://github.com/facebookresearch/LASER",
 
223
  "all-mpnet-base-v2": "https://huggingface.co/sentence-transformers/all-mpnet-base-v2",
224
  "paraphrase-multilingual-mpnet-base-v2": "https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
225
  "paraphrase-multilingual-MiniLM-L12-v2": "https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
226
+ "cross-en-de-roberta-sentence-transformer": "https://huggingface.co/T-Systems-onsite/cross-en-de-roberta-sentence-transformer",
227
+ "gbert-base": "https://huggingface.co/deepset/gbert-base",
228
+ "gbert-large": "https://huggingface.co/deepset/gbert-large",
229
+ "gelectra-base": "https://huggingface.co/deepset/gelectra-base",
230
+ "gelectra-large": "https://huggingface.co/deepset/gelectra-large",
231
+ "gottbert-base": "https://huggingface.co/uklfr/gottbert-base",
232
+ "use-cmlm-multilingual": "https://huggingface.co/sentence-transformers/use-cmlm-multilingual",
233
+ "xlm-roberta-large": "https://huggingface.co/xlm-roberta-large",
234
  }
235
 
236
  EXTERNAL_MODEL_TO_DIM = {
 
242
  "allenai-specter": 768,
243
  "bert-base-uncased": 768,
244
  "contriever-base-msmarco": 768,
245
+ "cross-en-de-roberta-sentence-transformer": 768,
246
+ "gbert-base": 768,
247
+ "gbert-large": 1024,
248
+ "gelectra-base": 768,
249
+ "gelectra-large": 1024,
250
+ "gottbert-base": 768,
251
  "glove.6B.300d": 300,
252
  "gtr-t5-base": 768,
253
  "gtr-t5-large": 768,
 
278
  "text-search-davinci-001": 12288,
279
 
280
  "unsup-simcse-bert-base-uncased": 768,
281
+
282
+ "use-cmlm-multilingual": 768,
283
+ "xlm-roberta-large": 1024,
284
  }
285
 
286
  MODELS_TO_SKIP = {