Spaces:
Sleeping
Sleeping
Commit
·
05875e9
1
Parent(s):
1b70983
add_followir_tab (#102)
Browse files
- add instruction following (98e437fad08d4528d401497d70f52921adaec46a)
- update (c348ee586c774fc357278f25fc1e0a099acc687f)
- merge in main (9fc87322eb9d1862ea02ec4fb63ee99dcce81533)
- minor cleanup (2ba40c7d9fb787ca0017e32a6c68a32c77df0221)
- add bi-encoder button (77cc9e7a65257c5af5784bb60a3dac2073e7fe05)
- EXTERNAL_MODEL_RESULTS.json +0 -0
- app.py +39 -7
- config.yaml +25 -0
- model_meta.yaml +134 -0
EXTERNAL_MODEL_RESULTS.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
app.py
CHANGED
|
@@ -17,6 +17,11 @@ TASKS_CONFIG = LEADERBOARD_CONFIG["tasks"]
|
|
| 17 |
BOARDS_CONFIG = LEADERBOARD_CONFIG["boards"]
|
| 18 |
|
| 19 |
TASKS = list(TASKS_CONFIG.keys())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
|
| 21 |
TASK_TO_METRIC = {k:v["metric"] for k,v in TASKS_CONFIG.items()}
|
| 22 |
|
|
@@ -34,18 +39,30 @@ EXTERNAL_MODEL_TO_DIM = {k: v["dim"] for k,v in MODEL_META["model_meta"].items()
|
|
| 34 |
EXTERNAL_MODEL_TO_SEQLEN = {k: v["seq_len"] for k,v in MODEL_META["model_meta"].items() if v.get("seq_len", False)}
|
| 35 |
EXTERNAL_MODEL_TO_SIZE = {k: v["size"] for k,v in MODEL_META["model_meta"].items() if v.get("size", False)}
|
| 36 |
PROPRIETARY_MODELS = {k for k,v in MODEL_META["model_meta"].items() if v.get("is_proprietary", False)}
|
|
|
|
|
|
|
| 37 |
SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS = {k for k,v in MODEL_META["model_meta"].items() if v.get("is_sentence_transformers_compatible", False)}
|
| 38 |
MODELS_TO_SKIP = MODEL_META["models_to_skip"]
|
|
|
|
|
|
|
| 39 |
|
| 40 |
PROPRIETARY_MODELS = {
|
| 41 |
make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, f"https://huggingface.co/spaces/{REPO_ID}"))
|
| 42 |
for model in PROPRIETARY_MODELS
|
| 43 |
}
|
| 44 |
-
|
| 45 |
SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS = {
|
| 46 |
make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, f"https://huggingface.co/spaces/{REPO_ID}"))
|
| 47 |
for model in SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS
|
| 48 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
|
| 50 |
TASK_TO_TASK_TYPE = {task_category: [] for task_category in TASKS}
|
| 51 |
for board_config in BOARDS_CONFIG.values():
|
|
@@ -164,7 +181,13 @@ def get_mteb_data(tasks=["Clustering"], langs=[], datasets=[], fillna=True, add_
|
|
| 164 |
# Initialize list of models that we cannot fetch metadata from
|
| 165 |
df_list = []
|
| 166 |
for model in EXTERNAL_MODEL_RESULTS:
|
| 167 |
-
results_list = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 168 |
if len(datasets) > 0:
|
| 169 |
res = {k: v for d in results_list for k, v in d.items() if (k == "Model") or any([x in k for x in datasets])}
|
| 170 |
elif langs:
|
|
@@ -383,7 +406,10 @@ for task in TASKS:
|
|
| 383 |
data[task] = {"metric": TASKS_CONFIG[task]["metric_description"], "data": []}
|
| 384 |
|
| 385 |
for board, board_config in BOARDS_CONFIG.items():
|
| 386 |
-
|
|
|
|
|
|
|
|
|
|
| 387 |
acronym = board_config.get("acronym", None)
|
| 388 |
board_icon = board_config.get("icon", None)
|
| 389 |
if board_icon is None:
|
|
@@ -439,7 +465,7 @@ function(goalUrlObject) {
|
|
| 439 |
def update_url_task(event: gr.SelectData, current_task_language: dict, language_per_task: dict):
|
| 440 |
current_task_language["task"] = event.target.id
|
| 441 |
# Either use the cached language for this task or the 1st language
|
| 442 |
-
current_task_language["language"] = language_per_task.get(event.target.id, event.target.children[
|
| 443 |
return current_task_language, language_per_task
|
| 444 |
|
| 445 |
def update_url_language(event: gr.SelectData, current_task_language: dict, language_per_task: dict):
|
|
@@ -461,6 +487,8 @@ MODEL_TYPES = [
|
|
| 461 |
"Open",
|
| 462 |
"Proprietary",
|
| 463 |
"Sentence Transformers",
|
|
|
|
|
|
|
| 464 |
]
|
| 465 |
|
| 466 |
def filter_data(search_query, model_types, model_sizes, *full_dataframes):
|
|
@@ -484,6 +512,10 @@ def filter_data(search_query, model_types, model_sizes, *full_dataframes):
|
|
| 484 |
masks.append(df["Model"].isin(PROPRIETARY_MODELS))
|
| 485 |
elif model_type == "Sentence Transformers":
|
| 486 |
masks.append(df["Model"].isin(SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS))
|
|
|
|
|
|
|
|
|
|
|
|
|
| 487 |
if masks:
|
| 488 |
df = df[reduce(lambda a, b: a | b, masks)]
|
| 489 |
else:
|
|
@@ -535,16 +567,16 @@ with gr.Blocks(css=css) as block:
|
|
| 535 |
with gr.Tabs() as outer_tabs:
|
| 536 |
# Store the tabs for updating them on load based on URL parameters
|
| 537 |
tabs.append(outer_tabs)
|
| 538 |
-
|
| 539 |
for task, task_values in data.items():
|
| 540 |
metric = task_values["metric"]
|
| 541 |
task_tab_id = task.lower().replace(" ", "-")
|
| 542 |
|
| 543 |
# Overall, Bitext Mining, Classification, etc.
|
| 544 |
-
|
|
|
|
| 545 |
# For updating the 'task' in the URL
|
| 546 |
task_tab.select(update_url_task, [current_task_language, language_per_task], [current_task_language, language_per_task]).then(None, [current_task_language], [], js=set_window_url_params)
|
| 547 |
-
|
| 548 |
with gr.Tabs() as task_tabs:
|
| 549 |
# Store the task tabs for updating them on load based on URL parameters
|
| 550 |
tabs.append(task_tabs)
|
|
|
|
| 17 |
BOARDS_CONFIG = LEADERBOARD_CONFIG["boards"]
|
| 18 |
|
| 19 |
TASKS = list(TASKS_CONFIG.keys())
|
| 20 |
+
PRETTY_NAMES = {
|
| 21 |
+
"InstructionRetrieval": "Retrieval w/Instructions",
|
| 22 |
+
"PairClassification": "Pair Classification",
|
| 23 |
+
"BitextMining": "Bitext Mining",
|
| 24 |
+
}
|
| 25 |
|
| 26 |
TASK_TO_METRIC = {k:v["metric"] for k,v in TASKS_CONFIG.items()}
|
| 27 |
|
|
|
|
| 39 |
EXTERNAL_MODEL_TO_SEQLEN = {k: v["seq_len"] for k,v in MODEL_META["model_meta"].items() if v.get("seq_len", False)}
|
| 40 |
EXTERNAL_MODEL_TO_SIZE = {k: v["size"] for k,v in MODEL_META["model_meta"].items() if v.get("size", False)}
|
| 41 |
PROPRIETARY_MODELS = {k for k,v in MODEL_META["model_meta"].items() if v.get("is_proprietary", False)}
|
| 42 |
+
TASK_DESCRIPTIONS = {k: v["task_description"] for k,v in TASKS_CONFIG.items()}
|
| 43 |
+
TASK_DESCRIPTIONS["Overall"] = "Overall performance across MTEB tasks."
|
| 44 |
SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS = {k for k,v in MODEL_META["model_meta"].items() if v.get("is_sentence_transformers_compatible", False)}
|
| 45 |
MODELS_TO_SKIP = MODEL_META["models_to_skip"]
|
| 46 |
+
CROSS_ENCODERS = MODEL_META["cross_encoders"]
|
| 47 |
+
BI_ENCODERS = [k for k, _ in MODEL_META["model_meta"].items() if k not in CROSS_ENCODERS + ["bm25"]]
|
| 48 |
|
| 49 |
PROPRIETARY_MODELS = {
|
| 50 |
make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, f"https://huggingface.co/spaces/{REPO_ID}"))
|
| 51 |
for model in PROPRIETARY_MODELS
|
| 52 |
}
|
|
|
|
| 53 |
SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS = {
|
| 54 |
make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, f"https://huggingface.co/spaces/{REPO_ID}"))
|
| 55 |
for model in SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS
|
| 56 |
}
|
| 57 |
+
CROSS_ENCODERS = {
|
| 58 |
+
make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, f"https://huggingface.co/spaces/{REPO_ID}"))
|
| 59 |
+
for model in CROSS_ENCODERS
|
| 60 |
+
}
|
| 61 |
+
BI_ENCODERS = {
|
| 62 |
+
make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, f"https://huggingface.co/spaces/{REPO_ID}"))
|
| 63 |
+
for model in BI_ENCODERS
|
| 64 |
+
}
|
| 65 |
+
|
| 66 |
|
| 67 |
TASK_TO_TASK_TYPE = {task_category: [] for task_category in TASKS}
|
| 68 |
for board_config in BOARDS_CONFIG.values():
|
|
|
|
| 181 |
# Initialize list of models that we cannot fetch metadata from
|
| 182 |
df_list = []
|
| 183 |
for model in EXTERNAL_MODEL_RESULTS:
|
| 184 |
+
results_list = []
|
| 185 |
+
for task in tasks:
|
| 186 |
+
# Not all models have InstructionRetrieval, other new tasks
|
| 187 |
+
if task not in EXTERNAL_MODEL_RESULTS[model]:
|
| 188 |
+
continue
|
| 189 |
+
results_list += EXTERNAL_MODEL_RESULTS[model][task][task_to_metric[task]]
|
| 190 |
+
|
| 191 |
if len(datasets) > 0:
|
| 192 |
res = {k: v for d in results_list for k, v in d.items() if (k == "Model") or any([x in k for x in datasets])}
|
| 193 |
elif langs:
|
|
|
|
| 406 |
data[task] = {"metric": TASKS_CONFIG[task]["metric_description"], "data": []}
|
| 407 |
|
| 408 |
for board, board_config in BOARDS_CONFIG.items():
|
| 409 |
+
init_name = board_config["title"]
|
| 410 |
+
if init_name in PRETTY_NAMES:
|
| 411 |
+
init_name = PRETTY_NAMES[init_name]
|
| 412 |
+
board_pretty_name = f"{init_name} leaderboard"
|
| 413 |
acronym = board_config.get("acronym", None)
|
| 414 |
board_icon = board_config.get("icon", None)
|
| 415 |
if board_icon is None:
|
|
|
|
| 465 |
def update_url_task(event: gr.SelectData, current_task_language: dict, language_per_task: dict):
|
| 466 |
current_task_language["task"] = event.target.id
|
| 467 |
# Either use the cached language for this task or the 1st language
|
| 468 |
+
current_task_language["language"] = language_per_task.get(event.target.id, event.target.children[1].children[0].id)
|
| 469 |
return current_task_language, language_per_task
|
| 470 |
|
| 471 |
def update_url_language(event: gr.SelectData, current_task_language: dict, language_per_task: dict):
|
|
|
|
| 487 |
"Open",
|
| 488 |
"Proprietary",
|
| 489 |
"Sentence Transformers",
|
| 490 |
+
"Cross-Encoders",
|
| 491 |
+
"Bi-Encoders"
|
| 492 |
]
|
| 493 |
|
| 494 |
def filter_data(search_query, model_types, model_sizes, *full_dataframes):
|
|
|
|
| 512 |
masks.append(df["Model"].isin(PROPRIETARY_MODELS))
|
| 513 |
elif model_type == "Sentence Transformers":
|
| 514 |
masks.append(df["Model"].isin(SENTENCE_TRANSFORMERS_COMPATIBLE_MODELS))
|
| 515 |
+
elif model_type == "Cross-Encoders":
|
| 516 |
+
masks.append(df["Model"].isin(CROSS_ENCODERS))
|
| 517 |
+
elif model_type == "Bi-Encoders":
|
| 518 |
+
masks.append(df["Model"].isin(BI_ENCODERS))
|
| 519 |
if masks:
|
| 520 |
df = df[reduce(lambda a, b: a | b, masks)]
|
| 521 |
else:
|
|
|
|
| 567 |
with gr.Tabs() as outer_tabs:
|
| 568 |
# Store the tabs for updating them on load based on URL parameters
|
| 569 |
tabs.append(outer_tabs)
|
|
|
|
| 570 |
for task, task_values in data.items():
|
| 571 |
metric = task_values["metric"]
|
| 572 |
task_tab_id = task.lower().replace(" ", "-")
|
| 573 |
|
| 574 |
# Overall, Bitext Mining, Classification, etc.
|
| 575 |
+
pretty_task_name = task if task not in PRETTY_NAMES.keys() else PRETTY_NAMES[task]
|
| 576 |
+
with gr.Tab(pretty_task_name, id=task_tab_id) as task_tab:
|
| 577 |
# For updating the 'task' in the URL
|
| 578 |
task_tab.select(update_url_task, [current_task_language, language_per_task], [current_task_language, language_per_task]).then(None, [current_task_language], [], js=set_window_url_params)
|
| 579 |
+
gr.Markdown(TASK_DESCRIPTIONS[task])
|
| 580 |
with gr.Tabs() as task_tabs:
|
| 581 |
# Store the task tabs for updating them on load based on URL parameters
|
| 582 |
tabs.append(task_tabs)
|
config.yaml
CHANGED
|
@@ -7,34 +7,47 @@ tasks:
|
|
| 7 |
icon: "🎌"
|
| 8 |
metric: f1
|
| 9 |
metric_description: "[F1](https://huggingface.co/spaces/evaluate-metric/f1)"
|
|
|
|
| 10 |
Classification:
|
| 11 |
icon: "❤️"
|
| 12 |
metric: accuracy
|
| 13 |
metric_description: "[Accuracy](https://huggingface.co/spaces/evaluate-metric/accuracy)"
|
|
|
|
| 14 |
Clustering:
|
| 15 |
icon: "✨"
|
| 16 |
metric: v_measure
|
| 17 |
metric_description: "Validity Measure (v_measure)"
|
|
|
|
| 18 |
PairClassification:
|
| 19 |
icon: "🎭"
|
| 20 |
metric: cos_sim_ap
|
| 21 |
metric_description: "Average Precision based on Cosine Similarities (cos_sim_ap)"
|
|
|
|
| 22 |
Reranking:
|
| 23 |
icon: "🥈"
|
| 24 |
metric: map
|
| 25 |
metric_description: "Mean Average Precision (MAP)"
|
|
|
|
| 26 |
Retrieval:
|
| 27 |
icon: "🔎"
|
| 28 |
metric: ndcg_at_10
|
| 29 |
metric_description: "Normalized Discounted Cumulative Gain @ k (ndcg_at_10)"
|
|
|
|
| 30 |
STS:
|
| 31 |
icon: "🤖"
|
| 32 |
metric: cos_sim_spearman
|
| 33 |
metric_description: "Spearman correlation based on cosine similarity"
|
|
|
|
| 34 |
Summarization:
|
| 35 |
icon: "📜"
|
| 36 |
metric: cos_sim_spearman
|
| 37 |
metric_description: "Spearman correlation based on cosine similarity"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
boards:
|
| 39 |
en:
|
| 40 |
title: English
|
|
@@ -250,6 +263,18 @@ boards:
|
|
| 250 |
- MassiveIntentClassification (nb)
|
| 251 |
- MassiveScenarioClassification (nb)
|
| 252 |
- ScalaNbClassification
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 253 |
law:
|
| 254 |
title: Law
|
| 255 |
language_long: "English, German, Chinese"
|
|
|
|
| 7 |
icon: "🎌"
|
| 8 |
metric: f1
|
| 9 |
metric_description: "[F1](https://huggingface.co/spaces/evaluate-metric/f1)"
|
| 10 |
+
task_description: "Bitext mining is the task of finding parallel sentences in two languages."
|
| 11 |
Classification:
|
| 12 |
icon: "❤️"
|
| 13 |
metric: accuracy
|
| 14 |
metric_description: "[Accuracy](https://huggingface.co/spaces/evaluate-metric/accuracy)"
|
| 15 |
+
task_description: "Classification is the task of assigning a label to a text."
|
| 16 |
Clustering:
|
| 17 |
icon: "✨"
|
| 18 |
metric: v_measure
|
| 19 |
metric_description: "Validity Measure (v_measure)"
|
| 20 |
+
task_description: "Clustering is the task of grouping similar documents together."
|
| 21 |
PairClassification:
|
| 22 |
icon: "🎭"
|
| 23 |
metric: cos_sim_ap
|
| 24 |
metric_description: "Average Precision based on Cosine Similarities (cos_sim_ap)"
|
| 25 |
+
task_description: "Pair classification is the task of determining whether two texts are similar."
|
| 26 |
Reranking:
|
| 27 |
icon: "🥈"
|
| 28 |
metric: map
|
| 29 |
metric_description: "Mean Average Precision (MAP)"
|
| 30 |
+
task_description: "Reranking is the task of reordering a list of documents to improve relevance."
|
| 31 |
Retrieval:
|
| 32 |
icon: "🔎"
|
| 33 |
metric: ndcg_at_10
|
| 34 |
metric_description: "Normalized Discounted Cumulative Gain @ k (ndcg_at_10)"
|
| 35 |
+
task_description: "Retrieval is the task of finding relevant documents for a query."
|
| 36 |
STS:
|
| 37 |
icon: "🤖"
|
| 38 |
metric: cos_sim_spearman
|
| 39 |
metric_description: "Spearman correlation based on cosine similarity"
|
| 40 |
+
task_description: "Semantic Textual Similarity is the task of determining how similar two texts are."
|
| 41 |
Summarization:
|
| 42 |
icon: "📜"
|
| 43 |
metric: cos_sim_spearman
|
| 44 |
metric_description: "Spearman correlation based on cosine similarity"
|
| 45 |
+
task_description: "Summarization is the task of generating a summary of a text."
|
| 46 |
+
InstructionRetrieval:
|
| 47 |
+
icon: "🔎📋"
|
| 48 |
+
metric: "p-MRR"
|
| 49 |
+
metric_description: "paired mean reciprocal rank"
|
| 50 |
+
task_description: "Retrieval w/Instructions is the task of finding relevant documents for a query that has detailed instructions."
|
| 51 |
boards:
|
| 52 |
en:
|
| 53 |
title: English
|
|
|
|
| 263 |
- MassiveIntentClassification (nb)
|
| 264 |
- MassiveScenarioClassification (nb)
|
| 265 |
- ScalaNbClassification
|
| 266 |
+
instructions:
|
| 267 |
+
title: English
|
| 268 |
+
language_long: "English"
|
| 269 |
+
has_overall: false
|
| 270 |
+
acronym: null
|
| 271 |
+
icon: null
|
| 272 |
+
credits: "[Orion Weller, FollowIR](https://arxiv.org/abs/2403.15246)"
|
| 273 |
+
tasks:
|
| 274 |
+
InstructionRetrieval:
|
| 275 |
+
- Robust04InstructionRetrieval
|
| 276 |
+
- News21InstructionRetrieval
|
| 277 |
+
- Core17InstructionRetrieval
|
| 278 |
law:
|
| 279 |
title: Law
|
| 280 |
language_long: "English, German, Chinese"
|
model_meta.yaml
CHANGED
|
@@ -47,6 +47,20 @@ model_meta:
|
|
| 47 |
is_external: true
|
| 48 |
is_proprietary: false
|
| 49 |
is_sentence_transformers_compatible: true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
LASER2:
|
| 51 |
link: https://github.com/facebookresearch/LASER
|
| 52 |
seq_len: N/A
|
|
@@ -263,6 +277,12 @@ model_meta:
|
|
| 263 |
is_external: true
|
| 264 |
is_proprietary: false
|
| 265 |
is_sentence_transformers_compatible: true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 266 |
camembert-base:
|
| 267 |
link: https://huggingface.co/almanach/camembert-base
|
| 268 |
seq_len: 512
|
|
@@ -359,6 +379,14 @@ model_meta:
|
|
| 359 |
is_external: true
|
| 360 |
is_proprietary: false
|
| 361 |
is_sentence_transformers_compatible: true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 362 |
e5-base:
|
| 363 |
link: https://huggingface.co/intfloat/e5-base
|
| 364 |
seq_len: 512
|
|
@@ -367,6 +395,14 @@ model_meta:
|
|
| 367 |
is_external: true
|
| 368 |
is_proprietary: false
|
| 369 |
is_sentence_transformers_compatible: true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 370 |
e5-large:
|
| 371 |
link: https://huggingface.co/intfloat/e5-large
|
| 372 |
seq_len: 512
|
|
@@ -407,6 +443,22 @@ model_meta:
|
|
| 407 |
is_external: true
|
| 408 |
is_proprietary: false
|
| 409 |
is_sentence_transformers_compatible: true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 410 |
flaubert_base_cased:
|
| 411 |
link: https://huggingface.co/flaubert/flaubert_base_cased
|
| 412 |
seq_len: 512
|
|
@@ -535,6 +587,22 @@ model_meta:
|
|
| 535 |
is_external: true
|
| 536 |
is_proprietary: false
|
| 537 |
is_sentence_transformers_compatible: true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 538 |
komninos:
|
| 539 |
link: https://huggingface.co/sentence-transformers/average_word_embeddings_komninos
|
| 540 |
seq_len: N/A
|
|
@@ -543,6 +611,14 @@ model_meta:
|
|
| 543 |
is_external: true
|
| 544 |
is_proprietary: false
|
| 545 |
is_sentence_transformers_compatible: true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 546 |
luotuo-bert-medium:
|
| 547 |
link: https://huggingface.co/silk-road/luotuo-bert-medium
|
| 548 |
seq_len: 512
|
|
@@ -567,6 +643,14 @@ model_meta:
|
|
| 567 |
is_external: true
|
| 568 |
is_proprietary: false
|
| 569 |
is_sentence_transformers_compatible: true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 570 |
mistral-embed:
|
| 571 |
link: https://docs.mistral.ai/guides/embeddings
|
| 572 |
seq_len: null
|
|
@@ -575,6 +659,30 @@ model_meta:
|
|
| 575 |
is_external: true
|
| 576 |
is_proprietary: true
|
| 577 |
is_sentence_transformers_compatible: false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 578 |
msmarco-bert-co-condensor:
|
| 579 |
link: https://huggingface.co/sentence-transformers/msmarco-bert-co-condensor
|
| 580 |
seq_len: 512
|
|
@@ -903,6 +1011,22 @@ model_meta:
|
|
| 903 |
is_external: true
|
| 904 |
is_proprietary: true
|
| 905 |
is_sentence_transformers_compatible: false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 906 |
text2vec-base-chinese:
|
| 907 |
link: https://huggingface.co/shibing624/text2vec-base-chinese
|
| 908 |
seq_len: 512
|
|
@@ -1184,3 +1308,13 @@ models_to_skip:
|
|
| 1184 |
- michaelfeil/ct2fast-gte-large
|
| 1185 |
- gizmo-ai/Cohere-embed-multilingual-v3.0
|
| 1186 |
- McGill-NLP/LLM2Vec-Llama-2-7b-chat-hf-mntp-unsup-simcse
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
is_external: true
|
| 48 |
is_proprietary: false
|
| 49 |
is_sentence_transformers_compatible: true
|
| 50 |
+
FollowIR-7B:
|
| 51 |
+
link: https://huggingface.co/jhu-clsp/FollowIR-7B
|
| 52 |
+
seq_len: 4096
|
| 53 |
+
size: 7240
|
| 54 |
+
is_external: true
|
| 55 |
+
is_proprietary: false
|
| 56 |
+
is_sentence_transformers_compatible: false
|
| 57 |
+
GritLM-7B:
|
| 58 |
+
link: https://huggingface.co/GritLM/GritLM-7B
|
| 59 |
+
seq_len: 4096
|
| 60 |
+
size: 7240
|
| 61 |
+
is_external: true
|
| 62 |
+
is_proprietary: false
|
| 63 |
+
is_sentence_transformers_compatible: false
|
| 64 |
LASER2:
|
| 65 |
link: https://github.com/facebookresearch/LASER
|
| 66 |
seq_len: N/A
|
|
|
|
| 277 |
is_external: true
|
| 278 |
is_proprietary: false
|
| 279 |
is_sentence_transformers_compatible: true
|
| 280 |
+
bm25:
|
| 281 |
+
link: https://en.wikipedia.org/wiki/Okapi_BM25
|
| 282 |
+
size: 0
|
| 283 |
+
is_external: true
|
| 284 |
+
is_proprietary: false
|
| 285 |
+
is_sentence_transformers_compatible: false
|
| 286 |
camembert-base:
|
| 287 |
link: https://huggingface.co/almanach/camembert-base
|
| 288 |
seq_len: 512
|
|
|
|
| 379 |
is_external: true
|
| 380 |
is_proprietary: false
|
| 381 |
is_sentence_transformers_compatible: true
|
| 382 |
+
e5-base-v2:
|
| 383 |
+
link: https://huggingface.co/intfloat/e5-base-v2
|
| 384 |
+
seq_len: 512
|
| 385 |
+
size: 110
|
| 386 |
+
dim: 768
|
| 387 |
+
is_external: true
|
| 388 |
+
is_proprietary: false
|
| 389 |
+
is_sentence_transformers_compatible: true
|
| 390 |
e5-base:
|
| 391 |
link: https://huggingface.co/intfloat/e5-base
|
| 392 |
seq_len: 512
|
|
|
|
| 395 |
is_external: true
|
| 396 |
is_proprietary: false
|
| 397 |
is_sentence_transformers_compatible: true
|
| 398 |
+
e5-large-v2:
|
| 399 |
+
link: https://huggingface.co/intfloat/e5-large-v2
|
| 400 |
+
seq_len: 512
|
| 401 |
+
size: 335
|
| 402 |
+
dim: 1024
|
| 403 |
+
is_external: true
|
| 404 |
+
is_proprietary: false
|
| 405 |
+
is_sentence_transformers_compatible: true
|
| 406 |
e5-large:
|
| 407 |
link: https://huggingface.co/intfloat/e5-large
|
| 408 |
seq_len: 512
|
|
|
|
| 443 |
is_external: true
|
| 444 |
is_proprietary: false
|
| 445 |
is_sentence_transformers_compatible: true
|
| 446 |
+
flan-t5-base:
|
| 447 |
+
link: https://huggingface.co/google/flan-t5-base
|
| 448 |
+
seq_len: 512
|
| 449 |
+
size: 220
|
| 450 |
+
dim: -1
|
| 451 |
+
is_external: true
|
| 452 |
+
is_proprietary: false
|
| 453 |
+
is_sentence_transformers_compatible: true
|
| 454 |
+
flan-t5-large:
|
| 455 |
+
link: https://huggingface.co/google/flan-t5-large
|
| 456 |
+
seq_len: 512
|
| 457 |
+
size: 770
|
| 458 |
+
dim: -1
|
| 459 |
+
is_external: true
|
| 460 |
+
is_proprietary: false
|
| 461 |
+
is_sentence_transformers_compatible: true
|
| 462 |
flaubert_base_cased:
|
| 463 |
link: https://huggingface.co/flaubert/flaubert_base_cased
|
| 464 |
seq_len: 512
|
|
|
|
| 587 |
is_external: true
|
| 588 |
is_proprietary: false
|
| 589 |
is_sentence_transformers_compatible: true
|
| 590 |
+
instructor-base:
|
| 591 |
+
link: https://huggingface.co/hkunlp/instructor-base
|
| 592 |
+
seq_len: N/A
|
| 593 |
+
size: 110
|
| 594 |
+
dim: 768
|
| 595 |
+
is_external: true
|
| 596 |
+
is_proprietary: false
|
| 597 |
+
is_sentence_transformers_compatible: true
|
| 598 |
+
instructor-xl:
|
| 599 |
+
link: https://huggingface.co/hkunlp/instructor-xl
|
| 600 |
+
seq_len: N/A
|
| 601 |
+
size: 1241
|
| 602 |
+
dim: 768
|
| 603 |
+
is_external: true
|
| 604 |
+
is_proprietary: false
|
| 605 |
+
is_sentence_transformers_compatible: true
|
| 606 |
komninos:
|
| 607 |
link: https://huggingface.co/sentence-transformers/average_word_embeddings_komninos
|
| 608 |
seq_len: N/A
|
|
|
|
| 611 |
is_external: true
|
| 612 |
is_proprietary: false
|
| 613 |
is_sentence_transformers_compatible: true
|
| 614 |
+
llama-2-7b-chat:
|
| 615 |
+
link: https://huggingface.co/meta-llama/Llama-2-7b-chat-hf
|
| 616 |
+
seq_len: 4096
|
| 617 |
+
size: 7000
|
| 618 |
+
dim: -1
|
| 619 |
+
is_external: true
|
| 620 |
+
is_proprietary: false
|
| 621 |
+
is_sentence_transformers_compatible: false
|
| 622 |
luotuo-bert-medium:
|
| 623 |
link: https://huggingface.co/silk-road/luotuo-bert-medium
|
| 624 |
seq_len: 512
|
|
|
|
| 643 |
is_external: true
|
| 644 |
is_proprietary: false
|
| 645 |
is_sentence_transformers_compatible: true
|
| 646 |
+
mistral-7b-instruct-v0.2:
|
| 647 |
+
link: https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2
|
| 648 |
+
seq_len: 4096
|
| 649 |
+
size: 7240
|
| 650 |
+
dim: -1
|
| 651 |
+
is_external: true
|
| 652 |
+
is_proprietary: false
|
| 653 |
+
is_sentence_transformers_compatible: false
|
| 654 |
mistral-embed:
|
| 655 |
link: https://docs.mistral.ai/guides/embeddings
|
| 656 |
seq_len: null
|
|
|
|
| 659 |
is_external: true
|
| 660 |
is_proprietary: true
|
| 661 |
is_sentence_transformers_compatible: false
|
| 662 |
+
monobert-large-msmarco:
|
| 663 |
+
link: https://huggingface.co/castorini/monobert-large-msmarco
|
| 664 |
+
seq_len: 512
|
| 665 |
+
size: 770
|
| 666 |
+
dim: -1
|
| 667 |
+
is_external: true
|
| 668 |
+
is_proprietary: false
|
| 669 |
+
is_sentence_transformers_compatible: false
|
| 670 |
+
monot5-3b-msmarco-10k:
|
| 671 |
+
link: https://huggingface.co/castorini/monot5-3b-msmarco-10k
|
| 672 |
+
seq_len: 512
|
| 673 |
+
size: 2480
|
| 674 |
+
dim: -1
|
| 675 |
+
is_external: true
|
| 676 |
+
is_proprietary: false
|
| 677 |
+
is_sentence_transformers_compatible: false
|
| 678 |
+
monot5-base-msmarco-10k:
|
| 679 |
+
link: https://huggingface.co/castorini/monot5-base-msmarco-10k
|
| 680 |
+
seq_len: 512
|
| 681 |
+
size: 220
|
| 682 |
+
dim: -1
|
| 683 |
+
is_external: true
|
| 684 |
+
is_proprietary: false
|
| 685 |
+
is_sentence_transformers_compatible: false
|
| 686 |
msmarco-bert-co-condensor:
|
| 687 |
link: https://huggingface.co/sentence-transformers/msmarco-bert-co-condensor
|
| 688 |
seq_len: 512
|
|
|
|
| 1011 |
is_external: true
|
| 1012 |
is_proprietary: true
|
| 1013 |
is_sentence_transformers_compatible: false
|
| 1014 |
+
tart-dual-contriever-msmarco:
|
| 1015 |
+
link: https://huggingface.co/orionweller/tart-dual-contriever-msmarco
|
| 1016 |
+
seq_len: 512
|
| 1017 |
+
size: 110
|
| 1018 |
+
dim: 768
|
| 1019 |
+
is_external: true
|
| 1020 |
+
is_proprietary: false
|
| 1021 |
+
is_sentence_transformers_compatible: false
|
| 1022 |
+
tart-full-flan-t5-xl:
|
| 1023 |
+
link: https://huggingface.co/facebook/tart-full-flan-t5-xl
|
| 1024 |
+
seq_len: 512
|
| 1025 |
+
size: 2480
|
| 1026 |
+
dim: -1
|
| 1027 |
+
is_external: true
|
| 1028 |
+
is_proprietary: false
|
| 1029 |
+
is_sentence_transformers_compatible: false
|
| 1030 |
text2vec-base-chinese:
|
| 1031 |
link: https://huggingface.co/shibing624/text2vec-base-chinese
|
| 1032 |
seq_len: 512
|
|
|
|
| 1308 |
- michaelfeil/ct2fast-gte-large
|
| 1309 |
- gizmo-ai/Cohere-embed-multilingual-v3.0
|
| 1310 |
- McGill-NLP/LLM2Vec-Llama-2-7b-chat-hf-mntp-unsup-simcse
|
| 1311 |
+
cross_encoders:
|
| 1312 |
+
- FollowIR-7B
|
| 1313 |
+
- flan-t5-base
|
| 1314 |
+
- flan-t5-large
|
| 1315 |
+
- monobert-large-msmarco
|
| 1316 |
+
- monot5-3b-msmarco-10k
|
| 1317 |
+
- monot5-base-msmarco-10k
|
| 1318 |
+
- llama-2-7b-chat
|
| 1319 |
+
- mistral-7b-instruct-v0.2
|
| 1320 |
+
- tart-full-flan-t5-xl
|