Spaces:
Sleeping
Sleeping
Commit
·
b6a14bc
1
Parent(s):
c9586ba
Update app.py
Browse files
app.py
CHANGED
|
@@ -336,6 +336,7 @@ EXTERNAL_MODELS = [
|
|
| 336 |
"text2vec-large-chinese",
|
| 337 |
"text-embedding-3-small",
|
| 338 |
"text-embedding-3-large",
|
|
|
|
| 339 |
"text-embedding-ada-002",
|
| 340 |
"text-similarity-ada-001",
|
| 341 |
"text-similarity-babbage-001",
|
|
@@ -418,6 +419,7 @@ EXTERNAL_MODEL_TO_LINK = {
|
|
| 418 |
"text2vec-large-chinese": "https://huggingface.co/GanymedeNil/text2vec-large-chinese",
|
| 419 |
"text-embedding-3-small": "https://openai.com/blog/new-embedding-models-and-api-updates",
|
| 420 |
"text-embedding-3-large": "https://openai.com/blog/new-embedding-models-and-api-updates",
|
|
|
|
| 421 |
"text-embedding-ada-002": "https://openai.com/blog/new-and-improved-embedding-model",
|
| 422 |
"text-similarity-ada-001": "https://openai.com/blog/introducing-text-and-code-embeddings",
|
| 423 |
"text-similarity-babbage-001": "https://openai.com/blog/introducing-text-and-code-embeddings",
|
|
@@ -499,7 +501,8 @@ EXTERNAL_MODEL_TO_DIM = {
|
|
| 499 |
"text2vec-base-chinese": 768,
|
| 500 |
"text2vec-large-chinese": 1024,
|
| 501 |
"text-embedding-3-large": 3072,
|
| 502 |
-
"text-embedding-3-
|
|
|
|
| 503 |
"text-embedding-ada-002": 1536,
|
| 504 |
"text-similarity-ada-001": 1024,
|
| 505 |
"text-similarity-babbage-001": 2048,
|
|
@@ -581,6 +584,7 @@ EXTERNAL_MODEL_TO_SEQLEN = {
|
|
| 581 |
"text2vec-base-chinese": 512,
|
| 582 |
"text2vec-large-chinese": 512,
|
| 583 |
"text-embedding-3-large": 8191,
|
|
|
|
| 584 |
"text-embedding-3-small": 8191,
|
| 585 |
"text-embedding-ada-002": 8191,
|
| 586 |
"text-similarity-ada-001": 2046,
|
|
@@ -882,7 +886,7 @@ def make_datasets_clickable(df):
|
|
| 882 |
return df
|
| 883 |
|
| 884 |
def add_rank(df):
|
| 885 |
-
cols_to_rank = [col for col in df.columns if col not in ["Model", "Model Size (GB)", "Embedding Dimensions", "
|
| 886 |
if len(cols_to_rank) == 1:
|
| 887 |
df.sort_values(cols_to_rank[0], ascending=False, inplace=True)
|
| 888 |
else:
|
|
@@ -914,7 +918,7 @@ def get_mteb_data(tasks=["Clustering"], langs=[], datasets=[], fillna=True, add_
|
|
| 914 |
if add_emb_dim:
|
| 915 |
res["Model Size (GB)"] = EXTERNAL_MODEL_TO_SIZE.get(model, "")
|
| 916 |
res["Embedding Dimensions"] = EXTERNAL_MODEL_TO_DIM.get(model, "")
|
| 917 |
-
res["
|
| 918 |
df_list.append(res)
|
| 919 |
|
| 920 |
for model in models:
|
|
@@ -953,7 +957,7 @@ def get_mteb_data(tasks=["Clustering"], langs=[], datasets=[], fillna=True, add_
|
|
| 953 |
if add_emb_dim:
|
| 954 |
try:
|
| 955 |
# Fails on gated repos, so we only include scores for them
|
| 956 |
-
out["Embedding Dimensions"], out["
|
| 957 |
except:
|
| 958 |
pass
|
| 959 |
df_list.append(out)
|
|
@@ -1030,7 +1034,7 @@ def get_mteb_average():
|
|
| 1030 |
# Fill NaN after averaging
|
| 1031 |
DATA_OVERALL.fillna("", inplace=True)
|
| 1032 |
|
| 1033 |
-
DATA_OVERALL = DATA_OVERALL[["Rank", "Model", "Model Size (GB)", "Embedding Dimensions", "
|
| 1034 |
DATA_OVERALL = DATA_OVERALL[DATA_OVERALL.iloc[:, 5:].ne("").any(axis=1)]
|
| 1035 |
|
| 1036 |
return DATA_OVERALL
|
|
@@ -1089,7 +1093,7 @@ def get_mteb_average_zh():
|
|
| 1089 |
# Fill NaN after averaging
|
| 1090 |
DATA_OVERALL_ZH.fillna("", inplace=True)
|
| 1091 |
|
| 1092 |
-
DATA_OVERALL_ZH = DATA_OVERALL_ZH[["Rank", "Model", "Model Size (GB)", "Embedding Dimensions", "
|
| 1093 |
DATA_OVERALL_ZH = DATA_OVERALL_ZH[DATA_OVERALL_ZH.iloc[:, 5:].ne("").any(axis=1)]
|
| 1094 |
|
| 1095 |
return DATA_OVERALL_ZH
|
|
@@ -1143,7 +1147,7 @@ def get_mteb_average_pl():
|
|
| 1143 |
# Fill NaN after averaging
|
| 1144 |
DATA_OVERALL_PL.fillna("", inplace=True)
|
| 1145 |
|
| 1146 |
-
DATA_OVERALL_PL = DATA_OVERALL_PL[["Rank", "Model", "Model Size (GB)", "Embedding Dimensions", "
|
| 1147 |
DATA_OVERALL_PL = DATA_OVERALL_PL[DATA_OVERALL_PL.iloc[:, 5:].ne("").any(axis=1)]
|
| 1148 |
|
| 1149 |
return DATA_OVERALL_PL
|
|
@@ -1215,7 +1219,7 @@ table > thead {
|
|
| 1215 |
}
|
| 1216 |
|
| 1217 |
table {
|
| 1218 |
-
--cell-width-1:
|
| 1219 |
}
|
| 1220 |
|
| 1221 |
table > tbody > tr > td:nth-child(2) > div {
|
|
@@ -1227,11 +1231,6 @@ block = gr.Blocks(css=css)
|
|
| 1227 |
with block:
|
| 1228 |
gr.Markdown(f"""
|
| 1229 |
Massive Text Embedding Benchmark (MTEB) Leaderboard. To submit, refer to the <a href="https://github.com/embeddings-benchmark/mteb#leaderboard" target="_blank" style="text-decoration: underline">MTEB GitHub repository</a> 🤗 Refer to the [MTEB paper](https://arxiv.org/abs/2210.07316) for details on metrics, tasks and models.
|
| 1230 |
-
|
| 1231 |
-
- **Total Datasets**: {NUM_DATASETS}
|
| 1232 |
-
- **Total Languages**: 113
|
| 1233 |
-
- **Total Scores**: {NUM_SCORES}
|
| 1234 |
-
- **Total Models**: {NUM_MODELS}
|
| 1235 |
""")
|
| 1236 |
with gr.Tabs():
|
| 1237 |
with gr.TabItem("Overall"):
|
|
@@ -1248,6 +1247,7 @@ with block:
|
|
| 1248 |
DATA_OVERALL,
|
| 1249 |
datatype=["number", "markdown"] + ["number"] * len(DATA_OVERALL.columns),
|
| 1250 |
type="pandas",
|
|
|
|
| 1251 |
)
|
| 1252 |
with gr.Row():
|
| 1253 |
data_run_overall = gr.Button("Refresh")
|
|
@@ -1266,10 +1266,11 @@ with block:
|
|
| 1266 |
DATA_OVERALL_ZH,
|
| 1267 |
datatype=["number", "markdown"] + ["number"] * len(DATA_OVERALL_ZH.columns),
|
| 1268 |
type="pandas",
|
|
|
|
| 1269 |
)
|
| 1270 |
with gr.Row():
|
| 1271 |
data_run_overall_zh = gr.Button("Refresh")
|
| 1272 |
-
data_run_overall_zh.click(get_mteb_average_zh, inputs=None, outputs=data_overall_zh)
|
| 1273 |
with gr.TabItem("Polish"):
|
| 1274 |
with gr.Row():
|
| 1275 |
gr.Markdown("""
|
|
@@ -1284,6 +1285,7 @@ with block:
|
|
| 1284 |
DATA_OVERALL_PL,
|
| 1285 |
datatype=["number", "markdown"] + ["number"] * len(DATA_OVERALL_PL.columns),
|
| 1286 |
type="pandas",
|
|
|
|
| 1287 |
)
|
| 1288 |
with gr.Row():
|
| 1289 |
data_run_overall_pl = gr.Button("Refresh")
|
|
@@ -1834,8 +1836,12 @@ with block:
|
|
| 1834 |
partial(get_mteb_data, tasks=["Summarization"]),
|
| 1835 |
outputs=data_summarization,
|
| 1836 |
)
|
| 1837 |
-
gr.Markdown(
|
| 1838 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1839 |
Made with ❤️ for NLP. If this work is useful to you, please consider citing:
|
| 1840 |
|
| 1841 |
```bibtex
|
|
|
|
| 336 |
"text2vec-large-chinese",
|
| 337 |
"text-embedding-3-small",
|
| 338 |
"text-embedding-3-large",
|
| 339 |
+
"text-embedding-3-large-256",
|
| 340 |
"text-embedding-ada-002",
|
| 341 |
"text-similarity-ada-001",
|
| 342 |
"text-similarity-babbage-001",
|
|
|
|
| 419 |
"text2vec-large-chinese": "https://huggingface.co/GanymedeNil/text2vec-large-chinese",
|
| 420 |
"text-embedding-3-small": "https://openai.com/blog/new-embedding-models-and-api-updates",
|
| 421 |
"text-embedding-3-large": "https://openai.com/blog/new-embedding-models-and-api-updates",
|
| 422 |
+
"text-embedding-3-large-256": "https://openai.com/blog/new-embedding-models-and-api-updates",
|
| 423 |
"text-embedding-ada-002": "https://openai.com/blog/new-and-improved-embedding-model",
|
| 424 |
"text-similarity-ada-001": "https://openai.com/blog/introducing-text-and-code-embeddings",
|
| 425 |
"text-similarity-babbage-001": "https://openai.com/blog/introducing-text-and-code-embeddings",
|
|
|
|
| 501 |
"text2vec-base-chinese": 768,
|
| 502 |
"text2vec-large-chinese": 1024,
|
| 503 |
"text-embedding-3-large": 3072,
|
| 504 |
+
"text-embedding-3-large-256": 256,
|
| 505 |
+
"text-embedding-3-small": 1536,
|
| 506 |
"text-embedding-ada-002": 1536,
|
| 507 |
"text-similarity-ada-001": 1024,
|
| 508 |
"text-similarity-babbage-001": 2048,
|
|
|
|
| 584 |
"text2vec-base-chinese": 512,
|
| 585 |
"text2vec-large-chinese": 512,
|
| 586 |
"text-embedding-3-large": 8191,
|
| 587 |
+
"text-embedding-3-large-256": 8191,
|
| 588 |
"text-embedding-3-small": 8191,
|
| 589 |
"text-embedding-ada-002": 8191,
|
| 590 |
"text-similarity-ada-001": 2046,
|
|
|
|
| 886 |
return df
|
| 887 |
|
| 888 |
def add_rank(df):
|
| 889 |
+
cols_to_rank = [col for col in df.columns if col not in ["Model", "Model Size (GB)", "Embedding Dimensions", "Max Tokens"]]
|
| 890 |
if len(cols_to_rank) == 1:
|
| 891 |
df.sort_values(cols_to_rank[0], ascending=False, inplace=True)
|
| 892 |
else:
|
|
|
|
| 918 |
if add_emb_dim:
|
| 919 |
res["Model Size (GB)"] = EXTERNAL_MODEL_TO_SIZE.get(model, "")
|
| 920 |
res["Embedding Dimensions"] = EXTERNAL_MODEL_TO_DIM.get(model, "")
|
| 921 |
+
res["Max Tokens"] = EXTERNAL_MODEL_TO_SEQLEN.get(model, "")
|
| 922 |
df_list.append(res)
|
| 923 |
|
| 924 |
for model in models:
|
|
|
|
| 957 |
if add_emb_dim:
|
| 958 |
try:
|
| 959 |
# Fails on gated repos, so we only include scores for them
|
| 960 |
+
out["Embedding Dimensions"], out["Max Tokens"], out["Model Size (GB)"] = get_dim_seq_size(model)
|
| 961 |
except:
|
| 962 |
pass
|
| 963 |
df_list.append(out)
|
|
|
|
| 1034 |
# Fill NaN after averaging
|
| 1035 |
DATA_OVERALL.fillna("", inplace=True)
|
| 1036 |
|
| 1037 |
+
DATA_OVERALL = DATA_OVERALL[["Rank", "Model", "Model Size (GB)", "Embedding Dimensions", "Max Tokens", f"Average ({len(TASK_LIST_EN)} datasets)", f"Classification Average ({len(TASK_LIST_CLASSIFICATION)} datasets)", f"Clustering Average ({len(TASK_LIST_CLUSTERING)} datasets)", f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION)} datasets)", f"Reranking Average ({len(TASK_LIST_RERANKING)} datasets)", f"Retrieval Average ({len(TASK_LIST_RETRIEVAL)} datasets)", f"STS Average ({len(TASK_LIST_STS)} datasets)", f"Summarization Average ({len(TASK_LIST_SUMMARIZATION)} dataset)"]]
|
| 1038 |
DATA_OVERALL = DATA_OVERALL[DATA_OVERALL.iloc[:, 5:].ne("").any(axis=1)]
|
| 1039 |
|
| 1040 |
return DATA_OVERALL
|
|
|
|
| 1093 |
# Fill NaN after averaging
|
| 1094 |
DATA_OVERALL_ZH.fillna("", inplace=True)
|
| 1095 |
|
| 1096 |
+
DATA_OVERALL_ZH = DATA_OVERALL_ZH[["Rank", "Model", "Model Size (GB)", "Embedding Dimensions", "Max Tokens", f"Average ({len(TASK_LIST_ZH)} datasets)", f"Classification Average ({len(TASK_LIST_CLASSIFICATION_ZH)} datasets)", f"Clustering Average ({len(TASK_LIST_CLUSTERING_ZH)} datasets)", f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION_ZH)} datasets)", f"Reranking Average ({len(TASK_LIST_RERANKING_ZH)} datasets)", f"Retrieval Average ({len(TASK_LIST_RETRIEVAL_ZH)} datasets)", f"STS Average ({len(TASK_LIST_STS_ZH)} datasets)"]]
|
| 1097 |
DATA_OVERALL_ZH = DATA_OVERALL_ZH[DATA_OVERALL_ZH.iloc[:, 5:].ne("").any(axis=1)]
|
| 1098 |
|
| 1099 |
return DATA_OVERALL_ZH
|
|
|
|
| 1147 |
# Fill NaN after averaging
|
| 1148 |
DATA_OVERALL_PL.fillna("", inplace=True)
|
| 1149 |
|
| 1150 |
+
DATA_OVERALL_PL = DATA_OVERALL_PL[["Rank", "Model", "Model Size (GB)", "Embedding Dimensions", "Max Tokens", f"Average ({len(TASK_LIST_PL)} datasets)", f"Classification Average ({len(TASK_LIST_CLASSIFICATION_PL)} datasets)", f"Clustering Average ({len(TASK_LIST_CLUSTERING_PL)} datasets)", f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION_PL)} datasets)", f"Retrieval Average ({len(TASK_LIST_RETRIEVAL_PL)} datasets)", f"STS Average ({len(TASK_LIST_STS_PL)} datasets)"]]
|
| 1151 |
DATA_OVERALL_PL = DATA_OVERALL_PL[DATA_OVERALL_PL.iloc[:, 5:].ne("").any(axis=1)]
|
| 1152 |
|
| 1153 |
return DATA_OVERALL_PL
|
|
|
|
| 1219 |
}
|
| 1220 |
|
| 1221 |
table {
|
| 1222 |
+
--cell-width-1: 210px
|
| 1223 |
}
|
| 1224 |
|
| 1225 |
table > tbody > tr > td:nth-child(2) > div {
|
|
|
|
| 1231 |
with block:
|
| 1232 |
gr.Markdown(f"""
|
| 1233 |
Massive Text Embedding Benchmark (MTEB) Leaderboard. To submit, refer to the <a href="https://github.com/embeddings-benchmark/mteb#leaderboard" target="_blank" style="text-decoration: underline">MTEB GitHub repository</a> 🤗 Refer to the [MTEB paper](https://arxiv.org/abs/2210.07316) for details on metrics, tasks and models.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1234 |
""")
|
| 1235 |
with gr.Tabs():
|
| 1236 |
with gr.TabItem("Overall"):
|
|
|
|
| 1247 |
DATA_OVERALL,
|
| 1248 |
datatype=["number", "markdown"] + ["number"] * len(DATA_OVERALL.columns),
|
| 1249 |
type="pandas",
|
| 1250 |
+
height=600,
|
| 1251 |
)
|
| 1252 |
with gr.Row():
|
| 1253 |
data_run_overall = gr.Button("Refresh")
|
|
|
|
| 1266 |
DATA_OVERALL_ZH,
|
| 1267 |
datatype=["number", "markdown"] + ["number"] * len(DATA_OVERALL_ZH.columns),
|
| 1268 |
type="pandas",
|
| 1269 |
+
height=600,
|
| 1270 |
)
|
| 1271 |
with gr.Row():
|
| 1272 |
data_run_overall_zh = gr.Button("Refresh")
|
| 1273 |
+
data_run_overall_zh.click(get_mteb_average_zh, inputs=None, outputs=data_overall_zh)
|
| 1274 |
with gr.TabItem("Polish"):
|
| 1275 |
with gr.Row():
|
| 1276 |
gr.Markdown("""
|
|
|
|
| 1285 |
DATA_OVERALL_PL,
|
| 1286 |
datatype=["number", "markdown"] + ["number"] * len(DATA_OVERALL_PL.columns),
|
| 1287 |
type="pandas",
|
| 1288 |
+
height=600,
|
| 1289 |
)
|
| 1290 |
with gr.Row():
|
| 1291 |
data_run_overall_pl = gr.Button("Refresh")
|
|
|
|
| 1836 |
partial(get_mteb_data, tasks=["Summarization"]),
|
| 1837 |
outputs=data_summarization,
|
| 1838 |
)
|
| 1839 |
+
gr.Markdown(f"""
|
| 1840 |
+
- **Total Datasets**: {NUM_DATASETS}
|
| 1841 |
+
- **Total Languages**: 113
|
| 1842 |
+
- **Total Scores**: {NUM_SCORES}
|
| 1843 |
+
- **Total Models**: {NUM_MODELS}
|
| 1844 |
+
""" + r"""
|
| 1845 |
Made with ❤️ for NLP. If this work is useful to you, please consider citing:
|
| 1846 |
|
| 1847 |
```bibtex
|