Muennighoff committed on
Commit
d2198dc
1 Parent(s): 1e84aac

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -16
app.py CHANGED
@@ -336,6 +336,7 @@ EXTERNAL_MODELS = [
336
  "text2vec-large-chinese",
337
  "text-embedding-3-small",
338
  "text-embedding-3-large",
 
339
  "text-embedding-ada-002",
340
  "text-similarity-ada-001",
341
  "text-similarity-babbage-001",
@@ -418,6 +419,7 @@ EXTERNAL_MODEL_TO_LINK = {
418
  "text2vec-large-chinese": "https://huggingface.co/GanymedeNil/text2vec-large-chinese",
419
  "text-embedding-3-small": "https://openai.com/blog/new-embedding-models-and-api-updates",
420
  "text-embedding-3-large": "https://openai.com/blog/new-embedding-models-and-api-updates",
 
421
  "text-embedding-ada-002": "https://openai.com/blog/new-and-improved-embedding-model",
422
  "text-similarity-ada-001": "https://openai.com/blog/introducing-text-and-code-embeddings",
423
  "text-similarity-babbage-001": "https://openai.com/blog/introducing-text-and-code-embeddings",
@@ -499,7 +501,8 @@ EXTERNAL_MODEL_TO_DIM = {
499
  "text2vec-base-chinese": 768,
500
  "text2vec-large-chinese": 1024,
501
  "text-embedding-3-large": 3072,
502
- "text-embedding-3-small": 1536,
 
503
  "text-embedding-ada-002": 1536,
504
  "text-similarity-ada-001": 1024,
505
  "text-similarity-babbage-001": 2048,
@@ -581,6 +584,7 @@ EXTERNAL_MODEL_TO_SEQLEN = {
581
  "text2vec-base-chinese": 512,
582
  "text2vec-large-chinese": 512,
583
  "text-embedding-3-large": 8191,
 
584
  "text-embedding-3-small": 8191,
585
  "text-embedding-ada-002": 8191,
586
  "text-similarity-ada-001": 2046,
@@ -882,7 +886,7 @@ def make_datasets_clickable(df):
882
  return df
883
 
884
  def add_rank(df):
885
- cols_to_rank = [col for col in df.columns if col not in ["Model", "Model Size (GB)", "Embedding Dimensions", "Sequence Length"]]
886
  if len(cols_to_rank) == 1:
887
  df.sort_values(cols_to_rank[0], ascending=False, inplace=True)
888
  else:
@@ -914,7 +918,7 @@ def get_mteb_data(tasks=["Clustering"], langs=[], datasets=[], fillna=True, add_
914
  if add_emb_dim:
915
  res["Model Size (GB)"] = EXTERNAL_MODEL_TO_SIZE.get(model, "")
916
  res["Embedding Dimensions"] = EXTERNAL_MODEL_TO_DIM.get(model, "")
917
- res["Sequence Length"] = EXTERNAL_MODEL_TO_SEQLEN.get(model, "")
918
  df_list.append(res)
919
 
920
  for model in models:
@@ -953,7 +957,7 @@ def get_mteb_data(tasks=["Clustering"], langs=[], datasets=[], fillna=True, add_
953
  if add_emb_dim:
954
  try:
955
  # Fails on gated repos, so we only include scores for them
956
- out["Embedding Dimensions"], out["Sequence Length"], out["Model Size (GB)"] = get_dim_seq_size(model)
957
  except:
958
  pass
959
  df_list.append(out)
@@ -1030,7 +1034,7 @@ def get_mteb_average():
1030
  # Fill NaN after averaging
1031
  DATA_OVERALL.fillna("", inplace=True)
1032
 
1033
- DATA_OVERALL = DATA_OVERALL[["Rank", "Model", "Model Size (GB)", "Embedding Dimensions", "Sequence Length", f"Average ({len(TASK_LIST_EN)} datasets)", f"Classification Average ({len(TASK_LIST_CLASSIFICATION)} datasets)", f"Clustering Average ({len(TASK_LIST_CLUSTERING)} datasets)", f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION)} datasets)", f"Reranking Average ({len(TASK_LIST_RERANKING)} datasets)", f"Retrieval Average ({len(TASK_LIST_RETRIEVAL)} datasets)", f"STS Average ({len(TASK_LIST_STS)} datasets)", f"Summarization Average ({len(TASK_LIST_SUMMARIZATION)} dataset)"]]
1034
  DATA_OVERALL = DATA_OVERALL[DATA_OVERALL.iloc[:, 5:].ne("").any(axis=1)]
1035
 
1036
  return DATA_OVERALL
@@ -1089,7 +1093,7 @@ def get_mteb_average_zh():
1089
  # Fill NaN after averaging
1090
  DATA_OVERALL_ZH.fillna("", inplace=True)
1091
 
1092
- DATA_OVERALL_ZH = DATA_OVERALL_ZH[["Rank", "Model", "Model Size (GB)", "Embedding Dimensions", "Sequence Length", f"Average ({len(TASK_LIST_ZH)} datasets)", f"Classification Average ({len(TASK_LIST_CLASSIFICATION_ZH)} datasets)", f"Clustering Average ({len(TASK_LIST_CLUSTERING_ZH)} datasets)", f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION_ZH)} datasets)", f"Reranking Average ({len(TASK_LIST_RERANKING_ZH)} datasets)", f"Retrieval Average ({len(TASK_LIST_RETRIEVAL_ZH)} datasets)", f"STS Average ({len(TASK_LIST_STS_ZH)} datasets)"]]
1093
  DATA_OVERALL_ZH = DATA_OVERALL_ZH[DATA_OVERALL_ZH.iloc[:, 5:].ne("").any(axis=1)]
1094
 
1095
  return DATA_OVERALL_ZH
@@ -1143,7 +1147,7 @@ def get_mteb_average_pl():
1143
  # Fill NaN after averaging
1144
  DATA_OVERALL_PL.fillna("", inplace=True)
1145
 
1146
- DATA_OVERALL_PL = DATA_OVERALL_PL[["Rank", "Model", "Model Size (GB)", "Embedding Dimensions", "Sequence Length", f"Average ({len(TASK_LIST_PL)} datasets)", f"Classification Average ({len(TASK_LIST_CLASSIFICATION_PL)} datasets)", f"Clustering Average ({len(TASK_LIST_CLUSTERING_PL)} datasets)", f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION_PL)} datasets)", f"Retrieval Average ({len(TASK_LIST_RETRIEVAL_PL)} datasets)", f"STS Average ({len(TASK_LIST_STS_PL)} datasets)"]]
1147
  DATA_OVERALL_PL = DATA_OVERALL_PL[DATA_OVERALL_PL.iloc[:, 5:].ne("").any(axis=1)]
1148
 
1149
  return DATA_OVERALL_PL
@@ -1215,7 +1219,7 @@ table > thead {
1215
  }
1216
 
1217
  table {
1218
- --cell-width-1: 350px
1219
  }
1220
 
1221
  table > tbody > tr > td:nth-child(2) > div {
@@ -1227,11 +1231,6 @@ block = gr.Blocks(css=css)
1227
  with block:
1228
  gr.Markdown(f"""
1229
  Massive Text Embedding Benchmark (MTEB) Leaderboard. To submit, refer to the <a href="https://github.com/embeddings-benchmark/mteb#leaderboard" target="_blank" style="text-decoration: underline">MTEB GitHub repository</a> 🤗 Refer to the [MTEB paper](https://arxiv.org/abs/2210.07316) for details on metrics, tasks and models.
1230
-
1231
- - **Total Datasets**: {NUM_DATASETS}
1232
- - **Total Languages**: 113
1233
- - **Total Scores**: {NUM_SCORES}
1234
- - **Total Models**: {NUM_MODELS}
1235
  """)
1236
  with gr.Tabs():
1237
  with gr.TabItem("Overall"):
@@ -1248,6 +1247,7 @@ with block:
1248
  DATA_OVERALL,
1249
  datatype=["number", "markdown"] + ["number"] * len(DATA_OVERALL.columns),
1250
  type="pandas",
 
1251
  )
1252
  with gr.Row():
1253
  data_run_overall = gr.Button("Refresh")
@@ -1266,10 +1266,11 @@ with block:
1266
  DATA_OVERALL_ZH,
1267
  datatype=["number", "markdown"] + ["number"] * len(DATA_OVERALL_ZH.columns),
1268
  type="pandas",
 
1269
  )
1270
  with gr.Row():
1271
  data_run_overall_zh = gr.Button("Refresh")
1272
- data_run_overall_zh.click(get_mteb_average_zh, inputs=None, outputs=data_overall_zh)
1273
  with gr.TabItem("Polish"):
1274
  with gr.Row():
1275
  gr.Markdown("""
@@ -1284,6 +1285,7 @@ with block:
1284
  DATA_OVERALL_PL,
1285
  datatype=["number", "markdown"] + ["number"] * len(DATA_OVERALL_PL.columns),
1286
  type="pandas",
 
1287
  )
1288
  with gr.Row():
1289
  data_run_overall_pl = gr.Button("Refresh")
@@ -1834,8 +1836,12 @@ with block:
1834
  partial(get_mteb_data, tasks=["Summarization"]),
1835
  outputs=data_summarization,
1836
  )
1837
- gr.Markdown(r"""
1838
-
 
 
 
 
1839
  Made with ❤️ for NLP. If this work is useful to you, please consider citing:
1840
 
1841
  ```bibtex
 
336
  "text2vec-large-chinese",
337
  "text-embedding-3-small",
338
  "text-embedding-3-large",
339
+ "text-embedding-3-large-256",
340
  "text-embedding-ada-002",
341
  "text-similarity-ada-001",
342
  "text-similarity-babbage-001",
 
419
  "text2vec-large-chinese": "https://huggingface.co/GanymedeNil/text2vec-large-chinese",
420
  "text-embedding-3-small": "https://openai.com/blog/new-embedding-models-and-api-updates",
421
  "text-embedding-3-large": "https://openai.com/blog/new-embedding-models-and-api-updates",
422
+ "text-embedding-3-large-256": "https://openai.com/blog/new-embedding-models-and-api-updates",
423
  "text-embedding-ada-002": "https://openai.com/blog/new-and-improved-embedding-model",
424
  "text-similarity-ada-001": "https://openai.com/blog/introducing-text-and-code-embeddings",
425
  "text-similarity-babbage-001": "https://openai.com/blog/introducing-text-and-code-embeddings",
 
501
  "text2vec-base-chinese": 768,
502
  "text2vec-large-chinese": 1024,
503
  "text-embedding-3-large": 3072,
504
+ "text-embedding-3-large-256": 256,
505
+ "text-embedding-3-small": 1536,
506
  "text-embedding-ada-002": 1536,
507
  "text-similarity-ada-001": 1024,
508
  "text-similarity-babbage-001": 2048,
 
584
  "text2vec-base-chinese": 512,
585
  "text2vec-large-chinese": 512,
586
  "text-embedding-3-large": 8191,
587
+ "text-embedding-3-large-256": 8191,
588
  "text-embedding-3-small": 8191,
589
  "text-embedding-ada-002": 8191,
590
  "text-similarity-ada-001": 2046,
 
886
  return df
887
 
888
  def add_rank(df):
889
+ cols_to_rank = [col for col in df.columns if col not in ["Model", "Model Size (GB)", "Embedding Dimensions", "Max Tokens"]]
890
  if len(cols_to_rank) == 1:
891
  df.sort_values(cols_to_rank[0], ascending=False, inplace=True)
892
  else:
 
918
  if add_emb_dim:
919
  res["Model Size (GB)"] = EXTERNAL_MODEL_TO_SIZE.get(model, "")
920
  res["Embedding Dimensions"] = EXTERNAL_MODEL_TO_DIM.get(model, "")
921
+ res["Max Tokens"] = EXTERNAL_MODEL_TO_SEQLEN.get(model, "")
922
  df_list.append(res)
923
 
924
  for model in models:
 
957
  if add_emb_dim:
958
  try:
959
  # Fails on gated repos, so we only include scores for them
960
+ out["Embedding Dimensions"], out["Max Tokens"], out["Model Size (GB)"] = get_dim_seq_size(model)
961
  except:
962
  pass
963
  df_list.append(out)
 
1034
  # Fill NaN after averaging
1035
  DATA_OVERALL.fillna("", inplace=True)
1036
 
1037
+ DATA_OVERALL = DATA_OVERALL[["Rank", "Model", "Model Size (GB)", "Embedding Dimensions", "Max Tokens", f"Average ({len(TASK_LIST_EN)} datasets)", f"Classification Average ({len(TASK_LIST_CLASSIFICATION)} datasets)", f"Clustering Average ({len(TASK_LIST_CLUSTERING)} datasets)", f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION)} datasets)", f"Reranking Average ({len(TASK_LIST_RERANKING)} datasets)", f"Retrieval Average ({len(TASK_LIST_RETRIEVAL)} datasets)", f"STS Average ({len(TASK_LIST_STS)} datasets)", f"Summarization Average ({len(TASK_LIST_SUMMARIZATION)} dataset)"]]
1038
  DATA_OVERALL = DATA_OVERALL[DATA_OVERALL.iloc[:, 5:].ne("").any(axis=1)]
1039
 
1040
  return DATA_OVERALL
 
1093
  # Fill NaN after averaging
1094
  DATA_OVERALL_ZH.fillna("", inplace=True)
1095
 
1096
+ DATA_OVERALL_ZH = DATA_OVERALL_ZH[["Rank", "Model", "Model Size (GB)", "Embedding Dimensions", "Max Tokens", f"Average ({len(TASK_LIST_ZH)} datasets)", f"Classification Average ({len(TASK_LIST_CLASSIFICATION_ZH)} datasets)", f"Clustering Average ({len(TASK_LIST_CLUSTERING_ZH)} datasets)", f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION_ZH)} datasets)", f"Reranking Average ({len(TASK_LIST_RERANKING_ZH)} datasets)", f"Retrieval Average ({len(TASK_LIST_RETRIEVAL_ZH)} datasets)", f"STS Average ({len(TASK_LIST_STS_ZH)} datasets)"]]
1097
  DATA_OVERALL_ZH = DATA_OVERALL_ZH[DATA_OVERALL_ZH.iloc[:, 5:].ne("").any(axis=1)]
1098
 
1099
  return DATA_OVERALL_ZH
 
1147
  # Fill NaN after averaging
1148
  DATA_OVERALL_PL.fillna("", inplace=True)
1149
 
1150
+ DATA_OVERALL_PL = DATA_OVERALL_PL[["Rank", "Model", "Model Size (GB)", "Embedding Dimensions", "Max Tokens", f"Average ({len(TASK_LIST_PL)} datasets)", f"Classification Average ({len(TASK_LIST_CLASSIFICATION_PL)} datasets)", f"Clustering Average ({len(TASK_LIST_CLUSTERING_PL)} datasets)", f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION_PL)} datasets)", f"Retrieval Average ({len(TASK_LIST_RETRIEVAL_PL)} datasets)", f"STS Average ({len(TASK_LIST_STS_PL)} datasets)"]]
1151
  DATA_OVERALL_PL = DATA_OVERALL_PL[DATA_OVERALL_PL.iloc[:, 5:].ne("").any(axis=1)]
1152
 
1153
  return DATA_OVERALL_PL
 
1219
  }
1220
 
1221
  table {
1222
+ --cell-width-1: 210px
1223
  }
1224
 
1225
  table > tbody > tr > td:nth-child(2) > div {
 
1231
  with block:
1232
  gr.Markdown(f"""
1233
  Massive Text Embedding Benchmark (MTEB) Leaderboard. To submit, refer to the <a href="https://github.com/embeddings-benchmark/mteb#leaderboard" target="_blank" style="text-decoration: underline">MTEB GitHub repository</a> 🤗 Refer to the [MTEB paper](https://arxiv.org/abs/2210.07316) for details on metrics, tasks and models.
 
 
 
 
 
1234
  """)
1235
  with gr.Tabs():
1236
  with gr.TabItem("Overall"):
 
1247
  DATA_OVERALL,
1248
  datatype=["number", "markdown"] + ["number"] * len(DATA_OVERALL.columns),
1249
  type="pandas",
1250
+ height=600,
1251
  )
1252
  with gr.Row():
1253
  data_run_overall = gr.Button("Refresh")
 
1266
  DATA_OVERALL_ZH,
1267
  datatype=["number", "markdown"] + ["number"] * len(DATA_OVERALL_ZH.columns),
1268
  type="pandas",
1269
+ height=600,
1270
  )
1271
  with gr.Row():
1272
  data_run_overall_zh = gr.Button("Refresh")
1273
+ data_run_overall_zh.click(get_mteb_average_zh, inputs=None, outputs=data_overall_zh)
1274
  with gr.TabItem("Polish"):
1275
  with gr.Row():
1276
  gr.Markdown("""
 
1285
  DATA_OVERALL_PL,
1286
  datatype=["number", "markdown"] + ["number"] * len(DATA_OVERALL_PL.columns),
1287
  type="pandas",
1288
+ height=600,
1289
  )
1290
  with gr.Row():
1291
  data_run_overall_pl = gr.Button("Refresh")
 
1836
  partial(get_mteb_data, tasks=["Summarization"]),
1837
  outputs=data_summarization,
1838
  )
1839
+ gr.Markdown(f"""
1840
+ - **Total Datasets**: {NUM_DATASETS}
1841
+ - **Total Languages**: 113
1842
+ - **Total Scores**: {NUM_SCORES}
1843
+ - **Total Models**: {NUM_MODELS}
1844
+ """ + r"""
1845
  Made with ❤️ for NLP. If this work is useful to you, please consider citing:
1846
 
1847
  ```bibtex