Muennighoff committed on
Commit
3ffdc42
1 Parent(s): 003d24d
Files changed (2) hide show
  1. README.md +0 -1
  2. app.py +133 -96
README.md CHANGED
@@ -1,4 +1,3 @@
1
-
2
  ---
3
  title: leaderboard
4
  emoji: 🔥
 
 
1
  ---
2
  title: leaderboard
3
  emoji: 🔥
app.py CHANGED
@@ -96,19 +96,6 @@ TASK_LIST_SUMMARIZATION = [
96
 
97
  TASK_LIST_EN = TASK_LIST_CLASSIFICATION + TASK_LIST_CLUSTERING + TASK_LIST_PAIR_CLASSIFICATION + TASK_LIST_RERANKING + TASK_LIST_RETRIEVAL + TASK_LIST_STS + TASK_LIST_SUMMARIZATION
98
 
99
- TASK_TO_TASK_LIST = {}
100
-
101
-
102
-
103
- def make_clickable_model(model_name):
104
- # Remove user from model name
105
- model_name_show = " ".join(model_name.split("/")[1:])
106
- link = "https://huggingface.co/" + model_name
107
- return (
108
- f'<a target="_blank" style="text-decoration: underline" href="{link}">{model_name_show}</a>'
109
- )
110
-
111
-
112
  TASK_TO_METRIC = {
113
  "BitextMining": "f1",
114
  "Clustering": "v_measure",
@@ -120,7 +107,16 @@ TASK_TO_METRIC = {
120
  "Summarization": "cos_sim_spearman",
121
  }
122
 
123
- def get_mteb_data(tasks=["Clustering"], metric="v_measure", langs=[], cast_to_str=True, task_to_metric=TASK_TO_METRIC):
 
 
 
 
 
 
 
 
 
124
  api = HfApi()
125
  models = api.list_models(filter="mteb")
126
  df_list = []
@@ -141,9 +137,7 @@ def get_mteb_data(tasks=["Clustering"], metric="v_measure", langs=[], cast_to_st
141
  # {"type": "f1", "value": 38.809586587791664},
142
  # ],
143
  # },
144
-
145
  # Use "get" instead of dict indexing to skip incompat metadata instead of erroring out
146
- #if langs is None:
147
  task_results = [sub_res for sub_res in meta["model-index"][0]["results"] if (sub_res.get("task", {}).get("type", "") in tasks) and (sub_res.get("dataset", {}).get("config", "default") in ("default", *langs))]
148
  out = [{res["dataset"]["name"].replace("MTEB ", ""): [round(score["value"], 2) for score in res["metrics"] if score["type"] == task_to_metric.get(res["task"]["type"])][0]} for res in task_results]
149
  #else:
@@ -170,53 +164,60 @@ def get_mteb_data(tasks=["Clustering"], metric="v_measure", langs=[], cast_to_st
170
  cols = sorted(list(df.columns))
171
  cols.insert(0, cols.pop(cols.index("Model")))
172
  df = df[cols]
173
- # df.insert(1, "Average", df.mean(axis=1, skipna=False))
174
  df.fillna("", inplace=True)
175
  if cast_to_str:
176
  return df.astype(str) # Cast to str as Gradio does not accept floats
177
  return df
178
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
 
180
- DATA_OVERALL = get_mteb_data(
181
- tasks=[
182
- "Classification",
183
- "Clustering",
184
- "PairClassification",
185
- "Reranking",
186
- "Retrieval",
187
- "STS",
188
- "Summarization",
189
- ],
190
- langs=["en", "en-en"],
191
- cast_to_str=False
192
- )
193
-
194
- DATA_OVERALL.insert(1, "Average", DATA_OVERALL[TASK_LIST_EN].mean(axis=1, skipna=False))
195
- DATA_OVERALL.insert(2, "Classification Average", DATA_OVERALL[TASK_LIST_CLASSIFICATION].mean(axis=1, skipna=False))
196
- DATA_OVERALL.insert(3, "Clustering Average", DATA_OVERALL[TASK_LIST_CLUSTERING].mean(axis=1, skipna=False))
197
- DATA_OVERALL.insert(4, "Pair Classification Average", DATA_OVERALL[TASK_LIST_PAIR_CLASSIFICATION].mean(axis=1, skipna=False))
198
- DATA_OVERALL.insert(5, "Reranking Average", DATA_OVERALL[TASK_LIST_RERANKING].mean(axis=1, skipna=False))
199
- DATA_OVERALL.insert(6, "Retrieval Average", DATA_OVERALL[TASK_LIST_RETRIEVAL].mean(axis=1, skipna=False))
200
- DATA_OVERALL.insert(7, "STS Average", DATA_OVERALL[TASK_LIST_STS].mean(axis=1, skipna=False))
201
- DATA_OVERALL.insert(8, "Summarization Average", DATA_OVERALL[TASK_LIST_SUMMARIZATION].mean(axis=1, skipna=False))
202
- DATA_OVERALL = DATA_OVERALL.round(2).astype(str)
203
 
204
- DATA_CLASSIFICATION_EN = DATA_OVERALL[["Model"] + TASK_LIST_CLASSIFICATION]
205
- DATA_CLUSTERING = DATA_OVERALL[["Model"] + TASK_LIST_CLUSTERING]
206
- DATA_PAIR_CLASSIFICATION = DATA_OVERALL[["Model"] + TASK_LIST_PAIR_CLASSIFICATION]
207
- DATA_RERANKING = DATA_OVERALL[["Model"] + TASK_LIST_RERANKING]
208
- DATA_RETRIEVAL = DATA_OVERALL[["Model"] + TASK_LIST_RETRIEVAL]
209
- DATA_STS_EN = DATA_OVERALL[["Model"] + TASK_LIST_STS]
210
- DATA_SUMMARIZATION = DATA_OVERALL[["Model"] + TASK_LIST_SUMMARIZATION]
211
 
212
- DATA_OVERALL = DATA_OVERALL[["Model", "Average", "Classification Average", "Clustering Average", "Pair Classification Average", "Reranking Average", "Retrieval Average", "STS Average", "Summarization Average"]]
213
 
 
214
 
 
215
  block = gr.Blocks()
216
 
 
217
  with block:
218
  gr.Markdown(
219
- """Leaderboard for XX most popular Blocks Event Spaces. To learn more and join, see <a href="https://huggingface.co/Gradio-Blocks" target="_blank" style="text-decoration: underline">Blocks Party Event</a>"""
220
  )
221
  with gr.Tabs():
222
  with gr.TabItem("Overall"):
@@ -225,11 +226,30 @@ with block:
225
  with gr.Row():
226
  data_overall = gr.components.Dataframe(
227
  DATA_OVERALL,
228
- datatype="markdown",
229
  type="pandas",
230
- col_count=(len(DATA_OVERALL.columns), "fixed"),
231
  wrap=True,
232
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
233
  with gr.TabItem("Classification"):
234
  with gr.TabItem("English"):
235
  with gr.Row():
@@ -237,20 +257,17 @@ with block:
237
  with gr.Row():
238
  data_classification_en = gr.components.Dataframe(
239
  DATA_CLASSIFICATION_EN,
240
- datatype="markdown",
241
  type="pandas",
242
- col_count=(len(DATA_CLASSIFICATION_EN.columns), "fixed"),
243
  )
244
  with gr.Row():
245
- data_run = gr.Button("Refresh")
246
  task_classification_en = gr.Variable(value="Classification")
247
- metric_classification_en = gr.Variable(value="accuracy")
248
  lang_classification_en = gr.Variable(value=["en"])
249
- data_run.click(
250
  get_mteb_data,
251
  inputs=[
252
  task_classification_en,
253
- metric_classification_en,
254
  lang_classification_en,
255
  ],
256
  outputs=data_classification_en,
@@ -260,16 +277,15 @@ with block:
260
  gr.Markdown("""Multilingual Classification""")
261
  with gr.Row():
262
  data_classification = gr.components.Dataframe(
263
- datatype=["markdown"] * 500,
264
  type="pandas",
265
  )
266
  with gr.Row():
267
  data_run = gr.Button("Refresh")
268
  task_classification = gr.Variable(value="Classification")
269
- metric_classification = gr.Variable(value="accuracy")
270
  data_run.click(
271
  get_mteb_data,
272
- inputs=[task_classification, metric_classification],
273
  outputs=data_classification,
274
  )
275
  with gr.TabItem("Clustering"):
@@ -277,48 +293,68 @@ with block:
277
  gr.Markdown("""Leaderboard for Clustering""")
278
  with gr.Row():
279
  data_clustering = gr.components.Dataframe(
280
- datatype=["markdown"] * 500,
 
281
  type="pandas",
 
282
  )
283
  with gr.Row():
284
  data_run = gr.Button("Refresh")
285
  task_clustering = gr.Variable(value="Clustering")
286
- metric_clustering = gr.Variable(value="v_measure")
287
  data_run.click(
288
  get_mteb_data,
289
- inputs=[task_clustering, metric_clustering],
290
  outputs=data_clustering,
291
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
292
  with gr.TabItem("Retrieval"):
293
  with gr.Row():
294
  gr.Markdown("""Leaderboard for Retrieval""")
295
  with gr.Row():
296
  data_retrieval = gr.components.Dataframe(
297
- datatype=["markdown"] * 500,
 
298
  type="pandas",
299
  )
300
  with gr.Row():
301
  data_run = gr.Button("Refresh")
302
  task_retrieval = gr.Variable(value="Retrieval")
303
- metric_retrieval = gr.Variable(value="ndcg_at_10")
304
  data_run.click(
305
- get_mteb_data, inputs=[task_retrieval, metric_retrieval], outputs=data_retrieval
306
  )
307
  with gr.TabItem("Reranking"):
308
  with gr.Row():
309
  gr.Markdown("""Leaderboard for Reranking""")
310
  with gr.Row():
311
  data_reranking = gr.components.Dataframe(
312
- datatype=["markdown"] * 500,
 
313
  type="pandas",
314
- # col_count=(12, "fixed"),
315
  )
316
  with gr.Row():
317
  data_run = gr.Button("Refresh")
318
  task_reranking = gr.Variable(value="Reranking")
319
  metric_reranking = gr.Variable(value="map")
320
  data_run.click(
321
- get_mteb_data, inputs=[task_reranking, metric_reranking], outputs=data_reranking
322
  )
323
  with gr.TabItem("STS"):
324
  with gr.TabItem("English"):
@@ -326,17 +362,18 @@ with block:
326
  gr.Markdown("""Leaderboard for STS""")
327
  with gr.Row():
328
  data_sts_en = gr.components.Dataframe(
329
- datatype=["markdown"] * 500,
 
330
  type="pandas",
 
331
  )
332
  with gr.Row():
333
  data_run_en = gr.Button("Refresh")
334
  task_sts_en = gr.Variable(value="STS")
335
- metric_sts_en = gr.Variable(value="cos_sim_spearman")
336
  lang_sts_en = gr.Variable(value=["en", "en-en"])
337
  data_run.click(
338
  get_mteb_data,
339
- inputs=[task_sts_en, metric_sts_en, lang_sts_en],
340
  outputs=data_sts_en,
341
  )
342
  with gr.TabItem("Multilingual"):
@@ -344,49 +381,49 @@ with block:
344
  gr.Markdown("""Leaderboard for STS""")
345
  with gr.Row():
346
  data_sts = gr.components.Dataframe(
347
- datatype=["markdown"] * 500,
348
  type="pandas",
349
  )
350
  with gr.Row():
351
  data_run = gr.Button("Refresh")
352
  task_sts = gr.Variable(value="STS")
353
- metric_sts = gr.Variable(value="cos_sim_spearman")
354
- data_run.click(get_mteb_data, inputs=[task_sts, metric_sts], outputs=data_sts)
355
  with gr.TabItem("Summarization"):
356
  with gr.Row():
357
  gr.Markdown("""Leaderboard for Summarization""")
358
  with gr.Row():
359
  data_summarization = gr.components.Dataframe(
360
- datatype=["markdown"] * 500,
 
361
  type="pandas",
 
362
  )
363
  with gr.Row():
364
  data_run = gr.Button("Refresh")
365
  task_summarization = gr.Variable(value="Summarization")
366
- metric_summarization = gr.Variable(value="cos_sim_spearman")
367
  data_run.click(
368
  get_mteb_data,
369
- inputs=[task_summarization, metric_summarization],
370
  outputs=data_summarization,
371
  )
372
  # running the function on page load in addition to when the button is clicked
373
- #block.load(
374
- # get_mteb_data,
375
- # inputs=[task_classification_en, metric_classification_en],
376
- # outputs=data_classification_en,
377
- # show_progress=False,
378
- #)
379
- block.load(
380
- get_mteb_data,
381
- inputs=[task_classification, metric_classification],
382
- outputs=data_classification,
383
- )
384
- block.load(get_mteb_data, inputs=[task_clustering, metric_clustering], outputs=data_clustering)
385
- block.load(get_mteb_data, inputs=[task_retrieval, metric_retrieval], outputs=data_retrieval)
386
- block.load(get_mteb_data, inputs=[task_reranking, metric_reranking], outputs=data_reranking)
387
- block.load(get_mteb_data, inputs=[task_sts, metric_sts], outputs=data_sts)
388
- block.load(
389
- get_mteb_data, inputs=[task_summarization, metric_summarization], outputs=data_summarization
390
- )
391
 
392
  block.launch()
 
 
 
 
 
 
 
 
 
 
 
96
 
97
  TASK_LIST_EN = TASK_LIST_CLASSIFICATION + TASK_LIST_CLUSTERING + TASK_LIST_PAIR_CLASSIFICATION + TASK_LIST_RERANKING + TASK_LIST_RETRIEVAL + TASK_LIST_STS + TASK_LIST_SUMMARIZATION
98
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  TASK_TO_METRIC = {
100
  "BitextMining": "f1",
101
  "Clustering": "v_measure",
 
107
  "Summarization": "cos_sim_spearman",
108
  }
109
 
110
+ def make_clickable_model(model_name):
111
+ # Remove user from model name
112
+ model_name_show = " ".join(model_name.split("/")[1:])
113
+ link = "https://huggingface.co/" + model_name
114
+ return (
115
+ f'<a target="_blank" style="text-decoration: underline" href="{link}">{model_name_show}</a>'
116
+ )
117
+
118
+
119
+ def get_mteb_data(tasks=["Clustering"], langs=[], cast_to_str=True, task_to_metric=TASK_TO_METRIC):
120
  api = HfApi()
121
  models = api.list_models(filter="mteb")
122
  df_list = []
 
137
  # {"type": "f1", "value": 38.809586587791664},
138
  # ],
139
  # },
 
140
  # Use "get" instead of dict indexing to skip incompat metadata instead of erroring out
 
141
  task_results = [sub_res for sub_res in meta["model-index"][0]["results"] if (sub_res.get("task", {}).get("type", "") in tasks) and (sub_res.get("dataset", {}).get("config", "default") in ("default", *langs))]
142
  out = [{res["dataset"]["name"].replace("MTEB ", ""): [round(score["value"], 2) for score in res["metrics"] if score["type"] == task_to_metric.get(res["task"]["type"])][0]} for res in task_results]
143
  #else:
 
164
  cols = sorted(list(df.columns))
165
  cols.insert(0, cols.pop(cols.index("Model")))
166
  df = df[cols]
 
167
  df.fillna("", inplace=True)
168
  if cast_to_str:
169
  return df.astype(str) # Cast to str as Gradio does not accept floats
170
  return df
171
 
172
+ def get_mteb_average(get_all_avgs=False):
173
+ global DATA_OVERALL, DATA_CLASSIFICATION_EN, DATA_CLUSTERING, DATA_PAIR_CLASSIFICATION, DATA_RERANKING, DATA_RETRIEVAL, DATA_STS_EN, DATA_SUMMARIZATION
174
+ DATA_OVERALL = get_mteb_data(
175
+ tasks=[
176
+ "Classification",
177
+ "Clustering",
178
+ "PairClassification",
179
+ "Reranking",
180
+ "Retrieval",
181
+ "STS",
182
+ "Summarization",
183
+ ],
184
+ langs=["en", "en-en"],
185
+ cast_to_str=False
186
+ )
187
+
188
+ DATA_OVERALL.insert(1, "Average", DATA_OVERALL[TASK_LIST_EN].mean(axis=1, skipna=False))
189
+ DATA_OVERALL.insert(2, "Classification Average", DATA_OVERALL[TASK_LIST_CLASSIFICATION].mean(axis=1, skipna=False))
190
+ DATA_OVERALL.insert(3, "Clustering Average", DATA_OVERALL[TASK_LIST_CLUSTERING].mean(axis=1, skipna=False))
191
+ DATA_OVERALL.insert(4, "Pair Classification Average", DATA_OVERALL[TASK_LIST_PAIR_CLASSIFICATION].mean(axis=1, skipna=False))
192
+ DATA_OVERALL.insert(5, "Reranking Average", DATA_OVERALL[TASK_LIST_RERANKING].mean(axis=1, skipna=False))
193
+ DATA_OVERALL.insert(6, "Retrieval Average", DATA_OVERALL[TASK_LIST_RETRIEVAL].mean(axis=1, skipna=False))
194
+ DATA_OVERALL.insert(7, "STS Average", DATA_OVERALL[TASK_LIST_STS].mean(axis=1, skipna=False))
195
+ DATA_OVERALL.insert(8, "Summarization Average", DATA_OVERALL[TASK_LIST_SUMMARIZATION].mean(axis=1, skipna=False))
196
+ DATA_OVERALL.sort_values("Average", ascending=False, inplace=True)
197
+ # Start ranking from 1
198
+ DATA_OVERALL.insert(0, "Rank", list(range(1, len(DATA_OVERALL) + 1)))
199
 
200
+ DATA_OVERALL = DATA_OVERALL.round(2).astype(str)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
201
 
202
+ DATA_CLASSIFICATION_EN = DATA_OVERALL[["Model"] + TASK_LIST_CLASSIFICATION]
203
+ DATA_CLUSTERING = DATA_OVERALL[["Model"] + TASK_LIST_CLUSTERING]
204
+ DATA_PAIR_CLASSIFICATION = DATA_OVERALL[["Model"] + TASK_LIST_PAIR_CLASSIFICATION]
205
+ DATA_RERANKING = DATA_OVERALL[["Model"] + TASK_LIST_RERANKING]
206
+ DATA_RETRIEVAL = DATA_OVERALL[["Model"] + TASK_LIST_RETRIEVAL]
207
+ DATA_STS_EN = DATA_OVERALL[["Model"] + TASK_LIST_STS]
208
+ DATA_SUMMARIZATION = DATA_OVERALL[["Model"] + TASK_LIST_SUMMARIZATION]
209
 
210
+ DATA_OVERALL = DATA_OVERALL[["Rank", "Model", "Average", "Classification Average", "Clustering Average", "Pair Classification Average", "Reranking Average", "Retrieval Average", "STS Average", "Summarization Average"]]
211
 
212
+ return DATA_OVERALL
213
 
214
+ get_mteb_average()
215
  block = gr.Blocks()
216
 
217
+
218
  with block:
219
  gr.Markdown(
220
+ """MTEB Leaderboard. See <a href="https://huggingface.co/Gradio-Blocks" target="_blank" style="text-decoration: underline">Blocks Party Event</a>"""
221
  )
222
  with gr.Tabs():
223
  with gr.TabItem("Overall"):
 
226
  with gr.Row():
227
  data_overall = gr.components.Dataframe(
228
  DATA_OVERALL,
229
+ datatype=["markdown"] * len(DATA_OVERALL.columns) * 2,
230
  type="pandas",
231
+ #col_count=(len(DATA_OVERALL.columns), "fixed"),
232
  wrap=True,
233
  )
234
+ with gr.Row():
235
+ data_run = gr.Button("Refresh")
236
+ data_run.click(get_mteb_average, inputs=None, outputs=data_overall)
237
+ with gr.TabItem("BitextMining"):
238
+ with gr.Row():
239
+ gr.Markdown("""Leaderboard for Clustering""")
240
+ with gr.Row():
241
+ data_bitext_mining = gr.components.Dataframe(
242
+ datatype=["markdown"] * 500, # hack when we don't know how many columns
243
+ type="pandas",
244
+ )
245
+ with gr.Row():
246
+ data_run = gr.Button("Refresh")
247
+ task_bitext_mining = gr.Variable(value="BitextMining")
248
+ data_run.click(
249
+ get_mteb_data,
250
+ inputs=[task_bitext_mining],
251
+ outputs=data_bitext_mining,
252
+ )
253
  with gr.TabItem("Classification"):
254
  with gr.TabItem("English"):
255
  with gr.Row():
 
257
  with gr.Row():
258
  data_classification_en = gr.components.Dataframe(
259
  DATA_CLASSIFICATION_EN,
260
+ datatype=["markdown"] * len(DATA_CLASSIFICATION_EN.columns) * 20,
261
  type="pandas",
 
262
  )
263
  with gr.Row():
264
+ data_run_classification_en = gr.Button("Refresh")
265
  task_classification_en = gr.Variable(value="Classification")
 
266
  lang_classification_en = gr.Variable(value=["en"])
267
+ data_run_classification_en.click(
268
  get_mteb_data,
269
  inputs=[
270
  task_classification_en,
 
271
  lang_classification_en,
272
  ],
273
  outputs=data_classification_en,
 
277
  gr.Markdown("""Multilingual Classification""")
278
  with gr.Row():
279
  data_classification = gr.components.Dataframe(
280
+ datatype=["markdown"] * 500, # hack when we don't know how many columns
281
  type="pandas",
282
  )
283
  with gr.Row():
284
  data_run = gr.Button("Refresh")
285
  task_classification = gr.Variable(value="Classification")
 
286
  data_run.click(
287
  get_mteb_data,
288
+ inputs=[task_classification],
289
  outputs=data_classification,
290
  )
291
  with gr.TabItem("Clustering"):
 
293
  gr.Markdown("""Leaderboard for Clustering""")
294
  with gr.Row():
295
  data_clustering = gr.components.Dataframe(
296
+ DATA_CLUSTERING,
297
+ datatype="markdown",
298
  type="pandas",
299
+ col_count=(len(DATA_CLUSTERING.columns), "fixed"),
300
  )
301
  with gr.Row():
302
  data_run = gr.Button("Refresh")
303
  task_clustering = gr.Variable(value="Clustering")
 
304
  data_run.click(
305
  get_mteb_data,
306
+ inputs=[task_clustering],
307
  outputs=data_clustering,
308
  )
309
+ with gr.TabItem("Pair Classification"):
310
+ with gr.Row():
311
+ gr.Markdown("""Leaderboard for Pair Classification""")
312
+ with gr.Row():
313
+ data_pair_classification = gr.components.Dataframe(
314
+ DATA_PAIR_CLASSIFICATION,
315
+ datatype="markdown",
316
+ type="pandas",
317
+ col_count=(len(DATA_PAIR_CLASSIFICATION.columns), "fixed"),
318
+ )
319
+ with gr.Row():
320
+ data_run = gr.Button("Refresh")
321
+ task_pair_classification = gr.Variable(value="Clustering")
322
+ data_run.click(
323
+ get_mteb_data,
324
+ inputs=[task_pair_classification],
325
+ outputs=data_pair_classification,
326
+ )
327
  with gr.TabItem("Retrieval"):
328
  with gr.Row():
329
  gr.Markdown("""Leaderboard for Retrieval""")
330
  with gr.Row():
331
  data_retrieval = gr.components.Dataframe(
332
+ DATA_RETRIEVAL,
333
+ datatype=["markdown"] * len(DATA_RETRIEVAL.columns) * 2,
334
  type="pandas",
335
  )
336
  with gr.Row():
337
  data_run = gr.Button("Refresh")
338
  task_retrieval = gr.Variable(value="Retrieval")
 
339
  data_run.click(
340
+ get_mteb_data, inputs=[task_retrieval], outputs=data_retrieval
341
  )
342
  with gr.TabItem("Reranking"):
343
  with gr.Row():
344
  gr.Markdown("""Leaderboard for Reranking""")
345
  with gr.Row():
346
  data_reranking = gr.components.Dataframe(
347
+ DATA_RERANKING,
348
+ datatype="markdown",
349
  type="pandas",
350
+ col_count=(len(DATA_RERANKING.columns), "fixed"),
351
  )
352
  with gr.Row():
353
  data_run = gr.Button("Refresh")
354
  task_reranking = gr.Variable(value="Reranking")
355
  metric_reranking = gr.Variable(value="map")
356
  data_run.click(
357
+ get_mteb_data, inputs=[task_reranking], outputs=data_reranking
358
  )
359
  with gr.TabItem("STS"):
360
  with gr.TabItem("English"):
 
362
  gr.Markdown("""Leaderboard for STS""")
363
  with gr.Row():
364
  data_sts_en = gr.components.Dataframe(
365
+ DATA_STS_EN,
366
+ datatype="markdown",
367
  type="pandas",
368
+ col_count=(len(DATA_STS_EN.columns), "fixed"),
369
  )
370
  with gr.Row():
371
  data_run_en = gr.Button("Refresh")
372
  task_sts_en = gr.Variable(value="STS")
 
373
  lang_sts_en = gr.Variable(value=["en", "en-en"])
374
  data_run.click(
375
  get_mteb_data,
376
+ inputs=[task_sts_en, lang_sts_en],
377
  outputs=data_sts_en,
378
  )
379
  with gr.TabItem("Multilingual"):
 
381
  gr.Markdown("""Leaderboard for STS""")
382
  with gr.Row():
383
  data_sts = gr.components.Dataframe(
384
+ datatype=["markdown"] * 50, # hack when we don't know how many columns
385
  type="pandas",
386
  )
387
  with gr.Row():
388
  data_run = gr.Button("Refresh")
389
  task_sts = gr.Variable(value="STS")
390
+ data_run.click(get_mteb_data, inputs=[task_sts], outputs=data_sts)
 
391
  with gr.TabItem("Summarization"):
392
  with gr.Row():
393
  gr.Markdown("""Leaderboard for Summarization""")
394
  with gr.Row():
395
  data_summarization = gr.components.Dataframe(
396
+ DATA_SUMMARIZATION,
397
+ datatype="markdown",
398
  type="pandas",
399
+ col_count=(len(DATA_SUMMARIZATION.columns), "fixed"),
400
  )
401
  with gr.Row():
402
  data_run = gr.Button("Refresh")
403
  task_summarization = gr.Variable(value="Summarization")
 
404
  data_run.click(
405
  get_mteb_data,
406
+ inputs=[task_summarization],
407
  outputs=data_summarization,
408
  )
409
  # running the function on page load in addition to when the button is clicked
410
+ block.load(get_mteb_data, inputs=[task_bitext_mining], outputs=data_bitext_mining)
411
+ block.load(get_mteb_data, inputs=[task_classification_en, lang_classification_en], outputs=data_classification_en)
412
+ block.load(get_mteb_data, inputs=[task_classification], outputs=data_classification)
413
+ block.load(get_mteb_data, inputs=[task_clustering], outputs=data_clustering)
414
+ block.load(get_mteb_data, inputs=[task_retrieval], outputs=data_retrieval)
415
+ block.load(get_mteb_data, inputs=[task_reranking], outputs=data_reranking)
416
+ block.load(get_mteb_data, inputs=[task_sts], outputs=data_sts)
417
+ block.load(get_mteb_data, inputs=[task_summarization], outputs=data_summarization)
 
 
 
 
 
 
 
 
 
 
418
 
419
  block.launch()
420
+
421
+
422
+ # Possible changes:
423
+ # Could check whether tasks are valid (currently users can just invent new tasks; the same applies to languages)
424
+ # Could make it load in the background without the Gradio logo, closer to how the Deep RL space does it
425
+ # Could add graphs / other visual content
426
+
427
+ # Sources:
428
+ # https://huggingface.co/spaces/gradio/leaderboard
429
+ # https://huggingface.co/spaces/huggingface-projects/Deep-Reinforcement-Learning-Leaderboard