Muennighoff committed on
Commit
17e0108
1 Parent(s): a51beac

Fix dataframe dtypes for proper sorting

Browse files
Files changed (1) hide show
  1. app.py +17 -17
app.py CHANGED
@@ -206,7 +206,7 @@ for model in EXTERNAL_MODELS:
206
  EXTERNAL_MODEL_RESULTS[model][task][metric].append({**base_dict, **ds_dict})
207
 
208
 
209
- def get_mteb_data(tasks=["Clustering"], langs=[], cast_to_str=True, task_to_metric=TASK_TO_METRIC):
210
  api = HfApi()
211
  models = api.list_models(filter="mteb")
212
  # Initialize list to models that we cannot fetch metadata from
@@ -255,8 +255,6 @@ def get_mteb_data(tasks=["Clustering"], langs=[], cast_to_str=True, task_to_metr
255
  cols.insert(0, cols.pop(cols.index("Model")))
256
  df = df[cols]
257
  df.fillna("", inplace=True)
258
- if cast_to_str:
259
- return df.astype(str) # Cast to str as Gradio does not accept floats
260
  return df
261
 
262
  def get_mteb_average():
@@ -272,7 +270,6 @@ def get_mteb_average():
272
  "Summarization",
273
  ],
274
  langs=["en", "en-en"],
275
- cast_to_str=False
276
  )
277
  # Approximation (Missing Bitext Mining & including some nans)
278
  NUM_SCORES = DATA_OVERALL.shape[0] * DATA_OVERALL.shape[1]
@@ -292,7 +289,7 @@ def get_mteb_average():
292
  # Start ranking from 1
293
  DATA_OVERALL.insert(0, "Rank", list(range(1, len(DATA_OVERALL) + 1)))
294
 
295
- DATA_OVERALL = DATA_OVERALL.round(2).astype(str)
296
 
297
  DATA_CLASSIFICATION_EN = DATA_OVERALL[["Model"] + TASK_LIST_CLASSIFICATION]
298
  DATA_CLUSTERING = DATA_OVERALL[["Model"] + TASK_LIST_CLUSTERING]
@@ -331,7 +328,7 @@ with block:
331
  with gr.Row():
332
  data_overall = gr.components.Dataframe(
333
  DATA_OVERALL,
334
- datatype=["markdown"] * len(DATA_OVERALL.columns) * 2,
335
  type="pandas",
336
  wrap=True,
337
  )
@@ -348,7 +345,7 @@ with block:
348
  """)
349
  with gr.Row():
350
  data_bitext_mining = gr.components.Dataframe(
351
- datatype=["markdown"] * 500, # hack when we don't know how many columns
352
  type="pandas",
353
  )
354
  with gr.Row():
@@ -371,7 +368,7 @@ with block:
371
  with gr.Row():
372
  data_classification_en = gr.components.Dataframe(
373
  DATA_CLASSIFICATION_EN,
374
- datatype=["markdown"] * len(DATA_CLASSIFICATION_EN.columns) * 20,
375
  type="pandas",
376
  )
377
  with gr.Row():
@@ -396,7 +393,7 @@ with block:
396
  """)
397
  with gr.Row():
398
  data_classification = gr.components.Dataframe(
399
- datatype=["markdown"] * 200, # hack when we don't know how many columns
400
  type="pandas",
401
  )
402
  with gr.Row():
@@ -418,7 +415,7 @@ with block:
418
  with gr.Row():
419
  data_clustering = gr.components.Dataframe(
420
  DATA_CLUSTERING,
421
- datatype=["markdown"] * len(DATA_CLUSTERING.columns) * 2,
422
  type="pandas",
423
  )
424
  with gr.Row():
@@ -440,7 +437,7 @@ with block:
440
  with gr.Row():
441
  data_pair_classification = gr.components.Dataframe(
442
  DATA_PAIR_CLASSIFICATION,
443
- datatype=["markdown"] * len(DATA_PAIR_CLASSIFICATION.columns) * 2,
444
  type="pandas",
445
  )
446
  with gr.Row():
@@ -462,7 +459,8 @@ with block:
462
  with gr.Row():
463
  data_retrieval = gr.components.Dataframe(
464
  DATA_RETRIEVAL,
465
- datatype=["markdown"] * len(DATA_RETRIEVAL.columns) * 2,
 
466
  type="pandas",
467
  )
468
  with gr.Row():
@@ -482,7 +480,7 @@ with block:
482
  with gr.Row():
483
  data_reranking = gr.components.Dataframe(
484
  DATA_RERANKING,
485
- datatype=["markdown"] * len(DATA_RERANKING.columns) * 2,
486
  type="pandas",
487
  )
488
  with gr.Row():
@@ -504,7 +502,7 @@ with block:
504
  with gr.Row():
505
  data_sts_en = gr.components.Dataframe(
506
  DATA_STS_EN,
507
- datatype=["markdown"] * len(DATA_STS_EN.columns) * 2,
508
  type="pandas",
509
  )
510
  with gr.Row():
@@ -526,7 +524,7 @@ with block:
526
  """)
527
  with gr.Row():
528
  data_sts = gr.components.Dataframe(
529
- datatype=["markdown"] * 50, # hack when we don't know how many columns
530
  type="pandas",
531
  )
532
  with gr.Row():
@@ -543,8 +541,8 @@ with block:
543
  """)
544
  with gr.Row():
545
  data_summarization = gr.components.Dataframe(
546
- DATA_SUMMARIZATION * len(DATA_SUMMARIZATION.columns) * 2,
547
- datatype="markdown",
548
  type="pandas",
549
  )
550
  with gr.Row():
@@ -564,6 +562,7 @@ with block:
564
  block.load(get_mteb_data, inputs=[task_classification_en, lang_classification_en], outputs=data_classification_en)
565
  block.load(get_mteb_data, inputs=[task_classification], outputs=data_classification)
566
  block.load(get_mteb_data, inputs=[task_clustering], outputs=data_clustering)
 
567
  block.load(get_mteb_data, inputs=[task_retrieval], outputs=data_retrieval)
568
  block.load(get_mteb_data, inputs=[task_reranking], outputs=data_reranking)
569
  block.load(get_mteb_data, inputs=[task_sts_en, lang_sts_en], outputs=data_sts_en)
@@ -577,6 +576,7 @@ block.launch()
577
  # Could check if tasks are valid (Currently users could just invent new tasks - similar for languages)
578
  # Could make it load in the background without the Gradio logo closer to the Deep RL space
579
  # Could add graphs / other visual content
 
580
 
581
  # Sources:
582
  # https://huggingface.co/spaces/gradio/leaderboard
 
206
  EXTERNAL_MODEL_RESULTS[model][task][metric].append({**base_dict, **ds_dict})
207
 
208
 
209
+ def get_mteb_data(tasks=["Clustering"], langs=[], task_to_metric=TASK_TO_METRIC):
210
  api = HfApi()
211
  models = api.list_models(filter="mteb")
212
  # Initialize list to models that we cannot fetch metadata from
 
255
  cols.insert(0, cols.pop(cols.index("Model")))
256
  df = df[cols]
257
  df.fillna("", inplace=True)
 
 
258
  return df
259
 
260
  def get_mteb_average():
 
270
  "Summarization",
271
  ],
272
  langs=["en", "en-en"],
 
273
  )
274
  # Approximation (Missing Bitext Mining & including some nans)
275
  NUM_SCORES = DATA_OVERALL.shape[0] * DATA_OVERALL.shape[1]
 
289
  # Start ranking from 1
290
  DATA_OVERALL.insert(0, "Rank", list(range(1, len(DATA_OVERALL) + 1)))
291
 
292
+ DATA_OVERALL = DATA_OVERALL.round(2)
293
 
294
  DATA_CLASSIFICATION_EN = DATA_OVERALL[["Model"] + TASK_LIST_CLASSIFICATION]
295
  DATA_CLUSTERING = DATA_OVERALL[["Model"] + TASK_LIST_CLUSTERING]
 
328
  with gr.Row():
329
  data_overall = gr.components.Dataframe(
330
  DATA_OVERALL,
331
+ datatype=["number", "markdown"] + ["number"] * len(DATA_OVERALL.columns),
332
  type="pandas",
333
  wrap=True,
334
  )
 
345
  """)
346
  with gr.Row():
347
  data_bitext_mining = gr.components.Dataframe(
348
+ datatype=["markdown"] + ["number"] * 500, # hack when we don't know how many columns
349
  type="pandas",
350
  )
351
  with gr.Row():
 
368
  with gr.Row():
369
  data_classification_en = gr.components.Dataframe(
370
  DATA_CLASSIFICATION_EN,
371
+ datatype=["markdown"] + ["number"] * len(DATA_CLASSIFICATION_EN.columns),
372
  type="pandas",
373
  )
374
  with gr.Row():
 
393
  """)
394
  with gr.Row():
395
  data_classification = gr.components.Dataframe(
396
+ datatype=["markdown"] + ["number"] * 200, # hack when we don't know how many columns
397
  type="pandas",
398
  )
399
  with gr.Row():
 
415
  with gr.Row():
416
  data_clustering = gr.components.Dataframe(
417
  DATA_CLUSTERING,
418
+ datatype=["markdown"] + ["number"] * len(DATA_CLUSTERING.columns),
419
  type="pandas",
420
  )
421
  with gr.Row():
 
437
  with gr.Row():
438
  data_pair_classification = gr.components.Dataframe(
439
  DATA_PAIR_CLASSIFICATION,
440
+ datatype=["markdown"] + ["number"] * len(DATA_PAIR_CLASSIFICATION.columns),
441
  type="pandas",
442
  )
443
  with gr.Row():
 
459
  with gr.Row():
460
  data_retrieval = gr.components.Dataframe(
461
  DATA_RETRIEVAL,
462
+ # Add support for more columns than existing as a buffer for CQADupstack & other Retrieval tasks (e.g. MSMARCOv2)
463
+ datatype=["markdown"] + ["number"] * len(DATA_RETRIEVAL.columns) * 2,
464
  type="pandas",
465
  )
466
  with gr.Row():
 
480
  with gr.Row():
481
  data_reranking = gr.components.Dataframe(
482
  DATA_RERANKING,
483
+ datatype=["markdown"] + ["number"] * len(DATA_RERANKING.columns),
484
  type="pandas",
485
  )
486
  with gr.Row():
 
502
  with gr.Row():
503
  data_sts_en = gr.components.Dataframe(
504
  DATA_STS_EN,
505
+ datatype=["markdown"] + ["number"] * len(DATA_STS_EN.columns),
506
  type="pandas",
507
  )
508
  with gr.Row():
 
524
  """)
525
  with gr.Row():
526
  data_sts = gr.components.Dataframe(
527
+ datatype=["markdown"] + ["number"] * 100, # hack when we don't know how many columns
528
  type="pandas",
529
  )
530
  with gr.Row():
 
541
  """)
542
  with gr.Row():
543
  data_summarization = gr.components.Dataframe(
544
+ DATA_SUMMARIZATION,
545
+ datatype=["markdown"] + ["number"] * 2,
546
  type="pandas",
547
  )
548
  with gr.Row():
 
562
  block.load(get_mteb_data, inputs=[task_classification_en, lang_classification_en], outputs=data_classification_en)
563
  block.load(get_mteb_data, inputs=[task_classification], outputs=data_classification)
564
  block.load(get_mteb_data, inputs=[task_clustering], outputs=data_clustering)
565
+ block.load(get_mteb_data, inputs=[task_pair_classification], outputs=data_pair_classification)
566
  block.load(get_mteb_data, inputs=[task_retrieval], outputs=data_retrieval)
567
  block.load(get_mteb_data, inputs=[task_reranking], outputs=data_reranking)
568
  block.load(get_mteb_data, inputs=[task_sts_en, lang_sts_en], outputs=data_sts_en)
 
576
  # Could check if tasks are valid (Currently users could just invent new tasks - similar for languages)
577
  # Could make it load in the background without the Gradio logo closer to the Deep RL space
578
  # Could add graphs / other visual content
579
+ # Could add verification marks
580
 
581
  # Sources:
582
  # https://huggingface.co/spaces/gradio/leaderboard