eduagarcia committed on
Commit
879c7e7
1 Parent(s): 6f8ad2f

Clean up invalid tasks and columns when loading the leaderboard and when using the refresh button

Browse files
Files changed (1) hide show
  1. app.py +14 -2
app.py CHANGED
@@ -242,8 +242,12 @@ def get_mteb_data(tasks=["Clustering"], langs=[], datasets=[], fillna=True, add_
242
  df = df.groupby("Model", as_index=False).first()
243
  # Put 'Model' column first
244
  cols = sorted(list(df.columns))
 
 
 
 
245
  i = 0
246
- for column in ["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)", "Embedding Dimensions", "Max Tokens"]:
247
  if column in cols:
248
  cols.insert(i, cols.pop(cols.index(column)))
249
  i += 1
@@ -310,6 +314,7 @@ for board, board_config in BOARDS_CONFIG.items():
310
  else:
311
  for task_category, task_category_list in board_config["tasks"].items():
312
  data_task_category = get_mteb_data(tasks=[task_category], datasets=task_category_list, refresh=False)
 
313
  boards_data[board]["data_tasks"][task_category] = data_task_category
314
  all_data_tasks.append(data_task_category)
315
 
@@ -364,6 +369,13 @@ Each inner tab can have the following keys:
364
  - refresh: The function to refresh the leaderboard
365
  """
366
 
 
 
 
 
 
 
 
367
  data = {
368
  "Overall": {"metric": "Various, refer to task tabs", "data": []}
369
  }
@@ -399,7 +411,7 @@ for board, board_config in BOARDS_CONFIG.items():
399
  "language_long": board_config["language_long"],
400
  "description": f"**{task_category} {board_pretty_name}** {task_icon}{board_icon}",
401
  "data": boards_data[board]["data_tasks"][task_category],
402
- "refresh": partial(get_mteb_data, tasks=[task_category], datasets=task_category_list),
403
  "credits": credits,
404
  })
405
 
 
242
  df = df.groupby("Model", as_index=False).first()
243
  # Put 'Model' column first
244
  cols = sorted(list(df.columns))
245
+ base_columns = ["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)", "Embedding Dimensions", "Max Tokens"]
246
+ if len(datasets) > 0:
247
+ #filter invalid columns
248
+ cols = [col for col in cols if col in base_columns + datasets]
249
  i = 0
250
+ for column in base_columns:
251
  if column in cols:
252
  cols.insert(i, cols.pop(cols.index(column)))
253
  i += 1
 
314
  else:
315
  for task_category, task_category_list in board_config["tasks"].items():
316
  data_task_category = get_mteb_data(tasks=[task_category], datasets=task_category_list, refresh=False)
317
+ data_task_category.drop(columns=["Embedding Dimensions", "Max Tokens"], inplace=True)
318
  boards_data[board]["data_tasks"][task_category] = data_task_category
319
  all_data_tasks.append(data_task_category)
320
 
 
369
  - refresh: The function to refresh the leaderboard
370
  """
371
 
372
+ def get_refresh_function(task_category, task_list):
373
+ def _refresh():
374
+ data_task_category = get_mteb_data(tasks=[task_category], datasets=task_list)
375
+ data_task_category.drop(columns=["Embedding Dimensions", "Max Tokens"], inplace=True)
376
+ return data_task_category
377
+ return _refresh
378
+
379
  data = {
380
  "Overall": {"metric": "Various, refer to task tabs", "data": []}
381
  }
 
411
  "language_long": board_config["language_long"],
412
  "description": f"**{task_category} {board_pretty_name}** {task_icon}{board_icon}",
413
  "data": boards_data[board]["data_tasks"][task_category],
414
+ "refresh": get_refresh_function(task_category, task_category_list),
415
  "credits": credits,
416
  })
417