Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
eduagarcia
committed on
Commit
•
879c7e7
1
Parent(s):
6f8ad2f
Clean up some invalid tasks and columns when loading the leaderboard and when using the refresh button
Browse files
app.py
CHANGED
@@ -242,8 +242,12 @@ def get_mteb_data(tasks=["Clustering"], langs=[], datasets=[], fillna=True, add_
|
|
242 |
df = df.groupby("Model", as_index=False).first()
|
243 |
# Put 'Model' column first
|
244 |
cols = sorted(list(df.columns))
|
|
|
|
|
|
|
|
|
245 |
i = 0
|
246 |
-
for column in
|
247 |
if column in cols:
|
248 |
cols.insert(i, cols.pop(cols.index(column)))
|
249 |
i += 1
|
@@ -310,6 +314,7 @@ for board, board_config in BOARDS_CONFIG.items():
|
|
310 |
else:
|
311 |
for task_category, task_category_list in board_config["tasks"].items():
|
312 |
data_task_category = get_mteb_data(tasks=[task_category], datasets=task_category_list, refresh=False)
|
|
|
313 |
boards_data[board]["data_tasks"][task_category] = data_task_category
|
314 |
all_data_tasks.append(data_task_category)
|
315 |
|
@@ -364,6 +369,13 @@ Each inner tab can have the following keys:
|
|
364 |
- refresh: The function to refresh the leaderboard
|
365 |
"""
|
366 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
367 |
data = {
|
368 |
"Overall": {"metric": "Various, refer to task tabs", "data": []}
|
369 |
}
|
@@ -399,7 +411,7 @@ for board, board_config in BOARDS_CONFIG.items():
|
|
399 |
"language_long": board_config["language_long"],
|
400 |
"description": f"**{task_category} {board_pretty_name}** {task_icon}{board_icon}",
|
401 |
"data": boards_data[board]["data_tasks"][task_category],
|
402 |
-
"refresh":
|
403 |
"credits": credits,
|
404 |
})
|
405 |
|
|
|
242 |
df = df.groupby("Model", as_index=False).first()
|
243 |
# Put 'Model' column first
|
244 |
cols = sorted(list(df.columns))
|
245 |
+
base_columns = ["Model", "Model Size (Million Parameters)", "Memory Usage (GB, fp32)", "Embedding Dimensions", "Max Tokens"]
|
246 |
+
if len(datasets) > 0:
|
247 |
+
#filter invalid columns
|
248 |
+
cols = [col for col in cols if col in base_columns + datasets]
|
249 |
i = 0
|
250 |
+
for column in base_columns:
|
251 |
if column in cols:
|
252 |
cols.insert(i, cols.pop(cols.index(column)))
|
253 |
i += 1
|
|
|
314 |
else:
|
315 |
for task_category, task_category_list in board_config["tasks"].items():
|
316 |
data_task_category = get_mteb_data(tasks=[task_category], datasets=task_category_list, refresh=False)
|
317 |
+
data_task_category.drop(columns=["Embedding Dimensions", "Max Tokens"], inplace=True)
|
318 |
boards_data[board]["data_tasks"][task_category] = data_task_category
|
319 |
all_data_tasks.append(data_task_category)
|
320 |
|
|
|
369 |
- refresh: The function to refresh the leaderboard
|
370 |
"""
|
371 |
|
372 |
+
def get_refresh_function(task_category, task_list):
|
373 |
+
def _refresh():
|
374 |
+
data_task_category = get_mteb_data(tasks=[task_category], datasets=task_list)
|
375 |
+
data_task_category.drop(columns=["Embedding Dimensions", "Max Tokens"], inplace=True)
|
376 |
+
return data_task_category
|
377 |
+
return _refresh
|
378 |
+
|
379 |
data = {
|
380 |
"Overall": {"metric": "Various, refer to task tabs", "data": []}
|
381 |
}
|
|
|
411 |
"language_long": board_config["language_long"],
|
412 |
"description": f"**{task_category} {board_pretty_name}** {task_icon}{board_icon}",
|
413 |
"data": boards_data[board]["data_tasks"][task_category],
|
414 |
+
"refresh": get_refresh_function(task_category, task_category_list),
|
415 |
"credits": credits,
|
416 |
})
|
417 |
|