Spaces:
Sleeping
Sleeping
from collections import defaultdict | |
from src.leaderboards.saved import leaderboard_to_tags | |
from src.static.env import API | |
def group_all_tags(input_tags: list[str]) -> dict:
    """Bucket raw ``category:value`` tags by their category.

    The special tag ``"arena"`` is recorded as the value ``"humans"`` under
    the ``"judge"`` category. Every other tag must contain exactly one ``:``
    separator; tags with zero or several separators are ignored. Values are
    lower-cased, categories are kept as written.

    Args:
        input_tags (list[str]): list of tags

    Returns:
        dict: category to tag list (a ``defaultdict(list)``)
    """
    grouped = defaultdict(list)
    for raw_tag in input_tags:
        if raw_tag == "arena":
            grouped["judge"].append("humans")
            continue

        pieces = raw_tag.split(":")
        # Only well-formed "category:value" tags are kept.
        if len(pieces) != 2:
            continue

        category, value = pieces
        grouped[category].append(value.lower())
    return grouped
def get_leaderboard_info() -> tuple[dict, dict]:
    """Look up all spaces tagged as leaderboards or arenas on the hub,
    merge them with the manually saved leaderboards, and homogenize their tags.

    Hub results (``API.list_spaces`` with the "leaderboard" and "arena"
    filters) are processed before the saved entries of ``leaderboard_to_tags``,
    so when a name appears more than once the first (hub) occurrence wins.
    Saved tags for that name are still appended to the hub tags.

    Returns:
        tuple[dict, dict]: ``(leaderboard_to_info, info_to_leaderboard)``.
            ``leaderboard_to_info`` maps a leaderboard name to a sorted,
            de-duplicated list of ``"category:tag"`` strings; names for which
            no tag could be grouped do not appear in it.
            ``info_to_leaderboard`` maps category -> tag -> sorted,
            de-duplicated list of leaderboard names, and additionally holds
            the sorted list of every processed name under the key ``"all"``.
    """
    leaderboards = [
        (space.id, space.tags)
        for space in API.list_spaces(filter=["leaderboard"])
    ]
    arenas = [
        (space.id, space.tags)
        for space in API.list_spaces(filter=["arena"])
    ]
    saved_leaderboards = list(leaderboard_to_tags.items())

    # A set gives O(1) duplicate detection (the order is rebuilt by sorting).
    seen_leaderboards = set()
    leaderboard_to_info = defaultdict(list)
    info_to_leaderboard = defaultdict(lambda: defaultdict(list))

    for name, tags in leaderboards + arenas + saved_leaderboards:
        # If we have a duplicate between the leaderboards from the hub
        # (leaderboards, arenas) and the ones we saved manually, we use the
        # version from the hub, which comes first in the iteration order.
        if name in seen_leaderboards:
            continue
        seen_leaderboards.add(name)

        # Work on a copy: extending `tags` in place would mutate the list owned
        # by the hub object or, for saved-only entries, the list stored in
        # `leaderboard_to_tags` itself (growing it on every call).
        # NOTE(review): `or []` assumes a space may expose no tag list at all;
        # confirm against the hub client.
        merged_tags = list(tags or [])
        # If the space has its own tags plus ones we saved, aggregate them.
        # Any duplicates this creates are removed by the set pass below.
        if name in leaderboard_to_tags:
            merged_tags.extend(leaderboard_to_tags[name])

        grouped_tags = group_all_tags(merged_tags)
        for category, values in grouped_tags.items():
            for value in values:
                info_to_leaderboard[category][value].append(name)
                leaderboard_to_info[name].append(f"{category}:{value}")

    # De-duplicate and sort every list. Only existing keys are reassigned, so
    # updating the dicts while iterating over them is safe.
    for leaderboard, info in leaderboard_to_info.items():
        leaderboard_to_info[leaderboard] = sorted(set(info))

    for category_dict in info_to_leaderboard.values():
        for value, space_list in category_dict.items():
            category_dict[value] = sorted(set(space_list))

    # Added after the loop above on purpose: "all" holds a flat list of names,
    # not a tag -> names mapping like the other categories.
    info_to_leaderboard["all"] = sorted(seen_leaderboards)

    return leaderboard_to_info, info_to_leaderboard