# Clémentine
# added judge as selection, passed tags to lower at extraction
# e5438c2
from collections import defaultdict
from src.leaderboards.saved import leaderboard_to_tags
from src.static.env import API
def group_all_tags(input_tags: list[str]) -> dict:
    """Bucket raw tags under their category, following the division in the README.

    A categorised tag has the form ``category:value``. The bare tag
    ``"arena"`` is special-cased and recorded as ``"humans"`` under the
    ``"judge"`` category. Any other tag that does not split into exactly
    two parts on ``":"`` is ignored.

    Args:
        input_tags (list[str]): raw tags to sort into categories

    Returns:
        dict: category mapped to the list of its lower-cased values, in the
            order they were encountered (duplicates are kept)
    """
    by_category = defaultdict(list)
    for raw_tag in input_tags:
        if raw_tag == "arena":
            by_category["judge"].append("humans")
            continue
        pieces = raw_tag.split(":")
        # Zero or several separators: not a "category:value" tag, skip it.
        if len(pieces) != 2:
            continue
        prefix, label = pieces
        by_category[prefix].append(label.lower())
    return by_category
def get_leaderboard_info() -> tuple[dict, dict]:
    """Looks up all spaces tagged as leaderboards or arenas on the hub,
    merges them with the manually saved leaderboards, and homogenizes
    their tags.

    Spaces are fetched with ``API.list_spaces`` (once filtered on
    ``"leaderboard"``, once on ``"arena"``). When a name appears several
    times, only its first occurrence is processed, so hub entries take
    precedence over saved ones; the saved tags for that name are still
    appended to the hub tags before grouping.

    Returns:
        tuple[dict, dict]:
            - leaderboard name to its sorted, de-duplicated list of
              ``"category:tag"`` strings (a leaderboard with no
              categorised tag has no entry);
            - category to tag to sorted, de-duplicated list of leaderboard
              names, plus the extra key ``"all"`` holding the sorted list
              of every leaderboard name seen.
    """
    hub_leaderboards = [
        (s.id, s.tags) for s in API.list_spaces(filter=["leaderboard"])
    ]
    hub_arenas = [
        (s.id, s.tags) for s in API.list_spaces(filter=["arena"])
    ]
    saved_leaderboards = list(leaderboard_to_tags.items())

    # A set keeps the duplicate check O(1) per leaderboard.
    seen_leaderboards = set()
    leaderboard_to_info = defaultdict(list)
    info_to_leaderboard = defaultdict(lambda: defaultdict(list))

    for name, tags in hub_leaderboards + hub_arenas + saved_leaderboards:
        # If we have a duplicate between the leaderboards from the hub
        # (leaderboards, arenas) and the ones we saved manually, we use the
        # version from the hub, which comes first in the iteration order.
        if name in seen_leaderboards:
            continue
        seen_leaderboards.add(name)

        # Work on a copy: for saved entries `tags` is the very object stored
        # in `leaderboard_to_tags`, so extending it in place would grow the
        # saved data on every call.
        all_tags = list(tags)
        # If the leaderboard has its own tags plus saved ones, aggregate them.
        if name in leaderboard_to_tags:
            all_tags += leaderboard_to_tags[name]

        for category, values in group_all_tags(all_tags).items():
            for value in values:
                info_to_leaderboard[category][value].append(name)
                leaderboard_to_info[name].append(f"{category}:{value}")

    # De-duplicate and sort every collected list. Only existing keys are
    # reassigned, so the dicts do not change size while being iterated.
    for leaderboard, infos in leaderboard_to_info.items():
        leaderboard_to_info[leaderboard] = sorted(set(infos))

    for category, category_dict in info_to_leaderboard.items():
        for value, space_list in category_dict.items():
            info_to_leaderboard[category][value] = sorted(set(space_list))

    # NOTE: "all" maps to a flat list (not a tag dict) and would replace a
    # tag category literally named "all".
    info_to_leaderboard["all"] = sorted(seen_leaderboards)

    return leaderboard_to_info, info_to_leaderboard