import dataclasses import datetime import operator import pathlib import pandas as pd import tqdm.auto import yaml from huggingface_hub import HfApi from constants import ( OWNER_CHOICES, SLEEP_TIME_INT_TO_STR, SLEEP_TIME_STR_TO_INT, WHOAMI, ) @dataclasses.dataclass(frozen=True) class DemoInfo: space_id: str url: str title: str owner: str sdk: str sdk_version: str likes: int status: str last_modified: str sleep_time: int replicas: int private: bool hardware: str suggested_hardware: str created: str = "" arxiv: list[str] = dataclasses.field(default_factory=list) github: list[str] = dataclasses.field(default_factory=list) tags: list[str] = dataclasses.field(default_factory=list) def __post_init__(self): object.__setattr__(self, "last_modified", DemoInfo.convert_timestamp(self.last_modified)) object.__setattr__(self, "created", DemoInfo.convert_timestamp(self.created)) @staticmethod def convert_timestamp(timestamp: str) -> str: try: return datetime.datetime.strptime(timestamp, "%Y-%m-%dT%H:%M:%S.%fZ").strftime("%Y/%m/%d %H:%M:%S") except ValueError: return timestamp @classmethod def from_space_id(cls, space_id: str) -> "DemoInfo": api = HfApi() space_info = api.space_info(repo_id=space_id) card = space_info.cardData runtime = space_info.runtime resources = runtime["resources"] return cls( space_id=space_id, url=f"https://huggingface.co/spaces/{space_id}", title=card["title"] if "title" in card else "", owner=space_id.split("/")[0], sdk=card["sdk"], sdk_version=card.get("sdk_version", ""), likes=space_info.likes, status=runtime["stage"], last_modified=space_info.lastModified, sleep_time=runtime["gcTimeout"] or 0, replicas=resources["replicas"] if resources is not None else 0, private=space_info.private, hardware=runtime["hardware"]["current"] or runtime["hardware"]["requested"], suggested_hardware=card.get("suggested_hardware", ""), ) def get_df_from_yaml(path: pathlib.Path | str) -> pd.DataFrame: with pathlib.Path(path).open() as f: data = yaml.safe_load(f) demo_info 
def get_df_from_yaml(path: pathlib.Path | str) -> pd.DataFrame:
    """Load the Space list from a YAML file and return one row per Space.

    The YAML document is read as a mapping whose keys are Space ids. For each
    key the Space's details are fetched with ``DemoInfo.from_space_id`` and
    then overlaid with the values given for that key in the file.

    Args:
        path: Location of the YAML file.

    Returns:
        A DataFrame built from the ``dataclasses.asdict`` form of every
        merged ``DemoInfo``. It is empty when the file lists no Spaces.
    """
    with pathlib.Path(path).open() as f:
        # safe_load gives None for an empty document; treat that as no Spaces.
        data = yaml.safe_load(f) or {}

    rows = []
    for space_id, overrides in tqdm.auto.tqdm(data.items()):
        base_info = DemoInfo.from_space_id(space_id)
        # An entry written as a bare key has a None value; merge nothing then.
        merged = dataclasses.asdict(base_info) | (overrides or {})
        # Rebuilding through DemoInfo applies its normalisation to overrides.
        rows.append(dataclasses.asdict(DemoInfo(**merged)))
    return pd.DataFrame(rows)