@dataclasses.dataclass(frozen=True)
class DemoInfo:
    """Immutable summary of a single Hugging Face Space.

    Instances are normally built with :meth:`from_space_id`, which queries
    the Hub, but they can also be constructed directly from keyword
    arguments. ``last_modified`` and ``created`` are normalised to the
    ``YYYY/MM/DD HH:MM:SS`` display format after initialisation.
    """

    space_id: str  # '<owner>/<name>' repository id
    url: str  # https://huggingface.co/spaces/<space_id>
    title: str  # card 'title', '' when absent
    owner: str  # part of space_id before the first '/'
    sdk: str  # card 'sdk'
    sdk_version: str  # card 'sdk_version', '' when absent
    likes: int
    status: str  # runtime 'stage'
    last_modified: str  # normalised in __post_init__
    sleep_time: int  # runtime 'gcTimeout', 0 when unset
    replicas: int  # runtime resources 'replicas', 0 without resources
    private: bool
    hardware: str  # current hardware, else the requested one
    suggested_hardware: str  # card 'suggested_hardware', '' when absent
    created: str = ''  # normalised in __post_init__
    arxiv: list[str] = dataclasses.field(default_factory=list)
    github: list[str] = dataclasses.field(default_factory=list)
    tags: list[str] = dataclasses.field(default_factory=list)

    def __post_init__(self):
        """Rewrite both timestamp fields into the display format."""
        # The dataclass is frozen, so regular attribute assignment is
        # blocked; object.__setattr__ is the way to adjust fields here.
        for field_name in ('last_modified', 'created'):
            object.__setattr__(
                self, field_name,
                DemoInfo.convert_timestamp(getattr(self, field_name)))

    @staticmethod
    def convert_timestamp(timestamp: str) -> str:
        """Convert a timestamp to ``YYYY/MM/DD HH:MM:SS``.

        Args:
            timestamp: Usually a string shaped like
                ``2023-01-02T03:04:05.000Z``. ``datetime.date`` /
                ``datetime.datetime`` objects are accepted as well and are
                formatted directly.

        Returns:
            The reformatted timestamp. Input that does not match the
            expected format (including an already converted string, an
            empty string, or an unsupported type) is returned unchanged.
        """
        display_format = '%Y/%m/%d %H:%M:%S'
        # datetime.datetime is a subclass of datetime.date, so this covers
        # both; strptime would raise TypeError for these objects.
        if isinstance(timestamp, datetime.date):
            return timestamp.strftime(display_format)
        try:
            parsed = datetime.datetime.strptime(timestamp,
                                                '%Y-%m-%dT%H:%M:%S.%fZ')
        except (TypeError, ValueError):
            # Best effort: keep whatever was passed in rather than failing
            # the construction of the whole record.
            return timestamp
        return parsed.strftime(display_format)

    @classmethod
    def from_space_id(cls, space_id: str) -> 'DemoInfo':
        """Build a ``DemoInfo`` by querying the Hub for ``space_id``.

        Args:
            space_id: Repository id of the Space, ``'<owner>/<name>'``.

        Returns:
            A ``DemoInfo`` populated from the Space's card data and runtime
            information. ``created``, ``arxiv``, ``github`` and ``tags``
            keep their defaults.
        """
        api = HfApi()
        space_info = api.space_info(repo_id=space_id)
        # NOTE(review): card data is treated as optional here so that a
        # Space without README metadata does not break the lookups below.
        card = space_info.cardData or {}
        runtime = space_info.runtime
        resources = runtime['resources']
        hardware = runtime['hardware']
        return cls(
            space_id=space_id,
            url=f'https://huggingface.co/spaces/{space_id}',
            title=card.get('title', ''),
            owner=space_id.split('/')[0],
            sdk=card.get('sdk', ''),
            sdk_version=card.get('sdk_version', ''),
            likes=space_info.likes,
            status=runtime['stage'],
            last_modified=space_info.lastModified,
            sleep_time=runtime['gcTimeout'] or 0,
            replicas=resources['replicas'] if resources is not None else 0,
            private=space_info.private,
            hardware=hardware['current'] or hardware['requested'],
            suggested_hardware=card.get('suggested_hardware', ''),
        )
[] for space_id in tqdm.auto.tqdm(list(data)): base_info = DemoInfo.from_space_id(space_id) info = DemoInfo(**(dataclasses.asdict(base_info) | data[space_id])) demo_info.append(info) return pd.DataFrame([dataclasses.asdict(info) for info in demo_info]) class Prettifier: @staticmethod def get_arxiv_link(links: list[str]) -> str: links = [ Prettifier.create_link(link.split('/')[-1], link) for link in links ] return '\n'.join(links) @staticmethod def get_github_link(links: list[str]) -> str: links = [Prettifier.create_link('github', link) for link in links] return '\n'.join(links) @staticmethod def get_tag_list(tags: list[str]) -> str: return ', '.join(tags) @staticmethod def create_link(text: str, url: str) -> str: return f'{text}' @staticmethod def to_div(text: str | None, category_name: str) -> str: if text is None: text = '' class_name = f'{category_name}-{text.lower()}' return f'