"""Collect metadata about Hugging Face Spaces and render it as filterable tables."""

import dataclasses
import datetime
import operator
import pathlib

import numpy as np
import pandas as pd
import tqdm.auto
import yaml
from huggingface_hub import HfApi

from constants import SLEEP_TIME_INT_TO_STR, SLEEP_TIME_STR_TO_INT


@dataclasses.dataclass(frozen=True)
class DemoInfo:
    """Immutable record describing one Space.

    ``last_modified`` and ``created`` are normalized to ``YYYY/MM/DD HH:MM:SS``
    strings right after construction (see ``__post_init__``).
    """

    space_id: str
    url: str
    title: str
    owner: str
    sdk: str
    sdk_version: str
    likes: int
    status: str
    last_modified: str
    sleep_time: int
    replicas: int
    private: bool
    hardware: str
    suggested_hardware: str
    created: str = ""

    def __post_init__(self):
        # The dataclass is frozen, so normal attribute assignment raises;
        # object.__setattr__ is the sanctioned way to normalize fields here.
        object.__setattr__(self, "last_modified", DemoInfo.convert_timestamp(self.last_modified))
        object.__setattr__(self, "created", DemoInfo.convert_timestamp(self.created))

    @staticmethod
    def convert_timestamp(timestamp: str | datetime.datetime) -> str:
        """Return ``timestamp`` formatted as ``YYYY/MM/DD HH:MM:SS``.

        ``datetime`` objects are formatted directly. Strings are parsed as
        ``%Y-%m-%dT%H:%M:%S.%fZ``; a string that does not match that layout
        (including one that is already converted, or empty) is returned
        unchanged.
        """
        display_format = "%Y/%m/%d %H:%M:%S"
        if isinstance(timestamp, datetime.datetime):
            return timestamp.strftime(display_format)
        try:
            parsed = datetime.datetime.strptime(timestamp, "%Y-%m-%dT%H:%M:%S.%fZ")
        except ValueError:
            return timestamp
        return parsed.strftime(display_format)

    @classmethod
    def from_space_id(cls, space_id: str) -> "DemoInfo":
        """Build a ``DemoInfo`` by querying the Hub for ``space_id``.

        Raises:
            KeyError: if the Space card has no ``sdk`` entry, or the runtime
                payload has no ``replicas`` entry.
        """
        api = HfApi()
        space_info = api.space_info(repo_id=space_id)
        card = space_info.cardData
        runtime = space_info.runtime
        replicas = runtime.raw["replicas"]
        return cls(
            space_id=space_id,
            url=f"https://huggingface.co/spaces/{space_id}",
            title=card.get("title", ""),
            owner=space_id.split("/")[0],
            sdk=card["sdk"],
            sdk_version=card.get("sdk_version", ""),
            likes=space_info.likes,
            status=runtime.stage,
            last_modified=space_info.lastModified,
            sleep_time=runtime.sleep_time or 0,
            # Fall back to the requested count when the current count is falsy.
            replicas=replicas["current"] or replicas["requested"],
            private=space_info.private,
            hardware=runtime.hardware or runtime.requested_hardware or "",
            suggested_hardware=card.get("suggested_hardware", ""),
            created=space_info.created_at,
        )


def get_df_from_yaml(path: pathlib.Path | str) -> pd.DataFrame:
    """Load a YAML mapping of ``space_id -> field overrides`` into a DataFrame.

    Each Space is looked up with ``DemoInfo.from_space_id`` and the YAML values
    for that Space replace the fetched fields of the same name. The result has
    one row per Space and one column per ``DemoInfo`` field; an empty YAML
    file yields an empty frame with those columns.
    """
    with pathlib.Path(path).open() as f:
        # safe_load returns None for an empty document.
        data = yaml.safe_load(f) or {}

    rows = []
    for space_id in tqdm.auto.tqdm(data):
        base_info = DemoInfo.from_space_id(space_id)
        # A key with no value parses as None; treat it as "no overrides".
        overrides = data[space_id] or {}
        # Re-construct so that __post_init__ also normalizes overridden timestamps.
        info = DemoInfo(**(dataclasses.asdict(base_info) | overrides))
        rows.append(dataclasses.asdict(info))

    columns = [field.name for field in dataclasses.fields(DemoInfo)]
    return pd.DataFrame(rows, columns=columns)


class Prettifier:
    """Decorate the cells of a demo table with HTML.

    NOTE(review): in the reviewed copy of this file the HTML tags inside the
    string templates were missing (only the interpolated values survived,
    which left ``url``, ``class_name`` and ``s`` computed but unused). The
    markup below is reconstructed from those values. The tag attributes and
    the ``multiple-replicas`` / ``sleep-time-*`` class names are assumptions;
    confirm them against the CSS the front end actually ships.

    NOTE(review): values are interpolated into the markup without escaping.
    """

    @staticmethod
    def create_link(text: str, url: str) -> str:
        """Return an anchor element showing ``text`` and pointing at ``url``."""
        return f'<a href="{url}" target="_blank">{text}</a>'

    @staticmethod
    def to_div(text: str | None, category_name: str) -> str:
        """Wrap ``text`` in a div whose class is ``<category_name>-<text lowercased>``.

        ``None`` is treated as the empty string.
        """
        if text is None:
            text = ""
        class_name = f"{category_name}-{text.lower()}"
        return f'<div class="{class_name}">{text}</div>'

    @staticmethod
    def add_div_tag_to_replicas(replicas: int) -> str:
        """Render a replica count: ``""`` for 0, ``"1"`` for 1, a div otherwise."""
        if replicas == 0:
            return ""
        if replicas == 1:
            return "1"
        return f'<div class="multiple-replicas">{replicas}</div>'

    @staticmethod
    def add_div_tag_to_sleep_time(sleep_time_s: str, hardware: str) -> str:
        """Wrap a sleep-time label in a div.

        ``cpu-basic`` hardware gets its own class; every other hardware gets a
        class derived from the label with spaces replaced by hyphens.
        """
        if hardware == "cpu-basic":
            return f'<div class="sleep-time-cpu-basic">{sleep_time_s}</div>'
        s = sleep_time_s.replace(" ", "-")
        return f'<div class="sleep-time-{s}">{sleep_time_s}</div>'

    def __call__(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return a copy of ``df`` with the display columns converted to HTML.

        The columns of ``df`` are kept in their original order. The returned
        frame is rebuilt row by row, so it carries a fresh default index.
        """
        new_rows = []
        for _, row in df.iterrows():
            sleep_time = row["sleep_time"]
            if np.isnan(sleep_time):
                sleep_time_html = ""
            else:
                sleep_time_html = self.add_div_tag_to_sleep_time(
                    SLEEP_TIME_INT_TO_STR[sleep_time], row["hardware"]
                )
            new_row = dict(row) | {
                "status": self.to_div(row["status"], "status"),
                "hardware": self.to_div(row["hardware"], "hardware"),
                "suggested_hardware": self.to_div(row["suggested_hardware"], "hardware"),
                "title": self.create_link(row["title"], row["url"]),
                "owner": self.create_link(row["owner"], f"https://huggingface.co/{row['owner']}"),
                "sdk": self.to_div(row["sdk"], "sdk"),
                "sleep_time": sleep_time_html,
                "replicas": self.add_div_tag_to_replicas(row["replicas"]),
            }
            new_rows.append(new_row)
        return pd.DataFrame(new_rows, columns=df.columns)


class DemoList:
    """Hold the raw demo table plus a prettified view, and filter it on demand."""

    # [column name, datatype] pairs, in display order.
    # NOTE(review): the datatype strings look like UI table cell types — confirm with the caller.
    COLUMN_INFO = [
        ["featured_week", "str"],
        ["status", "markdown"],
        ["hardware", "markdown"],
        ["title", "markdown"],
        ["owner", "markdown"],
        ["likes", "number"],
        ["last_modified", "str"],
        ["created", "str"],
        ["sdk", "markdown"],
        ["sdk_version", "str"],
        ["suggested_hardware", "markdown"],
        ["sleep_time", "markdown"],
        ["replicas", "markdown"],
    ]

    def __init__(self, df: pd.DataFrame):
        """Store ``df`` and pre-compute its prettified form limited to ``column_names``."""
        self.df_raw = df
        self._prettifier = Prettifier()
        self.df_prettified = self._prettifier(df).loc[:, self.column_names]

    @property
    def column_names(self):
        """Names of all columns listed in ``COLUMN_INFO``, in order."""
        return list(map(operator.itemgetter(0), self.COLUMN_INFO))

    def get_column_datatypes(self, column_names: list[str]) -> list[str]:
        """Return the ``COLUMN_INFO`` datatype for each name in ``column_names``.

        Raises:
            KeyError: if a name is not listed in ``COLUMN_INFO``.
        """
        mapping = dict(self.COLUMN_INFO)
        return [mapping[name] for name in column_names]

    def filter(
        self,
        status: list[str],
        hardware: list[str],
        sdk: list[str],
        sleep_time: list[str],
        multiple_replicas: bool,
        owner: str,
        start_date: datetime.datetime,
        end_date: datetime.datetime,
        column_names: list[str],
    ) -> pd.DataFrame:
        """Return the prettified rows of the raw table that match every criterion.

        Args:
            status: accepted ``status`` values.
            hardware: accepted ``hardware`` values.
            sdk: accepted ``sdk`` values.
            sleep_time: accepted sleep-time labels (keys of ``SLEEP_TIME_STR_TO_INT``).
            multiple_replicas: when true, keep only rows with more than one replica.
            owner: owner to keep, or ``"(ALL)"`` for no owner restriction.
            start_date: inclusive lower bound on ``featured_week``.
            end_date: inclusive upper bound on ``featured_week``.
            column_names: columns to include in the result, in order.

        The ``featured_week`` column must be datetime-like; it is rendered as
        ``YYYY-MM-DD`` in the result.
        """
        raw = self.df_raw
        sleep_time_int = [SLEEP_TIME_STR_TO_INT[s] for s in sleep_time]

        # Build one mask on the unfiltered frame and apply it once. Indexing an
        # already-filtered frame with masks computed on the full frame only
        # works because pandas re-aligns the mask (and warns while doing so).
        mask = (
            raw["status"].isin(status)
            & raw["hardware"].isin(hardware)
            & raw["sleep_time"].isin(sleep_time_int)
            & raw["sdk"].isin(sdk)
            & (raw["featured_week"] >= start_date)
            & (raw["featured_week"] <= end_date)
        )
        if multiple_replicas:
            mask = mask & (raw["replicas"] > 1)
        if owner != "(ALL)":
            mask = mask & (raw["owner"] == owner)

        # Copy after filtering so the column assignment below never touches df_raw.
        df = raw[mask].copy()
        df["featured_week"] = df["featured_week"].dt.strftime("%Y-%m-%d")
        return self._prettifier(df).loc[:, column_names]