Spaces:
Runtime error
Runtime error
import dataclasses | |
import datetime | |
import operator | |
import pathlib | |
import pandas as pd | |
import tqdm.auto | |
import yaml | |
from huggingface_hub import HfApi | |
from constants import ( | |
OWNER_CHOICES, | |
SLEEP_TIME_INT_TO_STR, | |
SLEEP_TIME_STR_TO_INT, | |
WHOAMI, | |
) | |
@dataclasses.dataclass(frozen=True)
class DemoInfo:
    """Immutable record describing one Hugging Face Space.

    ``last_modified`` and ``created`` are normalised in ``__post_init__`` from
    the ``%Y-%m-%dT%H:%M:%S.%fZ`` format to ``%Y/%m/%d %H:%M:%S``; values that
    do not match that format are kept unchanged.
    """

    space_id: str
    url: str
    title: str
    owner: str
    sdk: str
    sdk_version: str
    likes: int
    status: str
    last_modified: str
    sleep_time: int
    replicas: int
    private: bool
    hardware: str
    suggested_hardware: str
    created: str = ""
    arxiv: list[str] = dataclasses.field(default_factory=list)
    github: list[str] = dataclasses.field(default_factory=list)
    tags: list[str] = dataclasses.field(default_factory=list)

    def __post_init__(self) -> None:
        # The dataclass is frozen, so regular attribute assignment is blocked;
        # object.__setattr__ is the way to normalise fields after init.
        object.__setattr__(self, "last_modified", DemoInfo.convert_timestamp(self.last_modified))
        object.__setattr__(self, "created", DemoInfo.convert_timestamp(self.created))

    @staticmethod
    def convert_timestamp(timestamp: str) -> str:
        """Reformat a ``%Y-%m-%dT%H:%M:%S.%fZ`` timestamp as ``%Y/%m/%d %H:%M:%S``.

        A string that does not parse in that format (including the empty
        string and an already-converted value) is returned unchanged.
        """
        try:
            parsed = datetime.datetime.strptime(timestamp, "%Y-%m-%dT%H:%M:%S.%fZ")
        except ValueError:
            return timestamp
        return parsed.strftime("%Y/%m/%d %H:%M:%S")

    @classmethod
    def from_space_id(cls, space_id: str) -> "DemoInfo":
        """Build a ``DemoInfo`` from ``HfApi().space_info(repo_id=space_id)``.

        The owner is taken from the part of ``space_id`` before the first
        ``/``. ``created``, ``arxiv``, ``github`` and ``tags`` keep their
        defaults.

        Raises:
            KeyError: if the card data has no ``sdk`` entry.
        """
        api = HfApi()
        space_info = api.space_info(repo_id=space_id)
        # A Space without card metadata yields no card data; fall back to an
        # empty mapping so the optional lookups below do not fail on None.
        card = space_info.cardData or {}
        # NOTE(review): runtime is indexed like a mapping here — confirm this
        # matches the installed huggingface_hub version.
        runtime = space_info.runtime
        resources = runtime["resources"]
        hardware_info = runtime["hardware"]
        return cls(
            space_id=space_id,
            url=f"https://huggingface.co/spaces/{space_id}",
            title=card.get("title", ""),
            owner=space_id.split("/")[0],
            sdk=card["sdk"],
            sdk_version=card.get("sdk_version", ""),
            likes=space_info.likes,
            status=runtime["stage"],
            last_modified=space_info.lastModified,
            # A falsy gcTimeout (e.g. None) is stored as 0.
            sleep_time=runtime["gcTimeout"] or 0,
            replicas=resources["replicas"] if resources is not None else 0,
            private=space_info.private,
            # Prefer the hardware currently in use; fall back to the requested one.
            hardware=hardware_info["current"] or hardware_info["requested"],
            suggested_hardware=card.get("suggested_hardware", ""),
        )
def get_df_from_yaml(path: pathlib.Path | str) -> pd.DataFrame:
    """Load a YAML mapping of space ids and return one DataFrame row per Space.

    Each top-level key of the YAML file is treated as a space id. Its value,
    when present, is a mapping of ``DemoInfo`` field overrides that take
    precedence over the values fetched by ``DemoInfo.from_space_id``.

    Args:
        path: Location of the YAML file.

    Returns:
        A DataFrame built from ``dataclasses.asdict`` of every resulting
        ``DemoInfo``; an empty DataFrame when the file lists no spaces.
    """
    with pathlib.Path(path).open(encoding="utf-8") as f:
        # safe_load returns None for an empty document.
        data = yaml.safe_load(f) or {}

    rows = []
    for space_id, overrides in tqdm.auto.tqdm(data.items()):
        base_info = DemoInfo.from_space_id(space_id)
        # An entry written as a bare key has a None value; treat it as "no overrides".
        merged = dataclasses.asdict(base_info) | (overrides or {})
        # Re-constructing validates the override keys and re-applies __post_init__.
        rows.append(dataclasses.asdict(DemoInfo(**merged)))
    return pd.DataFrame(rows)
class Prettifier: | |
def get_arxiv_link(links: list[str]) -> str: | |
links = [Prettifier.create_link(link.split("/")[-1], link) for link in links] | |
return "\n".join(links) | |
def get_github_link(links: list[str]) -> str: | |
links = [Prettifier.create_link("github", link) for link in links] | |
return "\n".join(links) | |
def get_tag_list(tags: list[str]) -> str: | |
return ", ".join(tags) | |
def create_link(text: str, url: str) -> str: | |
return f'<a href={url} target="_blank">{text}</a>' | |
def to_div(text: str | None, category_name: str) -> str: | |
if text is None: | |
text = "" | |
class_name = f"{category_name}-{text.lower()}" | |
return f'<div class="{class_name}">{text}</div>' | |
def add_div_tag_to_replicas(replicas: int) -> str: | |
if replicas == 0: | |
return "" | |
if replicas == 1: | |
return "1" | |
return f'<div class="multiple-replicas">{replicas}</div>' | |
def add_div_tag_to_sleep_time(sleep_time_s: str, hardware: str) -> str: | |
if hardware == "cpu-basic": | |
return f'<div class="sleep-time-cpu-basic">{sleep_time_s}</div>' | |
s = sleep_time_s.replace(" ", "-") | |
return f'<div class="sleep-time-{s}">{sleep_time_s}</div>' | |
def __call__(self, df: pd.DataFrame) -> pd.DataFrame: | |
new_rows = [] | |
for _, row in df.iterrows(): | |
new_row = dict(row) | { | |
"status": self.to_div(row.status, "status"), | |
"hardware": self.to_div(row.hardware, "hardware"), | |
"suggested_hardware": self.to_div(row.suggested_hardware, "hardware"), | |
"title": self.create_link(row.title, row.url), | |
"owner": self.create_link(row.owner, f"https://huggingface.co/{row.owner}"), | |
"sdk": self.to_div(row.sdk, "sdk"), | |
"sleep_time": self.add_div_tag_to_sleep_time(SLEEP_TIME_INT_TO_STR[row.sleep_time], row.hardware), | |
"replicas": self.add_div_tag_to_replicas(row.replicas), | |
"arxiv": self.get_arxiv_link(row.arxiv), | |
"github": self.get_github_link(row.github), | |
"tags": self.get_tag_list(row.tags), | |
} | |
new_rows.append(new_row) | |
return pd.DataFrame(new_rows, columns=df.columns) | |
class DemoList:
    """Hold the raw demo table and produce prettified, filtered views of it."""

    # [column name, display datatype] pairs, in display order.
    COLUMN_INFO = [
        ["status", "markdown"],
        ["hardware", "markdown"],
        ["title", "markdown"],
        ["owner", "markdown"],
        ["arxiv", "markdown"],
        ["github", "markdown"],
        ["likes", "number"],
        ["tags", "str"],
        ["last_modified", "str"],
        ["created", "str"],
        ["sdk", "markdown"],
        ["sdk_version", "str"],
        ["suggested_hardware", "markdown"],
        ["sleep_time", "markdown"],
        ["replicas", "markdown"],
        ["private", "bool"],
    ]

    def __init__(self, df: pd.DataFrame):
        """Store ``df`` as the raw table and precompute its prettified view."""
        self.df_raw = df
        self._prettifier = Prettifier()
        self.df_prettified = self._prettifier(df).loc[:, self.column_names]

    @property
    def column_names(self) -> list[str]:
        """Names of the displayed columns, in ``COLUMN_INFO`` order."""
        return list(map(operator.itemgetter(0), self.COLUMN_INFO))

    @property
    def column_datatype(self) -> list[str]:
        """Display datatypes of the columns, in ``COLUMN_INFO`` order."""
        return list(map(operator.itemgetter(1), self.COLUMN_INFO))

    def filter(
        self,
        status: list[str],
        hardware: list[str],
        sleep_time: list[str],
        multiple_replicas: bool,
        sdk: list[str],
        visibility: list[str],
        owner: list[str],
    ) -> pd.DataFrame:
        """Return the prettified rows of the raw table that match every criterion.

        Args:
            status: Accepted ``status`` values.
            hardware: Accepted ``hardware`` values.
            sleep_time: Accepted sleep-time labels; each is translated through
                ``SLEEP_TIME_STR_TO_INT`` before matching.
            multiple_replicas: When true, keep only rows with ``replicas > 1``.
            sdk: Accepted ``sdk`` values.
            visibility: Exactly ``["public"]`` keeps non-private rows, exactly
                ``["private"]`` keeps private rows, anything else keeps both.
            owner: If it covers every entry of ``OWNER_CHOICES`` no owner
                filter is applied; otherwise, if it contains ``WHOAMI`` only
                rows owned by ``WHOAMI`` are kept, else only rows not owned
                by ``WHOAMI``.

        Raises:
            KeyError: if a ``sleep_time`` label is not in ``SLEEP_TIME_STR_TO_INT``.
        """
        raw = self.df_raw
        sleep_time_int = [SLEEP_TIME_STR_TO_INT[s] for s in sleep_time]

        # Build one mask aligned with the raw table and apply it once. Indexing
        # an already-filtered frame with a full-length mask makes pandas
        # reindex the key implicitly and emit a UserWarning.
        mask = (
            raw.status.isin(status)
            & raw.hardware.isin(hardware)
            & raw.sdk.isin(sdk)
            & raw.sleep_time.isin(sleep_time_int)
        )
        if multiple_replicas:
            mask &= raw.replicas > 1

        if visibility == ["public"]:
            mask &= ~raw.private
        elif visibility == ["private"]:
            mask &= raw.private

        if set(owner) != set(OWNER_CHOICES):
            if WHOAMI in owner:
                mask &= raw.owner == WHOAMI
            else:
                mask &= raw.owner != WHOAMI

        return self._prettifier(raw[mask]).loc[:, self.column_names]