File size: 6,609 Bytes
6afd365
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10b46cb
6afd365
 
 
 
 
 
 
 
10b46cb
 
6afd365
 
 
 
 
 
 
 
 
 
 
 
 
10b46cb
6afd365
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10b46cb
6afd365
 
 
 
 
 
 
 
 
 
 
 
 
10b46cb
6afd365
10b46cb
6afd365
 
 
 
 
10b46cb
6afd365
 
 
 
 
 
 
 
 
 
 
 
 
 
afe5eb9
 
6afd365
 
 
 
 
 
 
 
 
 
 
 
 
 
 
afe5eb9
 
6afd365
eb94b80
6afd365
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
import dataclasses
import datetime
import operator
import pathlib

import numpy as np
import pandas as pd
import tqdm.auto
import yaml
from huggingface_hub import HfApi

from constants import SLEEP_TIME_INT_TO_STR, SLEEP_TIME_STR_TO_INT


@dataclasses.dataclass(frozen=True)
class DemoInfo:
    space_id: str
    url: str
    title: str
    owner: str
    sdk: str
    sdk_version: str
    likes: int
    status: str
    last_modified: str
    sleep_time: int
    replicas: int
    private: bool
    hardware: str
    suggested_hardware: str
    created: str = ""

    def __post_init__(self) -> None:
        object.__setattr__(self, "last_modified", DemoInfo.convert_timestamp(self.last_modified))
        object.__setattr__(self, "created", DemoInfo.convert_timestamp(self.created))

    @staticmethod
    def convert_timestamp(timestamp: str | datetime.datetime) -> str:
        if isinstance(timestamp, datetime.datetime):
            return timestamp.strftime("%Y/%m/%d %H:%M:%S")
        try:
            dt = datetime.datetime.strptime(timestamp, "%Y-%m-%dT%H:%M:%S.%fZ").astimezone(datetime.timezone.utc)
            return dt.strftime("%Y/%m/%d %H:%M:%S")
        except ValueError:
            return timestamp

    @classmethod
    def from_space_id(cls, space_id: str) -> "DemoInfo":
        api = HfApi()
        space_info = api.space_info(repo_id=space_id)
        card = space_info.cardData
        runtime = space_info.runtime

        return cls(
            space_id=space_id,
            url=f"https://huggingface.co/spaces/{space_id}",
            title=card.get("title", ""),
            owner=space_id.split("/")[0],
            sdk=card["sdk"],
            sdk_version=card.get("sdk_version", ""),
            likes=space_info.likes,
            status=runtime.stage,
            last_modified=space_info.lastModified,
            sleep_time=runtime.sleep_time or 0,
            replicas=runtime.raw["replicas"]["current"] or runtime.raw["replicas"]["requested"],
            private=space_info.private,
            hardware=runtime.hardware or runtime.requested_hardware or "",
            suggested_hardware=card.get("suggested_hardware", ""),
            created=space_info.created_at,
        )


def get_df_from_yaml(path: pathlib.Path | str) -> pd.DataFrame:
    """Load a YAML mapping of Space ids and return one DataFrame row per Space.

    Each top-level key is used as a Space id: its metadata is fetched with
    ``DemoInfo.from_space_id`` and then the key's YAML value is merged on top,
    so fields given in the file override the fetched ones.

    Args:
        path: Location of the YAML file.

    Returns:
        A DataFrame built from the ``DemoInfo`` records. An empty YAML file
        yields an empty DataFrame.
    """
    # YAML is read as UTF-8 explicitly rather than with the locale's default
    # encoding, so non-ASCII values load the same way on every platform.
    with pathlib.Path(path).open(encoding="utf-8") as f:
        # safe_load returns None for an empty document.
        data = yaml.safe_load(f) or {}

    records = []
    for space_id, overrides in tqdm.auto.tqdm(data.items()):
        fetched = dataclasses.asdict(DemoInfo.from_space_id(space_id))
        # A key with no value loads as None; treat it as "no overrides".
        # Rebuilding the DemoInfo re-runs its normalization on overridden fields.
        info = DemoInfo(**(fetched | (overrides or {})))
        records.append(dataclasses.asdict(info))
    return pd.DataFrame(records)


class Prettifier:
    @staticmethod
    def create_link(text: str, url: str) -> str:
        return f'<a href={url} target="_blank">{text}</a>'

    @staticmethod
    def to_div(text: str | None, category_name: str) -> str:
        if text is None:
            text = ""
        class_name = f"{category_name}-{text.lower()}"
        return f'<div class="{class_name}">{text}</div>'

    @staticmethod
    def add_div_tag_to_replicas(replicas: int) -> str:
        if replicas == 0:
            return ""
        if replicas == 1:
            return "1"
        return f'<div class="multiple-replicas">{replicas}</div>'

    @staticmethod
    def add_div_tag_to_sleep_time(sleep_time_s: str, hardware: str) -> str:
        if hardware == "cpu-basic":
            return f'<div class="sleep-time-cpu-basic">{sleep_time_s}</div>'
        s = sleep_time_s.replace(" ", "-")
        return f'<div class="sleep-time-{s}">{sleep_time_s}</div>'

    def __call__(self, df: pd.DataFrame) -> pd.DataFrame:
        new_rows = []
        for _, row in df.iterrows():
            new_row = dict(row) | {
                "status": self.to_div(row.status, "status"),
                "hardware": self.to_div(row.hardware, "hardware"),
                "suggested_hardware": self.to_div(row.suggested_hardware, "hardware"),
                "title": self.create_link(row.title, row.url),
                "owner": self.create_link(row.owner, f"https://huggingface.co/{row.owner}"),
                "sdk": self.to_div(row.sdk, "sdk"),
                "sleep_time": (
                    self.add_div_tag_to_sleep_time(SLEEP_TIME_INT_TO_STR[row.sleep_time], row.hardware)
                    if ~np.isnan(row.sleep_time)
                    else ""
                ),
                "replicas": self.add_div_tag_to_replicas(row.replicas),
            }
            new_rows.append(new_row)
        return pd.DataFrame(new_rows, columns=df.columns)


class DemoList:
    """Holds the raw demo table and produces filtered, prettified views of it."""

    # (column name, display datatype) pairs, in display order.
    COLUMN_INFO = (
        ["featured_week", "str"],
        ["status", "markdown"],
        ["hardware", "markdown"],
        ["title", "markdown"],
        ["owner", "markdown"],
        ["likes", "number"],
        ["last_modified", "str"],
        ["created", "str"],
        ["sdk", "markdown"],
        ["sdk_version", "str"],
        ["suggested_hardware", "markdown"],
        ["sleep_time", "markdown"],
        ["replicas", "markdown"],
    )

    def __init__(self, df: pd.DataFrame) -> None:
        """Store ``df`` as the raw table and precompute its prettified form.

        Args:
            df: Raw table; it must contain every column named in
                ``COLUMN_INFO``.
        """
        self.df_raw = df
        self._prettifier = Prettifier()
        self.df_prettified = self._prettifier(df).loc[:, self.column_names]

    @property
    def column_names(self) -> list[str]:
        """Names of all known columns, in ``COLUMN_INFO`` order."""
        return [name for name, _ in self.COLUMN_INFO]

    def get_column_datatypes(self, column_names: list[str]) -> list[str]:
        """Return the display datatype of each requested column, in order.

        Raises:
            KeyError: If a name is not listed in ``COLUMN_INFO``.
        """
        mapping = dict(self.COLUMN_INFO)
        return [mapping[name] for name in column_names]

    def filter(
        self,
        status: list[str],
        hardware: list[str],
        sdk: list[str],
        sleep_time: list[str],
        multiple_replicas: bool,
        owner: str,
        start_date: datetime.datetime,
        end_date: datetime.datetime,
        column_names: list[str],
    ) -> pd.DataFrame:
        """Return the prettified rows of the raw table that match every criterion.

        Args:
            status: Accepted ``status`` values.
            hardware: Accepted ``hardware`` values.
            sdk: Accepted ``sdk`` values.
            sleep_time: Accepted sleep-time labels; each is translated through
                ``SLEEP_TIME_STR_TO_INT`` before matching the ``sleep_time``
                column.
            multiple_replicas: When true, keep only rows with ``replicas > 1``.
            owner: Owner to keep, or ``"(ALL)"`` to disable owner filtering.
            start_date: Inclusive lower bound on ``featured_week``.
            end_date: Inclusive upper bound on ``featured_week``.
            column_names: Columns to include in the result, in order.

        Returns:
            A new prettified DataFrame; ``featured_week`` is formatted as
            ``"%Y-%m-%d"``. ``self.df_raw`` is not modified.
        """
        raw = self.df_raw
        sleep_time_int = [SLEEP_TIME_STR_TO_INT[s] for s in sleep_time]

        # Every condition is evaluated against the full raw frame and combined
        # into one mask, so the mask always shares the index of the frame it
        # selects from (no reindexing of a mask against an already-narrowed
        # frame).
        mask = (
            raw.status.isin(status)
            & raw.hardware.isin(hardware)
            & raw.sleep_time.isin(sleep_time_int)
            & raw.sdk.isin(sdk)
            & (raw.featured_week >= start_date)
            & (raw.featured_week <= end_date)
        )
        if multiple_replicas:
            mask = mask & (raw.replicas > 1)
        if owner != "(ALL)":
            mask = mask & (raw.owner == owner)

        # Explicit copy: the column assignment below must not write through to
        # (or warn about) the raw frame.
        df = raw.loc[mask].copy()
        df["featured_week"] = df["featured_week"].dt.strftime("%Y-%m-%d")

        return self._prettifier(df).loc[:, column_names]