File size: 7,539 Bytes
a7de579
e51e8f8
58113ed
e51e8f8
 
 
 
 
 
 
6e67bdd
 
 
e51e8f8
a7de579
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
968af70
 
a7de579
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e51e8f8
 
a7de579
 
 
e51e8f8
 
a7de579
 
e51e8f8
 
968af70
 
 
58113ed
a7de579
 
8410cc1
 
 
 
58113ed
70a5579
 
a7de579
70a5579
 
 
 
 
cc8cf29
 
 
 
 
 
 
a7de579
968af70
a7de579
 
968af70
 
 
 
 
 
 
 
850f439
 
 
968af70
 
6fd31fb
cc8cf29
6e67bdd
70a5579
 
a7de579
 
 
 
 
 
968af70
 
a7de579
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6e67bdd
a7de579
6e67bdd
 
 
 
 
 
 
 
 
a7de579
6e67bdd
 
a7de579
6e67bdd
 
a7de579
6e67bdd
a7de579
6e67bdd
a7de579
 
 
6e67bdd
 
a7de579
6e67bdd
 
 
 
a7de579
6e67bdd
a7de579
6e67bdd
a7de579
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
import dataclasses
import datetime
import operator
import pathlib

import pandas as pd
import tqdm.auto
import yaml
from huggingface_hub import HfApi

from constants import (OWNER_CHOICES, SLEEP_TIME_INT_TO_STR,
                       SLEEP_TIME_STR_TO_INT, WHOAMI)


@dataclasses.dataclass(frozen=True)
class DemoInfo:
    space_id: str
    url: str
    title: str
    owner: str
    sdk: str
    sdk_version: str
    likes: int
    status: str
    last_modified: str
    sleep_time: int
    replicas: int
    private: bool
    hardware: str
    suggested_hardware: str
    created: str = ''
    arxiv: list[str] = dataclasses.field(default_factory=list)
    github: list[str] = dataclasses.field(default_factory=list)
    tags: list[str] = dataclasses.field(default_factory=list)

    def __post_init__(self):
        object.__setattr__(self, 'last_modified',
                           DemoInfo.convert_timestamp(self.last_modified))
        object.__setattr__(self, 'created',
                           DemoInfo.convert_timestamp(self.created))

    @staticmethod
    def convert_timestamp(timestamp: str) -> str:
        try:
            return datetime.datetime.strptime(
                timestamp,
                '%Y-%m-%dT%H:%M:%S.%fZ').strftime('%Y/%m/%d %H:%M:%S')
        except ValueError:
            return timestamp

    @classmethod
    def from_space_id(cls, space_id: str) -> 'DemoInfo':
        api = HfApi()
        space_info = api.space_info(repo_id=space_id)
        card = space_info.cardData
        runtime = space_info.runtime
        resources = runtime['resources']

        return cls(
            space_id=space_id,
            url=f'https://huggingface.co/spaces/{space_id}',
            title=card['title'] if 'title' in card else '',
            owner=space_id.split('/')[0],
            sdk=card['sdk'],
            sdk_version=card.get('sdk_version', ''),
            likes=space_info.likes,
            status=runtime['stage'],
            last_modified=space_info.lastModified,
            sleep_time=runtime['gcTimeout'] or 0,
            replicas=resources['replicas'] if resources is not None else 0,
            private=space_info.private,
            hardware=runtime['hardware']['current']
            or runtime['hardware']['requested'],
            suggested_hardware=card.get('suggested_hardware', ''),
        )


def get_df_from_yaml(path: pathlib.Path | str) -> pd.DataFrame:
    """Load a YAML file of spaces and return one DataFrame row per space.

    The YAML document is read as a mapping from space id to a mapping of
    extra ``DemoInfo`` fields. For every space id the info obtained from
    ``DemoInfo.from_space_id`` is merged with those extra fields; on a key
    clash the YAML value wins.

    Args:
        path: Location of the YAML file.

    Returns:
        A DataFrame built from ``dataclasses.asdict`` of each merged
        ``DemoInfo``. It is empty when the YAML document is empty.
    """
    with pathlib.Path(path).open(encoding='utf-8') as f:
        # safe_load returns None for an empty document.
        data = yaml.safe_load(f) or {}

    rows = []
    for space_id, overrides in tqdm.auto.tqdm(data.items()):
        fetched = dataclasses.asdict(DemoInfo.from_space_id(space_id))
        # A space id listed without any value loads as None; treat that as
        # "no extra fields" instead of failing on `dict | None`.
        merged = DemoInfo(**(fetched | (overrides or {})))
        rows.append(dataclasses.asdict(merged))
    return pd.DataFrame(rows)


class Prettifier:
    @staticmethod
    def get_arxiv_link(links: list[str]) -> str:
        links = [
            Prettifier.create_link(link.split('/')[-1], link) for link in links
        ]
        return '\n'.join(links)

    @staticmethod
    def get_github_link(links: list[str]) -> str:
        links = [Prettifier.create_link('github', link) for link in links]
        return '\n'.join(links)

    @staticmethod
    def get_tag_list(tags: list[str]) -> str:
        return ', '.join(tags)

    @staticmethod
    def create_link(text: str, url: str) -> str:
        return f'<a href={url} target="_blank">{text}</a>'

    @staticmethod
    def to_div(text: str | None, category_name: str) -> str:
        if text is None:
            text = ''
        class_name = f'{category_name}-{text.lower()}'
        return f'<div class="{class_name}">{text}</div>'

    @staticmethod
    def add_div_tag_to_replicas(replicas: int) -> str:
        if replicas == 0:
            return ''
        if replicas == 1:
            return '1'
        return f'<div class="multiple-replicas">{replicas}</div>'

    @staticmethod
    def add_div_tag_to_sleep_time(sleep_time_s: str, hardware: str) -> str:
        if hardware == 'cpu-basic':
            return f'<div class="sleep-time-cpu-basic">{sleep_time_s}</div>'
        s = sleep_time_s.replace(' ', '-')
        return f'<div class="sleep-time-{s}">{sleep_time_s}</div>'

    def __call__(self, df: pd.DataFrame) -> pd.DataFrame:
        new_rows = []
        for _, row in df.iterrows():
            new_row = dict(row) | {
                'status':
                self.to_div(row.status, 'status'),
                'hardware':
                self.to_div(row.hardware, 'hardware'),
                'suggested_hardware':
                self.to_div(row.suggested_hardware, 'hardware'),
                'title':
                self.create_link(row.title, row.url),
                'owner':
                self.create_link(row.owner,
                                 f'https://huggingface.co/{row.owner}'),
                'sdk':
                self.to_div(row.sdk, 'sdk'),
                'sleep_time':
                self.add_div_tag_to_sleep_time(
                    SLEEP_TIME_INT_TO_STR[row.sleep_time], row.hardware),
                'replicas':
                self.add_div_tag_to_replicas(row.replicas),
                'arxiv':
                self.get_arxiv_link(row.arxiv),
                'github':
                self.get_github_link(row.github),
                'tags':
                self.get_tag_list(row.tags),
            }
            new_rows.append(new_row)
        return pd.DataFrame(new_rows, columns=df.columns)


class DemoList:
    """Holds the raw demo table plus a prettified view and filters it.

    ``COLUMN_INFO`` lists, in display order, each shown column together
    with its datatype label.
    """

    COLUMN_INFO = [
        ['status', 'markdown'],
        ['hardware', 'markdown'],
        ['title', 'markdown'],
        ['owner', 'markdown'],
        ['arxiv', 'markdown'],
        ['github', 'markdown'],
        ['likes', 'number'],
        ['tags', 'str'],
        ['last_modified', 'str'],
        ['created', 'str'],
        ['sdk', 'markdown'],
        ['sdk_version', 'str'],
        ['suggested_hardware', 'markdown'],
        ['sleep_time', 'markdown'],
        ['replicas', 'markdown'],
        ['private', 'bool'],
    ]

    def __init__(self, df: pd.DataFrame):
        """Store ``df`` as the raw table and precompute its prettified view."""
        self.df_raw = df
        self._prettifier = Prettifier()
        self.df_prettified = self._prettifier(df).loc[:, self.column_names]

    @property
    def column_names(self):
        """Column names from ``COLUMN_INFO``, in order."""
        return [entry[0] for entry in self.COLUMN_INFO]

    @property
    def column_datatype(self):
        """Datatype labels from ``COLUMN_INFO``, in order."""
        return [entry[1] for entry in self.COLUMN_INFO]

    def filter(
        self,
        status: list[str],
        hardware: list[str],
        sleep_time: list[str],
        multiple_replicas: bool,
        sdk: list[str],
        visibility: list[str],
        owner: list[str],
    ) -> pd.DataFrame:
        """Return the prettified rows of the raw table that pass all filters.

        Args:
            status: Accepted values of the ``status`` column.
            hardware: Accepted values of the ``hardware`` column.
            sleep_time: Accepted sleep-time labels; each is mapped through
                ``SLEEP_TIME_STR_TO_INT`` before comparison.
            multiple_replicas: If true, keep only rows with ``replicas > 1``.
            sdk: Accepted values of the ``sdk`` column.
            visibility: Exactly ``['public']`` keeps non-private rows and
                exactly ``['private']`` keeps private rows; any other value
                applies no visibility filter.
            owner: If it equals ``OWNER_CHOICES`` as a set, no owner filter
                is applied; otherwise rows owned by ``WHOAMI`` are kept when
                ``WHOAMI`` is in the list and dropped when it is not.

        Returns:
            The prettified rows, restricted to ``column_names``.
        """
        df = self.df_raw.copy()

        # Every mask below is built from the frame it is applied to. Using
        # masks taken from df_raw on an already narrowed frame makes pandas
        # reindex the mask and emit a UserWarning for each step.
        if multiple_replicas:
            df = df[df['replicas'] > 1]

        if visibility == ['public']:
            df = df[~df['private']]
        elif visibility == ['private']:
            df = df[df['private']]

        df = df[df['status'].isin(status)
                & df['hardware'].isin(hardware)
                & df['sdk'].isin(sdk)]

        allowed_sleep_times = [SLEEP_TIME_STR_TO_INT[s] for s in sleep_time]
        df = df[df['sleep_time'].isin(allowed_sleep_times)]

        if set(owner) != set(OWNER_CHOICES):
            if WHOAMI in owner:
                df = df[df['owner'] == WHOAMI]
            else:
                df = df[df['owner'] != WHOAMI]

        return self._prettifier(df).loc[:, self.column_names]