"""Build a display table describing a curated list of Hugging Face Spaces."""

import datetime
import operator
import pathlib

import pandas as pd
import tqdm.auto
import yaml
from huggingface_hub import HfApi

# Directory containing this module; ``list.yaml`` is read from here.
repo_dir = pathlib.Path(__file__).parent


class DemoList:
    """Load Space metadata from ``list.yaml`` and the Hub into DataFrames.

    Attributes:
        api: ``HfApi`` client used to query each Space.
        df_raw: One row per Space holding the unformatted metadata.
        df: Display-ready table whose columns follow ``COLUMN_INFO``.
    """

    # ``[column name, datatype label]`` pairs, in display order.
    COLUMN_INFO = [
        ['status', 'markdown'],
        ['hardware', 'markdown'],
        ['title', 'markdown'],
        ['owner', 'markdown'],
        ['arxiv', 'markdown'],
        ['github', 'markdown'],
        ['likes', 'number'],
        ['tags', 'str'],
        ['last_modified', 'str'],
        ['created', 'str'],
        ['sdk', 'markdown'],
        ['sdk_version', 'str'],
        ['suggested_hardware', 'markdown'],
    ]

    def __init__(self):
        """Fetch metadata for every listed Space and build both tables."""
        self.api = HfApi()
        self._raw_data = self.load_data()
        self.df_raw = pd.DataFrame(self._raw_data)
        self.df = self.prettify_df()

    @property
    def column_names(self):
        """Return the column names of ``COLUMN_INFO`` in display order."""
        return list(map(operator.itemgetter(0), self.COLUMN_INFO))

    @property
    def column_datatype(self):
        """Return the datatype labels of ``COLUMN_INFO`` in display order."""
        return list(map(operator.itemgetter(1), self.COLUMN_INFO))

    @staticmethod
    def get_space_id(url: str) -> str:
        """Return the last two path segments of *url* joined by ``/``."""
        return '/'.join(url.split('/')[-2:])

    def load_data(self) -> list[dict]:
        """Read ``list.yaml`` and enrich every entry with Hub metadata.

        The YAML document is a mapping keyed by Space URL. Each value is a
        dict that is updated in place with data returned by
        ``HfApi.space_info`` and then collected.

        Returns:
            One dict per Space, in the order the URLs appear in the file.
        """
        with open(repo_dir / 'list.yaml', encoding='utf-8') as f:
            # An empty YAML document loads as None; treat it as "no Spaces".
            data = yaml.safe_load(f) or {}

        res = []
        for url in tqdm.auto.tqdm(list(data)):
            space_id = self.get_space_id(url)
            space_info = self.api.space_info(repo_id=space_id)
            card = space_info.cardData
            info = data[url]

            # These list-valued keys are optional in the YAML file.
            for key in ('arxiv', 'github', 'tags'):
                info.setdefault(key, [])

            info['url'] = url
            info['owner'] = space_id.split('/')[0]
            info['title'] = card['title']
            info['sdk'] = card['sdk']
            info['sdk_version'] = card.get('sdk_version', '')
            info['likes'] = space_info.likes
            info['last_modified'] = space_info.lastModified
            info['status'] = space_info.runtime['stage']
            info['suggested_hardware'] = card.get('suggested_hardware', '')

            # Fall back to the requested hardware when none is current.
            hardware = space_info.runtime['hardware']
            info['hardware'] = hardware['current']
            if info['hardware'] is None:
                info['hardware'] = hardware['requested']

            res.append(info)
        return res

    def get_arxiv_link(self, links: list[str]) -> str:
        """Return one link per URL, labelled with the URL's last segment."""
        return '\n'.join(
            self.create_link(link.split('/')[-1], link) for link in links)

    def get_github_link(self, links: list[str]) -> str:
        """Return one link per URL, each labelled ``github``."""
        return '\n'.join(self.create_link('github', link) for link in links)

    def get_tag_list(self, tags: list[str]) -> str:
        """Return *tags* joined by ``', '``."""
        return ', '.join(tags)

    @staticmethod
    def create_link(text: str, url: str) -> str:
        """Return an HTML anchor showing *text* that opens *url* in a new tab."""
        return f'<a href="{url}" target="_blank">{text}</a>'

    def to_div(self, text: str | None, category_name: str) -> str:
        """Wrap *text* in a ``<div>`` carrying a per-value CSS class.

        The class name is ``{category_name}-{text.lower()}``. ``None`` is
        treated as the empty string.
        """
        if text is None:
            text = ''
        class_name = f'{category_name}-{text.lower()}'
        return f'<div class="{class_name}">{text}</div>'

    @staticmethod
    def format_timestamp(timestamp: str) -> str:
        """Convert ``YYYY-MM-DDTHH:MM:SS.fffZ`` to ``YYYY/MM/DD HH:MM:SS``.

        Raises:
            ValueError: If *timestamp* does not match the expected layout.
        """
        # ``%f`` accepts any 1-6 digit fraction, not only the literal ``000``.
        s = datetime.datetime.strptime(timestamp, '%Y-%m-%dT%H:%M:%S.%fZ')
        return s.strftime('%Y/%m/%d %H:%M:%S')

    def prettify_df(self) -> pd.DataFrame:
        """Build the display table from ``self.df_raw``.

        Returns:
            A DataFrame with exactly the columns of ``COLUMN_INFO``, in that
            order. It has no rows when ``self.df_raw`` is empty.
        """
        new_rows = []
        for _, row in self.df_raw.iterrows():
            # Index by label so a column name can never be shadowed by a
            # ``pd.Series`` attribute or method of the same name.
            owner = row['owner']
            new_rows.append({
                'status': self.to_div(row['status'], 'status'),
                'hardware': self.to_div(row['hardware'], 'hardware'),
                'suggested_hardware':
                self.to_div(row['suggested_hardware'], 'hardware'),
                'title': self.create_link(row['title'], row['url']),
                'owner':
                self.create_link(owner, f'https://huggingface.co/{owner}'),
                'arxiv': self.get_arxiv_link(row['arxiv']),
                'github': self.get_github_link(row['github']),
                'likes': row['likes'],
                'tags': self.get_tag_list(row['tags']),
                'last_modified': self.format_timestamp(row['last_modified']),
                # NOTE(review): 'created' is not set by load_data; presumably
                # it is supplied by each list.yaml entry -- confirm.
                'created': self.format_timestamp(row['created']),
                'sdk': self.to_div(row['sdk'], 'sdk'),
                'sdk_version': row['sdk_version'],
            })
        # Passing ``columns`` selects and orders the columns and, unlike
        # ``.loc``, also works when there are no rows at all.
        return pd.DataFrame(new_rows, columns=self.column_names)