import copy
import datetime
import html
import operator
import pathlib

import pandas as pd
import tqdm.auto
import yaml
from huggingface_hub import HfApi

repo_dir = pathlib.Path(__file__).parent


class DemoList:
    """Table of Hugging Face Spaces demos described by ``list.yaml``.

    ``list.yaml`` (next to this file) maps a Space URL to a dict of
    hand-written metadata (``created`` is required; ``arxiv``, ``github``
    and ``tags`` are optional lists).  ``load_data`` enriches every entry
    with live information fetched through ``HfApi.space_info`` and
    ``to_df`` turns the result into a ``pandas.DataFrame`` whose columns
    follow ``COLUMN_INFO``.

    NOTE(review): the HTML emitted by ``create_link`` and the
    ``colorize_*`` helpers is a reconstruction.  The code this replaces
    ignored ``url`` / ``color`` entirely and returned the bare text, which
    contradicts the ``'markdown'`` column types below.  Confirm the markup
    against whatever renders the table.
    """

    # (column name, datatype label) pairs, in display order.  The labels
    # are exposed via ``column_datatype`` for the table renderer.
    COLUMN_INFO = [
        ['status', 'markdown'],
        ['hardware', 'markdown'],
        ['title', 'markdown'],
        ['arxiv', 'markdown'],
        ['github', 'markdown'],
        ['likes', 'number'],
        ['tags', 'str'],
        ['last_modified', 'str'],
        ['created', 'str'],
        ['sdk', 'markdown'],
        ['sdk_version', 'str'],
        ['suggested_hardware', 'str'],
    ]

    # Output format shared by the ``last_modified`` and ``created`` columns.
    _DISPLAY_TIME_FORMAT = '%Y/%m/%d %H:%M:%S'

    def __init__(self):
        self.api = HfApi()
        self.data = self.load_data()
        self.df = self.to_df()

    def update_data(self) -> None:
        """Reload ``list.yaml``, re-query the Hub and rebuild ``self.df``."""
        self.data = self.load_data()
        self.df = self.to_df()

    def load_data(self) -> dict:
        """Load ``list.yaml`` and add live Space information to each entry.

        Returns:
            A dict keyed by Space URL.  Each value is the YAML entry
            extended with ``title``, ``sdk``, ``sdk_version``, ``likes``,
            ``last_modified``, ``created`` (reformatted), ``status``,
            ``suggested_hardware`` and ``hardware``.

        Raises:
            KeyError: if an entry lacks ``created`` or the Space card lacks
                ``title`` / ``sdk``.
            ValueError: if a timestamp does not match its expected format.
        """
        # YAML is UTF-8 by specification; do not depend on the locale.
        with open(repo_dir / 'list.yaml', encoding='utf-8') as f:
            # An empty document parses to None; treat it as "no demos".
            data = yaml.safe_load(f) or {}
        # Only the inner dicts are mutated, so iterating items() is safe.
        for url, info in tqdm.auto.tqdm(data.items()):
            space_id = self.get_space_id(url)
            space_info = self.api.space_info(repo_id=space_id)
            card = space_info.cardData
            info['title'] = card['title']
            info['sdk'] = self.colorize_sdk(card['sdk'])
            info['sdk_version'] = card.get('sdk_version', '')
            info['likes'] = space_info.likes
            # ``%f`` accepts any fractional-second digits, not just ``000``.
            info['last_modified'] = self._format_timestamp(
                space_info.lastModified, '%Y-%m-%dT%H:%M:%S.%fZ')
            info['created'] = self._format_timestamp(info['created'],
                                                     '%Y-%m-%d-%H-%M-%S')
            info['status'] = self.colorize_status(space_info.runtime['stage'])
            info['suggested_hardware'] = card.get('suggested_hardware', '')
            info['hardware'] = self.colorize_hardware(
                space_info.runtime['hardware']['current'],
                info['suggested_hardware'])
        return data

    @classmethod
    def _format_timestamp(cls, value, input_format: str) -> str:
        """Render a timestamp in the table's display format.

        ``value`` is parsed with ``input_format`` unless it already is a
        ``datetime.datetime``, in which case it is formatted directly.
        """
        if not isinstance(value, datetime.datetime):
            value = datetime.datetime.strptime(value, input_format)
        return value.strftime(cls._DISPLAY_TIME_FORMAT)

    @staticmethod
    def get_space_id(url: str) -> str:
        """Return the last two path segments of ``url`` joined by ``/``."""
        return '/'.join(url.split('/')[-2:])

    @staticmethod
    def create_link(text: str, url: str) -> str:
        """Return an HTML anchor showing ``text`` and pointing at ``url``.

        Both values are HTML-escaped so stray ``<``, ``&`` or ``"``
        characters cannot break the markup.
        """
        href = html.escape(url, quote=True)
        label = html.escape(text, quote=False)
        return (f'<a href="{href}" target="_blank" '
                f'rel="noopener noreferrer">{label}</a>')

    def get_arxiv_link(self, url: str) -> str:
        """Return newline-joined links for the entry's ``arxiv`` URLs.

        Each link is labelled with the last path segment of its URL.
        """
        links = sorted(self.data[url].get('arxiv', []))
        links = [self.create_link(link.split('/')[-1], link) for link in links]
        return '\n'.join(links)

    def get_github_link(self, url: str) -> str:
        """Return newline-joined ``github`` links for the entry."""
        links = sorted(self.data[url].get('github', []))
        links = [self.create_link('github', link) for link in links]
        return '\n'.join(links)

    def get_tag_list(self, url: str) -> str:
        """Return the entry's tags, sorted and joined with ``', '``."""
        tags = sorted(self.data[url].get('tags', []))
        return ', '.join(tags)

    @property
    def column_names(self):
        """Column names, in ``COLUMN_INFO`` order."""
        return list(map(operator.itemgetter(0), self.COLUMN_INFO))

    @property
    def column_datatype(self):
        """Datatype labels, in ``COLUMN_INFO`` order."""
        return list(map(operator.itemgetter(1), self.COLUMN_INFO))

    @staticmethod
    def _colorize(text: str, color: str) -> str:
        """Wrap ``text`` in a colored ``<span>``; plain text if no color."""
        escaped = html.escape(text, quote=False)
        if not color:
            return escaped
        return f'<span style="color: {color}">{escaped}</span>'

    def colorize_status(self, status: str) -> str:
        """Return ``status`` colored by Space runtime stage.

        ``RUNNING`` is green, ``STOPPED`` / ``PAUSED`` orange,
        ``RUNTIME_ERROR`` / ``BUILD_ERROR`` red; anything else is left
        uncolored.
        """
        if status == 'RUNNING':
            color = 'green'
        elif status in ['STOPPED', 'PAUSED']:
            color = 'orange'
        elif status in ['RUNTIME_ERROR', 'BUILD_ERROR']:
            color = 'red'
        else:
            color = ''
        return self._colorize(status, color)

    def colorize_sdk(self, sdk: str) -> str:
        """Return ``sdk`` colored: gradio orange, docker deepskyblue."""
        if sdk == 'gradio':
            color = 'orange'
        elif sdk == 'docker':
            color = 'deepskyblue'
        else:
            color = ''
        return self._colorize(sdk, color)

    def colorize_hardware(self, hardware: str | None,
                          suggested_hardware: str) -> str:
        """Return ``hardware`` colored by GPU usage.

        Args:
            hardware: Current hardware name, or ``None`` when unknown
                (rendered as an empty string).
            suggested_hardware: Value of the card's ``suggested_hardware``
                field, ``''`` when absent.

        Returns:
            Red text when the Space runs on a ``t4`` / ``a10g`` / ``a100``
            flavor, deepskyblue when it does not but a non-CPU flavor is
            suggested, otherwise the uncolored hardware name.
        """
        if hardware is None:
            return ''
        if 't4' in hardware or 'a10g' in hardware or 'a100' in hardware:
            color = 'red'
        elif suggested_hardware and 'cpu' not in suggested_hardware:
            color = 'deepskyblue'
        else:
            color = ''
        return self._colorize(hardware, color)

    def to_df(self) -> pd.DataFrame:
        """Build the display table from ``self.data``.

        Works on a deep copy so ``self.data`` keeps its raw ``title``,
        ``arxiv``, ``github`` and ``tags`` values.

        Returns:
            A DataFrame indexed by Space URL with the columns named in
            ``COLUMN_INFO``; an empty frame with those columns when there
            are no entries.
        """
        data = copy.deepcopy(self.data)
        if not data:
            # ``.loc`` below would raise KeyError on an empty frame.
            return pd.DataFrame(columns=self.column_names)
        for url, info in data.items():
            info['title'] = self.create_link(info['title'], url)
            # These helpers read the untouched lists from ``self.data``.
            info['arxiv'] = self.get_arxiv_link(url)
            info['github'] = self.get_github_link(url)
            info['tags'] = self.get_tag_list(url)
        df = pd.DataFrame(data).T.loc[:, self.column_names]
        return df