from __future__ import annotations import numpy as np import pandas as pd import requests from huggingface_hub.hf_api import SpaceInfo url = 'https://docs.google.com/spreadsheets/d/1fANyV8spnEGUBMevjnb1FupkbESq9lTM2CGQt413sXQ/edit#gid=874079331' csv_url = url.replace('/edit#gid=', '/export?format=csv&gid=') class ModelList: def __init__(self): self.table = pd.read_csv(csv_url) self._preprocess_table() self.table_header = ''' Model Name Data Type(s) Year Published Paper Code on Github Weights on 🤗 Other Weights ''' def _preprocess_table(self) -> None: self.table['name_lowercase'] = self.table.name.str.lower() rows = [] for row in self.table.itertuples(): paper = f'Paper' if isinstance( row.paper, str) else '' github = f'GitHub' if isinstance( row.github, str) else '' hf_model = f'Hub Model' if isinstance( row.hub, str) else '' other_model = f'Other Weights' if isinstance( row.other, str) else '' row = f''' {row.name} {row.type} {row.year} {paper} {github} {hf_model} {other_model} ''' rows.append(row) self.table['html_table_content'] = rows def render(self, search_query: str, case_sensitive: bool, filter_names: list[str], data_types: list[str]) -> tuple[int, str]: df = self.table if search_query: if case_sensitive: df = df[df.name.str.contains(search_query)] else: df = df[df.name_lowercase.str.contains(search_query.lower())] has_paper = 'Paper' in filter_names has_github = 'Github' in filter_names has_model = 'Hub Model' in filter_names or 'Other Weights' in filter_names df = self.filter_table(df, has_paper, has_github, has_model, data_types) return len(df), self.to_html(df, self.table_header) @staticmethod def filter_table(df: pd.DataFrame, has_paper: bool, has_github: bool, has_model: bool, data_types: list[str]) -> pd.DataFrame: if has_paper: df = df[~df.paper.isna()] if has_github: df = df[~df.github.isna()] if has_model: df = df[~df.hub.isna() | ~df.other.isna()] df = df[df.type.isin(set(data_types))] return df @staticmethod def to_html(df: pd.DataFrame, table_header: str) -> str: table_data = ''.join(df.html_table_content) html = f''' {table_header} {table_data}
''' return html