from __future__ import annotations

import re

import pandas as pd
import requests
from huggingface_hub.hf_api import SpaceInfo


class PaperList:
    """Searchable table of papers loaded from ``iclr_submissions.csv``.

    On construction the CSV is read, a lowercase copy of the title column is
    added for case-insensitive search, and one pre-rendered text fragment per
    paper is stored in the ``html_table_content`` column. ``render`` filters
    the table by title and joins the matching fragments.
    """

    def __init__(self):
        self.organization_name = 'ICLR2023'
        self.table = pd.read_csv('iclr_submissions.csv')
        self._preprocess_table()
        # NOTE(review): the header and row templates carry no table markup as
        # written here; confirm whether HTML tags were lost before this text
        # is displayed as a table.
        self.table_header = ' Title PDF Tldr Abstract '

    @staticmethod
    def load_space_info(author: str, timeout: float = 30.0) -> list[SpaceInfo]:
        """Fetch the Spaces owned by *author* from the Hugging Face API.

        Args:
            author: User or organization name to list Spaces for.
            timeout: Seconds to wait for the server before giving up, so a
                stalled connection cannot block the caller indefinitely.

        Returns:
            One ``SpaceInfo`` per entry in the JSON response.

        Raises:
            requests.HTTPError: If the API answers with an error status.
            requests.Timeout: If the server does not respond in time.
        """
        path = 'https://huggingface.co/api/spaces'
        response = requests.get(path,
                                params={'author': author},
                                timeout=timeout)
        # Fail loudly on 4xx/5xx instead of trying to parse an error payload
        # as a list of spaces.
        response.raise_for_status()
        return [SpaceInfo(**item) for item in response.json()]

    def add_spaces_to_table(self, organization_name: str,
                            df: pd.DataFrame) -> pd.DataFrame:
        """Look up the organization's Spaces and return *df*.

        The lookup builds a mapping from the lowercased Space name (the part
        of the id after ``/``) to its URL. The mapping is not merged into the
        frame yet, so *df* is returned unchanged.
        """
        spaces = self.load_space_info(organization_name)
        # NOTE(review): computed but not yet joined onto ``df``.
        name2space = {
            s.id.split('/')[1].lower(): f'https://huggingface.co/spaces/{s.id}'
            for s in spaces
        }
        return df

    @staticmethod
    def _format_row(row) -> str:
        """Render one ``itertuples`` row as its table fragment."""
        # NOTE(review): both branches currently render the same title text;
        # presumably a link to ``row.url`` was intended - confirm.
        paper = f'{row.title}' if isinstance(row.url, str) else row.title
        # Missing CSV cells are read as NaN (a float), hence the str checks.
        pdf = 'pdf' if isinstance(row.pdf, str) else ''
        tldr = row.tldr if isinstance(row.tldr, str) else ''
        return f' {paper} {pdf} {tldr} {row.abstract} '

    def _preprocess_table(self) -> None:
        """Add the ``title_lowercase`` and ``html_table_content`` columns."""
        self.table = self.add_spaces_to_table(self.organization_name,
                                              self.table)
        self.table['title_lowercase'] = self.table.title.str.lower()
        self.table['html_table_content'] = [
            self._format_row(row) for row in self.table.itertuples()
        ]

    @staticmethod
    def _is_valid_regex(pattern: str) -> bool:
        """Return True if *pattern* compiles as a regular expression."""
        try:
            re.compile(pattern)
        except re.error:
            return False
        return True

    def render(self, search_query: str,
               case_sensitive: bool) -> tuple[int, str]:
        """Filter papers by title and render the matches.

        Args:
            search_query: Text to look for in the title. It is used as a
                regular expression when it is a valid one; otherwise it is
                matched literally, so queries such as ``(`` or ``c++`` do not
                raise. An empty query matches every paper.
            case_sensitive: Match against the original titles when True,
                otherwise lowercase both the query and the titles.

        Returns:
            ``(number_of_matches, rendered_table)``.
        """
        # The table already went through ``add_spaces_to_table`` during
        # preprocessing; repeating it here would only add a network
        # round-trip per query without changing the frame.
        df = self.table
        if search_query:
            if case_sensitive:
                titles, pattern = df['title'], search_query
            else:
                titles, pattern = df['title_lowercase'], search_query.lower()
            # na=False: rows with a missing title never match, and the mask
            # stays purely boolean so it can be used for indexing.
            mask = titles.str.contains(pattern,
                                       regex=self._is_valid_regex(pattern),
                                       na=False)
            df = df[mask]
        return len(df), self.to_html(df, self.table_header)

    @staticmethod
    def to_html(df: pd.DataFrame, table_header: str) -> str:
        """Join the header and every row fragment of *df* into one string."""
        table_data = ''.join(df.html_table_content)
        return f' {table_header} {table_data}\n'