ICLR2023 / paper_list.py
dmvaldman's picture
Initial commit of papers
f3dcb90
raw
history blame contribute delete
No virus
2.73 kB
from __future__ import annotations
import pandas as pd
import requests
from huggingface_hub.hf_api import SpaceInfo
class PaperList:
    """Searchable HTML table of papers loaded from ``iclr_submissions.csv``.

    The CSV is read once at construction time. Each row is pre-rendered to an
    HTML ``<tr>`` fragment (stored in the ``html_table_content`` column) so
    that ``render`` only has to filter rows and join the fragments.

    The code reads the columns ``title``, ``url``, ``pdf``, ``tldr`` and
    ``abstract`` from the CSV.
    """

    def __init__(self):
        self.organization_name = 'ICLR2023'
        self.table = pd.read_csv('iclr_submissions.csv')
        self._preprocess_table()
        # Header row placed above the per-paper rows by ``render``.
        self.table_header = (
            '\n<tr>\n'
            '<td width="25%">Title</td>\n'
            '<td width="5%">PDF</td>\n'
            '<td width="20%">Tldr</td>\n'
            '<td width="50%">Abstract</td>\n'
            '</tr>'
        )

    @staticmethod
    def load_space_info(author: str) -> 'list[SpaceInfo]':
        """Fetch the Spaces listed by the Hugging Face API for ``author``.

        Args:
            author: Value sent as the ``author`` query parameter.

        Returns:
            One ``SpaceInfo`` per entry of the JSON response.

        Raises:
            requests.RequestException: On timeout, connection failure or a
                non-2xx response.
        """
        path = 'https://huggingface.co/api/spaces'
        # A timeout keeps a stalled connection from hanging the caller
        # forever; raise_for_status surfaces HTTP errors directly instead of
        # failing later while unpacking an error payload.
        response = requests.get(path, params={'author': author}, timeout=30)
        response.raise_for_status()
        return [SpaceInfo(**entry) for entry in response.json()]

    def add_spaces_to_table(self, organization_name: str,
                            df: pd.DataFrame) -> pd.DataFrame:
        """Look up the organization's Spaces and return ``df``.

        Args:
            organization_name: Author/organization whose Spaces are fetched.
            df: Paper table.

        Returns:
            ``df`` itself, unmodified.
        """
        spaces = self.load_space_info(organization_name)
        # NOTE(review): this lowercase-space-name -> URL mapping is built but
        # never merged into ``df``. Either join it onto the table or drop the
        # fetch; the intended matching rule is not visible in this file.
        name2space = {
            s.id.split('/')[1].lower(): f'https://huggingface.co/spaces/{s.id}'
            for s in spaces
        }
        return df

    @staticmethod
    def _row_to_html(row) -> str:
        """Render one ``DataFrame.itertuples()`` row as an HTML ``<tr>``.

        Non-string cells (e.g. NaN for an empty CSV field) in ``url``, ``pdf``,
        ``tldr`` and ``abstract`` are rendered as a missing link / empty cell
        rather than as the text ``nan``.
        """
        # NOTE(review): values are interpolated without HTML escaping; confirm
        # the CSV never contains markup-significant characters such as '<'.
        if isinstance(row.url, str):
            paper = f'<a href="{row.url}" target="_blank">{row.title}</a>'
        else:
            paper = row.title
        if isinstance(row.pdf, str):
            pdf = f'<a href="{row.pdf}" target="_blank">pdf</a>'
        else:
            pdf = ''
        tldr = row.tldr if isinstance(row.tldr, str) else ''
        abstract = row.abstract if isinstance(row.abstract, str) else ''
        return (
            '\n<tr>\n'
            f'<td>{paper}</td>\n'
            f'<td>{pdf}</td>\n'
            f'<td>{tldr}</td>\n'
            f'<td>{abstract}</td>\n'
            '</tr>'
        )

    def _preprocess_table(self) -> None:
        """Add the ``title_lowercase`` and ``html_table_content`` columns."""
        self.table = self.add_spaces_to_table(self.organization_name,
                                              self.table)
        # Lowercased once here so case-insensitive searches do not have to
        # lowercase every title on each query.
        self.table['title_lowercase'] = self.table.title.str.lower()
        self.table['html_table_content'] = [
            self._row_to_html(row) for row in self.table.itertuples()
        ]

    def render(self, search_query: str, case_sensitive: bool) -> tuple[int, str]:
        """Filter papers by title and return the match count and HTML table.

        Args:
            search_query: Text to look for in the title. It is matched as a
                literal substring, not as a regular expression. An empty
                query selects every paper.
            case_sensitive: If false, the query and titles are compared in
                lowercase.

        Returns:
            ``(number_of_matching_rows, html_table)``.
        """
        # The table was already passed through ``add_spaces_to_table`` in
        # ``_preprocess_table``; repeating that here would issue an HTTP
        # request on every search.
        df = self.table
        if search_query:
            if case_sensitive:
                titles, needle = df.title, search_query
            else:
                titles, needle = df.title_lowercase, search_query.lower()
            # regex=False: queries such as "C++" or "(" must not be parsed as
            # patterns. na=False: rows with a missing title never match and
            # cannot poison the boolean mask with NaN.
            df = df[titles.str.contains(needle, regex=False, na=False)]
        return len(df), self.to_html(df, self.table_header)

    @staticmethod
    def to_html(df: pd.DataFrame, table_header: str) -> str:
        """Wrap the header and the rows' pre-rendered HTML in ``<table>``.

        Args:
            df: Table with an ``html_table_content`` column of ``<tr>``
                fragments.
            table_header: HTML for the header row.

        Returns:
            The complete ``<table>`` markup.
        """
        table_data = ''.join(df.html_table_content)
        return f'\n<table>\n{table_header}\n{table_data}\n</table>'