File size: 4,362 Bytes
5092824
 
18c5186
5092824
 
 
 
 
6d5352f
5092824
6d5352f
5092824
 
 
18c5186
 
4810e2a
 
 
 
 
 
18c5186
5092824
 
6d5352f
5092824
 
 
 
6d5352f
5092824
 
 
 
4810e2a
 
5092824
 
 
 
 
 
18c5186
 
 
 
 
 
5092824
 
6d5352f
5092824
 
 
4810e2a
5092824
 
 
18c5186
5092824
 
 
 
 
2dc5d96
6d5352f
5092824
 
 
 
 
 
 
df43a99
 
 
5092824
 
2dc5d96
 
 
5092824
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
from __future__ import annotations

import re

import numpy as np
import pandas as pd


class PaperList:
    """Searchable, filterable paper table that renders itself as an HTML table.

    The table is loaded from a CSV file. The code reads the columns
    ``title``, ``authors``, ``arxiv``, ``github``, ``hf_paper``,
    ``hf_space``, ``hf_model``, ``hf_dataset``, ``n_linked_authors`` and
    ``n_authors``. Two derived columns are added at load time:
    ``title_lowercase`` (for case-insensitive search) and
    ``html_table_content`` (the pre-rendered ``<tr>`` markup of each row).
    """

    def __init__(self, csv_path: str = 'papers.csv') -> None:
        """Load the paper table and pre-render every row.

        Args:
            csv_path: Path of the CSV file to load. Defaults to
                ``'papers.csv'``, the file the class always read before this
                parameter existed.
        """
        self.organization_name = 'ICML2023'
        self.table = pd.read_csv(csv_path)
        self._preprocess_table()

        self.table_header = '''
            <tr>
                <td width="38%">Title</td>
                <td width="25%">Authors</td>
                <td width="5%">arXiv</td>
                <td width="5%">GitHub</td>
                <td width="7%">Paper pages</td>
                <td width="5%">Spaces</td>
                <td width="5%">Models</td>
                <td width="5%">Datasets</td>
                <td width="5%">Claimed</td>
            </tr>'''

    @staticmethod
    def _link(url: object, label: str) -> str:
        """Return an ``<a>`` tag for *url*, or ``''`` when the cell is not a string."""
        if not isinstance(url, str):
            # Missing CSV cells are not strings (NaN/None), so they render empty.
            return ''
        return f'<a href="{url}" target="_blank">{label}</a>'

    @staticmethod
    def _claimed_cell(n_linked: object, n_authors: object) -> str:
        """Format the "Claimed" cell as ``linked/total`` plus a check mark.

        Returns ``''`` when the number of linked authors is missing. The
        check mark is shown only when at least one author is linked.
        """
        if pd.isna(n_linked):
            return ''
        total = '' if pd.isna(n_authors) else int(n_authors)
        mark = '✅' if n_linked > 0 else ''
        return f'{int(n_linked)}/{total} {mark}'

    @staticmethod
    def _title_mask(titles: pd.Series, query: str) -> pd.Series:
        """Return a boolean mask of the titles that contain *query*.

        *query* is treated as a regular expression when it compiles as one.
        Otherwise it is matched as a literal substring, so that queries such
        as ``"("`` or ``"c++"`` filter the table instead of raising.
        Missing titles never match.
        """
        try:
            re.compile(query)
        except re.error:
            use_regex = False
        else:
            use_regex = True
        return titles.str.contains(query, regex=use_regex, na=False)

    def _preprocess_table(self) -> None:
        """Add the ``title_lowercase`` and ``html_table_content`` columns."""
        self.table['title_lowercase'] = self.table.title.str.lower()

        rows = []
        for paper in self.table.itertuples():
            title = f'{paper.title}'
            arxiv = self._link(paper.arxiv, 'arXiv')
            github = self._link(paper.github, 'GitHub')
            hf_paper = self._link(paper.hf_paper, 'Paper page')
            hf_space = self._link(paper.hf_space, 'Space')
            hf_model = self._link(paper.hf_model, 'Model')
            hf_dataset = self._link(paper.hf_dataset, 'Dataset')
            claimed_paper = self._claimed_cell(paper.n_linked_authors,
                                               paper.n_authors)
            rows.append(f'''
                <tr>
                    <td>{title}</td>
                    <td>{paper.authors}</td>
                    <td>{arxiv}</td>
                    <td>{github}</td>
                    <td>{hf_paper}</td>
                    <td>{hf_space}</td>
                    <td>{hf_model}</td>
                    <td>{hf_dataset}</td>
                    <td>{claimed_paper}</td>
                </tr>''')
        self.table['html_table_content'] = rows

    def render(self, search_query: str, case_sensitive: bool,
               filter_names: list[str]) -> tuple[str, str]:
        """Search and filter the table, then render the remaining rows.

        Args:
            search_query: Text to look for in the titles; an empty string
                disables the search. A valid regular expression is matched
                as one, anything else as a literal substring.
            case_sensitive: When false, the lower-cased query is matched
                against the lower-cased titles.
            filter_names: Any of ``'arXiv'``, ``'GitHub'``, ``'Space'``,
                ``'Model'``, ``'Dataset'``; each name present keeps only the
                rows that have the corresponding link.

        Returns:
            A ``(summary, html)`` tuple: ``summary`` reads
            ``'<rows> (<claimed> claimed)'`` and ``html`` is the rendered
            table.
        """
        df = self.table
        if search_query:
            if case_sensitive:
                mask = self._title_mask(df.title, search_query)
            else:
                mask = self._title_mask(df.title_lowercase,
                                        search_query.lower())
            df = df[mask]
        df = self.filter_table(
            df,
            has_arxiv='arXiv' in filter_names,
            has_github='GitHub' in filter_names,
            has_hf_space='Space' in filter_names,
            has_hf_model='Model' in filter_names,
            has_hf_dataset='Dataset' in filter_names,
        )
        # Comparing NaN with 0 is False, so unclaimed papers are not counted.
        n_claimed = int((df.n_linked_authors > 0).sum())
        return f'{len(df)} ({n_claimed} claimed)', self.to_html(
            df, self.table_header)

    @staticmethod
    def filter_table(df: pd.DataFrame, has_arxiv: bool, has_github: bool,
                     has_hf_space: bool, has_hf_model: bool,
                     has_hf_dataset: bool) -> pd.DataFrame:
        """Keep only the rows that have a value in every requested column.

        Each true flag drops the rows whose matching column (``arxiv``,
        ``github``, ``hf_space``, ``hf_model``, ``hf_dataset``) is missing.
        """
        requirements = (
            (has_arxiv, 'arxiv'),
            (has_github, 'github'),
            (has_hf_space, 'hf_space'),
            (has_hf_model, 'hf_model'),
            (has_hf_dataset, 'hf_dataset'),
        )
        for required, column in requirements:
            if required:
                df = df[df[column].notna()]
        return df

    @staticmethod
    def to_html(df: pd.DataFrame, table_header: str) -> str:
        """Wrap *table_header* and the pre-rendered rows of *df* in a ``<table>``."""
        table_data = ''.join(df.html_table_content)
        return f'''
        <table>
            {table_header}
            {table_data}
        </table>'''