File size: 4,244 Bytes
5092824
 
18c5186
5092824
 
 
 
 
8235d81
 
6d5352f
5092824
8235d81
5092824
18c5186
 
4810e2a
 
 
 
 
 
18c5186
8235d81
5092824
6d5352f
8235d81
5092824
 
 
8235d81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5092824
6d5352f
5092824
 
 
4810e2a
5092824
 
 
18c5186
8235d81
5092824
8235d81
5092824
8235d81
6d5352f
5092824
 
 
 
 
8235d81
 
 
 
 
 
2dc5d96
8235d81
5092824
 
8235d81
 
 
 
 
 
 
 
5092824
 
 
 
 
 
 
 
 
 
 
 
 
 
8235d81
 
5092824
 
 
8235d81
5092824
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
from __future__ import annotations

import re

import numpy as np
import pandas as pd


class PaperList:
    """Searchable, filterable HTML listing of papers loaded from a CSV file.

    Every CSV row is rendered once, at construction time, into an HTML
    ``<tr>`` fragment stored in the ``html_table_content`` column, so that
    :meth:`render` only has to select rows and join the pre-built fragments.

    Columns read by this class: ``title``, ``authors``, ``arxiv``,
    ``github``, ``hf_paper``, ``hf_space``, ``hf_model``, ``hf_dataset``,
    ``n_linked_authors`` and ``n_authors``.
    """

    def __init__(self, csv_path: str = "papers.csv"):
        """Load the paper table and pre-render its HTML rows.

        Args:
            csv_path: Path of the CSV file to load. Defaults to
                ``"papers.csv"``, the location that used to be hard-coded.
        """
        # Not read anywhere inside this class; kept for external users.
        self.organization_name = "ICML2023"
        self.table = pd.read_csv(csv_path)
        self._preprocess_table()

        self.table_header = """
            <tr>
                <td width="38%">Title</td>
                <td width="25%">Authors</td>
                <td width="5%">arXiv</td>
                <td width="5%">GitHub</td>
                <td width="7%">Paper pages</td>
                <td width="5%">Spaces</td>
                <td width="5%">Models</td>
                <td width="5%">Datasets</td>
                <td width="5%">Claimed</td>
            </tr>"""

    @staticmethod
    def _link(url: object, label: str) -> str:
        """Return an anchor that opens *url* in a new tab, or ``""`` if *url* is not a string."""
        # pandas represents empty cells as NaN, so any non-str value means "no link".
        if not isinstance(url, str):
            return ""
        return f'<a href="{url}" target="_blank">{label}</a>'

    @staticmethod
    def _claimed_cell(n_linked_authors: object, n_authors: object) -> str:
        """Format the "Claimed" cell as ``"<linked>/<total> <mark>"``.

        Returns ``""`` when the linked-author count is missing. The check mark
        is only shown when at least one author is linked.
        """
        if pd.isna(n_linked_authors):
            return ""
        total = "" if pd.isna(n_authors) else int(n_authors)
        mark = "✅" if n_linked_authors > 0 else ""
        return f"{int(n_linked_authors)}/{total} {mark}"

    @staticmethod
    def _title_mask(titles: pd.Series, search_query: str, case_sensitive: bool) -> pd.Series:
        """Return a boolean mask of the titles that match *search_query*.

        The query is treated as a regular expression when it compiles as one;
        otherwise (e.g. an unbalanced ``"("``) it is matched as literal text
        instead of raising. Missing titles never match.
        """
        try:
            re.compile(search_query)
        except re.error:
            use_regex = False
        else:
            use_regex = True
        # ``case=False`` ignores case without rewriting the pattern; lowercasing
        # the pattern itself would turn escapes such as ``\W`` into ``\w``.
        return titles.str.contains(search_query, case=case_sensitive, regex=use_regex, na=False)

    def _preprocess_table(self) -> None:
        """Add the ``title_lowercase`` and ``html_table_content`` columns to ``self.table``."""
        # No longer used for searching; kept because other code may read it.
        self.table["title_lowercase"] = self.table.title.str.lower()

        # NOTE(review): cell values are interpolated into the markup unescaped;
        # this presumes the CSV content is trusted - confirm.
        rendered_rows = []
        for paper in self.table.itertuples():
            arxiv = self._link(paper.arxiv, "arXiv")
            github = self._link(paper.github, "GitHub")
            hf_paper = self._link(paper.hf_paper, "Paper page")
            hf_space = self._link(paper.hf_space, "Space")
            hf_model = self._link(paper.hf_model, "Model")
            hf_dataset = self._link(paper.hf_dataset, "Dataset")
            claimed_paper = self._claimed_cell(paper.n_linked_authors, paper.n_authors)
            html_row = f"""
                <tr>
                    <td>{paper.title}</td>
                    <td>{paper.authors}</td>
                    <td>{arxiv}</td>
                    <td>{github}</td>
                    <td>{hf_paper}</td>
                    <td>{hf_space}</td>
                    <td>{hf_model}</td>
                    <td>{hf_dataset}</td>
                    <td>{claimed_paper}</td>
                </tr>"""
            rendered_rows.append(html_row)
        self.table["html_table_content"] = rendered_rows

    def render(self, search_query: str, case_sensitive: bool, filter_names: list[str]) -> tuple[str, str]:
        """Filter the papers and render the result as an HTML table.

        Args:
            search_query: Text or regular expression to look for in titles.
                An empty query disables the title search.
            case_sensitive: Whether the title search distinguishes case.
            filter_names: Any of ``"arXiv"``, ``"GitHub"``, ``"Space"``,
                ``"Model"``, ``"Dataset"``; each one present keeps only the
                papers that have the corresponding link.

        Returns:
            A ``(summary, html)`` pair, where ``summary`` reads
            ``"<count> (<claimed> claimed)"`` and ``html`` is the table markup.
        """
        df = self.table
        if search_query:
            df = df[self._title_mask(df.title, search_query, case_sensitive)]
        df = self.filter_table(
            df,
            "arXiv" in filter_names,
            "GitHub" in filter_names,
            "Space" in filter_names,
            "Model" in filter_names,
            "Dataset" in filter_names,
        )
        n_claimed = int((df.n_linked_authors > 0).sum())
        return f"{len(df)} ({n_claimed} claimed)", self.to_html(df, self.table_header)

    @staticmethod
    def filter_table(
        df: pd.DataFrame,
        has_arxiv: bool,
        has_github: bool,
        has_hf_space: bool,
        has_hf_model: bool,
        has_hf_dataset: bool,
    ) -> pd.DataFrame:
        """Keep only the rows that have a value in every requested link column.

        Each ``has_*`` flag that is true drops the rows whose matching column
        (``arxiv``, ``github``, ``hf_space``, ``hf_model``, ``hf_dataset``) is
        null. Flags that are false impose no constraint.
        """
        requirements = (
            (has_arxiv, "arxiv"),
            (has_github, "github"),
            (has_hf_space, "hf_space"),
            (has_hf_model, "hf_model"),
            (has_hf_dataset, "hf_dataset"),
        )
        for required, column in requirements:
            if required:
                df = df[df[column].notna()]
        return df

    @staticmethod
    def to_html(df: pd.DataFrame, table_header: str) -> str:
        """Wrap *table_header* and the pre-rendered rows of *df* in a ``<table>`` element."""
        table_data = "".join(df.html_table_content)
        html = f"""
        <table>
            {table_header}
            {table_data}
        </table>"""
        return html