Spaces:

NAACL2022
/

NAACL2022-papers

Sleeping

App Files Files Community

hysts HF staff committed on Jul 13, 2022

Commit

0396aac

1 Parent(s): 90ca727

Remake this Space

Browse files

Files changed (8) hide show

.gitattributes +1 -0
.pre-commit-config.yaml +35 -0
.style.yapf +5 -0
app.py +74 -17
paper_list.py +102 -0
papers.csv +0 -0
requirements.txt +1 -1
style.css +22 -0

.gitattributes CHANGED Viewed

@@ -1,3 +1,4 @@
 *.7z filter=lfs diff=lfs merge=lfs -text
 *.arrow filter=lfs diff=lfs merge=lfs -text
 *.bin filter=lfs diff=lfs merge=lfs -text

+*.csv filter=lfs diff=lfs merge=lfs -text
 *.7z filter=lfs diff=lfs merge=lfs -text
 *.arrow filter=lfs diff=lfs merge=lfs -text
 *.bin filter=lfs diff=lfs merge=lfs -text

.pre-commit-config.yaml ADDED Viewed

	@@ -0,0 +1,35 @@

+repos:
+- repo: https://github.com/pre-commit/pre-commit-hooks
+  rev: v4.2.0
+  hooks:
+  - id: check-executables-have-shebangs
+  - id: check-json
+  - id: check-merge-conflict
+  - id: check-shebang-scripts-are-executable
+  - id: check-toml
+  - id: check-yaml
+  - id: double-quote-string-fixer
+  - id: end-of-file-fixer
+  - id: mixed-line-ending
+    args: ['--fix=lf']
+  - id: requirements-txt-fixer
+  - id: trailing-whitespace
+- repo: https://github.com/myint/docformatter
+  rev: v1.4
+  hooks:
+  - id: docformatter
+    args: ['--in-place']
+- repo: https://github.com/pycqa/isort
+  rev: 5.10.1
+  hooks:
+    - id: isort
+- repo: https://github.com/pre-commit/mirrors-mypy
+  rev: v0.812
+  hooks:
+    - id: mypy
+      args: ['--ignore-missing-imports']
+- repo: https://github.com/google/yapf
+  rev: v0.32.0
+  hooks:
+  - id: yapf
+    args: ['--parallel', '--in-place']

.style.yapf ADDED Viewed

	@@ -0,0 +1,5 @@

+[style]
+based_on_style = pep8
+blank_line_before_nested_class_or_def = false
+spaces_before_comment = 2
+split_before_logical_operator = true

app.py CHANGED Viewed

@@ -1,24 +1,81 @@
 import gradio as gr
-import pandas as pd
-def update():
-    return pd.read_csv('papers.csv', header=0)
-block = gr.Blocks()
-with block:
-    gr.Markdown(
-        """# Papers List for NAACL 2022 as part of the NAACL event, To learn more and join, see <a href="https://huggingface.co/NAACL2022" target="_blank" style="text-decoration: underline">NAACL event</a>"""
-    )
-    with gr.Tabs():
-        with gr.TabItem("NAACL papers list"):
-            with gr.Row():
-                data = gr.Dataframe(type="pandas", wrap=True)
-            with gr.Row():
-                data_run = gr.Button("Refresh")
-                data_run.click(update, inputs=None, outputs=data)
-    block.load(update, inputs=None, outputs=data)
-block.launch()

+#!/usr/bin/env python
+from __future__ import annotations
 import gradio as gr
+from paper_list import PaperList
+# Markdown heading rendered at the top of the page by main().
+DESCRIPTION = '# NAACL 2022 Papers'
+# Markdown list of reference links rendered by main() below the results table.
+NOTES = '''
+- [NAACL 2022](https://2022.naacl.org/)
+'''
+# Raw <img> tag for a visitor badge, rendered last by main(); its id matches
+# the `img#visitor-badge` rule in style.css.
+FOOTER = '<img id="visitor-badge" alt="visitor badge" src="https://visitor-badge.glitch.me/badge?page_id=naacl2022.papers" />'
+def main():
+    """Build the NAACL 2022 paper browser UI and launch it.
+
+    The page consists of a title search box, a case-sensitivity checkbox,
+    two checkbox groups (resource filters and paper categories), a search
+    button, a result counter and an HTML table. ``PaperList.render`` is run
+    once when the page loads and again on every click of the search button,
+    always with the same inputs and outputs.
+    """
+    papers = PaperList()
+
+    resource_filters = ['arXiv', 'GitHub', 'HF Space', 'HF Model', 'HF Dataset']
+    paper_category_names = [
+        'Long Paper',
+        'Short Paper',
+        'Special Theme Paper',
+        'Findings',
+        'Industry Track',
+    ]
+
+    with gr.Blocks(css='style.css') as demo:
+        gr.Markdown(DESCRIPTION)
+
+        # Query controls.
+        search_box = gr.Textbox(
+            label='Search Title',
+            placeholder='You can search for titles with regular expressions. e.g. (?<!sur)face',
+        )
+        case_sensitive = gr.Checkbox(label='Case Sensitive')
+        filter_names = gr.CheckboxGroup(choices=resource_filters,
+                                        label='Filter')
+        # Every category is ticked by default.
+        paper_categories = gr.CheckboxGroup(choices=paper_category_names,
+                                            value=paper_category_names,
+                                            label='Paper Categories')
+        search_button = gr.Button('Search')
+
+        # Result widgets.
+        number_of_papers = gr.Textbox(label='Number of Papers Found')
+        table = gr.HTML(show_label=False)
+
+        gr.Markdown(NOTES)
+        gr.Markdown(FOOTER)
+
+        # The initial page load and the button click share one wiring.
+        query_inputs = [
+            search_box,
+            case_sensitive,
+            filter_names,
+            paper_categories,
+        ]
+        result_outputs = [number_of_papers, table]
+        demo.load(papers.render,
+                  inputs=query_inputs,
+                  outputs=result_outputs)
+        search_button.click(papers.render,
+                            inputs=query_inputs,
+                            outputs=result_outputs)
+
+    demo.launch(enable_queue=True, share=False)
+
+
+if __name__ == '__main__':
+    main()

paper_list.py ADDED Viewed

	@@ -0,0 +1,102 @@

+from __future__ import annotations
+import pandas as pd
+class PaperList:
+    """Searchable table of papers loaded from ``papers.csv``.
+
+    The HTML ``<tr>`` fragment of every paper is built once at construction
+    time and stored in the ``html_table_content`` column, so answering a
+    query only requires filtering the DataFrame and joining the pre-rendered
+    fragments.
+    """
+
+    def __init__(self):
+        self.table = pd.read_csv('papers.csv')
+        self._preprocess_table()
+        self.table_header = '''
+            <tr>
+                <td width="50%">Paper</td>
+                <td width="22%">Authors</td>
+                <td width="4%">pdf</td>
+                <td width="4%">category</td>
+                <td width="4%">arXiv</td>
+                <td width="4%">GitHub</td>
+                <td width="4%">HF Spaces</td>
+                <td width="4%">HF Models</td>
+                <td width="4%">HF Datasets</td>
+            </tr>'''
+
+    @staticmethod
+    def _link(url, label, fallback=''):
+        """Return an ``<a>`` tag opening *url* in a new tab, or *fallback*.
+
+        pandas reads an empty CSV cell as NaN (a float), so any non-string
+        *url* means "no link available" and *fallback* is returned as is.
+        """
+        if isinstance(url, str):
+            return f'<a href="{url}" target="_blank">{label}</a>'
+        return fallback
+
+    def _preprocess_table(self) -> None:
+        """Add the derived ``title_lowercase`` and ``html_table_content`` columns."""
+        # No longer read by render(); kept so the table keeps its columns.
+        self.table['title_lowercase'] = self.table.title.str.lower()
+        link = self._link
+        rows = []
+        for row in self.table.itertuples():
+            # A paper without a URL is shown as its bare title.
+            paper = link(row.url, row.title, fallback=row.title)
+            pdf = link(row.pdf, 'pdf')
+            arxiv = link(row.arxiv, 'arXiv')
+            github = link(row.github, 'GitHub')
+            hf_space = link(row.hf_space, 'Space')
+            hf_model = link(row.hf_model, 'Model')
+            hf_dataset = link(row.hf_dataset, 'Dataset')
+            row_html = f'''
+                <tr>
+                    <td>{paper}</td>
+                    <td>{row.authors}</td>
+                    <td>{pdf}</td>
+                    <td>{row.category}</td>
+                    <td>{arxiv}</td>
+                    <td>{github}</td>
+                    <td>{hf_space}</td>
+                    <td>{hf_model}</td>
+                    <td>{hf_dataset}</td>
+                </tr>'''
+            rows.append(row_html)
+        self.table['html_table_content'] = rows
+
+    # Backward-compatible alias for the original, misspelled method name.
+    _preprcess_table = _preprocess_table
+
+    def render(self, search_query: str, case_sensitive: bool,
+               filter_names: list[str],
+               paper_categories: list[str]) -> tuple[int, str]:
+        """Filter the papers and return ``(number of matches, HTML table)``.
+
+        Args:
+            search_query: Regular expression searched for in the titles; an
+                empty query matches every paper.
+            case_sensitive: Whether the title search distinguishes case.
+            filter_names: Subset of ``'arXiv'``, ``'GitHub'``, ``'HF Space'``,
+                ``'HF Model'``, ``'HF Dataset'``; a paper must have a value in
+                every selected resource column.
+            paper_categories: Categories to keep.
+        """
+        df = self.table
+        if search_query:
+            # Match against the original title and let the regex engine handle
+            # case. Lower-casing the pattern would corrupt escapes such as
+            # \S, \D or \W. na=False keeps rows with a missing title from
+            # putting NaN into the boolean mask.
+            matches = df.title.str.contains(search_query,
+                                            case=bool(case_sensitive),
+                                            na=False)
+            df = df[matches]
+        df = self.filter_table(
+            df,
+            has_arxiv='arXiv' in filter_names,
+            has_github='GitHub' in filter_names,
+            has_hf_space='HF Space' in filter_names,
+            has_hf_model='HF Model' in filter_names,
+            has_hf_dataset='HF Dataset' in filter_names,
+            paper_categories=paper_categories,
+        )
+        return len(df), self.to_html(df, self.table_header)
+
+    @staticmethod
+    def filter_table(df: pd.DataFrame, has_arxiv: bool, has_github: bool,
+                     has_hf_space: bool, has_hf_model: bool,
+                     has_hf_dataset: bool,
+                     paper_categories: list[str]) -> pd.DataFrame:
+        """Keep rows that have every requested resource and a selected category.
+
+        Each ``has_*`` flag, when true, drops the rows whose corresponding
+        column is missing. Rows whose ``category`` is not in
+        *paper_categories* are always dropped.
+        """
+        required = {
+            'arxiv': has_arxiv,
+            'github': has_github,
+            'hf_space': has_hf_space,
+            'hf_model': has_hf_model,
+            'hf_dataset': has_hf_dataset,
+        }
+        for column, wanted in required.items():
+            if wanted:
+                df = df[df[column].notna()]
+        return df[df.category.isin(set(paper_categories))]
+
+    @staticmethod
+    def to_html(df: pd.DataFrame, table_header: str) -> str:
+        """Wrap *table_header* and the pre-rendered rows of *df* in a ``<table>``."""
+        table_data = ''.join(df.html_table_content)
+        html = f'''
+        <table>
+            {table_header}
+            {table_data}
+        </table>'''
+        return html

papers.csv CHANGED Viewed

The diff for this file is too large to render. See raw diff

requirements.txt CHANGED Viewed

	@@ -1 +1 @@
1	- lxml


1	+ lxml

style.css ADDED Viewed

	@@ -0,0 +1,22 @@

+h1 {
+  text-align: center;
+}
+table a {
+  background-color: transparent;
+  color: #58a6ff;
+  text-decoration: none;
+}
+a:active,
+a:hover {
+  outline-width: 0;
+}
+a:hover {
+  text-decoration: underline;
+}
+table, th, td {
+  border: 1px solid;
+}
+img#visitor-badge {
+  display: block;
+  margin: auto;
+}