File size: 3,775 Bytes
08080f2
 
8c0737d
 
08080f2
9f6546e
093f447
4a02364
08080f2
 
b4eb3ca
9fb4b90
08080f2
d940698
 
 
 
 
01f4c07
d940698
 
 
5884212
2716ba4
9f6546e
093f447
 
 
 
 
 
 
 
 
 
b5de4af
 
 
 
9f6546e
 
 
 
8c0737d
 
 
 
 
 
 
 
 
 
aa632a5
2716ba4
4a02364
d940698
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4a02364
 
 
9f6546e
 
9fb4b90
 
 
 
 
aa632a5
2176657
28b01c9
c9a6eb5
9fb4b90
3ace5d1
d940698
 
4a02364
d940698
8c0737d
d940698
4a02364
 
9f6546e
 
 
 
 
 
4a02364
093f447
b5de4af
 
 
 
 
093f447
 
 
 
 
 
4a02364
3ace5d1
57102fb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#!/usr/bin/env python

import datetime

import gradio as gr
import pandas as pd
from apscheduler.schedulers.background import BackgroundScheduler
from gradio_calendar import Calendar

from papers import PaperList, get_df

# Markdown heading for the page (passed to gr.Markdown in the UI below);
# the heading text links to the Daily Papers page.
DESCRIPTION = "# [Daily Papers](https://huggingface.co/papers)"

# Markdown footer listing related Spaces (passed to gr.Markdown after the table).
# The backslash right after the opening quotes is a line continuation, so the
# string starts at "Related useful Spaces:" rather than with an empty line.
FOOT_NOTE = """\
Related useful Spaces:
- [Semantic Scholar Paper Recommender](https://huggingface.co/spaces/librarian-bots/recommend_similar_papers) by [davanstrien](https://huggingface.co/davanstrien)
- [ArXiv CS RAG](https://huggingface.co/spaces/bishmoy/Arxiv-CS-RAG) by [bishmoy](https://huggingface.co/bishmoy)
- [Paper Q&A](https://huggingface.co/spaces/chansung/paper_qa) by [chansung](https://huggingface.co/chansung)
- [dailypapershackernews](https://huggingface.co/spaces/akhaliq/dailypapershackernews) by [akhaliq](https://huggingface.co/akhaliq)
"""


# Current paper list, built once when this module is imported.
# update_paper_list() rebinds this global, so the callbacks in this file read
# it at call time instead of holding on to an earlier instance.
paper_list = PaperList(get_df())


def update_paper_list() -> None:
    """Rebuild the module-level ``paper_list`` from a freshly fetched dataframe.

    Takes no arguments and returns nothing; the only effect is rebinding the
    ``paper_list`` global to a new ``PaperList`` built from ``get_df()``.
    """
    global paper_list
    latest_df = get_df()
    paper_list = PaperList(latest_df)


# Background refresh of the paper list. These statements run at import time,
# so the scheduler is started as a side effect of loading this module.
scheduler = BackgroundScheduler()
# Cron trigger with hour="*" in the UTC timezone: update_paper_list is the job
# callable and is scheduled for every hour.
# NOTE(review): misfire_grace_time=60 presumably allows a run to start up to
# 60 seconds late before it is skipped — confirm against the APScheduler docs.
scheduler.add_job(func=update_paper_list, trigger="cron", hour="*", timezone="UTC", misfire_grace_time=60)
scheduler.start()


def update_df() -> gr.Dataframe:
    """Return a ``gr.Dataframe`` whose value is the current prettified table.

    ``paper_list`` is read when the function is called, so the result reflects
    whatever instance the module global is bound to at that moment.
    """
    prettified = paper_list.df_prettified
    return gr.Dataframe(value=prettified)


def update_num_papers(df: pd.DataFrame) -> str:
    """Format how many rows *df* has relative to the full paper list.

    Args:
        df: The table whose rows are counted with ``len``.

    Returns:
        A string of the form ``"<rows in df> / <rows in paper_list.df_raw>"``.
    """
    shown = len(df)
    total = len(paper_list.df_raw)
    return f"{shown} / {total}"


def search(
    start_date: datetime.datetime,
    end_date: datetime.datetime,
    search_title: str,
    search_abstract: str,
    max_num_to_retrieve: int,
) -> pd.DataFrame:
    """Run a search on the current ``paper_list`` and return its result.

    All five arguments are forwarded positionally and unchanged, in the order
    declared here, to ``paper_list.search``. The filtering itself is
    implemented there, not in this function.

    Args:
        start_date: Value of the "Start date" calendar.
        end_date: Value of the "End date" calendar.
        search_title: Text from the "Search title" box.
        search_abstract: Text from the "Search abstract" box.
        max_num_to_retrieve: Value of the "Max number to retrieve" slider
            (the UI notes it is used only for search on abstracts).

    Returns:
        Whatever ``paper_list.search`` returns (annotated as a DataFrame).
    """
    query_args = (start_date, end_date, search_title, search_abstract, max_num_to_retrieve)
    return paper_list.search(*query_args)


# Build the UI: heading, search controls, a paper counter, the results table
# and the footnote, followed by the event wiring. Components are created in
# the order they appear here.
with gr.Blocks(css_paths="style.css") as demo:
    gr.Markdown(DESCRIPTION)
    # Search controls: title box, abstract box + result-count slider, date range.
    with gr.Group():
        search_title = gr.Textbox(label="Search title")
        with gr.Row():
            with gr.Column(scale=4):
                search_abstract = gr.Textbox(
                    label="Search abstract",
                    info="The result may not be accurate as the abstract does not contain all the information.",
                )
            with gr.Column(scale=1):
                max_num_to_retrieve = gr.Slider(
                    label="Max number to retrieve",
                    info="This is used only for search on abstracts.",
                    minimum=1,
                    # Computed once, when the UI is built, from the paper list
                    # loaded at import time.
                    # NOTE(review): nothing below updates this bound when
                    # update_paper_list() swaps in a newer list.
                    maximum=len(paper_list.df_raw),
                    step=1,
                    value=100,
                )
        with gr.Row():
            # NOTE(review): "2023-05-05" is presumably the date of the earliest
            # available paper — confirm. No explicit value is given for the end date.
            start_date = Calendar(label="Start date", type="datetime", value="2023-05-05")
            end_date = Calendar(label="End date", type="datetime")

    # Read-only counter. Initialised from the full raw table, so it starts out
    # as "<total> / <total>".
    num_papers = gr.Textbox(label="Number of papers", value=update_num_papers(paper_list.df_raw), interactive=False)
    # Results table, initially showing the whole prettified list.
    # NOTE(review): column_widths lists six entries, which presumably matches
    # the six columns of df_prettified — confirm against PaperList.
    df = gr.Dataframe(
        value=paper_list.df_prettified,
        datatype=paper_list.column_datatype,
        type="pandas",
        interactive=False,
        max_height=1000,
        elem_id="table",
        column_widths=["10%", "10%", "60%", "10%", "5%", "5%"],
        wrap=True,
    )

    gr.Markdown(FOOT_NOTE)

    # A change of either date, or a submit on either text box, runs search()
    # and writes its result to the table; the counter is then recomputed from
    # the table contents. The slider is an input only: moving it does not by
    # itself trigger a new search.
    gr.on(
        triggers=[start_date.change, end_date.change, search_title.submit, search_abstract.submit],
        fn=search,
        inputs=[start_date, end_date, search_title, search_abstract, max_num_to_retrieve],
        outputs=df,
        api_name=False,
    ).then(
        fn=update_num_papers,
        inputs=df,
        outputs=num_papers,
        queue=False,
        api_name=False,
    )
    # On page load, push the current paper_list into the table (the scheduler
    # may have replaced it since the UI was built) and refresh the counter.
    demo.load(
        fn=update_df,
        outputs=df,
        queue=False,
        api_name=False,
    ).then(
        fn=update_num_papers,
        inputs=df,
        outputs=num_papers,
        queue=False,
        api_name=False,
    )

# Script entry point: enable the queue on the app and launch it.
# NOTE(review): api_open=False / show_api=False appear intended to keep the
# programmatic API closed and hidden — confirm against the Gradio docs.
if __name__ == "__main__":
    demo.queue(api_open=False).launch(show_api=False)