davanstrien HF staff committed on
Commit
aaf47df
1 Parent(s): a859d40
Files changed (3) hide show
  1. app.py +84 -0
  2. requirements.in +2 -0
  3. requirements.txt +180 -0
app.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import requests
3
+
4
+
5
def parse_arxiv_id_from_paper_url(url):
    """Return the last path segment of a paper URL, used as the arXiv id.

    Args:
        url: A paper page URL such as
            ``https://huggingface.co/papers/2309.12307``. A bare id is
            returned unchanged.

    Returns:
        The final non-empty path segment of ``url`` with surrounding
        whitespace, any query string / fragment, and trailing slashes
        removed.
    """
    # Drop anything after the path so "?foo=bar" or "#section" does not end
    # up inside the id that is later sent to the recommendation API.
    path = url.strip().split("#", 1)[0].split("?", 1)[0]
    # A trailing slash would otherwise make the last segment an empty string.
    return path.rstrip("/").split("/")[-1]
7
+
8
+
9
def get_recommendations_from_semantic_scholar(
    semantic_scholar_id: str, *, limit: int = 10, timeout: float = 10.0
):
    """Fetch recommended papers for one paper from the Semantic Scholar API.

    Args:
        semantic_scholar_id: Identifier of the seed paper, sent as the only
            entry of ``positivePaperIds`` (callers in this file pass
            ``"ArXiv:<id>"``).
        limit: Value of the ``limit`` query parameter (defaults to 10).
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The ``recommendedPapers`` value from the JSON response; each entry is
        requested with the ``externalIds``, ``title`` and ``year`` fields.

    Raises:
        gr.Error: If the request fails, returns an error status, or the
            response does not contain ``recommendedPapers``.
    """
    try:
        r = requests.post(
            "https://api.semanticscholar.org/recommendations/v1/papers/",
            json={
                "positivePaperIds": [semantic_scholar_id],
            },
            params={"fields": "externalIds,title,year", "limit": limit},
            # requests waits indefinitely unless a timeout is given.
            timeout=timeout,
        )
        # Surface HTTP errors (unknown paper, rate limit, ...) explicitly
        # instead of failing later with a bare KeyError.
        r.raise_for_status()
        return r.json()["recommendedPapers"]
    except (requests.RequestException, KeyError, ValueError) as e:
        print(f"Error getting recommendations for {semantic_scholar_id}: {e}")
        raise gr.Error(
            f"Error getting recommendations for {semantic_scholar_id}: {e}"
        ) from e
18
+
19
+
20
+ # arxiv_id = parse_arxiv_id_from_paper_url(paper_page)
21
+
22
+ # recommendations = get_recommendations_from_semantic_scholar(f"ArXiv:{arxiv_id}")
23
+ # print(recommendations)
24
+
25
+
26
def filter_recommendations(recommendations, max_paper_count=5):
    """Keep only recommendations that have an arXiv id, capped in number.

    Args:
        recommendations: Iterable of dicts; an entry is kept when its
            ``externalIds`` mapping has a non-``None`` ``"ArXiv"`` value.
        max_paper_count: Maximum number of entries to return (default 5).

    Returns:
        A list with at most ``max_paper_count`` of the arXiv-backed
        entries, in their original order.
    """
    # Tolerate entries whose "externalIds" is missing or None rather than
    # raising KeyError/AttributeError on them.
    arxiv_papers = [
        r
        for r in recommendations
        if (r.get("externalIds") or {}).get("ArXiv") is not None
    ]
    # Slicing already handles lists shorter than the cap.
    return arxiv_papers[:max_paper_count]
34
+
35
+
36
+ # filtered_recommendations = filter_recommendations(recommendations)
37
+
38
+
39
def get_paper_title_from_arxiv_id(arxiv_id):
    """Look up a paper's title through the Hugging Face papers API.

    Args:
        arxiv_id: Identifier appended to
            ``https://huggingface.co/api/papers/``.

    Returns:
        The ``"title"`` field of the JSON response.

    Raises:
        gr.Error: If the request fails or the response has no ``"title"``;
            the original exception is chained as the cause.
    """
    try:
        response = requests.get(
            f"https://huggingface.co/api/papers/{arxiv_id}",
            # requests waits indefinitely unless a timeout is given.
            timeout=10,
        )
        # Report HTTP errors directly instead of as a missing "title" key.
        response.raise_for_status()
        return response.json()["title"]
    except Exception as e:
        print(f"Error getting paper title for {arxiv_id}: {e}")
        # The message must be an f-string, otherwise users see the literal
        # "{arxiv_id}: {e}" placeholders.
        raise gr.Error(f"Error getting paper title for {arxiv_id}: {e}") from e
47
+
48
+
49
def format_recommendation_into_markdown(arxiv_id, recommendations):
    """Render the recommendations for a paper as a Markdown comment.

    Args:
        arxiv_id: Id of the seed paper; used to fetch its title and to build
            its Hugging Face papers link.
        recommendations: Iterable of dicts providing ``title``, ``year`` and
            ``externalIds["ArXiv"]``.

    Returns:
        A Markdown string: a heading line linking the seed paper, an
        attribution line, then one bullet per recommendation.
    """
    paper_title = get_paper_title_from_arxiv_id(arxiv_id)
    paper_url = f"https://huggingface.co/papers/{arxiv_id}"
    # Collect the pieces and join once at the end.
    pieces = [
        f"Recommended papers for [{paper_title}]({paper_url})\n\n",
        "The following papers were recommended by the Semantic Scholar API \n\n",
    ]
    for rec in recommendations:
        link = f"https://huggingface.co/papers/{rec['externalIds']['ArXiv']}"
        pieces.append(f"* [{rec['title']}]({link}) ({rec['year']})\n")
    return "".join(pieces)
58
+
59
+
60
def return_recommendations(url):
    """Turn a paper URL into a Markdown list of recommended arXiv papers.

    Parses the id from ``url``, queries Semantic Scholar with
    ``"ArXiv:<id>"``, filters the results, and formats them as Markdown.
    """
    paper_id = parse_arxiv_id_from_paper_url(url)
    shortlisted = filter_recommendations(
        get_recommendations_from_semantic_scholar(f"ArXiv:{paper_id}")
    )
    return format_recommendation_into_markdown(paper_id, shortlisted)
65
+
66
+
67
# Static copy and example inputs shown in the Gradio UI.
title = "Semantic Scholar Paper Recommender"
description = (
    "Paste a link to a paper on Hugging Face Papers and get recommendations "
    "for similar papers from Semantic Scholar."
)
examples = [
    "https://huggingface.co/papers/2309.12307",
    "https://huggingface.co/papers/2211.10086",
]

# One-line text input for the paper URL; Markdown output for the result.
interface = gr.Interface(
    fn=return_recommendations,
    inputs=gr.Textbox(lines=1),
    outputs=gr.Markdown(),
    title=title,
    description=description,
    examples=examples,
)
interface.launch()
requirements.in ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ gradio
2
+ requests
requirements.txt ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # This file is autogenerated by pip-compile with Python 3.11
3
+ # by the following command:
4
+ #
5
+ # pip-compile
6
+ #
7
+ aiofiles==23.2.1
8
+ # via gradio
9
+ altair==5.1.1
10
+ # via gradio
11
+ annotated-types==0.5.0
12
+ # via pydantic
13
+ anyio==3.7.1
14
+ # via
15
+ # fastapi
16
+ # httpcore
17
+ # starlette
18
+ attrs==23.1.0
19
+ # via
20
+ # jsonschema
21
+ # referencing
22
+ certifi==2023.7.22
23
+ # via
24
+ # httpcore
25
+ # httpx
26
+ # requests
27
+ charset-normalizer==3.2.0
28
+ # via requests
29
+ click==8.1.7
30
+ # via uvicorn
31
+ contourpy==1.1.1
32
+ # via matplotlib
33
+ cycler==0.11.0
34
+ # via matplotlib
35
+ fastapi==0.103.1
36
+ # via gradio
37
+ ffmpy==0.3.1
38
+ # via gradio
39
+ filelock==3.12.4
40
+ # via huggingface-hub
41
+ fonttools==4.42.1
42
+ # via matplotlib
43
+ fsspec==2023.9.2
44
+ # via
45
+ # gradio-client
46
+ # huggingface-hub
47
+ gradio==3.45.1
48
+ # via -r requirements.in
49
+ gradio-client==0.5.2
50
+ # via gradio
51
+ h11==0.14.0
52
+ # via
53
+ # httpcore
54
+ # uvicorn
55
+ httpcore==0.18.0
56
+ # via httpx
57
+ httpx==0.25.0
58
+ # via
59
+ # gradio
60
+ # gradio-client
61
+ huggingface-hub==0.17.3
62
+ # via
63
+ # gradio
64
+ # gradio-client
65
+ idna==3.4
66
+ # via
67
+ # anyio
68
+ # httpx
69
+ # requests
70
+ importlib-resources==6.1.0
71
+ # via gradio
72
+ jinja2==3.1.2
73
+ # via
74
+ # altair
75
+ # gradio
76
+ jsonschema==4.19.1
77
+ # via altair
78
+ jsonschema-specifications==2023.7.1
79
+ # via jsonschema
80
+ kiwisolver==1.4.5
81
+ # via matplotlib
82
+ markupsafe==2.1.3
83
+ # via
84
+ # gradio
85
+ # jinja2
86
+ matplotlib==3.8.0
87
+ # via gradio
88
+ numpy==1.26.0
89
+ # via
90
+ # altair
91
+ # contourpy
92
+ # gradio
93
+ # matplotlib
94
+ # pandas
95
+ orjson==3.9.7
96
+ # via gradio
97
+ packaging==23.1
98
+ # via
99
+ # altair
100
+ # gradio
101
+ # gradio-client
102
+ # huggingface-hub
103
+ # matplotlib
104
+ pandas==2.1.1
105
+ # via
106
+ # altair
107
+ # gradio
108
+ pillow==10.0.1
109
+ # via
110
+ # gradio
111
+ # matplotlib
112
+ pydantic==2.4.1
113
+ # via
114
+ # fastapi
115
+ # gradio
116
+ pydantic-core==2.10.1
117
+ # via pydantic
118
+ pydub==0.25.1
119
+ # via gradio
120
+ pyparsing==3.1.1
121
+ # via matplotlib
122
+ python-dateutil==2.8.2
123
+ # via
124
+ # matplotlib
125
+ # pandas
126
+ python-multipart==0.0.6
127
+ # via gradio
128
+ pytz==2023.3.post1
129
+ # via pandas
130
+ pyyaml==6.0.1
131
+ # via
132
+ # gradio
133
+ # huggingface-hub
134
+ referencing==0.30.2
135
+ # via
136
+ # jsonschema
137
+ # jsonschema-specifications
138
+ requests==2.31.0
139
+ # via
140
+ # -r requirements.in
141
+ # gradio
142
+ # gradio-client
143
+ # huggingface-hub
144
+ rpds-py==0.10.3
145
+ # via
146
+ # jsonschema
147
+ # referencing
148
+ semantic-version==2.10.0
149
+ # via gradio
150
+ six==1.16.0
151
+ # via python-dateutil
152
+ sniffio==1.3.0
153
+ # via
154
+ # anyio
155
+ # httpcore
156
+ # httpx
157
+ starlette==0.27.0
158
+ # via fastapi
159
+ toolz==0.12.0
160
+ # via altair
161
+ tqdm==4.66.1
162
+ # via huggingface-hub
163
+ typing-extensions==4.8.0
164
+ # via
165
+ # fastapi
166
+ # gradio
167
+ # gradio-client
168
+ # huggingface-hub
169
+ # pydantic
170
+ # pydantic-core
171
+ tzdata==2023.3
172
+ # via pandas
173
+ urllib3==2.0.5
174
+ # via requests
175
+ uvicorn==0.23.2
176
+ # via gradio
177
+ websockets==11.0.3
178
+ # via
179
+ # gradio
180
+ # gradio-client