Moreno La Quatra
committed on
Commit
·
f1c1f0d
1
Parent(s):
1a23bf5
First commit app
Browse files- __pycache__/app.cpython-310.pyc +0 -0
- app.py +221 -0
__pycache__/app.cpython-310.pyc
ADDED
Binary file (5.77 kB). View file
|
|
app.py
ADDED
@@ -0,0 +1,221 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import requests
|
2 |
+
import xml.etree.ElementTree as ET
|
3 |
+
import re
|
4 |
+
from fuzzywuzzy import fuzz
|
5 |
+
from semanticscholar import SemanticScholar
|
6 |
+
import gradio as gr
|
7 |
+
|
8 |
+
def normalize_name(name):
    """Return ``name`` lower-cased with all non-word characters removed.

    Word characters are those matched by the regex class ``\\w``; everything
    else (spaces, hyphens, apostrophes, other punctuation) is dropped.
    """
    return re.sub(r'\W+', '', name.lower())


def create_bibtex_key(authors, year, title):
    """Build a citation key of the form ``surname_year_firstword``.

    Args:
        authors: Author names joined with " and ". The last
            whitespace-separated token of the first name is taken as the
            surname.
        year: Publication year; interpolated as-is. ``None`` or an empty
            string is rendered as ``nd``.
        title: Paper title. The first word that still has characters left
            after normalisation is used.

    Returns:
        The key string. Missing pieces fall back to ``unknown`` (surname)
        and ``untitled`` (title word) instead of raising ``IndexError``.
    """
    first_author = (authors or "").split(" and ")[0]
    name_tokens = first_author.split()
    surname = normalize_name(name_tokens[-1]) if name_tokens else ""

    # Normalise title words too, so punctuation such as ":" or "," never
    # ends up inside the key (a comma would terminate the key in BibTeX).
    title_slugs = (normalize_name(word) for word in (title or "").split())
    first_word_of_title = next((slug for slug in title_slugs if slug), "")

    year_part = "nd" if year in (None, "") else year
    return f"{surname or 'unknown'}_{year_part}_{first_word_of_title or 'untitled'}"
|
17 |
+
|
18 |
+
def is_title_match(input_title, db_title, threshold=90):
    """Return True when two titles are fuzzily equal, ignoring case.

    Args:
        input_title: Title supplied by the user.
        db_title: Title returned by a bibliographic database.
        threshold: Score that ``fuzz.ratio`` must exceed (strictly) for the
            titles to count as a match. Defaults to 90.

    Returns:
        ``False`` if either title is ``None`` or empty; otherwise whether
        the similarity score of the lower-cased titles exceeds ``threshold``.
    """
    # A missing title can never be a match; this also avoids calling
    # ``.lower()`` on None.
    if not input_title or not db_title:
        return False
    return fuzz.ratio(input_title.lower(), db_title.lower()) > threshold
|
20 |
+
|
21 |
+
def get_crossref_bibtex(title, rows=1):
    """Search CrossRef by title and return a BibTeX ``@article`` entry.

    The ``/works`` endpoint is queried with ``query.title``; the first
    returned item whose title passes ``is_title_match`` is formatted.

    Args:
        title: Paper title to search for.
        rows: Number of candidate records to request.

    Returns:
        The BibTeX entry as a string, or ``None`` when the title is blank,
        the request fails, the response is not HTTP 200 or not usable JSON,
        or no returned item matches the title.
    """
    if not title or not title.strip():
        return None

    url = "https://api.crossref.org/works"
    params = {"query.title": title, "rows": rows}

    # A timeout keeps a stalled connection from hanging the app; network
    # errors are reported as "not found" so the caller can try other sources.
    try:
        response = requests.get(url, params=params, timeout=30)
    except requests.RequestException as exc:
        print(f"CrossRef request failed: {exc}")
        return None
    if response.status_code != 200:
        return None

    try:
        items = response.json()["message"]["items"]
    except (ValueError, KeyError, TypeError):
        return None

    for item in items or []:
        # Records without a title cannot be matched; skip them.
        titles = item.get("title") or []
        if not titles or not is_title_match(title, titles[0]):
            continue

        db_title = titles[0]
        doi = item["DOI"]

        author_names = []
        for author in item.get("author", []):
            # Not every contributor has both "given" and "family"; fall back
            # to a single "name" field when neither part is present.
            parts = [part for part in (author.get("given"), author.get("family")) if part]
            full_name = " ".join(parts) or author.get("name", "")
            if full_name:
                author_names.append(full_name)
        authors = " and ".join(author_names) or "Unknown"

        # ``or`` also covers a present-but-empty list, which would otherwise
        # raise IndexError on ``[0]``.
        journal = (item.get("container-title") or ["Unknown"])[0]
        date_parts = item.get("issued", {}).get("date-parts") or [[None]]
        year = date_parts[0][0] if date_parts[0] else None
        bibtex_key = create_bibtex_key(authors, year, db_title)

        return "\n".join([
            f"@article{{{bibtex_key},",
            f"author = {{{authors}}},",
            f"title = {{{db_title}}},",
            f"journal = {{{journal}}},",
            f"year = {{{year}}},",
            f"doi = {{{doi}}},",
            f"url = {{https://doi.org/{doi}}}",
            "}",
        ])

    return None
|
50 |
+
|
51 |
+
def get_arxiv_bibtex(title, max_results=1):
    """Search arXiv by title and return a BibTeX entry for the first match.

    Args:
        title: Paper title to search for (sent as ``ti:<title>``).
        max_results: Maximum number of feed entries to request.

    Returns:
        The BibTeX entry as a string, or ``None`` when the title is blank,
        the request or XML parsing fails, the feed has no entries, or no
        entry title passes ``is_title_match``. "Not found" is always
        ``None`` so callers can test the result for truthiness.
    """
    if not title or not title.strip():
        return None

    url = "https://export.arxiv.org/api/query"
    params = {"search_query": f"ti:{title}", "start": 0, "max_results": max_results}
    print(f"Querying arXiv with params: {params}")

    try:
        response = requests.get(url, params=params, timeout=30)
    except requests.RequestException as exc:
        print(f"arXiv request failed: {exc}")
        return None
    if response.status_code != 200:
        return None

    try:
        root = ET.fromstring(response.text)
    except ET.ParseError as exc:
        print(f"Could not parse arXiv response: {exc}")
        return None

    namespace = "{http://www.w3.org/2005/Atom}"

    # Find all the entries returned
    entries = root.findall(f"{namespace}entry")
    if not entries:
        print("No entries found in arXiv.")
        return None

    for entry in entries:
        # Collapse internal whitespace: a title wrapped over several lines in
        # the feed would otherwise hurt the fuzzy match and leak newlines
        # into the BibTeX output.
        arxiv_title = " ".join(entry.findtext(f"{namespace}title", default="").split())
        print(f"Found arXiv title: {arxiv_title} - Input title: {title}")

        if not is_title_match(title, arxiv_title):
            continue

        # Extract relevant information if title matches
        arxiv_id = entry.findtext(f"{namespace}id", default="").split('/abs/')[-1]
        arxiv_url = f"https://arxiv.org/abs/{arxiv_id}"
        authors = []
        for author in entry.findall(f"{namespace}author"):
            author_name = author.findtext(f"{namespace}name", default="")
            if author_name:
                authors.append(author_name)
        author_str = " and ".join(authors) or "Unknown"
        published = entry.findtext(f"{namespace}published", default="")[:4]
        bibtex_key = create_bibtex_key(author_str, published, arxiv_title)

        # Build the BibTeX entry
        return "\n".join([
            f"@article{{{bibtex_key},",
            f"author = {{{author_str}}},",
            f"title = {{{arxiv_title}}},",
            f"journal = {{arXiv preprint arXiv:{arxiv_id}}},",
            f"year = {{{published}}},",
            f"url = {{{arxiv_url}}}",
            "}",
        ])

    return None
|
92 |
+
|
93 |
+
def get_semanticscholar_bibtex(title):
    """Search Semantic Scholar by title and return a BibTeX ``@article`` entry.

    Args:
        title: Paper title to search for.

    Returns:
        The BibTeX entry for the first result whose title passes
        ``is_title_match``, or ``None`` when the title is blank, nothing
        matches, or the lookup raises.
    """
    if not title or not title.strip():
        return None

    sch = SemanticScholar()
    try:
        papers = sch.search_paper(title)
        # NOTE(review): indexing with 'data' assumes the installed
        # semanticscholar client returns a mapping here — confirm against
        # the pinned library version.
        for paper in papers['data']:
            if not is_title_match(title, paper['title']):
                continue

            authors = " and ".join([author['name'] for author in paper['authors']]) or "Unknown"
            # ``or`` also replaces a present-but-empty value (None / "").
            year = paper.get('year') or 'Unknown'
            journal = paper.get('venue') or 'Unknown'
            # presumably the DOI may live under 'externalIds' — TODO confirm
            external_ids = paper.get('externalIds') or {}
            doi = paper.get('doi') or external_ids.get('DOI')
            # Key is built from the matched title, as the other sources do.
            bibtex_key = create_bibtex_key(authors, year, paper['title'])

            # Only build a doi.org link when a DOI is actually known;
            # previously a missing DOI produced "https://doi.org/Unknown".
            doi_text = doi or 'Unknown'
            url_text = f"https://doi.org/{doi}" if doi else 'N/A'

            return "\n".join([
                f"@article{{{bibtex_key},",
                f"author = {{{authors}}},",
                f"title = {{{paper['title']}}},",
                f"journal = {{{journal}}},",
                f"year = {{{year}}},",
                f"doi = {{{doi_text}}},",
                f"url = {{{url_text}}}",
                "}",
            ])
        return None
    except Exception as exc:
        # Best-effort source: report the failure and let the caller fall
        # back to the next database instead of failing silently.
        print(f"Semantic Scholar lookup failed: {exc}")
        return None
|
117 |
+
|
118 |
+
def get_crossref_bibtex_by_doi(doi):
    """Fetch a CrossRef record by DOI and return a BibTeX ``@article`` entry.

    Args:
        doi: The DOI, optionally prefixed with ``doi:`` or a
            ``doi.org`` / ``dx.doi.org`` URL; surrounding whitespace is
            ignored.

    Returns:
        The BibTeX entry as a string, or the message
        ``"CrossRef request by DOI failed."`` when the DOI is blank, the
        request fails, the status is not 200 or the body is not usable JSON.
    """
    from urllib.parse import quote

    failure_message = "CrossRef request by DOI failed."

    doi = (doi or "").strip()
    lowered = doi.lower()
    for prefix in ("https://doi.org/", "http://doi.org/",
                   "https://dx.doi.org/", "http://dx.doi.org/", "doi:"):
        if lowered.startswith(prefix):
            doi = doi[len(prefix):].strip()
            break
    # An empty DOI would turn the URL into the generic /works/ listing.
    if not doi:
        return failure_message

    # Quote the DOI so characters such as "#", "?" or "<" are not
    # interpreted as URL syntax ("/" is left intact).
    url = f"https://api.crossref.org/works/{quote(doi)}"

    try:
        response = requests.get(url, timeout=30)
    except requests.RequestException as exc:
        print(f"CrossRef request failed: {exc}")
        return failure_message
    if response.status_code != 200:
        return failure_message

    try:
        item = response.json()["message"]
    except (ValueError, KeyError, TypeError):
        return failure_message

    author_names = []
    for author in item.get("author", []):
        # Not every contributor has both "given" and "family"; fall back to
        # a single "name" field when neither part is present.
        parts = [part for part in (author.get("given"), author.get("family")) if part]
        full_name = " ".join(parts) or author.get("name", "")
        if full_name:
            author_names.append(full_name)
    authors = " and ".join(author_names) or "Unknown"

    # ``or`` also covers present-but-empty lists, which would otherwise
    # raise IndexError on ``[0]``.
    journal = (item.get("container-title") or ["Unknown"])[0]
    date_parts = item.get("issued", {}).get("date-parts") or [[None]]
    year = date_parts[0][0] if date_parts[0] else None
    title = (item.get("title") or ["Untitled"])[0]
    bibtex_key = create_bibtex_key(authors, year, title)

    return "\n".join([
        f"@article{{{bibtex_key},",
        f"author = {{{authors}}},",
        f"title = {{{title}}},",
        f"journal = {{{journal}}},",
        f"year = {{{year}}},",
        f"doi = {{{doi}}},",
        f"url = {{https://doi.org/{doi}}}",
        "}",
    ])
|
140 |
+
|
141 |
+
def get_arxiv_bibtex_by_id(arxiv_id):
    """Fetch an arXiv record by identifier and return a BibTeX entry.

    Args:
        arxiv_id: arXiv identifier such as ``1902.07153``; surrounding
            whitespace and a leading ``arXiv:`` prefix are ignored.

    Returns:
        The BibTeX entry as a string, or the message
        ``"ArXiv request by ID failed."`` when the identifier is blank, the
        request or XML parsing fails, or the feed holds no entry with both a
        title and a publication date.
    """
    failure_message = "ArXiv request by ID failed."

    arxiv_id = (arxiv_id or "").strip()
    if arxiv_id.lower().startswith("arxiv:"):
        arxiv_id = arxiv_id[len("arxiv:"):].strip()
    if not arxiv_id:
        return failure_message

    arxiv_url = f"https://arxiv.org/abs/{arxiv_id}"

    # Passing the id through ``params`` lets requests handle URL encoding.
    try:
        response = requests.get(
            "https://export.arxiv.org/api/query",
            params={"id_list": arxiv_id},
            timeout=30,
        )
    except requests.RequestException as exc:
        print(f"arXiv request failed: {exc}")
        return failure_message
    if response.status_code != 200:
        return failure_message

    try:
        root = ET.fromstring(response.text)
    except ET.ParseError as exc:
        print(f"Could not parse arXiv response: {exc}")
        return failure_message

    namespace = "{http://www.w3.org/2005/Atom}"
    entry = root.find(f"{namespace}entry")
    if entry is None:
        return failure_message

    # Collapse internal whitespace so a title wrapped over several lines in
    # the feed does not leak newlines into the BibTeX output.
    arxiv_title = " ".join(entry.findtext(f"{namespace}title", default="").split())
    published = entry.findtext(f"{namespace}published", default="")[:4]
    # An entry lacking a title or publication date is not a usable paper
    # record (presumably an API error entry — TODO confirm).
    if not arxiv_title or not published:
        return failure_message

    authors = []
    for author in entry.findall(f"{namespace}author"):
        author_name = author.findtext(f"{namespace}name", default="")
        if author_name:
            authors.append(author_name)
    author_str = " and ".join(authors) or "Unknown"
    bibtex_key = create_bibtex_key(author_str, published, arxiv_title)

    return "\n".join([
        f"@article{{{bibtex_key},",
        f"author = {{{author_str}}},",
        f"title = {{{arxiv_title}}},",
        f"journal = {{arXiv preprint arXiv:{arxiv_id}}},",
        f"year = {{{published}}},",
        f"url = {{{arxiv_url}}}",
        "}",
    ])
|
167 |
+
|
168 |
+
def get_bibtex_for_paper(title=None, doi=None, arxiv_id=None, crossref_rows=1, arxiv_max_results=10):
    """Return a BibTeX entry for a paper identified by DOI, arXiv ID or title.

    Precedence is DOI, then arXiv ID, then title. A title is looked up in
    CrossRef, Semantic Scholar and arXiv, in that order, and the first
    non-empty result wins.

    Args:
        title: Paper title (optional).
        doi: DOI of the paper (optional).
        arxiv_id: arXiv identifier (optional).
        crossref_rows: Number of CrossRef candidates to request for a title.
        arxiv_max_results: Number of arXiv candidates to request for a title.

    Returns:
        The BibTeX entry, the failure message of the DOI / arXiv-ID lookup,
        or ``"No BibTeX entry found for this paper."`` when nothing is found
        or no identifier was supplied.
    """
    # UI text boxes deliver "" or stray whitespace for unused fields; treat
    # those as "not provided" so a blank DOI box cannot shadow a real title.
    title = (title or "").strip()
    doi = (doi or "").strip()
    arxiv_id = (arxiv_id or "").strip()

    if doi:
        return get_crossref_bibtex_by_doi(doi)
    if arxiv_id:
        return get_arxiv_bibtex_by_id(arxiv_id)

    if title:
        # (lookup, message printed when that source has no match)
        lookups = (
            (lambda: get_crossref_bibtex(title, crossref_rows),
             "No peer-reviewed version found in CrossRef."),
            (lambda: get_semanticscholar_bibtex(title),
             "No BibTeX entry found in Semantic Scholar."),
            (lambda: get_arxiv_bibtex(title, arxiv_max_results),
             "No arXiv preprint found."),
        )
        for lookup, miss_message in lookups:
            bibtex = lookup()
            if bibtex:
                return bibtex
            print(miss_message)

    return "No BibTeX entry found for this paper."
|
196 |
+
|
197 |
+
# Gradio Interface
|
198 |
+
def gradio_app(title=None, doi=None, arxiv_id=None):
    """Gradio callback: forward the three form fields to ``get_bibtex_for_paper``.

    Returns whatever ``get_bibtex_for_paper`` returns for the given title,
    DOI and arXiv ID, using its default candidate counts.
    """
    bibtex = get_bibtex_for_paper(title=title, doi=doi, arxiv_id=arxiv_id)
    return bibtex
|
200 |
+
|
201 |
+
# Create the Gradio interface
|
202 |
+
# Three free-text inputs (title, DOI, arXiv ID) feed ``gradio_app``; the
# result is shown in a code widget labelled "BibTeX Entry".
interface = gr.Interface(
    fn=gradio_app,
    title="BibTeX Generator",
    description="Provide a paper title, DOI, or ArXiv ID to retrieve the BibTeX entry.",
    inputs=[
        gr.Textbox(label="Title", placeholder="Enter Title"),
        gr.Textbox(label="DOI", placeholder="Enter DOI"),
        gr.Textbox(label="ArXiv ID", placeholder="Enter ArXiv ID"),
    ],
    outputs=gr.Code(label="BibTeX Entry"),
    # One example row per input kind: title only, DOI only, arXiv ID only.
    examples=[
        ["Attention is All You Need", None, None],
        [None, "10.1145/3292500.3330694", None],
        [None, None, "1902.07153"],
    ],
)

# Start the web UI only when the file is run as a script.
if __name__ == "__main__":
    interface.launch()
|