Moreno La Quatra committed on
Commit
f1c1f0d
·
1 Parent(s): 1a23bf5

First commit app

Browse files
Files changed (2) hide show
  1. __pycache__/app.cpython-310.pyc +0 -0
  2. app.py +221 -0
__pycache__/app.cpython-310.pyc ADDED
Binary file (5.77 kB). View file
 
app.py ADDED
@@ -0,0 +1,221 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import xml.etree.ElementTree as ET
3
+ import re
4
+ from fuzzywuzzy import fuzz
5
+ from semanticscholar import SemanticScholar
6
+ import gradio as gr
7
+
8
def normalize_name(name):
    """Return ``name`` lower-cased with all non-word characters removed.

    Every run of characters matched by the regex non-word class is deleted,
    so spaces, hyphens and apostrophes disappear while letters, digits and
    underscores are kept.
    """
    lowered = name.lower()
    non_word_runs = re.compile(r'\W+')
    return non_word_runs.sub('', lowered)
10
+
11
def create_bibtex_key(authors, year, title):
    """Build a citation key shaped like ``surname_year_firstword``.

    Args:
        authors: Author names joined with " and "; only the first one is used.
        year: Publication year, interpolated into the key as-is.
        title: Paper title; its first word becomes the last key component.

    Returns:
        The key string. ``unknown`` / ``untitled`` stand in for a surname or
        title word that is missing or consists only of punctuation.
    """
    first_author = authors.split(" and ")[0]
    name_parts = first_author.split()
    # The last whitespace-separated token of the first author is used as the
    # surname; an empty author string must not raise IndexError.
    surname = (normalize_name(name_parts[-1]) if name_parts else "") or "unknown"

    title_words = title.split()
    # Normalise the title word as well: punctuation such as "," or "{" would
    # otherwise end up inside the key and corrupt the BibTeX entry.
    first_word = (normalize_name(title_words[0]) if title_words else "") or "untitled"

    return f"{surname}_{year}_{first_word}"
17
+
18
def is_title_match(input_title, db_title, threshold=90):
    """Tell whether two titles are close enough to be the same paper.

    Args:
        input_title: Title supplied by the user.
        db_title: Title returned by a bibliographic service.
        threshold: Similarity score (as returned by ``fuzz.ratio``) that must
            be strictly exceeded for a match. Defaults to 90.

    Returns:
        ``True`` when the case-insensitive ``fuzz.ratio`` of the two titles is
        greater than ``threshold``; ``False`` otherwise, including when either
        title is missing or empty.
    """
    # A record without a title can never match; bail out before calling
    # ``.lower()`` on ``None``.
    if not input_title or not db_title:
        return False
    similarity = fuzz.ratio(input_title.lower(), db_title.lower())
    return similarity > threshold
20
+
21
def get_crossref_bibtex(title, rows=1):
    """Search CrossRef by title and build a BibTeX entry for the first match.

    Args:
        title: Title to search for (sent as the ``query.title`` parameter).
        rows: Number of search results to request from CrossRef.

    Returns:
        An ``@article`` BibTeX string for the first returned item whose title
        passes ``is_title_match``, or ``None`` when the request fails, the
        response status is not 200, or no returned item matches.
    """
    url = "https://api.crossref.org/works"
    params = {"query.title": title, "rows": rows}

    try:
        # A timeout keeps the app from hanging forever on a stalled connection.
        response = requests.get(url, params=params, timeout=30)
    except requests.RequestException as exc:
        print(f"CrossRef request failed: {exc}")
        return None
    if response.status_code != 200:
        return None

    for item in response.json()["message"]["items"]:
        titles = item.get("title") or []
        # Items without a title cannot be compared; skip them instead of raising.
        if not titles or not is_title_match(title, titles[0]):
            continue
        matched_title = titles[0]

        author_names = []
        for author in item.get("author", []):
            # Contributors may lack "given"/"family" (e.g. organisations that
            # only carry "name"), so never index these keys directly.
            personal = " ".join(
                part for part in (author.get("given"), author.get("family")) if part
            )
            full_name = personal or author.get("name")
            if full_name:
                author_names.append(full_name)
        authors = " and ".join(author_names) or "Unknown"

        # "container-title" can be present but empty.
        journal = (item.get("container-title") or ["Unknown"])[0]
        date_parts = item.get("issued", {}).get("date-parts") or [[None]]
        year = (date_parts[0] or [None])[0] or "Unknown"
        doi = item["DOI"]
        bibtex_key = create_bibtex_key(authors, year, matched_title)

        return (
            f"@article{{{bibtex_key},\n"
            f"  author = {{{authors}}},\n"
            f"  title = {{{matched_title}}},\n"
            f"  journal = {{{journal}}},\n"
            f"  year = {{{year}}},\n"
            f"  doi = {{{doi}}},\n"
            f"  url = {{{'https://doi.org/' + doi}}}\n"
            f"}}"
        )
    return None
50
+
51
def get_arxiv_bibtex(title, max_results=1):
    """Search arXiv by title and build a BibTeX entry for the first match.

    Args:
        title: Title to search for (sent as a ``ti:`` search query).
        max_results: Maximum number of feed entries to request.

    Returns:
        An ``@article`` BibTeX string for the first feed entry whose title
        passes ``is_title_match``, or ``None`` when the request fails, the
        response status is not 200, the feed has no entries, or no entry
        matches. ``None`` (rather than a message string) is returned for an
        empty feed so that callers testing the result for truthiness do not
        mistake a message for a BibTeX entry.
    """
    url = "http://export.arxiv.org/api/query"
    params = {"search_query": f"ti:{title}", "start": 0, "max_results": max_results}
    print(f"Querying arXiv with params: {params}")

    try:
        # A timeout keeps the app from hanging forever on a stalled connection.
        response = requests.get(url, params=params, timeout=30)
    except requests.RequestException as exc:
        print(f"arXiv request failed: {exc}")
        return None
    if response.status_code != 200:
        return None

    root = ET.fromstring(response.text)
    namespace = "{http://www.w3.org/2005/Atom}"

    for entry in root.findall(f"{namespace}entry"):
        # Collapse whitespace: titles in the feed can be wrapped over several
        # lines, which would hurt the fuzzy match and leak into the output.
        arxiv_title = " ".join(entry.findtext(f"{namespace}title", default="").split())
        print(f"Found arXiv title: {arxiv_title} - Input title: {title}")

        if not arxiv_title or not is_title_match(title, arxiv_title):
            continue

        # The entry id is a URL; everything after "/abs/" is the arXiv id.
        arxiv_id = entry.find(f"{namespace}id").text.split('/abs/')[-1]
        arxiv_url = f"https://arxiv.org/abs/{arxiv_id}"
        authors = [
            author.findtext(f"{namespace}name", default="")
            for author in entry.findall(f"{namespace}author")
        ]
        author_str = " and ".join(name for name in authors if name) or "Unknown"
        # The first four characters of the published timestamp are the year.
        published = entry.find(f"{namespace}published").text[:4]
        bibtex_key = create_bibtex_key(author_str, published, arxiv_title)

        return (
            f"@article{{{bibtex_key},\n"
            f"  author = {{{author_str}}},\n"
            f"  title = {{{arxiv_title}}},\n"
            f"  journal = {{arXiv preprint arXiv:{arxiv_id}}},\n"
            f"  year = {{{published}}},\n"
            f"  url = {{{arxiv_url}}}\n"
            f"}}"
        )
    return None
92
+
93
def get_semanticscholar_bibtex(title):
    """Search Semantic Scholar by title and build a BibTeX entry for the first match.

    Args:
        title: Title to search for.

    Returns:
        An ``@article`` BibTeX string for the first result whose title passes
        ``is_title_match``, or ``None`` when nothing matches or the lookup
        raises (the error is printed, not propagated).
    """
    sch = SemanticScholar()
    try:
        papers = sch.search_paper(title)
        # NOTE(review): assumes the search result can be indexed with 'data'
        # and that each paper supports ``.get`` -- confirm against the
        # installed semanticscholar version.
        for paper in papers['data']:
            matched_title = paper['title']
            if not is_title_match(title, matched_title):
                continue

            authors = " and ".join(author['name'] for author in paper['authors']) or "Unknown"
            # ``or`` also covers keys that are present but hold None or "".
            year = paper.get('year') or 'Unknown'
            journal = paper.get('venue') or 'Unknown'
            doi = paper.get('doi')
            # Only build a doi.org link when there is a real DOI.
            url = 'https://doi.org/' + doi if doi else 'N/A'
            # Use the matched record's title for the key, like the other sources.
            bibtex_key = create_bibtex_key(authors, year, matched_title)

            return (
                f"@article{{{bibtex_key},\n"
                f"  author = {{{authors}}},\n"
                f"  title = {{{matched_title}}},\n"
                f"  journal = {{{journal}}},\n"
                f"  year = {{{year}}},\n"
                f"  doi = {{{doi or 'Unknown'}}},\n"
                f"  url = {{{url}}}\n"
                f"}}"
            )
        return None
    except Exception as e:
        # Best-effort source: report the failure instead of hiding it, then
        # let the caller fall through to the next provider.
        print(f"Semantic Scholar lookup failed: {e}")
        return None
117
+
118
def get_crossref_bibtex_by_doi(doi):
    """Fetch a CrossRef record by DOI and render it as a BibTeX entry.

    Args:
        doi: The DOI to look up; surrounding whitespace is ignored.

    Returns:
        An ``@article`` BibTeX string, or the message
        "CrossRef request by DOI failed." when the request raises or the
        response status is not 200.
    """
    failure_message = "CrossRef request by DOI failed."
    doi = doi.strip()
    url = f"https://api.crossref.org/works/{doi}"

    try:
        # A timeout keeps the app from hanging forever on a stalled connection.
        response = requests.get(url, timeout=30)
    except requests.RequestException as exc:
        print(f"CrossRef request failed: {exc}")
        return failure_message
    if response.status_code != 200:
        return failure_message

    item = response.json()["message"]

    author_names = []
    for author in item.get("author", []):
        # Contributors may lack "given"/"family" (e.g. organisations that only
        # carry "name"), so never index these keys directly.
        personal = " ".join(
            part for part in (author.get("given"), author.get("family")) if part
        )
        full_name = personal or author.get("name")
        if full_name:
            author_names.append(full_name)
    authors = " and ".join(author_names) or "Unknown"

    # "container-title" and "title" can be missing or present but empty.
    journal = (item.get("container-title") or ["Unknown"])[0]
    title = (item.get("title") or ["Unknown"])[0]
    date_parts = item.get("issued", {}).get("date-parts") or [[None]]
    year = (date_parts[0] or [None])[0] or "Unknown"
    bibtex_key = create_bibtex_key(authors, year, title)

    return (
        f"@article{{{bibtex_key},\n"
        f"  author = {{{authors}}},\n"
        f"  title = {{{title}}},\n"
        f"  journal = {{{journal}}},\n"
        f"  year = {{{year}}},\n"
        f"  doi = {{{doi}}},\n"
        f"  url = {{{'https://doi.org/' + doi}}}\n"
        f"}}"
    )
140
+
141
def get_arxiv_bibtex_by_id(arxiv_id):
    """Fetch an arXiv record by identifier and render it as a BibTeX entry.

    Args:
        arxiv_id: The arXiv identifier to look up; surrounding whitespace is
            ignored.

    Returns:
        An ``@article`` BibTeX string, or the message
        "ArXiv request by ID failed." when the request raises, the response
        status is not 200, or the feed has no usable entry.
    """
    failure_message = "ArXiv request by ID failed."
    arxiv_id = arxiv_id.strip()
    arxiv_url = f"https://arxiv.org/abs/{arxiv_id}"

    try:
        # Pass the id through ``params`` so it is URL-encoded rather than
        # spliced into the query string; the timeout avoids hanging forever.
        response = requests.get(
            "http://export.arxiv.org/api/query",
            params={"id_list": arxiv_id},
            timeout=30,
        )
    except requests.RequestException as exc:
        print(f"arXiv request failed: {exc}")
        return failure_message
    if response.status_code != 200:
        return failure_message

    root = ET.fromstring(response.text)
    namespace = "{http://www.w3.org/2005/Atom}"
    entry = root.find(f"{namespace}entry")
    if entry is None:
        return failure_message

    # Collapse whitespace: titles in the feed can be wrapped over several lines.
    arxiv_title = " ".join(entry.findtext(f"{namespace}title", default="").split())
    published_text = entry.findtext(f"{namespace}published", default="")
    # An entry lacking a title or publication date is not a usable record.
    if not arxiv_title or not published_text:
        return failure_message

    authors = [
        author.findtext(f"{namespace}name", default="")
        for author in entry.findall(f"{namespace}author")
    ]
    author_str = " and ".join(name for name in authors if name) or "Unknown"
    # The first four characters of the published timestamp are the year.
    published = published_text[:4]
    bibtex_key = create_bibtex_key(author_str, published, arxiv_title)

    return (
        f"@article{{{bibtex_key},\n"
        f"  author = {{{author_str}}},\n"
        f"  title = {{{arxiv_title}}},\n"
        f"  journal = {{arXiv preprint arXiv:{arxiv_id}}},\n"
        f"  year = {{{published}}},\n"
        f"  url = {{{arxiv_url}}}\n"
        f"}}"
    )
167
+
168
def get_bibtex_for_paper(title=None, doi=None, arxiv_id=None, crossref_rows=1, arxiv_max_results=10):
    """Resolve a paper to a BibTeX entry from a DOI, an arXiv id, or a title.

    Precedence is DOI, then arXiv id, then title. A title is tried against
    CrossRef, Semantic Scholar and arXiv in that order, and the first
    non-empty result wins.

    Args:
        title: Paper title, or ``None``/empty when not provided.
        doi: DOI, or ``None``/empty when not provided.
        arxiv_id: arXiv identifier, or ``None``/empty when not provided.
        crossref_rows: Number of results to request from CrossRef.
        arxiv_max_results: Maximum number of entries to request from arXiv.

    Returns:
        The BibTeX string (or the failure message of the DOI / arXiv-id
        lookup), or "No BibTeX entry found for this paper." when no source
        produced a result or no usable input was given.
    """
    # Treat None, empty and whitespace-only inputs alike, and drop padding
    # that would otherwise be sent verbatim to the remote services.
    title, doi, arxiv_id = ((value or "").strip() for value in (title, doi, arxiv_id))

    if doi:
        return get_crossref_bibtex_by_doi(doi)
    if arxiv_id:
        return get_arxiv_bibtex_by_id(arxiv_id)

    if title:
        # Each source is paired with the message printed when it has no match.
        title_lookups = (
            (
                lambda: get_crossref_bibtex(title, crossref_rows),
                "No peer-reviewed version found in CrossRef.",
            ),
            (
                lambda: get_semanticscholar_bibtex(title),
                "No BibTeX entry found in Semantic Scholar.",
            ),
            (
                lambda: get_arxiv_bibtex(title, arxiv_max_results),
                "No arXiv preprint found.",
            ),
        )
        for lookup, miss_message in title_lookups:
            bibtex = lookup()
            if bibtex:
                return bibtex
            print(miss_message)

    return "No BibTeX entry found for this paper."
196
+
197
+ # Gradio Interface
198
def gradio_app(title=None, doi=None, arxiv_id=None):
    """Gradio callback: forward the three text inputs to ``get_bibtex_for_paper``.

    Returns whatever ``get_bibtex_for_paper`` returns for the given title,
    DOI and arXiv id.
    """
    lookup_arguments = {"title": title, "doi": doi, "arxiv_id": arxiv_id}
    result = get_bibtex_for_paper(**lookup_arguments)
    return result
200
+
201
+ # Create the Gradio interface
202
# Web UI: three free-text inputs (title, DOI, arXiv id) feeding ``gradio_app``,
# with the result rendered in a code box.
interface = gr.Interface(
    title="BibTeX Generator",
    description="Provide a paper title, DOI, or ArXiv ID to retrieve the BibTeX entry.",
    fn=gradio_app,
    # The order of the inputs matches the positional parameters of ``gradio_app``.
    inputs=[
        gr.Textbox(label="Title", placeholder="Enter Title"),
        gr.Textbox(label="DOI", placeholder="Enter DOI"),
        gr.Textbox(label="ArXiv ID", placeholder="Enter ArXiv ID"),
    ],
    outputs=gr.Code(label="BibTeX Entry"),
    # One example per lookup mode; each row is [title, doi, arxiv_id].
    examples=[
        ["Attention is All You Need", None, None],
        [None, "10.1145/3292500.3330694", None],
        [None, None, "1902.07153"],
    ],
)

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    interface.launch()