Spaces:

morenolq
/

bib-me

Running

App Files Files Community

Moreno La Quatra committed on Sep 21, 2024

Commit

f1c1f0d

1 Parent(s): 1a23bf5

First commit app

Browse files

Files changed (2) hide show

__pycache__/app.cpython-310.pyc +0 -0
app.py +221 -0

__pycache__/app.cpython-310.pyc ADDED Viewed

Binary file (5.77 kB). View file

app.py ADDED Viewed

	@@ -0,0 +1,221 @@

+import requests
+import xml.etree.ElementTree as ET
+import re
+from fuzzywuzzy import fuzz
+from semanticscholar import SemanticScholar
+import gradio as gr
+def normalize_name(name):
+    return re.sub(r'\W+', '', name.lower())
+def create_bibtex_key(authors, year, title):
+    first_author = authors.split(" and ")[0]
+    surname = first_author.split()[-1].lower()
+    surname_normalized = normalize_name(surname)
+    first_word_of_title = title.split()[0].lower()
+    return f"{surname_normalized}_{year}_{first_word_of_title}"
+def is_title_match(input_title, db_title):
+    return fuzz.ratio(input_title.lower(), db_title.lower()) > 90  # Fuzzy match threshold
+def get_crossref_bibtex(title, rows=1):
+    url = "https://api.crossref.org/works"
+    params = {"query.title": title, "rows": rows}
+    response = requests.get(url, params=params)
+    if response.status_code == 200:
+        data = response.json()
+        if data["message"]["items"]:
+            for item in data["message"]["items"]:
+                db_title = item["title"][0]
+                if is_title_match(title, db_title):
+                    doi = item["DOI"]
+                    authors = " and ".join([author["given"] + " " + author["family"] for author in item["author"]])
+                    journal = item.get("container-title", ["Unknown"])[0]
+                    year = item.get("issued", {}).get("date-parts", [[None]])[0][0]
+                    title = item["title"][0]
+                    bibtex_key = create_bibtex_key(authors, year, title)
+                    bibtex_entry = f"""@article{{{bibtex_key},
+  author = {{{authors}}},
+  title = {{{title}}},
+  journal = {{{journal}}},
+  year = {{{year}}},
+  doi = {{{doi}}},
+  url = {{{'https://doi.org/' + doi}}}
+}}"""
+                    return bibtex_entry
+        return None
+    return None
+def get_arxiv_bibtex(title, max_results=1):
+    url = "http://export.arxiv.org/api/query"
+    params = {"search_query": f"ti:{title}", "start": 0, "max_results": max_results}
+    print(f"Querying arXiv with params: {params}")
+    response = requests.get(url, params=params)
+    if response.status_code == 200:
+        data = response.text
+        root = ET.fromstring(data)
+        namespace = "{http://www.w3.org/2005/Atom}"
+        # Find all the entries returned
+        entries = root.findall(f"{namespace}entry")
+        if not entries:
+            return "No entries found in arXiv."
+        for entry in entries:
+            arxiv_title = entry.find(f"{namespace}title").text.strip()
+            print(f"Found arXiv title: {arxiv_title} - Input title: {title}")
+            if is_title_match(title, arxiv_title):
+                # Extract relevant information if title matches
+                arxiv_id = entry.find(f"{namespace}id").text.split('/abs/')[-1]
+                arxiv_url = f"https://arxiv.org/abs/{arxiv_id}"
+                authors = [author.find(f"{namespace}name").text for author in entry.findall(f"{namespace}author")]
+                author_str = " and ".join(authors)
+                published = entry.find(f"{namespace}published").text[:4]
+                bibtex_key = create_bibtex_key(author_str, published, arxiv_title)
+                # Build the BibTeX entry
+                bibtex_entry = f"""@article{{{bibtex_key},
+  author = {{{author_str}}},
+  title = {{{arxiv_title}}},
+  journal = {{arXiv preprint arXiv:{arxiv_id}}},
+  year = {{{published}}},
+  url = {{{arxiv_url}}}
+}}"""
+                return bibtex_entry
+        return None
+    return None
+def get_semanticscholar_bibtex(title):
+    sch = SemanticScholar()
+    try:
+        papers = sch.search_paper(title)
+        for paper in papers['data']:
+            if is_title_match(title, paper['title']):
+                authors = " and ".join([author['name'] for author in paper['authors']])
+                year = paper.get('year', 'Unknown')
+                journal = paper.get('venue', 'Unknown')
+                doi = paper.get('doi', 'Unknown')
+                bibtex_key = create_bibtex_key(authors, year, title)
+                bibtex_entry = f"""@article{{{bibtex_key},
+  author = {{{authors}}},
+  title = {{{paper['title']}}},
+  journal = {{{journal}}},
+  year = {{{year}}},
+  doi = {{{doi}}},
+  url = {{{'https://doi.org/' + doi if doi else 'N/A'}}}
+}}"""
+                return bibtex_entry
+        return None
+    except Exception as e:
+        return None
+def get_crossref_bibtex_by_doi(doi):
+    url = f"https://api.crossref.org/works/{doi}"
+    response = requests.get(url)
+    if response.status_code == 200:
+        item = response.json()["message"]
+        authors = " and ".join([author["given"] + " " + author["family"] for author in item["author"]])
+        journal = item.get("container-title", ["Unknown"])[0]
+        year = item.get("issued", {}).get("date-parts", [[None]])[0][0]
+        title = item["title"][0]
+        bibtex_key = create_bibtex_key(authors, year, title)
+        bibtex_entry = f"""@article{{{bibtex_key},
+  author = {{{authors}}},
+  title = {{{title}}},
+  journal = {{{journal}}},
+  year = {{{year}}},
+  doi = {{{doi}}},
+  url = {{{'https://doi.org/' + doi}}}
+}}"""
+        return bibtex_entry
+    return "CrossRef request by DOI failed."
+def get_arxiv_bibtex_by_id(arxiv_id):
+    arxiv_url = f"https://arxiv.org/abs/{arxiv_id}"
+    url = f"http://export.arxiv.org/api/query?id_list={arxiv_id}"
+    response = requests.get(url)
+    if response.status_code == 200:
+        data = response.text
+        root = ET.fromstring(data)
+        namespace = "{http://www.w3.org/2005/Atom}"
+        entry = root.find(f"{namespace}entry")
+        if entry is not None:
+            arxiv_title = entry.find(f"{namespace}title").text.strip()
+            authors = [author.find(f"{namespace}name").text for author in entry.findall(f"{namespace}author")]
+            author_str = " and ".join(authors)
+            published = entry.find(f"{namespace}published").text[:4]
+            bibtex_key = create_bibtex_key(author_str, published, arxiv_title)
+            bibtex_entry = f"""@article{{{bibtex_key},
+  author = {{{author_str}}},
+  title = {{{arxiv_title}}},
+  journal = {{arXiv preprint arXiv:{arxiv_id}}},
+  year = {{{published}}},
+  url = {{{arxiv_url}}}
+}}"""
+            return bibtex_entry
+    return "ArXiv request by ID failed."
+def get_bibtex_for_paper(title=None, doi=None, arxiv_id=None, crossref_rows=1, arxiv_max_results=10):
+    if doi:
+        return get_crossref_bibtex_by_doi(doi)
+    elif arxiv_id:
+        return get_arxiv_bibtex_by_id(arxiv_id)
+    elif title:
+        # Try CrossRef
+        bibtex = get_crossref_bibtex(title, crossref_rows)
+        if bibtex:
+            return bibtex
+        else:
+            print("No peer-reviewed version found in CrossRef.")
+        # Try Semantic Scholar
+        bibtex = get_semanticscholar_bibtex(title)
+        if bibtex:
+            return bibtex
+        else:
+            print("No BibTeX entry found in Semantic Scholar.")
+        # Try arXiv
+        bibtex = get_arxiv_bibtex(title, arxiv_max_results)
+        if bibtex:
+            return bibtex
+        else:
+            print("No arXiv preprint found.")
+    return "No BibTeX entry found for this paper."
+# Gradio Interface
+def gradio_app(title=None, doi=None, arxiv_id=None):
+    return get_bibtex_for_paper(title=title, doi=doi, arxiv_id=arxiv_id)
+# Create the Gradio interface
+interface = gr.Interface(
+    fn=gradio_app,
+    inputs=[
+        gr.Textbox(placeholder="Enter Title", label="Title"),
+        gr.Textbox(placeholder="Enter DOI", label="DOI"),
+        gr.Textbox(placeholder="Enter ArXiv ID", label="ArXiv ID"),
+    ],
+    outputs=gr.Code(label="BibTeX Entry"),
+    title="BibTeX Generator",
+    description="Provide a paper title, DOI, or ArXiv ID to retrieve the BibTeX entry.",
+    examples=[
+        ["Attention is All You Need", None, None],
+        [None, "10.1145/3292500.3330694", None],
+        [None, None, "1902.07153"],
+    ]
+)
+if __name__ == "__main__":
+    interface.launch()