# citation-tool / app.py
# chendelong1999
# et al for 5+ authors
# 597458d
import requests
import pprint
import json
import os
import gradio as gr
import requests
from bs4 import BeautifulSoup
def get_dblp_bibref(title, timeout=30):
    """Look up a paper on DBLP by title and fetch its BibTeX entry.

    Args:
        title: Paper title used as the DBLP search query.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A ``(bibtex, paper_link)`` tuple: the body of the ``.bib`` response for
        the first search hit and the URL of that hit's ``.html`` page. On any
        failure the first element is an error message and the second is
        ``None``; this function never raises.
    """
    print(f'DBLP query: {title}')
    try:
        # Let requests build the query string so characters such as '&', '#'
        # or '+' inside the title are percent-encoded instead of corrupting
        # the URL (spaces are still sent as '+').
        search_response = requests.get(
            'https://dblp.org/search/publ/api',
            params={'q': title, 'format': 'xml'},
            timeout=timeout,
        )
        search_response.raise_for_status()
        soup = BeautifulSoup(search_response.content, 'lxml')

        # The first <url> element in the result XML identifies the top hit.
        first_hit = soup.select_one('url')
        if first_hit is None:
            raise LookupError('no matching publication found')
        record_url = first_hit.text

        bib_response = requests.get(record_url + '.bib', timeout=timeout)
        # Without this an HTTP error page would be returned as if it were BibTeX.
        bib_response.raise_for_status()
        return bib_response.text, record_url + '.html'
    except Exception as e:
        # Deliberate best-effort boundary: the caller displays the message
        # in place of the BibTeX entry rather than failing the whole request.
        return f'Error during get bibref from DBLP: {e}', None
# Pretty-printer configured with a 128-column line width.
pp = pprint.PrettyPrinter(width=128)

# Semantic Scholar API key, sent as the `x-api-key` request header.
# SECURITY: a key committed to source control should be treated as leaked.
# Prefer supplying it through the SEMANTIC_SCHOLAR_API_KEY environment
# variable; the literal is kept only as a backward-compatible fallback.
API_KEY = os.environ.get(
    'SEMANTIC_SCHOLAR_API_KEY',
    'eRLnjZeWSs4gHjSemy5af1X7IbugACFg1tSX6F3R',
)

# Comma-separated field list passed as the `fields` query parameter when
# fetching the details of a single paper.
FIELDS = "paperId,title,url,year,authors,venue,abstract,citationCount,openAccessPdf,fieldsOfStudy,publicationDate,citations,references"
# def get_name_mapping(venues_data='/nfs/delong/data/s2orc/s2ag_full/publication-venues'):
# name_mapping = {} # from full name to abbreviated name
# for file in os.listdir(venues_data):
# with open(os.path.join(venues_data, file), 'r') as f:
# venues = [json.loads(line) for line in f.readlines()]
# print(f"Total number of venues in {file}: {len(venues)}")
# for venue in venues:
# if len(venue['alternate_names'])>0:
# # name_mapping[venue['name']] = venue['alternate_names'][0]
# # instead of using the first alternate name, use the shortest one
# name_mapping[venue['name']] = min(venue['alternate_names'], key=len)
# name_mapping['Neural Information Processing Systems'] = 'NeurIPS'
# print(f'loaded {len(name_mapping)} venues from {venues_data}')
# return name_mapping
# name_mapping = get_name_mapping()
# json.dump(name_mapping, open('name_mapping.json', 'w'), indent=4)
# Venue lookup table (full venue name -> abbreviated name), loaded once at
# import time. The context manager closes the file handle deterministically
# and the explicit encoding avoids depending on the platform default.
with open('name_mapping.json', 'r', encoding='utf-8') as mapping_file:
    name_mapping = json.load(mapping_file)
print(f'loaded {len(name_mapping)} venues from name_mapping.json')
def search_paper_title_semanticscholar(title, timeout=30):
    """Find the best Semantic Scholar match for a title and fetch its details.

    Two requests are made: a search limited to one result, then a lookup of
    that result's paper id requesting the fields listed in ``FIELDS``.

    Args:
        title: Free-text paper title to search for.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        The decoded JSON of the paper-details response, or ``None`` when
        either request returns a non-200 status or the search has no results
        (a short message is printed in those cases).

    Raises:
        requests.RequestException: On connection failures or timeouts.
    """
    headers = {"Accept": "application/json", "x-api-key": API_KEY}

    search_response = requests.get(
        "https://api.semanticscholar.org/graph/v1/paper/search",
        headers=headers,
        params={"query": title, "limit": 1},
        timeout=timeout,
    )
    if search_response.status_code != 200:
        print(f"Error: {search_response.status_code}")
        return None

    search_data = search_response.json()
    matches = search_data.get('data') or []
    # Guard both the reported total and the actual result list so a payload
    # lacking either one is treated as "not found" instead of raising.
    if not search_data.get('total') or not matches:
        print("No paper found with the given title.")
        return None

    paper_id = matches[0]['paperId']
    details_response = requests.get(
        f"https://api.semanticscholar.org/graph/v1/paper/{paper_id}",
        headers=headers,
        params={"fields": FIELDS},
        timeout=timeout,
    )
    if details_response.status_code != 200:
        print(f"Error: {details_response.status_code}")
        return None

    return details_response.json()
def get_abbreviated_venue(name):
    """Return the ``name_mapping`` entry for *name*, or *name* itself if absent."""
    return name_mapping.get(name, name)
def get_md_citation(paper_info):
    """Format a paper record as a one-line Markdown citation.

    Layout: ``Authors. [Title](url). *Venue* (YYYY.MM).`` Only the first five
    authors are listed; longer author lists end with ``*et al.*``.

    Args:
        paper_info: Mapping with ``authors`` (list of mappings with ``name``),
            ``title``, ``url`` and ``venue``, plus optionally
            ``publicationDate`` (``YYYY-MM-DD``) and ``year``.

    Returns:
        The citation as a Markdown string.
    """
    author_names = [author['name'] for author in paper_info['authors']]
    if len(author_names) > 5:
        author_part = ', '.join(author_names[:5]) + ', *et al.* '
    elif author_names:
        author_part = ', '.join(author_names) + '. '
    else:
        # No authors: avoid starting the citation with a stray '. '.
        author_part = ''

    # publicationDate may be null in the API response; fall back to the year,
    # and omit the date entirely when neither is available.
    publication_date = paper_info.get('publicationDate')
    if publication_date:
        # Drop the trailing '-DD' and use dots: '2023-05-17' -> '2023.05'.
        date_part = f" ({publication_date[:-3].replace('-', '.')})"
    elif paper_info.get('year'):
        date_part = f" ({paper_info['year']})"
    else:
        date_part = ''

    title_part = f"[{paper_info['title']}]({paper_info['url']}). "
    venue_part = f"*{get_abbreviated_venue(paper_info['venue'])}*"
    return f"{author_part}{title_part}{venue_part}{date_part}."
def summarize_paper_info(paper_info):
    """Render authors, abstract and citation count as a Markdown fragment.

    Args:
        paper_info: Mapping with ``authors`` (list of mappings with ``name``
            and optionally ``authorId``), ``abstract`` and ``citationCount``.

    Returns:
        Markdown text with an author list (each name linked to its Semantic
        Scholar author page when an id is present), the abstract as a block
        quote, and the citation count.
    """
    author_entries = []
    for author in paper_info['authors']:
        author_id = author.get('authorId')
        if author_id:
            author_entries.append(
                f"[{author['name']}](https://www.semanticscholar.org/author/{author_id})"
            )
        else:
            # No id available: show the plain name rather than a dead link.
            author_entries.append(author['name'])
    author_str = ', '.join(author_entries)

    # A missing abstract would otherwise be rendered as the literal "None".
    abstract = paper_info.get('abstract') or 'Not available.'

    return (
        f"**Authors**:\n\n{author_str}\n\n"
        f"\n\n> **Abstract**: {abstract}\n\n"
        f"**Citation Count**: {paper_info['citationCount']}\n\n"
    )
def get_output(title):
    """Build the full Markdown report shown for a title query.

    Looks the title up on Semantic Scholar, formats a citation, fetches the
    BibTeX entry from DBLP, and assembles everything into one Markdown
    document, which is also printed.

    Args:
        title: Paper title typed by the user.

    Returns:
        The Markdown report, or ``"No paper found with that title."`` when
        the Semantic Scholar lookup yields nothing.
    """
    print(f"Title query: {title}")
    paper_info = search_paper_title_semanticscholar(title)
    if paper_info is None:
        # Stop here: everything below needs the paper record, and indexing
        # None would raise a TypeError.
        return "No paper found with that title."

    citation_str = get_md_citation(paper_info)
    bibtex, dblp_link = get_dblp_bibref(paper_info['title'])

    link_icon = "\U0001F517"  # link emoji (U+1F517)
    links = (
        "[[Open in Semantic Scholar]]"
        f"(https://www.semanticscholar.org/paper/{paper_info['paperId']})"
    )
    if dblp_link is not None:
        # Only advertise the DBLP page when the DBLP lookup succeeded.
        links += f" | [[DBLP Page]]({dblp_link})"

    # Each entry is one section of the report; the leading and trailing empty
    # strings give the document its opening and closing newline.
    sections = [
        "",
        "```text",
        paper_info['title'],
        "```",
        citation_str,
        "---",
        "**Markdown source code**",
        "```markdown",
        citation_str,
        "```",
        "**BibTex**",
        "```bibtex",
        bibtex,
        "```",
        summarize_paper_info(paper_info),
        "---",
        f"{link_icon} {links}",
        "",
    ]
    report = "\n".join(sections)
    print(report)
    return report
def main():
    """Create the Gradio interface that wraps ``get_output`` and launch it."""
    # Single-line text input for the paper title.
    title_box = gr.components.Textbox(
        lines=1,
        label="Please input the title of the paper to get its citation.",
        placeholder="Your title here",
        autofocus=True,
    )
    interface_options = {
        "fn": get_output,
        "inputs": title_box,
        "outputs": "markdown",
        "allow_flagging": 'never',
        "title": "Citation Tool",
        "description": "### Search paper title from [Semantic Scholar](https://www.semanticscholar.org/) and [DBLP](http://dblp.org/), and get structured citation.",
    }
    gr.Interface(**interface_options).launch()
# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()