# Spaces: Runtime error
import requests | |
import pprint | |
import json | |
import os | |
import gradio as gr | |
import requests | |
from bs4 import BeautifulSoup | |
def get_dblp_bibref(title):
    """Look up a paper on DBLP by title and fetch its BibTeX entry.

    Args:
        title: Paper title used as the DBLP search query.

    Returns:
        A ``(bibtex, paper_link)`` tuple. On success ``bibtex`` is the body of
        the ``.bib`` page of the first search hit and ``paper_link`` is the
        ``.html`` page of that same hit. On any failure ``bibtex`` is an error
        message and ``paper_link`` is ``None``.
    """
    print(f'DBLP query: {title}')
    try:
        # Let requests build the query string so that characters such as
        # '&', '#' or '?' inside the title are URL-encoded instead of
        # corrupting the query (spaces are still sent as '+').
        response = requests.get(
            'https://dblp.org/search/publ/api',
            params={'q': title, 'format': 'xml'},
            timeout=30,
        )
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'lxml')
        # The first <url> element belongs to the first paper in the results.
        first_hit = soup.select_one('url')
        if first_hit is None:
            return f'Error during get bibref from DBLP: no result for "{title}"', None
        record_url = first_hit.text
        bib_response = requests.get(record_url + '.bib', timeout=30)
        bib_response.raise_for_status()
        return bib_response.text, record_url + '.html'
    except Exception as e:
        # Best-effort lookup: report the failure as text so the caller can
        # still render the rest of the citation.
        return f'Error during get bibref from DBLP: {e}', None
# Shared pretty-printer configured for 128-column output.
pp = pprint.PrettyPrinter(width=128)
# Semantic Scholar API key. Prefer the SEMANTIC_SCHOLAR_API_KEY environment
# variable so the credential does not have to live in source control; the
# literal below is kept only as a backward-compatible fallback.
# NOTE(review): this key is committed in plain text and should be rotated.
API_KEY = os.environ.get(
    'SEMANTIC_SCHOLAR_API_KEY',
    'eRLnjZeWSs4gHjSemy5af1X7IbugACFg1tSX6F3R',
)
# Comma-separated list of fields requested from the paper details endpoint.
FIELDS = "paperId,title,url,year,authors,venue,abstract,citationCount,openAccessPdf,fieldsOfStudy,publicationDate,citations,references"
# def get_name_mapping(venues_data='/nfs/delong/data/s2orc/s2ag_full/publication-venues'): | |
# name_mapping = {} # from full name to abbreviated name | |
# for file in os.listdir(venues_data): | |
# with open(os.path.join(venues_data, file), 'r') as f: | |
# venues = [json.loads(line) for line in f.readlines()] | |
# print(f"Total number of venues in {file}: {len(venues)}") | |
# for venue in venues: | |
# if len(venue['alternate_names'])>0: | |
# # name_mapping[venue['name']] = venue['alternate_names'][0] | |
# # instead of using the first alternate name, use the shortest one | |
# name_mapping[venue['name']] = min(venue['alternate_names'], key=len) | |
# name_mapping['Neural Information Processing Systems'] = 'NeurIPS' | |
# print(f'loaded {len(name_mapping)} venues from {venues_data}') | |
# return name_mapping | |
# name_mapping = get_name_mapping() | |
# json.dump(name_mapping, open('name_mapping.json', 'w'), indent=4) | |
# Mapping from a venue's full name to its abbreviated name, loaded once at
# import time. A context manager is used so the file handle is closed, and a
# missing file degrades to "no abbreviations" instead of crashing the app.
try:
    with open('name_mapping.json', 'r', encoding='utf-8') as mapping_file:
        name_mapping = json.load(mapping_file)
except FileNotFoundError:
    name_mapping = {}
    print('name_mapping.json not found; venue names will not be abbreviated')
else:
    print(f'loaded {len(name_mapping)} venues from name_mapping.json')
def _semanticscholar_get_json(url, headers, params):
    """Return the decoded JSON body of a GET request, or None on failure."""
    try:
        # A timeout keeps a stalled connection from blocking the UI forever.
        response = requests.get(url, headers=headers, params=params, timeout=30)
    except requests.RequestException as e:
        print(f"Error: {e}")
        return None
    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        return None
    return response.json()


def search_paper_title_semanticscholar(title):
    """Find the top Semantic Scholar match for a title and fetch its details.

    Two requests are made: a search limited to one result, then a details
    request for that result's ``paperId`` asking for the fields in ``FIELDS``.

    Args:
        title: Free-text paper title to search for.

    Returns:
        The decoded JSON of the details request, or ``None`` when nothing
        matched or when either request failed (a message is printed).
    """
    headers = {"Accept": "application/json", "x-api-key": API_KEY}

    search_result = _semanticscholar_get_json(
        "https://api.semanticscholar.org/graph/v1/paper/search",
        headers,
        {"query": title, "limit": 1},
    )
    if search_result is None:
        return None

    # Guard on the result list as well as the reported total so an empty
    # list can never be indexed.
    matches = search_result.get('data') or []
    if search_result.get('total', 0) == 0 or not matches:
        print("No paper found with the given title.")
        return None

    paper_id = matches[0]['paperId']
    return _semanticscholar_get_json(
        f"https://api.semanticscholar.org/graph/v1/paper/{paper_id}",
        headers,
        {"fields": FIELDS},
    )
def get_abbreviated_venue(name):
    """Return the abbreviation registered for ``name``, or ``name`` itself.

    The lookup goes through the module-level ``name_mapping`` dictionary;
    venues without an entry are returned unchanged.
    """
    return name_mapping.get(name, name)
def get_md_citation(paper_info):
    """Format a paper record as a one-line Markdown citation.

    The result has the form
    ``Authors. [Title](url). *Venue* (YYYY.MM).`` where at most five authors
    are listed, followed by ``*et al.*`` when there are more.

    Args:
        paper_info: Paper record with ``authors`` (list of dicts holding a
            ``name``), ``title``, ``url``, ``venue`` and ``publicationDate``;
            ``year`` is used when ``publicationDate`` is missing or null.

    Returns:
        The citation as a Markdown string.
    """
    authors = paper_info['authors']
    listed_names = ', '.join(author['name'] for author in authors[:5])
    if len(authors) > 5:
        author_part = f"{listed_names}, *et al.* "
    elif authors:
        author_part = f"{listed_names}. "
    else:
        # No authors known: avoid starting the citation with a stray ". ".
        author_part = ''

    citation_str = f"{author_part}[{paper_info['title']}]({paper_info['url']}). "

    trailing_parts = []
    venue = paper_info.get('venue')
    if venue:
        trailing_parts.append(f"*{get_abbreviated_venue(venue)}*")

    # The publication date may be null; fall back to the year, and drop the
    # date entirely when neither is available instead of raising TypeError.
    publication_date = paper_info.get('publicationDate')
    if publication_date:
        # 'YYYY-MM-DD' -> 'YYYY.MM'
        trailing_parts.append(f"({publication_date[:-3].replace('-', '.')})")
    elif paper_info.get('year'):
        trailing_parts.append(f"({paper_info['year']})")

    if not trailing_parts:
        return citation_str.rstrip()
    return citation_str + ' '.join(trailing_parts) + '.'
def summarize_paper_info(paper_info):
    """Render authors, abstract and citation count as a Markdown fragment.

    Args:
        paper_info: Paper record providing ``authors`` (each with ``name``
            and ``authorId``), ``abstract`` and ``citationCount``.

    Returns:
        Markdown text with one linked entry per author, the abstract as a
        block quote and the citation count.
    """
    author_links = ', '.join(
        f"[{author['name']}](https://www.semanticscholar.org/author/{author['authorId']})"
        for author in paper_info['authors']
    )
    sections = (
        f"**Authors**:\n\n{author_links}\n\n",
        f"\n\n> **Abstract**: {paper_info['abstract']}\n\n",
        f"**Citation Count**: {paper_info['citationCount']}\n\n",
    )
    return ''.join(sections)
def get_output(title):
    """Build the Markdown report shown in the UI for a paper title.

    Looks the title up on Semantic Scholar, formats a Markdown citation,
    fetches the BibTeX entry from DBLP and assembles everything into one
    Markdown document.

    Args:
        title: Paper title typed by the user.

    Returns:
        The Markdown report, or a short "not found" message when the
        Semantic Scholar lookup yields nothing.
    """
    print(f"Title query: {title}")
    paper_info = search_paper_title_semanticscholar(title)
    if paper_info is None:
        # Stop here: everything below indexes into paper_info and would
        # raise TypeError on None.
        return "No paper found with that title."

    citation_str = get_md_citation(paper_info)
    bibtex, dblp_link = get_dblp_bibref(paper_info['title'])
    citation_str = f"""
```text
{paper_info['title']}
```
{citation_str}
---
**Markdown source code**
```markdown
{citation_str}
```
**BibTex**
```bibtex
{bibtex}
```
{summarize_paper_info(paper_info)}
---
π [[Open in Semantic Scholar]](https://www.semanticscholar.org/paper/{paper_info['paperId']}) | [[DBLP Page]]({dblp_link})
"""
    print(citation_str)
    return citation_str
def main():
    """Create the Gradio interface around ``get_output`` and launch it."""
    title_box = gr.components.Textbox(
        lines=1,
        label="Please input the title of the paper to get its citation.",
        placeholder="Your title here",
        autofocus=True,
    )
    interface_options = dict(
        fn=get_output,
        inputs=title_box,
        outputs="markdown",
        allow_flagging='never',
        title="Citation Tool",
        description="### Search paper title from [Semantic Scholar](https://www.semanticscholar.org/) and [DBLP](http://dblp.org/), and get structured citation.",
    )
    gr.Interface(**interface_options).launch()


# Only start the app when this file is executed as a script.
if __name__ == "__main__":
    main()