Spaces:

chendelong
/

citation-tool

Runtime error

File size: 6,023 Bytes


import requests
import pprint
import json
import os
import gradio as gr
import requests
from bs4 import BeautifulSoup

def get_dblp_bibref(title, timeout=30):
    """Look up a paper on DBLP by title and fetch its BibTeX entry.

    Args:
        title: Paper title used as the DBLP search query.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A ``(bibtex, paper_link)`` tuple. On success ``bibtex`` is the body of
        the ``.bib`` page of the first search hit and ``paper_link`` is the
        matching ``.html`` page. On any failure ``bibtex`` is an error message
        starting with ``'Error during get bibref from DBLP'`` and
        ``paper_link`` is ``None``; the function never raises.
    """
    print(f'DBLP query: {title}')

    try:
        # Let requests build the query string so characters such as '&', '#'
        # or '+' inside the title are escaped instead of corrupting the URL.
        response = requests.get(
            'https://dblp.org/search/publ/api',
            params={'q': title, 'format': 'xml'},
            timeout=timeout,
        )
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'lxml')

        # The first <url> element in the result is taken as the best match.
        first_hit = soup.select_one('url')
        if first_hit is None:
            return 'Error during get bibref from DBLP: no matching publication found', None
        record_url = first_hit.text

        bib_response = requests.get(record_url + '.bib', timeout=timeout)
        # Without this check an HTTP error page would be returned as BibTeX.
        bib_response.raise_for_status()

        return bib_response.text, record_url + '.html'
    except Exception as e:
        # Deliberate best-effort boundary: the caller renders this message
        # in the UI rather than failing the whole request.
        return f'Error during get bibref from DBLP: {e}', None

# Pretty-printer configured with a wide line width (128 columns).
pp = pprint.PrettyPrinter(width=128)

# SECURITY: an API key should not live in source control. Prefer supplying it
# through the SEMANTIC_SCHOLAR_API_KEY environment variable; the literal below
# is kept only as a backward-compatible fallback and should be rotated and
# removed once the deployment sets the variable.
API_KEY = os.environ.get(
    'SEMANTIC_SCHOLAR_API_KEY',
    'eRLnjZeWSs4gHjSemy5af1X7IbugACFg1tSX6F3R',
)

# Comma-separated list of fields requested from the Semantic Scholar paper
# details endpoint.
FIELDS = "paperId,title,url,year,authors,venue,abstract,citationCount,openAccessPdf,fieldsOfStudy,publicationDate,citations,references"


# def get_name_mapping(venues_data='/nfs/delong/data/s2orc/s2ag_full/publication-venues'):
#     name_mapping = {} # from full name to abbreviated name
#     for file in os.listdir(venues_data):
#         with open(os.path.join(venues_data, file), 'r') as f:
#             venues = [json.loads(line) for line in f.readlines()]
#             print(f"Total number of venues in {file}: {len(venues)}")
#             for venue in venues:
#                 if len(venue['alternate_names'])>0: 
#                     # name_mapping[venue['name']] = venue['alternate_names'][0]
#                     # instead of using the first alternate name, use the shortest one
#                     name_mapping[venue['name']] = min(venue['alternate_names'], key=len)

#     name_mapping['Neural Information Processing Systems'] = 'NeurIPS'

#     print(f'loaded {len(name_mapping)} venues from {venues_data}')
#     return name_mapping


# name_mapping = get_name_mapping()
# json.dump(name_mapping, open('name_mapping.json', 'w'), indent=4)

# Venue name table (full venue name -> abbreviated name) used when formatting
# citations. A missing file degrades to an empty table, in which case venue
# names are shown unabbreviated instead of the app failing at start-up.
try:
    # Context manager so the file handle is closed deterministically.
    with open('name_mapping.json', 'r', encoding='utf-8') as mapping_file:
        name_mapping = json.load(mapping_file)
    print(f'loaded {len(name_mapping)} venues from name_mapping.json')
except FileNotFoundError:
    name_mapping = {}
    print('name_mapping.json not found; venue names will not be abbreviated')

def search_paper_title_semanticscholar(title, timeout=30):
    """Find the best Semantic Scholar match for a title and fetch its details.

    Two requests are made: a search limited to one result, then a details
    request for that result's ``paperId`` asking for the fields in ``FIELDS``.

    Args:
        title: Free-text paper title to search for.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        The decoded JSON of the details request, or ``None`` when nothing
        matches, a request fails, or the API answers with a non-200 status.
        The reason is printed in every ``None`` case.
    """
    headers = {"Accept": "application/json", "x-api-key": API_KEY}

    try:
        search = requests.get(
            "https://api.semanticscholar.org/graph/v1/paper/search",
            headers=headers,
            params={"query": title, "limit": 1},
            timeout=timeout,
        )
        if search.status_code != 200:
            print(f"Error: {search.status_code}")
            return None

        # An empty or missing result list means the search found nothing.
        hits = search.json().get('data') or []
        if not hits:
            print("No paper found with the given title.")
            return None

        paper_id = hits[0]['paperId']
        details = requests.get(
            f"https://api.semanticscholar.org/graph/v1/paper/{paper_id}",
            headers=headers,
            params={"fields": FIELDS},
            timeout=timeout,
        )
        if details.status_code != 200:
            print(f"Error: {details.status_code}")
            return None

        return details.json()
    except (requests.RequestException, ValueError) as e:
        # Connection problems, timeouts and undecodable bodies follow the same
        # "print and return None" convention as the HTTP status checks above.
        print(f"Error: {e}")
        return None

def get_abbreviated_venue(name):
    """Return the entry stored for ``name`` in ``name_mapping``.

    When ``name`` has no entry in the table it is returned unchanged.
    """
    return name_mapping.get(name, name)

def get_md_citation(paper_info):
    """Format a paper record as a one-line Markdown citation.

    The result has the shape
    ``Authors. [Title](url). *Venue* (YYYY.MM).`` where:

    * at most five author names are listed; with more than five the list is
      followed by ``*et al.*`` instead of a full stop;
    * the author part is omitted when the record has no authors;
    * the venue part is omitted when the record has no venue;
    * the date is taken from ``publicationDate`` (year and month), falling
      back to ``year`` when the date is missing, and omitted when both are.

    Args:
        paper_info: Mapping with ``title`` and ``url`` and optionally
            ``authors`` (list of mappings with ``name``), ``venue``,
            ``publicationDate`` (``YYYY-MM-DD`` string) and ``year``.

    Returns:
        The Markdown citation string.
    """
    authors = paper_info.get('authors') or []
    shown_names = ', '.join(str(author['name']) for author in authors[:5])

    if len(authors) > 5:
        author_part = f"{shown_names}, *et al.* "
    elif authors:
        author_part = f"{shown_names}. "
    else:
        # Avoid a dangling ". " in front of the title.
        author_part = ''

    citation_str = f"{author_part}[{paper_info['title']}]({paper_info['url']})."

    venue = paper_info.get('venue')
    if venue:
        # An empty venue would otherwise render as a literal "**".
        citation_str += f" *{get_abbreviated_venue(venue)}*"

    publication_date = paper_info.get('publicationDate')
    if publication_date:
        # Keep "YYYY-MM" and display it as "YYYY.MM".
        when = publication_date[:7].replace('-', '.')
    elif paper_info.get('year'):
        when = str(paper_info['year'])
    else:
        when = ''

    if when:
        citation_str += f" ({when})."
    return citation_str


def summarize_paper_info(paper_info):
    """Render authors, abstract and citation count as a Markdown fragment.

    Each author with an ``authorId`` is linked to their Semantic Scholar
    author page; authors without one are listed as plain text. A missing
    abstract is shown as ``Not available`` rather than the text ``None``.

    Args:
        paper_info: Mapping with ``citationCount`` and optionally ``authors``
            (list of mappings with ``name`` and ``authorId``) and
            ``abstract``.

    Returns:
        The Markdown fragment, ending with a blank line.
    """
    author_entries = []
    for author in paper_info.get('authors') or []:
        author_id = author.get('authorId')
        if author_id:
            author_entries.append(
                f"[{author['name']}](https://www.semanticscholar.org/author/{author_id})"
            )
        else:
            # No profile to link to; a "/author/None" URL would be a dead link.
            author_entries.append(f"{author['name']}")
    author_str = ', '.join(author_entries)

    abstract = paper_info.get('abstract') or 'Not available'

    info_str = f"**Authors**:\n\n{author_str}\n\n"
    info_str += f"\n\n> **Abstract**: {abstract}\n\n"
    info_str += f"**Citation Count**: {paper_info['citationCount']}\n\n"
    return info_str

def get_output(title):
    """Build the Markdown report shown in the UI for a paper title.

    The title is looked up on Semantic Scholar; the matched paper's title is
    then used to query DBLP for a BibTeX entry. The report contains the
    matched title, the rendered citation, its Markdown source, the BibTeX
    text, an author/abstract summary and links to the source pages.

    Args:
        title: Paper title typed by the user.

    Returns:
        The Markdown report, or a short message when the title is blank or
        no paper could be retrieved. The report is also printed.
    """
    print(f"Title query: {title}")

    # Nothing to search for; skip the network round-trips.
    if not title or not title.strip():
        return "Please input the title of the paper."

    paper_info = search_paper_title_semanticscholar(title)
    if paper_info is None:
        # Everything below dereferences paper_info, so stop here.
        return "No paper found with that title."

    citation_str = get_md_citation(paper_info)
    bibtex, dblp_link = get_dblp_bibref(paper_info['title'])

    links = f"[[Open in Semantic Scholar]](https://www.semanticscholar.org/paper/{paper_info['paperId']})"
    if dblp_link is not None:
        # Only advertise the DBLP page when the lookup actually produced one.
        links += f" | [[DBLP Page]]({dblp_link})"

    citation_str = f"""
```text
{paper_info['title']}
```

{citation_str}

---

**Markdown source code**

```markdown
{citation_str}
```


**BibTex**

```bibtex
{bibtex}
```

{summarize_paper_info(paper_info)}

---

🔗 {links}
"""

    print(citation_str)

    return citation_str

def main():
    """Create the Gradio interface around ``get_output`` and launch it."""
    # Single-line text input for the paper title.
    title_box = gr.components.Textbox(
        lines=1,
        label="Please input the title of the paper to get its citation.",
        placeholder="Your title here",
        autofocus=True,
    )

    # The function's return value is rendered as Markdown.
    demo = gr.Interface(
        fn=get_output,
        inputs=title_box,
        outputs="markdown",
        allow_flagging='never',
        title="Citation Tool",
        description="### Search paper title from [Semantic Scholar](https://www.semanticscholar.org/) and [DBLP](http://dblp.org/), and get structured citation.",
    )
    demo.launch()


if __name__ == "__main__":
    main()