import gradio as gr
import requests
from cachetools import cached, TTLCache

CACHE_TIME = 60 * 60 * 6  # 6 hours


def parse_arxiv_id_from_paper_url(url):
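    """Extract the arXiv ID from the last path segment of a Hugging Face paper URL."""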
    return url.split("/")[-1]


@cached(cache=TTLCache(maxsize=500, ttl=CACHE_TIME))
def get_recommendations_from_semantic_scholar(semantic_scholar_id: str):
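    """Fetch up to 10 recommended papers for a paper ID from the Semantic Scholar
    recommendations API. Results are cached for CACHE_TIME."""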
    try:
        r = requests.post(
            "https://api.semanticscholar.org/recommendations/v1/papers/",
            json={
                "positivePaperIds": [semantic_scholar_id],
            },
            params={"fields": "externalIds,title,year", "limit": 10},
        )
        return r.json()["recommendedPapers"]
    except KeyError as e:
        raise gr.Error(
            "Error getting recommendations, if this is a new paper it may not yet have"
            " been indexed by Semantic Scholar."
        ) from e


def filter_recommendations(recommendations, max_paper_count=5):
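    """Keep only recommendations that have an arXiv ID, capped at max_paper_count."""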
    # include only arxiv papers
    arxiv_paper = [
        r for r in recommendations if r["externalIds"].get("ArXiv", None) is not None
    ]
    if len(arxiv_paper) > max_paper_count:
        arxiv_paper = arxiv_paper[:max_paper_count]
    return arxiv_paper


@cached(cache=TTLCache(maxsize=500, ttl=CACHE_TIME))
def get_paper_title_from_arxiv_id(arxiv_id):
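    """Look up the paper title for an arXiv ID via the Hugging Face papers API."""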
    try:
        return requests.get(f"https://huggingface.co/api/papers/{arxiv_id}").json()[
            "title"
        ]
    except Exception as e:
        print(f"Error getting paper title for {arxiv_id}: {e}")
        raise gr.Error(f"Error getting paper title for {arxiv_id}: {e}") from e


def format_recommendation_into_markdown(arxiv_id, recommendations):
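    """Render the recommendations as a Markdown bullet list of Hugging Face paper links."""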
    # title = get_paper_title_from_arxiv_id(arxiv_id)
    # url = f"https://huggingface.co/papers/{arxiv_id}"
    # comment = f"Recommended papers for [{title}]({url})\n\n"
    comment = "The following papers were recommended by the Semantic Scholar API \n\n"
    for r in recommendations:
        hub_paper_url = f"https://huggingface.co/papers/{r['externalIds']['ArXiv']}"
        comment += f"* [{r['title']}]({hub_paper_url}) ({r['year']})\n"
    return comment


def return_recommendations(url):
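    """Build the Markdown list of recommended papers for a Hugging Face paper URL."""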
    arxiv_id = parse_arxiv_id_from_paper_url(url)
    recommendations = get_recommendations_from_semantic_scholar(f"ArXiv:{arxiv_id}")
    filtered_recommendations = filter_recommendations(recommendations)
    return format_recommendation_into_markdown(arxiv_id, filtered_recommendations)


title = "Semantic Scholar Paper Recommender"
description = (
    "Paste a link to a paper on Hugging Face Papers and get recommendations for similar"
    " papers from Semantic Scholar. **Note**: Some papers may not have recommendations"
    " yet if they are new or have not been indexed by Semantic Scholar."
)
examples = [
    "https://huggingface.co/papers/2309.12307",
    "https://huggingface.co/papers/2211.10086",
]
interface = gr.Interface(
    return_recommendations,
    gr.Textbox(lines=1),
    gr.Markdown(),
    examples=examples,
    title=title,
    description=description,
)
interface.launch()