"""Gradio app that recommends similar papers via the Semantic Scholar API."""

import gradio as gr
import requests
from cachetools import cached, TTLCache

CACHE_TIME = 60 * 60 * 6  # 6 hours

# `requests` applies no timeout by default; without one a stalled connection
# would block the request handler indefinitely.
REQUEST_TIMEOUT = 30  # seconds


def parse_arxiv_id_from_paper_url(url):
    """Return the last path segment of *url*, used as the arXiv id.

    Surrounding whitespace, a query string, a fragment and trailing slashes
    are ignored, so ``"https://huggingface.co/papers/2309.12307/"`` yields
    ``"2309.12307"``. A bare id is returned unchanged.
    """
    without_fragment = url.strip().split("#", 1)[0]
    without_query = without_fragment.split("?", 1)[0]
    return without_query.rstrip("/").split("/")[-1]


@cached(cache=TTLCache(maxsize=500, ttl=CACHE_TIME))
def get_recommendations_from_semantic_scholar(semantic_scholar_id: str):
    """Fetch recommended papers for one paper from Semantic Scholar.

    Args:
        semantic_scholar_id: Paper identifier sent as the single positive
            example, e.g. ``"ArXiv:2309.12307"``.

    Returns:
        The ``recommendedPapers`` list from the API response (at most 10
        entries, each requested with ``externalIds``, ``title`` and ``year``).

    Raises:
        gr.Error: If the request fails, the response is not JSON, or it does
            not contain ``recommendedPapers``.
    """
    try:
        r = requests.post(
            "https://api.semanticscholar.org/recommendations/v1/papers/",
            json={
                "positivePaperIds": [semantic_scholar_id],
            },
            params={"fields": "externalIds,title,year", "limit": 10},
            timeout=REQUEST_TIMEOUT,
        )
        return r.json()["recommendedPapers"]
    # KeyError/TypeError: JSON payload without the expected key or shape.
    # ValueError: body is not valid JSON. RequestException: transport failure
    # or timeout. All are reported to the user the same way.
    except (KeyError, TypeError, ValueError, requests.RequestException) as e:
        raise gr.Error(
            "Error getting recommendations, if this is a new paper it may not yet have"
            " been indexed by Semantic Scholar."
        ) from e


def filter_recommendations(recommendations, max_paper_count=5):
    """Keep only recommendations that have an arXiv id, capped in number.

    Args:
        recommendations: Iterable of paper dicts as returned by the API.
        max_paper_count: Maximum number of papers to return.

    Returns:
        A new list with the first ``max_paper_count`` arXiv-backed papers,
        in their original order.
    """
    # `externalIds` may be absent or null for a paper; treat that as "no ids".
    arxiv_papers = [
        r
        for r in recommendations
        if (r.get("externalIds") or {}).get("ArXiv") is not None
    ]
    return arxiv_papers[:max_paper_count]


@cached(cache=TTLCache(maxsize=500, ttl=CACHE_TIME))
def get_paper_title_from_arxiv_id(arxiv_id):
    """Return the title of a paper from the Hugging Face papers API.

    Raises:
        gr.Error: If the request fails or the response has no ``title``.
    """
    try:
        return requests.get(
            f"https://huggingface.co/api/papers/{arxiv_id}",
            timeout=REQUEST_TIMEOUT,
        ).json()["title"]
    except Exception as e:
        print(f"Error getting paper title for {arxiv_id}: {e}")
        raise gr.Error(f"Error getting paper title for {arxiv_id}: {e}") from e


def format_recommendation_into_markdown(arxiv_id, recommendations):
    """Render recommendations as a Markdown bullet list.

    Args:
        arxiv_id: Id of the paper the recommendations are for. Currently
            unused; kept so existing callers keep working.
        recommendations: Paper dicts, each with ``title``, ``year`` and
            ``externalIds["ArXiv"]``.

    Returns:
        A Markdown string: a header line followed by one bullet per paper,
        linking to the paper's page on Hugging Face.
    """
    header = "The following papers were recommended by the Semantic Scholar API \n\n"
    bullets = [
        f"* [{r['title']}]"
        f"(https://huggingface.co/papers/{r['externalIds']['ArXiv']})"
        f" ({r['year']})\n"
        for r in recommendations
    ]
    return header + "".join(bullets)


def return_recommendations(url):
    """Return Markdown recommendations for the paper at *url*.

    Extracts the arXiv id from the URL, fetches recommendations from
    Semantic Scholar, keeps the arXiv-backed ones and formats them.
    """
    arxiv_id = parse_arxiv_id_from_paper_url(url)
    recommendations = get_recommendations_from_semantic_scholar(f"ArXiv:{arxiv_id}")
    filtered_recommendations = filter_recommendations(recommendations)
    return format_recommendation_into_markdown(arxiv_id, filtered_recommendations)


title = "Semantic Scholar Paper Recommender"
description = (
    "Paste a link to a paper on Hugging Face Papers and get recommendations for similar"
    " papers from Semantic Scholar. **Note**: Some papers may not have recommendations"
    " yet if they are new or have not been indexed by Semantic Scholar."
)
examples = [
    "https://huggingface.co/papers/2309.12307",
    "https://huggingface.co/papers/2211.10086",
]
interface = gr.Interface(
    return_recommendations,
    gr.Textbox(lines=1),
    gr.Markdown(),
    examples=examples,
    title=title,
    description=description,
)
interface.launch()