import gradio as gr
import requests
from cachetools import cached, TTLCache

CACHE_TIME = 60 * 60 * 12  # 12 hours


def parse_arxiv_id_from_paper_url(url):
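    """Return the arXiv ID, i.e. the last path segment of a Hugging Face paper URL."""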
    return url.split("/")[-1]


@cached(cache=TTLCache(maxsize=500, ttl=CACHE_TIME))
def get_recommendations_from_semantic_scholar(semantic_scholar_id: str):
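    """Fetch up to 10 similar papers from the Semantic Scholar Recommendations API.

    Results are cached for 12 hours so repeated lookups for the same paper skip the network call.
    """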
    try:
        r = requests.post(
            "https://api.semanticscholar.org/recommendations/v1/papers/",
            json={
                "positivePaperIds": [semantic_scholar_id],
            },
            params={"fields": "externalIds,title,year", "limit": 10},
        )
        return r.json()["recommendedPapers"]
    except KeyError as e:
        raise gr.Error(
            "Error getting recommendations; if this is a new paper, it may not yet"
            " have been indexed by Semantic Scholar."
        ) from e


def filter_recommendations(recommendations, max_paper_count=5):
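    """Keep recommendations that have an arXiv ID and cap the list at max_paper_count.

    Only papers with an arXiv ID can be turned into Hugging Face paper links below.
    """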
    # include only arXiv papers; guard against entries that have no externalIds at all
    arxiv_paper = [
        r
        for r in recommendations
        if (r.get("externalIds") or {}).get("ArXiv") is not None
    ]
    if len(arxiv_paper) > max_paper_count:
        arxiv_paper = arxiv_paper[:max_paper_count]
    return arxiv_paper


@cached(cache=TTLCache(maxsize=500, ttl=CACHE_TIME))
def get_paper_title_from_arxiv_id(arxiv_id):
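    """Look up a paper's title via the Hugging Face papers API (cached for 12 hours)."""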
    try:
        return requests.get(f"https://huggingface.co/api/papers/{arxiv_id}").json()[
            "title"
        ]
    except Exception as e:
        print(f"Error getting paper title for {arxiv_id}: {e}")
        raise gr.Error(f"Error getting paper title for {arxiv_id}: {e}") from e


def format_recommendation_into_markdown(arxiv_id, recommendations):
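    """Render the recommendations as a markdown bullet list of Hugging Face paper links."""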
    # title = get_paper_title_from_arxiv_id(arxiv_id)
    # url = f"https://huggingface.co/papers/{arxiv_id}"
    # comment = f"Recommended papers for [{title}]({url})\n\n"
    comment = "The following papers were recommended by the Semantic Scholar API \n\n"
    for r in recommendations:
        hub_paper_url = f"https://huggingface.co/papers/{r['externalIds']['ArXiv']}"
        comment += f"* [{r['title']}]({hub_paper_url}) ({r['year']})\n"
    return comment


def return_recommendations(url):
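    """End-to-end pipeline: parse the arXiv ID, fetch and filter recommendations, render markdown.

    The "ArXiv:" prefix is the paper-identifier format the Semantic Scholar API expects.
    """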
    arxiv_id = parse_arxiv_id_from_paper_url(url)
    recommendations = get_recommendations_from_semantic_scholar(f"ArXiv:{arxiv_id}")
    filtered_recommendations = filter_recommendations(recommendations)
    return format_recommendation_into_markdown(arxiv_id, filtered_recommendations)


title = "Semantic Scholar Paper Recommender"
description = (
    "Paste a link to a paper on Hugging Face Papers and get recommendations for similar"
    " papers from Semantic Scholar. **Note**: Some papers may not have recommendations"
    " yet if they are new or have not been indexed by Semantic Scholar."
)
examples = [
    "https://huggingface.co/papers/2309.12307",
    "https://huggingface.co/papers/2211.10086",
]
interface = gr.Interface(
    return_recommendations,
    gr.Textbox(lines=1),
    gr.Markdown(),
    examples=examples,
    title=title,
    description=description,
)
interface.launch()