Spaces:

librarian-bots
/

recommend_similar_papers

Running

App Files Files Community

davanstrien HF staff committed on Jan 22

Commit

91bf496

•

1 Parent(s): 0255055

Add comment posting functionality

Browse files

Files changed (1) hide show

app.py +125 -4

app.py CHANGED Viewed

@@ -1,9 +1,33 @@
 import gradio as gr
 import requests
 from cachetools import cached, TTLCache
 CACHE_TIME = 60 * 60 * 6  # 6 hours
 def parse_arxiv_id_from_paper_url(url):
     return url.split("/")[-1]
@@ -59,10 +83,106 @@ def format_recommendation_into_markdown(arxiv_id, recommendations):
     return comment
-def return_recommendations(url):
     arxiv_id = parse_arxiv_id_from_paper_url(url)
     recommendations = get_recommendations_from_semantic_scholar(f"ArXiv:{arxiv_id}")
     filtered_recommendations = filter_recommendations(recommendations)
     return format_recommendation_into_markdown(arxiv_id, filtered_recommendations)
@@ -73,15 +193,16 @@ description = (
     " yet if they are new or have not been indexed by Semantic Scholar."
 )
 examples = [
-    "https://huggingface.co/papers/2309.12307",
-    "https://huggingface.co/papers/2211.10086",
 ]
 interface = gr.Interface(
     return_recommendations,
-    gr.Textbox(lines=1),
     gr.Markdown(),
     examples=examples,
     title=title,
     description=description,
 )
 interface.launch()

 import gradio as gr
 import requests
 from cachetools import cached, TTLCache
+from bs4 import BeautifulSoup
+from httpx import Client
+import json
+from pathlib import Path
+from huggingface_hub import CommitScheduler
+from dotenv import load_dotenv
+import os
+# Pull settings from a .env file into the environment before HF_TOKEN is read below.
+load_dotenv()
+# Token handed to both the CommitScheduler and post_comment(); None when HF_TOKEN is unset.
+HF_TOKEN = os.getenv("HF_TOKEN")
 CACHE_TIME = 60 * 60 * 6  # 6 hours
+# Shared httpx client, used to fetch paper pages when checking for an existing bot comment.
+client = Client()
+# Dataset repository that receives the logged comments.
+REPO_ID = "librarian-bots/paper-recommendations-v2"
+# Syncs the local "comments" folder to "data" in REPO_ID; log_comments() writes under scheduler.lock.
+# NOTE(review): `every=5` is presumably an interval in minutes — confirm against the huggingface_hub docs.
+scheduler = CommitScheduler(
+    repo_id=REPO_ID,
+    repo_type="dataset",
+    folder_path="comments",
+    path_in_repo="data",
+    every=5,
+    token=HF_TOKEN,
+)
 def parse_arxiv_id_from_paper_url(url):
     """
     Extracts the arXiv identifier from a paper URL.
     Args:
         url (str): A paper URL such as "https://huggingface.co/papers/2309.12307".
     Returns:
         str: The last non-empty path segment of the URL, without query string or fragment.
     """
     # Drop the fragment and query string so they are not mistaken for part of the id.
     path = url.strip().split("#", 1)[0].split("?", 1)[0]
     # A trailing slash would otherwise make the last segment the empty string.
     return path.rstrip("/").split("/")[-1]
     return comment
+def format_comment(result: str):
+    result = (
+        "This is an automated message from the [Librarian Bot](https://huggingface.co/librarian-bots). I found the following papers similar to this paper. \n\n"
+        + result
+    )
+    result += "\n\n Please give a thumbs up to this comment if you found it helpful!"
+    result += "\n\n If you want recommendations for any Paper on Hugging Face checkout [this](https://huggingface.co/spaces/librarian-bots/recommend_similar_papers) Space"
+    return result
+def post_comment(
+    paper_url: str, comment: str, token: str | None = None, base_url: str | None = None
+) -> bool:
+    if not base_url:
+        base_url = "https://huggingface.co"
+    paper_id = paper_url.split("/")[-1]
+    url = f"{base_url}/api/papers/{paper_id}/comment"
+    comment_data = {"comment": comment}
+    headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
+    response = requests.post(url, json=comment_data, headers=headers)
+    if response.status_code == 201:
+        print(f"Comment posted successfully for {paper_url}!")
+        return True
+    else:
+        print(f"Failed to post comment! (Status Code: {response.status_code})")
+        print(response.text)
+        return False
+def is_comment_from_librarian_bot(html: str) -> bool:
+    """
+    Checks if the given HTML contains a comment from the librarian-bot.
+    Args:
+        html (str): The HTML content to check.
+    Returns:
+        bool: True if a comment from the librarian-bot is found, False otherwise.
+    """
+    soup = BeautifulSoup(html, "lxml")
+    librarian_bot_links = soup.find_all("a", string="librarian-bot")
+    return any(librarian_bot_links)
+def check_if_lib_bot_comment_exists(paper_url: str) -> bool:
+    """
+    Checks if a comment from the librarian bot exists for a given paper URL.
+    Args:
+        paper_url (str): The URL of the paper.
+    Returns:
+        bool: True if a comment from the librarian bot exists, False otherwise.
+    """
+    try:
+        resp = client.get(paper_url)
+        return is_comment_from_librarian_bot(resp.text)
+    except Exception as e:
+        print(f"Error checking if comment exists for {paper_url}: {e}")
+        return True  # default to not posting comment
+def log_comments(paper_url: str, comment: str):
+    """
+    Logs comments for a given paper URL.
+    Args:
+        paper_url (str): The URL of the paper.
+        comment (str): The comment to be logged.
+    Returns:
+        None
+    """
+    paper_id = paper_url.split("/")[-1]
+    file_path = Path(f"comments/{paper_id}.json")
+    if not file_path.exists():
+        with scheduler.lock:
+            with open(file_path, "w") as f:
+                data = {"paper_url": paper_url, "comment": comment}
+                json.dump(data, f)
+def return_recommendations(url: str, post_to_paper: bool = True) -> str:
     arxiv_id = parse_arxiv_id_from_paper_url(url)
     recommendations = get_recommendations_from_semantic_scholar(f"ArXiv:{arxiv_id}")
     filtered_recommendations = filter_recommendations(recommendations)
+    if post_to_paper:
+        if comment_already_exists := check_if_lib_bot_comment_exists(url):
+            gr.Info(
+                f"Existing comment: {comment_already_exists}...skipping posting comment"
+            )
+        else:
+            comment = format_comment(
+                format_recommendation_into_markdown(arxiv_id, filtered_recommendations)
+            )
+            if comment_status := post_comment(url, comment, token=HF_TOKEN):
+                log_comments(url, comment)
+                gr.Info(f"Comment status: {comment_status}")
+            else:
+                gr.Info("Failed to post comment")
     return format_recommendation_into_markdown(arxiv_id, filtered_recommendations)
     " yet if they are new or have not been indexed by Semantic Scholar."
 )
 examples = [
+    ["https://huggingface.co/papers/2309.12307", False],
+    ["https://huggingface.co/papers/2211.10086", False],
 ]
 interface = gr.Interface(
     return_recommendations,
+    [gr.Textbox(lines=1), gr.Checkbox(label="Post to Paper", default=False)],
     gr.Markdown(),
     examples=examples,
     title=title,
     description=description,
 )
+interface.queue()
 interface.launch()