Spaces:

librarian-bots
/

recommend_similar_papers

Running

App Files Files Community

davanstrien HF staff committed on May 8

Commit

26dad3e

•

1 Parent(s): 3205980

refactor to use api

Browse files

Files changed (1) hide show

app.py +128 -44

app.py CHANGED Viewed

@@ -9,6 +9,7 @@ from huggingface_hub import CommitScheduler
 from dotenv import load_dotenv
 import os
 from functools import lru_cache
 load_dotenv()
@@ -95,57 +96,127 @@ def format_comment(result: str):
     return result
 def post_comment(
-    paper_url: str, comment: str, token: str | None = None, base_url: str | None = None
-) -> bool:
-    if not base_url:
-        base_url = "https://huggingface.co"
-    paper_id = paper_url.split("/")[-1]
-    url = f"{base_url}/api/papers/{paper_id}/comment"
-    comment_data = {"comment": comment}
-    headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
-    response = requests.post(url, json=comment_data, headers=headers)
-    if response.status_code == 201:
-        print(f"Comment posted successfully for {paper_url}!")
-        return True
-    else:
-        print(f"Failed to post comment! (Status Code: {response.status_code})")
-        print(response.text)
-        return False
-@lru_cache(maxsize=500)
-def is_comment_from_librarian_bot(html: str) -> bool:
     """
-    Checks if the given HTML contains a comment from the librarian-bot.
     Args:
-        html (str): The HTML content to check.
     Returns:
-        bool: True if a comment from the librarian-bot is found, False otherwise.
     """
-    soup = BeautifulSoup(html, "lxml")
-    librarian_bot_links = soup.find_all("a", string="librarian-bot")
-    return any(librarian_bot_links)
-def check_if_lib_bot_comment_exists(paper_url: str) -> bool:
     """
-    Checks if a comment from the librarian bot exists for a given paper URL.
     Args:
-        paper_url (str): The URL of the paper.
     Returns:
-        bool: True if a comment from the librarian bot exists, False otherwise.
     """
     try:
-        resp = client.get(paper_url)
-        return is_comment_from_librarian_bot(resp.text)
     except Exception as e:
         print(f"Error checking if comment exists for {paper_url}: {e}")
-        return True  # default to not posting comment
 def log_comments(paper_url: str, comment: str):
@@ -168,22 +239,34 @@ def log_comments(paper_url: str, comment: str):
                 json.dump(data, f)
-def return_recommendations(url: str, post_to_paper: bool = True) -> str:
     arxiv_id = parse_arxiv_id_from_paper_url(url)
     recommendations = get_recommendations_from_semantic_scholar(f"ArXiv:{arxiv_id}")
     filtered_recommendations = filter_recommendations(recommendations)
     if post_to_paper:
-        if comment_already_exists := check_if_lib_bot_comment_exists(url):
-            gr.Info(
-                f"Existing comment: {comment_already_exists}...skipping posting comment"
-            )
         else:
             comment = format_comment(
                 format_recommendation_into_markdown(arxiv_id, filtered_recommendations)
             )
-            if comment_status := post_comment(url, comment, token=HF_TOKEN):
-                log_comments(url, comment)
-                gr.Info(f"Comment status: {comment_status}")
             else:
                 gr.Info("Failed to post comment")
     return format_recommendation_into_markdown(arxiv_id, filtered_recommendations)
@@ -196,14 +279,15 @@ description = (
     " yet if they are new or have not been indexed by Semantic Scholar."
 )
 examples = [
-    ["https://huggingface.co/papers/2309.12307", False],
-    ["https://huggingface.co/papers/2211.10086", False],
 ]
 interface = gr.Interface(
     return_recommendations,
     [
         gr.Textbox(lines=1),
-        gr.Checkbox(label="Post recommendations to Paper page?", default=False),
     ],
     gr.Markdown(),
     examples=examples,

 from dotenv import load_dotenv
 import os
 from functools import lru_cache
+from typing import Tuple
 load_dotenv()
     return result
+from typing import Tuple
 def post_comment(
+    paper_url: str, comment: str, comment_id: str | None = None, token: str = HF_TOKEN
+) -> Tuple[bool, str]:
     """
+    Post a comment on a paper or a reply to a comment using the Hugging Face API.
     Args:
+        paper_url (str): The URL of the paper to post the comment on.
+        comment (str): The text of the comment or reply to post.
+        comment_id (str, optional): The ID of the comment to reply to. If provided, the function will post a reply to the specified comment. Defaults to None.
+        token (str, optional): The authentication token to use for the API request. Defaults to HF_TOKEN.
     Returns:
+        Tuple[bool, str]: A tuple containing two elements:
+            - bool: True if the comment or reply was posted successfully, False otherwise.
+            - str: The ID of the posted comment or reply if successful, an empty string otherwise.
+    Raises:
+        requests.exceptions.RequestException: If an error occurs while making the API request.
     """
+    try:
+        paper_id = paper_url.split("/")[-1]
+        if comment_id:
+            url = f"https://huggingface.co/api/papers/{paper_id}/comment/{comment_id}/reply"
+        else:
+            url = f"https://huggingface.co/api/papers/{paper_id}/comment"
+        headers = {
+            "Authorization": f"Bearer {token}",
+            "Content-Type": "application/json",
+        }
+        comment_data = {"comment": comment}
+        response = requests.post(url, json=comment_data, headers=headers)
+        if response.status_code == 201:
+            posted_comment_id = response.json().get("id", "")
+            if comment_id:
+                print(
+                    f"Reply posted successfully to comment {comment_id} for {paper_url}. Reply ID: {posted_comment_id}"
+                )
+            else:
+                print(
+                    f"Comment posted successfully for {paper_url}. Comment ID: {posted_comment_id}"
+                )
+            return True, posted_comment_id
+        else:
+            print(
+                f"Failed to post {'reply' if comment_id else 'comment'} for {paper_url}. Status code: {response.status_code}"
+            )
+            print(f"Response text: {response.text}")
+            return False, ""
+    except requests.exceptions.RequestException as e:
+        print(
+            f"Error posting {'reply' if comment_id else 'comment'} for {paper_url}: {e}"
+        )
+        return False, ""
+# @lru_cache(maxsize=500)
+# def is_comment_from_librarian_bot(html: str) -> bool:
+#     """
+#     Checks if the given HTML contains a comment from the librarian-bot.
+#     Args:
+#         html (str): The HTML content to check.
+#     Returns:
+#         bool: True if a comment from the librarian-bot is found, False otherwise.
+#     """
+#     soup = BeautifulSoup(html, "lxml")
+#     librarian_bot_links = soup.find_all("a", string="librarian-bot")
+#     return any(librarian_bot_links)
+def check_if_lib_bot_comment_exists(paper_url: str) -> Tuple[bool, str]:
     """
+    Check if a comment or reply from the librarian-bot exists for a given paper URL using the Hugging Face API.
     Args:
+        paper_url (str): The URL of the paper to check for librarian-bot comments.
     Returns:
+        Tuple[bool, str]: A tuple containing two elements:
+            - bool: True if a comment or reply from the librarian-bot is found, False otherwise.
+            - str: The ID of the comment if a librarian-bot comment is found, an empty string otherwise.
+    Raises:
+        Exception: If an error occurs while retrieving comments from the API.
     """
     try:
+        paper_id = paper_url.split("/")[-1]
+        url = f"https://huggingface.co/api/papers/{paper_id}/?field=comments"
+        headers = {"Authorization": f"Bearer {HF_TOKEN}"}
+        response = requests.get(url, headers=headers)
+        if response.status_code == 200:
+            paper_data = response.json()
+            comments = paper_data.get("comments", [])
+            for comment in comments:
+                comment_author = comment.get("author", {}).get("name")
+                if comment_author == "librarian-bot":
+                    return True, comment.get("id")
+                replies = comment.get("replies", [])
+                for reply in replies:
+                    reply_author = reply.get("author", {}).get("name")
+                    if reply_author == "librarian-bot":
+                        return True, comment.get("id")
+        else:
+            print(
+                f"Failed to retrieve comments for {paper_url}. Status code: {response.status_code}"
+            )
+        return False, ""
     except Exception as e:
         print(f"Error checking if comment exists for {paper_url}: {e}")
+        return True, ""  # default to not posting comment
 def log_comments(paper_url: str, comment: str):
                 json.dump(data, f)
+def return_recommendations(
+    url: str, comment_id: str | None = None, post_to_paper: bool = True
+) -> str:
     arxiv_id = parse_arxiv_id_from_paper_url(url)
     recommendations = get_recommendations_from_semantic_scholar(f"ArXiv:{arxiv_id}")
     filtered_recommendations = filter_recommendations(recommendations)
     if post_to_paper:
+        existing_comments, comment_id = check_if_lib_bot_comment_exists(url)
+        if existing_comments:
+            gr.Info(f"Existing comment: {comment_id}...skipping posting comment")
         else:
             comment = format_comment(
                 format_recommendation_into_markdown(arxiv_id, filtered_recommendations)
             )
+            if comment_id:
+                comment_status, posted_comment_id = post_comment(
+                    url, comment, comment_id, token=HF_TOKEN
+                )
+                if comment_status:
+                    log_comments(url, comment)
+                    gr.Info(f"Posted reply to comment {posted_comment_id}")
+            if not comment_id:
+                comment_status, posted_comment_id = post_comment(
+                    url, comment, token=HF_TOKEN
+                )
+                if comment_status:
+                    log_comments(url, comment)
+                    gr.Info(f"Posted comment {posted_comment_id}")
             else:
                 gr.Info("Failed to post comment")
     return format_recommendation_into_markdown(arxiv_id, filtered_recommendations)
     " yet if they are new or have not been indexed by Semantic Scholar."
 )
 examples = [
+    ["https://huggingface.co/papers/2309.12307", None, False],
+    ["https://huggingface.co/papers/2211.10086", None, False],
 ]
 interface = gr.Interface(
     return_recommendations,
     [
         gr.Textbox(lines=1),
+        gr.Textbox(None, lines=1, label="Comment ID (if replying to a comment)"),
+        gr.Checkbox(False, label="Post recommendations to Paper page?"),
     ],
     gr.Markdown(),
     examples=examples,