# Page header captured with the file (not part of the program): egarciag's picture
# Commit message: Update app.py
# Commit: f77c312 (verified)
from smolagents import CodeAgent, HfApiModel,load_tool,tool
from shutil import make_archive
import datetime
import requests
import pytz
import yaml
import os
from tools.final_answer import FinalAnswerTool
from Gradio_UI import GradioUI
@tool
def search_articles(search_term: str, start: int, end: int) -> str:
    """
    Searches for academic articles on arxiv.org using a given search term and gives back
    the most recently submitted matches in the requested range as Markdown text.
    Every match is rendered as a heading linking to the article, followed by its
    publication date. Problems are reported in the result text instead of being raised.
    Args:
        search_term: The keyword or phrase to search for in ArXiv articles.
        start: The zero-based index of the first search result (used for pagination).
        end: The number of articles to retrieve from the API.
    Returns:
        str: The articles found, a "No articles found" notice, or a message starting with "Error".
    Example:
        >>> search_articles("deep learning", 0, 10)
    """
    # Imported here so the function does not depend on extra module-level imports.
    import xml.etree.ElementTree as ET

    query = search_term.strip() if isinstance(search_term, str) else ""
    if not query:
        return "Error: search_term must be a non-empty string."
    if start < 0 or end <= 0:
        return "Error: start must be >= 0 and end must be a positive number of articles."

    atom = "{http://www.w3.org/2005/Atom}"  # namespace of the Atom feed returned by the API
    try:
        # Passing the query through `params` lets requests URL-encode it, so spaces,
        # '&', '#' or '+' in the search term cannot corrupt the query string.
        response = requests.get(
            "https://export.arxiv.org/api/query",
            params={
                "search_query": f"all:{query}",
                "start": start,
                "max_results": end,
                "sortBy": "submittedDate",
                "sortOrder": "descending",
            },
            timeout=30,  # never hang the agent on an unresponsive server
        )
        if response.status_code != 200:
            return "Error: Failed to fetch articles from ArXiv."

        # Parse the feed with a real XML parser: entities are unescaped and a missing
        # element yields an empty field instead of a mis-sliced string.
        feed = ET.fromstring(response.content)
        articles = []
        for entry in feed.findall(f"{atom}entry"):
            # Titles may be wrapped over several lines; collapse the whitespace.
            title = " ".join((entry.findtext(f"{atom}title") or "").split())
            link = (entry.findtext(f"{atom}id") or "").strip()
            published_date = (entry.findtext(f"{atom}published") or "").strip()[:10]  # YYYY-MM-DD
            articles.append(f"### [{title}]({link})\n**Published Date:** {published_date}\n")

        if not articles:
            return f"No articles found for '{search_term}'."
        # Join the entries as text; formatting the list itself would emit a Python
        # repr with quotes and literal "\n" sequences.
        return "Articles:\n" + "\n".join(articles)
    except Exception as e:
        # Tool boundary: report network and parsing failures as text, as before.
        return f"Error: {str(e)}"
@tool
def download_articles(article_links: list, save_folder: str = "downloads") -> str:
    """
    A tool that downloads articles from arxiv.org given a list of links.
    Each article is fetched as a PDF and stored in the save folder under its article ID.
    A link that cannot be downloaded does not stop the remaining downloads; it is listed
    in the result message instead.
    Args:
        article_links: List of article links from arXiv (abstract links, PDF links or bare article IDs).
        save_folder: Folder to save downloaded articles. Default is 'downloads'.
    Returns:
        str: Success or error message.
    """
    import re

    if not article_links:
        return "Error: No article links provided."
    # A lone string would otherwise be iterated character by character.
    if isinstance(article_links, str):
        article_links = [article_links]

    # Ensure the save folder exists
    os.makedirs(save_folder, exist_ok=True)

    downloaded_files = []
    failures = []
    for link in article_links:
        partial_path = None
        try:
            article_id = _arxiv_id_from_link(link)
            if not article_id:
                failures.append(f"{link}: no arXiv article ID found in link")
                continue

            pdf_url = f"https://arxiv.org/pdf/{article_id}.pdf"
            # Old-style IDs contain '/', so map anything unusual to '_' to keep the
            # file inside save_folder. New-style IDs are left unchanged.
            safe_name = re.sub(r"[^A-Za-z0-9._-]", "_", article_id)
            file_path = os.path.join(save_folder, f"{safe_name}.pdf")
            partial_path = file_path + ".part"

            # The context manager releases the streamed connection on every path.
            with requests.get(pdf_url, stream=True, timeout=60) as response:
                if response.status_code != 200:
                    failures.append(f"{pdf_url}: HTTP {response.status_code}")
                    continue
                # Write to a temporary name first so an interrupted transfer never
                # leaves a truncated file ending in .pdf.
                with open(partial_path, "wb") as file:
                    for chunk in response.iter_content(8192):
                        file.write(chunk)
            os.replace(partial_path, file_path)
            downloaded_files.append(file_path)
        except Exception as e:
            failures.append(f"{link}: {str(e)}")
            if partial_path is not None:
                try:
                    os.remove(partial_path)
                except OSError:
                    pass  # nothing was written yet; cleanup is best effort

    if not downloaded_files:
        return "Error: No articles were downloaded.\n" + "\n".join(failures)
    report = "Downloaded articles:\n" + "\n".join(downloaded_files)
    if failures:
        report += "\nFailed downloads:\n" + "\n".join(failures)
    return report


def _arxiv_id_from_link(link):
    """Return the arXiv article ID contained in *link*, or "" when there is none."""
    from urllib.parse import urlparse

    path = urlparse(str(link).strip()).path
    segments = [segment for segment in path.split("/") if segment]
    # Drop the leading "abs" or "pdf" segment of arXiv URLs; keep every remaining
    # segment so old-style IDs such as "hep-th/9901001" stay whole.
    if segments and segments[0] in ("abs", "pdf"):
        segments = segments[1:]
    article_id = "/".join(segments)
    if article_id.endswith(".pdf"):
        article_id = article_id[: -len(".pdf")]
    return article_id
@tool
def create_zip_package(articles_folder: str, output_zip: str) -> str:
    """
    A tool that packages all downloaded articles (PDFs) from a folder into a zip file.
    Only regular files directly inside the folder whose name ends in ".pdf" (any letter
    case) are added, in alphabetical order, and they are stored without their folder path.
    Args:
        articles_folder: Folder containing downloaded articles.
        output_zip: The name of the output zip file.
    Returns:
        str: A success message with the path of the created zip file, or a message starting with "Error".
    """
    # zipfile is not among this file's module-level imports; without this import every
    # call ended in "name 'zipfile' is not defined".
    import zipfile

    # isdir (not exists) so that a plain file passed by mistake is rejected here
    # instead of making os.listdir raise.
    if not os.path.isdir(articles_folder):
        return f"Error: Folder '{articles_folder}' does not exist."

    candidates = (os.path.join(articles_folder, name) for name in os.listdir(articles_folder))
    # Sorted so the archive layout does not depend on directory listing order.
    file_paths = sorted(
        path for path in candidates
        if path.lower().endswith(".pdf") and os.path.isfile(path)
    )
    if not file_paths:
        return "Error: No PDF files found for zipping."

    try:
        with zipfile.ZipFile(output_zip, "w", zipfile.ZIP_DEFLATED) as zipf:
            for file_path in file_paths:
                zipf.write(file_path, os.path.basename(file_path))
        return f"Zip package created successfully: {output_zip}"
    except Exception as e:
        # Tool boundary: report the failure as text, as before.
        return f"Error: {str(e)}"
# Instantiate the final-answer tool imported from tools.final_answer.
final_answer = FinalAnswerTool()

# When the agent stops answering, the model below is probably overloaded. Switch to a
# different model, or to this Hugging Face Endpoint, which also contains qwen2.5 coder:
# model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud'
model = HfApiModel(
    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',  # may be overloaded at times
    temperature=0.5,
    max_tokens=2096,
    custom_role_conversions=None,
)

# Prompt templates are read from prompts.yaml in the current working directory.
with open("prompts.yaml", 'r') as stream:
    prompt_templates = yaml.safe_load(stream)

# Build the code agent from the model, the three arXiv tools defined above and the
# final-answer tool.
agent = CodeAgent(
    tools=[search_articles, download_articles, create_zip_package, final_answer],
    model=model,
    prompt_templates=prompt_templates,
    max_steps=6,
    verbosity_level=1,
    planning_interval=None,
    grammar=None,
    name=None,
    description=None,
)

# Wrap the agent in the Gradio UI and launch it.
GradioUI(agent).launch()