Spaces:

Hargurjeet
/

AI_Research_Agent

Sleeping

App Files Files Community

AI_Research_Agent / src /utils /paper_tools.py

Hargurjeet

Update src/utils/paper_tools.py

36b20ea verified 6 months ago

raw

history blame contribute delete

6.26 kB

	import arxiv
	import json
	import os
	from typing import List
	from dotenv import load_dotenv
	import anthropic

	# Load variables from a .env file (if present) into the environment before
	# the API key is read below; the order of these statements matters.
	load_dotenv()

	# Root directory under which search_papers writes, and extract_info reads,
	# one sub-directory per topic containing a papers_info.json index.
	PAPER_DIR = "/tmp/papers"
	# Shared Anthropic client; the key comes from the ANTHROPIC_API_KEY
	# environment variable (os.getenv returns None when it is unset).
	client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))

	def search_papers(topic: str, max_results: int = 5) -> List[str]:
	    """
	    Search arXiv for papers on a topic and cache their metadata on disk.

	    The metadata is merged into ``<PAPER_DIR>/<topic_dir>/papers_info.json``,
	    where ``topic_dir`` is the lower-cased topic with spaces replaced by
	    underscores. Path separators in the topic are also replaced so that the
	    index always lives exactly one level below ``PAPER_DIR`` (which is the only
	    level ``extract_info`` scans) and cannot escape that directory.

	    Args:
	        topic (str): The research topic to search for.
	        max_results (int, optional): Maximum number of search results to return. Defaults to 5.

	    Returns:
	        List[str]: The short arXiv IDs of the retrieved papers, in result order.
	    """
	    client_arxiv = arxiv.Client()
	    search = arxiv.Search(query=topic, max_results=max_results, sort_by=arxiv.SortCriterion.Relevance)
	    papers = client_arxiv.results(search)

	    # Build a single, safe directory component from the topic.
	    dir_name = topic.lower().replace(" ", "_")
	    for separator in ("/", "\\"):
	        dir_name = dir_name.replace(separator, "_")
	    if dir_name in ("", ".", ".."):
	        # These would resolve to PAPER_DIR itself or its parent.
	        dir_name = "_"

	    path = os.path.join(PAPER_DIR, dir_name)
	    os.makedirs(path, exist_ok=True)
	    file_path = os.path.join(path, "papers_info.json")

	    # Start from the existing index (if any) so earlier searches are kept.
	    try:
	        with open(file_path, "r", encoding="utf-8") as f:
	            papers_info = json.load(f)
	    except (FileNotFoundError, json.JSONDecodeError):
	        papers_info = {}
	    if not isinstance(papers_info, dict):
	        # A valid-JSON but wrongly shaped file cannot be merged into.
	        papers_info = {}

	    paper_ids = []
	    for paper in papers:
	        short_id = paper.get_short_id()
	        paper_ids.append(short_id)
	        papers_info[short_id] = {
	            "title": paper.title,
	            "authors": [a.name for a in paper.authors],
	            "summary": paper.summary,
	            "pdf_url": paper.pdf_url,
	            "published": str(paper.published.date()),
	        }

	    with open(file_path, "w", encoding="utf-8") as f:
	        json.dump(papers_info, f, indent=2)

	    return paper_ids

	def extract_info(paper_id: str) -> str:
	    """
	    Look up a paper's cached metadata across all locally saved topic directories.

	    Each direct sub-directory of ``PAPER_DIR`` is checked for a
	    ``papers_info.json`` index; the first index containing ``paper_id`` wins.
	    Missing, unreadable-as-JSON, or wrongly shaped index files are skipped, and
	    a missing ``PAPER_DIR`` (no search has run yet) counts as "not found".

	    Args:
	        paper_id (str): The short ID of the paper to look up.

	    Returns:
	        str: A formatted JSON string of the paper's information, or an error message if not found.
	    """
	    not_found = f"No saved info for paper ID: {paper_id}"

	    try:
	        topic_dirs = os.listdir(PAPER_DIR)
	    except (FileNotFoundError, NotADirectoryError):
	        # Nothing has been cached yet, so there is nothing to search.
	        return not_found

	    for topic_dir in topic_dirs:
	        file_path = os.path.join(PAPER_DIR, topic_dir, "papers_info.json")
	        # isfile() is False for stray files in PAPER_DIR and for topic
	        # directories that have no index.
	        if not os.path.isfile(file_path):
	            continue
	        try:
	            with open(file_path, "r", encoding="utf-8") as f:
	                papers_info = json.load(f)
	        except (FileNotFoundError, ValueError):
	            # ValueError covers json.JSONDecodeError and undecodable bytes.
	            continue
	        if isinstance(papers_info, dict) and paper_id in papers_info:
	            return json.dumps(papers_info[paper_id], indent=2)

	    return not_found


	# Tool definitions advertised to the model. Each entry gives the tool's name,
	# a short description, and a JSON Schema for its arguments; the names match
	# the keys of mapping_tool_function.
	tools = [
	    # arXiv search: a required topic plus an optional result cap.
	    {
	        "name": "search_papers",
	        "description": "Search for papers on arXiv.",
	        "input_schema": {
	            "type": "object",
	            "properties": {
	                "topic": {"type": "string"},
	                "max_results": {"type": "integer", "default": 5},
	            },
	            "required": ["topic"],
	        },
	    },
	    # Lookup of one previously saved paper by its short ID.
	    {
	        "name": "extract_info",
	        "description": "Get details about a specific paper ID.",
	        "input_schema": {
	            "type": "object",
	            "properties": {
	                "paper_id": {"type": "string"},
	            },
	            "required": ["paper_id"],
	        },
	    },
	]

	# Dispatch table from tool name (as declared in `tools`) to the function that
	# implements it; execute_tool looks the callable up here by name.
	mapping_tool_function = {
	    "search_papers": search_papers,
	    "extract_info": extract_info
	}

	def execute_tool(tool_name, tool_args):
	    """
	    Execute a registered tool by name and return its result as a string.

	    Args:
	        tool_name (str): Name of the tool to execute (must exist in mapping_tool_function).
	        tool_args (dict): Keyword arguments for the tool; ``None`` is treated as no arguments.

	    Returns:
	        str: ``"No results returned."`` for ``None`` or an empty list, a
	        comma-separated string for a non-empty list, indented JSON for a dict,
	        and ``str(result)`` for anything else.

	    Raises:
	        KeyError: If ``tool_name`` is not registered in mapping_tool_function.
	    """
	    try:
	        tool = mapping_tool_function[tool_name]
	    except KeyError:
	        # Same exception type as the plain lookup, with a readable message.
	        raise KeyError(f"Unknown tool: {tool_name}") from None

	    result = tool(**(tool_args or {}))

	    if result is None:
	        return "No results returned."
	    if isinstance(result, list):
	        if not result:
	            # An empty string would tell the caller nothing; be explicit.
	            return "No results returned."
	        # str() each item so lists of non-string values do not break join().
	        return ', '.join(str(item) for item in result)
	    if isinstance(result, dict):
	        return json.dumps(result, indent=2)
	    return str(result)

	def process_query(query):
	    """
	    Answer a user query with Claude 3.7 Sonnet, running tool calls until the model stops asking for them.

	    Each turn is handled as a whole: the assistant turn is appended to the
	    history once, every ``tool_use`` block in it is executed, and all results
	    go back in a single user message. The loop ends as soon as a response
	    contains no ``tool_use`` block.

	    Args:
	        query (str): The user input.

	    Returns:
	        str: Text of the most recent response that contained text (multiple
	        text blocks are joined with newlines), or "" if none ever did.
	    """
	    messages = [{'role': 'user', 'content': query}]
	    # Shared by every request so the first and follow-up calls cannot drift.
	    request_options = {
	        'max_tokens': 2024,
	        'model': 'claude-3-7-sonnet-20250219',
	        'tools': tools,
	    }

	    response = client.messages.create(messages=messages, **request_options)
	    final_output = ""

	    while True:
	        text_parts = [block.text for block in response.content if block.type == 'text']
	        if text_parts:
	            final_output = "\n".join(text_parts)

	        tool_calls = [block for block in response.content if block.type == 'tool_use']
	        if not tool_calls:
	            # No further tool requests: this response is the final answer.
	            return final_output

	        # Record the assistant turn exactly once, as a copy, before answering it.
	        messages.append({'role': 'assistant', 'content': list(response.content)})

	        tool_results = []
	        for call in tool_calls:
	            print(f"Calling tool {call.name} with args {call.input}")
	            tool_result = {"type": "tool_result", "tool_use_id": call.id}
	            try:
	                tool_result["content"] = execute_tool(call.name, call.input)
	            except Exception as exc:
	                # Report the failure to the model instead of aborting the
	                # whole query; it can retry or explain the problem.
	                tool_result["content"] = f"Tool {call.name} failed: {exc}"
	                tool_result["is_error"] = True
	            tool_results.append(tool_result)

	        messages.append({"role": "user", "content": tool_results})
	        response = client.messages.create(messages=messages, **request_options)