Spaces:

Hargurjeet
/

AI_Research_Agent

Sleeping

File size: 6,263 Bytes

import arxiv
import json
import os
from typing import List
from dotenv import load_dotenv
import anthropic

load_dotenv()

PAPER_DIR = "/tmp/papers"
client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))

def search_papers(topic: str, max_results: int = 5) -> List[str]:
    """
    Search for research papers from arXiv on a given topic and store their information in a local directory.

    Args:
        topic (str): The research topic to search for.
        max_results (int, optional): Maximum number of search results to return. Defaults to 5.

    Returns:
        List[str]: A list of short IDs of the retrieved papers.
    """
    client_arxiv = arxiv.Client()
    search = arxiv.Search(query=topic, max_results=max_results, sort_by=arxiv.SortCriterion.Relevance)
    papers = client_arxiv.results(search)

    path = os.path.join(PAPER_DIR, topic.lower().replace(" ", "_"))
    os.makedirs(path, exist_ok=True)
    file_path = os.path.join(path, "papers_info.json")

    try:
        with open(file_path, "r") as f:
            papers_info = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        papers_info = {}

    paper_ids = []
    for paper in papers:
        paper_ids.append(paper.get_short_id())
        papers_info[paper.get_short_id()] = {
            "title": paper.title,
            "authors": [a.name for a in paper.authors],
            "summary": paper.summary,
            "pdf_url": paper.pdf_url,
            "published": str(paper.published.date())
        }

    with open(file_path, "w") as f:
        json.dump(papers_info, f, indent=2)

    return paper_ids

def extract_info(paper_id: str) -> str:
    """
    Retrieve information about a paper with a given paper ID from locally saved topic directories.

    Args:
        paper_id (str): The short ID of the paper to look up.

    Returns:
        str: A formatted JSON string of the paper's information, or an error message if not found.
    """
    for topic_dir in os.listdir(PAPER_DIR):
        dir_path = os.path.join(PAPER_DIR, topic_dir)
        if os.path.isdir(dir_path):
            file_path = os.path.join(dir_path, "papers_info.json")
            if os.path.isfile(file_path):
                try:
                    with open(file_path, "r") as f:
                        papers_info = json.load(f)
                        if paper_id in papers_info:
                            return json.dumps(papers_info[paper_id], indent=2)
                except (FileNotFoundError, json.JSONDecodeError):
                    continue
    return f"No saved info for paper ID: {paper_id}"


tools = [
    {
        "name": "search_papers",
        "description": "Search for papers on arXiv.",
        "input_schema": {
            "type": "object",
            "properties": {
                "topic": {"type": "string"},
                "max_results": {"type": "integer", "default": 5}
            },
            "required": ["topic"]
        }
    },
    {
        "name": "extract_info",
        "description": "Get details about a specific paper ID.",
        "input_schema": {
            "type": "object",
            "properties": {
                "paper_id": {"type": "string"}
            },
            "required": ["paper_id"]
        }
    }
]

mapping_tool_function = {
    "search_papers": search_papers,
    "extract_info": extract_info
}

def execute_tool(tool_name, tool_args):
    """
    Execute a tool function by its name using provided arguments and return the result.

    Args:
        tool_name (str): Name of the tool to execute (must exist in mapping_tool_function).
        tool_args (dict): Arguments to pass to the tool function.

    Returns:
        str: Stringified result of the tool function execution.
    """
    result = mapping_tool_function[tool_name](**tool_args)
    if result is None:
        return "No results returned."
    if isinstance(result, list):
        return ', '.join(result)
    if isinstance(result, dict):
        return json.dumps(result, indent=2)
    return str(result)

def process_query(query):
    """
    Processes a user query using Claude 3 Sonnet and integrated tools, returning the assistant's final response.

    Args:
        query (str): The user input.

    Returns:
        str: Final assistant response as text.
    """
    messages = [{'role': 'user', 'content': query}]

    response = client.messages.create(
        max_tokens=2024,
        model='claude-3-7-sonnet-20250219', 
        tools=tools,
        messages=messages
    )

    process_query = True
    final_output = ""

    while process_query:
        assistant_content = []

        for content in response.content:
            if content.type == 'text':
                final_output = content.text
                assistant_content.append(content)

                # If only a simple reply, we’re done
                if len(response.content) == 1:
                    process_query = False

            elif content.type == 'tool_use':
                assistant_content.append(content)
                messages.append({'role': 'assistant', 'content': assistant_content})

                tool_id = content.id
                tool_args = content.input
                tool_name = content.name

                print(f"Calling tool {tool_name} with args {tool_args}")
                result = execute_tool(tool_name, tool_args)

                messages.append({
                    "role": "user",
                    "content": [
                        {
                            "type": "tool_result",
                            "tool_use_id": tool_id,
                            "content": result
                        }
                    ]
                })

                response = client.messages.create(
                    max_tokens=2024,
                    model='claude-3-7-sonnet-20250219', 
                    tools=tools,
                    messages=messages
                )

                # If the assistant now only sends text, we're done
                if len(response.content) == 1 and response.content[0].type == "text":
                    final_output = response.content[0].text
                    process_query = False

    return final_output