"""Tool definitions (math, search, date/time, PDF QA, code execution) exposed to the agent."""

import operator
import os
from datetime import datetime
from functools import lru_cache
from urllib.parse import urlparse

# NOTE(review): pypdf is not referenced directly in this file; it is kept from the
# original import list (presumably so a missing PDF backend fails at import time).
import pypdf  # noqa: F401
from dotenv import load_dotenv
from langchain_community.document_loaders import PyPDFLoader, WikipediaLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.tools import Tool, tool  # noqa: F401  (Tool kept for importers)
from langchain_experimental.utilities import PythonREPL
from langchain_tavily import TavilySearch

# Load environment variables
load_dotenv()


# === MATH TOOL ===

# Maps the public operation names onto their two-argument implementations.
_CALCULATOR_OPERATIONS = {
    "add": operator.add,
    "subtract": operator.sub,
    "multiply": operator.mul,
    "divide": operator.truediv,
}


@tool
def calculator(a: float, b: float, operation: str) -> float:
    """
    Performs a mathematical operation (addition, subtraction, multiplication, division) on two numbers.
    Input should be a dictionary with 'a' (float, first number), 'b' (float, second number),
    and 'operation' (string, e.g., 'add', 'subtract', 'multiply', 'divide') keys.
    Example: {"a": 5.5, "b": 10.0, "operation": "add"}
    Raises ValueError for an unknown operation or a division by zero.
    """
    # Accept " Add ", "ADD", etc. so minor formatting differences do not fail the call.
    op_name = operation.strip().lower()
    func = _CALCULATOR_OPERATIONS.get(op_name)
    if func is None:
        raise ValueError(
            "Invalid operation. Choose from 'add', 'subtract', 'multiply', 'divide'."
        )
    if op_name == "divide" and b == 0:
        raise ValueError("Cannot divide by zero.")
    return func(a, b)


# === SEARCH TOOLS ===

# NOTE(review): the original code queried the Spanish edition ("es") and loaded a
# single document while its docstring advertised "up to 2" results. The code's
# behavior is kept; confirm the intended language / document count.
_WIKIPEDIA_LANG = "es"
_WIKIPEDIA_MAX_DOCS = 1
_WIKIPEDIA_MAX_CHARS = 1500


@tool
def wikipedia_search(query: str) -> dict:
    """
    Search Wikipedia (Spanish-language edition) for a given query and return the most relevant
    article, truncated to 1500 characters.
    Useful for factual questions about people, places, events, etc.
    Input should be a string representing the search query.
    Example: {"query": "Barack Obama"}
    The output is a dictionary with a 'wiki_results' key containing the formatted search results.
    """
    try:
        if not query or not query.strip():
            return {"wiki_results": "Error: Empty query provided."}

        # LangChain's WikipediaLoader returns Document objects
        loader = WikipediaLoader(
            query=query,
            load_max_docs=_WIKIPEDIA_MAX_DOCS,
            lang=_WIKIPEDIA_LANG,
        )
        search_docs = loader.load()
        if not search_docs:
            return {"wiki_results": "No results found on Wikipedia."}

        # Truncate each article so a long page cannot flood the LLM context.
        formatted = "\n\n---\n\n".join(
            f"\n{doc.page_content[:_WIKIPEDIA_MAX_CHARS]}\n" for doc in search_docs
        )
        return {"wiki_results": formatted}
    except Exception as e:
        # Tool boundary: report the failure to the agent instead of crashing it.
        return {"wiki_results": f"Error during Wikipedia search: {str(e)}"}


def _extract_tavily_results(response):
    """Normalize a Tavily response into a list of result dictionaries.

    ``langchain_tavily.TavilySearch`` returns a dict whose ``results`` key holds
    the hits; the older ``TavilySearchResults`` tool returned the list directly.
    Both shapes are accepted. A non-empty plain string is passed through as a
    single snippet; anything else yields an empty list.
    """
    if isinstance(response, dict):
        candidates = response.get("results") or []
    elif isinstance(response, list):
        candidates = response
    elif isinstance(response, str) and response.strip():
        candidates = [{"content": response}]
    else:
        candidates = []
    return [item for item in candidates if isinstance(item, dict)]


@tool
def web_search(query: str) -> dict:
    """
    Search the web using Tavily for a given query and return up to 3 relevant snippets.
    Useful for up-to-date information, current events, or general web searches.
    Input should be a string representing the search query.
    Requires TAVILY_API_KEY environment variable.
    Example: {"query": "latest news on AI"}
    The output is a dictionary with a 'web_results' key containing the formatted search results.
    """
    try:
        if not query or not query.strip():
            return {"web_results": "Error: Empty query provided."}
        if not os.getenv("TAVILY_API_KEY"):
            return {"web_results": "Error: Tavily API key is not configured."}

        response = TavilySearch(max_results=3).invoke({"query": query})

        # TavilySearch reports API failures as {"error": ...} rather than raising.
        if isinstance(response, dict) and response.get("error"):
            return {"web_results": f"Error during web search: {response['error']}"}

        # Iterating the raw dict would yield its keys (strings), not the hits,
        # so pull the result list out first.
        snippets = [
            result.get("content", "") for result in _extract_tavily_results(response)
        ]
        snippets = [snippet for snippet in snippets if snippet]
        if not snippets:
            return {"web_results": "No results found on the web."}

        formatted = "\n\n---\n\n".join(f"\n{snippet}\n" for snippet in snippets)
        return {"web_results": formatted}
    except Exception as e:
        # Tool boundary: report the failure to the agent instead of crashing it.
        return {"web_results": f"Error during web search: {str(e)}"}


# === UTILITY TOOLS ===

@tool
def get_current_datetime(format_string: str = "%Y-%m-%d %H:%M:%S") -> str:
    """
    Returns the current local date and time in a specified format.
    Useful for questions related to the current date, time, or for calculating durations.
    Input is an optional format_string (string, default: "%Y-%m-%d %H:%M:%S").
    Example: {"format_string": "%A, %B %d, %Y"} returns a value such as "Wednesday, July 16, 2025".
    """
    try:
        # Read the real clock (the previous version returned a frozen timestamp).
        # astimezone() attaches the local zone so %Z / %z directives render.
        current_time = datetime.now().astimezone()
        return current_time.strftime(format_string)
    except Exception as e:
        return f"Error getting current datetime: {str(e)}"


_EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"


@lru_cache(maxsize=1)
def _get_embeddings():
    """Build the sentence-embedding model once and reuse it across pdf_qa calls."""
    return HuggingFaceEmbeddings(model_name=_EMBEDDING_MODEL_NAME)


def _looks_like_url(path: str) -> bool:
    """Return True when *path* has both a URL scheme and a network location."""
    parsed = urlparse(path)
    return bool(parsed.scheme and parsed.netloc)


@tool
def pdf_qa(pdf_path: str, query: str) -> str:
    """
    Answers a question by searching for information within a specific PDF file.
    Returns the most relevant passages from the PDF followed by the original query.

    Args:
        pdf_path: The file path to the PDF document.
        query: The question to answer.
    """
    try:
        if not pdf_path or not pdf_path.strip():
            return "Error: No PDF path provided."
        if not query or not query.strip():
            return "Error: Empty query provided."

        # Check for the file up front so the "not found" message is actually
        # reachable; URLs are left for the loader to resolve.
        if not _looks_like_url(pdf_path) and not os.path.isfile(
            os.path.expanduser(pdf_path)
        ):
            return f"Error: The PDF file '{pdf_path}' was not found."

        # 1. Load the document and split it into chunks
        documents = PyPDFLoader(pdf_path).load_and_split()
        if not documents:
            # e.g. a scanned PDF without a text layer; an index cannot be built from nothing.
            return f"Error: No extractable text was found in the PDF '{pdf_path}'."

        # 2. Embed the chunks into an in-memory vector store
        vector_store = FAISS.from_documents(documents, _get_embeddings())

        # 3. Retrieve the two chunks most similar to the query
        retriever = vector_store.as_retriever(search_kwargs={"k": 2})
        relevant_docs = retriever.invoke(query)

        # 4. Hand the retrieved context and the query back to the agent,
        #    which generates the final answer.
        context = "\n\n".join(doc.page_content for doc in relevant_docs)
        return f"Context from PDF:\n{context}\n\nUser Query: {query}"
    except FileNotFoundError:
        return f"Error: The PDF file '{pdf_path}' was not found."
    except Exception as e:
        return f"An error occurred while processing the PDF: {e}"


# === CODE EXECUTION TOOL ===

# Shared REPL instance used by execute_python_code.
python_repl_instance = PythonREPL()


@tool
def execute_python_code(code: str) -> str:
    """
    Executes Python code and returns the output.
    Useful for mathematical calculations, string manipulations, list operations,
    logic problems, and any task that can be solved with Python code.
    Input should be a string containing valid Python code to execute.
    Use print(...) for any value you want to see in the output.
    Example: {"code": "print(2 + 2)"}
    """
    # SECURITY: this runs arbitrary, model-generated code inside the host
    # process with the host's privileges. Only expose this tool in a sandboxed
    # or otherwise trusted environment.
    try:
        if not code or not code.strip():
            return "Error: Empty code provided."
        # LangChain's PythonREPL.run expects a string input
        return python_repl_instance.run(code)
    except Exception as e:
        return f"Error executing Python code: {str(e)}"


# === TOOLSET EXPORT ===
# List of all available tools to be imported by agent.py
tools_for_llm = [
    calculator,
    wikipedia_search,
    web_search,
    get_current_datetime,
    pdf_qa,
    execute_python_code,
]


def _run_smoke_tests() -> None:
    """Exercise each tool once and print the result (manual, local check only)."""
    print("Testing tools.py functionalities...")

    # Set dummy API key for testing if not already set in .env
    # os.environ["TAVILY_API_KEY"] = "YOUR_TAVILY_API_KEY"  # Replace with a real key for actual testing

    # Test Math Tool
    print("\n--- Calculator Tool Test ---")
    print(f"Calculator(5, 3, 'multiply'): {calculator.invoke({'a': 5, 'b': 3, 'operation': 'multiply'})}")
    print(f"Calculator(10.5, 2.3, 'add'): {calculator.invoke({'a': 10.5, 'b': 2.3, 'operation': 'add'})}")
    try:
        print(f"Calculator(7, 0, 'divide') (should error): {calculator.invoke({'a': 7, 'b': 0, 'operation': 'divide'})}")
    except ValueError as e:
        print(f"  Error caught as expected: {e}")

    # Test Search Tools
    print("\n--- Search Tools Test ---")
    wiki_res = wikipedia_search.invoke({'query': 'Artificial Intelligence'})
    print(f"Wiki Search 'Artificial Intelligence': {wiki_res['wiki_results'][:200]}...")

    web_res = web_search.invoke({'query': 'Hugging Face new features'})
    print(f"Web Search 'Hugging Face new features': {web_res['web_results'][:200]}...")

    # Test Utility Tool
    print("\n--- Utility Tools Test ---")
    print(f"Current Datetime (default): {get_current_datetime.invoke({})}")
    print(f"Current Datetime (custom format): {get_current_datetime.invoke({'format_string': '%A, %d %B %Y'})}")

    # Test Python REPL Tool (the tool is execute_python_code; snippets print so
    # that the REPL has output to return)
    print("\n--- Python REPL Tool Test ---")
    print(f"Python REPL 'print(2 + 2)': {execute_python_code.invoke({'code': 'print(2 + 2)'})}")
    test_code_len = 'print(len("hello"))'
    print(f"Python REPL '{test_code_len}': {execute_python_code.invoke({'code': test_code_len})}")
    print(f"Python REPL error: {execute_python_code.invoke({'code': '10 / 0'})}")


# For local testing of tools (optional)
if __name__ == "__main__":
    _run_smoke_tests()