"""Agent tools: web search, page scraping, file download, OCR and table summaries.

Each public function is wrapped with LangChain's ``@tool`` decorator and
``get_tools`` returns the subset that is currently enabled.
"""

import os
import tempfile
import uuid
from typing import Any, Dict, List, Optional, Union
from urllib.parse import urlparse

import requests
from bs4 import BeautifulSoup
from duckduckgo_search import DDGS
from langchain.tools import tool

# NOTE(review): the docstrings of the @tool functions below are presumably
# what LangChain exposes to the model as the tool description, so their
# wording is kept close to the original on purpose -- confirm before rewording.

# Upper bound (seconds) for any single HTTP request so a stalled server
# cannot block the agent indefinitely.
_REQUEST_TIMEOUT_SECONDS = 30


def _safe_filename(filename: Optional[str]) -> Optional[str]:
    """Return the final path component of *filename*, or None if unusable.

    Stripping directory parts keeps caller-supplied names (which may come
    from a model or from a remote URL) inside the temporary directory.
    """
    if not filename:
        return None
    name = os.path.basename(filename)
    # "", "." and ".." name directories, not files.
    if name in ("", ".", ".."):
        return None
    return name


def _summarize_dataframe(df: Any, file_kind: str) -> str:
    """Build the summary text shared by the CSV and Excel tools.

    Args:
        df: The loaded pandas DataFrame.
        file_kind: Label used in the first line ("CSV" or "Excel").

    Returns:
        Row/column counts, column names and ``df.describe()`` output.
    """
    # Column labels are not guaranteed to be strings (e.g. numeric headers),
    # so convert them explicitly before joining.
    column_names = ", ".join(map(str, df.columns))
    result = (
        f"{file_kind} file loaded with {len(df)} rows "
        f"and {len(df.columns)} columns.\n"
    )
    result += f"Columns: {column_names}\n\n"
    result += "Summary statistics:\n"
    result += str(df.describe())
    return result


@tool("search", return_direct=False)
def search(query: str) -> Union[List[Dict[str, Any]], str]:
    """Searches the internet using DuckDuckGo

    Args:
        query (str): Search query

    Returns:
        Search results (a list of result entries), or "No results found."
    """
    with DDGS() as ddgs:
        results = list(ddgs.text(query, max_results=5))
    return results if results else "No results found."


@tool("process_content", return_direct=False)
def process_content(url: str) -> str:
    """Process content from a webpage

    Args:
        url (str): URL to get content

    Returns:
        str: Content in the webpage, or an error message if it cannot be fetched
    """
    try:
        response = requests.get(url, timeout=_REQUEST_TIMEOUT_SECONDS)
        response.raise_for_status()
    except requests.RequestException as e:
        # Report failures as text, matching the other tools in this module.
        return f"Error fetching content: {str(e)}"
    soup = BeautifulSoup(response.content, "html.parser")
    return soup.get_text()


@tool("save_file")
def save_file(content: str, filename: Optional[str] = None) -> str:
    """
    Save content to a temporary file and return the path.
    Useful for processing files from the GAIA API.

    Args:
        content: The content to save to the file
        filename: Optional filename, will generate a random name if not provided

    Returns:
        Path to the saved file
    """
    safe_name = _safe_filename(filename)
    if safe_name is None:
        # mkstemp returns an open descriptor; close it right away so it is
        # not leaked and the path can be reopened on every platform.
        fd, filepath = tempfile.mkstemp()
        os.close(fd)
    else:
        filepath = os.path.join(tempfile.gettempdir(), safe_name)

    # Write content to the file
    with open(filepath, "w", encoding="utf-8") as f:
        f.write(content)

    return f"File saved to {filepath}. You can read this file to process its contents."


@tool("download_file_from_url")
def download_file_from_url(url: str, filename: Optional[str] = None) -> str:
    """
    Download a file from a URL and save it to a temporary location.

    Args:
        url: The URL to download from
        filename: Optional filename, will generate one based on URL if not provided

    Returns:
        Path to the downloaded file, or an error message
    """
    # The broad except is deliberate: this tool reports every failure as a
    # string instead of raising.
    try:
        # Prefer the caller's name, then the URL's last path component,
        # then a random name if neither yields a usable file name.
        safe_name = _safe_filename(filename) or _safe_filename(urlparse(url).path)
        if safe_name is None:
            safe_name = f"downloaded_{uuid.uuid4().hex[:8]}"

        filepath = os.path.join(tempfile.gettempdir(), safe_name)

        # Using the response as a context manager releases the streamed
        # connection even when writing fails part-way through.
        with requests.get(
            url, stream=True, timeout=_REQUEST_TIMEOUT_SECONDS
        ) as response:
            response.raise_for_status()

            # Save the file
            with open(filepath, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)

        return f"File downloaded to {filepath}. You can now process this file."
    except Exception as e:
        return f"Error downloading file: {str(e)}"


@tool("extract_text_from_image")
def extract_text_from_image(image_path: str) -> str:
    """
    Extract text from an image using pytesseract (if available).

    Args:
        image_path: Path to the image file

    Returns:
        Extracted text or error message
    """
    try:
        # Imported lazily so the module loads without the OCR dependencies.
        import pytesseract
        from PIL import Image

        # The context manager closes the underlying image file after OCR.
        with Image.open(image_path) as image:
            text = pytesseract.image_to_string(image)

        return f"Extracted text from image:\n\n{text}"
    except ImportError:
        return "Error: pytesseract is not installed. Please install it with 'pip install pytesseract' and ensure Tesseract OCR is installed on your system."
    except Exception as e:
        return f"Error extracting text from image: {str(e)}"


@tool("analyze_csv_file")
def analyze_csv_file(file_path: str, query: str) -> str:
    """
    Analyze a CSV file using pandas and answer a question about it.

    Args:
        file_path: Path to the CSV file
        query: Question about the data

    Returns:
        Analysis result or error message
    """
    # NOTE: `query` is accepted but not used; the same summary is returned
    # whatever the question is.
    try:
        import pandas as pd

        # Read the CSV file
        df = pd.read_csv(file_path)

        return _summarize_dataframe(df, "CSV")
    except ImportError:
        return "Error: pandas is not installed. Please install it with 'pip install pandas'."
    except Exception as e:
        return f"Error analyzing CSV file: {str(e)}"


@tool("analyze_excel_file")
def analyze_excel_file(file_path: str, query: str) -> str:
    """
    Analyze an Excel file using pandas and answer a question about it.

    Args:
        file_path: Path to the Excel file
        query: Question about the data

    Returns:
        Analysis result or error message
    """
    # NOTE: `query` is accepted but not used; the same summary is returned
    # whatever the question is.
    try:
        import pandas as pd

        # Read the Excel file
        df = pd.read_excel(file_path)

        return _summarize_dataframe(df, "Excel")
    except ImportError:
        return "Error: pandas and openpyxl are not installed. Please install them with 'pip install pandas openpyxl'."
    except Exception as e:
        return f"Error analyzing Excel file: {str(e)}"


def get_tools() -> List[Any]:
    """Return the tools that are currently enabled.

    Only ``search`` is active; the commented entries are the other tools
    defined in this module and can be re-enabled by uncommenting them.
    """
    return [
        search,
        # process_content,
        # save_file,
        # download_file_from_url,
        # extract_text_from_image,
        # analyze_csv_file,
        # analyze_excel_file
    ]