import base64
import mimetypes
import os
import time
from typing import Optional

import mwclient
import requests
from dotenv import load_dotenv
from langchain.tools import tool
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
from langchain_core.messages import HumanMessage
from langchain_groq import ChatGroq

load_dotenv()

# Multimodal chat model used by `extract_text_from_image`; the API key is read
# from the environment (populated from a .env file by `load_dotenv()` above).
vision_llm = ChatGroq(
    model="meta-llama/llama-4-scout-17b-16e-instruct",
    groq_api_key=os.getenv('GROQ_API_KEY'),
)


@tool
def web_search(query: str, domain: Optional[str] = None) -> str:
    """
    Perform a web search and return the raw results as a string.

    Args:
        query (str): The search query.
        domain (Optional[str]): If provided, restricts the search to this domain.

    Returns:
        str: Up to three results, each formatted as Title / URL / Snippet and
        separated by a blank line. Returns "No results found." when the search
        yields nothing, or a message starting with "Search error:" on failure.
    """
    try:
        # Fixed pause before every request (presumably to stay under the
        # search backend's rate limit).
        time.sleep(2)
        search = DuckDuckGoSearchAPIWrapper()
        if domain:
            query = f"{query} site:{domain}"
        results = search.results(query, max_results=3)
        if not results:
            return "No results found."
        entries = [
            f"Title: {r['title']}\nURL: {r['link']}\nSnippet: {r['snippet']}"
            for r in results
        ]
        return "\n\n".join(entries).strip()
    except Exception as e:
        # Tools report failures as text so the calling agent can react to them.
        return f"Search error: {e}"


@tool
def visit_webpage(url: str) -> str:
    """
    Fetch and load the content of a webpage given its URL.

    Args:
        url (str): The URL of the webpage to be visited.

    Returns:
        str: The loaded documents rendered as text, prefixed with
        "Page content: ".
    """
    loader = WebBaseLoader(url)
    # NOTE(security): TLS certificate verification is disabled here, so HTTPS
    # responses are not authenticated. Kept as-is to preserve behaviour for
    # sites with broken certificates; reconsider before loading untrusted URLs.
    loader.requests_kwargs = {'verify': False}
    docs = loader.load()
    return f"Page content: {docs}"


@tool
def wikipedia_search(query: str, max_docs: int = 1) -> str:
    """
    Search Wikipedia using mwclient and return up to `max_docs` results.

    Args:
        query (str): The search query.
        max_docs (int): Number of results to return. Default is 1.

    Returns:
        str: One section per page containing its title, the first
        blank-line-delimited chunk of the page text and its URL. Returns
        "No valid matching pages found." when no page could be loaded, or a
        message starting with "Wikipedia search error:" on failure.
    """
    try:
        # Fixed pause before every request (presumably rate limiting).
        time.sleep(2)
        site = mwclient.Site("en.wikipedia.org")
        results = site.search(query, limit=max_docs)
        sections = []
        for page_info in results:
            title = page_info["title"]
            try:
                page = site.pages[title]
                content = page.text()
                first_paragraph = content.split('\n\n')[0]
                url = f"https://en.wikipedia.org/wiki/{title.replace(' ', '_')}"
                sections.append(
                    f"--- Result {len(sections) + 1} ---\n"
                    f"Title: {title}\n"
                    f"Summary: {first_paragraph}...\n"
                    f"URL: {url}\n\n"
                )
                # Only successfully loaded pages count towards the limit.
                if len(sections) >= max_docs:
                    break
            except Exception:
                # Best effort: skip a page that cannot be loaded and try the
                # next search hit instead of failing the whole search.
                continue
        return "".join(sections).strip() or "No valid matching pages found."
    except Exception as e:
        return f"Wikipedia search error: {str(e)}"


@tool
def extract_text_from_image(image_path: str) -> str:
    """
    Extract text from (or get an explanation of) an image file.

    The image is sent to the vision model as a base64 data URL.

    Args:
        image_path (str): The file path to the image (e.g., '/path/to/document.png').

    Returns:
        str: The model's response with surrounding whitespace removed.
        Returns an error message string starting with 'Error extracting text:'
        on failure.
    """
    try:
        # Fixed pause before every request (presumably rate limiting).
        time.sleep(2)
        with open(image_path, "rb") as image_file:
            image_bytes = image_file.read()
        image_base64 = base64.b64encode(image_bytes).decode("utf-8")

        # Label the payload with the real image type when the extension
        # reveals it; fall back to PNG, which was the previous fixed value.
        guessed_type, _ = mimetypes.guess_type(image_path)
        if guessed_type and guessed_type.startswith("image/"):
            mime_type = guessed_type
        else:
            mime_type = "image/png"

        message = [
            HumanMessage(
                content=[
                    {
                        "type": "text",
                        "text": (
                            "Extract text or provide explanation of this image"
                        ),
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{image_base64}"
                        },
                    },
                ]
            )
        ]
        response = vision_llm.invoke(message)
        return response.content.strip()
    except Exception as e:
        error_msg = f"Error extracting text: {str(e)}"
        print(error_msg)
        # Return the message (not an empty string) so the caller can tell a
        # failure apart from an image that simply contains no text.
        return error_msg


@tool
def analyze_file(file_path: str) -> str:
    """
    Load and analyze a CSV or Excel file using pandas.

    Provides basic metadata and summary statistics for numeric columns.

    Args:
        file_path (str): Path to the CSV or Excel file.

    Returns:
        str: Summary statistics from `DataFrame.describe()`, the column names
        and up to the first 1000 rows rendered as text. Returns a message
        starting with "Error" when the extension is unsupported, pandas (or
        its Excel backend) is not installed, the file is missing, or loading
        fails.
    """
    try:
        # Imported lazily so a missing pandas installation is reported through
        # the ImportError handler below instead of breaking module import.
        import pandas as pd

        _, ext = os.path.splitext(file_path.lower())
        if ext == '.csv':
            df = pd.read_csv(file_path)
        elif ext in ['.xls', '.xlsx']:
            df = pd.read_excel(file_path)
        else:
            return f"Error: Unsupported file extension '{ext}'. Supported: .csv, .xls, .xlsx"

        # Column labels are not necessarily strings (e.g. integer headers).
        column_names = ', '.join(str(column) for column in df.columns)
        parts = [
            "Summary statistics for numeric columns:\n",
            str(df.describe()),
            "\n\n",
            f"Columns: {column_names}\n\n",
            "Content:\n",
            # Truncate before converting so only the shown rows are stringified.
            df.head(1000).astype(str).to_string(index=False),
        ]
        return "".join(parts)
    except ImportError:
        return "Error: Required libraries are not installed. Install with 'pip install pandas openpyxl'."
    except FileNotFoundError:
        return f"Error: File not found at path '{file_path}'."
    except Exception as e:
        return f"Error analyzing file: {str(e)}"