Spaces:

echodrift
/

agent_course

Sleeping

File size: 6,039 Bytes

60ba1ca

import requests
from langchain.tools import tool
from duckduckgo_search import DDGS
from bs4 import BeautifulSoup
import tempfile
from typing import Optional
import os
from urllib.parse import urlparse


@tool("search", return_direct=False)
def search(query: str) -> str:
    """Searches the internet using DuckDuckGo

    Args:
        query (str): Search query

    Returns:
        str: Up to five results, each formatted as a numbered entry with
            title, URL and snippet; a "No results found." message when the
            search is empty; or an error message if the search fails
    """
    try:
        with DDGS() as ddgs:
            # list() accepts both the list and the generator forms of the result
            results = list(ddgs.text(query, max_results=5))
    except Exception as e:
        # Report failures as text, the same way the other tools in this file do
        return f"Error searching: {str(e)}"

    if not results:
        return "No results found."

    # Build the string promised by the signature instead of returning raw dicts
    formatted = []
    for index, item in enumerate(results, start=1):
        title = item.get("title", "")
        link = item.get("href", "")
        snippet = item.get("body", "")
        formatted.append(f"{index}. {title}\n{link}\n{snippet}")
    return "\n\n".join(formatted)


@tool("process_content", return_direct=False)
def process_content(url: str) -> str:
    """Process content from a webpage

    Args:
        url (str): URL to get content

    Returns:
        str: Text content of the webpage, or an error message if the page
            could not be fetched
    """
    try:
        # Without a timeout an unresponsive host would block the agent forever
        response = requests.get(url, timeout=30)
        # Fail on 4xx/5xx instead of returning the text of an error page
        response.raise_for_status()
    except requests.RequestException as e:
        return f"Error fetching content: {str(e)}"

    soup = BeautifulSoup(response.content, "html.parser")
    return soup.get_text()


@tool("save_file")
def save_file(content: str, filename: Optional[str] = None) -> str:
    """
    Save content to a temporary file and return the path.
    Useful for processing files from the GAIA API.

    Args:
        content: The content to save to the file
        filename: Optional filename, will generate a random name if not provided.
            Only the final path component is used, so the file is always
            created inside the system temporary directory.

    Returns:
        Message containing the path to the saved file
    """
    # Drop any directory part so an absolute path or "../" cannot escape the temp dir
    safe_name = os.path.basename(filename) if filename else ""

    if safe_name in ("", ".", ".."):
        # No usable name: let tempfile pick one, and write through the same
        # handle so it is closed when the block exits
        with tempfile.NamedTemporaryFile(
            "w", encoding="utf-8", delete=False
        ) as f:
            f.write(content)
            filepath = f.name
    else:
        filepath = os.path.join(tempfile.gettempdir(), safe_name)
        # Explicit UTF-8 so non-ASCII content does not depend on the locale
        with open(filepath, "w", encoding="utf-8") as f:
            f.write(content)

    return f"File saved to {filepath}. You can read this file to process its contents."


@tool("download_file_from_url")
def download_file_from_url(url: str, filename: Optional[str] = None) -> str:
    """
    Download a file from a URL and save it to a temporary location.

    Args:
        url: The URL to download from
        filename: Optional filename, will generate one based on URL if not provided.
            Only the final path component is used, so the file is always
            created inside the system temporary directory.

    Returns:
        Message containing the path to the downloaded file, or an error message
    """
    try:
        # Fall back to the URL path when the caller gave no name
        candidate = filename or urlparse(url).path
        # Keep only the last component so "../" or an absolute path cannot
        # place the file outside the temp dir
        safe_name = os.path.basename(candidate)
        if safe_name in ("", ".", ".."):
            # Generate a random name if we couldn't extract a usable one
            import uuid

            safe_name = f"downloaded_{uuid.uuid4().hex[:8]}"

        filepath = os.path.join(tempfile.gettempdir(), safe_name)

        # The context manager releases the streamed connection even on error;
        # the timeout keeps an unresponsive host from blocking the agent
        with requests.get(url, stream=True, timeout=30) as response:
            response.raise_for_status()

            # Stream to disk in chunks so large files are never held in memory
            with open(filepath, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)

        return f"File downloaded to {filepath}. You can now process this file."
    except Exception as e:
        return f"Error downloading file: {str(e)}"


@tool("extract_text_from_image")
def extract_text_from_image(image_path: str) -> str:
    """
    Extract text from an image using pytesseract (if available).

    Args:
        image_path: Path to the image file

    Returns:
        Extracted text or error message
    """
    try:
        # Imported lazily so the module still loads when OCR support is absent
        import pytesseract
        from PIL import Image

        # The with-block closes the underlying image file once OCR is done
        with Image.open(image_path) as image:
            text = pytesseract.image_to_string(image)

        return f"Extracted text from image:\n\n{text}"
    except ImportError:
        return "Error: pytesseract is not installed. Please install it with 'pip install pytesseract' and ensure Tesseract OCR is installed on your system."
    except Exception as e:
        return f"Error extracting text from image: {str(e)}"


@tool("analyze_csv_file")
def analyze_csv_file(file_path: str, query: str) -> str:
    """
    Analyze a CSV file using pandas and answer a question about it.

    Args:
        file_path: Path to the CSV file
        query: Question about the data

    Returns:
        Analysis result or error message
    """
    # NOTE: query is accepted but never consulted below; the same overview
    # is produced whatever the question is.
    try:
        import pandas as pd

        frame = pd.read_csv(file_path)

        # Gather the pieces of the report in order, then join them once
        row_count = len(frame)
        column_count = len(frame.columns)
        column_list = ", ".join(frame.columns)
        sections = [
            f"CSV file loaded with {row_count} rows and {column_count} columns.\n",
            f"Columns: {column_list}\n\n",
            "Summary statistics:\n",
            str(frame.describe()),
        ]
        return "".join(sections)
    except ImportError:
        return "Error: pandas is not installed. Please install it with 'pip install pandas'."
    except Exception as err:
        return f"Error analyzing CSV file: {str(err)}"


@tool("analyze_excel_file")
def analyze_excel_file(file_path: str, query: str) -> str:
    """
    Analyze an Excel file using pandas and answer a question about it.

    Args:
        file_path: Path to the Excel file
        query: Question about the data

    Returns:
        Analysis result or error message
    """
    # NOTE: query is accepted but never consulted below; the same overview
    # is produced whatever the question is.
    try:
        import pandas as pd

        # Read the Excel file
        df = pd.read_excel(file_path)

        result = (
            f"Excel file loaded with {len(df)} rows and {len(df.columns)} columns.\n"
        )
        # Excel headers may be numbers or dates, which str.join rejects,
        # so convert every label to text first
        result += f"Columns: {', '.join(map(str, df.columns))}\n\n"

        # Add summary statistics
        result += "Summary statistics:\n"
        result += str(df.describe())

        return result
    except ImportError:
        return "Error: pandas and openpyxl are not installed. Please install them with 'pip install pandas openpyxl'."
    except Exception as e:
        return f"Error analyzing Excel file: {str(e)}"


def get_tools():
    """Return the list of tools that are currently enabled."""
    # Only search is returned. These entries are commented out of the list:
    #   process_content, save_file, download_file_from_url,
    #   extract_text_from_image, analyze_csv_file, analyze_excel_file
    enabled_tools = [search]
    return enabled_tools