from smolagents import tool
import requests
from bs4 import BeautifulSoup

MAX_WEBPAGE_SIZE = 3000  # max characters to return from scraped content


@tool
def visit_webpage(url: str) -> dict:
    """
    Visits a webpage and extracts clean text from it.

    Args:
        url: The URL of the page to visit (e.g., https://en.wikipedia.org/wiki/OpenAI)

    Returns:
        dict: A dictionary containing:
            - "text": Truncated page content
            - "url": The original URL
            - "status": HTTP status or error info
    """
    print(f"Tool: visit_webpage visiting {url}...")
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        # Strip HTML tags and collapse the page into plain text.
        soup = BeautifulSoup(response.text, "html.parser")
        text = soup.get_text(separator="\n", strip=True)
        # Truncate so the result stays within the agent's context budget.
        short_text = text[:MAX_WEBPAGE_SIZE]
        print(f"✅ Extracted {len(short_text)} characters from {url}")
        return {
            "text": short_text,
            "url": url,
            "status": f"Success ({response.status_code})",
        }
    except Exception as e:
        print(f"🚨 Error in visit_webpage: {e}")
        return {
            "text": "",
            "url": url,
            "status": f"Error: {e}",
        }
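

# Usage sketch (not part of the original tool): one way visit_webpage might be
# handed to a smolagents agent. Assumes a recent smolagents release that
# exposes CodeAgent and InferenceClientModel, and that Hugging Face inference
# credentials are configured; the demo query below is illustrative only.
if __name__ == "__main__":
    from smolagents import CodeAgent, InferenceClientModel

    model = InferenceClientModel()  # assumed default hosted model endpoint
    agent = CodeAgent(tools=[visit_webpage], model=model)
    agent.run("Summarize https://en.wikipedia.org/wiki/OpenAI in two sentences.")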