from smolagents import tool
import requests
from bs4 import BeautifulSoup

MAX_WEBPAGE_SIZE = 3000  # max characters to return from scraped content


@tool
def visit_webpage(url: str) -> dict:
    """
    Visits a webpage and extracts clean text from it.

    Args:
        url: The URL of the page to visit (e.g., https://en.wikipedia.org/wiki/OpenAI)

    Returns:
        dict: A dictionary containing:
            - "text": Truncated page content
            - "url": The original URL
            - "status": HTTP status or error info
    """
    print(f"Tool: visit_webpage visiting {url}...")
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        # Strip HTML tags and collapse the page into plain text.
        soup = BeautifulSoup(response.text, "html.parser")
        text = soup.get_text(separator="\n", strip=True)
        # Truncate so the result stays within the agent's context budget.
        short_text = text[:MAX_WEBPAGE_SIZE]
        print(f"✅ Extracted {len(short_text)} characters from {url}")
        return {
            "text": short_text,
            "url": url,
            "status": f"Success ({response.status_code})",
        }
    except Exception as e:
        print(f"🚨 Error in visit_webpage: {e}")
        return {
            "text": "",
            "url": url,
            "status": f"Error: {e}",
        }
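

# Usage sketch (not part of the original tool): one way visit_webpage might be
# handed to a smolagents agent. Assumes a recent smolagents release that
# exposes CodeAgent and InferenceClientModel, and that Hugging Face inference
# credentials are configured; the demo query below is illustrative only.
if __name__ == "__main__":
    from smolagents import CodeAgent, InferenceClientModel

    model = InferenceClientModel()  # assumed default hosted model endpoint
    agent = CodeAgent(tools=[visit_webpage], model=model)
    agent.run("Summarize https://en.wikipedia.org/wiki/OpenAI in two sentences.")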