from smolagents import tool
import requests
from bs4 import BeautifulSoup

MAX_WEBPAGE_SIZE = 3000  # max characters to return from scraped content
@tool
def visit_webpage(url: str) -> dict:
    """
    Visits a webpage and extracts clean text from it.

    Args:
        url: The URL of the page to visit (e.g., https://en.wikipedia.org/wiki/OpenAI)

    Returns:
        dict: A dictionary containing:
            - "text": Truncated page content
            - "url": The original URL
            - "status": HTTP status or error info
    """
    print(f"Tool: visit_webpage visiting {url}...")
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        text = soup.get_text(separator="\n", strip=True)
        short_text = text[:MAX_WEBPAGE_SIZE]
        print(f"Extracted {len(short_text)} characters from {url}")
        return {
            "text": short_text,
            "url": url,
            "status": f"Success ({response.status_code})",
        }
    except Exception as e:
        print(f"Error in visit_webpage: {e}")
        return {
            "text": "",
            "url": url,
            "status": f"Error: {e}",
        }
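

# Minimal usage sketch (not part of the tool itself): smolagents tools created
# with @tool remain directly callable, so this assumes a plain keyword call
# works and simply inspects the dictionary the tool returns. The URL is the
# illustrative example from the docstring; in an agent setup the tool would
# instead be passed to an agent via its tools=[visit_webpage] argument.
if __name__ == "__main__":
    result = visit_webpage(url="https://en.wikipedia.org/wiki/OpenAI")
    print(result["status"])          # e.g. "Success (200)" or an error message
    print(result["text"][:200])      # preview the first 200 characters of text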