Final_Assignment_Template / tools /webpage_parser.py
zbloss's picture
Attempt with wikipedia parsing tools
ca6fbc3
raw
history blame contribute delete
817 Bytes
from bs4 import BeautifulSoup
from smolagents import Tool
class WebpageParser(Tool):
    """Agent tool that breaks an HTML document into its individual elements.

    The class attributes below describe the tool (name, description, input
    schema and output type); ``forward`` carries out the actual parsing.
    """

    name: str = "webpage_parser_tool"
    description: str = (
        "This tool parses elements from HTML to make them easily searchable."
    )
    inputs: dict[str, dict[str, str]] = {
        "html_string": {
            "type": "string",
            "description": "The HTML content as a string.",
        },
    }
    output_type: str = "array"

    def forward(self, html_string: str) -> list[str]:
        """Return every element found in ``html_string`` as markup text.

        Args:
            html_string: The HTML content to parse.

        Returns:
            One string per element matched by ``find_all()``, each being the
            element serialized back to markup with ``str()``.
        """
        document = BeautifulSoup(html_string, "html.parser")
        # find_all() without a filter matches every tag, nested ones included,
        # so a child's markup is also contained in its ancestors' entries.
        matched_tags = document.find_all()
        return list(map(str, matched_tags))