mmichiels13's picture
Vision and Browsing Tools
2c55d04 verified
raw
history blame
4.08 kB
from smolagents import DuckDuckGoSearchTool, VisitWebpageTool, FinalAnswerTool
from smolagents import Tool, tool
import random
from huggingface_hub import list_models
# Initialize the DuckDuckGo search tool
web_search = DuckDuckGoSearchTool()
visit_webpage = VisitWebpageTool()
final_answer = FinalAnswerTool()
class WeatherInfoTool(Tool):
name = "weather_info"
description = "Fetches dummy weather information for a given location."
inputs = {
"location": {
"type": "string",
"description": "The location to get weather information for."
}
}
output_type = "string"
def forward(self, location: str):
# Dummy weather data
weather_conditions = [
{"condition": "Rainy", "temp_c": 15},
{"condition": "Clear", "temp_c": 25},
{"condition": "Windy", "temp_c": 20}
]
# Randomly select a weather condition
data = random.choice(weather_conditions)
return f"Weather in {location}: {data['condition']}, {data['temp_c']}°C"
class HubStatsTool(Tool):
name = "hub_stats"
description = "Fetches the most downloaded model from a specific author on the Hugging Face Hub."
inputs = {
"author": {
"type": "string",
"description": "The username of the model author/organization to find models from."
}
}
output_type = "string"
def forward(self, author: str):
try:
# List models from the specified author, sorted by downloads
models = list(list_models(author=author, sort="downloads", direction=-1, limit=1))
if models:
model = models[0]
return f"The most downloaded model by {author} is {model.id} with {model.downloads:,} downloads."
else:
return f"No models found for author {author}."
except Exception as e:
return f"Error fetching models for {author}: {str(e)}"
@tool
def search_item_ctrl_f(text: str, nth_result: int = 1) -> str:
"""
Searches for text on the current page via Ctrl + F and jumps to the nth occurrence.
Args:
text: The text to search for
nth_result: Which occurrence to jump to (default: 1)
"""
elements = driver.find_elements(By.XPATH, f"//*[contains(text(), '{text}')]")
if nth_result > len(elements):
raise Exception(f"Match n°{nth_result} not found (only {len(elements)} matches found)")
result = f"Found {len(elements)} matches for '{text}'."
elem = elements[nth_result - 1]
driver.execute_script("arguments[0].scrollIntoView(true);", elem)
result += f"Focused on element {nth_result} of {len(elements)}"
return result
@tool
def go_back() -> None:
"""Goes back to previous page."""
driver.back()
@tool
def close_popups() -> str:
"""
Closes any visible modal or pop-up on the page. Use this to dismiss pop-up windows! This does not work on cookie consent banners.
"""
webdriver.ActionChains(driver).send_keys(Keys.ESCAPE).perform()
def save_screenshot(step_log: ActionStep, agent: CodeAgent) -> None:
sleep(1.0) # Let JavaScript animations happen before taking the screenshot
driver = helium.get_driver()
current_step = step_log.step_number
if driver is not None:
for step_logs in agent.logs: # Remove previous screenshots from logs for lean processing
if isinstance(step_log, ActionStep) and step_log.step_number <= current_step - 2:
step_logs.observations_images = None
png_bytes = driver.get_screenshot_as_png()
image = Image.open(BytesIO(png_bytes))
print(f"Captured a browser screenshot: {image.size} pixels")
step_log.observations_images = [image.copy()] # Create a copy to ensure it persists, important!
# Update observations with current URL
url_info = f"Current url: {driver.current_url}"
step_log.observations = url_info if step_logs.observations is None else step_log.observations + "\n" + url_info
return