from smolagents import CodeAgent, HfApiModel, tool
import os
import requests
from bs4 import BeautifulSoup
from markdownify import markdownify
from tools.final_answer import FinalAnswerTool

from Gradio_UI import GradioUI


@tool
def webpage_scraper(url: str) -> str:
    """A tool that scrapes and summarizes webpage content from a given URL.

    Args:
        url: URL of the webpage to scrape and summarize

    Returns:
        str: A summary of the webpage content including title and main text
    """
    try:
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        }
        response = requests.get(url, headers=headers, timeout=15)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')

        # Get title
        title = soup.title.string if soup.title else "No title"

        # Find main content
        main_content = (
            soup.find('main')
            or soup.find('article')
            or soup.find('div', class_='content')
            or soup.body
        )

        if main_content:
            # Remove unwanted elements
            for tag in main_content.find_all(['script', 'style', 'nav', 'footer', 'aside', 'header']):
                tag.decompose()

            # Convert to markdown and clean up
            content = markdownify(str(main_content), heading_style="ATX")

            # Try to keep complete sentences
            content = content[:2000].rsplit('.', 1)[0] + '...'
        else:
            content = "Could not find main content"

        return f"Title: {title}\n\nContent Summary:\n{content}"

    except Exception as e:
        return f"Error scraping webpage: {str(e)}"


final_answer = FinalAnswerTool()

# Keep the same model endpoint
model = HfApiModel(
    model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud',
    max_tokens=2096,
    temperature=0.5,
    custom_role_conversions=None,
)

agent = CodeAgent(
    model=model,
    tools=[
        final_answer,
        webpage_scraper
    ],
    max_steps=5,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,
    name="Web Scraping Agent",
    description="An agent capable of scraping and analyzing web content",
    prompt_templates=None  # Remove prompts.yaml dependency
)

GradioUI(agent).launch()