from smolagents import CodeAgent, HfApiModel, tool
import os
import requests
from bs4 import BeautifulSoup
from markdownify import markdownify
from tools.final_answer import FinalAnswerTool

from Gradio_UI import GradioUI


@tool
def webpage_scraper(url: str) -> str:
    """A tool that scrapes and summarizes webpage content from a given URL.

    Args:
        url: URL of the webpage to scrape and summarize

    Returns:
        str: A summary of the webpage content including title and main text
    """
    try:
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        }
        response = requests.get(url, headers=headers, timeout=15)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')

        # Get title
        title = soup.title.string if soup.title else "No title"

        # Find main content
        main_content = (
            soup.find('main')
            or soup.find('article')
            or soup.find('div', class_='content')
            or soup.body
        )

        if main_content:
            # Remove unwanted elements
            for tag in main_content.find_all(['script', 'style', 'nav', 'footer', 'aside', 'header']):
                tag.decompose()

            # Convert to markdown and clean up
            content = markdownify(str(main_content), heading_style="ATX")

            # Try to keep complete sentences
            content = content[:2000].rsplit('.', 1)[0] + '...'
        else:
            content = "Could not find main content"

        return f"Title: {title}\n\nContent Summary:\n{content}"

    except Exception as e:
        return f"Error scraping webpage: {str(e)}"


final_answer = FinalAnswerTool()

# Keep the same model endpoint
model = HfApiModel(
    model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud',
    max_tokens=2096,
    temperature=0.5,
    custom_role_conversions=None,
)

agent = CodeAgent(
    model=model,
    tools=[
        final_answer,
        webpage_scraper
    ],
    max_steps=5,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,
    name="Web Scraping Agent",
    description="An agent capable of scraping and analyzing web content",
    prompt_templates=None  # Remove prompts.yaml dependency
)

GradioUI(agent).launch()