from duckduckgo_search import DDGS
import requests
from bs4 import BeautifulSoup
import logging
from typing import List, Dict, Any
from config.settings import Settings

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class WebSearchTool:
    def __init__(self):
        self.ddgs = DDGS()
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        })
    
    def search(self, query: str, max_results: int = Settings.MAX_SEARCH_RESULTS) -> List[Dict[str, Any]]:
        """
        Search the web using DuckDuckGo
        """
        try:
            results = []
            search_results = self.ddgs.text(query, max_results=max_results)
            
            for result in search_results:
                results.append({
                    'title': result.get('title', ''),
                    'url': result.get('href', ''),
                    'snippet': result.get('body', ''),
                    'source': 'DuckDuckGo'
                })
            
            logger.info(f"Found {len(results)} search results for: {query}")
            return results
            
        except Exception as e:
            logger.error(f"Error searching web: {e}")
            return []
    
    def get_page_content(self, url: str, max_chars: int = 5000) -> str:
        """
        Extract text content from a web page
        """
        try:
            response = self.session.get(url, timeout=10)
            response.raise_for_status()
            
            soup = BeautifulSoup(response.content, 'html.parser')
            
            # Remove script and style elements
            for script in soup(["script", "style"]):
                script.decompose()
            
            # Get text content
            text = soup.get_text()
            
            # Clean up whitespace
            lines = (line.strip() for line in text.splitlines())
            chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
            text = ' '.join(chunk for chunk in chunks if chunk)
            
            # Limit length
            if len(text) > max_chars:
                text = text[:max_chars] + "..."
            
            return text
            
        except Exception as e:
            logger.error(f"Error extracting content from {url}: {e}")
            return f"Error: Could not extract content from {url}"
    
    def search_and_summarize(self, query: str, include_content: bool = False) -> str:
        """
        Search and format results for LLM consumption
        """
        results = self.search(query)
        
        if not results:
            return "No search results found."
        
        summary_parts = [f"Search results for: {query}\n"]
        
        for i, result in enumerate(results, 1):
            summary_parts.append(f"{i}. **{result['title']}**")
            summary_parts.append(f"   URL: {result['url']}")
            summary_parts.append(f"   Summary: {result['snippet']}")
            
            if include_content and i <= 2:  # Only get content for top 2 results
                content = self.get_page_content(result['url'])
                if content and not content.startswith("Error:"):
                    summary_parts.append(f"   Content Preview: {content[:500]}...")
            
            summary_parts.append("")
        
        return "\n".join(summary_parts)
    
    def search_news(self, query: str, max_results: int = 5) -> List[Dict[str, Any]]:
        """
        Search for news articles
        """
        try:
            results = []
            news_results = self.ddgs.news(query, max_results=max_results)
            
            for result in news_results:
                results.append({
                    'title': result.get('title', ''),
                    'url': result.get('url', ''),
                    'snippet': result.get('body', ''),
                    'source': result.get('source', ''),
                    'date': result.get('date', ''),
                    'type': 'news'
                })
            
            logger.info(f"Found {len(results)} news results for: {query}")
            return results
            
        except Exception as e:
            logger.error(f"Error searching news: {e}")
            return []
    
    def search_images(self, query: str, max_results: int = 5) -> List[Dict[str, Any]]:
        """
        Search for images
        """
        try:
            results = []
            image_results = self.ddgs.images(query, max_results=max_results)
            
            for result in image_results:
                results.append({
                    'title': result.get('title', ''),
                    'url': result.get('image', ''),
                    'thumbnail': result.get('thumbnail', ''),
                    'source': result.get('source', ''),
                    'type': 'image'
                })
            
            logger.info(f"Found {len(results)} image results for: {query}")
            return results
            
        except Exception as e:
            logger.error(f"Error searching images: {e}")
            return []
    
    def quick_fact_search(self, query: str) -> str:
        """
        Quick search for factual information
        """
        try:
            # Try to get instant answer first
            instant_answer = self.ddgs.answers(query)
            if instant_answer:
                return f"Quick Fact: {instant_answer[0].get('text', '')}"
            
            # Fall back to regular search
            results = self.search(query, max_results=2)
            if results:
                return f"From search: {results[0]['snippet']}"
            
            return "No quick facts found."
            
        except Exception as e:
            logger.error(f"Error in quick fact search: {e}")
            return "Error retrieving quick facts."
    
    def research_topic(self, topic: str) -> Dict[str, Any]:
        """
        Comprehensive research on a topic
        """
        research_data = {
            'topic': topic,
            'general_info': [],
            'news': [],
            'related_queries': []
        }
        
        try:
            # General search
            general_results = self.search(topic, max_results=5)
            research_data['general_info'] = general_results
            
            # News search
            news_results = self.search_news(topic, max_results=3)
            research_data['news'] = news_results
            
            # Generate related queries
            related_queries = [
                f"{topic} definition",
                f"{topic} examples",
                f"{topic} applications",
                f"latest {topic} developments"
            ]
            research_data['related_queries'] = related_queries
            
            return research_data
            
        except Exception as e:
            logger.error(f"Error researching topic {topic}: {e}")
            return research_data
    
    def format_research_for_llm(self, research_data: Dict[str, Any]) -> str:
        """
        Format research data for LLM consumption
        """
        formatted_parts = [f"Research Results for: {research_data['topic']}\n"]
        
        if research_data['general_info']:
            formatted_parts.append("## General Information:")
            for i, result in enumerate(research_data['general_info'], 1):
                formatted_parts.append(f"{i}. {result['title']}")
                formatted_parts.append(f"   {result['snippet']}\n")
        
        if research_data['news']:
            formatted_parts.append("## Recent News:")
            for i, result in enumerate(research_data['news'], 1):
                formatted_parts.append(f"{i}. {result['title']}")
                formatted_parts.append(f"   {result['snippet']}")
                if result.get('date'):
                    formatted_parts.append(f"   Date: {result['date']}\n")
        
        return "\n".join(formatted_parts)