File size: 1,650 Bytes
1ac9fd6
513fc9a
399f464
1ac9fd6
 
e1f9ee6
1ac9fd6
 
 
 
 
 
513fc9a
399f464
 
 
 
1ac9fd6
513fc9a
 
399f464
 
1ac9fd6
 
399f464
1ac9fd6
 
 
 
513fc9a
 
 
399f464
 
513fc9a
 
1ac9fd6
547b150
399f464
 
 
1ac9fd6
 
399f464
1ac9fd6
399f464
1ac9fd6
 
 
399f464
 
1ac9fd6
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
from googlesearch import search
import requests
from bs4 import BeautifulSoup
import re
from functools import lru_cache

# Precompiled pattern matching a single HTML-style tag: "<", one or more
# characters other than ">", then ">".
# NOTE(review): no function visible in this file references this constant;
# presumably intended for TAG_CLEANER.sub("", text) — confirm it is still needed.
TAG_CLEANER = re.compile(r"<[^>]+>")

@lru_cache(maxsize=500)
def extract_metadata(url):
    """Fetch *url* and return its page title and meta description.

    Args:
        url: Address of the page to download (5 second timeout).

    Returns:
        A dict with the keys ``"url"``, ``"title"`` and ``"description"``.
        The title is the text of the ``<title>`` element, or *url* itself
        when the element is missing or contains no text, capped at 200
        characters.  The description is the ``content`` attribute of
        ``<meta name="description">`` capped at 300 characters, or ``""``
        when absent.  If anything fails (network error, HTTP error status,
        parsing problem) the title is ``"Error: "`` followed by the first
        30 characters of the exception message and the description is
        empty.

    Note:
        Results are memoised by ``lru_cache``: error results are cached as
        well, and repeated calls for the same URL return the very same
        dict object, so callers should copy it before mutating.
    """
    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (compatible; ResearchBot/1.0)',
            'Accept-Language': 'en-US,en;q=0.9'
        }
        response = requests.get(url, timeout=5, headers=headers)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'lxml')

        # ``soup.title.string`` is None for an empty <title> or one holding
        # nested markup, which used to raise AttributeError on ``.strip()``
        # and turn a successfully fetched page into an error result.
        # ``get_text`` always returns a string.
        title_tag = soup.title
        title_text = title_tag.get_text(strip=True) if title_tag is not None else ""
        title = (title_text or url)[:200]

        # Try to get description
        description = ""
        if meta_desc := soup.find("meta", attrs={"name": "description"}):
            # ``or ""`` guards against a valueless ``content`` attribute.
            description = (meta_desc.get("content") or "")[:300]

        return {
            "url": url,
            "title": title,
            "description": description
        }
    except Exception as e:
        # Deliberate best-effort boundary: one bad page must not abort the
        # caller's whole batch, so the failure is reported in-band.
        return {"url": url, "title": f"Error: {str(e)[:30]}", "description": ""}

def search_google(query, num_results=5):
    """Run a web search for *query* and describe each hit.

    Args:
        query: Search string handed to ``search``.
        num_results: Maximum number of hits to request and to return.

    Returns:
        A list holding the ``extract_metadata`` result (a dict with
        ``"url"``, ``"title"`` and ``"description"``) for each hit, in the
        order ``search`` produced them.  If any exception is raised, a
        ``Search error: ...`` line is printed and an empty list is returned.
    """
    try:
        # Drain the search results completely before fetching any page.
        # presumably each item is a URL string (advanced=False) — verify
        # against the googlesearch package in use.
        hits = list(search(query, num_results=num_results, advanced=False))

        # Cap at num_results in case more items come back than requested,
        # and keep only truthy metadata results.
        return [
            info
            for link in hits[:num_results]
            if (info := extract_metadata(link))
        ]
    except Exception as e:
        print(f"Search error: {e}")
        return []