Spaces:
Runtime error
Runtime error
File size: 1,650 Bytes
1ac9fd6 513fc9a 399f464 1ac9fd6 e1f9ee6 1ac9fd6 513fc9a 399f464 1ac9fd6 513fc9a 399f464 1ac9fd6 399f464 1ac9fd6 513fc9a 399f464 513fc9a 1ac9fd6 547b150 399f464 1ac9fd6 399f464 1ac9fd6 399f464 1ac9fd6 399f464 1ac9fd6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
from googlesearch import search
import requests
from bs4 import BeautifulSoup
import re
from functools import lru_cache
# Pre-compiled pattern matching one HTML-style tag: a "<", one or more
# characters that are not ">", then the closing ">". Substituting matches
# with "" strips tags from a string.
# NOTE(review): not referenced in the visible part of this file -- confirm it
# is still used elsewhere before relying on it.
TAG_CLEANER = re.compile(r"<[^>]+>")
@lru_cache(maxsize=500)
def extract_metadata(url):
    """Fetch *url* and return its page title and meta description.

    The page is downloaded with a 5 second timeout and parsed with the
    ``lxml`` BeautifulSoup parser. This function never raises: any failure
    (network error, non-2xx status, parse error) is reported through the
    ``"title"`` field instead.

    Results are memoised per URL (up to 500 entries). Note that this
    includes error results, and that repeated calls return the *same* dict
    object, so callers should not mutate it.

    Args:
        url: Address of the page to inspect. Must be hashable (it is the
            cache key).

    Returns:
        A dict with three keys:

        * ``"url"`` -- the URL that was passed in.
        * ``"title"`` -- the page title, truncated to 200 characters; the
          URL itself when the page has no usable title; or
          ``"Error: <first 30 chars of the exception message>"`` on failure.
        * ``"description"`` -- the ``content`` of
          ``<meta name="description">``, truncated to 300 characters, or
          ``""`` when absent or on failure.
    """
    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (compatible; ResearchBot/1.0)',
            'Accept-Language': 'en-US,en;q=0.9'
        }
        response = requests.get(url, timeout=5, headers=headers)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'lxml')

        # ``Tag.string`` is None when <title> is empty or contains child
        # tags, which used to raise AttributeError on ``.strip()`` and turn
        # a successfully fetched page into an "Error: ..." result.
        # ``get_text`` always returns a string.
        title = soup.title.get_text(strip=True) if soup.title else ""
        # Fall back to the URL when there is no title or it is blank.
        title = (title or url)[:200]

        # Try to get description
        description = ""
        if meta_desc := soup.find("meta", attrs={"name": "description"}):
            description = meta_desc.get("content", "")[:300]

        return {
            "url": url,
            "title": title,
            "description": description
        }
    except Exception as e:
        # Deliberate best-effort: one bad page must not abort a whole search.
        return {"url": url, "title": f"Error: {str(e)[:30]}", "description": ""}
def search_google(query, num_results=5):
    """Search Google for *query* and return metadata for the top results.

    URLs come from ``googlesearch.search``; each one is then passed to
    ``extract_metadata``. This function never raises: any exception is
    printed as ``"Search error: ..."`` and an empty list is returned.

    Args:
        query: Search string. A blank query (empty or whitespace only)
            returns ``[]`` without contacting the search service.
        num_results: Maximum number of results to return. Values of zero
            or less return ``[]``.

    Returns:
        A list of at most ``num_results`` metadata dicts (keys ``"url"``,
        ``"title"``, ``"description"``), in the order the search yielded
        the URLs.
    """
    try:
        # Nothing to look up: skip the network round-trip entirely.
        if not query or (isinstance(query, str) and not query.strip()):
            return []
        # A negative limit would otherwise make the slice below drop items
        # from the end of the list instead of capping its length.
        if num_results <= 0:
            return []

        # Get search results
        urls = list(search(query, num_results=num_results, advanced=False))

        # Extract metadata for each URL, capped at the requested count in
        # case the search yields more than was asked for.
        return [
            metadata
            for url in urls[:num_results]
            if (metadata := extract_metadata(url))
        ]
    except Exception as e:
        print(f"Search error: {e}")
        return []