# nuse_modules/google_search.py
import os
from typing import List

import requests
from trafilatura import fetch_url, extract

# Google Programmable Search Engine credentials, read from the environment.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
GOOGLE_CX_ID = os.getenv("GOOGLE_CX_ID")


def extract_full_text(url: str) -> str:
    """Download a page and return its main article text, or "" on failure."""
    try:
        downloaded = fetch_url(url)
        if downloaded:
            # Strip comments and tables so only the article body remains.
            content = extract(downloaded, include_comments=False, include_tables=False)
            return content or ""
    except Exception as e:
        print(f"[SCRAPER ERROR] {url}: {e}")
    return ""


def search_google_news(keywords: List[str], num_results: int = 5) -> List[dict]:
    """Query the Google Custom Search JSON API and scrape each result's full text."""
    query = " ".join(keywords)
    url = "https://www.googleapis.com/customsearch/v1"
    # Pass parameters separately so requests URL-encodes the query string.
    params = {
        "key": GOOGLE_API_KEY,
        "cx": GOOGLE_CX_ID,
        "q": query,
        "num": num_results,
    }
    try:
        res = requests.get(url, params=params, timeout=10)
        res.raise_for_status()
        data = res.json()

        results = []
        for item in data.get("items", []):
            link = item.get("link")
            # Fetch the full article body in addition to the search snippet.
            article_text = extract_full_text(link)
            results.append({
                "title": item.get("title"),
                "link": link,
                "snippet": item.get("snippet"),
                "content": article_text,
            })
        return results
    except Exception as e:
        print(f"[ERROR] Google search failed: {e}")
        return []
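

# Usage sketch (illustrative, not part of the module's API): a minimal manual
# check of the search-and-scrape flow, assuming GOOGLE_API_KEY and GOOGLE_CX_ID
# are set in the environment; the keywords below are placeholder examples.
if __name__ == "__main__":
    for article in search_google_news(["artificial intelligence", "regulation"], num_results=3):
        print(article["title"], "-", article["link"])
        print(article["snippet"])
        print(len(article["content"]), "characters of scraped article text")
        print("---")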