|
|
|
|
|
import os |
|
import requests |
|
import time |
|
from typing import List |
|
|
|
# Credentials used by the search helpers in this module. They are read from
# the environment once, at import time; each is None when its variable is unset.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
GOOGLE_CX_ID = os.environ.get("GOOGLE_CX_ID")
|
|
|
def search_google_news_batch(queries: List[str], results_per_query: int = 30) -> List[dict]:
    """Run several Custom Search queries and collect de-duplicated results.

    Each query is paged through in steps of up to 10 items. Paging for a
    query stops when ``results_per_query`` items have been fetched, a page
    comes back without items, a request fails, or the start index would
    exceed 91 (so no more than 100 items are ever requested per query).

    Args:
        queries: Search strings. They are passed as request parameters, so
            characters such as ``&``, ``#`` or ``+`` are URL-encoded instead
            of corrupting the request.
        results_per_query: Maximum number of items to fetch for each query.
            Items later dropped as duplicates still count towards this limit.

    Returns:
        A list of dicts with the keys ``title``, ``link``, ``snippet`` and
        ``query`` (the query that first produced the link). Every link
        appears at most once across all queries. Errors are printed and end
        paging for the affected query only; they are never raised.
    """
    endpoint = "https://www.googleapis.com/customsearch/v1"
    page_size = 10    # page size used for every full page
    last_start = 91   # highest start index this function will request

    all_results = []
    seen_links = set()

    for query in queries:
        print(f"[SEARCH] Query: {query}")
        total_fetched = 0
        start_index = 1

        while total_fetched < results_per_query and start_index <= last_start:
            params = {
                "key": GOOGLE_API_KEY,
                "cx": GOOGLE_CX_ID,
                "q": query,
                # Only ask for what is still missing, so a limit that is not
                # a multiple of the page size is not overshot.
                "num": min(page_size, results_per_query - total_fetched),
                "start": start_index,
            }

            try:
                # Let requests build the query string so every value is
                # percent-encoded correctly.
                res = requests.get(endpoint, params=params, timeout=10)
                res.raise_for_status()
                items = res.json().get("items", [])

                if not items:
                    break

                for item in items:
                    link = item.get("link")
                    if link and link not in seen_links:
                        seen_links.add(link)
                        all_results.append({
                            "title": item.get("title"),
                            "link": link,
                            "snippet": item.get("snippet"),
                            "query": query,
                        })

                total_fetched += len(items)
                start_index += page_size
                # Short pause between pages to stay gentle on the API.
                time.sleep(0.5)

            except Exception as e:
                # Best-effort by design: report the failure and move on to
                # the next query. The exception text can contain the request
                # URL, so the API key is masked before printing.
                detail = str(e)
                if GOOGLE_API_KEY:
                    detail = detail.replace(GOOGLE_API_KEY, "[REDACTED]")
                print(f"[ERROR] Query '{query}' failed at start={start_index}: {detail}")
                break

    return all_results
|
|
|
def search_google_news(keywords: list[str], num_results: int = 5):
    """Run a single Custom Search request for the given keywords.

    The keywords are joined with spaces into one query string and sent as
    request parameters, so special characters are URL-encoded.

    Args:
        keywords: Words or phrases that make up the query.
        num_results: Value sent as the API's ``num`` parameter.

    Returns:
        On success, a list of dicts with the keys ``title``, ``link`` and
        ``snippet`` (empty when the response has no ``items``).
        On failure, a dict ``{"error": <message>}``; exceptions are not
        raised to the caller. Empty or whitespace-only keywords yield the
        error dict without any request being sent.
    """
    query = " ".join(keywords)
    if not query.strip():
        # Nothing to search for; skip the network round-trip.
        return {"error": "No search keywords provided"}

    try:
        params = {
            "key": GOOGLE_API_KEY,
            "cx": GOOGLE_CX_ID,
            "q": query,
            "num": num_results,
        }
        # Let requests build the query string so every value is
        # percent-encoded correctly.
        res = requests.get(
            "https://www.googleapis.com/customsearch/v1",
            params=params,
            timeout=10,
        )
        res.raise_for_status()
        data = res.json()

        return [
            {
                "title": item.get("title"),
                "link": item.get("link"),
                "snippet": item.get("snippet"),
            }
            for item in data.get("items", [])
        ]
    except Exception as e:
        # The exception text can contain the request URL; mask the API key
        # so it is not handed back to callers or end users.
        message = str(e)
        try:
            if GOOGLE_API_KEY:
                message = message.replace(GOOGLE_API_KEY, "[REDACTED]")
        except NameError:
            pass
        return {"error": message}
|
|