# Source: FastAPI / nuse_modules/google_search.py
# Provenance (from file viewer): author raghavNCI, commit b029173 ("headlines revamp"), 2.45 kB
# nuse_modules/google_search.py
import os
import requests
import time
from typing import List
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
GOOGLE_CX_ID = os.getenv("GOOGLE_CX_ID")
def search_google_news_batch(queries: List[str], results_per_query: int = 30) -> List[dict]:
    """Fetch Google Custom Search results for several queries, de-duplicated by link.

    For each query, pages through the Custom Search JSON API 10 results at a
    time (the API maximum per request) until ``results_per_query`` items have
    been fetched or the API stops returning items.

    Args:
        queries: Search query strings to run.
        results_per_query: Target number of raw results to fetch per query
            (duplicates are dropped afterwards, so fewer may be returned).

    Returns:
        A list of dicts with keys ``title``, ``link``, ``snippet`` and
        ``query`` (the originating query), unique by ``link`` across all
        queries. Failed queries are logged and skipped, not raised.
    """
    all_results: List[dict] = []
    seen_links = set()  # de-duplicate across ALL queries, not just within one

    for query in queries:
        print(f"[SEARCH] Query: {query}")
        total_fetched = 0
        # CSE 'start' is 1-based; the API serves at most 100 results, so the
        # last valid page starts at 91.
        start_index = 1

        while total_fetched < results_per_query and start_index <= 91:
            try:
                # Pass parameters via `params` so requests URL-encodes them;
                # interpolating the raw query into the URL broke on queries
                # containing spaces, '&', '#' or '+'.
                res = requests.get(
                    "https://www.googleapis.com/customsearch/v1",
                    params={
                        "key": GOOGLE_API_KEY,
                        "cx": GOOGLE_CX_ID,
                        "q": query,
                        "num": 10,
                        "start": start_index,
                    },
                    timeout=10,
                )
                res.raise_for_status()
                items = res.json().get("items", [])
                if not items:
                    break  # No more results

                for item in items:
                    link = item.get("link")
                    if link and link not in seen_links:
                        seen_links.add(link)
                        all_results.append({
                            "title": item.get("title"),
                            "link": link,
                            "snippet": item.get("snippet"),
                            "query": query,
                        })

                total_fetched += len(items)
                start_index += 10
                time.sleep(0.5)  # Avoid rate limits
            except (requests.RequestException, ValueError) as e:
                # RequestException covers network/HTTP errors; ValueError
                # covers a non-JSON response body. Log and move on so one
                # bad query doesn't sink the whole batch.
                print(f"[ERROR] Query '{query}' failed at start={start_index}: {e}")
                break

    return all_results
def search_google_news(keywords: list[str], num_results: int = 5):
    """Run a single Google Custom Search for the joined keywords.

    Args:
        keywords: Words joined with spaces to form one query string.
        num_results: Desired number of results; capped at 10 because the
            Custom Search API rejects ``num`` values above 10.

    Returns:
        On success, a list of dicts with keys ``title``, ``link`` and
        ``snippet``. On failure, a dict of the form ``{"error": <message>}``
        (callers must check which shape they received).
    """
    query = " ".join(keywords)
    try:
        # Use `params` so requests URL-encodes the query; raw f-string
        # interpolation broke on spaces and special characters.
        res = requests.get(
            "https://www.googleapis.com/customsearch/v1",
            params={
                "key": GOOGLE_API_KEY,
                "cx": GOOGLE_CX_ID,
                "q": query,
                # The API returns HTTP 400 for num > 10, so clamp instead of
                # letting an oversized request fail outright.
                "num": min(num_results, 10),
            },
            timeout=10,
        )
        res.raise_for_status()
        return [
            {
                "title": item.get("title"),
                "link": item.get("link"),
                "snippet": item.get("snippet"),
            }
            for item in res.json().get("items", [])
        ]
    except (requests.RequestException, ValueError) as e:
        # Preserve the original best-effort contract: report the failure
        # as data rather than raising.
        return {"error": str(e)}