File size: 2,446 Bytes
826a1b8
 
 
 
b029173
 
826a1b8
206e141
826a1b8
 
b029173
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
826a1b8
 
 
 
 
 
 
8121f99
826a1b8
 
 
 
 
 
 
8121f99
 
 
 
 
826a1b8
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# nuse_modules/google_search.py

import os
import requests
import time
from typing import List

# Credentials sent as the `key` and `cx` parameters of Custom Search requests.
# Read from the environment once at import time; each is None when unset.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
GOOGLE_CX_ID = os.environ.get("GOOGLE_CX_ID")

def search_google_news_batch(queries: List[str], results_per_query: int = 30) -> List[dict]:
    """Run several Custom Search queries and merge their de-duplicated hits.

    Each query is paged through the Google Custom Search JSON endpoint until
    ``results_per_query`` items have been fetched, the API returns an empty
    page, the start index passes 91, or a request fails.

    Args:
        queries: Search strings to run, in order.
        results_per_query: Maximum number of items to fetch for each query.
            Values of zero or less fetch nothing.

    Returns:
        A list of dicts with the keys ``title``, ``link``, ``snippet`` and
        ``query``. A link is reported only once, attributed to the first
        query that produced it. Failures are printed, not raised; a failing
        query contributes whatever it collected before the failure.
    """
    endpoint = "https://www.googleapis.com/customsearch/v1"
    page_size = 10  # Largest page this function asks the API for.

    all_results = []
    seen_links = set()

    for query in queries:
        print(f"[SEARCH] Query: {query}")
        total_fetched = 0
        start_index = 1

        while total_fetched < results_per_query and start_index <= 91:
            # Ask only for what is still missing so the caller's limit is
            # honoured and no quota is spent on items that would exceed it.
            num = min(page_size, results_per_query - total_fetched)
            # Passing a mapping lets requests percent-encode the query, so
            # characters such as '&', '#' or '+' cannot corrupt the URL.
            params = {
                "key": GOOGLE_API_KEY,
                "cx": GOOGLE_CX_ID,
                "q": query,
                "num": num,
                "start": start_index,
            }

            try:
                res = requests.get(endpoint, params=params, timeout=10)
                res.raise_for_status()
                data = res.json()
                items = data.get("items", [])

                if not items:
                    break  # No more results

                for item in items:
                    link = item.get("link")
                    if link and link not in seen_links:
                        seen_links.add(link)
                        all_results.append({
                            "title": item.get("title"),
                            "link": link,
                            "snippet": item.get("snippet"),
                            "query": query,
                        })

                total_fetched += len(items)
                start_index += num
                time.sleep(0.5)  # Avoid rate limits

            except Exception as e:
                # Exception text may embed the request URL, which carries the
                # API key; mask it before it reaches the logs.
                detail = str(e)
                if GOOGLE_API_KEY:
                    detail = detail.replace(GOOGLE_API_KEY, "[REDACTED]")
                print(f"[ERROR] Query '{query}' failed at start={start_index}: {detail}")
                break

    return all_results

def search_google_news(keywords: list[str], num_results: int = 5):
    """Run one Custom Search query built from ``keywords``.

    Args:
        keywords: Terms joined with single spaces to form the query.
        num_results: Number of items to request. Values above 10 are capped
            at 10, the largest page the Custom Search JSON API accepts.

    Returns:
        On success, a list of dicts with the keys ``title``, ``link`` and
        ``snippet`` (empty when the response has no ``items``). On any
        request or parsing failure, a dict ``{"error": <message>}`` instead;
        callers must check the type of the result.
    """
    # A params mapping lets requests percent-encode the query, so keywords
    # containing '&', '#', '+' or non-ASCII text cannot corrupt the URL.
    params = {
        "key": GOOGLE_API_KEY,
        "cx": GOOGLE_CX_ID,
        "q": " ".join(keywords),
        "num": min(num_results, 10),
    }

    try:
        res = requests.get(
            "https://www.googleapis.com/customsearch/v1",
            params=params,
            timeout=10,
        )
        res.raise_for_status()
        data = res.json()
        return [
            {
                "title": item.get("title"),
                "link": item.get("link"),
                "snippet": item.get("snippet"),
            }
            for item in data.get("items", [])
        ]
    except Exception as e:
        # Exception text may embed the request URL, which carries the API
        # key; mask it before handing the message to the caller.
        message = str(e)
        if GOOGLE_API_KEY:
            message = message.replace(GOOGLE_API_KEY, "[REDACTED]")
        return {"error": message}