File size: 5,358 Bytes
b6654dc
622f2bb
aeb4eba
b6654dc
2bf962d
622f2bb
 
 
 
 
 
a9c5bda
b6654dc
 
 
2200521
622f2bb
 
 
5e54175
 
 
 
622f2bb
 
 
b6654dc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4f114c3
 
b6654dc
4f114c3
 
5a9202f
 
 
 
 
 
 
 
 
 
 
4f114c3
 
 
eca7386
 
 
 
 
 
 
4f114c3
 
 
 
 
 
fc853f2
4f114c3
fc853f2
4f114c3
5a9202f
 
 
 
 
 
4f114c3
 
 
 
 
 
 
fc853f2
 
 
 
 
 
 
 
 
 
4f114c3
 
 
fc853f2
4f114c3
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
import re
import time
from typing import Callable, List, Optional

from duckduckgo_search import DDGS   # pip install -U duckduckgo-search

# -------- helper to shorten very long GAIA questions (optional but helpful)
def tighten(q: str) -> str:
    """Condense a long question into its most search-worthy tokens.

    Two kinds of fragments are collected, in this order:

    1. every span enclosed in double quotes (without the quotes);
    2. every token that starts at a word boundary with an uppercase ASCII
       letter or a digit and is at least three characters long (word
       characters and hyphens).

    The fragments are joined with single spaces.  Words inside a quoted
    span that also match rule 2 therefore appear twice.  If nothing was
    collected, *q* is returned unchanged.
    """
    keywords = [
        *re.findall(r'"([^"]+)"', q),
        *re.findall(r'\b([A-Z0-9][\w-]{2,})', q),
    ]
    condensed = " ".join(keywords)
    if condensed:
        return condensed
    # Nothing distinctive found: fall back to the full question.
    return q

def _raw_search(query: str, max_results: int = 5) -> List[str]:
    """Run one DuckDuckGo text search and format each hit as a string.

    Parameters
    ----------
    query : str
        Search query passed straight to ``DDGS.text``.
    max_results : int, default 5
        Forwarded as ``max_results`` to ``DDGS.text``.

    Returns
    -------
    List[str]
        One ``"<title> - <link>\\n<snippet>"`` string per usable result.
        The link is read from ``"href"`` (falling back to ``"link"``) and
        the snippet from ``"body"`` (falling back to ``"snippet"``);
        missing or ``None`` fields become empty strings.

    Notes
    -----
    Exceptions raised by ``DDGS`` itself are not caught here so that a
    caller (for example a retry wrapper) can react to them.
    """
    with DDGS() as ddgs:
        raw = list(ddgs.text(query, max_results=max_results))

    out = []
    for r in raw:
        try:
            # `or ""` also covers keys that are present but set to None,
            # which would otherwise be rendered as the text "None".
            title = r.get("title") or ""
            link = r.get("href") or r.get("link") or ""
            snippet = r.get("body") or r.get("snippet") or ""
        except (AttributeError, TypeError):
            # Still best-effort: skip entries that are not mapping-like,
            # but report them instead of hiding the problem.
            print(f"Skipping malformed search result: {r!r}")
            continue
        # Separate title and URL so the two do not run together.
        out.append(f"{title} - {link}\n{snippet}")
    return out

def retry_ddg(
    query: str,
    max_results: int = 5,
    attempts: int = 4,
    delay_sec: int = 10,
    search_fn: Callable[[str, int], List[str]] = _raw_search,
) -> List[str]:
    """
    Call *search_fn* up to *attempts* times until it returns results.

    An attempt counts as unsuccessful when *search_fn* raises or returns
    an empty list.  After an unsuccessful attempt the function sleeps
    *delay_sec* seconds, except after the final one.

    Parameters
    ----------
    query : str
        Search query.
    max_results : int, default 5
        Number of results to request; passed to *search_fn* positionally.
    attempts : int, default 4
        Maximum number of attempts before giving up.
    delay_sec : int, default 10
        Seconds to sleep between attempts.
    search_fn : Callable
        A function with signature (query: str, max_results: int) -> List[str].
        Defaults to _raw_search.

    Returns
    -------
    List[str]
        The first non-empty result list; an empty list if every attempt
        failed or returned nothing.  Exceptions from *search_fn* are
        caught and logged, never propagated.
    """
    last_err = None
    for i in range(1, attempts + 1):
        will_retry = i < attempts
        try:
            results = search_fn(query, max_results)
        except Exception as e:
            last_err = e                      # Remembered for the final summary
            # Only promise a retry when one is actually going to happen.
            outcome = f"Attempt {i}/{attempts} failed: {e}"
            if will_retry:
                outcome += f". Retrying in {delay_sec}s…"
            print(outcome)
        else:
            if results:                       # Success
                return results
            outcome = f"Attempt {i}/{attempts}: no results"
            if will_retry:
                outcome += f", retrying in {delay_sec}s…"
            print(outcome)

        if will_retry:
            time.sleep(delay_sec)

    # All attempts failed or returned empty
    if last_err is not None:
        print(f"All {attempts} attempts failed. Last exception: {last_err}")
    else:
        print(f"All {attempts} attempts returned empty results.")
    return []

# -------- the only search function your agent will call
def simple_search(
    query: str,
    max_results: int = 5,
    *,
    max_attempts: int = 4,
    rate_limit_delay: float = 20,
    backoff_base: float = 30,
    search_fn: Optional[Callable[[str, int], List[str]]] = None,
) -> List[str]:
    """
    Perform a web search using DuckDuckGo and return formatted results.

    The query is stripped and validated once, then searched with retries:

    * an exception whose message contains ``"Ratelimit"`` or ``"202"`` is
      treated as a rate limit and followed by a *rate_limit_delay* pause;
    * any other exception is followed by an exponential backoff of
      ``backoff_base * 2 ** n`` seconds (n = 0 for the first failure);
    * an empty result list is retried immediately;
    * no pause is made after the final attempt.

    Parameters
    ----------
    query : str
        Search query.  Surrounding whitespace is removed.
    max_results : int, default 5
        Number of results to request.
    max_attempts : int, default 4
        Maximum number of search attempts.
    rate_limit_delay : float, default 20
        Seconds to wait after a rate-limit error.
    backoff_base : float, default 30
        Base delay in seconds for the exponential backoff on other errors.
    search_fn : Callable, optional
        Function with signature (query: str, max_results: int) -> List[str]
        used for each attempt.  Defaults to a DuckDuckGo text search.

    Returns
    -------
    List[str]
        ``"<title> - <link>\\n<body>"`` strings from the first attempt that
        produced results; an empty list if the query is empty or shorter
        than two characters, or if every attempt failed or came back empty.
        Search exceptions are logged and never propagated.
    """
    def _ddg_text_search(q: str, limit: int) -> List[str]:
        # Deliberately does NOT catch exceptions: the retry loop below needs
        # to see them to tell rate limits apart from genuinely empty results.
        formatted = []
        with DDGS() as ddgs:
            for r in ddgs.text(q, max_results=limit):
                # Handle missing keys gracefully
                title = r.get('title', 'No title')
                link = r.get('link', r.get('href', 'No link'))
                body = r.get('body', r.get('snippet', 'No description'))
                # Separate title and URL so the two do not run together.
                formatted.append(f"{title} - {link}\n{body}")
        return formatted

    # Validate once, up front: retrying an invalid query cannot help.
    query = query.strip()
    if not query:
        print("Error: Empty search query provided")
        return []
    if len(query) < 2:  # per the original note: DuckDuckGo needs >= 2 chars
        print("Warning: Query too short")
        return []

    run_search = _ddg_text_search if search_fn is None else search_fn

    for attempt in range(1, max_attempts + 1):
        try:
            results = run_search(query, max_results)
        except Exception as e:
            error_msg = str(e)
            print(f"Attempt {attempt}/{max_attempts} failed: {error_msg}")

            if attempt == max_attempts:
                # Out of attempts: give up without a pointless final sleep.
                print(f"All {max_attempts} attempts failed. Last exception: {error_msg}")
                return []

            if "Ratelimit" in error_msg or "202" in error_msg:
                delay = rate_limit_delay
                print(f"Rate limit detected. Waiting {delay} seconds...")
            else:
                # For other errors, use exponential backoff
                delay = backoff_base * (2 ** (attempt - 1))
                print(f"Retrying in {delay}s...")
            time.sleep(delay)
        else:
            if results:
                return results
            print(f"Attempt {attempt}/{max_attempts}: No results found")

    return []