File size: 5,358 Bytes
b6654dc 622f2bb aeb4eba b6654dc 2bf962d 622f2bb a9c5bda b6654dc 2200521 622f2bb 5e54175 622f2bb b6654dc 4f114c3 b6654dc 4f114c3 5a9202f 4f114c3 eca7386 4f114c3 fc853f2 4f114c3 fc853f2 4f114c3 5a9202f 4f114c3 fc853f2 4f114c3 fc853f2 4f114c3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 |
from typing import List, Callable
from duckduckgo_search import DDGS # pip install -U duckduckgo-search
import re
import time
# -------- helper to shorten very long GAIA questions (optional but helpful)
def tighten(q: str) -> str:
    """Condense a question into its quoted phrases and keyword-like tokens.

    Two kinds of fragments are pulled out of *q*:

    * the text found between pairs of double quotes, and
    * tokens of three or more characters that start with an ASCII uppercase
      letter or a digit and continue with word characters or hyphens.

    The quoted phrases come first, then the tokens, all joined by single
    spaces.  A token that also sits inside a quoted phrase is therefore
    emitted twice.  When nothing is extracted, *q* is returned unchanged.
    """
    phrases = re.findall(r'"([^"]+)"', q)
    keywords = re.findall(r'\b([A-Z0-9][\w-]{2,})', q)
    condensed = " ".join([*phrases, *keywords])
    if condensed:
        return condensed
    # Nothing matched: fall back to the untouched question.
    return q
def _raw_search(query: str, max_results: int = 5) -> List[str]:
    """Run a single DuckDuckGo text search and format every hit as a string.

    Parameters
    ----------
    query : str
        Search query passed to ``DDGS().text``.
    max_results : int, default 5
        Forwarded to ``DDGS().text`` as ``max_results``.

    Returns
    -------
    List[str]
        One string per usable hit: ``"<title> – <link>"`` on the first line
        and the snippet on the second.  The link is read from ``"href"``
        (falling back to ``"link"``) and the snippet from ``"body"``
        (falling back to ``"snippet"``); missing fields become ``""``.

    Notes
    -----
    Errors raised by the search call itself propagate to the caller.  A
    single entry that is not dict-like is skipped with a printed warning
    instead of being dropped silently, so a malformed hit does not discard
    the remaining results.
    """
    with DDGS() as ddgs:
        raw = list(ddgs.text(query, max_results=max_results))

    out = []
    for r in raw:
        try:
            title = r.get("title", "")
            link = r.get("href") or r.get("link", "")
            snippet = r.get("body") or r.get("snippet", "")
        except (AttributeError, TypeError) as exc:
            # Entry has no usable ``.get`` (e.g. None or a plain string).
            print(f"Skipping malformed search result {r!r}: {exc}")
            continue
        out.append(f"{title} – {link}\n{snippet}")
    return out
def retry_ddg(
    query: str,
    max_results: int = 5,
    attempts: int = 4,
    delay_sec: int = 10,
    search_fn: Callable[[str, int], List[str]] = _raw_search,
) -> List[str]:
    """
    Call *search_fn* repeatedly until it yields results or *attempts* run out.

    An attempt counts as unsuccessful when *search_fn* raises or returns
    something falsy.  After every unsuccessful attempt except the last one,
    the function sleeps for *delay_sec* seconds.  Progress and the final
    outcome are reported with ``print``.

    Parameters
    ----------
    query : str
        Search query handed to *search_fn*.
    max_results : int, default 5
        Result limit handed to *search_fn*.
    attempts : int, default 4
        Upper bound on the number of calls to *search_fn*.
    delay_sec : int, default 10
        Pause, in seconds, between two consecutive attempts.
    search_fn : Callable
        Callable invoked as ``search_fn(query, max_results)`` and expected to
        return a list of strings.  Defaults to ``_raw_search``.

    Returns
    -------
    List[str]
        The first non-empty result of *search_fn*, or ``[]`` when no attempt
        produced one.
    """
    failure = None
    for attempt_no in range(1, attempts + 1):
        try:
            hits = search_fn(query, max_results)
            has_hits = bool(hits)
            if not has_hits:
                print(f"Attempt {attempt_no}/{attempts}: no results, retrying in {delay_sec}s…")
        except Exception as exc:
            # Remember the most recent error for the final summary.
            failure = exc
            print(f"Attempt {attempt_no}/{attempts} failed: {exc}. Retrying in {delay_sec}s…")
        else:
            if has_hits:
                return hits

        # No pause after the final attempt.
        if attempt_no < attempts:
            time.sleep(delay_sec)

    # Every attempt either raised or came back empty.
    summary = (
        f"All {attempts} attempts failed. Last exception: {failure}"
        if failure
        else f"All {attempts} attempts returned empty results."
    )
    print(summary)
    return []
# -------- the only search function your agent will call
def simple_search(query: str, max_results: int = 5) -> List[str]:
    """
    Perform a web search using DuckDuckGo and return formatted results.

    The query is stripped and validated once; an empty or one-character
    query returns ``[]`` immediately without contacting the search service.
    Otherwise the search is tried up to four times:

    * a non-empty result list is returned as soon as it is obtained;
    * an empty result list is retried immediately;
    * an exception whose message contains ``"Ratelimit"`` or ``"202"`` is
      treated as a rate limit and followed by a fixed 20 second wait;
    * any other exception is followed by an exponential backoff of
      30, 60, 120 ... seconds;
    * no wait happens after the final attempt.

    Parameters
    ----------
    query : str
        Search query; surrounding whitespace is ignored.
    max_results : int, default 5
        Forwarded to ``DDGS().text`` as ``max_results``.

    Returns
    -------
    List[str]
        One string per hit, ``"<title> – <link>"`` on the first line and the
        description on the second, or ``[]`` when nothing could be fetched.
        Missing fields are replaced by ``"No title"``, ``"No link"`` and
        ``"No description"``.
    """
    max_attempts = 4
    rate_limit_delay = 20  # seconds to wait on rate limit
    base_backoff = 30  # seconds; doubled after each non-rate-limit failure

    # Clean and validate the query once, before any network traffic.
    query = query.strip()
    if not query:
        print("Error: Empty search query provided")
        return []
    if len(query) < 2:  # one-character queries were already rejected before
        print("Warning: Query too short")
        return []

    def _search_once(q: str, limit: int) -> List[str]:
        # Single attempt.  Exceptions are deliberately NOT caught here: the
        # retry loop below needs to see them to tell rate limits apart from
        # other failures and pick the right delay.
        with DDGS() as ddgs:
            formatted = []
            for r in ddgs.text(q, max_results=limit):
                title = r.get('title', 'No title')
                link = r.get('link', r.get('href', 'No link'))
                body = r.get('body', r.get('snippet', 'No description'))
                formatted.append(f"{title} – {link}\n{body}")
            return formatted

    for attempt in range(max_attempts):
        is_last = attempt == max_attempts - 1
        try:
            results = _search_once(query, max_results)
        except Exception as e:
            error_msg = str(e)
            print(f"Attempt {attempt + 1}/{max_attempts} failed: {error_msg}")
            if is_last:
                # Nothing left to retry, so waiting would only delay the caller.
                print(f"All {max_attempts} attempts failed. Last exception: {error_msg}")
                return []
            if "Ratelimit" in error_msg or "202" in error_msg:
                print(f"Rate limit detected. Waiting {rate_limit_delay} seconds...")
                time.sleep(rate_limit_delay)
            else:
                # For other errors, use exponential backoff.
                delay = base_backoff * (2 ** attempt)
                print(f"Retrying in {delay}s...")
                time.sleep(delay)
        else:
            if results:
                return results
            print(f"Attempt {attempt + 1}/{max_attempts}: No results found")

    return []
|