Nymbo committed
Commit 2dea46b · verified · 1 Parent(s): dc537d3

ADDING NEW DEEP_RESEARCH TOOL

Files changed (1)
  1. app.py +572 -5
app.py CHANGED
@@ -49,7 +49,7 @@ except Exception: # pragma: no cover - optional dependency
49
  # Fetch: Enhanced HTTP + extraction utils
50
  # ==============================
51
 
52
- def _http_get_enhanced(url: str) -> requests.Response:
53
  """
54
  Download the page with enhanced headers, timeout handling, and better error recovery.
55
  """
@@ -63,14 +63,15 @@ def _http_get_enhanced(url: str) -> requests.Response:
63
  "Upgrade-Insecure-Requests": "1",
64
  }
65
 
66
- # Apply rate limiting
67
- _fetch_rate_limiter.acquire()
 
68
 
69
  try:
70
  response = requests.get(
71
  url,
72
  headers=headers,
73
- timeout=30, # Increased timeout
74
  allow_redirects=True,
75
  stream=False
76
  )
@@ -512,6 +513,47 @@ def _log_call_end(func_name: str, output_desc: str) -> None:
512
  except Exception as e: # pragma: no cover
513
  print(f"[TOOL RESULT] {func_name} (failed to log output: {e})", flush=True)
514
 
515
  def _extract_date_from_snippet(snippet: str) -> str:
516
  """
517
  Extract publication date from search result snippet using common patterns.
@@ -1532,7 +1574,7 @@ CSS_STYLES = """
1532
  /* Place bold tools list on line 2, normal auth note on line 3 (below title) */
1533
  .app-title::before {
1534
  grid-row: 2;
1535
- content: "Fetch Webpage | Search DuckDuckGo | Python Interpreter | Memory Manager | Kokoro TTS | Image Generation | Video Generation";
1536
  display: block;
1537
  font-size: 1rem;
1538
  font-weight: 700;
@@ -2176,6 +2218,529 @@ video_generation_interface = gr.Interface(
2176
  show_api=bool(os.getenv("HF_READ_TOKEN") or os.getenv("HF_TOKEN")),
2177
  )
2178
 
2179
  _interfaces = [
2180
  fetch_interface,
2181
  concise_interface,
@@ -2184,6 +2749,7 @@ _interfaces = [
2184
  kokoro_interface,
2185
  image_generation_interface, # Always visible in UI
2186
  video_generation_interface, # Always visible in UI
 
2187
  ]
2188
  _tab_names = [
2189
  "Fetch Webpage",
@@ -2193,6 +2759,7 @@ _tab_names = [
2193
  "Kokoro TTS",
2194
  "Image Generation",
2195
  "Video Generation",
 
2196
  ]
2197
 
2198
  with gr.Blocks(title="Nymbo/Tools MCP", theme="Nymbo/Nymbo_Theme", css=CSS_STYLES) as demo:
 
49
  # Fetch: Enhanced HTTP + extraction utils
50
  # ==============================
51
 
52
+ def _http_get_enhanced(url: str, timeout: int | float = 30, *, skip_rate_limit: bool = False) -> requests.Response:
53
  """
54
  Download the page with enhanced headers, timeout handling, and better error recovery.
55
  """
 
63
  "Upgrade-Insecure-Requests": "1",
64
  }
65
 
66
+ # Apply rate limiting unless explicitly skipped
67
+ if not skip_rate_limit:
68
+ _fetch_rate_limiter.acquire()
69
 
70
  try:
71
  response = requests.get(
72
  url,
73
  headers=headers,
74
+ timeout=timeout, # Configurable timeout
75
  allow_redirects=True,
76
  stream=False
77
  )
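For illustration only (not part of the commit): a minimal sketch of how the reworked signature might be called, assuming _http_get_enhanced is in scope; the URLs are placeholders.

# Default behaviour: 30s timeout, goes through the shared rate limiter.
resp = _http_get_enhanced("https://example.com")
# Deep-Research-style call: shorter timeout, bypasses the limiter so the
# research pipeline can control its own pacing.
fast = _http_get_enhanced("https://example.com", timeout=10, skip_rate_limit=True)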
 
513
  except Exception as e: # pragma: no cover
514
  print(f"[TOOL RESULT] {func_name} (failed to log output: {e})", flush=True)
515
 
516
+
517
+ # ==============================
518
+ # Deep Research helpers: slow-host detection
519
+ # ==============================
520
+
521
+ class SlowHost(Exception):
522
+ """Marker exception for slow hosts (timeouts) to trigger requeue."""
523
+ pass
524
+
525
+
526
+ def _fetch_page_markdown_fast(url: str, max_chars: int = 3000, timeout: float = 10.0) -> str:
527
+ """Fetch a single URL quickly; raise SlowHost on timeout.
528
+
529
+ Uses a shorter HTTP timeout to detect slow hosts, then reuses Fetch_Webpage
530
+ logic for conversion to Markdown. Returns empty string on non-timeout errors.
531
+ """
532
+ try:
533
+ # Bypass global rate limiter here; we want Deep Research to control pacing.
534
+ resp = _http_get_enhanced(url, timeout=timeout, skip_rate_limit=True)
535
+ resp.raise_for_status()
536
+ except requests.exceptions.RequestException as e:
537
+ msg = str(e)
538
+ if "timed out" in msg.lower():
539
+ raise SlowHost(msg)
540
+ return ""
541
+
542
+ final_url = str(resp.url)
543
+ ctype = resp.headers.get("Content-Type", "")
544
+ if "html" not in ctype.lower():
545
+ return ""
546
+
547
+ # Decode to text and convert similar to Fetch_Webpage (lean path)
548
+ resp.encoding = resp.encoding or resp.apparent_encoding
549
+ html = resp.text
550
+ soup = BeautifulSoup(html, "lxml")
551
+ # Reuse fullpage conversion with default selectors
552
+ md_text = _fullpage_markdown_from_soup(soup, final_url, "")
553
+ if max_chars > 0 and len(md_text) > max_chars:
554
+ md_text, _ = _truncate_markdown(md_text, max_chars)
555
+ return md_text
556
+
557
  def _extract_date_from_snippet(snippet: str) -> str:
558
  """
559
  Extract publication date from search result snippet using common patterns.
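A minimal sketch of the timeout-requeue contract that the SlowHost and _fetch_page_markdown_fast helpers above establish (illustrative only; the real scheduling, with deadlines and delayed retries, lives in Deep_Research further down this diff).

from collections import deque

def drain(urls: list[str], max_attempts: int = 2) -> dict[str, str]:
    """Fetch each URL, requeuing hosts that merely time out."""
    queue = deque((u, 1) for u in urls)
    pages: dict[str, str] = {}
    while queue:
        url, attempt = queue.popleft()
        try:
            md = _fetch_page_markdown_fast(url, max_chars=3000, timeout=10.0)
        except SlowHost:
            if attempt < max_attempts:
                queue.append((url, attempt + 1))  # push slow hosts to the back
            continue
        if md:  # empty string means a non-timeout failure or non-HTML page
            pages[url] = md
    return pages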
 
1574
  /* Place bold tools list on line 2, normal auth note on line 3 (below title) */
1575
  .app-title::before {
1576
  grid-row: 2;
1577
+ content: "Fetch Webpage | Search DuckDuckGo | Python Interpreter | Memory Manager | Kokoro TTS | Image Generation | Video Generation | Deep Research";
1578
  display: block;
1579
  font-size: 1rem;
1580
  font-weight: 700;
 
2218
  show_api=bool(os.getenv("HF_READ_TOKEN") or os.getenv("HF_TOKEN")),
2219
  )
2220
 
2221
+ # ==========================
2222
+ # Deep Research (Search + Fetch + LLM)
2223
+ # ==========================
2224
+
2225
+ HF_TEXTGEN_TOKEN = os.getenv("HF_READ_TOKEN") or os.getenv("HF_TOKEN")
2226
+
2227
+
2228
+ def _normalize_query(q: str) -> str:
2229
+ """Normalize fancy quotes and stray punctuation in queries.
2230
+
2231
+ - Replace curly quotes with straight quotes
2232
+ - Collapse multiple quotes/spaces
2233
+ - Strip leading/trailing quotes
2234
+ """
2235
+ if not q:
2236
+ return ""
2237
+ repl = {
2238
+ "“": '"',
2239
+ "”": '"',
2240
+ "‘": "'",
2241
+ "’": "'",
2242
+ "`": "'",
2243
+ }
2244
+ for k, v in repl.items():
2245
+ q = q.replace(k, v)
2246
+ # Remove duplicated quotes and excessive spaces
2247
+ q = re.sub(r'\s+', ' ', q)
2248
+ q = re.sub(r'"\s+"', ' ', q)
2249
+ q = q.strip().strip('"').strip()
2250
+ return q
2251
+
2252
+
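# Editorial illustration (not part of the commit): expected behaviour of the
# normalization above, traced by hand from the substitutions rather than run.
print(_normalize_query('“CRISPR ethics”'))       # -> CRISPR ethics
print(_normalize_query(' "solar"  "storage" '))  # -> solar storage (quoted phrases merged, outer quotes stripped)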
2253
+ def _search_urls_only(query: str, max_results: int) -> list[str]:
2254
+ """Return a list of result URLs using DuckDuckGo search with rate limiting.
2255
+
2256
+ Uses ddgs to fetch web results only (no news/images/videos). Falls back to empty list on error.
2257
+ """
2258
+ if not query or not query.strip() or max_results <= 0:
2259
+ return []
2260
+ urls: list[str] = []
2261
+ try:
2262
+ _search_rate_limiter.acquire()
2263
+ with DDGS() as ddgs:
2264
+ for item in ddgs.text(query, region="wt-wt", safesearch="moderate", max_results=max_results):
2265
+ url = (item.get("href") or item.get("url") or "").strip()
2266
+ if url:
2267
+ urls.append(url)
2268
+ except Exception:
2269
+ pass
2270
+ # De-duplicate while preserving order
2271
+ seen = set()
2272
+ deduped = []
2273
+ for u in urls:
2274
+ if u not in seen:
2275
+ seen.add(u)
2276
+ deduped.append(u)
2277
+ return deduped
2278
+
2279
+
2280
+ def _fetch_page_markdown(url: str, max_chars: int = 3000) -> str:
2281
+ """Fetch a single URL and return cleaned Markdown using existing Fetch_Webpage.
2282
+
2283
+ Returns empty string on error.
2284
+ """
2285
+ try:
2286
+ # Intentionally skip global fetch rate limiting for Deep Research speed.
2287
+ return Fetch_Webpage(url=url, max_chars=max_chars, strip_selectors="", url_scraper=False, offset=0) # type: ignore[misc]
2288
+ except Exception:
2289
+ return ""
2290
+
2291
+
2292
+ def _truncate_join(parts: list[str], max_chars: int) -> tuple[str, bool]:
2293
+ out = []
2294
+ total = 0
2295
+ truncated = False
2296
+ for p in parts:
2297
+ if not p:
2298
+ continue
2299
+ if total + len(p) > max_chars:
2300
+ out.append(p[: max(0, max_chars - total)])
2301
+ truncated = True
2302
+ break
2303
+ out.append(p)
2304
+ total += len(p)
2305
+ return ("\n\n".join(out), truncated)
2306
+
2307
+
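# Editorial illustration (not part of the commit): hand-traced behaviour of the
# helper above. Note the budget counts only the parts themselves, not the
# "\n\n" separators inserted between them.
text, was_truncated = _truncate_join(["a" * 10, "b" * 10], max_chars=15)
# text == "aaaaaaaaaa\n\nbbbbb"  (second block cut at the 15-char budget)
# was_truncated is True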
2308
+ def _build_research_prompt(
2309
+ summary: str,
2310
+ queries: list[str],
2311
+ url_list: list[str],
2312
+ pages_map: dict[str, str],
2313
+ ) -> str:
2314
+ researcher_instructions = (
2315
+ "You are Nymbot, a helpful deep research assistant. You will be asked a Query from a user and you will create a long, comprehensive, well-structured research report in response to the user's Query.\n\n"
2316
+ "You have been provided with User Question, Search Queries, and numerous webpages that the searches yielded.\n\n"
2317
+ "<report_format>\n"
2318
+ "Write a well-formatted report in the structure of a scientific report to a broad audience. The report must be readable and have a nice flow of Markdown headers and paragraphs of text. Do NOT use bullet points or lists which break up the natural flow. The report must be exhaustive for comprehensive topics.\n"
2319
+ "For any given user query, first determine the major themes or areas that need investigation, then structure these as main sections, and develop detailed subsections that explore various facets of each theme. Each section and subsection requires paragraphs of texts that need to all connect into one narrative flow.\n"
2320
+ "</report_format>\n\n"
2321
+ "<document_structure>\n"
2322
+ "- Always begin with a clear title using a single # header\n"
2323
+ "- Organize content into major sections using ## headers\n"
2324
+ "- Further divide into subsections using ### headers\n"
2325
+ "- Use #### headers sparingly for special subsections\n"
2326
+ "- Never skip header levels\n"
2327
+ "- Write multiple paragraphs per section or subsection\n"
2328
+ "- Each paragraph must contain at least 4-5 sentences, present novel insights and analysis grounded in source material, connect ideas to original query, and build upon previous paragraphs to create a narrative flow\n"
2329
+ "- Never use lists, instead always use text or tables\n\n"
2330
+ "Mandatory Section Flow:\n"
2331
+ "1. Title (# level)\n - Before writing the main report, start with one detailed paragraph summarizing key findings\n"
2332
+ "2. Main Body Sections (## level)\n - Each major topic gets its own section (## level). There MUST BE at least 5 sections.\n - Use ### subsections for detailed analysis\n - Every section or subsection needs at least one paragraph of narrative before moving to the next section\n - Do NOT have a section titled \"Main Body Sections\" and instead pick informative section names that convey the theme of the section\n"
2333
+ "3. Conclusion (## level)\n - Synthesis of findings\n - Potential recommendations or next steps\n"
2334
+ "</document_structure>\n\n"
2335
+ "<planning_rules>\n"
2336
+ "- Always break it down into multiple steps\n"
2337
+ "- Assess the different sources and whether they are useful for any steps needed to answer the query\n"
2338
+ "- Create the best report that weighs all the evidence from the sources\n"
2339
+ "- Remember that the current date is: Wednesday, April 23, 2025, 11:50 AM EDT\n"
2340
+ "- Make sure that your final report addresses all parts of the query\n"
2341
+ "- Communicate a brief high-level plan in the introduction; do not reveal chain-of-thought.\n"
2342
+ "- When referencing sources during analysis, you should still refer to them by index with brackets and follow <citations>\n"
2343
+ "- As a final step, review your planned report structure and ensure it completely answers the query.\n"
2344
+ "</planning_rules>\n\n"
2345
+ )
2346
+
2347
+ # Build sources block limited to a reasonable size to avoid overrun
2348
+ # Cap combined sources to ~100k characters (enforced below)
2349
+ sources_blocks: list[str] = []
2350
+ indexed_urls: list[str] = []
2351
+ for idx, u in enumerate(url_list, start=1):
2352
+ txt = pages_map.get(u, "").strip()
2353
+ if not txt:
2354
+ continue
2355
+ indexed_urls.append(f"[{idx}] {u}")
2356
+ # Prefix each source with its index and URL for citation
2357
+ sources_blocks.append(f"[Source {idx}] URL: {u}\n\n{txt}")
2358
+
2359
+ # Cap combined sources aggressively to stay within provider limits
2360
+ sources_joined, truncated = _truncate_join(sources_blocks, max_chars=100_000)
2361
+
2362
+ prompt = []
2363
+ prompt.append(researcher_instructions)
2364
+ prompt.append("<user_query_summary>\n" + (summary or "") + "\n</user_query_summary>\n")
2365
+ # Include populated queries only
2366
+ populated = [q for q in queries if q and q.strip()]
2367
+ if populated:
2368
+ prompt.append("<search_queries>\n" + "\n".join(f"- {q.strip()}" for q in populated) + "\n</search_queries>\n")
2369
+ if indexed_urls:
2370
+ prompt.append("<sources_list>\n" + "\n".join(indexed_urls) + "\n</sources_list>\n")
2371
+ prompt.append("<fetched_documents>\n" + sources_joined + ("\n\n[NOTE] Sources truncated due to context limits." if truncated else "") + "\n</fetched_documents>")
2372
+ return "\n\n".join(prompt)
2373
+
2374
+
2375
+ def _write_report_tmp(text: str) -> str:
2376
+ # Create a unique temp directory and write a deterministic filename inside it.
2377
+ tmp_dir = tempfile.mkdtemp(prefix="deep_research_")
2378
+ path = os.path.join(tmp_dir, "research_report.txt")
2379
+ with open(path, "w", encoding="utf-8") as f:
2380
+ f.write(text)
2381
+ return path
2382
+
2383
+
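# Editorial illustration (not part of the commit): each call to the helper above
# gets its own temp directory, so concurrent reports never overwrite each other
# even though the filename inside is always research_report.txt.
p1 = _write_report_tmp("# Report A")
p2 = _write_report_tmp("# Report B")
# os.path.dirname(p1) != os.path.dirname(p2); both paths end in research_report.txt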
2384
+ def Deep_Research(
2385
+ summary: Annotated[str, "Summarization of research topic (one or more sentences)."],
2386
+ query1: Annotated[str, "DDG Search Query 1"],
2387
+ max1: Annotated[int, "Max results for Query 1 (1-50)"] = 10,
2388
+ query2: Annotated[str, "DDG Search Query 2"] = "",
2389
+ max2: Annotated[int, "Max results for Query 2 (1-50)"] = 10,
2390
+ query3: Annotated[str, "DDG Search Query 3"] = "",
2391
+ max3: Annotated[int, "Max results for Query 3 (1-50)"] = 10,
2392
+ query4: Annotated[str, "DDG Search Query 4"] = "",
2393
+ max4: Annotated[int, "Max results for Query 4 (1-50)"] = 10,
2394
+ query5: Annotated[str, "DDG Search Query 5"] = "",
2395
+ max5: Annotated[int, "Max results for Query 5 (1-50)"] = 10,
2396
+ ) -> tuple[str, str, str]:
2397
+ """
2398
+ Run deep research by searching, fetching pages, and generating a comprehensive report via a large LLM provider.
2399
+
2400
+ Pipeline:
2401
+ 1) Perform up to 5 DuckDuckGo searches (URLs only). If total requested > 50, each query is limited to 10.
2402
+ 2) Fetch all discovered URLs (up to 50) as cleaned Markdown (max 3000 chars per page).
2403
+ 3) Call Hugging Face Inference Providers (Cerebras) with model `Qwen/Qwen3-235B-A22B-Thinking-2507` to write a research report.
2404
+
2405
+ Args:
2406
+ summary (str): A brief description of the overall research topic or user question.
2407
+ This is shown to the researcher model and used to frame the report.
2408
+ query1 (str): DuckDuckGo search query #1. Required if you want any results.
2409
+ Example: "site:nature.com CRISPR ethical implications".
2410
+ max1 (int): Maximum number of URLs to take from query #1 (1–50).
2411
+ If the combined total requested across all queries exceeds 50, each query will be capped to 10.
2412
+ query2 (str): DuckDuckGo search query #2. Optional; leave empty to skip.
2413
+ max2 (int): Maximum number of URLs to take from query #2 (1–50).
2414
+ query3 (str): DuckDuckGo search query #3. Optional; leave empty to skip.
2415
+ max3 (int): Maximum number of URLs to take from query #3 (1–50).
2416
+ query4 (str): DuckDuckGo search query #4. Optional; leave empty to skip.
2417
+ max4 (int): Maximum number of URLs to take from query #4 (1–50).
2418
+ query5 (str): DuckDuckGo search query #5. Optional; leave empty to skip.
2419
+ max5 (int): Maximum number of URLs to take from query #5 (1–50).
2420
+
2421
+ Returns:
2422
+ - Markdown research report
2423
+ - Newline-separated list of fetched URLs
2424
+ - Path to a downloadable .txt file containing the full report
2425
+
2426
+ Raises:
2427
+ gr.Error: If a required Hugging Face token is not provided or if the researcher
2428
+ model call fails after retries.
2429
+
2430
+ Notes:
2431
+ - Total URLs across queries are capped at 50.
2432
+ - Each fetched page is truncated to ~3000 characters before prompting the model.
2433
+ - The function is optimized to complete within typical MCP time budgets.
2434
+ """
2435
+ _log_call_start(
2436
+ "Deep_Research",
2437
+ summary=_truncate_for_log(summary or "", 200),
2438
+ queries=[q for q in [query1, query2, query3, query4, query5] if q],
2439
+ )
2440
+
2441
+ # Validate token
2442
+ if not HF_TEXTGEN_TOKEN:
2443
+ _log_call_end("Deep_Research", "error=missing HF token")
2444
+ raise gr.Error("Please provide a `HF_READ_TOKEN` to enable Deep Research.")
2445
+
2446
+ # Normalize caps per spec and sanitize queries
2447
+ queries = [
2448
+ _normalize_query(query1 or ""),
2449
+ _normalize_query(query2 or ""),
2450
+ _normalize_query(query3 or ""),
2451
+ _normalize_query(query4 or ""),
2452
+ _normalize_query(query5 or ""),
2453
+ ]
2454
+ reqs = [max(1, min(50, int(max1))), max(1, min(50, int(max2))), max(1, min(50, int(max3))), max(1, min(50, int(max4))), max(1, min(50, int(max5)))]
2455
+ total_requested = sum(reqs)
2456
+ if total_requested > 50:
2457
+ # Enforce rule: each query fetches 10 results when over 50 total requested
2458
+ reqs = [10, 10, 10, 10, 10]
2459
+
2460
+ # Overall deadline to avoid MCP 60s timeout (reserve ~5s for prompt+inference)
2461
+ start_ts = time.time()
2462
+ budget_seconds = 55.0
2463
+ deadline = start_ts + budget_seconds
2464
+
2465
+ def time_left() -> float:
2466
+ return max(0.0, deadline - time.time())
2467
+
2468
+ # 1) Run searches (parallelize queries to reduce latency) and stop if budget exceeded
2469
+ all_urls: list[str] = []
2470
+ from concurrent.futures import ThreadPoolExecutor, as_completed
2471
+ tasks = []
2472
+ with ThreadPoolExecutor(max_workers=min(5, sum(1 for q in queries if q.strip())) or 1) as executor:
2473
+ for q, n in zip(queries, reqs):
2474
+ if not q.strip():
2475
+ continue
2476
+ tasks.append(executor.submit(_search_urls_only, q.strip(), n))
2477
+ for fut in as_completed(tasks):
2478
+ try:
2479
+ urls = fut.result() or []
2480
+ except Exception:
2481
+ urls = []
2482
+ for u in urls:
2483
+ if u not in all_urls:
2484
+ all_urls.append(u)
2485
+ if len(all_urls) >= 50:
2486
+ break
2487
+ if time_left() <= 0.5:
2488
+ # Out of budget for searching; stop early
2489
+ break
2490
+ # Don't block on leftover tasks; cancel/shutdown immediately
2491
+ # Python futures don't support true cancel if running, but we can just avoid waiting
2492
+ # and let executor context exit cleanly.
2493
+ if len(all_urls) > 50:
2494
+ all_urls = all_urls[:50]
2495
+
2496
+ # Filter obviously irrelevant/shopping/dictionary/forum domains that often appear due to phrase tokenization
2497
+ blacklist = {
2498
+ "homedepot.com",
2499
+ "tractorsupply.com",
2500
+ "mcmaster.com",
2501
+ "mrchain.com",
2502
+ "answers.com",
2503
+ "city-data.com",
2504
+ "dictionary.cambridge.org",
2505
+ }
2506
+ def _domain(u: str) -> str:
2507
+ try:
2508
+ return urlparse(u).netloc.lower()
2509
+ except Exception:
2510
+ return ""
2511
+ all_urls = [u for u in all_urls if _domain(u) not in blacklist]
2512
+
2513
+ # Skip known large/non-HTML file types to avoid wasted fetch time
2514
+ skip_exts = (
2515
+ ".pdf", ".ppt", ".pptx", ".doc", ".docx", ".xls", ".xlsx",
2516
+ ".zip", ".gz", ".tgz", ".bz2", ".7z", ".rar"
2517
+ )
2518
+ def _skip_url(u: str) -> bool:
2519
+ try:
2520
+ path = urlparse(u).path.lower()
2521
+ except Exception:
2522
+ return False
2523
+ return any(path.endswith(ext) for ext in skip_exts)
2524
+ all_urls = [u for u in all_urls if not _skip_url(u)]
2525
+
2526
+ # 2) Fetch pages (markdown, 3000 chars) with slow-host requeue (3s delay), respecting deadline
2527
+ pages: dict[str, str] = {}
2528
+ if all_urls:
2529
+ from concurrent.futures import ThreadPoolExecutor, Future
2530
+ from collections import deque
2531
+
2532
+ queue = deque(all_urls)
2533
+ attempts: dict[str, int] = {u: 0 for u in all_urls}
2534
+ max_attempts = 2 # fewer retries to honor budget
2535
+ max_workers = min(12, max(4, len(all_urls)))
2536
+
2537
+ in_flight: dict[Future, str] = {}
2538
+
2539
+ def schedule_next(executor: ThreadPoolExecutor) -> None:
2540
+ while queue and len(in_flight) < max_workers:
2541
+ u = queue.popleft()
2542
+ # Skip if already fetched or exceeded attempts
2543
+ if u in pages:
2544
+ continue
2545
+ if attempts[u] >= max_attempts:
2546
+ continue
2547
+ attempts[u] += 1
2548
+ # Adaptive per-attempt timeout based on time remaining; min 2s, max 10s
2549
+ tl = time_left()
2550
+ per_timeout = 10.0 if tl > 15 else (5.0 if tl > 8 else 2.0)
2551
+ fut = executor.submit(_fetch_page_markdown_fast, u, 3000, per_timeout)
2552
+ in_flight[fut] = u
2553
+
2554
+ delayed: list[tuple[float, str]] = [] # (ready_time, url)
2555
+
2556
+ with ThreadPoolExecutor(max_workers=max_workers) as executor:
2557
+ schedule_next(executor)
2558
+
2559
+ while (in_flight or queue) and time_left() > 0.2:
2560
+ # Move any delayed items whose time has arrived back into the queue
2561
+ now = time.time()
2562
+ if delayed:
2563
+ ready, not_ready = [], []
2564
+ for t, u in delayed:
2565
+ (ready if t <= now else not_ready).append((t, u))
2566
+ delayed = not_ready
2567
+ for _, u in ready:
2568
+ queue.append(u)
2569
+ # Try to schedule newly ready URLs
2570
+ if ready:
2571
+ schedule_next(executor)
2572
+
2573
+ done: list[Future] = []
2574
+ # Poll completed futures without blocking too long
2575
+ for fut in list(in_flight.keys()):
2576
+ if fut.done():
2577
+ done.append(fut)
2578
+
2579
+ if not done:
2580
+ # If nothing to do but we have delayed items pending, sleep until next due time (capped)
2581
+ if not queue and delayed:
2582
+ sleep_for = max(0.02, min(0.25, max(0.0, min(t for t, _ in delayed) - time.time())))
2583
+ time.sleep(sleep_for)
2584
+ else:
2585
+ # brief sleep to avoid busy spin
2586
+ time.sleep(0.05)
2587
+ else:
2588
+ for fut in done:
2589
+ u = in_flight.pop(fut)
2590
+ try:
2591
+ md = fut.result()
2592
+ if md and not md.startswith("Unsupported content type") and not md.startswith("An error occurred"):
2593
+ pages[u] = md
2594
+ try:
2595
+ print(f"[FETCH OK] {u} (chars={len(md)})", flush=True)
2596
+ except Exception:
2597
+ pass
2598
+ else:
2599
+ # If empty due to non-timeout error, don't retry further
2600
+ pass
2601
+ except SlowHost:
2602
+ # Requeue to the back after 3 seconds
2603
+ # But only if we have enough time left for a retry window
2604
+ if time_left() > 5.0:
2605
+ delayed.append((time.time() + 3.0, u))
2606
+ except Exception:
2607
+ # Non-timeout error; skip
2608
+ pass
2609
+ # After handling done items, try to schedule more
2610
+ schedule_next(executor)
2611
+
2612
+ # If budget is nearly up and no pages were fetched, fall back to using the unique URL list in prompt (no content)
2613
+ # The prompt builder will include sources list even if pages_map is empty; LLM can still reason over URLs indirectly.
2614
+
2615
+ # Build final prompt
2616
+ prompt = _build_research_prompt(summary=summary or "", queries=[q for q in queries if q.strip()], url_list=list(pages.keys()), pages_map=pages)
2617
+
2618
+ # 3) Call the Researcher model via Cerebras provider with robust fallbacks
2619
+ messages = [
2620
+ {"role": "system", "content": "You are Nymbot, an expert deep research assistant."},
2621
+ {"role": "user", "content": prompt},
2622
+ ]
2623
+ try:
2624
+ prompt_chars = len(prompt)
2625
+ except Exception:
2626
+ prompt_chars = -1
2627
+ print(f"[PIPELINE] Fetch complete: pages={len(pages)}, unique_urls={len(pages.keys())}, prompt_chars={prompt_chars}", flush=True)
2628
+ print("[PIPELINE] Starting inference (provider=cerebras, model=Qwen/Qwen3-235B-A22B-Thinking-2507)", flush=True)
2629
+ def _run_inference(provider: str, max_tokens: int, temp: float, top_p: float):
2630
+ client = InferenceClient(provider=provider, api_key=HF_TEXTGEN_TOKEN)
2631
+ return client.chat.completions.create(
2632
+ model="Qwen/Qwen3-235B-A22B-Thinking-2507",
2633
+ messages=messages,
2634
+ max_tokens=max_tokens,
2635
+ temperature=temp,
2636
+ top_p=top_p,
2637
+ )
2638
+ try:
2639
+ # Attempt 1: Cerebras, full prompt
2640
+ print("[LLM] Attempt 1: provider=cerebras, max_tokens=32768", flush=True)
2641
+ completion = _run_inference("cerebras", max_tokens=32768, temp=0.3, top_p=0.95)
2642
+ except Exception as e1:
2643
+ print(f"[LLM] Attempt 1 failed: {str(e1)[:200]}", flush=True)
2644
+ # Attempt 2: Cerebras, trimmed prompt and lower max_tokens
2645
+ try:
2646
+ prompt2 = _build_research_prompt(summary=summary or "", queries=[q for q in queries if q.strip()], url_list=list(pages.keys())[:30], pages_map={k: pages[k] for k in list(pages.keys())[:30]})
2647
+ messages = [
2648
+ {"role": "system", "content": "You are Nymbot, an expert deep research assistant."},
2649
+ {"role": "user", "content": prompt2},
2650
+ ]
2651
+ print("[LLM] Attempt 2: provider=cerebras (trimmed), max_tokens=16384", flush=True)
2652
+ completion = _run_inference("cerebras", max_tokens=16384, temp=0.7, top_p=0.95)
2653
+ except Exception as e2:
2654
+ print(f"[LLM] Attempt 2 failed: {str(e2)[:200]}", flush=True)
2655
+ # Attempt 3: provider auto-fallback with trimmed prompt
2656
+ try:
2657
+ print("[LLM] Attempt 3: provider=auto, max_tokens=8192", flush=True)
2658
+ completion = _run_inference("auto", max_tokens=8192, temp=0.7, top_p=0.95)
2659
+ except Exception as e3:
2660
+ _log_call_end("Deep_Research", f"error={_truncate_for_log(str(e3), 260)}")
2661
+ raise gr.Error(f"Researcher model call failed: {e3}")
2662
+ raw = completion.choices[0].message.content or ""
2663
+ # 1) Strip any internal <think>...</think> blocks produced by the Thinking model
2664
+ try:
2665
+ no_think = re.sub(r"<think>[\s\S]*?</think>", "", raw, flags=re.IGNORECASE)
2666
+ no_think = re.sub(r"</?think>", "", no_think, flags=re.IGNORECASE)
2667
+ except Exception:
2668
+ no_think = raw
2669
+
2670
+ # 2) Remove planning / meta-analysis paragraphs that are part of the model's visible thinking trace.
2671
+ # Heuristics: paragraphs (double-newline separated) containing phrases like "let me", "first,", "now i'll",
2672
+ # "i will", "i'll", "let's", "now let me", or starting with "first" (case-insensitive).
2673
+ try:
2674
+ paragraphs = [p for p in re.split(r"\n\s*\n", no_think) if p.strip()]
2675
+ keep: list[str] = []
2676
+ removed = 0
2677
+ planning_re = re.compile(r"\b(let me|now i(?:'ll| will)?|first,|i will|i'll|let's|now let me|i need to)\b", re.IGNORECASE)
2678
+ for p in paragraphs:
2679
+ # If the paragraph looks like explicit planning/analysis, drop it
2680
+ if planning_re.search(p):
2681
+ removed += 1
2682
+ continue
2683
+ keep.append(p)
2684
+ report = "\n\n".join(keep).strip()
2685
+ # If we removed everything, fall back to the no_think version
2686
+ if not report:
2687
+ report = no_think.strip()
2688
+ except Exception:
2689
+ report = no_think
2690
+
2691
+ # 3) Final whitespace normalization
2692
+ report = re.sub(r"\n\s*\n\s*\n+", "\n\n", report)
2693
+ # Emit a short postprocess log
2694
+ try:
2695
+ print(f"[POSTPROCESS] removed_planning_paragraphs={removed}, raw_chars={len(raw)}, final_chars={len(report)}", flush=True)
2696
+ except Exception:
2697
+ pass
2698
+
2699
+ # Build outputs
2700
+ links_text = "\n".join([f"[{i+1}] {u}" for i, u in enumerate(pages.keys())])
2701
+ file_path = _write_report_tmp(report)
2702
+ elapsed = time.time() - start_ts
2703
+ # Print explicit timing and include in structured log output
2704
+ print(f"[TIMING] Deep_Research elapsed: {elapsed:.2f}s", flush=True)
2705
+ _log_call_end("Deep_Research", f"urls={len(pages)} file={os.path.basename(file_path)} duration={elapsed:.2f}s")
2706
+ return report, links_text, file_path
2707
+
2708
+
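# Editorial illustration (not part of the commit): a hypothetical direct call;
# the argument names and return shape come from the signature above, the values
# are made up.
report_md, links_text, report_path = Deep_Research(
    summary="State of small modular reactors in 2024",
    query1="small modular reactor deployments 2024", max1=20,
    query2="SMR licensing status 2024", max2=20,
    query3="SMR cost overrun analysis", max3=20,
)
# The requested totals (20 + 20 + 20 plus the defaults of 10 for the two unused
# queries) exceed 50, so every query is capped to 10 results; with three active
# queries at most ~30 unique URLs are fetched before the report is written.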
2709
+ deep_research_interface = gr.Interface(
2710
+ fn=Deep_Research,
2711
+ inputs=[
2712
+ gr.Textbox(label="Summarization of research topic", lines=3, placeholder="Briefly summarize the research topic or user question"),
2713
+ gr.Textbox(label="DDG Search Query 1"),
2714
+ gr.Slider(1, 50, value=10, step=1, label="Max results (Q1)"),
2715
+ gr.Textbox(label="DDG Search Query 2", value=""),
2716
+ gr.Slider(1, 50, value=10, step=1, label="Max results (Q2)"),
2717
+ gr.Textbox(label="DDG Search Query 3", value=""),
2718
+ gr.Slider(1, 50, value=10, step=1, label="Max results (Q3)"),
2719
+ gr.Textbox(label="DDG Search Query 4", value=""),
2720
+ gr.Slider(1, 50, value=10, step=1, label="Max results (Q4)"),
2721
+ gr.Textbox(label="DDG Search Query 5", value=""),
2722
+ gr.Slider(1, 50, value=10, step=1, label="Max results (Q5)"),
2723
+ ],
2724
+ outputs=[
2725
+ gr.Markdown(label="Research Report"),
2726
+ gr.Textbox(label="Fetched Links", lines=8),
2727
+ gr.File(label="Download Research Report", file_count="single"),
2728
+ ],
2729
+ title="Deep Research",
2730
+ description=(
2731
+ "<div style=\"text-align:center\">Perform multi-query web research: search with DuckDuckGo, fetch up to 50 pages in parallel, "
2732
+ "and generate a comprehensive report using a large LLM via Hugging Face Inference Providers (Cerebras). Requires HF_READ_TOKEN.</div>"
2733
+ ),
2734
+ api_description=(
2735
+ "Runs 1–5 DDG searches (URLs only), caps total results to 50 (when exceeding, each query returns 10). "
2736
+ "Fetches all URLs (3000 chars each) and calls the Researcher to write a research report. "
2737
+ "Returns the report (Markdown), the list of sources, and a downloadable text file path. "
2738
+ "Provide the user with one-paragraph summary of the research report and the txt file in this format `![research_report.txt](URL)`"
2739
+ ),
2740
+ flagging_mode="never",
2741
+ show_api=bool(HF_TEXTGEN_TOKEN),
2742
+ )
2743
+
2744
  _interfaces = [
2745
  fetch_interface,
2746
  concise_interface,
 
2749
  kokoro_interface,
2750
  image_generation_interface, # Always visible in UI
2751
  video_generation_interface, # Always visible in UI
2752
+ deep_research_interface,
2753
  ]
2754
  _tab_names = [
2755
  "Fetch Webpage",
 
2759
  "Kokoro TTS",
2760
  "Image Generation",
2761
  "Video Generation",
2762
+ "Deep Research",
2763
  ]
2764
 
2765
  with gr.Blocks(title="Nymbo/Tools MCP", theme="Nymbo/Nymbo_Theme", css=CSS_STYLES) as demo: