Nymbo committed
Commit 2dea46b · verified · 1 Parent(s): dc537d3

ADDING NEW DEEP_RESEARCH TOOL

Files changed (1)
  1. app.py +572 -5
app.py CHANGED
@@ -49,7 +49,7 @@ except Exception: # pragma: no cover - optional dependency
49
  # Fetch: Enhanced HTTP + extraction utils
50
  # ==============================
51
 
52
- def _http_get_enhanced(url: str) -> requests.Response:
53
  """
54
  Download the page with enhanced headers, timeout handling, and better error recovery.
55
  """
@@ -63,14 +63,15 @@ def _http_get_enhanced(url: str) -> requests.Response:
63
  "Upgrade-Insecure-Requests": "1",
64
  }
65
 
66
- # Apply rate limiting
67
- _fetch_rate_limiter.acquire()
 
68
 
69
  try:
70
  response = requests.get(
71
  url,
72
  headers=headers,
73
- timeout=30, # Increased timeout
74
  allow_redirects=True,
75
  stream=False
76
  )
@@ -512,6 +513,47 @@ def _log_call_end(func_name: str, output_desc: str) -> None:
512
  except Exception as e: # pragma: no cover
513
  print(f"[TOOL RESULT] {func_name} (failed to log output: {e})", flush=True)
514
 
515
  def _extract_date_from_snippet(snippet: str) -> str:
516
  """
517
  Extract publication date from search result snippet using common patterns.
@@ -1532,7 +1574,7 @@ CSS_STYLES = """
1532
  /* Place bold tools list on line 2, normal auth note on line 3 (below title) */
1533
  .app-title::before {
1534
  grid-row: 2;
1535
- content: "Fetch Webpage | Search DuckDuckGo | Python Interpreter | Memory Manager | Kokoro TTS | Image Generation | Video Generation";
1536
  display: block;
1537
  font-size: 1rem;
1538
  font-weight: 700;
@@ -2176,6 +2218,529 @@ video_generation_interface = gr.Interface(
2176
  show_api=bool(os.getenv("HF_READ_TOKEN") or os.getenv("HF_TOKEN")),
2177
  )
2178
 
2179
  _interfaces = [
2180
  fetch_interface,
2181
  concise_interface,
@@ -2184,6 +2749,7 @@ _interfaces = [
2184
  kokoro_interface,
2185
  image_generation_interface, # Always visible in UI
2186
  video_generation_interface, # Always visible in UI
 
2187
  ]
2188
  _tab_names = [
2189
  "Fetch Webpage",
@@ -2193,6 +2759,7 @@ _tab_names = [
2193
  "Kokoro TTS",
2194
  "Image Generation",
2195
  "Video Generation",
 
2196
  ]
2197
 
2198
  with gr.Blocks(title="Nymbo/Tools MCP", theme="Nymbo/Nymbo_Theme", css=CSS_STYLES) as demo:
 
49
  # Fetch: Enhanced HTTP + extraction utils
50
  # ==============================
51
 
52
+ def _http_get_enhanced(url: str, timeout: int | float = 30, *, skip_rate_limit: bool = False) -> requests.Response:
53
  """
54
  Download the page with enhanced headers, timeout handling, and better error recovery.
55
  """
 
63
  "Upgrade-Insecure-Requests": "1",
64
  }
65
 
66
+ # Apply rate limiting unless explicitly skipped
67
+ if not skip_rate_limit:
68
+ _fetch_rate_limiter.acquire()
69
 
70
  try:
71
  response = requests.get(
72
  url,
73
  headers=headers,
74
+ timeout=timeout, # Configurable timeout
75
  allow_redirects=True,
76
  stream=False
77
  )
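For illustration only (not part of the commit): a minimal sketch of how the reworked signature might be called, assuming _http_get_enhanced is in scope; the URLs are placeholders.

# Default behaviour: 30s timeout, goes through the shared rate limiter.
resp = _http_get_enhanced("https://example.com")
# Deep-Research-style call: shorter timeout, bypasses the limiter so the
# research pipeline can control its own pacing.
fast = _http_get_enhanced("https://example.com", timeout=10, skip_rate_limit=True)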
 
513
  except Exception as e: # pragma: no cover
514
  print(f"[TOOL RESULT] {func_name} (failed to log output: {e})", flush=True)
515
 
516
+
517
+ # ==============================
518
+ # Deep Research helpers: slow-host detection
519
+ # ==============================
520
+
521
+ class SlowHost(Exception):
522
+ """Marker exception for slow hosts (timeouts) to trigger requeue."""
523
+ pass
524
+
525
+
526
+ def _fetch_page_markdown_fast(url: str, max_chars: int = 3000, timeout: float = 10.0) -> str:
527
+ """Fetch a single URL quickly; raise SlowHost on timeout.
528
+
529
+ Uses a shorter HTTP timeout to detect slow hosts, then reuses Fetch_Webpage
530
+ logic for conversion to Markdown. Returns empty string on non-timeout errors.
531
+ """
532
+ try:
533
+ # Bypass global rate limiter here; we want Deep Research to control pacing.
534
+ resp = _http_get_enhanced(url, timeout=timeout, skip_rate_limit=True)
535
+ resp.raise_for_status()
536
+ except requests.exceptions.RequestException as e:
537
+ msg = str(e)
538
+ if "timed out" in msg.lower():
539
+ raise SlowHost(msg)
540
+ return ""
541
+
542
+ final_url = str(resp.url)
543
+ ctype = resp.headers.get("Content-Type", "")
544
+ if "html" not in ctype.lower():
545
+ return ""
546
+
547
+ # Decode to text and convert similar to Fetch_Webpage (lean path)
548
+ resp.encoding = resp.encoding or resp.apparent_encoding
549
+ html = resp.text
550
+ soup = BeautifulSoup(html, "lxml")
551
+ # Reuse fullpage conversion with default selectors
552
+ md_text = _fullpage_markdown_from_soup(soup, final_url, "")
553
+ if max_chars > 0 and len(md_text) > max_chars:
554
+ md_text, _ = _truncate_markdown(md_text, max_chars)
555
+ return md_text
556
+
557
  def _extract_date_from_snippet(snippet: str) -> str:
558
  """
559
  Extract publication date from search result snippet using common patterns.
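A minimal sketch of the timeout-requeue contract that the SlowHost and _fetch_page_markdown_fast helpers above establish (illustrative only; the real scheduling, with deadlines and delayed retries, lives in Deep_Research further down this diff).

from collections import deque

def drain(urls: list[str], max_attempts: int = 2) -> dict[str, str]:
    """Fetch each URL, requeuing hosts that merely time out."""
    queue = deque((u, 1) for u in urls)
    pages: dict[str, str] = {}
    while queue:
        url, attempt = queue.popleft()
        try:
            md = _fetch_page_markdown_fast(url, max_chars=3000, timeout=10.0)
        except SlowHost:
            if attempt < max_attempts:
                queue.append((url, attempt + 1))  # push slow hosts to the back
            continue
        if md:  # empty string means a non-timeout failure or non-HTML page
            pages[url] = md
    return pages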
 
1574
  /* Place bold tools list on line 2, normal auth note on line 3 (below title) */
1575
  .app-title::before {
1576
  grid-row: 2;
1577
+ content: "Fetch Webpage | Search DuckDuckGo | Python Interpreter | Memory Manager | Kokoro TTS | Image Generation | Video Generation | Deep Research";
1578
  display: block;
1579
  font-size: 1rem;
1580
  font-weight: 700;
 
2218
  show_api=bool(os.getenv("HF_READ_TOKEN") or os.getenv("HF_TOKEN")),
2219
  )
2220
 
2221
+ # ==========================
2222
+ # Deep Research (Search + Fetch + LLM)
2223
+ # ==========================
2224
+
2225
+ HF_TEXTGEN_TOKEN = os.getenv("HF_READ_TOKEN") or os.getenv("HF_TOKEN")
2226
+
2227
+
2228
+ def _normalize_query(q: str) -> str:
2229
+ """Normalize fancy quotes and stray punctuation in queries.
2230
+
2231
+ - Replace curly quotes with straight quotes
2232
+ - Collapse multiple quotes/spaces
2233
+ - Strip leading/trailing quotes
2234
+ """
2235
+ if not q:
2236
+ return ""
2237
+ repl = {
2238
+ "“": '"',
2239
+ "”": '"',
2240
+ "‘": "'",
2241
+ "’": "'",
2242
+ "`": "'",
2243
+ }
2244
+ for k, v in repl.items():
2245
+ q = q.replace(k, v)
2246
+ # Remove duplicated quotes and excessive spaces
2247
+ q = re.sub(r'\s+', ' ', q)
2248
+ q = re.sub(r'"\s+"', ' ', q)
2249
+ q = q.strip().strip('"').strip()
2250
+ return q
2251
+
2252
+
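# Editorial illustration (not part of the commit): expected behaviour of the
# normalization above, traced by hand from the substitutions rather than run.
print(_normalize_query('“CRISPR ethics”'))       # -> CRISPR ethics
print(_normalize_query(' "solar"  "storage" '))  # -> solar storage (quoted phrases merged, outer quotes stripped)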
2253
+ def _search_urls_only(query: str, max_results: int) -> list[str]:
2254
+ """Return a list of result URLs using DuckDuckGo search with rate limiting.
2255
+
2256
+ Uses ddgs to fetch web results only (no news/images/videos). Falls back to empty list on error.
2257
+ """
2258
+ if not query or not query.strip() or max_results <= 0:
2259
+ return []
2260
+ urls: list[str] = []
2261
+ try:
2262
+ _search_rate_limiter.acquire()
2263
+ with DDGS() as ddgs:
2264
+ for item in ddgs.text(query, region="wt-wt", safesearch="moderate", max_results=max_results):
2265
+ url = (item.get("href") or item.get("url") or "").strip()
2266
+ if url:
2267
+ urls.append(url)
2268
+ except Exception:
2269
+ pass
2270
+ # De-duplicate while preserving order
2271
+ seen = set()
2272
+ deduped = []
2273
+ for u in urls:
2274
+ if u not in seen:
2275
+ seen.add(u)
2276
+ deduped.append(u)
2277
+ return deduped
2278
+
2279
+
2280
+ def _fetch_page_markdown(url: str, max_chars: int = 3000) -> str:
2281
+ """Fetch a single URL and return cleaned Markdown using existing Fetch_Webpage.
2282
+
2283
+ Returns empty string on error.
2284
+ """
2285
+ try:
2286
+ # Intentionally skip global fetch rate limiting for Deep Research speed.
2287
+ return Fetch_Webpage(url=url, max_chars=max_chars, strip_selectors="", url_scraper=False, offset=0) # type: ignore[misc]
2288
+ except Exception:
2289
+ return ""
2290
+
2291
+
2292
+ def _truncate_join(parts: list[str], max_chars: int) -> tuple[str, bool]:
2293
+ out = []
2294
+ total = 0
2295
+ truncated = False
2296
+ for p in parts:
2297
+ if not p:
2298
+ continue
2299
+ if total + len(p) > max_chars:
2300
+ out.append(p[: max(0, max_chars - total)])
2301
+ truncated = True
2302
+ break
2303
+ out.append(p)
2304
+ total += len(p)
2305
+ return ("\n\n".join(out), truncated)
2306
+
2307
+
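# Editorial illustration (not part of the commit): hand-traced behaviour of the
# helper above. Note the budget counts only the parts themselves, not the
# "\n\n" separators inserted between them.
text, was_truncated = _truncate_join(["a" * 10, "b" * 10], max_chars=15)
# text == "aaaaaaaaaa\n\nbbbbb"  (second block cut at the 15-char budget)
# was_truncated is True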
2308
+ def _build_research_prompt(
2309
+ summary: str,
2310
+ queries: list[str],
2311
+ url_list: list[str],
2312
+ pages_map: dict[str, str],
2313
+ ) -> str:
2314
+ researcher_instructions = (
2315
+ "You are Nymbot, a helpful deep research assistant. You will be asked a Query from a user and you will create a long, comprehensive, well-structured research report in response to the user's Query.\n\n"
2316
+ "You have been provided with User Question, Search Queries, and numerous webpages that the searches yielded.\n\n"
2317
+ "<report_format>\n"
2318
+ "Write a well-formatted report in the structure of a scientific report to a broad audience. The report must be readable and have a nice flow of Markdown headers and paragraphs of text. Do NOT use bullet points or lists which break up the natural flow. The report must be exhaustive for comprehensive topics.\n"
2319
+ "For any given user query, first determine the major themes or areas that need investigation, then structure these as main sections, and develop detailed subsections that explore various facets of each theme. Each section and subsection requires paragraphs of texts that need to all connect into one narrative flow.\n"
2320
+ "</report_format>\n\n"
2321
+ "<document_structure>\n"
2322
+ "- Always begin with a clear title using a single # header\n"
2323
+ "- Organize content into major sections using ## headers\n"
2324
+ "- Further divide into subsections using ### headers\n"
2325
+ "- Use #### headers sparingly for special subsections\n"
2326
+ "- Never skip header levels\n"
2327
+ "- Write multiple paragraphs per section or subsection\n"
2328
+ "- Each paragraph must contain at least 4-5 sentences, present novel insights and analysis grounded in source material, connect ideas to original query, and build upon previous paragraphs to create a narrative flow\n"
2329
+ "- Never use lists, instead always use text or tables\n\n"
2330
+ "Mandatory Section Flow:\n"
2331
+ "1. Title (# level)\n - Before writing the main report, start with one detailed paragraph summarizing key findings\n"
2332
+ "2. Main Body Sections (## level)\n - Each major topic gets its own section (## level). There MUST BE at least 5 sections.\n - Use ### subsections for detailed analysis\n - Every section or subsection needs at least one paragraph of narrative before moving to the next section\n - Do NOT have a section titled \"Main Body Sections\" and instead pick informative section names that convey the theme of the section\n"
2333
+ "3. Conclusion (## level)\n - Synthesis of findings\n - Potential recommendations or next steps\n"
2334
+ "</document_structure>\n\n"
2335
+ "<planning_rules>\n"
2336
+ "- Always break it down into multiple steps\n"
2337
+ "- Assess the different sources and whether they are useful for any steps needed to answer the query\n"
2338
+ "- Create the best report that weighs all the evidence from the sources\n"
2339
+ "- Remember that the current date is: Wednesday, April 23, 2025, 11:50 AM EDT\n"
2340
+ "- Make sure that your final report addresses all parts of the query\n"
2341
+ "- Communicate a brief high-level plan in the introduction; do not reveal chain-of-thought.\n"
2342
+ "- When referencing sources during analysis, you should still refer to them by index with brackets and follow <citations>\n"
2343
+ "- As a final step, review your planned report structure and ensure it completely answers the query.\n"
2344
+ "</planning_rules>\n\n"
2345
+ )
2346
+
2347
+ # Build sources block limited to a reasonable size to avoid overrun
2348
+ # Cap combined sources to ~100k characters (enforced below)
2349
+ sources_blocks: list[str] = []
2350
+ indexed_urls: list[str] = []
2351
+ for idx, u in enumerate(url_list, start=1):
2352
+ txt = pages_map.get(u, "").strip()
2353
+ if not txt:
2354
+ continue
2355
+ indexed_urls.append(f"[{idx}] {u}")
2356
+ # Prefix each source with its index and URL for citation
2357
+ sources_blocks.append(f"[Source {idx}] URL: {u}\n\n{txt}")
2358
+
2359
+ # Cap combined sources aggressively to stay within provider limits
2360
+ sources_joined, truncated = _truncate_join(sources_blocks, max_chars=100_000)
2361
+
2362
+ prompt = []
2363
+ prompt.append(researcher_instructions)
2364
+ prompt.append("<user_query_summary>\n" + (summary or "") + "\n</user_query_summary>\n")
2365
+ # Include populated queries only
2366
+ populated = [q for q in queries if q and q.strip()]
2367
+ if populated:
2368
+ prompt.append("<search_queries>\n" + "\n".join(f"- {q.strip()}" for q in populated) + "\n</search_queries>\n")
2369
+ if indexed_urls:
2370
+ prompt.append("<sources_list>\n" + "\n".join(indexed_urls) + "\n</sources_list>\n")
2371
+ prompt.append("<fetched_documents>\n" + sources_joined + ("\n\n[NOTE] Sources truncated due to context limits." if truncated else "") + "\n</fetched_documents>")
2372
+ return "\n\n".join(prompt)
2373
+
2374
+
2375
+ def _write_report_tmp(text: str) -> str:
2376
+ # Create a unique temp directory and write a deterministic filename inside it.
2377
+ tmp_dir = tempfile.mkdtemp(prefix="deep_research_")
2378
+ path = os.path.join(tmp_dir, "research_report.txt")
2379
+ with open(path, "w", encoding="utf-8") as f:
2380
+ f.write(text)
2381
+ return path
2382
+
2383
+
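# Editorial illustration (not part of the commit): each call to the helper above
# gets its own temp directory, so concurrent reports never overwrite each other
# even though the filename inside is always research_report.txt.
p1 = _write_report_tmp("# Report A")
p2 = _write_report_tmp("# Report B")
# os.path.dirname(p1) != os.path.dirname(p2); both paths end in research_report.txt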
2384
+ def Deep_Research(
2385
+ summary: Annotated[str, "Summarization of research topic (one or more sentences)."],
2386
+ query1: Annotated[str, "DDG Search Query 1"],
2387
+ max1: Annotated[int, "Max results for Query 1 (1-50)"] = 10,
2388
+ query2: Annotated[str, "DDG Search Query 2"] = "",
2389
+ max2: Annotated[int, "Max results for Query 2 (1-50)"] = 10,
2390
+ query3: Annotated[str, "DDG Search Query 3"] = "",
2391
+ max3: Annotated[int, "Max results for Query 3 (1-50)"] = 10,
2392
+ query4: Annotated[str, "DDG Search Query 4"] = "",
2393
+ max4: Annotated[int, "Max results for Query 4 (1-50)"] = 10,
2394
+ query5: Annotated[str, "DDG Search Query 5"] = "",
2395
+ max5: Annotated[int, "Max results for Query 5 (1-50)"] = 10,
2396
+ ) -> tuple[str, str, str]:
2397
+ """
2398
+ Run deep research by searching, fetching pages, and generating a comprehensive report via a large LLM provider.
2399
+
2400
+ Pipeline:
2401
+ 1) Perform up to 5 DuckDuckGo searches (URLs only). If total requested > 50, each query is limited to 10.
2402
+ 2) Fetch all discovered URLs (up to 50) as cleaned Markdown (max 3000 chars per page).
2403
+ 3) Call Hugging Face Inference Providers (Cerebras) with model `Qwen/Qwen3-235B-A22B-Thinking-2507` to write a research report.
2404
+
2405
+ Args:
2406
+ summary (str): A brief description of the overall research topic or user question.
2407
+ This is shown to the researcher model and used to frame the report.
2408
+ query1 (str): DuckDuckGo search query #1. Required if you want any results.
2409
+ Example: "site:nature.com CRISPR ethical implications".
2410
+ max1 (int): Maximum number of URLs to take from query #1 (1–50).
2411
+ If the combined total requested across all queries exceeds 50, each query will be capped to 10.
2412
+ query2 (str): DuckDuckGo search query #2. Optional; leave empty to skip.
2413
+ max2 (int): Maximum number of URLs to take from query #2 (1–50).
2414
+ query3 (str): DuckDuckGo search query #3. Optional; leave empty to skip.
2415
+ max3 (int): Maximum number of URLs to take from query #3 (1–50).
2416
+ query4 (str): DuckDuckGo search query #4. Optional; leave empty to skip.
2417
+ max4 (int): Maximum number of URLs to take from query #4 (1–50).
2418
+ query5 (str): DuckDuckGo search query #5. Optional; leave empty to skip.
2419
+ max5 (int): Maximum number of URLs to take from query #5 (1–50).
2420
+
2421
+ Returns:
2422
+ - Markdown research report
2423
+ - Newline-separated list of fetched URLs
2424
+ - Path to a downloadable .txt file containing the full report
2425
+
2426
+ Raises:
2427
+ gr.Error: If a required Hugging Face token is not provided or if the researcher
2428
+ model call fails after retries.
2429
+
2430
+ Notes:
2431
+ - Total URLs across queries are capped at 50.
2432
+ - Each fetched page is truncated to ~3000 characters before prompting the model.
2433
+ - The function is optimized to complete within typical MCP time budgets.
2434
+ """
2435
+ _log_call_start(
2436
+ "Deep_Research",
2437
+ summary=_truncate_for_log(summary or "", 200),
2438
+ queries=[q for q in [query1, query2, query3, query4, query5] if q],
2439
+ )
2440
+
2441
+ # Validate token
2442
+ if not HF_TEXTGEN_TOKEN:
2443
+ _log_call_end("Deep_Research", "error=missing HF token")
2444
+ raise gr.Error("Please provide a `HF_READ_TOKEN` to enable Deep Research.")
2445
+
2446
+ # Normalize caps per spec and sanitize queries
2447
+ queries = [
2448
+ _normalize_query(query1 or ""),
2449
+ _normalize_query(query2 or ""),
2450
+ _normalize_query(query3 or ""),
2451
+ _normalize_query(query4 or ""),
2452
+ _normalize_query(query5 or ""),
2453
+ ]
2454
+ reqs = [max(1, min(50, int(max1))), max(1, min(50, int(max2))), max(1, min(50, int(max3))), max(1, min(50, int(max4))), max(1, min(50, int(max5)))]
2455
+ total_requested = sum(reqs)
2456
+ if total_requested > 50:
2457
+ # Enforce rule: each query fetches 10 results when over 50 total requested
2458
+ reqs = [10, 10, 10, 10, 10]
2459
+
2460
+ # Overall deadline to avoid MCP 60s timeout (reserve ~5s for prompt+inference)
2461
+ start_ts = time.time()
2462
+ budget_seconds = 55.0
2463
+ deadline = start_ts + budget_seconds
2464
+
2465
+ def time_left() -> float:
2466
+ return max(0.0, deadline - time.time())
2467
+
2468
+ # 1) Run searches (parallelize queries to reduce latency) and stop if budget exceeded
2469
+ all_urls: list[str] = []
2470
+ from concurrent.futures import ThreadPoolExecutor, as_completed
2471
+ tasks = []
2472
+ with ThreadPoolExecutor(max_workers=min(5, sum(1 for q in queries if q.strip())) or 1) as executor:
2473
+ for q, n in zip(queries, reqs):
2474
+ if not q.strip():
2475
+ continue
2476
+ tasks.append(executor.submit(_search_urls_only, q.strip(), n))
2477
+ for fut in as_completed(tasks):
2478
+ try:
2479
+ urls = fut.result() or []
2480
+ except Exception:
2481
+ urls = []
2482
+ for u in urls:
2483
+ if u not in all_urls:
2484
+ all_urls.append(u)
2485
+ if len(all_urls) >= 50:
2486
+ break
2487
+ if time_left() <= 0.5:
2488
+ # Out of budget for searching; stop early
2489
+ break
2490
+ # Don't block on leftover tasks; cancel/shutdown immediately
2491
+ # Python futures don't support true cancel if running, but we can just avoid waiting
2492
+ # and let executor context exit cleanly.
2493
+ if len(all_urls) > 50:
2494
+ all_urls = all_urls[:50]
2495
+
2496
+ # Filter obviously irrelevant/shopping/dictionary/forum domains that often appear due to phrase tokenization
2497
+ blacklist = {
2498
+ "homedepot.com",
2499
+ "tractorsupply.com",
2500
+ "mcmaster.com",
2501
+ "mrchain.com",
2502
+ "answers.com",
2503
+ "city-data.com",
2504
+ "dictionary.cambridge.org",
2505
+ }
2506
+ def _domain(u: str) -> str:
2507
+ try:
2508
+ return urlparse(u).netloc.lower()
2509
+ except Exception:
2510
+ return ""
2511
+ all_urls = [u for u in all_urls if _domain(u) not in blacklist]
2512
+
2513
+ # Skip known large/non-HTML file types to avoid wasted fetch time
2514
+ skip_exts = (
2515
+ ".pdf", ".ppt", ".pptx", ".doc", ".docx", ".xls", ".xlsx",
2516
+ ".zip", ".gz", ".tgz", ".bz2", ".7z", ".rar"
2517
+ )
2518
+ def _skip_url(u: str) -> bool:
2519
+ try:
2520
+ path = urlparse(u).path.lower()
2521
+ except Exception:
2522
+ return False
2523
+ return any(path.endswith(ext) for ext in skip_exts)
2524
+ all_urls = [u for u in all_urls if not _skip_url(u)]
2525
+
2526
+ # 2) Fetch pages (markdown, 3000 chars) with slow-host requeue (3s delay), respecting deadline
2527
+ pages: dict[str, str] = {}
2528
+ if all_urls:
2529
+ from concurrent.futures import ThreadPoolExecutor, Future
2530
+ from collections import deque
2531
+
2532
+ queue = deque(all_urls)
2533
+ attempts: dict[str, int] = {u: 0 for u in all_urls}
2534
+ max_attempts = 2 # fewer retries to honor budget
2535
+ max_workers = min(12, max(4, len(all_urls)))
2536
+
2537
+ in_flight: dict[Future, str] = {}
2538
+
2539
+ def schedule_next(executor: ThreadPoolExecutor) -> None:
2540
+ while queue and len(in_flight) < max_workers:
2541
+ u = queue.popleft()
2542
+ # Skip if already fetched or exceeded attempts
2543
+ if u in pages:
2544
+ continue
2545
+ if attempts[u] >= max_attempts:
2546
+ continue
2547
+ attempts[u] += 1
2548
+ # Adaptive per-attempt timeout based on time remaining; min 2s, max 10s
2549
+ tl = time_left()
2550
+ per_timeout = 10.0 if tl > 15 else (5.0 if tl > 8 else 2.0)
2551
+ fut = executor.submit(_fetch_page_markdown_fast, u, 3000, per_timeout)
2552
+ in_flight[fut] = u
2553
+
2554
+ delayed: list[tuple[float, str]] = [] # (ready_time, url)
2555
+
2556
+ with ThreadPoolExecutor(max_workers=max_workers) as executor:
2557
+ schedule_next(executor)
2558
+
2559
+ while (in_flight or queue) and time_left() > 0.2:
2560
+ # Move any delayed items whose time has arrived back into the queue
2561
+ now = time.time()
2562
+ if delayed:
2563
+ ready, not_ready = [], []
2564
+ for t, u in delayed:
2565
+ (ready if t <= now else not_ready).append((t, u))
2566
+ delayed = not_ready
2567
+ for _, u in ready:
2568
+ queue.append(u)
2569
+ # Try to schedule newly ready URLs
2570
+ if ready:
2571
+ schedule_next(executor)
2572
+
2573
+ done: list[Future] = []
2574
+ # Poll completed futures without blocking too long
2575
+ for fut in list(in_flight.keys()):
2576
+ if fut.done():
2577
+ done.append(fut)
2578
+
2579
+ if not done:
2580
+ # If nothing to do but we have delayed items pending, sleep until next due time (capped)
2581
+ if not queue and delayed:
2582
+ sleep_for = max(0.02, min(0.25, max(0.0, min(t for t, _ in delayed) - time.time())))
2583
+ time.sleep(sleep_for)
2584
+ else:
2585
+ # brief sleep to avoid busy spin
2586
+ time.sleep(0.05)
2587
+ else:
2588
+ for fut in done:
2589
+ u = in_flight.pop(fut)
2590
+ try:
2591
+ md = fut.result()
2592
+ if md and not md.startswith("Unsupported content type") and not md.startswith("An error occurred"):
2593
+ pages[u] = md
2594
+ try:
2595
+ print(f"[FETCH OK] {u} (chars={len(md)})", flush=True)
2596
+ except Exception:
2597
+ pass
2598
+ else:
2599
+ # If empty due to non-timeout error, don't retry further
2600
+ pass
2601
+ except SlowHost:
2602
+ # Requeue to the back after 3 seconds
2603
+ # But only if we have enough time left for a retry window
2604
+ if time_left() > 5.0:
2605
+ delayed.append((time.time() + 3.0, u))
2606
+ except Exception:
2607
+ # Non-timeout error; skip
2608
+ pass
2609
+ # After handling done items, try to schedule more
2610
+ schedule_next(executor)
2611
+
2612
+ # If budget is nearly up and no pages were fetched, fall back to using the unique URL list in prompt (no content)
2613
+ # The prompt builder will include sources list even if pages_map is empty; LLM can still reason over URLs indirectly.
2614
+
2615
+ # Build final prompt
2616
+ prompt = _build_research_prompt(summary=summary or "", queries=[q for q in queries if q.strip()], url_list=list(pages.keys()), pages_map=pages)
2617
+
2618
+ # 3) Call the Researcher model via Cerebras provider with robust fallbacks
2619
+ messages = [
2620
+ {"role": "system", "content": "You are Nymbot, an expert deep research assistant."},
2621
+ {"role": "user", "content": prompt},
2622
+ ]
2623
+ try:
2624
+ prompt_chars = len(prompt)
2625
+ except Exception:
2626
+ prompt_chars = -1
2627
+ print(f"[PIPELINE] Fetch complete: pages={len(pages)}, unique_urls={len(pages.keys())}, prompt_chars={prompt_chars}", flush=True)
2628
+ print("[PIPELINE] Starting inference (provider=cerebras, model=Qwen/Qwen3-235B-A22B-Thinking-2507)", flush=True)
2629
+ def _run_inference(provider: str, max_tokens: int, temp: float, top_p: float):
2630
+ client = InferenceClient(provider=provider, api_key=HF_TEXTGEN_TOKEN)
2631
+ return client.chat.completions.create(
2632
+ model="Qwen/Qwen3-235B-A22B-Thinking-2507",
2633
+ messages=messages,
2634
+ max_tokens=max_tokens,
2635
+ temperature=temp,
2636
+ top_p=top_p,
2637
+ )
2638
+ try:
2639
+ # Attempt 1: Cerebras, full prompt
2640
+ print("[LLM] Attempt 1: provider=cerebras, max_tokens=32768", flush=True)
2641
+ completion = _run_inference("cerebras", max_tokens=32768, temp=0.3, top_p=0.95)
2642
+ except Exception as e1:
2643
+ print(f"[LLM] Attempt 1 failed: {str(e1)[:200]}", flush=True)
2644
+ # Attempt 2: Cerebras, trimmed prompt and lower max_tokens
2645
+ try:
2646
+ prompt2 = _build_research_prompt(summary=summary or "", queries=[q for q in queries if q.strip()], url_list=list(pages.keys())[:30], pages_map={k: pages[k] for k in list(pages.keys())[:30]})
2647
+ messages = [
2648
+ {"role": "system", "content": "You are Nymbot, an expert deep research assistant."},
2649
+ {"role": "user", "content": prompt2},
2650
+ ]
2651
+ print("[LLM] Attempt 2: provider=cerebras (trimmed), max_tokens=16384", flush=True)
2652
+ completion = _run_inference("cerebras", max_tokens=16384, temp=0.7, top_p=0.95)
2653
+ except Exception as e2:
2654
+ print(f"[LLM] Attempt 2 failed: {str(e2)[:200]}", flush=True)
2655
+ # Attempt 3: provider auto-fallback with trimmed prompt
2656
+ try:
2657
+ print("[LLM] Attempt 3: provider=auto, max_tokens=8192", flush=True)
2658
+ completion = _run_inference("auto", max_tokens=8192, temp=0.7, top_p=0.95)
2659
+ except Exception as e3:
2660
+ _log_call_end("Deep_Research", f"error={_truncate_for_log(str(e3), 260)}")
2661
+ raise gr.Error(f"Researcher model call failed: {e3}")
2662
+ raw = completion.choices[0].message.content or ""
2663
+ # 1) Strip any internal <think>...</think> blocks produced by the Thinking model
2664
+ try:
2665
+ no_think = re.sub(r"<think>[\s\S]*?</think>", "", raw, flags=re.IGNORECASE)
2666
+ no_think = re.sub(r"</?think>", "", no_think, flags=re.IGNORECASE)
2667
+ except Exception:
2668
+ no_think = raw
2669
+
2670
+ # 2) Remove planning / meta-analysis paragraphs that are part of the model's visible thinking trace.
2671
+ # Heuristics: paragraphs (double-newline separated) containing phrases like "let me", "first,", "now i'll",
2672
+ # "i will", "i'll", "let's", "now let me", or starting with "first" (case-insensitive).
2673
+ try:
2674
+ paragraphs = [p for p in re.split(r"\n\s*\n", no_think) if p.strip()]
2675
+ keep: list[str] = []
2676
+ removed = 0
2677
+ planning_re = re.compile(r"\b(let me|now i(?:'ll| will)?|first,|i will|i'll|let's|now let me|i need to)\b", re.IGNORECASE)
2678
+ for p in paragraphs:
2679
+ # If the paragraph looks like explicit planning/analysis, drop it
2680
+ if planning_re.search(p):
2681
+ removed += 1
2682
+ continue
2683
+ keep.append(p)
2684
+ report = "\n\n".join(keep).strip()
2685
+ # If we removed everything, fall back to the no_think version
2686
+ if not report:
2687
+ report = no_think.strip()
2688
+ except Exception:
2689
+ report = no_think
2690
+
2691
+ # 3) Final whitespace normalization
2692
+ report = re.sub(r"\n\s*\n\s*\n+", "\n\n", report)
2693
+ # Emit a short postprocess log
2694
+ try:
2695
+ print(f"[POSTPROCESS] removed_planning_paragraphs={removed}, raw_chars={len(raw)}, final_chars={len(report)}", flush=True)
2696
+ except Exception:
2697
+ pass
2698
+
2699
+ # Build outputs
2700
+ links_text = "\n".join([f"[{i+1}] {u}" for i, u in enumerate(pages.keys())])
2701
+ file_path = _write_report_tmp(report)
2702
+ elapsed = time.time() - start_ts
2703
+ # Print explicit timing and include in structured log output
2704
+ print(f"[TIMING] Deep_Research elapsed: {elapsed:.2f}s", flush=True)
2705
+ _log_call_end("Deep_Research", f"urls={len(pages)} file={os.path.basename(file_path)} duration={elapsed:.2f}s")
2706
+ return report, links_text, file_path
2707
+
2708
+
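# Editorial illustration (not part of the commit): a hypothetical direct call;
# the argument names and return shape come from the signature above, the values
# are made up.
report_md, links_text, report_path = Deep_Research(
    summary="State of small modular reactors in 2024",
    query1="small modular reactor deployments 2024", max1=20,
    query2="SMR licensing status 2024", max2=20,
    query3="SMR cost overrun analysis", max3=20,
)
# The requested totals (20 + 20 + 20 plus the defaults of 10 for the two unused
# queries) exceed 50, so every query is capped to 10 results; with three active
# queries at most ~30 unique URLs are fetched before the report is written.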
2709
+ deep_research_interface = gr.Interface(
2710
+ fn=Deep_Research,
2711
+ inputs=[
2712
+ gr.Textbox(label="Summarization of research topic", lines=3, placeholder="Briefly summarize the research topic or user question"),
2713
+ gr.Textbox(label="DDG Search Query 1"),
2714
+ gr.Slider(1, 50, value=10, step=1, label="Max results (Q1)"),
2715
+ gr.Textbox(label="DDG Search Query 2", value=""),
2716
+ gr.Slider(1, 50, value=10, step=1, label="Max results (Q2)"),
2717
+ gr.Textbox(label="DDG Search Query 3", value=""),
2718
+ gr.Slider(1, 50, value=10, step=1, label="Max results (Q3)"),
2719
+ gr.Textbox(label="DDG Search Query 4", value=""),
2720
+ gr.Slider(1, 50, value=10, step=1, label="Max results (Q4)"),
2721
+ gr.Textbox(label="DDG Search Query 5", value=""),
2722
+ gr.Slider(1, 50, value=10, step=1, label="Max results (Q5)"),
2723
+ ],
2724
+ outputs=[
2725
+ gr.Markdown(label="Research Report"),
2726
+ gr.Textbox(label="Fetched Links", lines=8),
2727
+ gr.File(label="Download Research Report", file_count="single"),
2728
+ ],
2729
+ title="Deep Research",
2730
+ description=(
2731
+ "<div style=\"text-align:center\">Perform multi-query web research: search with DuckDuckGo, fetch up to 50 pages in parallel, "
2732
+ "and generate a comprehensive report using a large LLM via Hugging Face Inference Providers (Cerebras). Requires HF_READ_TOKEN.</div>"
2733
+ ),
2734
+ api_description=(
2735
+ "Runs 1–5 DDG searches (URLs only), caps total results to 50 (when exceeding, each query returns 10). "
2736
+ "Fetches all URLs (3000 chars each) and calls the Researcher to write a research report. "
2737
+ "Returns the report (Markdown), the list of sources, and a downloadable text file path. "
2738
+ "Provide the user with one-paragraph summary of the research report and the txt file in this format `![research_report.txt](URL)`"
2739
+ ),
2740
+ flagging_mode="never",
2741
+ show_api=bool(HF_TEXTGEN_TOKEN),
2742
+ )
2743
+
2744
  _interfaces = [
2745
  fetch_interface,
2746
  concise_interface,
 
2749
  kokoro_interface,
2750
  image_generation_interface, # Always visible in UI
2751
  video_generation_interface, # Always visible in UI
2752
+ deep_research_interface,
2753
  ]
2754
  _tab_names = [
2755
  "Fetch Webpage",
 
2759
  "Kokoro TTS",
2760
  "Image Generation",
2761
  "Video Generation",
2762
+ "Deep Research",
2763
  ]
2764
 
2765
  with gr.Blocks(title="Nymbo/Tools MCP", theme="Nymbo/Nymbo_Theme", css=CSS_STYLES) as demo: