Nymbo committed (verified)
Commit b3ae1ba · Parent(s): ed7ddca

Search MCP changes... READ ME

I simplified the `Fetch_Webpage` and `Search_DuckDuckGo` tools a lot; they're much cleaner and more reliable now. That said, the MCP instructions given to the LLM are still a bit verbose.

Currently, having all six tools enabled adds about 2,000 tokens to context. I'd like it to be a bit less, but the current prompts are very effective for small, shitty models. They also work very well for big models, but I test locally.

Fetch and Search now use a much better user agent and spam protection to avoid bot detection, and both have better error handling.
Search now makes full use of DDG's operators (site: searches, quoted phrases, and other advanced search parameters).
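For reference, a minimal sketch of how the two simplified tools are now called (Search keeps only `query`/`max_results`, Fetch keeps only `url`/`verbosity`); the queries and URL below are just illustrative, not from the repo:

```python
# Illustrative calls against the simplified signatures in this commit.
print(Search_DuckDuckGo(query='site:huggingface.co "MCP" tools', max_results=5))  # DDG operators pass straight through
print(Search_DuckDuckGo(query="kokoro tts -github", max_results=3))               # exclude-term operator
print(Fetch_Webpage(url="https://example.com/article", verbosity="Brief"))        # ~1000-char Markdown output
```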

Files changed (1)
  1. app.py +124 -350
app.py CHANGED
@@ -1,5 +1,5 @@
  # Purpose: One Space that offers six tools/tabs (all exposed as MCP tools):
- # 1) Fetch — extract relevant page content (title, metadata, clean text, hyperlinks)
  # 2) DuckDuckGo Search — compact JSONL search output (short keys to minimize tokens)
  # 3) Python Code Executor — run Python code and capture stdout/errors
  # 4) Kokoro TTS — synthesize speech from text using Kokoro-82M with 54 voice options
@@ -85,96 +85,6 @@ def _http_get_enhanced(url: str) -> requests.Response:
  else:
  raise requests.exceptions.RequestException(f"HTTP error {response.status_code}: {str(e)}")

- def _extract_main_text_enhanced(html: str) -> Tuple[str, BeautifulSoup]:
- """
- Enhanced main text extraction with better fallback mechanisms.
- """
- try:
- # Try Readability first
- doc = Document(html)
- readable_html = doc.summary(html_partial=True)
-
- if readable_html and readable_html.strip():
- soup = BeautifulSoup(readable_html, "lxml")
-
- # Remove noisy tags more comprehensively
- for sel in ["script", "style", "noscript", "iframe", "svg", "nav", "header", "footer", "aside", "[role='banner']", "[role='navigation']", "[role='complementary']"]:
- for tag in soup.select(sel):
- tag.decompose()
-
- # Extract text with better structure preservation
- text_parts = []
- for element in soup.find_all(["h1", "h2", "h3", "h4", "h5", "h6", "p", "li", "blockquote", "div"]):
- chunk = element.get_text(" ", strip=True)
- if chunk and len(chunk) > 15: # Only include substantial content
- # Avoid repetitive disclaimers
- if not ("responses are generated using ai" in chunk.lower() and len(chunk) < 100):
- text_parts.append(chunk)
-
- if text_parts:
- clean_text = _normalize_whitespace("\n\n".join(text_parts))
- # Check if we got substantial content
- if len(clean_text) > 100:
- return clean_text, soup
-
- except Exception:
- pass # Fall through to backup extraction
-
- # Fallback: Parse original HTML more carefully
- try:
- full_soup = BeautifulSoup(html, "lxml")
-
- # Remove unwanted elements
- for element in full_soup.select("script, style, nav, footer, header, aside, [role='banner'], [role='navigation'], [role='complementary']"):
- element.decompose()
-
- # Try to find main content areas
- main_content = (
- full_soup.find("main")
- or full_soup.find("article")
- or full_soup.find("div", class_=re.compile(r"content|main|post|article|body", re.I))
- or full_soup.find("div", id=re.compile(r"content|main|post|article|body", re.I))
- or full_soup.find("section", class_=re.compile(r"content|main|post|article|body", re.I))
- or full_soup.find("body")
- or full_soup
- )
-
- if main_content:
- # More aggressive removal of common noise patterns
- for element in main_content.select(".disclaimer, .warning, .alert, .notice, [class*='cookie'], [class*='banner'], [id*='cookie'], [id*='banner']"):
- element.decompose()
-
- text = main_content.get_text(" ", strip=True)
- text = _normalize_whitespace(text)
-
- # Filter out repetitive text
- lines = text.split('\n')
- filtered_lines = []
- seen_lines = set()
-
- for line in lines:
- line_clean = line.strip()
- if len(line_clean) > 10 and line_clean not in seen_lines:
- # Skip common disclaimers and repetitive content
- if not ("responses are generated using ai" in line_clean.lower() and len(line_clean) < 100):
- filtered_lines.append(line)
- seen_lines.add(line_clean)
-
- clean_text = '\n'.join(filtered_lines)
-
- # Create a minimal soup for link extraction
- minimal_soup = BeautifulSoup(str(main_content), "lxml")
- return clean_text, minimal_soup
-
- except Exception:
- pass
-
- # Last resort: Just get all text
- fallback_soup = BeautifulSoup(html, "lxml")
- text = fallback_soup.get_text(" ", strip=True)
- return _normalize_whitespace(text), fallback_soup
-
-
  def _normalize_whitespace(text: str) -> str:
  """
  Squeeze extra spaces and blank lines to keep things compact.
@@ -338,136 +248,57 @@ def _fullpage_markdown_from_soup(full_soup: BeautifulSoup, base_url: str) -> str
  return markdown_text or "No content could be extracted."


- def _extract_links(readable_soup: BeautifulSoup, base_url: str, max_links: int) -> List[Tuple[str, str]]:
- """
- Collect clean, unique, absolute links from the readable section only.
- (Layman's terms: pull a tidy list of links from the article body.)
- """
- seen = set()
- links: List[Tuple[str, str]] = []
-
- for a in readable_soup.find_all("a", href=True):
- href = a.get("href").strip()
- # Skip junk links we can't use
- if not href or href.startswith("#") or href.startswith("mailto:") or href.startswith("javascript:"):
- continue
-
- # Resolve relative URLs, strip fragments (#…)
- absolute = urljoin(base_url, href)
- absolute, _ = urldefrag(absolute)
-
- if absolute in seen:
- continue
- seen.add(absolute)
-
- text = a.get_text(" ", strip=True)
- if len(text) > 120:
- text = text[:117] + "…"
-
- links.append((text or absolute, absolute))
-
- if len(links) >= max_links > 0:
- break
-
- return links
-
-
- def _format_markdown(
- meta: Dict[str, str],
- body: str,
- body_truncated: bool,
- links: List[Tuple[str, str]],
- include_text: bool,
- include_metadata: bool,
- include_links: bool,
- verbosity: str,
- ) -> str:
  """
- Assemble a compact Markdown summary with optional sections.
- (Layman's terms: build the final markdown output with options.)
  """
- lines: List[str] = []
-
- # Title header
- title = meta.get("title") or meta.get("domain") or "Untitled"
- lines.append(f"# {title}")
-
- # Metadata section (only show what exists)
- if include_metadata:
- md: List[str] = []
- if meta.get("description"):
- md.append(f"- **Description:** {meta['description']}")
- if meta.get("site_name"):
- md.append(f"- **Site:** {meta['site_name']}")
- if meta.get("canonical"):
- md.append(f"- **Canonical:** {meta['canonical']}")
- if meta.get("lang"):
- md.append(f"- **Language:** {meta['lang']}")
- if meta.get("fetched_url"):
- md.append(f"- **Fetched From:** {meta['fetched_url']}")
- if md:
- lines.append("## Metadata")
- lines.extend(md)
-
- # Body text
- if include_text and body:
- if verbosity == "Brief":
- brief, was_more = _truncate(body, 800)
- lines.append("## Text")
- lines.append(brief)
- if was_more or body_truncated:
- lines.append("\n> (Trimmed for brevity)")
- else:
- lines.append("## Text")
- lines.append(body)
- if body_truncated:
- lines.append("\n> (Trimmed for brevity)")
-
- # Links section
- if include_links and links:
- lines.append(f"## Links ({len(links)})")
- for text, url in links:
- lines.append(f"- [{text}]({url})")
-
- return "\n\n".join(lines).strip()


  def Fetch_Webpage( # <-- MCP tool #1 (Fetch)
- url: Annotated[str, "The absolute URL to fetch (must return HTML)."] ,
- verbosity: Annotated[str, "Controls body length: one of 'Brief', 'Standard', or 'Full'."] = "Standard",
- include_metadata: Annotated[bool, "Include a Metadata section (description, site name, canonical, lang, fetched URL)."] = True,
- include_text: Annotated[bool, "Include the readable main text extracted with Readability."] = True,
- include_links: Annotated[bool, "Include outbound links discovered in the readable section."] = True,
- max_chars: Annotated[int, "Hard cap for body characters after the verbosity preset. Use 0 to disable the cap."] = 3000,
- max_links: Annotated[int, "Maximum number of links to include from the readable content. Set 0 to omit links."] = 20,
- full_page_markdown: Annotated[bool, "If true, return the page as full Markdown (Content Scraper mode) instead of a compact summary."] = False,
  ) -> str:
  """
- Fetch a web page and return a compact Markdown summary containing title, key
- metadata, readable main text, and outbound links.

  Args:
- url: The absolute URL to fetch (must return HTML).
- verbosity: Controls body length: one of 'Brief', 'Standard', or 'Full'.
- include_metadata: Include a Metadata section (description, site name, canonical, lang, fetched URL).
- include_text: Include the readable main text extracted with Readability.
- include_links: Include outbound links discovered in the readable section.
- max_chars: Hard cap for body characters after the verbosity preset. Use 0 to disable the cap.
- max_links: Maximum number of links to include from the readable content. Set 0 to omit links.
- full_page_markdown: If True, return the page converted to full Markdown (Content Scraper mode)
- instead of the compact summary. This ignores verbosity/include_* and max_* limits and
- attempts to convert the main content area to Markdown with headings preserved.

  Returns:
- str: Markdown that may contain the following sections:
- - Title (H1)
- - Metadata (optional)
- - Text (optional, may be trimmed)
- - Links (optional, deduped and absolute)
-
- Special mode:
- If full_page_markdown=True, the function returns the page converted to Markdown,
- similar to the "Content Scraper" tool, ignoring verbosity/include_* limits.
  """
  if not url or not url.strip():
  return "Please enter a valid URL."
@@ -487,42 +318,17 @@ def Fetch_Webpage( # <-- MCP tool #1 (Fetch)
  resp.encoding = resp.encoding or resp.apparent_encoding
  html = resp.text

- # Full-page soup for metadata (and potential Markdown conversion)
  full_soup = BeautifulSoup(html, "lxml")
- meta = _extract_metadata(full_soup, final_url)
-
- # Content Scraper mode: return full-page Markdown early
- if full_page_markdown:
- return _fullpage_markdown_from_soup(full_soup, final_url)
-
- # Readable content with enhanced extraction
- body_text, readable_soup = _extract_main_text_enhanced(html)
- if not body_text:
- # Fallback to "whole-page text" if Readability found nothing
- fallback_text = full_soup.get_text(" ", strip=True)
- body_text = _normalize_whitespace(fallback_text)
-
- # Verbosity presets (we keep the smaller of preset vs. user cap)
- preset_caps = {"Brief": 1200, "Standard": 3000, "Full": 999_999}
- target_cap = preset_caps.get(verbosity, 3000)
- cap = min(max_chars if max_chars > 0 else target_cap, target_cap)
- body_text, truncated = _truncate(body_text, cap) if include_text else ("", False)
-
- # Extract links from the simplified content only
- links = _extract_links(readable_soup, final_url, max_links=max_links if include_links else 0)
-
- # Final compact Markdown
- md = _format_markdown(
- meta=meta,
- body=body_text,
- body_truncated=truncated,
- links=links,
- include_text=include_text,
- include_metadata=include_metadata,
- include_links=include_links,
- verbosity=verbosity,
- )
- return md or "No content could be extracted."


  # ============================================
@@ -558,37 +364,28 @@ _search_rate_limiter = RateLimiter(requests_per_minute=20)
  _fetch_rate_limiter = RateLimiter(requests_per_minute=25)

  def Search_DuckDuckGo( # <-- MCP tool #2 (DDG Search)
- query: Annotated[str, "The search query (supports operators like site:, quotes, OR)."] ,
  max_results: Annotated[int, "Number of results to return (1–20)."] = 5,
- include_snippets: Annotated[bool, "Include a short snippet for each result (adds tokens)."] = False,
- max_snippet_chars: Annotated[int, "Character cap applied to each snippet when included."] = 80,
- dedupe_domains: Annotated[bool, "If true, only the first result from each domain is kept."] = True,
- title_chars: Annotated[int, "Character cap applied to titles."] = 80,
- output_format: Annotated[str, "Output format: 'jsonl' for compact JSON or 'readable' for LLM-friendly text."] = "jsonl",
  ) -> str:
  """
- Run a DuckDuckGo search with enhanced error handling and multiple output formats.
- Returns either compact JSONL (t=title, u=url, optional s=snippet) or readable text
- format optimized for LLM consumption with better error messages.

  Args:
- query: The search query (supports operators like site:, quotes, OR).
- max_results: Number of results to return (1–20).
- include_snippets: Include a short snippet for each result (adds tokens).
- max_snippet_chars: Character cap applied to each snippet when included.
- dedupe_domains: If true, only the first result from each domain is kept.
- title_chars: Character cap applied to titles.
- output_format: Output format: 'jsonl' for compact JSON or 'readable' for LLM-friendly text.

  Returns:
- str: Either JSONL format with {"t": "title", "u": "url"[, "s": "snippet"]}
- or readable text format for better LLM consumption.
  """
  if not query or not query.strip():
- error_msg = "No search query provided. Please enter a search term."
- if output_format == "readable":
- return error_msg
- return json.dumps({"error": error_msg}, ensure_ascii=False, separators=(",", ":"))

  # Validate max_results
  max_results = max(1, min(20, max_results))
@@ -610,68 +407,41 @@ def Search_DuckDuckGo( # <-- MCP tool #2 (DDG Search)
  elif "network" in str(e).lower() or "connection" in str(e).lower():
  error_msg = "Network connection error. Please check your internet connection and try again."

- if output_format == "readable":
- return f"Error: {error_msg}"
- return json.dumps({"error": error_msg}, ensure_ascii=False, separators=(",", ":"))

  if not raw:
- no_results_msg = f"No results found for query: {query}"
- if output_format == "readable":
- return no_results_msg
- return json.dumps({"info": no_results_msg}, ensure_ascii=False, separators=(",", ":"))

- seen_domains = set()
  results = []

  for r in raw or []:
- title = _shorten((r.get("title") or "").strip(), title_chars)
  url = (r.get("href") or r.get("link") or "").strip()
  body = (r.get("body") or r.get("snippet") or "").strip()

  if not url:
  continue

- if dedupe_domains:
- dom = _domain_of(url)
- if dom in seen_domains:
- continue
- seen_domains.add(dom)
-
  result_obj = {
  "title": title or _domain_of(url),
- "url": url
  }
-
- if include_snippets and body:
- result_obj["snippet"] = _shorten(body, max_snippet_chars)

  results.append(result_obj)

  if not results:
- no_results_msg = f"No valid results found for query: {query}"
- if output_format == "readable":
- return no_results_msg
- return json.dumps({"info": no_results_msg}, ensure_ascii=False, separators=(",", ":"))
-
- # Format output based on requested format
- if output_format == "readable":
- lines = [f"Found {len(results)} search results for: {query}\n"]
- for i, result in enumerate(results, 1):
- lines.append(f"{i}. {result['title']}")
- lines.append(f" URL: {result['url']}")
- if "snippet" in result:
- lines.append(f" Summary: {result['snippet']}")
- lines.append("") # Empty line between results
- return "\n".join(lines)
- else:
- # JSONL format with compact keys
- lines = []
- for result in results:
- obj = {"t": result["title"], "u": result["url"]}
- if "snippet" in result:
- obj["s"] = result["snippet"]
- lines.append(json.dumps(obj, ensure_ascii=False, separators=(",", ":")))
- return "\n".join(lines)


  # ======================================
@@ -683,7 +453,7 @@ def Execute_Python(code: Annotated[str, "Python source code to run; stdout is ca
  Execute arbitrary Python code and return captured stdout or an error message.

  Args:
- code: Python source code to run; stdout is captured and returned.

  Returns:
  str: Combined stdout produced by the code, or the exception text if
@@ -850,9 +620,9 @@ def Generate_Speech( # <-- MCP tool #4 (Generate Speech)
  - Voice defaults to "af_heart" (American Female, Heart voice)

  Args:
- text: The text to synthesize. Works best with English but supports multiple languages.
- speed: Speech speed multiplier in 0.5–2.0; 1.0 = normal speed. Default: 1.25 (slightly brisk).
- voice: Voice identifier from 54 available options. Use List_Kokoro_Voices() to see all choices. Default: 'af_heart'.

  Returns:
  A tuple of (sample_rate_hz, audio_waveform) where:
@@ -928,52 +698,50 @@ def Generate_Speech( # <-- MCP tool #4 (Generate Speech)
  # --- Fetch tab (compact controllable extraction) ---
  fetch_interface = gr.Interface(
- fn=Fetch_Webpage, # connect the function to the UI
  inputs=[
  gr.Textbox(label="URL", placeholder="https://example.com/article"),
- gr.Dropdown(label="Verbosity", choices=["Brief", "Standard", "Full"], value="Standard"),
- gr.Checkbox(value=True, label="Include Metadata"),
- gr.Checkbox(value=True, label="Include Main Text"),
- gr.Checkbox(value=True, label="Include Links"),
- gr.Slider(400, 12000, value=3000, step=100, label="Max Characters (body text)"),
- gr.Slider(0, 100, value=20, step=1, label="Max Links"),
- gr.Checkbox(value=False, label="Full-page Markdown (Content Scraper mode)"),
  ],
- outputs=gr.Markdown(label="Extracted Summary"),
  title="Fetch Webpage",
  description=(
- "<div style=\"text-align:center\">Extract title, key metadata, readable text, and links from webpages or toggle full-page Markdown.</div>"
  ),
  api_description=(
- "Fetch a web page and return a compact Markdown summary with title, key "
- "metadata, readable body text, and outbound links. Or, enable the "
- "'Full-page Markdown (Content Scraper mode)' option to return the page "
- "converted to Markdown."
  ),
  allow_flagging="never",
  )

- # --- Concise DDG tab (JSONL with short keys, minimal tokens) ---
  concise_interface = gr.Interface(
  fn=Search_DuckDuckGo,
  inputs=[
  gr.Textbox(label="Query", placeholder="topic OR site:example.com"),
  gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Max results"),
- gr.Checkbox(value=False, label="Include snippets (adds tokens)"),
- gr.Slider(minimum=20, maximum=200, value=80, step=5, label="Max snippet chars"),
- gr.Checkbox(value=True, label="Dedupe by domain"),
- gr.Slider(minimum=20, maximum=120, value=80, step=5, label="Max title chars"),
- gr.Radio(label="Output format", choices=["jsonl", "readable"], value="jsonl", info="JSONL for compact JSON, readable for LLM-friendly text"),
  ],
  outputs=gr.Textbox(label="Search Results", interactive=False),
  title="DuckDuckGo Search",
  description=(
- "<div style=\"text-align:center\">Enhanced web search with better error handling and multiple output formats. JSONL format emits compact keys (t,u[,s]), readable format provides LLM-friendly text.</div>"
  ),
  api_description=(
- "Run a DuckDuckGo search with enhanced error handling and multiple output formats. "
- "Returns either compact JSONL (t=title, u=url, optional s=snippet) or readable text "
- "format optimized for LLM consumption with better error messages."
  ),
  allow_flagging="never",
  submit_btn="Search",
@@ -991,11 +759,11 @@ code_interface = gr.Interface(
  "<div style=\"text-align:center\">Execute Python code and see the output.</div>"
  ),
  api_description=(
- "Execute arbitrary Python code and return captured stdout or an error message.\n\n"
- "Parameters:\n"
- "- code (string): The Python source code to run.\n\n"
- "Returns:\n"
- "- string: Combined stdout produced by the code, or the exception text if execution failed."
  ),
  allow_flagging="never",
  )
@@ -1057,9 +825,10 @@ kokoro_interface = gr.Interface(
  ),
  api_description=(
  "Synthesize speech from text using Kokoro-82M with 54 voice options. Returns (sample_rate, waveform) suitable for playback. "
- "Parameters: text (str), speed (float 0.5–2.0, default 1.25x), voice (str from 54 available options). "
- "Default voice: `af_heart`. "
- "Can generate audio of unlimited length by processing all text segments. "
  "Return the generated media to the user in this format `![Alt text](URL)`"
  ),
  allow_flagging="never",
@@ -1184,9 +953,11 @@ image_generation_interface = gr.Interface(
  ),
  api_description=(
  "Generate a single image from a text prompt using a Hugging Face model (serverless Inference). "
- "Parameters: prompt (str), model_id (str, creator/model-name), negative_prompt (str), steps (int, 1–100), cfg_scale (float, 1–20), "
- "sampler (str, label only), seed (int, -1=random), width/height (int, 64–1216). Returns a PIL.Image. "
- "Return the generated media to the user in this format `![Alt text](URL)`"
  ),
  allow_flagging="never",
  )
@@ -1362,8 +1133,11 @@ video_generation_interface = gr.Interface(
  ),
  api_description=(
  "Generate a short video from a text prompt using a Hugging Face model (Serverless Inference). "
  "Parameters: prompt (str), model_id (str), negative_prompt (str), steps (int), cfg_scale (float), seed (int), "
- "width/height (int), fps (int), duration (float). Return the generated media to the user in this format `![Alt text](URL)`"
  ),
  allow_flagging="never",
  )
 
  # Purpose: One Space that offers six tools/tabs (all exposed as MCP tools):
+ # 1) Fetch — convert webpages to clean Markdown format
  # 2) DuckDuckGo Search — compact JSONL search output (short keys to minimize tokens)
  # 3) Python Code Executor — run Python code and capture stdout/errors
  # 4) Kokoro TTS — synthesize speech from text using Kokoro-82M with 54 voice options

  else:
  raise requests.exceptions.RequestException(f"HTTP error {response.status_code}: {str(e)}")

  def _normalize_whitespace(text: str) -> str:
  """
  Squeeze extra spaces and blank lines to keep things compact.
 
  return markdown_text or "No content could be extracted."


+ def _truncate_markdown(markdown: str, max_chars: int) -> str:
  """
+ Truncate markdown content to a maximum character count while preserving structure.
+ Tries to break at paragraph boundaries when possible.
  """
+ if len(markdown) <= max_chars:
+ return markdown
+
+ # Find a good break point near the limit
+ truncated = markdown[:max_chars]
+
+ # Try to break at the end of a paragraph (double newline)
+ last_paragraph = truncated.rfind('\n\n')
+ if last_paragraph > max_chars * 0.7: # If we find a paragraph break in the last 30%
+ truncated = truncated[:last_paragraph]
+
+ # Try to break at the end of a sentence
+ elif '.' in truncated[-100:]: # Look for a period in the last 100 chars
+ last_period = truncated.rfind('.')
+ if last_period > max_chars * 0.8: # If we find a period in the last 20%
+ truncated = truncated[:last_period + 1]
+
+ return truncated.rstrip() + "\n\n> *[Content truncated for brevity]*"
  def Fetch_Webpage( # <-- MCP tool #1 (Fetch)
+ url: Annotated[str, "The absolute URL to fetch (must return HTML)."],
+ verbosity: Annotated[str, "Controls output length: 'Brief' (1000 chars), 'Standard' (3000 chars), or 'Full' (complete page)."] = "Standard",
  ) -> str:
  """
+ Fetch a web page and return it converted to Markdown format with configurable length.
+
+ This function retrieves a webpage and converts its main content to clean Markdown,
+ preserving headings, formatting, and structure. It automatically removes navigation,
+ footers, scripts, and other non-content elements to focus on the main article or
+ content area.

  Args:
+ url (str): The absolute URL to fetch (must return HTML).
+ verbosity (str): Controls output length:
+ - "Brief": Truncate to 1000 characters for quick summaries
+ - "Standard": Truncate to 3000 characters for balanced content
+ - "Full": Return complete page content with no length limit

  Returns:
+ str: The webpage content converted to Markdown format with:
+ - Page title as H1 header
+ - Main content converted to clean Markdown
+ - Preserved heading hierarchy
+ - Clean formatting without navigation/sidebar elements
+ - Length controlled by verbosity setting
  """
  if not url or not url.strip():
  return "Please enter a valid URL."

  resp.encoding = resp.encoding or resp.apparent_encoding
  html = resp.text

+ # Parse HTML and convert to full-page Markdown
  full_soup = BeautifulSoup(html, "lxml")
+ markdown_content = _fullpage_markdown_from_soup(full_soup, final_url)
+
+ # Apply verbosity-based truncation
+ if verbosity == "Brief":
+ return _truncate_markdown(markdown_content, 1000)
+ elif verbosity == "Standard":
+ return _truncate_markdown(markdown_content, 3000)
+ else: # "Full"
+ return markdown_content


  # ============================================
 
  _fetch_rate_limiter = RateLimiter(requests_per_minute=25)

  def Search_DuckDuckGo( # <-- MCP tool #2 (DDG Search)
+ query: Annotated[str, "The search query (supports operators like site:, quotes, OR)."],
  max_results: Annotated[int, "Number of results to return (1–20)."] = 5,
  ) -> str:
  """
+ Run a DuckDuckGo search with enhanced error handling and readable text output.
+ Always returns results in human-friendly format with snippets included.

  Args:
+ query (str): The search query string. Supports operators like site:, quotes for exact matching,
+ OR for alternatives, and other DuckDuckGo search syntax.
+ Examples:
+ - Basic search: "Python programming"
+ - Site search: "site:example.com"
+ - Exact phrase: "artificial intelligence"
+ - Exclude terms: "cats -dogs"
+ max_results (int): Number of results to return (1–20). Default: 5.

  Returns:
+ str: Search results in readable format with titles, URLs, and snippets as a numbered list.
  """
  if not query or not query.strip():
+ return "No search query provided. Please enter a search term."

  # Validate max_results
  max_results = max(1, min(20, max_results))

  elif "network" in str(e).lower() or "connection" in str(e).lower():
  error_msg = "Network connection error. Please check your internet connection and try again."

+ return f"Error: {error_msg}"

  if not raw:
+ return f"No results found for query: {query}"

  results = []

  for r in raw or []:
+ title = (r.get("title") or "").strip()
  url = (r.get("href") or r.get("link") or "").strip()
  body = (r.get("body") or r.get("snippet") or "").strip()

  if not url:
  continue

  result_obj = {
  "title": title or _domain_of(url),
+ "url": url,
+ "snippet": body
  }

  results.append(result_obj)

  if not results:
+ return f"No valid results found for query: {query}"
+
+ # Format output in readable format
+ lines = [f"Found {len(results)} search results for: {query}\n"]
+ for i, result in enumerate(results, 1):
+ lines.append(f"{i}. {result['title']}")
+ lines.append(f" URL: {result['url']}")
+ if result['snippet']:
+ lines.append(f" Summary: {result['snippet']}")
+ lines.append("") # Empty line between results
+ return "\n".join(lines)


  # ======================================
 
  Execute arbitrary Python code and return captured stdout or an error message.

  Args:
+ code (str): Python source code to run; stdout is captured and returned.

  Returns:
  str: Combined stdout produced by the code, or the exception text if
 
  - Voice defaults to "af_heart" (American Female, Heart voice)

  Args:
+ text (str): The text to synthesize. Works best with English but supports multiple languages.
+ speed (float): Speech speed multiplier in 0.5–2.0; 1.0 = normal speed. Default: 1.25 (slightly brisk).
+ voice (str): Voice identifier from 54 available options. Use List_Kokoro_Voices() to see all choices. Default: 'af_heart'.

  Returns:
  A tuple of (sample_rate_hz, audio_waveform) where:
 
  # --- Fetch tab (compact controllable extraction) ---
  fetch_interface = gr.Interface(
+ fn=Fetch_Webpage,
  inputs=[
  gr.Textbox(label="URL", placeholder="https://example.com/article"),
+ gr.Dropdown(
+ label="Verbosity",
+ choices=["Brief", "Standard", "Full"],
+ value="Standard",
+ info="Brief: 1000 chars, Standard: 3000 chars, Full: complete page"
+ ),
  ],
+ outputs=gr.Markdown(label="Extracted Markdown"),
  title="Fetch Webpage",
  description=(
+ "<div style=\"text-align:center\">Convert any webpage to clean Markdown format with configurable length, preserving structure and formatting while removing navigation and clutter.</div>"
  ),
  api_description=(
+ "Fetch a web page and return it converted to Markdown format with configurable length. "
+ "This function retrieves a webpage and converts its main content to clean Markdown, "
+ "preserving headings, formatting, and structure while removing navigation, footers, scripts, "
+ "and other non-content elements. Parameters: url (str - absolute URL), verbosity (str - "
+ "Brief/Standard/Full controlling output length: Brief=1000 chars, Standard=3000 chars, Full=complete page). "
+ "Returns clean Markdown with page title as H1 header and preserved content hierarchy."
  ),
  allow_flagging="never",
  )

+ # --- Simplified DDG tab (readable output only) ---
  concise_interface = gr.Interface(
  fn=Search_DuckDuckGo,
  inputs=[
  gr.Textbox(label="Query", placeholder="topic OR site:example.com"),
  gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Max results"),
  ],
  outputs=gr.Textbox(label="Search Results", interactive=False),
  title="DuckDuckGo Search",
  description=(
+ "<div style=\"text-align:center\">Enhanced web search with readable output format. Always includes snippets for better context and understanding.</div>"
  ),
  api_description=(
+ "Run a DuckDuckGo search with enhanced error handling and readable text output. "
+ "Always returns results in human-friendly format with snippets included for better context. "
+ "Supports advanced search operators: site: for specific domains, quotes for exact phrases, "
+ "OR for alternatives, and - to exclude terms. Examples: 'Python programming', 'site:example.com', "
+ "'\"artificial intelligence\"', 'cats -dogs', 'Python OR JavaScript'."
  ),
  allow_flagging="never",
  submit_btn="Search",
 
  "<div style=\"text-align:center\">Execute Python code and see the output.</div>"
  ),
  api_description=(
+ "Execute arbitrary Python code and return captured stdout or an error message. "
+ "Supports any valid Python code including imports, variables, functions, loops, and calculations. "
+ "Examples: 'print(2+2)', 'import math; print(math.sqrt(16))', 'for i in range(3): print(i)'. "
+ "Parameters: code (str - Python source code to execute). "
+ "Returns: Combined stdout output or exception text if execution fails."
  ),
  allow_flagging="never",
  )
 
  ),
  api_description=(
  "Synthesize speech from text using Kokoro-82M with 54 voice options. Returns (sample_rate, waveform) suitable for playback. "
+ "Supports unlimited text length by processing all segments. Voice examples: 'af_heart' (US female), 'am_adam' (US male), "
+ "'bf_alice' (British female), 'bm_daniel' (British male), 'jf_alpha' (Japanese female), 'zf_xiaoni' (Chinese female). "
+ "Parameters: text (str), speed (float 0.5–2.0, default 1.25x), voice (str from 54 available options, default 'af_heart'). "
+ "Use List_Kokoro_Voices() to see all available voices. "
  "Return the generated media to the user in this format `![Alt text](URL)`"
  ),
  allow_flagging="never",
 
  ),
  api_description=(
  "Generate a single image from a text prompt using a Hugging Face model (serverless Inference). "
+ "Supports creative prompts like 'a serene mountain landscape at sunset', 'portrait of a wise owl', "
+ "'futuristic city with flying cars'. Default model: FLUX.1-Krea-dev (high quality). "
+ "Parameters: prompt (str), model_id (str, creator/model-name), negative_prompt (str), steps (int, 1–100), "
+ "cfg_scale (float, 1–20), sampler (str), seed (int, -1=random), width/height (int, 64–1216). "
+ "Returns a PIL.Image. Return the generated media to the user in this format `![Alt text](URL)`"
  ),
  allow_flagging="never",
  )
 
  ),
  api_description=(
  "Generate a short video from a text prompt using a Hugging Face model (Serverless Inference). "
+ "Create dynamic scenes like 'a red fox running through a snowy forest at sunrise', 'waves crashing on a rocky shore', "
+ "'time-lapse of clouds moving across a blue sky'. Default model: Wan2.2-T2V-A14B (2-6 second videos). "
  "Parameters: prompt (str), model_id (str), negative_prompt (str), steps (int), cfg_scale (float), seed (int), "
+ "width/height (int), fps (int), duration (float in seconds). Returns MP4 file path. "
+ "Return the generated media to the user in this format `![Alt text](URL)`"
  ),
  allow_flagging="never",
  )