adding offset to Fetch_Webpage and Search_DuckDuckGo so you can pick up where you left off
app.py
CHANGED
@@ -288,13 +288,24 @@ def _fullpage_markdown_from_soup(full_soup: BeautifulSoup, base_url: str, strip_
     return markdown_text or "No content could be extracted."
 
 
-def _truncate_markdown(markdown: str, max_chars: int) -> str:
+def _truncate_markdown(markdown: str, max_chars: int) -> Tuple[str, Dict[str, any]]:
     """
     Truncate markdown content to a maximum character count while preserving structure.
     Tries to break at paragraph boundaries when possible.
+
+    Returns:
+        Tuple[str, Dict]: (truncated_content, metadata_dict)
+        metadata_dict contains: truncated, returned_chars, total_chars_estimate, next_cursor
     """
-    if len(markdown) <= max_chars:
-        return markdown
+    total_chars = len(markdown)
+
+    if total_chars <= max_chars:
+        return markdown, {
+            "truncated": False,
+            "returned_chars": total_chars,
+            "total_chars_estimate": total_chars,
+            "next_cursor": None
+        }
 
     # Find a good break point near the limit
     truncated = markdown[:max_chars]
@@ -303,14 +314,37 @@ def _truncate_markdown(markdown: str, max_chars: int) -> str:
     last_paragraph = truncated.rfind('\n\n')
     if last_paragraph > max_chars * 0.7:  # If we find a paragraph break in the last 30%
         truncated = truncated[:last_paragraph]
-
+        cursor_pos = last_paragraph
     # Try to break at the end of a sentence
     elif '.' in truncated[-100:]:  # Look for a period in the last 100 chars
         last_period = truncated.rfind('.')
         if last_period > max_chars * 0.8:  # If we find a period in the last 20%
             truncated = truncated[:last_period + 1]
+            cursor_pos = last_period + 1
+        else:
+            cursor_pos = len(truncated)
+    else:
+        cursor_pos = len(truncated)
+
+    metadata = {
+        "truncated": True,
+        "returned_chars": len(truncated),
+        "total_chars_estimate": total_chars,
+        "next_cursor": cursor_pos
+    }
+
+    truncated = truncated.rstrip()
+
+    # Add informative truncation notice
+    truncation_notice = (
+        f"\n\n---\n"
+        f"**Content Truncated:** Showing {metadata['returned_chars']:,} of {metadata['total_chars_estimate']:,} characters "
+        f"({(metadata['returned_chars']/metadata['total_chars_estimate']*100):.1f}%)\n"
+        f"**Next cursor:** {metadata['next_cursor']} (use this value with offset parameter for continuation)\n"
+        f"---"
+    )
 
-    return truncated
+    return truncated + truncation_notice, metadata
 
 
 def Fetch_Webpage(  # <-- MCP tool #1 (Fetch)
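The net effect of the two hunks above: `_truncate_markdown` now returns a `(content, metadata)` pair instead of a bare string, and `next_cursor` records the exact character position the cut landed on. A minimal sketch of that contract (an illustrative reimplementation, not the app's code; the real helper also appends the Markdown truncation notice):

```python
# Sketch of the new (content, metadata) contract, assuming the same
# paragraph-boundary heuristic as the diff above.
def truncate_with_cursor(markdown: str, max_chars: int):
    total = len(markdown)
    if total <= max_chars:
        return markdown, {"truncated": False, "returned_chars": total,
                          "total_chars_estimate": total, "next_cursor": None}
    truncated = markdown[:max_chars]
    cut = truncated.rfind("\n\n")      # prefer a paragraph boundary...
    if cut <= max_chars * 0.7:         # ...but only if it falls in the last 30%
        cut = len(truncated)           # otherwise keep the hard cut
    return markdown[:cut], {"truncated": True, "returned_chars": cut,
                            "total_chars_estimate": total, "next_cursor": cut}

text, meta = truncate_with_cursor("intro\n\n" + "x" * 5000, 3000)
assert meta["next_cursor"] == meta["returned_chars"]  # resume point == cut point
```

Because the notice embedded in the returned string repeats `next_cursor`, even a caller that only ever sees the string (an MCP client, say) can recover the resume point.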
@@ -318,6 +352,7 @@ def Fetch_Webpage(  # <-- MCP tool #1 (Fetch)
     max_chars: Annotated[int, "Maximum characters to return (0 = no limit, full page content)."] = 3000,
     strip_selectors: Annotated[str, "CSS selectors to remove (comma-separated, e.g., '.header, .footer, nav')."] = "",
     url_scraper: Annotated[bool, "Extract only links from the page instead of content."] = False,
+    offset: Annotated[int, "Character offset to start from (for pagination, use next_cursor from previous call)."] = 0,
 ) -> str:
     """
     Fetch a web page and return it converted to Markdown format with configurable options.
@@ -331,12 +366,14 @@ def Fetch_Webpage(  # <-- MCP tool #1 (Fetch)
         max_chars (int): Maximum characters to return. Use 0 for no limit (full page).
         strip_selectors (str): CSS selectors to remove before processing (comma-separated).
         url_scraper (bool): If True, extract only links instead of content.
+        offset (int): Character offset to start from (for pagination, use next_cursor from previous call).
 
     Returns:
         str: Either the webpage content converted to Markdown or a list of all links,
-             depending on the url_scraper setting. Content is length-limited by max_chars.
+             depending on the url_scraper setting. Content is length-limited by max_chars
+             and includes detailed truncation metadata when content is truncated.
     """
-    _log_call_start("Fetch_Webpage", url=url, max_chars=max_chars, strip_selectors=strip_selectors, url_scraper=url_scraper)
+    _log_call_start("Fetch_Webpage", url=url, max_chars=max_chars, strip_selectors=strip_selectors, url_scraper=url_scraper, offset=offset)
     if not url or not url.strip():
         result = "Please enter a valid URL."
         _log_call_end("Fetch_Webpage", _truncate_for_log(result))
@@ -367,15 +404,34 @@ def Fetch_Webpage(  # <-- MCP tool #1 (Fetch)
     if url_scraper:
         # Extract links mode
         result = _extract_links_from_soup(full_soup, final_url)
+        # Apply offset and truncation for link extraction too
+        if offset > 0:
+            result = result[offset:]
+        if max_chars > 0 and len(result) > max_chars:
+            result, metadata = _truncate_markdown(result, max_chars)
     else:
         # Convert to markdown mode
+        full_result = _fullpage_markdown_from_soup(full_soup, final_url, strip_selectors)
+
+        # Apply offset if specified
+        if offset > 0:
+            if offset >= len(full_result):
+                result = f"Offset {offset} exceeds content length ({len(full_result)} characters). Content ends at position {len(full_result)}."
+                _log_call_end("Fetch_Webpage", _truncate_for_log(result))
+                return result
+            result = full_result[offset:]
+        else:
+            result = full_result
+
+        # Apply max_chars truncation if specified
+        if max_chars > 0 and len(result) > max_chars:
+            result, metadata = _truncate_markdown(result, max_chars)
+            # Adjust metadata to account for offset
+            if offset > 0:
+                metadata["total_chars_estimate"] = len(full_result)
+                metadata["next_cursor"] = offset + metadata["next_cursor"] if metadata["next_cursor"] else None
 
-    _log_call_end("Fetch_Webpage", f"chars={len(result)}, url_scraper={url_scraper}")
+    _log_call_end("Fetch_Webpage", f"chars={len(result)}, url_scraper={url_scraper}, offset={offset}")
     return result
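Consuming this from the caller's side is a fetch-parse-refetch loop. A hypothetical sketch (the regex keys off the notice format from `_truncate_markdown` above; note that the notice's cursor is relative to the current slice, since the absolute adjustment in this hunk is applied only to the metadata dict, so the loop accumulates offsets):

```python
import re

# Hypothetical client loop: read a long page in max_chars-sized slices.
offset = 0
parts = []
while True:
    chunk = Fetch_Webpage("https://example.com/long-article", max_chars=3000, offset=offset)
    parts.append(chunk)
    match = re.search(r"\*\*Next cursor:\*\* (\d+)", chunk)
    if not match:                    # no truncation notice -> reached the end
        break
    offset += int(match.group(1))    # cursor is relative to the slice just read

full_text = "".join(parts)           # still contains per-slice notices; strip if needed
```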
@@ -578,9 +634,13 @@ def Search_DuckDuckGo(  # <-- MCP tool #2 (DDG Search)
     max_results: Annotated[int, "Number of results to return (1–20)."] = 5,
     page: Annotated[int, "Page number for pagination (1-based, each page contains max_results items)."] = 1,
     search_type: Annotated[str, "Type of search: 'text' (web pages), 'news', 'images', 'videos', or 'books'."] = "text",
+    offset: Annotated[int, "Result offset to start from (overrides page if > 0, for precise continuation)."] = 0,
 ) -> str:
     """
     Run a DuckDuckGo search and return formatted results with support for multiple content types.
+
+    Features smart fallback: if 'news' search returns no results, automatically retries with 'text'
+    search to catch sources like Hacker News that might not appear in news-specific results.
 
     Args:
         query (str): The search query string. Supports operators like site:, quotes for exact matching,
@@ -591,18 +651,22 @@ def Search_DuckDuckGo(  # <-- MCP tool #2 (DDG Search)
             - Exact phrase: "artificial intelligence"
             - Exclude terms: "cats -dogs"
         max_results (int): Number of results to return per page (1–20). Default: 5.
-        page (int): Page number for pagination (1-based). Default: 1.
+        page (int): Page number for pagination (1-based). Default: 1. Ignored if offset > 0.
         search_type (str): Type of search to perform:
            - "text": Web pages (default)
-            - "news": News articles with dates and sources
+            - "news": News articles with dates and sources (with smart fallback to 'text')
            - "images": Image results with dimensions and sources
            - "videos": Video results with duration and upload info
            - "books": Book search results
+        offset (int): Result offset to start from (0-based). If > 0, overrides page parameter
+            for precise continuation. Use this to pick up exactly where you left off.
 
     Returns:
         str: Search results formatted appropriately for the search type, with pagination info.
+            If 'news' search fails, results include a note about automatic fallback to 'text' search.
+            Includes next_offset information for easy continuation.
     """
-    _log_call_start("Search_DuckDuckGo", query=query, max_results=max_results, page=page, search_type=search_type)
+    _log_call_start("Search_DuckDuckGo", query=query, max_results=max_results, page=page, search_type=search_type, offset=offset)
     if not query or not query.strip():
         result = "No search query provided. Please enter a search term."
         _log_call_end("Search_DuckDuckGo", _truncate_for_log(result))
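With the new signature, continuing a search is just a second call. Values here are illustrative:

```python
# First slice of results, then an exact continuation via the reported offset.
first = Search_DuckDuckGo("rust async runtime", max_results=5, page=1, search_type="news")
# When more results exist, the output ends with a hint such as "Next offset: offset=5".
more = Search_DuckDuckGo("rust async runtime", max_results=5, search_type="news", offset=5)
```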
@@ -611,77 +675,143 @@ def Search_DuckDuckGo(  # <-- MCP tool #2 (DDG Search)
     # Validate parameters
     max_results = max(1, min(20, max_results))
     page = max(1, page)
+    offset = max(0, offset)
     valid_types = ["text", "news", "images", "videos", "books"]
     if search_type not in valid_types:
         search_type = "text"
 
-    # Calculate offset
-    offset = (page - 1) * max_results
-    total_needed = offset + max_results
-            raw_gen = ddgs.images(query, max_results=total_needed + 10)
-        elif search_type == "videos":
-            raw_gen = ddgs.videos(query, max_results=total_needed + 10)
-        elif search_type == "books":
-            raw_gen = ddgs.books(query, max_results=total_needed + 10)
-        raw = list(raw_gen)
+    # Calculate actual offset: use provided offset if > 0, otherwise calculate from page
+    if offset > 0:
+        actual_offset = offset
+        calculated_page = (offset // max_results) + 1
+    else:
+        actual_offset = (page - 1) * max_results
+        calculated_page = page
+
+    total_needed = actual_offset + max_results
+
+    # Track if we used fallback
+    used_fallback = False
+    original_search_type = search_type
+
+    def _perform_search(stype: str):
+        """Perform the actual search with the given search type."""
+        try:
+            # Apply rate limiting to avoid being blocked
+            _search_rate_limiter.acquire()
+
+            # Perform search with timeout handling based on search type
+            with DDGS() as ddgs:
+                if stype == "text":
+                    raw_gen = ddgs.text(query, max_results=total_needed + 10)
+                elif stype == "news":
+                    raw_gen = ddgs.news(query, max_results=total_needed + 10)
+                elif stype == "images":
+                    raw_gen = ddgs.images(query, max_results=total_needed + 10)
+                elif stype == "videos":
+                    raw_gen = ddgs.videos(query, max_results=total_needed + 10)
+                elif stype == "books":
+                    raw_gen = ddgs.books(query, max_results=total_needed + 10)
+
+                # Convert generator to list, handle case where no results are found
+                try:
+                    return list(raw_gen)
+                except Exception as inner_e:
+                    # If the generator fails (e.g., no results), return empty list
+                    if "no results" in str(inner_e).lower() or "not found" in str(inner_e).lower():
+                        return []
+                    else:
+                        raise inner_e
+
+        except Exception as e:
+            error_msg = f"Search failed: {str(e)[:200]}"
+            if "blocked" in str(e).lower() or "rate" in str(e).lower():
+                error_msg = "Search temporarily blocked due to rate limiting. Please try again in a few minutes."
+            elif "timeout" in str(e).lower():
+                error_msg = "Search timed out. Please try again with a simpler query."
+            elif "network" in str(e).lower() or "connection" in str(e).lower():
+                error_msg = "Network connection error. Please check your internet connection and try again."
+            elif "no results" in str(e).lower() or "not found" in str(e).lower():
+                # This is expected for some searches, return empty list
+                return []
+            raise Exception(error_msg)
+
+    # Try the primary search
+    try:
+        raw = _perform_search(search_type)
     except Exception as e:
-        error_msg = f"Search failed: {str(e)[:200]}"
-        if "blocked" in str(e).lower() or "rate" in str(e).lower():
-            error_msg = "Search temporarily blocked due to rate limiting. Please try again in a few minutes."
-        elif "timeout" in str(e).lower():
-            error_msg = "Search timed out. Please try again with a simpler query."
-        elif "network" in str(e).lower() or "connection" in str(e).lower():
-            error_msg = "Network connection error. Please check your internet connection and try again."
-        result = f"Error: {error_msg}"
+        result = f"Error: {str(e)}"
         _log_call_end("Search_DuckDuckGo", _truncate_for_log(result))
         return result
 
+    # Smart fallback: if news search returns empty and we haven't tried text yet, try text search
+    if not raw and search_type == "news":
+        try:
+            raw = _perform_search("text")
+            if raw:  # Only mark as fallback if we actually got results
+                used_fallback = True
+                search_type = "text"  # Update for result formatting
+        except Exception:
+            # If fallback also fails, continue with empty results from original search
+            pass
+
     if not raw:
+        fallback_note = " (also tried 'text' search as fallback)" if original_search_type == "news" and used_fallback else ""
+        result = f"No {original_search_type} results found for query: {query}{fallback_note}"
         _log_call_end("Search_DuckDuckGo", _truncate_for_log(result))
         return result
 
     # Apply pagination by slicing the results
-    paginated_results = raw[offset:offset + max_results]
+    paginated_results = raw[actual_offset:actual_offset + max_results]
 
     if not paginated_results:
+        if actual_offset >= len(raw):
+            result = f"Offset {actual_offset} exceeds available results ({len(raw)} total). Try offset=0 to start from beginning."
+        else:
+            result = f"No {original_search_type} results found on page {calculated_page} for query: {query}. Try page 1 or reduce page number."
         _log_call_end("Search_DuckDuckGo", _truncate_for_log(result))
         return result
 
     # Format results based on search type
     total_available = len(raw)
-    start_num = offset + 1
-    end_num = offset + len(paginated_results)
+    start_num = actual_offset + 1
+    end_num = actual_offset + len(paginated_results)
+    next_offset = actual_offset + len(paginated_results)
 
+    # Create header with fallback notification if applicable
+    search_label = original_search_type.title()
+    if used_fallback:
+        search_label += " → Text (Smart Fallback)"
+
+    # Show both page and offset information for clarity
+    pagination_info = f"Page {calculated_page}"
+    if offset > 0:
+        pagination_info = f"Offset {actual_offset} (≈ {pagination_info})"
+
+    lines = [f"{search_label} search results for: {query}"]
+
+    if used_fallback:
+        lines.append("📍 Note: News search returned no results, automatically searched general web content instead")
+
+    lines.append(f"{pagination_info} (results {start_num}-{end_num} of ~{total_available}+ available)\n")
 
     for i, result in enumerate(paginated_results, start_num):
         result_lines = _format_search_result(result, search_type, i)
         lines.extend(result_lines)
         lines.append("")  # Empty line between results
 
-    # Add pagination
+    # Add pagination/continuation hints
     if total_available > end_num:
-        lines.append(f"💡 More results available
+        lines.append(f"💡 More results available:")
+        lines.append(f"   • Next page: page={calculated_page + 1}")
+        lines.append(f"   • Next offset: offset={next_offset}")
+        lines.append(f"   • Use offset={next_offset} to continue exactly from result {next_offset + 1}")
 
     result = "\n".join(lines)
+    search_info = f"type={original_search_type}"
+    if used_fallback:
+        search_info += "→text"
+    _log_call_end("Search_DuckDuckGo", f"{search_info} page={calculated_page} offset={actual_offset} results={len(paginated_results)} chars={len(result)}")
     return result
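The offset/page reconciliation in this hunk reduces to two lines of arithmetic; a standalone check of the invariant (offset wins when set, page drives otherwise):

```python
# Mirrors the reconciliation logic from the hunk above, for a quick sanity check.
def resolve(page: int, offset: int, max_results: int) -> tuple[int, int]:
    """Return (actual_offset, calculated_page) the way Search_DuckDuckGo computes them."""
    if offset > 0:
        return offset, (offset // max_results) + 1   # offset overrides page
    return (page - 1) * max_results, page            # page drives the offset

assert resolve(page=3, offset=0, max_results=5) == (10, 3)   # page 3 skips 10 results
assert resolve(page=1, offset=12, max_results=5) == (12, 3)  # offset=12 lands mid-"page 3"
```

Continuing with the reported `next_offset` rather than bumping `page` avoids re-reading overlap when an earlier call used a different `max_results`.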
@@ -1300,19 +1430,29 @@ fetch_interface = gr.Interface(
             value=False,
             info="Extract only links instead of content"
         ),
+        gr.Slider(
+            minimum=0,
+            maximum=100000,
+            value=0,
+            step=100,
+            label="Offset",
+            info="Character offset to start from (use next_cursor from previous call for pagination)"
+        ),
     ],
     outputs=gr.Markdown(label="Extracted Content"),
     title="Fetch Webpage",
     description=(
-        "<div style=\"text-align:center\">Convert any webpage to clean Markdown format with precision controls, or extract all links. Supports custom element removal and length limits.</div>"
+        "<div style=\"text-align:center\">Convert any webpage to clean Markdown format with precision controls, or extract all links. Supports custom element removal, length limits, and pagination with offset.</div>"
     ),
     api_description=(
         "Fetch a web page and return it converted to Markdown format or extract links with configurable options. "
+        "Includes enhanced truncation with detailed metadata and pagination support via offset parameter. "
         "Parameters: url (str - absolute URL), max_chars (int - 0=no limit, default 3000), "
         "strip_selectors (str - CSS selectors to remove, comma-separated), "
-        "url_scraper (bool - extract only links instead of content, default False). "
-        "When url_scraper=True, returns formatted list of all links found on the page."
+        "url_scraper (bool - extract only links instead of content, default False), "
+        "offset (int - character offset for pagination, use next_cursor from previous call). "
+        "When content is truncated, returns detailed metadata including truncated status, character counts, "
+        "and next_cursor for continuation. When url_scraper=True, returns formatted list of all links found on the page."
     ),
     flagging_mode="never",
 )
@@ -1323,27 +1463,38 @@ concise_interface = gr.Interface(
     inputs=[
         gr.Textbox(label="Query", placeholder="topic OR site:example.com"),
         gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Max results"),
-        gr.Slider(minimum=1, maximum=10, value=1, step=1, label="Page", info="Page number for pagination"),
+        gr.Slider(minimum=1, maximum=10, value=1, step=1, label="Page", info="Page number for pagination (ignored if offset > 0)"),
         gr.Radio(
             label="Search Type",
             choices=["text", "news", "images", "videos", "books"],
             value="text",
             info="Type of content to search for"
         ),
+        gr.Slider(
+            minimum=0,
+            maximum=1000,
+            value=0,
+            step=1,
+            label="Offset",
+            info="Result offset to start from (overrides page if > 0, use next_offset from previous search)"
+        ),
     ],
     outputs=gr.Textbox(label="Search Results", interactive=False),
     title="DuckDuckGo Search",
     description=(
-        "<div style=\"text-align:center\">Multi-type web search with readable output format, date detection, and pagination. Supports text, news, images, videos, and books.</div>"
+        "<div style=\"text-align:center\">Multi-type web search with readable output format, date detection, and flexible pagination. Supports text, news, images, videos, and books. Features smart fallback for news searches and precise offset control.</div>"
     ),
     api_description=(
         "Run a DuckDuckGo search with support for multiple content types and return formatted results. "
+        "Features smart fallback: if 'news' search returns no results, automatically retries with 'text' search "
+        "to catch sources like Hacker News that might not appear in news-specific results. "
         "Supports advanced search operators: site: for specific domains, quotes for exact phrases, "
         "OR for alternatives, and - to exclude terms. Examples: 'Python programming', 'site:example.com', "
         "'\"artificial intelligence\"', 'cats -dogs', 'Python OR JavaScript'. "
         "Parameters: query (str), max_results (int, 1-20), page (int, 1-based pagination), "
-        "search_type (str: text/news/images/videos/books). "
-        "Returns appropriately formatted results with metadata for each content type."
+        "search_type (str: text/news/images/videos/books), offset (int, result offset for precise continuation). "
+        "If offset > 0, it overrides the page parameter. Returns appropriately formatted results with metadata, "
+        "pagination hints, and next_offset information for each content type."
     ),
     flagging_mode="never",
     submit_btn="Search",
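One wiring detail behind these two interface hunks: `gr.Interface` maps `inputs` to the wrapped function's parameters positionally, which is why each new Offset slider is appended at the same position `offset` holds in the corresponding signature. A stripped-down equivalent (hypothetical miniature, not the app's actual interface):

```python
import gradio as gr

def fetch(url: str, max_chars: int = 3000, offset: int = 0) -> str:
    return f"would fetch {url}[{offset}:{offset + max_chars}]"

demo = gr.Interface(
    fn=fetch,
    inputs=[
        gr.Textbox(label="URL"),
        gr.Slider(minimum=0, maximum=10000, value=3000, step=100, label="Max chars"),
        gr.Slider(minimum=0, maximum=100000, value=0, step=100, label="Offset"),  # 3rd input -> `offset`
    ],
    outputs=gr.Markdown(label="Extracted Content"),
)
```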