Nymbo committed (verified)
Commit dc537d3 · Parent(s): 9e2a5dd

adding offset to Fetch_Webpage and Search_DuckDuckGo so you can pick up where you left off

Files changed (1): app.py (+214 -63)
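In practice the new parameters chain calls together: a truncated Fetch_Webpage response now ends with a notice carrying a next cursor, which can be fed back in as offset. A minimal driver loop, assuming the tool is invoked directly as a Python function (the URL is invented; the regex matches the notice format added in this diff, whose cursor counts from the start of the returned slice):

import re

url = "https://example.com/long-article"  # hypothetical page
offset = 0
while True:
    chunk = Fetch_Webpage(url, max_chars=3000, offset=offset)
    print(chunk)
    match = re.search(r"\*\*Next cursor:\*\* (\d+)", chunk)
    if not match:
        break  # no truncation notice, so the page has been fully consumed
    offset += int(match.group(1))  # cursor is relative to the slice, so accumulate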
app.py CHANGED
@@ -288,13 +288,24 @@ def _fullpage_markdown_from_soup(full_soup: BeautifulSoup, base_url: str, strip_
     return markdown_text or "No content could be extracted."


-def _truncate_markdown(markdown: str, max_chars: int) -> str:
+def _truncate_markdown(markdown: str, max_chars: int) -> Tuple[str, Dict[str, any]]:
     """
     Truncate markdown content to a maximum character count while preserving structure.
     Tries to break at paragraph boundaries when possible.
+
+    Returns:
+        Tuple[str, Dict]: (truncated_content, metadata_dict)
+        metadata_dict contains: truncated, returned_chars, total_chars_estimate, next_cursor
     """
-    if len(markdown) <= max_chars:
-        return markdown
+    total_chars = len(markdown)
+
+    if total_chars <= max_chars:
+        return markdown, {
+            "truncated": False,
+            "returned_chars": total_chars,
+            "total_chars_estimate": total_chars,
+            "next_cursor": None
+        }

     # Find a good break point near the limit
     truncated = markdown[:max_chars]
@@ -303,14 +314,37 @@ def _truncate_markdown(markdown: str, max_chars: int) -> str:
     last_paragraph = truncated.rfind('\n\n')
     if last_paragraph > max_chars * 0.7:  # If we find a paragraph break in the last 30%
         truncated = truncated[:last_paragraph]
-
+        cursor_pos = last_paragraph
     # Try to break at the end of a sentence
     elif '.' in truncated[-100:]:  # Look for a period in the last 100 chars
         last_period = truncated.rfind('.')
         if last_period > max_chars * 0.8:  # If we find a period in the last 20%
             truncated = truncated[:last_period + 1]
+            cursor_pos = last_period + 1
+        else:
+            cursor_pos = len(truncated)
+    else:
+        cursor_pos = len(truncated)
+
+    metadata = {
+        "truncated": True,
+        "returned_chars": len(truncated),
+        "total_chars_estimate": total_chars,
+        "next_cursor": cursor_pos
+    }
+
+    truncated = truncated.rstrip()
+
+    # Add informative truncation notice
+    truncation_notice = (
+        f"\n\n---\n"
+        f"**Content Truncated:** Showing {metadata['returned_chars']:,} of {metadata['total_chars_estimate']:,} characters "
+        f"({(metadata['returned_chars']/metadata['total_chars_estimate']*100):.1f}%)\n"
+        f"**Next cursor:** {metadata['next_cursor']} (use this value with offset parameter for continuation)\n"
+        f"---"
+    )

-    return truncated.rstrip() + "\n\n> *[Content truncated for brevity]*"
+    return truncated + truncation_notice, metadata


 def Fetch_Webpage(  # <-- MCP tool #1 (Fetch)
@@ -318,6 +352,7 @@ def Fetch_Webpage(  # <-- MCP tool #1 (Fetch)
     max_chars: Annotated[int, "Maximum characters to return (0 = no limit, full page content)."] = 3000,
     strip_selectors: Annotated[str, "CSS selectors to remove (comma-separated, e.g., '.header, .footer, nav')."] = "",
     url_scraper: Annotated[bool, "Extract only links from the page instead of content."] = False,
+    offset: Annotated[int, "Character offset to start from (for pagination, use next_cursor from previous call)."] = 0,
 ) -> str:
     """
     Fetch a web page and return it converted to Markdown format with configurable options.
@@ -331,12 +366,14 @@ def Fetch_Webpage(  # <-- MCP tool #1 (Fetch)
         max_chars (int): Maximum characters to return. Use 0 for no limit (full page).
         strip_selectors (str): CSS selectors to remove before processing (comma-separated).
         url_scraper (bool): If True, extract only links instead of content.
+        offset (int): Character offset to start from (for pagination, use next_cursor from previous call).

     Returns:
         str: Either the webpage content converted to Markdown or a list of all links,
-             depending on the url_scraper setting. Content is length-limited by max_chars.
+             depending on the url_scraper setting. Content is length-limited by max_chars
+             and includes detailed truncation metadata when content is truncated.
     """
-    _log_call_start("Fetch_Webpage", url=url, max_chars=max_chars, strip_selectors=strip_selectors, url_scraper=url_scraper)
+    _log_call_start("Fetch_Webpage", url=url, max_chars=max_chars, strip_selectors=strip_selectors, url_scraper=url_scraper, offset=offset)
     if not url or not url.strip():
         result = "Please enter a valid URL."
         _log_call_end("Fetch_Webpage", _truncate_for_log(result))
@@ -367,15 +404,34 @@ def Fetch_Webpage(  # <-- MCP tool #1 (Fetch)
     if url_scraper:
         # Extract links mode
         result = _extract_links_from_soup(full_soup, final_url)
+        # Apply offset and truncation for link extraction too
+        if offset > 0:
+            result = result[offset:]
+        if max_chars > 0 and len(result) > max_chars:
+            result, metadata = _truncate_markdown(result, max_chars)
     else:
         # Convert to markdown mode
-        result = _fullpage_markdown_from_soup(full_soup, final_url, strip_selectors)
-
-    # Apply max_chars truncation if specified
-    if max_chars > 0 and len(result) > max_chars:
-        result = _truncate_markdown(result, max_chars)
+        full_result = _fullpage_markdown_from_soup(full_soup, final_url, strip_selectors)
+
+        # Apply offset if specified
+        if offset > 0:
+            if offset >= len(full_result):
+                result = f"Offset {offset} exceeds content length ({len(full_result)} characters). Content ends at position {len(full_result)}."
+                _log_call_end("Fetch_Webpage", _truncate_for_log(result))
+                return result
+            result = full_result[offset:]
+        else:
+            result = full_result
+
+        # Apply max_chars truncation if specified
+        if max_chars > 0 and len(result) > max_chars:
+            result, metadata = _truncate_markdown(result, max_chars)
+            # Adjust metadata to account for offset
+            if offset > 0:
+                metadata["total_chars_estimate"] = len(full_result)
+                metadata["next_cursor"] = offset + metadata["next_cursor"] if metadata["next_cursor"] else None

-    _log_call_end("Fetch_Webpage", f"chars={len(result)}, url_scraper={url_scraper}")
+    _log_call_end("Fetch_Webpage", f"chars={len(result)}, url_scraper={url_scraper}, offset={offset}")
     return result


@@ -578,9 +634,13 @@ def Search_DuckDuckGo(  # <-- MCP tool #2 (DDG Search)
     max_results: Annotated[int, "Number of results to return (1–20)."] = 5,
     page: Annotated[int, "Page number for pagination (1-based, each page contains max_results items)."] = 1,
     search_type: Annotated[str, "Type of search: 'text' (web pages), 'news', 'images', 'videos', or 'books'."] = "text",
+    offset: Annotated[int, "Result offset to start from (overrides page if > 0, for precise continuation)."] = 0,
 ) -> str:
     """
     Run a DuckDuckGo search and return formatted results with support for multiple content types.
+
+    Features smart fallback: if 'news' search returns no results, automatically retries with 'text'
+    search to catch sources like Hacker News that might not appear in news-specific results.

     Args:
         query (str): The search query string. Supports operators like site:, quotes for exact matching,
@@ -591,18 +651,22 @@ def Search_DuckDuckGo(  # <-- MCP tool #2 (DDG Search)
             - Exact phrase: "artificial intelligence"
             - Exclude terms: "cats -dogs"
         max_results (int): Number of results to return per page (1–20). Default: 5.
-        page (int): Page number for pagination (1-based). Default: 1.
+        page (int): Page number for pagination (1-based). Default: 1. Ignored if offset > 0.
         search_type (str): Type of search to perform:
            - "text": Web pages (default)
-            - "news": News articles with dates and sources
+            - "news": News articles with dates and sources (with smart fallback to 'text')
            - "images": Image results with dimensions and sources
            - "videos": Video results with duration and upload info
            - "books": Book search results
+        offset (int): Result offset to start from (0-based). If > 0, overrides page parameter
+            for precise continuation. Use this to pick up exactly where you left off.

     Returns:
         str: Search results formatted appropriately for the search type, with pagination info.
+            If 'news' search fails, results include a note about automatic fallback to 'text' search.
+            Includes next_offset information for easy continuation.
     """
-    _log_call_start("Search_DuckDuckGo", query=query, max_results=max_results, page=page, search_type=search_type)
+    _log_call_start("Search_DuckDuckGo", query=query, max_results=max_results, page=page, search_type=search_type, offset=offset)
     if not query or not query.strip():
         result = "No search query provided. Please enter a search term."
         _log_call_end("Search_DuckDuckGo", _truncate_for_log(result))
@@ -611,77 +675,143 @@ def Search_DuckDuckGo(  # <-- MCP tool #2 (DDG Search)
     # Validate parameters
     max_results = max(1, min(20, max_results))
     page = max(1, page)
+    offset = max(0, offset)
     valid_types = ["text", "news", "images", "videos", "books"]
     if search_type not in valid_types:
         search_type = "text"

-    # Calculate offset for pagination
-    offset = (page - 1) * max_results
-    total_needed = offset + max_results
+    # Calculate actual offset: use provided offset if > 0, otherwise calculate from page
+    if offset > 0:
+        actual_offset = offset
+        calculated_page = (offset // max_results) + 1
+    else:
+        actual_offset = (page - 1) * max_results
+        calculated_page = page

-    try:
-        # Apply rate limiting to avoid being blocked
-        _search_rate_limiter.acquire()
-
-        # Perform search with timeout handling based on search type
-        with DDGS() as ddgs:
-            if search_type == "text":
-                raw_gen = ddgs.text(query, max_results=total_needed + 10)
-            elif search_type == "news":
-                raw_gen = ddgs.news(query, max_results=total_needed + 10)
-            elif search_type == "images":
-                raw_gen = ddgs.images(query, max_results=total_needed + 10)
-            elif search_type == "videos":
-                raw_gen = ddgs.videos(query, max_results=total_needed + 10)
-            elif search_type == "books":
-                raw_gen = ddgs.books(query, max_results=total_needed + 10)
-
-        raw = list(raw_gen)
-
+    total_needed = actual_offset + max_results
+
+    # Track if we used fallback
+    used_fallback = False
+    original_search_type = search_type
+
+    def _perform_search(stype: str):
+        """Perform the actual search with the given search type."""
+        try:
+            # Apply rate limiting to avoid being blocked
+            _search_rate_limiter.acquire()
+
+            # Perform search with timeout handling based on search type
+            with DDGS() as ddgs:
+                if stype == "text":
+                    raw_gen = ddgs.text(query, max_results=total_needed + 10)
+                elif stype == "news":
+                    raw_gen = ddgs.news(query, max_results=total_needed + 10)
+                elif stype == "images":
+                    raw_gen = ddgs.images(query, max_results=total_needed + 10)
+                elif stype == "videos":
+                    raw_gen = ddgs.videos(query, max_results=total_needed + 10)
+                elif stype == "books":
+                    raw_gen = ddgs.books(query, max_results=total_needed + 10)
+
+            # Convert generator to list, handle case where no results are found
+            try:
+                return list(raw_gen)
+            except Exception as inner_e:
+                # If the generator fails (e.g., no results), return empty list
+                if "no results" in str(inner_e).lower() or "not found" in str(inner_e).lower():
+                    return []
+                else:
+                    raise inner_e
+
+        except Exception as e:
+            error_msg = f"Search failed: {str(e)[:200]}"
+            if "blocked" in str(e).lower() or "rate" in str(e).lower():
+                error_msg = "Search temporarily blocked due to rate limiting. Please try again in a few minutes."
+            elif "timeout" in str(e).lower():
+                error_msg = "Search timed out. Please try again with a simpler query."
+            elif "network" in str(e).lower() or "connection" in str(e).lower():
+                error_msg = "Network connection error. Please check your internet connection and try again."
+            elif "no results" in str(e).lower() or "not found" in str(e).lower():
+                # This is expected for some searches, return empty list
+                return []
+            raise Exception(error_msg)
+
+    # Try the primary search
+    try:
+        raw = _perform_search(search_type)
     except Exception as e:
-        error_msg = f"Search failed: {str(e)[:200]}"
-        if "blocked" in str(e).lower() or "rate" in str(e).lower():
-            error_msg = "Search temporarily blocked due to rate limiting. Please try again in a few minutes."
-        elif "timeout" in str(e).lower():
-            error_msg = "Search timed out. Please try again with a simpler query."
-        elif "network" in str(e).lower() or "connection" in str(e).lower():
-            error_msg = "Network connection error. Please check your internet connection and try again."
-        result = f"Error: {error_msg}"
+        result = f"Error: {str(e)}"
         _log_call_end("Search_DuckDuckGo", _truncate_for_log(result))
         return result

+    # Smart fallback: if news search returns empty and we haven't tried text yet, try text search
+    if not raw and search_type == "news":
+        try:
+            raw = _perform_search("text")
+            if raw:  # Only mark as fallback if we actually got results
+                used_fallback = True
+                search_type = "text"  # Update for result formatting
+        except Exception:
+            # If fallback also fails, continue with empty results from original search
+            pass
+
     if not raw:
-        result = f"No {search_type} results found for query: {query}"
+        fallback_note = " (also tried 'text' search as fallback)" if original_search_type == "news" and used_fallback else ""
+        result = f"No {original_search_type} results found for query: {query}{fallback_note}"
         _log_call_end("Search_DuckDuckGo", _truncate_for_log(result))
         return result

     # Apply pagination by slicing the results
-    paginated_results = raw[offset:offset + max_results]
+    paginated_results = raw[actual_offset:actual_offset + max_results]

     if not paginated_results:
-        result = f"No {search_type} results found on page {page} for query: {query}. Try page 1 or reduce page number."
+        if actual_offset >= len(raw):
+            result = f"Offset {actual_offset} exceeds available results ({len(raw)} total). Try offset=0 to start from beginning."
+        else:
+            result = f"No {original_search_type} results found on page {calculated_page} for query: {query}. Try page 1 or reduce page number."
         _log_call_end("Search_DuckDuckGo", _truncate_for_log(result))
         return result

     # Format results based on search type
     total_available = len(raw)
-    start_num = offset + 1
-    end_num = offset + len(paginated_results)
+    start_num = actual_offset + 1
+    end_num = actual_offset + len(paginated_results)
+    next_offset = actual_offset + len(paginated_results)

-    lines = [f"{search_type.title()} search results for: {query}"]
-    lines.append(f"Page {page} (results {start_num}-{end_num} of ~{total_available}+ available)\n")
+    # Create header with fallback notification if applicable
+    search_label = original_search_type.title()
+    if used_fallback:
+        search_label += " → Text (Smart Fallback)"
+
+    # Show both page and offset information for clarity
+    pagination_info = f"Page {calculated_page}"
+    if offset > 0:
+        pagination_info = f"Offset {actual_offset} (≈ {pagination_info})"
+
+    lines = [f"{search_label} search results for: {query}"]
+
+    if used_fallback:
+        lines.append("📍 Note: News search returned no results, automatically searched general web content instead")
+
+    lines.append(f"{pagination_info} (results {start_num}-{end_num} of ~{total_available}+ available)\n")

     for i, result in enumerate(paginated_results, start_num):
         result_lines = _format_search_result(result, search_type, i)
         lines.extend(result_lines)
         lines.append("")  # Empty line between results

-    # Add pagination hint
+    # Add pagination/continuation hints
     if total_available > end_num:
-        lines.append(f"💡 More results available - use page={page + 1} to see next {max_results} results")
+        lines.append(f"💡 More results available:")
+        lines.append(f"   • Next page: page={calculated_page + 1}")
+        lines.append(f"   • Next offset: offset={next_offset}")
+        lines.append(f"   • Use offset={next_offset} to continue exactly from result {next_offset + 1}")

     result = "\n".join(lines)
-    _log_call_end("Search_DuckDuckGo", f"type={search_type} page={page} results={len(paginated_results)} chars={len(result)}")
+    search_info = f"type={original_search_type}"
+    if used_fallback:
+        search_info += "→text"
+    _log_call_end("Search_DuckDuckGo", f"{search_info} page={calculated_page} offset={actual_offset} results={len(paginated_results)} chars={len(result)}")
     return result


@@ -1300,19 +1430,29 @@ fetch_interface = gr.Interface(
             value=False,
             info="Extract only links instead of content"
         ),
+        gr.Slider(
+            minimum=0,
+            maximum=100000,
+            value=0,
+            step=100,
+            label="Offset",
+            info="Character offset to start from (use next_cursor from previous call for pagination)"
+        ),
     ],
     outputs=gr.Markdown(label="Extracted Content"),
     title="Fetch Webpage",
     description=(
-        "<div style=\"text-align:center\">Convert any webpage to clean Markdown format with precision controls, or extract all links. Supports custom element removal and length limits.</div>"
+        "<div style=\"text-align:center\">Convert any webpage to clean Markdown format with precision controls, or extract all links. Supports custom element removal, length limits, and pagination with offset.</div>"
     ),
     api_description=(
         "Fetch a web page and return it converted to Markdown format or extract links with configurable options. "
+        "Includes enhanced truncation with detailed metadata and pagination support via offset parameter. "
         "Parameters: url (str - absolute URL), max_chars (int - 0=no limit, default 3000), "
         "strip_selectors (str - CSS selectors to remove, comma-separated), "
-        "url_scraper (bool - extract only links instead of content, default False). "
-        "When url_scraper=True, returns formatted list of all links found on the page. "
-        "When False, returns clean Markdown content with custom element removal and length control."
+        "url_scraper (bool - extract only links instead of content, default False), "
+        "offset (int - character offset for pagination, use next_cursor from previous call). "
+        "When content is truncated, returns detailed metadata including truncated status, character counts, "
+        "and next_cursor for continuation. When url_scraper=True, returns formatted list of all links found on the page."
     ),
     flagging_mode="never",
 )
@@ -1323,27 +1463,38 @@ concise_interface = gr.Interface(
     inputs=[
         gr.Textbox(label="Query", placeholder="topic OR site:example.com"),
         gr.Slider(minimum=1, maximum=20, value=5, step=1, label="Max results"),
-        gr.Slider(minimum=1, maximum=10, value=1, step=1, label="Page", info="Page number for pagination"),
+        gr.Slider(minimum=1, maximum=10, value=1, step=1, label="Page", info="Page number for pagination (ignored if offset > 0)"),
         gr.Radio(
             label="Search Type",
             choices=["text", "news", "images", "videos", "books"],
             value="text",
             info="Type of content to search for"
         ),
+        gr.Slider(
+            minimum=0,
+            maximum=1000,
+            value=0,
+            step=1,
+            label="Offset",
+            info="Result offset to start from (overrides page if > 0, use next_offset from previous search)"
+        ),
     ],
     outputs=gr.Textbox(label="Search Results", interactive=False),
     title="DuckDuckGo Search",
     description=(
-        "<div style=\"text-align:center\">Multi-type web search with readable output format, date detection, and pagination. Supports text, news, images, videos, and books.</div>"
+        "<div style=\"text-align:center\">Multi-type web search with readable output format, date detection, and flexible pagination. Supports text, news, images, videos, and books. Features smart fallback for news searches and precise offset control.</div>"
     ),
     api_description=(
         "Run a DuckDuckGo search with support for multiple content types and return formatted results. "
+        "Features smart fallback: if 'news' search returns no results, automatically retries with 'text' search "
+        "to catch sources like Hacker News that might not appear in news-specific results. "
         "Supports advanced search operators: site: for specific domains, quotes for exact phrases, "
        "OR for alternatives, and - to exclude terms. Examples: 'Python programming', 'site:example.com', "
         "'\"artificial intelligence\"', 'cats -dogs', 'Python OR JavaScript'. "
         "Parameters: query (str), max_results (int, 1-20), page (int, 1-based pagination), "
-        "search_type (str: text/news/images/videos/books). "
-        "Returns appropriately formatted results with metadata and pagination hints for each content type."
+        "search_type (str: text/news/images/videos/books), offset (int, result offset for precise continuation). "
+        "If offset > 0, it overrides the page parameter. Returns appropriately formatted results with metadata, "
+        "pagination hints, and next_offset information for each content type."
     ),
     flagging_mode="never",
     submit_btn="Search",
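The core of the Search_DuckDuckGo change is the offset-versus-page bookkeeping in the hunk at old line 611: an explicit offset wins over page, and the reported page number is derived back from it. A standalone sketch of that arithmetic, with a fake result list standing in for live DDGS output (only the logic is taken from the diff):

def paginate(raw, max_results=5, page=1, offset=0):
    # Mirrors the diff: offset > 0 overrides page; the page is derived from offset.
    if offset > 0:
        actual_offset = offset
        calculated_page = (offset // max_results) + 1
    else:
        actual_offset = (page - 1) * max_results
        calculated_page = page
    window = raw[actual_offset:actual_offset + max_results]
    next_offset = actual_offset + len(window)
    return window, calculated_page, next_offset

raw = [f"result {i}" for i in range(23)]        # fake search results
window, page_no, nxt = paginate(raw, offset=7)  # resume mid-list
assert window[0] == "result 7" and page_no == 2 and nxt == 12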
 
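On the fetch side, _truncate_markdown now returns a (content, metadata) tuple rather than a bare string. A quick sketch exercising that contract on invented text (assumes _truncate_markdown and its Tuple/Dict imports from app.py are in scope):

doc = "Intro paragraph.\n\n" + "Body sentence. " * 500
content, meta = _truncate_markdown(doc, max_chars=300)
assert meta["truncated"] is True
assert meta["returned_chars"] <= 300
assert meta["next_cursor"] is not None   # resume point for the offset parameter
assert content.rstrip().endswith("---")  # truncation notice appended

short, meta2 = _truncate_markdown("tiny", max_chars=300)
assert short == "tiny" and meta2["next_cursor"] is None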