Coool2 committed on
Commit
8012613
·
1 Parent(s): 81a79f6

Update agent.py

Browse files
Files changed (1) hide show
  1. agent.py +11 -4
agent.py CHANGED
@@ -33,7 +33,7 @@ from llama_index.readers.youtube_transcript import YoutubeTranscriptReader
33
  from llama_index.tools.arxiv import ArxivToolSpec
34
  from llama_index.core.agent.workflow import AgentWorkflow
35
  from llama_index.llms.vllm import Vllm
36
-
37
 
38
  # Import all required official LlamaIndex Readers
39
  from llama_index.readers.file import (
@@ -494,16 +494,23 @@ dynamic_qe_manager = DynamicQueryEngineManager()
494
  # 1. Create the base DuckDuckGo search tool from the official spec.
495
  # This tool returns text summaries of search results, not just URLs.
496
 
 
 
497
  def search_and_extract_content_from_url(query: str) -> List[Document]:
498
  """
499
  Searches web, gets top URL, and extracts both text content and images.
500
  Returns a list of Document objects containing the extracted content.
501
  """
502
  # Get URL from search
503
- results = search_ddg(query)
504
- url = results["AbstractURL"]
 
 
 
 
 
 
505
  print(url)
506
-
507
  documents = []
508
 
509
  try:
 
33
  from llama_index.tools.arxiv import ArxivToolSpec
34
  from llama_index.core.agent.workflow import AgentWorkflow
35
  from llama_index.llms.vllm import Vllm
36
+ from llama_index.tools.duckduckgo import DuckDuckGoSearchToolSpec
37
 
38
  # Import all required official LlamaIndex Readers
39
  from llama_index.readers.file import (
 
494
  # 1. Create the base DuckDuckGo search tool from the official spec.
495
  # This tool returns text summaries of search results, not just URLs.
496
 
497
+ base_duckduckgo_tool = DuckDuckGoSearchToolSpec().to_tool_list()[1]
498
+
499
  def search_and_extract_content_from_url(query: str) -> List[Document]:
500
  """
501
  Searches web, gets top URL, and extracts both text content and images.
502
  Returns a list of Document objects containing the extracted content.
503
  """
504
  # Get URL from search
505
+ #search_results = base_duckduckgo_tool(query, max_results=1)
506
+ #url_match = re.search(r"https?://\S+", str(search_results))
507
+
508
+ #if not url_match:
509
+ #return [Document(text="No URL could be extracted from the search results.")]
510
+
511
+ #url = url_match.group(0)[:-2]
512
+ url = "https://en.wikipedia.org/wiki/Mercedes_Sosa"
513
  print(url)
 
514
  documents = []
515
 
516
  try: