Spaces:
Configuration error
Configuration error
Update agent.py
Browse files
agent.py
CHANGED
@@ -33,7 +33,7 @@ from llama_index.readers.youtube_transcript import YoutubeTranscriptReader
|
|
33 |
from llama_index.tools.arxiv import ArxivToolSpec
|
34 |
from llama_index.core.agent.workflow import AgentWorkflow
|
35 |
from llama_index.llms.vllm import Vllm
|
36 |
-
|
37 |
|
38 |
# Import all required official LlamaIndex Readers
|
39 |
from llama_index.readers.file import (
|
@@ -494,16 +494,23 @@ dynamic_qe_manager = DynamicQueryEngineManager()
|
|
494 |
# 1. Create the base DuckDuckGo search tool from the official spec.
|
495 |
# This tool returns text summaries of search results, not just URLs.
|
496 |
|
|
|
|
|
497 |
def search_and_extract_content_from_url(query: str) -> List[Document]:
|
498 |
"""
|
499 |
Searches web, gets top URL, and extracts both text content and images.
|
500 |
Returns a list of Document objects containing the extracted content.
|
501 |
"""
|
502 |
# Get URL from search
|
503 |
-
|
504 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
505 |
print(url)
|
506 |
-
|
507 |
documents = []
|
508 |
|
509 |
try:
|
|
|
33 |
from llama_index.tools.arxiv import ArxivToolSpec
|
34 |
from llama_index.core.agent.workflow import AgentWorkflow
|
35 |
from llama_index.llms.vllm import Vllm
|
36 |
+
from llama_index.tools.duckduckgo import DuckDuckGoSearchToolSpec
|
37 |
|
38 |
# Import all required official LlamaIndex Readers
|
39 |
from llama_index.readers.file import (
|
|
|
494 |
# 1. Create the base DuckDuckGo search tool from the official spec.
|
495 |
# This tool returns text summaries of search results, not just URLs.
|
496 |
|
497 |
+
# Module-level handle to a single DuckDuckGo tool: element [1] of the list
# returned by DuckDuckGoSearchToolSpec().to_tool_list().
# NOTE(review): the positional index depends on the order in which the spec
# exposes its tools — presumably [1] is the full-search tool; confirm against
# the installed llama-index-tools-duckduckgo version.
base_duckduckgo_tool = DuckDuckGoSearchToolSpec().to_tool_list()[1]
|
498 |
+
|
499 |
def search_and_extract_content_from_url(query: str) -> List[Document]:
|
500 |
"""
|
501 |
Searches web, gets top URL, and extracts both text content and images.
|
502 |
Returns a list of Document objects containing the extracted content.
|
503 |
"""
|
504 |
# Get URL from search (NOTE: the search lookup below is currently commented out and the URL is hard-coded)
|
505 |
+
#search_results = base_duckduckgo_tool(query, max_results=1)
|
506 |
+
#url_match = re.search(r"https?://\S+", str(search_results))
|
507 |
+
|
508 |
+
#if not url_match:
|
509 |
+
#return [Document(text="No URL could be extracted from the search results.")]
|
510 |
+
|
511 |
+
#url = url_match.group(0)[:-2]
|
512 |
+
url = "https://en.wikipedia.org/wiki/Mercedes_Sosa"
|
513 |
print(url)
|
|
|
514 |
documents = []
|
515 |
|
516 |
try:
|