from langchain_core.tools import tool
from langchain_community.document_loaders import WebBaseLoader, WikipediaLoader, ArxivLoader
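
# These loaders lean on optional dependencies: WikipediaLoader needs the `wikipedia`
# package, ArxivLoader needs `arxiv` (plus `pymupdf` to parse the downloaded PDFs),
# and WebBaseLoader uses `beautifulsoup4`. Exact package names can vary with the
# langchain_community version, so treat this as a best-effort hint.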

def wikipedia_search(query: str) -> str:
    """
    Search Wikipedia for information

    Args:
        query: The query to search for

    Returns:
        The search results
    """
    docs_found = WikipediaLoader(query=query, load_max_docs=5).load()
    # format the docs found into a string keeping just first paragraph
    formatted_results = []
    for i, doc in enumerate(docs_found, 1):
        source = doc.metadata.get('source', 'Unknown source')
        title = doc.metadata.get('title', 'Untitled')
        # Get the first paragraph (split by \n\n and take first part)
        content = doc.page_content.strip()
        first_paragraph = content.split('\n\n')[0] if content else "No content available"
        formatted_doc = f"""--- DOCUMENT {i} START ---
Source: {source}
Title: {title}
Content: {first_paragraph}
--- DOCUMENT {i} END ---"""
        formatted_results.append(formatted_doc)
    return "\n\n".join(formatted_results)

def arxiv_search(query: str) -> str:
    """
    Search ArXiv for research papers

    Args:
        query: The query to search for

    Returns:
        The search results with abstracts
    """
    docs_found = ArxivLoader(query=query, load_max_docs=3).load()
    formatted_results = []
    for i, doc in enumerate(docs_found, 1):
        source = doc.metadata.get('source', 'Unknown source')
        # ArxivLoader capitalizes its metadata keys ('Title', 'Published', 'Summary'),
        # so prefer those and fall back to the lowercase variant just in case
        title = doc.metadata.get('Title', doc.metadata.get('title', 'Untitled'))
        # The abstract lives in the 'Summary' metadata field; page_content holds the
        # much longer full paper text, so only use it as a last resort
        abstract = doc.metadata.get('Summary') or (doc.page_content.strip() if doc.page_content else "No abstract available")
        formatted_doc = f"""--- DOCUMENT {i} START ---
Source: {source}
Title: {title}
Abstract: {abstract}
--- DOCUMENT {i} END ---"""
        formatted_results.append(formatted_doc)
    return "\n\n".join(formatted_results)

def web_search(query: str) -> str:
    """
    Search the web for information

    Args:
        query: The URL to load (or a list of URLs)

    Returns:
        The page contents, truncated to the first 1000 characters per document
    """
    # Note: WebBaseLoader requires URLs, so this assumes the query contains URLs.
    # For a general web search you'd need a different approach, such as SerpAPI.
    try:
        if isinstance(query, str):
            urls = [query] if query.startswith('http') else []
        else:
            urls = query

        if not urls:
            return "No valid URLs provided for web search."

        # Limit to 4 URLs maximum
        urls = urls[:4]
        docs_found = WebBaseLoader(urls).load()

        formatted_results = []
        for i, doc in enumerate(docs_found, 1):
            source = doc.metadata.get('source', 'Unknown source')
            title = doc.metadata.get('title', 'Untitled')
            # Keep only the first 1000 characters of content
            content = doc.page_content.strip()
            first_1000_chars = content[:1000] if content else "No content available"
            if len(content) > 1000:
                first_1000_chars += "..."
            formatted_doc = f"""--- DOCUMENT {i} START ---
Source: {source}
Title: {title}
Content: {first_1000_chars}
--- DOCUMENT {i} END ---"""
            formatted_results.append(formatted_doc)
        return "\n\n".join(formatted_results)
    except Exception as e:
        return f"Error during web search: {str(e)}"