from langchain_core.tools import tool
from langchain_community.document_loaders import WebBaseLoader, WikipediaLoader, ArxivLoader

@tool
def wikipedia_search(query: str) -> str:
    """
    Search Wikipedia for information.

    Args:
        query: The query to search for.

    Returns:
        The search results, one formatted block per document.
    """
    docs_found = WikipediaLoader(query=query, load_max_docs=5).load()
    # Format the docs found into a string, keeping just the first paragraph.
    formatted_results = []
    for i, doc in enumerate(docs_found, 1):
        source = doc.metadata.get('source', 'Unknown source')
        title = doc.metadata.get('title', 'Untitled')
        # Get the first paragraph (split by \n\n and take the first part).
        content = doc.page_content.strip()
        first_paragraph = content.split('\n\n')[0] if content else "No content available"
        formatted_doc = f"""--- DOCUMENT {i} START ---
Source: {source}
Title: {title}
Content: {first_paragraph}
--- DOCUMENT {i} END ---"""
        formatted_results.append(formatted_doc)
    return "\n\n".join(formatted_results)
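
# Quick check (illustrative, not part of the original file): tools created with
# the @tool decorator expose `.invoke`, which takes a dict keyed by argument
# name, so each search can be exercised on its own before wiring it into a
# graph, e.g.:
#
#   print(wikipedia_search.invoke({"query": "Alan Turing"}))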

@tool
def arxiv_search(query: str) -> str:
    """
    Search ArXiv for research papers.

    Args:
        query: The query to search for.

    Returns:
        The search results with abstracts.
    """
    docs_found = ArxivLoader(query=query, load_max_docs=3).load()
    formatted_results = []
    for i, doc in enumerate(docs_found, 1):
        # ArxivLoader uses capitalized metadata keys ('Title', 'Summary', ...);
        # fall back to the lowercase variants defensively.
        title = doc.metadata.get('Title') or doc.metadata.get('title', 'Untitled')
        source = doc.metadata.get('entry_id') or doc.metadata.get('source', 'Unknown source')
        # The abstract lives in the 'Summary' metadata field; page_content holds
        # the full paper text, so only fall back to it as a last resort.
        abstract = (doc.metadata.get('Summary')
                    or (doc.page_content.strip() if doc.page_content else "No abstract available"))
        formatted_doc = f"""--- DOCUMENT {i} START ---
Source: {source}
Title: {title}
Abstract: {abstract}
--- DOCUMENT {i} END ---"""
        formatted_results.append(formatted_doc)
    return "\n\n".join(formatted_results)

@tool
def web_search(query: str) -> str:
    """
    Fetch web pages for information.

    Args:
        query: One or more URLs (whitespace-separated) to fetch.

    Returns:
        The page contents, truncated to the first 1000 characters each.
    """
    # Note: WebBaseLoader fetches URLs rather than running a keyword search,
    # so this tool expects the query to contain URLs. For a general web
    # search you'd need a different approach, such as SerpAPI or Tavily.
    try:
        urls = [part for part in query.split() if part.startswith('http')]
        if not urls:
            return "No valid URLs provided for web search."
        # Limit to 4 URLs maximum.
        urls = urls[:4]
        docs_found = WebBaseLoader(urls).load()
        formatted_results = []
        for i, doc in enumerate(docs_found, 1):
            source = doc.metadata.get('source', 'Unknown source')
            title = doc.metadata.get('title', 'Untitled')
            # Get the first 1000 characters of content.
            content = doc.page_content.strip()
            first_1000_chars = content[:1000] if content else "No content available"
            if len(content) > 1000:
                first_1000_chars += "..."
            formatted_doc = f"""--- DOCUMENT {i} START ---
Source: {source}
Title: {title}
Content: {first_1000_chars}
--- DOCUMENT {i} END ---"""
            formatted_results.append(formatted_doc)
        return "\n\n".join(formatted_results)
    except Exception as e:
        return f"Error during web search: {e}"
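
# --- Usage sketch (assumption, not from the original commit) -----------------
# A minimal example of handing these tools to a LangGraph ReAct agent. It
# assumes `langgraph` and `langchain-openai` are installed and an OpenAI API
# key is configured; the model name below is an illustrative choice, not a
# project fixture.
#
#   from langchain_openai import ChatOpenAI
#   from langgraph.prebuilt import create_react_agent
#
#   llm = ChatOpenAI(model="gpt-4o-mini")
#   agent = create_react_agent(llm, [wikipedia_search, arxiv_search, web_search])
#   result = agent.invoke(
#       {"messages": [("user", "Find recent arXiv papers about RAG evaluation")]}
#   )
#   print(result["messages"][-1].content)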