from langchain_core.tools import tool
from langchain_community.document_loaders import WebBaseLoader, WikipediaLoader, ArxivLoader

@tool
def wikipedia_search(query: str) -> str:
    """
    Search Wikipedia for information.

    Args:
        query: The query to search for.

    Returns:
        The search results, one formatted block per document.
    """
    docs_found = WikipediaLoader(query=query, load_max_docs=5).load()
    # Format the docs found into a string, keeping just the first paragraph.
    formatted_results = []
    for i, doc in enumerate(docs_found, 1):
        source = doc.metadata.get('source', 'Unknown source')
        title = doc.metadata.get('title', 'Untitled')
        # Get the first paragraph (split by \n\n and take the first part).
        content = doc.page_content.strip()
        first_paragraph = content.split('\n\n')[0] if content else "No content available"
        formatted_doc = f"""--- DOCUMENT {i} START ---
Source: {source}
Title: {title}
Content: {first_paragraph}
--- DOCUMENT {i} END ---"""
        formatted_results.append(formatted_doc)
    return "\n\n".join(formatted_results)
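
# Quick check (illustrative, not part of the original file): tools created with
# the @tool decorator expose `.invoke`, which takes a dict keyed by argument
# name, so each search can be exercised on its own before wiring it into a
# graph, e.g.:
#
#   print(wikipedia_search.invoke({"query": "Alan Turing"}))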

@tool
def arxiv_search(query: str) -> str:
    """
    Search ArXiv for research papers.

    Args:
        query: The query to search for.

    Returns:
        The search results with abstracts.
    """
    docs_found = ArxivLoader(query=query, load_max_docs=3).load()
    formatted_results = []
    for i, doc in enumerate(docs_found, 1):
        # ArxivLoader uses capitalized metadata keys ('Title', 'Summary', ...);
        # fall back to the lowercase variants defensively.
        title = doc.metadata.get('Title') or doc.metadata.get('title', 'Untitled')
        source = doc.metadata.get('entry_id') or doc.metadata.get('source', 'Unknown source')
        # The abstract lives in the 'Summary' metadata field; page_content holds
        # the full paper text, so only fall back to it as a last resort.
        abstract = (doc.metadata.get('Summary')
                    or (doc.page_content.strip() if doc.page_content else "No abstract available"))
        formatted_doc = f"""--- DOCUMENT {i} START ---
Source: {source}
Title: {title}
Abstract: {abstract}
--- DOCUMENT {i} END ---"""
        formatted_results.append(formatted_doc)
    return "\n\n".join(formatted_results)

@tool
def web_search(query: str) -> str:
    """
    Fetch web pages for information.

    Args:
        query: One or more URLs (whitespace-separated) to fetch.

    Returns:
        The page contents, truncated to the first 1000 characters each.
    """
    # Note: WebBaseLoader fetches URLs rather than running a keyword search,
    # so this tool expects the query to contain URLs. For a general web
    # search you'd need a different approach, such as SerpAPI or Tavily.
    try:
        urls = [part for part in query.split() if part.startswith('http')]
        if not urls:
            return "No valid URLs provided for web search."
        # Limit to 4 URLs maximum.
        urls = urls[:4]
        docs_found = WebBaseLoader(urls).load()
        formatted_results = []
        for i, doc in enumerate(docs_found, 1):
            source = doc.metadata.get('source', 'Unknown source')
            title = doc.metadata.get('title', 'Untitled')
            # Get the first 1000 characters of content.
            content = doc.page_content.strip()
            first_1000_chars = content[:1000] if content else "No content available"
            if len(content) > 1000:
                first_1000_chars += "..."
            formatted_doc = f"""--- DOCUMENT {i} START ---
Source: {source}
Title: {title}
Content: {first_1000_chars}
--- DOCUMENT {i} END ---"""
            formatted_results.append(formatted_doc)
        return "\n\n".join(formatted_results)
    except Exception as e:
        return f"Error during web search: {e}"
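
# --- Usage sketch (assumption, not from the original commit) -----------------
# A minimal example of handing these tools to a LangGraph ReAct agent. It
# assumes `langgraph` and `langchain-openai` are installed and an OpenAI API
# key is configured; the model name below is an illustrative choice, not a
# project fixture.
#
#   from langchain_openai import ChatOpenAI
#   from langgraph.prebuilt import create_react_agent
#
#   llm = ChatOpenAI(model="gpt-4o-mini")
#   agent = create_react_agent(llm, [wikipedia_search, arxiv_search, web_search])
#   result = agent.invoke(
#       {"messages": [("user", "Find recent arXiv papers about RAG evaluation")]}
#   )
#   print(result["messages"][-1].content)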