Tingusto's picture
Build agent & graph
2f65b93
raw
history blame
6.1 kB
import os
from dotenv import load_dotenv
from langgraph.graph import START, StateGraph, MessagesState
from langgraph.prebuilt import tools_condition
from langgraph.prebuilt import ToolNode
from langchain_groq import ChatGroq
from langchain_community.document_loaders import WikipediaLoader
from langchain_community.document_loaders import ArxivLoader
from langchain_core.messages import SystemMessage, HumanMessage
from langchain_core.tools import tool
import requests
from bs4 import BeautifulSoup
import urllib.parse
load_dotenv()
@tool
def wiki_search(query: str) -> str:
    """Search Wikipedia for information.
    Args:
        query: The search query."""
    # NOTE: the docstring above is what `@tool` exposes to the model as the
    # tool description, so its wording is deliberately left unchanged.
    #
    # Every path returns a plain string, as the annotation promises: the
    # formatted documents on success, a short notice when nothing was found,
    # or an error description. Previously the success path returned a dict
    # while the error path returned a str.
    try:
        search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
        if not search_docs:
            return "No results found"
        # `.get` keeps a single document with incomplete metadata from
        # turning the whole search into an error.
        return "\n\n---\n\n".join(
            f'<Document source="{doc.metadata.get("source", "")}" '
            f'page="{doc.metadata.get("page", "")}"/>\n'
            f"{doc.page_content}\n</Document>"
            for doc in search_docs
        )
    except Exception as e:
        # Deliberate best effort: report the failure to the model as text
        # instead of raising, so the agent can try another tool.
        return f"Error searching Wikipedia: {str(e)}"
@tool
def web_search(query: str) -> str:
    """Search the web using DuckDuckGo.
    Args:
        query: The search query."""
    # NOTE: the docstring above is what `@tool` exposes to the model as the
    # tool description, so its wording is deliberately left unchanged.
    #
    # Scrapes the DuckDuckGo HTML endpoint and returns up to three
    # "Title/Snippet" entries as one string. Every path returns a plain
    # string, as the annotation promises.
    try:
        encoded_query = urllib.parse.quote(query)
        url = f"https://html.duckduckgo.com/html/?q={encoded_query}"
        # Browser-like User-Agent sent with the request.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        # Without a timeout, requests waits indefinitely on a stalled
        # connection, which would hang the whole agent run.
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        results = []
        for result in soup.find_all('div', class_='result__body'):
            title = result.find('h2', class_='result__title')
            snippet = result.find('a', class_='result__snippet')
            # Skip entries missing either part.
            if title and snippet:
                results.append(f"Title: {title.get_text()}\nSnippet: {snippet.get_text()}")
            # Cap the output at three results.
            if len(results) >= 3:
                break
        return "\n\n".join(results) if results else "No results found"
    except Exception as e:
        # Deliberate best effort: report the failure to the model as text
        # instead of raising, so the agent can try another tool.
        return f"Error searching web: {str(e)}"
@tool
def arxiv_search(query: str) -> str:
    """Search Arxiv for scientific papers.
    Args:
        query: The search query."""
    # NOTE: the docstring above is what `@tool` exposes to the model as the
    # tool description, so its wording is deliberately left unchanged.
    #
    # Every path returns a plain string, as the annotation promises: the
    # formatted documents (content truncated to 1000 characters each), a
    # short notice when nothing was found, or an error description.
    try:
        search_docs = ArxivLoader(query=query, load_max_docs=2).load()
        if not search_docs:
            return "No results found"

        def _source(doc) -> str:
            # A hard `metadata["source"]` lookup raised KeyError whenever the
            # key was absent, turning every successful search into an error.
            # NOTE(review): ArxivLoader's default metadata appears to use
            # keys such as "Title"/"Published" rather than "source" --
            # confirm against the installed langchain_community version.
            meta = doc.metadata
            return str(
                meta.get("source")
                or meta.get("entry_id")
                or meta.get("Title", "")
            )

        return "\n\n---\n\n".join(
            f'<Document source="{_source(doc)}" '
            f'page="{doc.metadata.get("page", "")}"/>\n'
            f"{doc.page_content[:1000]}\n</Document>"
            for doc in search_docs
        )
    except Exception as e:
        # Deliberate best effort: report the failure to the model as text
        # instead of raising, so the agent can try another tool.
        return f"Error searching Arxiv: {str(e)}"
# System prompt: instructions for the model on how to format and phrase its
# final answer (bare values, no explanations or markers), how to behave when
# uncertain, and a few special cases.
system_prompt = """You are a highly accurate question-answering assistant. Your task is to provide precise, direct answers to questions.
Key Rules:
1. Answer Format:
- For numbers: Provide only the number without units, commas, or formatting
- For text: Use minimal words, no articles or abbreviations
- For lists: Use comma-separated values without additional formatting
- For dates: Use YYYY-MM-DD format unless specified otherwise
- For names: Use full names without titles or honorifics
- For country codes: Use official IOC codes (3 letters)
- For chess moves: Use standard algebraic notation
- For currency: Use numbers only, no symbols
2. Answer Guidelines:
- Be extremely precise and direct
- Do not include any explanatory text
- Do not use phrases like "FINAL ANSWER" or any markers
- Do not include units unless explicitly requested
- Do not use abbreviations unless they are standard (e.g., DNA, RNA)
- For multiple choice: Provide only the letter or number of the correct answer
- For reversed text: Provide the answer in normal text
- For file-based questions: Focus on the specific information requested
3. Error Handling:
- If uncertain, provide the most likely answer based on available information
- If completely unsure, provide a reasonable default rather than an error message
- For file processing errors, indicate the specific issue
4. Special Cases:
- For mathematical questions: Provide the exact numerical result
- For historical dates: Use the most widely accepted date
- For scientific terms: Use the standard scientific notation
- For geographical locations: Use official names without abbreviations
- For audio/video questions: Focus on the specific detail requested"""
# System message: the prompt above wrapped as a SystemMessage so it can be
# placed in a chat message list.
sys_msg = SystemMessage(content=system_prompt)
# Tools list: the three @tool-decorated search functions defined above.
# build_graph() binds this list to the LLM and hands it to the ToolNode.
tools = [
wiki_search,
web_search,
arxiv_search,
]
def build_graph():
    """Build and compile the tool-calling agent graph.

    The graph has two nodes: ``assistant`` (the Groq chat model with the
    module-level ``tools`` bound) and ``tools`` (a ``ToolNode`` over the same
    list). Execution starts at ``assistant``; ``tools_condition`` decides
    after each assistant turn whether to route to ``tools``, and ``tools``
    always hands control back to ``assistant``.

    Returns:
        The compiled graph, invoked with ``{"messages": [...]}``.
    """
    # Initialize Groq LLM
    llm = ChatGroq(
        model="meta-llama/llama-4-maverick-17b-128e-instruct",
        temperature=0.1,
    )
    # Bind tools to LLM so it can emit tool calls for them
    llm_with_tools = llm.bind_tools(tools)

    def assistant(state: MessagesState):
        """Assistant node: call the LLM on the conversation so far."""
        conversation = list(state["messages"])
        # The module-level system prompt was previously never sent to the
        # model. Prepend it for the call only (it is not written back into
        # the state), and skip it when the caller already supplied a leading
        # system message so it is never duplicated.
        if not conversation or not isinstance(conversation[0], SystemMessage):
            conversation = [sys_msg] + conversation
        return {"messages": [llm_with_tools.invoke(conversation)]}

    # Build graph
    builder = StateGraph(MessagesState)
    builder.add_node("assistant", assistant)
    builder.add_node("tools", ToolNode(tools))
    builder.add_edge(START, "assistant")
    builder.add_conditional_edges(
        "assistant",
        tools_condition,
    )
    # After the tools run, return to the assistant with their output
    builder.add_edge("tools", "assistant")
    # Compile graph
    return builder.compile()
# Manual smoke test: run one sample question through the agent and print
# the resulting conversation.
if __name__ == "__main__":
    question = "When was a picture of St. Thomas Aquinas first added to the Wikipedia page on the Principle of double effect?"
    # Build the graph
    agent_graph = build_graph()
    # Run the graph on a single human message
    initial_state = {"messages": [HumanMessage(content=question)]}
    final_state = agent_graph.invoke(initial_state)
    # Print every message in the final state
    for message in final_state["messages"]:
        message.pretty_print()