from dotenv import load_dotenv
from typing import TypedDict, List, Dict, Any, Optional, Annotated
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint, HuggingFaceEmbeddings
from langchain_google_genai import ChatGoogleGenerativeAI # Added ChatGoogleGenerativeAI
from langchain_groq import ChatGroq
from langgraph.graph import StateGraph, MessagesState, START, END
from langgraph.graph.message import add_messages
from langchain_core.messages import SystemMessage, HumanMessage, AnyMessage, AIMessage
from langchain_core.messages.ai import subtract_usage
from langchain.tools import Tool
from langchain_core.tools import tool
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_community.document_loaders import WikipediaLoader
from langchain_community.document_loaders import ArxivLoader
from langchain_community.retrievers import BM25Retriever
from langgraph.prebuilt import ToolNode, tools_condition
from prompts import system_prompt
# Load environment variables from a local .env file at import time, before
# any of the clients below are constructed (presumably this is where the
# LLM / search API keys come from — confirm against deployment config).
load_dotenv()
# wikipedia search tool
@tool
def search_wiki(query: str) -> dict[str, str]:
    """Search Wikipedia for a query and return maximum 2 results.

    Args:
        query: The search query."""
    # NOTE: @tool publishes the docstring above to the model as the tool
    # description, so implementation notes live in comments instead.
    #
    # Returns a one-key dict, {"wiki_results": <text>}, where <text> is the
    # page content of every loaded document joined by a "---" divider. The
    # return annotation previously said `str`, which did not match.
    pages = WikipediaLoader(query=query, load_max_docs=2).load()
    joined_pages = "\n\n---\n\n".join(
        f"\n{page.page_content}\n" for page in pages
    )
    return {"wiki_results": joined_pages}
# internet search tool
@tool
def search_web(query: str) -> dict[str, str]:
    """Search Tavily for a query and return maximum 3 results.

    Args:
        query: The search query."""
    # NOTE: @tool publishes the docstring above to the model as the tool
    # description, so implementation notes live in comments instead.
    #
    # Returns a one-key dict, {"web_results": <text>}.
    #
    # `invoke` takes the tool input as its first positional argument; the
    # previous `invoke(query=query)` left it unset and raised TypeError.
    search_results = TavilySearchResults(max_results=3).invoke({"query": query})

    # When the Tavily request itself fails, the tool hands back an error
    # string instead of a list of hits; surface it to the model unchanged.
    if isinstance(search_results, str):
        return {"web_results": search_results}

    # Each hit is a plain dict (not a Document), with the page text stored
    # under "content" — attribute access via `.page_content` would fail.
    joined_hits = "\n\n---\n\n".join(
        f'\n{hit.get("content", "")}\n'
        for hit in search_results
        if isinstance(hit, dict)
    )
    return {"web_results": joined_hits}
# ArXiv search tool
@tool
def search_arxiv(query: str) -> dict[str, str]:
    """Search Arxiv for a query and return maximum 3 results.

    Args:
        query: The search query."""
    # NOTE: @tool publishes the docstring above to the model as the tool
    # description, so implementation notes live in comments instead.
    #
    # Returns a one-key dict whose value is the first 1000 characters of each
    # loaded paper, joined by a "---" divider. The return annotation
    # previously said `str`, which did not match.
    papers = ArxivLoader(query=query, load_max_docs=3).load()
    joined_papers = "\n\n---\n\n".join(
        # Truncate so three full papers do not flood the model's context.
        f"\n{paper.page_content[:1000]}\n" for paper in papers
    )
    # NOTE(review): the key is spelled "arvix" (sic). It is kept as-is because
    # it is part of the tool output; rename only after checking that no
    # prompt or downstream consumer relies on this spelling.
    return {"arvix_results": joined_papers}
# build retriever (currently disabled)
# bm25_retriever = BM25Retriever.from_documents(docs)

# System prompt from prompts.py, wrapped as a message object.
sys_msg = SystemMessage(content=system_prompt)

# Tools made available to the agent, in the order they are bound to the model.
tools = [search_web, search_wiki, search_arxiv]
# build graph function
def build_graph():
    """Build and compile the tool-calling agent graph.

    The graph has two nodes:

    * ``assistant`` - the Gemini chat model with the module-level ``tools``
      bound to it.
    * ``tools`` - a ``ToolNode`` over the same ``tools`` list.

    Execution starts at ``assistant``. ``tools_condition`` then routes either
    to ``tools`` (whose output is fed back into ``assistant``) or to ``END``.

    Returns:
        The compiled graph. Invoke it with ``{"messages": [...]}``.
    """
    # llm
    llm = ChatGoogleGenerativeAI(
        model="gemini-2.5-flash-preview-04-17",
        temperature=0,
    )
    # Alternative backend, kept for quick switching:
    # llm = ChatGroq(
    #     model="meta-llama/llama-4-scout-17b-16e-instruct",
    #     temperature=0.1,
    # )
    print(f"DEBUG: llm object = {llm}")

    # bind tools to llm
    llm_with_tools = llm.bind_tools(tools)
    print(f"DEBUG: llm_with_tools object = {llm_with_tools}")

    # generate AgentState and Agent graph
    class AgentState(TypedDict):
        # Conversation history; `add_messages` is the reducer that merges each
        # node's returned messages into this list.
        messages: Annotated[list[AnyMessage], add_messages]

    def assistant(state: AgentState):
        """Run one model turn over the conversation and return its reply."""
        history = state["messages"]
        # The system prompt was previously built but never sent to the model.
        # Prepend it for the call only (it is not written back into state),
        # unless the caller already supplied a system message of their own.
        if not any(isinstance(msg, SystemMessage) for msg in history):
            history = [sys_msg] + list(history)

        result = llm_with_tools.invoke(history)
        print(f"DEBUG: LLM result = {result}")

        # Fill in zeroed usage metadata when the provider returned none, so
        # the message always carries a usage_metadata dict.
        if isinstance(result, AIMessage) and result.usage_metadata is None:
            result.usage_metadata = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}

        # State updates are expressed as a list of new messages.
        return {
            "messages": [result]
        }

    # build graph
    builder = StateGraph(AgentState)

    # define nodes
    builder.add_node("assistant", assistant)
    builder.add_node("tools", ToolNode(tools))

    # define edges
    builder.add_edge(START, "assistant")
    builder.add_conditional_edges(
        "assistant",
        tools_condition,
        {
            # If the latest message requires a tool, route to tools
            "tools": "tools",
            # Otherwise, provide a direct response
            END: END,
        },
    )
    # Tool output always goes back to the model for the next turn.
    builder.add_edge("tools", "assistant")

    return builder.compile()
if __name__ == "__main__":
    # Manual smoke run: send one sample question through the agent and
    # pretty-print every message of the resulting conversation.
    question = "When was a picture of St. Thomas Aquinas first added to the Wikipedia page on the Principle of double effect?"
    agent = build_graph()
    final_state = agent.invoke({"messages": [HumanMessage(content=question)]})
    for message in final_state["messages"]:
        message.pretty_print()