jafhaponiuk committed on
Commit 88124ec · verified · 1 parent: de2bc77

Update agent.py

Files changed (1): agent.py (+185, −58)
agent.py CHANGED
@@ -1,11 +1,13 @@
 import operator
 import os
 import json
+import re
 from typing import TypedDict, Annotated, List, Dict, Any, Union
+from datetime import datetime
 from dotenv import load_dotenv
 from tools import tools_for_llm
 from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, ToolMessage
-from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
+from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder, HumanMessagePromptTemplate, SystemMessagePromptTemplate
 from langchain_google_genai import ChatGoogleGenerativeAI
 from langgraph.graph import StateGraph, END
 from langgraph.prebuilt import ToolNode
@@ -15,7 +17,13 @@ load_dotenv()
 
 # --- Initialize the language model ---
 llm = ChatGoogleGenerativeAI(
-    model="gemini-2.0-flash-exp",
+    # model="gemini-1.5-pro",  # 404
+    # model="gemini-2.0-flash-lite",  # worked, but caused hallucinations with the tools
+    # model="gemini-2.5-flash-lite",  # tool-calling problem with LangChain
+    # model="gemini-1.5-flash",  # 404
+    # model="gemini-1.5-flash-001",  # 404
+    # model="gemini-2.0-flash-001",
+    model="gemini-2.5-flash-lite",
     temperature=0,
     google_api_key=os.getenv("GOOGLE_API_KEY"),
 )
@@ -24,77 +32,173 @@ llm = ChatGoogleGenerativeAI(
 with open("system_prompt.txt", "r", encoding="utf-8") as f:
     SYSTEM_PROMPT_CONTENT = f.read()
 
+# --- Helper to parse the LLM's text output into an action ---
+def parse_llm_output(text: str) -> dict:
+    """Parses LLM text output for a final_answer or a tool-call fallback."""
+    action_match = re.search(
+        r"Action: (.+?)\s*Action Input: (\{.*?\}\s*)",
+        text,
+        re.DOTALL
+    )
+
+    if action_match:
+        action_type = action_match.group(1).strip()
+        action_input_str = action_match.group(2).strip()
+
+        try:
+            action_args = json.loads(action_input_str)
+
+            if action_type.lower() == "final_answer":
+                # Return the final answer
+                return {"action": "final_answer", "answer": action_args.get("answer")}
+            else:
+                # Fallback: process a manual, text-based tool call
+                return {"action": "tool", "tool_name": action_type, "tool_args": action_args}
+        except json.JSONDecodeError:
+            return {"action": "fail", "answer": f"Invalid JSON in Action Input: {action_input_str}"}
+
+    return {"action": "fail", "answer": "Could not parse LLM output. It did not match the expected format."}
+
 # --- Agent State Definition ---
 class AgentState(TypedDict):
     """Represents the state of the agent at each step of the graph."""
     input: str
     chat_history: Annotated[List[BaseMessage], operator.add]
     llm_response_raw: Union[AIMessage, None]
-    final_answer: Union[str, None]
+    output: Union[str, None]
+    parsed_action: Union[Dict[str, Any], None]
+    tool_output: Union[Any, None]
+    tool_descriptions_str: str
 
 # --- Graph Nodes ---
 def call_llm(state: AgentState) -> AgentState:
-    """Prompts the LLM to decide on tools or provide direct answer."""
+    """Prompts the LLM to decide on a tool and its arguments, or provide a direct answer."""
+    print(f"[{__name__}] call_llm: State received (keys): {list(state.keys())}")
     current_input = state["input"]
     chat_history = state.get("chat_history", [])
-
-    # Filter out tool messages to avoid context overflow
-    filtered_history = [msg for msg in chat_history if not isinstance(msg, ToolMessage)]
-
-    prompt = ChatPromptTemplate.from_messages([
-        ("system", SYSTEM_PROMPT_CONTENT),
+    tool_descriptions_str = state["tool_descriptions_str"]
+
+    decision_prompt_template = ChatPromptTemplate.from_messages([
+        SystemMessagePromptTemplate.from_template(SYSTEM_PROMPT_CONTENT.replace("{{tool_descriptions}}", tool_descriptions_str)),
         MessagesPlaceholder(variable_name="chat_history"),
-        ("human", "{input}"),
+        HumanMessagePromptTemplate.from_template("{input}"),
     ])
 
-    # Bind tools for native tool calling
-    chain = prompt | llm.bind_tools(tools_for_llm)
+    # Bind tools to the LLM for native tool-call generation
+    chain = decision_prompt_template | llm.bind_tools(tools_for_llm)
 
     response = chain.invoke({
         "input": current_input,
-        "chat_history": filtered_history
+        "chat_history": chat_history
    })
 
-    print(f"[call_llm] LLM response: {response.content}")
-    if response.tool_calls:
-        print(f"[call_llm] Tool calls detected: {response.tool_calls}")
+    print(f"[{__name__}] LLM raw decision response: {response.content}")
 
-    return {
-        "input": current_input,
-        "chat_history": chat_history + [response],
-        "llm_response_raw": response,
-        "final_answer": response.content if not response.tool_calls else None
-    }
+    # NEW LOGIC: Always parse the text output first to get the true intent (especially final_answer).
+    parsed_action = parse_llm_output(response.content)
+
+    # Case A: the action is a FINAL_ANSWER (highest priority)
+    if parsed_action.get("action") == "final_answer":
+        # CRITICAL FIX: If the text is a final_answer, clear any inconsistent
+        # native tool_calls signal to prevent the ToolNode crash and ensure routing to END.
+        if response.tool_calls:
+            response.tool_calls = []
+
+    # Case B: the action is a TOOL CALL
+    elif parsed_action.get("action") == "tool":
+        # Sub-case B.1: fallback detected (text tool call, but native tool_calls is missing)
+        if not response.tool_calls:
+            # CRITICAL INJECTION: inject the native tool call into the AIMessage for ToolNode to use.
+            tool_name = parsed_action.get("tool_name")
+            tool_args = parsed_action.get("tool_args")
+
+            # Construct the native ToolCall object
+            tool_call = {
+                "name": tool_name,
+                "args": tool_args,
+                # A temporary ID is required by LangGraph/ToolNode
+                "id": f"call_{tool_name}_{datetime.now().timestamp()}",
+                "type": "tool_call",
+            }
+
+            # Inject the tool call into the AIMessage
+            response.tool_calls = [tool_call]
+
+        # Sub-case B.2: the native tool call is already correctly present.
+
+    # Case C: a native tool-call signal exists, but text parsing failed (use the native signal).
+    # This covers the case where the LLM generated a native tool call but no text.
+    elif response.tool_calls:
+        parsed_action = {"action": "tool"}
+
+    # Case D: failure or other action; parsed_action was already set by parse_llm_output (e.g., "fail").
+    new_state = AgentState(
+        input=current_input,
+        chat_history=chat_history + [response],
+        llm_response_raw=response,
+        parsed_action=parsed_action,
+        tool_output=None,
+        output=None,
+        tool_descriptions_str=tool_descriptions_str
+    )
+    return new_state
+
+def format_final_answer_node(state: AgentState) -> AgentState:
+    """Formats the final answer from the LLM for the agent's output."""
+    parsed_action = state.get("parsed_action")
+
+    # Check that parsed_action is a valid dictionary before proceeding
+    if isinstance(parsed_action, dict) and "answer" in parsed_action:
+        final_answer_content = parsed_action.get("answer")
+    else:
+        # If parsing failed, set a generic error message
+        final_answer_content = "An error occurred while formatting the final answer. The LLM's response could not be parsed correctly."
+        print(f"[{__name__}] ERROR: The parsed_action dictionary is invalid or missing the 'answer' key. Parsed action: {parsed_action}")
+
+    new_state = AgentState(
+        input=state["input"],
+        chat_history=state["chat_history"],
+        llm_response_raw=state["llm_response_raw"],
+        parsed_action=parsed_action,
+        tool_output=None,
+        output=final_answer_content,
+        tool_descriptions_str=state["tool_descriptions_str"]
+    )
+    print(f"[{__name__}] Final answer formatted and added to state.")
+    return new_state
 
 def route_action(state: AgentState) -> str:
-    """Routes the graph based on LLM response."""
-    response = state["llm_response_raw"]
+    """Routes the graph based on the LLM's parsed action."""
+    print(f"[{__name__}] route_action: State received (keys): {list(state.keys())}")
 
-    if response.tool_calls:
-        print("[route_action] Routing to execute_tool")
+    # PRIORITY 1: native LangChain tool-call detection (must be checked first)
+    if state["llm_response_raw"] and state["llm_response_raw"].tool_calls:
+        print(f"[{__name__}] Native tool call detected. Routing to 'execute_tool'.")
         return "execute_tool"
-    else:
-        print("[route_action] Routing to final_answer")
-        return "final_answer"
 
+    # PRIORITY 2: manual parser detection (for final_answer/tool/fail)
+    parsed_action = state.get("parsed_action")
+    action_type = parsed_action.get("action")
+
+    if action_type == "final_answer":
+        print(f"[{__name__}] Final Answer detected. Routing to 'format_final_answer'.")
+        return "format_final_answer"
+    elif action_type == "tool":
+        print(f"[{__name__}] Manual tool action detected. Routing to 'execute_tool'.")
+        return "execute_tool"
+    else:
+        # Catches the parser's 'fail' action, sending control back to the LLM to try again
+        print(f"[{__name__}] Could not parse action '{action_type}'. Routing back to 'call_llm'.")
+        return "call_llm"
 
-def format_final_answer(state: AgentState) -> AgentState:
-    """Formats the final answer for output."""
-    response = state["llm_response_raw"]
-    final_answer = response.content if response else "No response generated"
-
-    print(f"[format_final_answer] Final answer: {final_answer}")
-    return {
-        "input": state["input"],
-        "chat_history": state["chat_history"],
-        "llm_response_raw": state["llm_response_raw"],
-        "final_answer": final_answer
-    }
-
 # --- Build the agent graph ---
 builder = StateGraph(AgentState)
 builder.add_node("call_llm", call_llm)
-builder.add_node("execute_tool", ToolNode(tools_for_llm))
-builder.add_node("final_answer", format_final_answer)
+
+# ToolNode fixes the previous 'tool_call_id' error
+builder.add_node("execute_tool", ToolNode(tools_for_llm))
+
+builder.add_node("format_final_answer", format_final_answer_node)
 
 builder.set_entry_point("call_llm")
 
@@ -103,12 +207,12 @@ builder.add_conditional_edges(
     route_action,
     {
         "execute_tool": "execute_tool",
-        "final_answer": "final_answer"
-    }
-)
+        "final_answer": "format_final_answer",
+        "call_llm": "call_llm"
+    })
 
 builder.add_edge("execute_tool", "call_llm")
-builder.add_edge("final_answer", END)
+builder.add_edge("format_final_answer", END)
 
 agent_executor = builder.compile()
 
@@ -116,30 +220,53 @@ agent_executor = builder.compile()
 class BasicAgent:
     def __init__(self):
         self.agent = agent_executor
+        self._tool_descriptions_str = self._get_tool_descriptions()
 
     def __call__(self, question: str) -> str:
         initial_state: AgentState = {
             "input": question,
-            "chat_history": [],
+            "chat_history": [HumanMessage(content=question)],
             "llm_response_raw": None,
-            "final_answer": None
+            "parsed_action": None,
+            "tool_output": None,
+            "output": None,
+            "tool_descriptions_str": self._get_tool_descriptions()
         }
 
         final_state = self.agent.invoke(initial_state)
-        return final_state.get("final_answer", "No answer generated.")
+
+        final_answer = final_state.get("output", "I could not find a final answer.")
+
+        return final_answer
+
+    def _get_tool_descriptions(self):
+        """Helper to get tool descriptions outside the graph."""
+        descriptions = []
+        for tool_item in tools_for_llm:
+            escaped_description = tool_item.description.replace("{", "{{").replace("}", "}}")
+            descriptions.append(f"- {tool_item.name}: {escaped_description}")
+        return "\n".join(descriptions)
 
 if __name__ == "__main__":
     print("Testing BasicAgent locally...")
     try:
         agent = BasicAgent()
-
-        print("\n--- Test 1: Simple question ---")
+        print("\n--- Test 1: Simple question, should answer directly ---")
         response1 = agent("What is the capital of France?")
-        print(f"Response: {response1}")
+        print(f"Agent Response: {response1}")
+
+        print("\n--- Test 2: Question requiring a tool (e.g., web_search) ---")
+        response2 = agent("What is the current population of the United States? (as of today)")
+        print(f"Agent Response: {response2}")
 
-        print("\n--- Test 2: Math question ---")
-        response2 = agent("What is 15 multiplied by 23?")
-        print(f"Response: {response2}")
+        print("\n--- Test 3: Math question (e.g., calculator tool) ---")
+        response3 = agent("What is 15 multiplied by 23?")
+        print(f"Agent Response: {response3}")
+
+        print("\n--- Test 4: Question requiring the new PDF tool ---")
+        response4 = agent("According to the document 'test.pdf', what is the main conclusion of the report?")
+        print(f"Agent Response: {response4}")
 
     except Exception as e:
-        print(f"Error during testing: {e}")
+        print(f"\nError during local testing: {e}")
+        print("Please ensure your GOOGLE_API_KEY and TAVILY_API_KEY are set.")