"""LangGraph state graph construction for the LLM-powered agent."""
import base64
import random
import time
from typing import Annotated, Literal, TypedDict
from langchain_core.messages import AIMessage, BaseMessage, ToolMessage
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.tools import render_text_description
from langchain_openai import ChatOpenAI
from langgraph.graph import StateGraph
from langgraph.graph.message import add_messages
from openai import RateLimitError
from .constants import CACHE_DIR
from .tools import tool_classes
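# The graph state is a single message list. The `add_messages` reducer appends
# each node's returned messages to the running history instead of replacing it,
# which is how agent_node and tool_node accumulate the conversation.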
class AgentState(TypedDict):
messages: Annotated[list[BaseMessage], add_messages]
class SimpleRateLimiter:
"""Simple token bucket rate limiter to prevent hitting API limits."""
def __init__(self, calls_per_minute=50):
self.calls_per_minute = calls_per_minute
self.call_times = []
def wait_if_needed(self):
"""Wait if we're about to exceed rate limit."""
now = time.time()
# Remove calls older than 1 minute
self.call_times = [t for t in self.call_times if now - t < 60]
# If we're at the limit, wait
if len(self.call_times) >= self.calls_per_minute:
sleep_time = 60 - (now - self.call_times[0]) + 1
if sleep_time > 0:
print(f"⏳ Rate limiter: waiting {sleep_time:.1f}s to avoid hitting limits...")
time.sleep(sleep_time)
self.call_times = []
# Record this call
self.call_times.append(time.time())
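# Illustrative behavior of the limiter (hypothetical numbers): with
# calls_per_minute=2, a third call inside the same 60-second window blocks
# until the oldest recorded call ages out.
#
#     limiter = SimpleRateLimiter(calls_per_minute=2)
#     limiter.wait_if_needed()  # returns immediately
#     limiter.wait_if_needed()  # returns immediately
#     limiter.wait_if_needed()  # sleeps until a slot frees up, then records the call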
# Global variables that will be initialized in build_agent_graph()
_tools = None
_agent_chain = None
_generation_chain = None
_primary_llm = None
_fallback_llm = None
_rate_limiter = SimpleRateLimiter(calls_per_minute=40) # Conservative limit
def _call_llm_with_retry(chain, state, max_retries=5):
"""
Call LLM with exponential backoff retry logic.
Falls back to cheaper model if primary keeps failing.
"""
for attempt in range(max_retries):
try:
# Wait if we're approaching rate limits
_rate_limiter.wait_if_needed()
return chain.invoke(state)
        except RateLimitError:
            # Exponential backoff with up to 1s of random jitter, capped at 60s
            wait_time = min(60, (2 ** attempt) + random.random())
print(f"⚠️ Rate limit hit (attempt {attempt + 1}/{max_retries})")
print(f" Waiting {wait_time:.1f}s before retry...")
time.sleep(wait_time)
except Exception as e:
# For other errors, don't retry
print(f"❌ LLM error: {e}")
raise
    # If all retries failed, try the fallback model
    print("🔄 All retries exhausted, switching to fallback model (gpt-4o-mini)...")
    if _fallback_llm is not None:
        try:
            # Rebuild the chain with the fallback LLM, reusing the original prompt
            # (chain.first) and binding the same tools so the fallback model can
            # still participate in the agent loop
            fallback_chain = chain.first | _fallback_llm.bind_tools(_tools)
            return fallback_chain.invoke(state)
        except Exception as e:
            print(f"❌ Fallback model also failed: {e}")
            raise
    raise RuntimeError("All retry attempts and fallback failed")
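# Backoff schedule produced by _call_llm_with_retry (plus up to 1s of jitter,
# capped at 60s): attempt 0 -> ~1s, 1 -> ~2s, 2 -> ~4s, 3 -> ~8s, 4 -> ~16s,
# i.e. roughly 31s of waiting across the default five attempts.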
def _initialize_chains_and_tools():
"""Initialize the tools and LLM chains. Called once when building the graph."""
global _tools, _agent_chain, _generation_chain, _primary_llm, _fallback_llm
if _tools is not None:
return # Already initialized
    # Initialize PRIMARY LLM (gpt-4.1)
    print("🔧 Initializing primary LLM: gpt-4.1")
    _primary_llm = ChatOpenAI(
        model="gpt-4.1",
        temperature=0,
        verbose=True,
        request_timeout=60,  # 60 second timeout
    )
# Initialize FALLBACK LLM (gpt-4o-mini - cheaper, faster)
print("πŸ”§ Initializing fallback LLM: gpt-4o-mini")
_fallback_llm = ChatOpenAI(
model="gpt-4o-mini",
temperature=0,
verbose=True,
request_timeout=60
)
llm = _primary_llm
    # Instantiate the tools. Each entry in tool_classes is either a tool class or
    # a zero-argument factory function (like create_wikipedia_tool); calling it
    # yields a tool instance either way, so no branching is needed.
    _tools = [tool() for tool in tool_classes]
    # CRITICAL: Bind tools to the LLM using OpenAI's native function calling
    llm = llm.bind_tools(_tools)
# Render the tools to a text description for the prompt
rendered_tools = render_text_description(_tools)
# Create the system prompt
system_prompt = f"""You are a highly capable AI assistant designed to solve complex, real-world questions.
REASONING STRATEGY (CRITICAL):
1. **Decompose**: Break complex questions into smaller sub-questions
2. **Plan**: Before using tools, outline your complete strategy
3. **Execute**: Use tools systematically, one step at a time
4. **Verify**: Check each result before proceeding to the next step
5. **Self-correct**: If a tool fails or gives unexpected results, try alternative approaches
6. **Synthesize**: Combine information from multiple sources to form your final answer
FILE HANDLING - CRITICAL:
⚠️ **Files mentioned as "attached" are ALREADY in the current directory!**
- When question says "attached Excel file", "attached image", "attached .mp3" - use `list_files` to find them
- Files are pre-downloaded before you start, so they WILL be in current directory
- **NEVER** ask for URLs for "attached" files - they're already there!
- Workflow:
1. Use `list_files` to see what's available
2. Find the relevant file (Excel, image, mp3, etc.)
3. Process it with appropriate tool:
- Excel (.xlsx, .xls): use `read_excel` tool to get summary and data
- CSV: use `python_repl` with pandas: `pd.read_csv('filename.csv')`
- Python files (.py): use `execute_python_file` tool to run and get output
- Text files: use `read_file` tool
- Images (.png, .jpg): use `analyze_image` tool (Gemini vision) - great for chess, diagrams, text in images
- MP3/Audio files: use `understand_audio` tool (Gemini audio) - transcribes and understands audio
MULTIMEDIA HANDLING:
- For YouTube videos: use `understand_video` tool with format: 'URL: <youtube_url> | QUESTION: <specific_question>'
Example: understand_video('URL: https://www.youtube.com/watch?v=abc | QUESTION: How many bird species are visible?')
This ensures Gemini knows exactly what to look for in the video
- For audio files (.mp3): use `understand_audio` tool - Gemini will transcribe and answer questions
- For images: use `analyze_image` tool - Gemini can read text, analyze chess positions, describe images
- For web URLs: use `download_file` if you need to download something from the internet
TOOL USAGE BEST PRACTICES:
- Use `calculator` for precise mathematical operations (faster than python_repl)
- Use `wikipedia` for factual knowledge about people, places, events
- Use `tavily_search` for recent information or specific facts
- Use `youtube_transcript` for YouTube video content analysis
- Use `read_excel` for quick Excel file inspection
- Use `python_repl` for complex data analysis and calculations
- Chain multiple tools when needed (e.g., search → extract info → calculate)
AVAILABLE TOOLS:
{rendered_tools}
RESPONSE FORMAT:
- To use a tool: issue a native function/tool call (the tools are bound to the model)
- When finished: reply in plain text containing only the final answer (no tool calls)
⚠️ CRITICAL - PROVIDE ONLY THE FINAL ANSWER ⚠️
DO NOT include explanations, reasoning, or extra text in your final answer.
Examples:
- Question: "How many albums?" β†’ Answer: "2" (NOT "Mercedes Sosa published 2 albums...")
- Question: "What city?" β†’ Answer: "Paris" (NOT "The city is Paris")
- Question: "Total sales?" β†’ Answer: "1234.56" (NOT "The total sales are $1,234.56")
BE EXTREMELY CONCISE. The scoring system only wants the literal answer.
CRITICAL - ANSWER FORMATTING RULES:
The scoring system is very strict about format. Follow these rules EXACTLY:
1. **For NUMBER answers**:
   - Remove currency symbols ($, €, £)
- Remove percentage signs (%)
- Remove commas from large numbers
- Provide just the number: "1234.56" not "$1,234.56"
2. **For LIST answers** (comma-separated):
- Use ONLY commas to separate items (or semicolons if specified)
- NO extra spaces around commas
- Count must match exactly
- Order matters!
- Example: "apple,banana,cherry" NOT "apple, banana, cherry"
3. **For STRING answers**:
- Be concise - extra words will cause mismatch
- Capitalization doesn't matter
- Punctuation doesn't matter
- Spaces don't matter
- But be precise with the core answer
4. **For NAMES**:
- Use full names if asked
- Use last names only if specified
- Check the question carefully for format requirements
5. **For CODES** (IOC, airport, etc.):
- Use exact format requested (uppercase/lowercase)
- No extra characters
DOUBLE-CHECK YOUR FINAL ANSWER FORMAT BEFORE RETURNING!
"""
prompt = ChatPromptTemplate.from_messages([
("system", system_prompt),
("placeholder", "{messages}"),
])
# Create the LLM chains
_agent_chain = prompt | llm
generation_prompt = ChatPromptTemplate.from_messages([
("system", "You are a helpful assistant. Answer the user's question based on the conversation history."),
("placeholder", "{messages}"),
])
_generation_chain = generation_prompt | llm
def agent_node(state: AgentState) -> dict:
"""Invokes the LLM to decide on the next action with retry logic."""
print("\nπŸ€– [AGENT NODE] Deciding next action...")
# Use retry logic
response = _call_llm_with_retry(_agent_chain, state)
# Check if there are tool calls
if hasattr(response, 'tool_calls') and response.tool_calls:
print(f"πŸ“ [AGENT NODE] Requesting {len(response.tool_calls)} tool call(s)")
for tc in response.tool_calls:
print(f" - {tc['name']}")
else:
print(f"πŸ“ [AGENT NODE] Response: {response.content[:200]}...")
return {"messages": [response]}
def generation_node(state: AgentState) -> dict:
"""Invokes the LLM to generate a final answer."""
print("\n✨ [GENERATION NODE] Creating final answer...")
response = _generation_chain.invoke(state)
print(f"βœ… [GENERATION NODE] Final answer: {response.content[:200]}...")
return {"messages": [AIMessage(content=response.content)]}
def tool_node(state: AgentState) -> dict:
"""Runs the tools using OpenAI's native tool calling."""
print("\nπŸ”§ [TOOL NODE] Executing tools...")
last_message = state["messages"][-1]
# Check if the message has tool_calls (OpenAI's native format)
if not hasattr(last_message, 'tool_calls') or not last_message.tool_calls:
print("⚠️ [TOOL NODE] No tool calls found")
return {"messages": []}
tool_messages = []
for tool_call in last_message.tool_calls:
tool_name = tool_call['name']
tool_args = tool_call['args']
tool_call_id = tool_call['id']
print(f" πŸ› οΈ Calling tool: {tool_name}")
print(f" Args: {str(tool_args)[:100]}...")
tool_to_call = next((t for t in _tools if t.name == tool_name), None)
if tool_to_call:
try:
observation = tool_to_call.invoke(tool_args)
result_preview = str(observation)[:150]
print(f" βœ… Result: {result_preview}...")
tool_messages.append(ToolMessage(
content=str(observation),
tool_call_id=tool_call_id
))
except Exception as e:
print(f" ❌ Error: {e}")
tool_messages.append(ToolMessage(
content=f"Error: {e}",
tool_call_id=tool_call_id
))
else:
print(f" ⚠️ Tool '{tool_name}' not found")
print(f"πŸ”§ [TOOL NODE] Executed {len(tool_messages)} tool(s)")
return {"messages": tool_messages}
def should_continue(state: AgentState) -> Literal["tools", "__end__"]:
"""Determines the next node to execute based on OpenAI's tool calls."""
last_message = state["messages"][-1]
# If the last message has tool calls, go to tools node
if hasattr(last_message, 'tool_calls') and last_message.tool_calls:
print("➑️ Routing to: TOOLS")
return "tools"
# Otherwise, we're done
print("➑️ Routing to: END")
return "__end__"
def build_agent_graph():
    """Builds and compiles the state graph for the agent."""
# Initialize tools and chains (only happens once)
_initialize_chains_and_tools()
workflow = StateGraph(AgentState)
workflow.add_node("agent", agent_node)
workflow.add_node("tools", tool_node)
workflow.set_entry_point("agent")
workflow.add_conditional_edges("agent", should_continue)
workflow.add_edge("tools", "agent")
return workflow.compile()
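# Minimal usage sketch (illustrative; the question is a placeholder): the
# compiled graph loops agent -> tools -> agent until the model replies without
# tool calls, and the last message holds the final answer.
#
#     from langchain_core.messages import HumanMessage
#
#     graph = build_agent_graph()
#     result = graph.invoke({"messages": [HumanMessage(content="What is 2 + 2?")]})
#     print(result["messages"][-1].content)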
def agent_graph_mermaid() -> str:
"""Returns the LangGraph structure in Mermaid format."""
graph = build_agent_graph()
return graph.get_graph().draw_mermaid()
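# draw_png relies on langgraph's optional pygraphviz dependency; when it is not
# installed, the helper below logs a warning and returns None instead of raising.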
def agent_graph_png_base64(filename: str = "agent_graph.png") -> str | None:
"""Generates a PNG of the agent graph and returns it as a base64 string."""
graph = build_agent_graph()
output_path = CACHE_DIR / filename
try:
graph.get_graph().draw_png(str(output_path))
except Exception as exc:
print(f"Warning: Failed to render agent graph PNG: {exc}")
return None
try:
return base64.b64encode(output_path.read_bytes()).decode("ascii")
except Exception as exc:
print(f"Warning: Unable to read rendered graph PNG: {exc}")
return None