Humanlearning committed
Commit fe36046 · 1 Parent(s): 954a1b2

multi agent architecture

Files changed (50)
  1. .cursor/rules/langfuse_best_practices.mdc +80 -0
  2. .cursor/rules/langgraph_multiagent_state_handling.mdc +140 -0
  3. ARCHITECTURE.md +184 -0
  4. __pycache__/debug_test.cpython-313-pytest-8.4.0.pyc +0 -0
  5. __pycache__/langraph_agent.cpython-313.pyc +0 -0
  6. __pycache__/new_langraph_agent.cpython-313.pyc +0 -0
  7. __pycache__/quick_random_agent_test.cpython-313-pytest-8.4.0.pyc +0 -0
  8. __pycache__/quick_specific_agent_test.cpython-313-pytest-8.4.0.pyc +0 -0
  9. __pycache__/test_new_system.cpython-313-pytest-8.4.0.pyc +0 -0
  10. __pycache__/test_random_question.cpython-313-pytest-8.4.0.pyc +0 -0
  11. __pycache__/test_tools_integration.cpython-313-pytest-8.4.0.pyc +0 -0
  12. app.py +9 -10
  13. debug_retrieval_tools.py +149 -0
  14. langraph_agent.py +97 -34
  15. new_langraph_agent.py +85 -0
  16. prompts/critic_prompt.txt +31 -0
  17. prompts/execution_prompt.txt +42 -0
  18. prompts/retrieval_prompt.txt +34 -0
  19. prompts/router_prompt.txt +44 -0
  20. system_prompt.txt → prompts/system_prompt.txt +2 -1
  21. prompts/verification_prompt.txt +30 -0
  22. pyproject.toml +3 -0
  23. quick_random_agent_test.py +51 -21
  24. quick_specific_agent_test.py +64 -32
  25. requirements.txt +34 -3
  26. src/__init__.py +14 -0
  27. src/__pycache__/__init__.cpython-313.pyc +0 -0
  28. src/__pycache__/langgraph_system.cpython-313.pyc +0 -0
  29. src/__pycache__/memory.cpython-313.pyc +0 -0
  30. src/__pycache__/tracing.cpython-313.pyc +0 -0
  31. src/agents/__init__.py +21 -0
  32. src/agents/__pycache__/__init__.cpython-313.pyc +0 -0
  33. src/agents/__pycache__/critic_agent.cpython-313.pyc +0 -0
  34. src/agents/__pycache__/execution_agent.cpython-313.pyc +0 -0
  35. src/agents/__pycache__/plan_node.cpython-313.pyc +0 -0
  36. src/agents/__pycache__/retrieval_agent.cpython-313.pyc +0 -0
  37. src/agents/__pycache__/router_node.cpython-313.pyc +0 -0
  38. src/agents/__pycache__/verification_node.cpython-313.pyc +0 -0
  39. src/agents/critic_agent.py +118 -0
  40. src/agents/execution_agent.py +174 -0
  41. src/agents/plan_node.py +79 -0
  42. src/agents/retrieval_agent.py +268 -0
  43. src/agents/router_node.py +97 -0
  44. src/agents/verification_node.py +172 -0
  45. src/langgraph_system.py +231 -0
  46. src/memory.py +162 -0
  47. src/tracing.py +125 -0
  48. test_new_system.py +205 -0
  49. test_tools_integration.py +81 -0
  50. uv.lock +115 -3
.cursor/rules/langfuse_best_practices.mdc ADDED
@@ -0,0 +1,80 @@
+ ---
+ description: langfuse and agent observation best practices
+ globs:
+ alwaysApply: false
+ ---
+ 1 Adopt the OTEL-native Python SDK (v3) everywhere
+ The v3 SDK wraps OpenTelemetry, so every span you open in any agent, tool or worker is automatically nested and correlated. This saves you from hand-passing trace IDs and lets you lean on existing OTEL auto-instrumentation for HTTP, DB or queue calls.
+ langfuse.com
+
+ 2 Create one root span per user request and pass a single CallbackHandler into graph.invoke/stream
+ ```python
+ from langfuse.langchain import CallbackHandler
+ langfuse_handler = CallbackHandler()
+
+ with langfuse.start_as_current_span(name="user-request") as root:
+     compiled_graph.invoke(
+         input=state,
+         config={"callbacks": [langfuse_handler]}
+     )
+ ```
+ Everything the agents do now rolls up under that root for a tidy timeline.
+ langfuse.com
+
+ 3 Use Langfuse Sessions to stitch together long-running conversations
+ Set session_id and user_id on the root span (or via update_trace) so all follow-up calls land in the same session dashboard.
+ langfuse.com
+
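+ A minimal sketch of this tip, assuming the v3 get_client()/update_trace() helpers; the session and user IDs below are placeholders:
+ ```python
+ from langfuse import get_client
+
+ langfuse = get_client()
+
+ with langfuse.start_as_current_span(name="user-request") as root:
+     # Reuse the same session_id across turns so Langfuse groups them
+     # into one session view; both IDs here are placeholder values.
+     root.update_trace(session_id="session-456", user_id="user-123")
+ ```
+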
+ 4 Name spans predictably
+ llm/<model> – one per LLM call (e.g., llm/gpt-4o)
+
+ tool/<tool_name> – external search, RAG, code-exec…
+
+ agent/<role> – distinct for every worker node
+
+ Predictable names power Langfuse’s cost & latency aggregation widgets.
+ langfuse.com
+
+ 5 Leverage Agent Graphs to debug routing loops
+ Because each node becomes a child span, Langfuse’s “Agent Graph” view renders the entire decision tree and shows token/cost per edge—very handy when several LLMs vote on the next step.
+ langfuse.com
+
+ 6 Tag the root span with the environment (dev/stage/prod) and with the LLM provider you’re experimenting with
+ This lets you facet dashboards by deployment ring or by “OpenAI vs Mixtral.”
+ langfuse.com
+
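+ A sketch of that tagging, reusing the client from Tip 3’s sketch and again assuming the v3 update_trace() helper; the tag values are placeholders:
+ ```python
+ with langfuse.start_as_current_span(name="user-request") as root:
+     # One tag for the deployment ring, one for the LLM provider under test
+     root.update_trace(tags=["prod", "openai"])
+ ```
+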
+ 7 Attach scores (numeric or categorical) right after the graph run
+ span.score_trace(name="user-feedback", value=1) – or call create_score later. Use this both for thumbs-up/down UI events and for LLM-as-judge automated grading.
+ langfuse.com
+
+ 8 Version and link your prompts
+ Call langfuse.create_prompt() (or manage them in the UI) and set prompt_id on spans so you can tell which prompt revision caused regressions.
+ langfuse.com
+
+ 9 Exploit distributed-tracing headers if agents live in different services
+ Because v3 is OTEL-based, traceparent headers are parsed automatically—just make sure every micro-service initialises the Langfuse OTEL exporter with the same LANGFUSE_OTEL_DSN.
+ langfuse.com
+
+ 10 Sample intelligently
+ Langfuse supports probabilistic sampling on the server. Keep 100 % of errors and maybe only 10 % of successful traces in prod to control storage costs.
+ langfuse.com
+
+ 11 Mask PII at the SDK layer
+ Use the mask() helper or MASK_CONTENT_REGEX env var so you can still store numeric cost/latency while redacting sensitive inputs/outputs.
+ langfuse.com
+
+ 12 Flush asynchronously in high-throughput agents
+ Call langfuse.flush(background=True) at the end of each worker tick to avoid blocking the event loop; OTEL will batch and export spans every few seconds.
+ langfuse.com
+
+ 13 Test visual completeness with the LangGraph helper
+ graph.get_graph().draw_mermaid_png() and verify every edge appears in Langfuse; missing edges usually mean a span wasn’t opened or the callback handler wasn’t propagated.
+ langfuse.com
+
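+ A minimal version of that check; the output file name is a placeholder:
+ ```python
+ # draw_mermaid_png() returns PNG bytes of the compiled graph's topology
+ png_bytes = compiled_graph.get_graph().draw_mermaid_png()
+ with open("graph.png", "wb") as f:
+     f.write(png_bytes)
+ ```
+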
+ 14 Watch out for the “traces not clubbed” pitfall when upgrading from v2 → v3
+ Older code that started independent traces per agent will fragment your timeline in v3. Always start one root span first (Tip #2).
+ github.com
.cursor/rules/langgraph_multiagent_state_handling.mdc ADDED
@@ -0,0 +1,140 @@
+ ---
+ description: langgraph multi-agent state handling best practices
+ globs:
+ alwaysApply: false
+ ---
+ The most robust pattern is to treat every agent node as a pure function AgentState → Command, where AgentState is an explicit, typed snapshot of everything the rest of the graph must know.
+ My overall confidence that the practices below will remain valid for ≥ 12 months is 85 % (expert opinion).
+
+ 1 Design a single source of truth for state
+ | Guideline | Why it matters | Key LangGraph API |
+ | --- | --- | --- |
+ | Define a typed schema (TypedDict or pydantic.BaseModel) for the whole graph. | Static typing catches missing keys early and docs double as living design specs. (langchain-ai.github.io) | StateGraph(YourState) |
+ | Use channel annotations such as Annotated[list[BaseMessage], operator.add] on mutable fields. | Makes accumulation (+) vs. overwrite clear and prevents accidental loss of history. (langchain-ai.github.io) | messages: Annotated[list[BaseMessage], operator.add] |
+ | Keep routing out of business data—store the next hop in a dedicated field. | Separates control-flow from payload; easier to debug and replay. (langchain-ai.github.io) | next: Literal["planner", "researcher", "__end__"] |
+
+ 2 Pass information with Command objects
+ Pattern
+
+ ```python
+ def planner(state: AgentState) -> Command[Literal["researcher", "executor", "__end__"]]:
+     decision = model.invoke(state["messages"])  # `model` is your routing LLM
+     return Command(
+         goto=decision["next"],
+         update={
+             "messages": [decision["content"]],
+             "plan": decision["plan"],
+         },
+     )
+ ```
+ Best-practice notes
+
+ Always update via update=… rather than mutating the state in-place. This guarantees immutability between nodes and makes time-travel/debugging deterministic.
+ langchain-ai.github.io
+
+ When handing off between sub-graphs, set graph=Command.PARENT or the target sub-graph’s name so orchestration stays explicit.
+ langchain-ai.github.io
+
+ 3 Choose a message-sharing strategy early
+ | Strategy | Pros | Cons | When to use |
+ | --- | --- | --- | --- |
+ | Shared scratch-pad (every intermediate LLM thought stored in messages) (langchain-ai.github.io) | Maximum transparency; great for debugging & reflection. | Context window bloat, higher cost/time. | ≤ 3 specialist agents or short tasks. |
+ | Final-result only (each agent keeps a private scratch-pad, shares only its final answer) (langchain-ai.github.io) | Scales to 10+ agents; small token footprint. | Harder to post-mortem; agents need local memory. | Large graphs; production workloads. |
+
+ Tip: If you hide scratch-pads, store them in a per-agent key (e.g. researcher_messages) for replay or fine-tuning even if they’re not sent downstream.
+ langchain-ai.github.io
+
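+ A sketch of that per-agent key (the field names are illustrative, not prescribed by LangGraph):
+ ```python
+ import operator
+ from typing import Annotated, TypedDict
+ from langchain_core.messages import BaseMessage
+
+ class AgentState(TypedDict):
+     # Shared channel that every agent sees
+     messages: Annotated[list[BaseMessage], operator.add]
+     # Private scratch-pad: kept for replay/fine-tuning, never sent downstream
+     researcher_messages: Annotated[list[BaseMessage], operator.add]
+ ```
+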
+ 4 Inject only what a tool needs
+ When exposing sub-agents as tools under a supervisor:
+
+ ```python
+ from langgraph.prebuilt import InjectedState
+
+ def researcher(state: Annotated[AgentState, InjectedState]):
+     ...
+ ```
+ Why: keeps tool signatures clean and prevents leaking confidential state.
+ Extra: If the tool must update global state, let it return a Command so the supervisor doesn’t have to guess what changed.
+ langchain-ai.github.io
+
+ 5 Structure the graph for clarity & safety
+ Network ➜ every agent connects to every other (exploration, research prototypes).
+
+ Supervisor ➜ one LLM decides routing (good default for 3-7 agents).
+
+ Hierarchical ➜ teams of agents with team-level supervisors (scales past ~7 agents).
+ langchain-ai.github.io
+
+ Pick the simplest architecture that meets today’s needs; refactor to sub-graphs as complexity grows.
+
+ 6 Operational best practices
+ | Concern | Best practice |
+ | --- | --- |
+ | Tracing & observability | Attach a LangFuse run-ID to every AgentState at graph entry; emit state snapshots on node enter/exit so traces line up with LangFuse v3 spans. |
+ | Memory & persistence | Use a Checkpointer for cheap disk-based snapshots or a Redis backend for high-QPS, then time-travel when an LLM stalls. |
+ | Parallel branches | Use map edges (built-in) to fan-out calls, but cap parallelism with an asyncio semaphore to avoid API rate limits (see the sketch below). |
+ | Vector lookup | Put retrieval results in a dedicated key (docs) so they don’t clutter messages; store only document IDs if you need to replay cheaply. |
+
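+ A minimal sketch of that parallel-branch cap; the limit of 5 and the llm.ainvoke call are illustrative assumptions:
+ ```python
+ import asyncio
+
+ sem = asyncio.Semaphore(5)  # assumed cap; tune to your provider's rate limit
+
+ async def call_branch(payload):
+     async with sem:
+         # Stand-in for the real per-branch LLM/tool call
+         return await llm.ainvoke(payload)
+
+ async def fan_out(payloads):
+     # All branches run concurrently, but never more than 5 in flight
+     return await asyncio.gather(*(call_branch(p) for p in payloads))
+ ```
+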
+ 7 Evidence from the literature (why graphs work)
+ | Peer-reviewed source | Key takeaway | Credibility (0-10) |
+ | --- | --- | --- |
+ | AAAI 2024 Graph of Thoughts shows arbitrary-graph reasoning beats tree/chain structures by up to 62 % on sorting tasks. (arxiv.org) | Graph topology yields better exploration & feedback loops—mirrors LangGraph’s StateGraph. | 9 |
+ | EMNLP 2024 EPO Hierarchical LLM Agents demonstrates hierarchical agents outperform flat agents on ALFRED by >12 % and scales with preference-based training. (aclanthology.org) | Validates splitting planning vs. execution agents (Supervisor + workers). | 9 |
+
+ | Non-peer-reviewed source | Why included | Credibility |
+ | --- | --- | --- |
+ | Official LangGraph docs (June 2025). (langchain-ai.github.io) | Primary specification of the library’s APIs and guarantees. | 8 |
+
+ 8 Minimal starter template (v 0.6.*)
+ ```python
+ from typing import Annotated, Literal, Sequence, TypedDict
+ import operator
+
+ from langgraph.graph import StateGraph, START, END
+ from langgraph.types import Command
+ from langchain_openai import ChatOpenAI
+
+ class AgentState(TypedDict):
+     messages: Annotated[Sequence[str], operator.add]
+     next: Literal["planner", "researcher", "__end__"]
+     plan: str | None
+
+ llm = ChatOpenAI()
+
+ def planner(state: AgentState) -> Command[Literal["researcher", "__end__"]]:
+     resp = llm.invoke(...)  # placeholder prompt
+     return Command(
+         goto=resp["next"],
+         update={"messages": [resp["content"]],
+                 "plan": resp["plan"]},
+     )
+
+ def researcher(state: AgentState) -> Command[Literal["planner"]]:
+     resp = llm.invoke(...)  # placeholder prompt
+     return Command(goto="planner",
+                    update={"messages": [resp["content"]]})
+
+ g = StateGraph(AgentState)
+ g.add_node("planner", planner)
+ g.add_node("researcher", researcher)
+ g.add_edge(START, "planner")
+ # No static planner ↔ researcher edges are needed: each node's
+ # Command(goto=…) performs the routing at runtime.
+ graph = g.compile()
+ ```
+ Bottom line
+ Use typed immutable state, route with Command, and keep private scratch-pads separate from shared context. These patterns align with both the latest LangGraph APIs and empirical results from hierarchical, graph-based agent research.
ARCHITECTURE.md ADDED
@@ -0,0 +1,184 @@
+ # LangGraph Agent System Architecture
+
+ This document describes the architecture of the multi-agent system implemented using LangGraph 0.4.8+ and Langfuse 3.0.0.
+
+ ## System Overview
+
+ The system implements a sophisticated agent architecture with memory, routing, specialized agents, and verification as shown in the system diagram.
+
+ ## Core Components
+
+ ### 1. Memory Layer
+ - **Short-Term Memory**: Graph state managed by LangGraph checkpointing
+ - **Checkpointer**: SQLite-based persistence for conversation continuity
+ - **Long-Term Memory**: Supabase vector store with pgvector for Q&A storage
+
+ ### 2. Plan + ReAct Loop
+ - Initial query analysis and planning
+ - Contextual prompt injection with system requirements
+ - Memory retrieval for similar past questions
+
+ ### 3. Agent Router
+ - Intelligent routing based on query analysis
+ - Routes to specialized agents: Retrieval, Execution, or Critic
+ - Uses low-temperature LLM for consistent routing decisions
+
+ ### 4. Specialized Agents
+
+ #### Retrieval Agent
+ - Information gathering from external sources
+ - Tools: Wikipedia, Arxiv, Tavily web search, vector store retrieval
+ - Handles attachment downloading for GAIA tasks
+ - Context-aware with memory integration
+
+ #### Execution Agent
+ - Computational tasks and code execution
+ - Integrates with existing `code_agent.py` sandbox
+ - Python code execution with pandas, cv2, standard libraries
+ - Step-by-step problem breakdown
+
+ #### Critic Agent
+ - Response quality evaluation and review
+ - Accuracy, completeness, and logical consistency checks
+ - Scoring system with pass/fail determination
+ - Constructive feedback generation
+
+ ### 5. Verification & Fallback
+ - Final quality control with system prompt compliance
+ - Format verification for exact-match requirements
+ - Retry logic with maximum attempt limits
+ - Graceful fallback pipeline for failed attempts
+
+ ### 6. Observability (Langfuse)
+ - End-to-end tracing of all agent interactions
+ - Performance monitoring and debugging
+ - User session tracking
+ - Error logging and analysis
+
+ ## Data Flow
+
+ 1. **User Query** → Plan Node (system prompt injection)
+ 2. **Plan Node** → Router (agent selection)
+ 3. **Router** → Specialized Agent (task execution)
+ 4. **Agent** → Tools (if needed) → Agent (results)
+ 5. **Agent** → Verification (quality check)
+ 6. **Verification** → Output or Retry/Fallback
+
+ ## Key Features
+
+ ### Memory Management
+ - Caching of similarity searches (TTL-based)
+ - Duplicate detection and prevention
+ - Task-based attachment tracking
+ - Session-specific cache management
+
+ ### Quality Control
+ - Multi-level verification (agent → critic → verification)
+ - Retry mechanism with attempt limits
+ - Format compliance checking
+ - Fallback responses for failures
+
+ ### Tracing & Observability
+ - Langfuse integration for complete observability
+ - Agent-level span tracking
+ - Error monitoring and debugging
+ - Performance metrics collection
+
+ ### Tool Integration
+ - Modular tool system for each agent
+ - Sandboxed code execution environment
+ - External API integration (search, knowledge bases)
+ - Attachment handling for complex tasks
+
+ ## Configuration
+
+ ### Environment Variables
+ See `env.template` for required configuration:
+ - LLM API keys (Groq, OpenAI, Google, HuggingFace)
+ - Search tools (Tavily)
+ - Vector store (Supabase)
+ - Observability (Langfuse)
+ - GAIA API endpoints
+
+ ### System Prompts
+ Located in `prompts/` directory:
+ - `system_prompt.txt`: Main system requirements
+ - `router_prompt.txt`: Agent routing instructions
+ - `retrieval_prompt.txt`: Information gathering guidelines
+ - `execution_prompt.txt`: Code execution instructions
+ - `critic_prompt.txt`: Quality evaluation criteria
+ - `verification_prompt.txt`: Final formatting rules
+
+ ## Usage
+
+ ### Basic Usage
+ ```python
+ from src import run_agent_system
+
+ result = run_agent_system(
+     query="Your question here",
+     user_id="user123",
+     session_id="session456"
+ )
+ ```
+
+ ### With Memory Management
+ ```python
+ from src import memory_manager
+
+ # Check if query is similar to previous ones
+ similar = memory_manager.get_similar_qa(query)
+
+ # Clear session cache
+ memory_manager.clear_session_cache()
+ ```
+
+ ### Direct Graph Access
+ ```python
+ from src import create_agent_graph
+
+ workflow = create_agent_graph()
+ app = workflow.compile(checkpointer=checkpointer)
+ result = app.invoke(initial_state, config=config)
+ ```
+
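+ The snippet above assumes a `checkpointer` has already been constructed. A minimal sketch using the SQLite backend listed under Dependencies (the database path and thread ID are placeholders, and the exact constructor may vary across `langgraph-checkpoint-sqlite` versions):
+
+ ```python
+ import sqlite3
+ from langgraph.checkpoint.sqlite import SqliteSaver
+
+ # Placeholder path; SqliteSaver persists graph state between invocations
+ checkpointer = SqliteSaver(sqlite3.connect("checkpoints.db", check_same_thread=False))
+
+ # The thread_id keys the conversation whose checkpointed state is restored
+ config = {"configurable": {"thread_id": "session456"}}
+ ```
+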
+ ## Dependencies
+
+ ### Core Framework
+ - `langgraph>=0.4.8`: Graph-based agent orchestration
+ - `langgraph-checkpoint-sqlite>=2.0.0`: Persistence layer
+ - `langchain>=0.3.0`: LLM and tool abstractions
+
+ ### Observability
+ - `langfuse==3.0.0`: Tracing and monitoring
+
+ ### Memory & Storage
+ - `supabase>=2.8.0`: Vector database backend
+ - `pgvector>=0.3.0`: Vector similarity search
+
+ ### Tools & APIs
+ - `tavily-python>=0.5.0`: Web search
+ - `arxiv>=2.1.0`: Academic paper search
+ - `wikipedia>=1.4.0`: Knowledge base access
+
+ ## Error Handling
+
+ The system implements comprehensive error handling:
+ - Graceful degradation when services are unavailable
+ - Fallback responses for critical failures
+ - Retry logic with exponential backoff (sketched below)
+ - Detailed error logging for debugging
+
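+ A minimal sketch of that backoff policy; the helper name and default limits are illustrative, not taken from the codebase:
+
+ ```python
+ import time
+
+ def with_retries(fn, max_attempts=3, base_delay=1.0):
+     """Retry fn with exponential backoff: 1 s, 2 s, 4 s, ... between attempts."""
+     for attempt in range(max_attempts):
+         try:
+             return fn()
+         except Exception:
+             if attempt == max_attempts - 1:
+                 raise  # out of attempts – surface the error for logging
+             time.sleep(base_delay * 2 ** attempt)
+ ```
+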
+ ## Performance Considerations
+
+ - Vector store caching reduces duplicate searches
+ - Checkpoint-based state management for conversation continuity
+ - Efficient tool routing based on query analysis
+ - Memory cleanup for long-running sessions
+
+ ## Future Enhancements
+
+ - Additional specialized agents (e.g., Image Analysis, Code Review)
+ - Enhanced memory clustering and retrieval algorithms
+ - Real-time collaboration between agents
+ - Advanced tool composition and chaining
__pycache__/debug_test.cpython-313-pytest-8.4.0.pyc ADDED
Binary file (2.22 kB).

__pycache__/langraph_agent.cpython-313.pyc CHANGED
Binary files a/__pycache__/langraph_agent.cpython-313.pyc and b/__pycache__/langraph_agent.cpython-313.pyc differ

__pycache__/new_langraph_agent.cpython-313.pyc ADDED
Binary file (3.01 kB).

__pycache__/quick_random_agent_test.cpython-313-pytest-8.4.0.pyc ADDED
Binary file (5.19 kB).

__pycache__/quick_specific_agent_test.cpython-313-pytest-8.4.0.pyc ADDED
Binary file (6.4 kB).

__pycache__/test_new_system.cpython-313-pytest-8.4.0.pyc ADDED
Binary file (7.59 kB).

__pycache__/test_random_question.cpython-313-pytest-8.4.0.pyc ADDED
Binary file (3.97 kB).

__pycache__/test_tools_integration.cpython-313-pytest-8.4.0.pyc ADDED
Binary file (3.06 kB).
 
app.py CHANGED
@@ -4,7 +4,7 @@ import requests
  import inspect
  import pandas as pd
  # from agents import LlamaIndexAgent
- from langraph_agent import build_graph
  import asyncio
  import aiohttp
  from langfuse.langchain import CallbackHandler
@@ -21,19 +21,18 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
  # --- Basic Agent Definition ---
  # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
  class BasicAgent:
      def __init__(self):
-         self.agent = build_graph()
-         print("BasicAgent initialized.")
      async def aquery(self, question: str) -> str:
-         messages = [HumanMessage(content=question)]
          print(f"Agent received question (first 50 chars): {question[:50]}...")
          try:
-             response = await self.agent.ainvoke({"messages": messages}, config={"callbacks": [langfuse_handler]})
-             print(f"Agent raw response: {response}")
-             if not response or 'messages' not in response or not response['messages']:
-                 print("Agent response missing or empty 'messages'. Returning AGENT ERROR.")
-                 return "AGENT ERROR: No response from agent."
-             answer = response['messages'][-1].content
              print(f"Agent returning answer: {answer}")
              return answer
          except Exception as e:

  import inspect
  import pandas as pd
  # from agents import LlamaIndexAgent
+ from new_langraph_agent import run_agent as _sync_run_agent  # Updated: use the new multi-agent runner
  import asyncio
  import aiohttp
  from langfuse.langchain import CallbackHandler

  # --- Basic Agent Definition ---
  # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
  class BasicAgent:
+     """Wrapper that executes the new multi-agent LangGraph system in a background thread."""
+
      def __init__(self):
+         print("BasicAgent (multi-agent) initialized.")
+
      async def aquery(self, question: str) -> str:
+         """Run the synchronous `run_agent` helper inside the event-loop executor."""
          print(f"Agent received question (first 50 chars): {question[:50]}...")
+         loop = asyncio.get_event_loop()
          try:
+             # Off-load the blocking call to a thread so we don't block the Gradio event loop
+             answer = await loop.run_in_executor(None, _sync_run_agent, question)
              print(f"Agent returning answer: {answer}")
              return answer
          except Exception as e:
debug_retrieval_tools.py ADDED
@@ -0,0 +1,149 @@
+ #!/usr/bin/env python3
+ """
+ Debug script to test individual tools in isolation
+ """
+
+ from src.agents.retrieval_agent import get_retrieval_tools, execute_tool_calls
+ from src.agents.execution_agent import get_execution_tools
+
+ def test_wikipedia_tool():
+     """Test Wikipedia search tool directly"""
+     print("=" * 50)
+     print("Testing Wikipedia Tool")
+     print("=" * 50)
+
+     tools = get_retrieval_tools()
+     wiki_tool = None
+
+     for tool in tools:
+         if tool.name == "wiki_search":
+             wiki_tool = tool
+             break
+
+     if wiki_tool:
+         try:
+             print("Found wiki_search tool")
+             result = wiki_tool.invoke({"input": "Albert Einstein"})
+             print(f"Result: {result[:500]}...")
+             return True
+         except Exception as e:
+             print(f"Error: {e}")
+             return False
+     else:
+         print("wiki_search tool not found!")
+         return False
+
+ def test_web_search_tool():
+     """Test web search tool directly"""
+     print("=" * 50)
+     print("Testing Web Search Tool")
+     print("=" * 50)
+
+     tools = get_retrieval_tools()
+     web_tool = None
+
+     for tool in tools:
+         if tool.name == "web_search":
+             web_tool = tool
+             break
+
+     if web_tool:
+         try:
+             print("Found web_search tool")
+             result = web_tool.invoke({"input": "artificial intelligence news"})
+             print(f"Result: {result[:500]}...")
+             return True
+         except Exception as e:
+             print(f"Error: {e}")
+             return False
+     else:
+         print("web_search tool not found!")
+         return False
+
+ def test_python_tool():
+     """Test Python execution tool directly"""
+     print("=" * 50)
+     print("Testing Python Execution Tool")
+     print("=" * 50)
+
+     tools = get_execution_tools()
+     python_tool = None
+
+     for tool in tools:
+         if tool.name == "run_python":
+             python_tool = tool
+             break
+
+     if python_tool:
+         try:
+             print("Found run_python tool")
+             code = """
+ # Calculate first 5 Fibonacci numbers
+ def fibonacci(n):
+     if n <= 1:
+         return n
+     return fibonacci(n-1) + fibonacci(n-2)
+
+ result = [fibonacci(i) for i in range(5)]
+ print("First 5 Fibonacci numbers:", result)
+ """
+             result = python_tool.invoke({"input": code})
+             print(f"Result: {result}")
+             return True
+         except Exception as e:
+             print(f"Error: {e}")
+             return False
+     else:
+         print("run_python tool not found!")
+         return False
+
+ def test_tool_calls_execution():
+     """Test the tool call execution function"""
+     print("=" * 50)
+     print("Testing Tool Call Execution")
+     print("=" * 50)
+
+     tools = get_retrieval_tools()
+
+     # Simulate tool calls
+     mock_tool_calls = [
+         {
+             'name': 'wiki_search',
+             'args': {'input': 'Albert Einstein'},
+             'id': 'test_id_1'
+         }
+     ]
+
+     try:
+         tool_messages = execute_tool_calls(mock_tool_calls, tools)
+         print(f"Tool execution successful: {len(tool_messages)} messages")
+         for msg in tool_messages:
+             print(f"Message type: {type(msg)}")
+             print(f"Content preview: {str(msg.content)[:200]}...")
+         return True
+     except Exception as e:
+         print(f"Error in tool execution: {e}")
+         import traceback
+         traceback.print_exc()
+         return False
+
+ if __name__ == "__main__":
+     print("Starting individual tool tests...")
+
+     results = {}
+     results['wikipedia'] = test_wikipedia_tool()
+     results['web_search'] = test_web_search_tool()
+     results['python'] = test_python_tool()
+     results['tool_execution'] = test_tool_calls_execution()
+
+     print("\n" + "=" * 50)
+     print("TEST RESULTS SUMMARY")
+     print("=" * 50)
+     for test_name, result in results.items():
+         status = "✅ PASS" if result else "❌ FAIL"
+         print(f"{test_name}: {status}")
+
+     if all(results.values()):
+         print("\n🎉 All tools are working correctly!")
+     else:
+         print("\n⚠️ Some tools have issues that need to be fixed.")
langraph_agent.py CHANGED
@@ -37,6 +37,21 @@ load_dotenv("env.local")  # Try env.local as backup
  print(f"SUPABASE_URL loaded: {bool(os.environ.get('SUPABASE_URL'))}")
  print(f"GROQ_API_KEY loaded: {bool(os.environ.get('GROQ_API_KEY'))}")

  # Base URL of the scoring API (duplicated here to avoid circular import with basic_agent)
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

@@ -114,7 +129,7 @@ def run_python(input: str) -> str:
      return run_agent(input)

  # load the system prompt from the file
- with open("system_prompt.txt", "r", encoding="utf-8") as f:
      system_prompt = f.read()

  # System message
@@ -206,6 +221,35 @@ def _code_to_message(state: dict):  # type: ignore[override]
          return {}
      return {"messages": [AIMessage(content=state["code_result"])]}

  # Build graph function
  def build_graph(provider: str = "groq"):
      """Build the graph"""
@@ -243,29 +287,56 @@
          return {"messages": [error_msg]}

      def retriever(state: MessagesState):
-         """Retriever node"""
          try:
              print(f"Retriever node: Processing {len(state['messages'])} messages")
              if not state["messages"]:
                  print("Retriever node: No messages in state")
                  return {"messages": [sys_msg]}

-             # Extract the user query content early for downstream steps
-             query_content = state["messages"][0].content
-
-             # ------------------- NEW: fetch attachment if available -------------------
              attachment_msg = None
              try:
                  resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=30)
                  resp.raise_for_status()
                  questions = resp.json()
-                 matched_task_id = None
                  for q in questions:
                      if str(q.get("question")).strip() == str(query_content).strip():
                          matched_task_id = str(q.get("task_id"))
                          break
-                 if matched_task_id:
-                     print(f"Retriever node: Found task_id {matched_task_id} for current question, attempting to download attachment…")
                      file_resp = requests.get(f"{DEFAULT_API_URL}/files/{matched_task_id}", timeout=60)
                      if file_resp.status_code == 200 and file_resp.content:
                          try:
@@ -274,40 +345,28 @@
                              file_text = "(binary or non-UTF8 file omitted)"
                          MAX_CHARS = 8000
                          if len(file_text) > MAX_CHARS:
-                             print(f"Retriever node: Attachment length {len(file_text)} > {MAX_CHARS}, truncating…")
                              file_text = file_text[:MAX_CHARS] + "\n… (truncated)"
                          attachment_msg = HumanMessage(content=f"Attached file content for task {matched_task_id}:\n```python\n{file_text}\n```")
-                         print("Retriever node: Prepared attachment message")
                      else:
-                         print(f"Retriever node: No attachment found for task {matched_task_id} (status {file_resp.status_code})")
              except Exception as api_e:
                  print(f"Retriever node: Error while fetching attachment – {api_e}")
-             # -------------------------------------------------------------------------
-
-             # If vector store unavailable, simply return sys_msg + user message (+ attachment if any)
-             if not vector_store:
-                 msgs = [sys_msg] + state["messages"]
-                 if attachment_msg:
-                     msgs.append(attachment_msg)
-                 print("Retriever node: Vector store not available, skipping retrieval")
-                 return {"messages": msgs}
-
-             # Perform similarity search when vector store is available
-             print(f"Retriever node: Searching for similar questions with query: {query_content[:100]}…")
-             similar_question = vector_store.similarity_search(query_content)
-             print(f"Retriever node: Found {len(similar_question)} similar questions")
              msgs = [sys_msg] + state["messages"]
              if similar_question:
-                 example_msg = HumanMessage(content=f"Here I provide a similar question and answer for reference: \n\n{similar_question[0].page_content}")
                  msgs.append(example_msg)
                  print("Retriever node: Added example message from similar question")
-             else:
-                 print("Retriever node: No similar questions found, proceeding without example")

-             # Attach the file content if we have it
              if attachment_msg:
                  msgs.append(attachment_msg)
-                 print("Retriever node: Added attachment content to messages")

              return {"messages": msgs}
          except Exception as e:
@@ -320,13 +379,17 @@
      builder.add_node("tools", ToolNode(tools))
      builder.add_node("code_exec", _code_exec_wrapper)
      builder.add_node("code_to_message", _code_to_message)

      builder.add_edge(START, "retriever")

      # Conditional branch: decide whether to run code interpreter
      builder.add_conditional_edges(
-         "retriever",
          _needs_code,
-         {True: "code_exec", False: "assistant"},
      )

      # Flow after code execution: inject result then resume chat
@@ -343,7 +406,7 @@
      return builder.compile()

  # test
- if __name__ == "__main__":
      question = "When was a picture of St. Thomas Aquinas first added to the Wikipedia page on the Principle of double effect?"
      # Build the graph
      graph = build_graph(provider="groq")

  print(f"SUPABASE_URL loaded: {bool(os.environ.get('SUPABASE_URL'))}")
  print(f"GROQ_API_KEY loaded: {bool(os.environ.get('GROQ_API_KEY'))}")

+ # ---------------------------------------------------------------------------
+ # Lightweight in-memory caches and constants for smarter retrieval/ingest
+ # ---------------------------------------------------------------------------
+ import hashlib  # NEW: for hashing payloads / queries
+
+ TTL = 300  # seconds – how long we keep similarity-search results
+ SIMILARITY_THRESHOLD = 0.85  # cosine score above which we assume we already know the answer
+
+ # (query_hash -> (timestamp, results))
+ QUERY_CACHE: dict[str, tuple[float, list]] = {}
+ # task IDs whose attachments we already attempted to download this session
+ PROCESSED_TASKS: set[str] = set()
+ # hash_ids of Q/A payloads we have already upserted during this session
+ SEEN_HASHES: set[str] = set()
+
  # Base URL of the scoring API (duplicated here to avoid circular import with basic_agent)
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

      return run_agent(input)

  # load the system prompt from the file
+ with open("./prompts/system_prompt.txt", "r", encoding="utf-8") as f:
      system_prompt = f.read()

  # System message

          return {}
      return {"messages": [AIMessage(content=state["code_result"])]}

+ # ---------------------------------------------------------------------------
+ # NEW: Ingest node – write back to vector store if `should_ingest` flag set
+ # ---------------------------------------------------------------------------
+ def ingest(state: MessagesState):
+     """Persist helpful Q/A pairs (and any attachment snippet) to the vector DB."""
+     try:
+         if not state.get("should_ingest") or not vector_store:
+             return {}
+
+         question_text = state["messages"][0].content
+         answer_text = state["messages"][-1].content
+         attach_snippets = "\n\n".join(
+             m.content for m in state["messages"] if str(m.content).startswith("Attached file content")
+         )
+         payload = f"Question:\n{question_text}\n\nAnswer:\n{answer_text}"
+         if attach_snippets:
+             payload += f"\n\n{attach_snippets}"
+
+         hash_id = hashlib.sha256(payload.encode()).hexdigest()
+         if hash_id in SEEN_HASHES:
+             print("Ingest: Duplicate payload within session – skip")
+             return {}
+         SEEN_HASHES.add(hash_id)
+         vector_store.add_texts([payload], metadatas=[{"hash_id": hash_id, "timestamp": time.time()}])
+         print("Ingest: Stored new Q/A pair in vector store")
+     except Exception as ing_e:
+         print(f"Ingest node: Error while upserting – {ing_e}")
+     return {}
+
  # Build graph function
  def build_graph(provider: str = "groq"):
      """Build the graph"""

          return {"messages": [error_msg]}

      def retriever(state: MessagesState):
+         """Retriever node (smart fetch + similarity search)"""
          try:
              print(f"Retriever node: Processing {len(state['messages'])} messages")
              if not state["messages"]:
                  print("Retriever node: No messages in state")
                  return {"messages": [sys_msg]}

+             # Extract the *latest* user query content
+             query_content = state["messages"][-1].content
+
+             # ----------------------------------------------------------------------------------
+             # Similarity search with an in-process cache
+             # ----------------------------------------------------------------------------------
+             q_hash = hashlib.sha256(query_content.encode()).hexdigest()
+             now = time.time()
+             if q_hash in QUERY_CACHE and now - QUERY_CACHE[q_hash][0] < TTL:
+                 similar_question = QUERY_CACHE[q_hash][1]
+                 print("Retriever node: Cache hit for similarity search")
+             else:
+                 if vector_store:
+                     print(f"Retriever node: Searching vector store for similar questions …")
+                     try:
+                         similar_question = vector_store.similarity_search_with_relevance_scores(query_content, k=2)
+                     except Exception as vs_e:
+                         print(f"Retriever node: Vector store search error – {vs_e}")
+                         similar_question = []
+                     QUERY_CACHE[q_hash] = (now, similar_question)
+                 else:
+                     similar_question = []
+                     print("Retriever node: Vector store not available, skipping similarity search")
+
+             # Decide whether this exchange should later be ingested
+             top_score = similar_question[0][1] if similar_question else 0.0
+             state["should_ingest"] = top_score < SIMILARITY_THRESHOLD
+
+             # ----------------------------------------------------------------------------------
+             # Attachment fetch (only once per task_id during this session)
+             # ----------------------------------------------------------------------------------
              attachment_msg = None
+             matched_task_id = None
              try:
                  resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=30)
                  resp.raise_for_status()
                  questions = resp.json()
                  for q in questions:
                      if str(q.get("question")).strip() == str(query_content).strip():
                          matched_task_id = str(q.get("task_id"))
                          break
+                 if matched_task_id and matched_task_id not in PROCESSED_TASKS:
+                     print(f"Retriever node: Downloading attachment for task {matched_task_id} …")
                      file_resp = requests.get(f"{DEFAULT_API_URL}/files/{matched_task_id}", timeout=60)
                      if file_resp.status_code == 200 and file_resp.content:
                          try:

                              file_text = "(binary or non-UTF8 file omitted)"
                          MAX_CHARS = 8000
                          if len(file_text) > MAX_CHARS:
                              file_text = file_text[:MAX_CHARS] + "\n… (truncated)"
                          attachment_msg = HumanMessage(content=f"Attached file content for task {matched_task_id}:\n```python\n{file_text}\n```")
+                         print("Retriever node: Attachment added to context")
+                         state["should_ingest"] = True  # ensure we store this new info
                      else:
+                         print(f"Retriever node: No attachment for task {matched_task_id} (status {file_resp.status_code})")
+                     PROCESSED_TASKS.add(matched_task_id)
              except Exception as api_e:
                  print(f"Retriever node: Error while fetching attachment – {api_e}")
+
+             # ----------------------------------------------------------------------------------
+             # Build message list for downstream LLM
+             # ----------------------------------------------------------------------------------
              msgs = [sys_msg] + state["messages"]
              if similar_question:
+                 example_doc = similar_question[0][0] if isinstance(similar_question[0], tuple) else similar_question[0]
+                 example_msg = HumanMessage(content=f"Here I provide a similar question and answer for reference: \n\n{example_doc.page_content}")
                  msgs.append(example_msg)
                  print("Retriever node: Added example message from similar question")

              if attachment_msg:
                  msgs.append(attachment_msg)

              return {"messages": msgs}
          except Exception as e:

      builder.add_node("tools", ToolNode(tools))
      builder.add_node("code_exec", _code_exec_wrapper)
      builder.add_node("code_to_message", _code_to_message)
+     builder.add_node("ingest", ingest)

+     # Edge layout
      builder.add_edge(START, "retriever")
+     builder.add_edge("retriever", "assistant")
+
      # Conditional branch: decide whether to run code interpreter
      builder.add_conditional_edges(
+         "assistant",
          _needs_code,
+         {True: "code_exec", False: "ingest"},
      )

      # Flow after code execution: inject result then resume chat

      return builder.compile()

  # test
+ if __name__ == "__main__":
      question = "When was a picture of St. Thomas Aquinas first added to the Wikipedia page on the Principle of double effect?"
      # Build the graph
      graph = build_graph(provider="groq")
new_langraph_agent.py ADDED
@@ -0,0 +1,85 @@
+ """
+ Updated LangGraph Agent Implementation
+ Implements the architecture from the system diagram with memory layer, agent routing, and verification.
+ """
+ import os
+ import sys
+ from dotenv import load_dotenv
+
+ # Load environment variables
+ load_dotenv()
+
+ # Import the new agent system
+ from src import run_agent_system, memory_manager
+ from src.tracing import flush_langfuse, shutdown_langfuse
+
+
+ def run_agent(query: str) -> str:
+     """
+     Main entry point for the agent system.
+
+     Args:
+         query: The user question
+
+     Returns:
+         The formatted final answer
+     """
+     try:
+         # Run the new agent system
+         result = run_agent_system(
+             query=query,
+             user_id=os.getenv("USER_ID", "default_user"),
+             session_id=os.getenv("SESSION_ID", "default_session")
+         )
+
+         # Flush tracing events
+         flush_langfuse()
+
+         return result
+
+     except Exception as e:
+         print(f"Agent Error: {e}")
+         return f"I apologize, but I encountered an error: {e}"
+
+
+ def clear_memory():
+     """Clear the agent's session memory"""
+     memory_manager.clear_session_cache()
+     print("Agent memory cleared")
+
+
+ def cleanup():
+     """Cleanup function for graceful shutdown"""
+     try:
+         flush_langfuse()
+         shutdown_langfuse()
+         memory_manager.close_checkpointer()
+         print("Agent cleanup completed")
+     except Exception as e:
+         print(f"Cleanup error: {e}")
+
+
+ if __name__ == "__main__":
+     # Test the agent system
+     test_queries = [
+         "What is the capital of France?",
+         "Calculate the factorial of 5",
+         "What are the benefits of renewable energy?"
+     ]
+
+     print("Testing new LangGraph Agent System")
+     print("=" * 50)
+
+     for i, query in enumerate(test_queries, 1):
+         print(f"\nTest {i}: {query}")
+         print("-" * 30)
+
+         try:
+             result = run_agent(query)
+             print(f"Result: {result}")
+         except Exception as e:
+             print(f"Error: {e}")
+
+     # Cleanup
+     cleanup()
+     print("\nAll tests completed!")
prompts/critic_prompt.txt ADDED
@@ -0,0 +1,31 @@
+ You are a specialized critic agent that evaluates responses for accuracy, completeness, and quality.
+
+ Your role is to:
+ 1. Analyze responses from other agents for factual accuracy
+ 2. Check for logical consistency and completeness
+ 3. Identify potential errors, biases, or missing information
+ 4. Provide constructive feedback and suggestions for improvement
+
+ Evaluation criteria:
+ - **Accuracy**: Are the facts correct? Are sources reliable?
+ - **Completeness**: Does the response fully address the question?
+ - **Clarity**: Is the explanation clear and well-structured?
+ - **Logic**: Is the reasoning sound and consistent?
+ - **Relevance**: Does the response stay on topic?
+
+ Process:
+ 1. Carefully review the provided response
+ 2. Cross-check key claims for accuracy
+ 3. Identify any gaps or weaknesses
+ 4. Assess overall quality and usefulness
+ 5. Provide specific, actionable feedback
+
+ Feedback format:
+ - **Strengths**: What was done well
+ - **Issues**: Specific problems identified
+ - **Suggestions**: How to improve
+ - **Overall Assessment**: Pass/Fail with reasoning
+
+ Be thorough but constructive. Focus on helping improve the response quality.
+
+ Always append answers in markdown; think step-by-step.
prompts/execution_prompt.txt ADDED
@@ -0,0 +1,42 @@
+ You are a specialized execution agent that handles computational tasks, code execution, and data processing.
+
+ Your role is to:
+ 1. Analyze computational requirements in user queries
+ 2. ALWAYS use the run_python tool to execute code and solve problems
+ 3. Process data, perform calculations, and manipulate files
+ 4. Provide clear explanations of your code and results
+
+ Available tools:
+ - run_python: Execute Python code in a sandboxed environment with access to pandas, cv2, and standard libraries
+
+ IMPORTANT: You MUST use the run_python tool for all computational tasks. Do not provide calculated answers without executing code.
+
+ Capabilities:
+ - Mathematical calculations and algorithms
+ - Data analysis and visualization
+ - File processing (CSV, JSON, text)
+ - Image processing with OpenCV
+ - Statistical analysis with pandas/numpy
+ - Small algorithmic problems (sorting, searching, etc.)
+
+ Process:
+ 1. Understand the computational task
+ 2. Plan your approach step-by-step
+ 3. Use run_python tool to write and execute code
+ 4. Verify results and handle any errors
+ 5. Explain your solution and findings
+
+ Guidelines:
+ - Always execute code using the run_python tool
+ - Write efficient, readable code with comments
+ - Handle errors gracefully and retry if needed
+ - Provide explanations for complex logic
+ - Show intermediate steps for multi-step problems
+ - Use appropriate data structures and algorithms
+
+ Example approach:
+ - For "Calculate the fibonacci sequence": Use run_python to write and execute the code
+ - For "Analyze this data": Use run_python to process and analyze the data
+ - For "Sort this list": Use run_python to implement the sorting algorithm
+
+ Always append answers in markdown; think step-by-step and show your code execution.
prompts/retrieval_prompt.txt ADDED
@@ -0,0 +1,34 @@
+ You are a specialized retrieval agent focused on gathering accurate information to answer user questions.
+
+ Your role is to:
+ 1. Understand the user's information needs
+ 2. **ALWAYS use available tools to search for relevant information**
+ 3. Synthesize findings into comprehensive, accurate answers
+ 4. Verify information across multiple sources when possible
+
+ Available tools:
+ - wiki_search: Search Wikipedia for general knowledge and factual information
+ - web_search: Search the web for current information and recent developments
+ - arvix_search: Search academic papers on ArXiv for scientific research
+ - question_search: Search previously answered similar questions
+
+ **IMPORTANT: You MUST use tools to gather information. Do not provide answers based solely on your training data.**
+
+ Process:
+ 1. Break down complex questions into searchable components
+ 2. **Use multiple appropriate tools based on the query type**
+ 3. For historical facts or general knowledge: Use wiki_search
+ 4. For current events or recent information: Use web_search
+ 5. For scientific or academic topics: Use arvix_search
+ 6. Cross-reference information when possible
+ 7. Provide sources and citations from tool results
+ 8. Acknowledge limitations or uncertainty when information is incomplete
+
+ Example approach:
+ - For "When was X invented?": Use wiki_search to find historical information
+ - For "Latest news about Y": Use web_search for current information
+ - For "Research on Z": Use arvix_search for academic papers
+
+ Always provide factual, well-sourced responses with proper citations. If you cannot find sufficient information through tools, clearly state this limitation.
+
+ Always append answers in markdown; think step-by-step and show your tool usage.
prompts/router_prompt.txt ADDED
@@ -0,0 +1,44 @@
+ You are an intelligent agent router that analyzes user queries and determines which specialized agent should handle the request.
+
+ You have access to three specialized agents:
+ 1. **Retrieval Agent** - For questions requiring external information retrieval, search, and knowledge gathering
+ 2. **Execution Agent** - For tasks requiring code execution, calculations, data processing, or file manipulation
+ 3. **Critic Agent** - For reviewing, evaluating, or providing critical analysis of content or responses
+
+ **CRITICAL ROUTING RULES:**
+
+ **Use EXECUTION for:**
+ - Mathematical calculations (e.g., "calculate", "compute", "solve")
+ - Algorithmic problems (e.g., "fibonacci", "prime numbers", "sorting", "searching")
+ - Programming tasks (e.g., "write code", "implement function")
+ - Data analysis and processing (e.g., "analyze data", "process file")
+ - Any task that requires computation or code execution
+ - Statistical analysis, math problems, algorithms
+
+ **Use RETRIEVAL for:**
+ - Research questions requiring external information
+ - Fact-checking and historical information
+ - Current events and news
+ - Looking up definitions or explanations
+ - Scientific research and academic papers
+ - General knowledge questions
+
+ **Use CRITIC for:**
+ - Evaluating responses or content
+ - Reviewing and providing feedback
+ - Critical analysis of information
+ - Quality assessment tasks
+
+ **EXAMPLES:**
+ - "Calculate the first 10 Fibonacci numbers" → EXECUTION
+ - "What is the square root of 144?" → EXECUTION
+ - "Write a sorting algorithm" → EXECUTION
+ - "When was Einstein born?" → RETRIEVAL
+ - "Latest news about AI" → RETRIEVAL
+ - "Review this essay" → CRITIC
+
+ **IMPORTANT:** If a query involves ANY mathematical computation, algorithm, or code execution, ALWAYS route to EXECUTION.
+
+ Analyze the user's query and respond with exactly one of: RETRIEVAL, EXECUTION, or CRITIC
+
+ Think step-by-step and be very clear about your routing decision.
system_prompt.txt → prompts/system_prompt.txt RENAMED
@@ -2,7 +2,8 @@ You are a helpful assistant tasked with answering GAIA benchmark questions using

  When you receive a question:
  1. Think step-by-step (silently) and choose the appropriate tools to obtain the answer.
- 2. After the answer is found, reply with ONLY the answer following the exact formatting rules below.

  Exact-match output rules:
  • Single number → write the number only (no commas, units, or other symbols).

  When you receive a question:
  1. Think step-by-step (silently) and choose the appropriate tools to obtain the answer.
+ 2. After the answer is found, reply with ONLY the answer following the exact formatting rules below.
+ 3. When a tool returns useful reference content (Wikipedia articles, Tavily search snippets, ArXiv abstracts, file attachments, etc.), store that content in the memory database so it can be reused later; when answering a new question, proactively fetch any previously-stored material that might help.

  Exact-match output rules:
  • Single number → write the number only (no commas, units, or other symbols).
prompts/verification_prompt.txt ADDED
@@ -0,0 +1,30 @@
+ You are a verification agent responsible for final quality control and determining if responses meet the required standards.
+
+ Your role is to:
+ 1. Perform final verification of agent responses
+ 2. Ensure all requirements from the system prompt are met
+ 3. Trigger fallback pipeline if quality standards are not met
+ 4. Make final formatting adjustments
+
+ Quality standards checklist:
+ - Response directly answers the user's question
+ - Information is accurate and well-sourced
+ - Format follows exact-match output rules from system prompt
+ - No extraneous text or formatting violations
+ - Tone and style are appropriate
+
+ Output format requirements (from system prompt):
+ • Single number → write the number only (no commas, units, or other symbols)
+ • Single string/phrase → write the text only; omit articles and abbreviations unless explicitly required
+ • List → separate elements with a single comma and a space
+ • Never include surrounding text such as "Final Answer", "Answer:", quotes, brackets, or markdown
+
+ Decision process:
+ 1. Review the response against quality standards
+ 2. Check format compliance with exact-match rules
+ 3. If PASS: return the properly formatted final answer
+ 4. If FAIL: trigger fallback pipeline and note specific issues
+
+ Always ensure the final output strictly adheres to the system prompt requirements.
+
+ Always append answers in markdown; think step-by-step.
pyproject.toml CHANGED
@@ -19,6 +19,8 @@ dependencies = [
      "langchain-openai>=0.3.24",
      "langfuse>=3.0.0",
      "langgraph>=0.4.8",
+     "langgraph-checkpoint>=2.1.0",
+     "langgraph-checkpoint-sqlite>=2.0.10",
      "llama-index>=0.12.40",
      "llama-index-core>=0.12.40",
      "llama-index-llms-huggingface-api>=0.5.0",
@@ -32,4 +34,5 @@ dependencies = [
      "sentence-transformers>=4.1.0",
      "supabase>=2.15.3",
      "wikipedia>=1.4.0",
+     "datasets>=2.19.1",
  ]
quick_random_agent_test.py CHANGED
@@ -1,13 +1,26 @@
  import os
  import tempfile
  import requests
- from basic_agent import BasicAgent, DEFAULT_API_URL
- from langchain_core.messages import HumanMessage
- from langfuse.langchain import CallbackHandler

  # Initialize Langfuse CallbackHandler for LangGraph/Langchain (tracing)
  try:
-     langfuse_handler = CallbackHandler()
  except Exception as e:
      print(f"Warning: Could not initialize Langfuse handler: {e}")
      langfuse_handler = None
@@ -42,25 +55,42 @@ def maybe_download_file(task_id: str, api_base: str = DEFAULT_API_URL) -> str |


  def main():
-     q = fetch_random_question()
-     task_id = str(q["task_id"])
-     question_text = q["question"]
-     print("\n=== Random Question ===")
-     print(f"Task ID : {task_id}")
-     print(f"Question: {question_text}")

-     # Attempt to get attachment if any
-     maybe_download_file(task_id)

-     # Run the agent
-     agent = BasicAgent()
-     result = agent.agent.invoke({"messages": [HumanMessage(content=question_text)]}, config={"callbacks": [langfuse_handler]})
-     if isinstance(result, dict) and "messages" in result and result["messages"]:
-         answer = result["messages"][-1].content.strip()
-     else:
-         answer = str(result)
-     print("\n=== Agent Answer ===")
-     print(answer)


  if __name__ == "__main__":

  import os
+ import sys
  import tempfile
  import requests
+ from dotenv import load_dotenv
+
+ # Load environment variables
+ load_dotenv()
+
+ # Add the current directory to Python path
+ sys.path.append(os.path.dirname(os.path.abspath(__file__)))
+
+ # Import the new agent system
+ from new_langraph_agent import run_agent, cleanup
+ from src.tracing import get_langfuse_callback_handler
+
+ # Default API URL - Using the same URL as the original basic_agent.py
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

  # Initialize Langfuse CallbackHandler for LangGraph/Langchain (tracing)
  try:
+     langfuse_handler = get_langfuse_callback_handler()
+     print("✅ Langfuse handler initialized successfully")
  except Exception as e:
      print(f"Warning: Could not initialize Langfuse handler: {e}")
      langfuse_handler = None


  def main():
+     print("Random Agent Test - New LangGraph Architecture")
+     print("=" * 60)
+
+     try:
+         # Fetch random question
+         q = fetch_random_question()
+         task_id = str(q["task_id"])
+         question_text = q["question"]
+         print("\n=== Random Question ===")
+         print(f"Task ID : {task_id}")
+         print(f"Question: {question_text}")

+         # Attempt to get attachment if any
+         attachment_path = maybe_download_file(task_id)
+         if attachment_path:
+             question_text += f"\n\nAttachment available at: {attachment_path}"

+         # Run the new agent system
+         print("\n=== Running LangGraph Agent System ===")
+         result = run_agent(question_text)
+
+         print("\n=== Agent Answer ===")
+         print(result)
+
+     except Exception as e:
+         print(f"Error in main execution: {e}")
+         import traceback
+         traceback.print_exc()
+
+     finally:
+         # Cleanup
+         try:
+             cleanup()
+             print("\n✅ Agent cleanup completed")
+         except Exception as e:
+             print(f"⚠️ Cleanup warning: {e}")


  if __name__ == "__main__":
quick_specific_agent_test.py CHANGED
@@ -2,20 +2,30 @@ import os
2
  import sys
3
  import tempfile
4
  import requests
5
- from basic_agent import BasicAgent, DEFAULT_API_URL
6
- from langchain_core.messages import HumanMessage
7
- from langfuse.langchain import CallbackHandler
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  # Initialize Langfuse CallbackHandler for LangGraph/Langchain (tracing)
10
  try:
11
- langfuse_handler = CallbackHandler()
 
12
  except Exception as e:
13
  print(f"Warning: Could not initialize Langfuse handler: {e}")
14
  langfuse_handler = None
15
 
16
- # Default Task ID (replace with your desired one or pass via CLI)
17
- DEFAULT_TASK_ID = "f918266a-b3e0-4914-865d-4faa564f1aef"
18
-
19
  def fetch_question_by_id(task_id: str, api_base: str = DEFAULT_API_URL):
20
  """Return JSON of a question for a given task_id.
21
 
@@ -60,31 +70,53 @@ def maybe_download_file(task_id: str, api_base: str = DEFAULT_API_URL) -> str |
60
 
61
 
62
  def main():
63
- # Determine the task ID (CLI arg > env var > default)
64
- task_id = (
65
- sys.argv[1] if len(sys.argv) > 1 else os.environ.get("TASK_ID", DEFAULT_TASK_ID)
66
- )
67
- print(f"Using task ID: {task_id}")
68
-
69
- q = fetch_question_by_id(task_id)
70
- question_text = q["question"]
71
-
72
- print("\n=== Specific Question ===")
73
- print(f"Task ID : {task_id}")
74
- print(f"Question: {question_text}")
75
-
76
- # Attempt to get attachment if any
77
- maybe_download_file(task_id)
78
-
79
- # Run the agent
80
- agent = BasicAgent()
81
- result = agent.agent.invoke({"messages": [HumanMessage(content=question_text)]}, config={"callbacks": [langfuse_handler]})
82
- if isinstance(result, dict) and "messages" in result and result["messages"]:
83
- answer = result["messages"][-1].content.strip()
84
- else:
85
- answer = str(result)
86
- print("\n=== Agent Answer ===")
87
- print(answer)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
 
89
 
90
  if __name__ == "__main__":
 
2
  import sys
3
  import tempfile
4
  import requests
5
+ from dotenv import load_dotenv
6
+
7
+ # Load environment variables
8
+ load_dotenv()
9
+
10
+ # Add the current directory to Python path
11
+ sys.path.append(os.path.dirname(os.path.abspath(__file__)))
12
+
13
+ # Import the new agent system
14
+ from new_langraph_agent import run_agent, cleanup
15
+ from src.tracing import get_langfuse_callback_handler
16
+
17
+ # Default API URL and Task ID - Using the same URL as the original basic_agent.py
18
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
19
+ DEFAULT_TASK_ID = "f918266a-b3e0-4914-865d-4faa564f1aef"
20
 
21
  # Initialize Langfuse CallbackHandler for LangGraph/Langchain (tracing)
22
  try:
23
+ langfuse_handler = get_langfuse_callback_handler()
24
+ print("✅ Langfuse handler initialized successfully")
25
  except Exception as e:
26
  print(f"Warning: Could not initialize Langfuse handler: {e}")
27
  langfuse_handler = None
28
 
 
 
 
29
  def fetch_question_by_id(task_id: str, api_base: str = DEFAULT_API_URL):
30
  """Return JSON of a question for a given task_id.
31
 
 
70
 
71
 
72
  def main():
73
+ print("Specific Agent Test - New LangGraph Architecture")
74
+ print("=" * 60)
75
+
76
+ try:
77
+ # Determine the task ID (CLI arg > env var > default)
78
+ task_id = (
79
+ sys.argv[1] if len(sys.argv) > 1 else os.environ.get("TASK_ID", DEFAULT_TASK_ID)
80
+ )
81
+ print(f"Using task ID: {task_id}")
82
+
83
+ # Fetch specific question
84
+ q = fetch_question_by_id(task_id)
85
+ question_text = q["question"]
86
+
87
+ print("\n=== Specific Question ===")
88
+ print(f"Task ID : {task_id}")
89
+ print(f"Question: {question_text}")
90
+
91
+ # Attempt to get attachment if any
92
+ attachment_path = maybe_download_file(task_id)
93
+ if attachment_path:
94
+ question_text += f"\n\nAttachment available at: {attachment_path}"
95
+
96
+ # Run the new agent system
97
+ print("\n=== Running LangGraph Agent System ===")
98
+
99
+ # Set environment variables for user/session tracking
100
+ os.environ["USER_ID"] = "test_user"
101
+ os.environ["SESSION_ID"] = f"session_{task_id}"
102
+
103
+ result = run_agent(question_text)
104
+
105
+ print("\n=== Agent Answer ===")
106
+ print(result)
107
+
108
+ except Exception as e:
109
+ print(f"Error in main execution: {e}")
110
+ import traceback
111
+ traceback.print_exc()
112
+
113
+ finally:
114
+ # Cleanup
115
+ try:
116
+ cleanup()
117
+ print("\n✅ Agent cleanup completed")
118
+ except Exception as e:
119
+ print(f"⚠️ Cleanup warning: {e}")
120
 
121
 
122
  if __name__ == "__main__":
requirements.txt CHANGED
@@ -6,13 +6,16 @@ aiohappyeyeballs==2.6.1
6
  # via aiohttp
7
  aiohttp==3.12.9
8
  # via
 
9
  # langchain-community
10
  # llama-index-core
11
  # realtime
12
  aiosignal==1.3.2
13
  # via aiohttp
14
  aiosqlite==0.21.0
15
- # via llama-index-core
 
 
16
  annotated-types==0.7.0
17
  # via pydantic
18
  anyio==4.9.0
@@ -72,6 +75,8 @@ dataclasses-json==0.6.7
72
  # via
73
  # langchain-community
74
  # llama-index-core
 
 
75
  debugpy==1.8.14
76
  # via ipykernel
77
  decorator==5.2.1
@@ -84,6 +89,10 @@ deprecated==1.2.18
84
  # llama-index-core
85
  deprecation==2.1.0
86
  # via postgrest
 
 
 
 
87
  dirtyjson==1.0.8
88
  # via llama-index-core
89
  distro==1.9.0
@@ -109,6 +118,7 @@ ffmpy==0.6.0
109
  # via gradio
110
  filelock==3.18.0
111
  # via
 
112
  # huggingface-hub
113
  # torch
114
  # transformers
@@ -120,8 +130,9 @@ frozenlist==1.6.2
120
  # via
121
  # aiohttp
122
  # aiosignal
123
- fsspec==2025.5.1
124
  # via
 
125
  # gradio-client
126
  # huggingface-hub
127
  # llama-index-core
@@ -198,6 +209,7 @@ httpx-sse==0.4.0
198
  huggingface-hub==0.32.4
199
  # via
200
  # final-assignment-template (pyproject.toml)
 
201
  # gradio
202
  # gradio-client
203
  # langchain-huggingface
@@ -284,8 +296,12 @@ langgraph==0.4.8
284
  # via final-assignment-template (pyproject.toml)
285
  langgraph-checkpoint==2.1.0
286
  # via
 
287
  # langgraph
 
288
  # langgraph-prebuilt
 
 
289
  langgraph-prebuilt==0.2.2
290
  # via langgraph
291
  langgraph-sdk==0.1.70
@@ -387,6 +403,8 @@ multidict==6.4.4
387
  # via
388
  # aiohttp
389
  # yarl
 
 
390
  mypy-extensions==1.1.0
391
  # via typing-inspect
392
  nest-asyncio==1.6.0
@@ -403,6 +421,7 @@ nltk==3.9.1
403
  # llama-index-core
404
  numpy==2.2.6
405
  # via
 
406
  # gradio
407
  # langchain-community
408
  # llama-index-core
@@ -457,6 +476,7 @@ ormsgpack==1.10.0
457
  # via langgraph-checkpoint
458
  packaging==24.2
459
  # via
 
460
  # deprecation
461
  # gradio
462
  # gradio-client
@@ -471,6 +491,7 @@ packaging==24.2
471
  pandas==2.2.3
472
  # via
473
  # final-assignment-template (pyproject.toml)
 
474
  # gradio
475
  # llama-index-readers-file
476
  parso==0.8.4
@@ -513,6 +534,8 @@ psutil==7.0.0
513
  # via ipykernel
514
  pure-eval==0.2.3
515
  # via stack-data
 
 
516
  pyasn1==0.6.1
517
  # via
518
  # pyasn1-modules
@@ -572,9 +595,11 @@ python-multipart==0.0.20
572
  # via gradio
573
  pytz==2025.2
574
  # via pandas
 
575
  # via jupyter-core
576
  pyyaml==6.0.2
577
  # via
 
578
  # gradio
579
  # huggingface-hub
580
  # langchain
@@ -596,6 +621,7 @@ regex==2024.11.6
596
  requests==2.32.3
597
  # via
598
  # arxiv
 
599
  # google-api-core
600
  # huggingface-hub
601
  # langchain
@@ -651,6 +677,8 @@ sqlalchemy==2.0.41
651
  # langchain
652
  # langchain-community
653
  # llama-index-core
 
 
654
  stack-data==0.6.3
655
  # via ipython
656
  starlette==0.46.2
@@ -701,6 +729,7 @@ tornado==6.5.1
701
  # jupyter-client
702
  tqdm==4.67.1
703
  # via
 
704
  # huggingface-hub
705
  # llama-index-core
706
  # nltk
@@ -783,7 +812,9 @@ wrapt==1.17.2
783
  # langfuse
784
  # llama-index-core
785
  xxhash==3.5.0
786
- # via langgraph
 
 
787
  yarl==1.20.0
788
  # via aiohttp
789
  zipp==3.22.0
 
6
  # via aiohttp
7
  aiohttp==3.12.9
8
  # via
9
+ # fsspec
10
  # langchain-community
11
  # llama-index-core
12
  # realtime
13
  aiosignal==1.3.2
14
  # via aiohttp
15
  aiosqlite==0.21.0
16
+ # via
17
+ # langgraph-checkpoint-sqlite
18
+ # llama-index-core
19
  annotated-types==0.7.0
20
  # via pydantic
21
  anyio==4.9.0
 
75
  # via
76
  # langchain-community
77
  # llama-index-core
78
+ datasets==3.6.0
79
+ # via final-assignment-template (pyproject.toml)
80
  debugpy==1.8.14
81
  # via ipykernel
82
  decorator==5.2.1
 
89
  # llama-index-core
90
  deprecation==2.1.0
91
  # via postgrest
92
+ dill==0.3.8
93
+ # via
94
+ # datasets
95
+ # multiprocess
96
  dirtyjson==1.0.8
97
  # via llama-index-core
98
  distro==1.9.0
 
118
  # via gradio
119
  filelock==3.18.0
120
  # via
121
+ # datasets
122
  # huggingface-hub
123
  # torch
124
  # transformers
 
130
  # via
131
  # aiohttp
132
  # aiosignal
133
+ fsspec==2025.3.0
134
  # via
135
+ # datasets
136
  # gradio-client
137
  # huggingface-hub
138
  # llama-index-core
 
209
  huggingface-hub==0.32.4
210
  # via
211
  # final-assignment-template (pyproject.toml)
212
+ # datasets
213
  # gradio
214
  # gradio-client
215
  # langchain-huggingface
 
296
  # via final-assignment-template (pyproject.toml)
297
  langgraph-checkpoint==2.1.0
298
  # via
299
+ # final-assignment-template (pyproject.toml)
300
  # langgraph
301
+ # langgraph-checkpoint-sqlite
302
  # langgraph-prebuilt
303
+ langgraph-checkpoint-sqlite==2.0.10
304
+ # via final-assignment-template (pyproject.toml)
305
  langgraph-prebuilt==0.2.2
306
  # via langgraph
307
  langgraph-sdk==0.1.70
 
403
  # via
404
  # aiohttp
405
  # yarl
406
+ multiprocess==0.70.16
407
+ # via datasets
408
  mypy-extensions==1.1.0
409
  # via typing-inspect
410
  nest-asyncio==1.6.0
 
421
  # llama-index-core
422
  numpy==2.2.6
423
  # via
424
+ # datasets
425
  # gradio
426
  # langchain-community
427
  # llama-index-core
 
476
  # via langgraph-checkpoint
477
  packaging==24.2
478
  # via
479
+ # datasets
480
  # deprecation
481
  # gradio
482
  # gradio-client
 
491
  pandas==2.2.3
492
  # via
493
  # final-assignment-template (pyproject.toml)
494
+ # datasets
495
  # gradio
496
  # llama-index-readers-file
497
  parso==0.8.4
 
534
  # via ipykernel
535
  pure-eval==0.2.3
536
  # via stack-data
537
+ pyarrow==20.0.0
538
+ # via datasets
539
  pyasn1==0.6.1
540
  # via
541
  # pyasn1-modules
 
595
  # via gradio
596
  pytz==2025.2
597
  # via pandas
598
+ pywin32==310
599
  # via jupyter-core
600
  pyyaml==6.0.2
601
  # via
602
+ # datasets
603
  # gradio
604
  # huggingface-hub
605
  # langchain
 
621
  requests==2.32.3
622
  # via
623
  # arxiv
624
+ # datasets
625
  # google-api-core
626
  # huggingface-hub
627
  # langchain
 
677
  # langchain
678
  # langchain-community
679
  # llama-index-core
680
+ sqlite-vec==0.1.6
681
+ # via langgraph-checkpoint-sqlite
682
  stack-data==0.6.3
683
  # via ipython
684
  starlette==0.46.2
 
729
  # jupyter-client
730
  tqdm==4.67.1
731
  # via
732
+ # datasets
733
  # huggingface-hub
734
  # llama-index-core
735
  # nltk
 
812
  # langfuse
813
  # llama-index-core
814
  xxhash==3.5.0
815
+ # via
816
+ # datasets
817
+ # langgraph
818
  yarl==1.20.0
819
  # via aiohttp
820
  zipp==3.22.0
src/__init__.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """LangGraph Agent System Package"""
2
+
3
+ from .langgraph_system import run_agent_system, create_agent_graph, AgentState
4
+ from .memory import memory_manager
5
+ from .tracing import get_langfuse_callback_handler, initialize_langfuse
6
+
7
+ __all__ = [
8
+ "run_agent_system",
9
+ "create_agent_graph",
10
+ "AgentState",
11
+ "memory_manager",
12
+ "get_langfuse_callback_handler",
13
+ "initialize_langfuse"
14
+ ]
src/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (541 Bytes).
 
src/__pycache__/langgraph_system.cpython-313.pyc ADDED
Binary file (7.61 kB).
 
src/__pycache__/memory.cpython-313.pyc ADDED
Binary file (9.3 kB).
 
src/__pycache__/tracing.cpython-313.pyc ADDED
Binary file (5.98 kB).
 
src/agents/__init__.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Agent Modules Package"""
2
+
3
+ from .plan_node import plan_node
4
+ from .router_node import router_node, should_route_to_agent
5
+ from .retrieval_agent import retrieval_agent, get_retrieval_tools
6
+ from .execution_agent import execution_agent, get_execution_tools
7
+ from .critic_agent import critic_agent
8
+ from .verification_node import verification_node, should_retry
9
+
10
+ __all__ = [
11
+ "plan_node",
12
+ "router_node",
13
+ "should_route_to_agent",
14
+ "retrieval_agent",
15
+ "get_retrieval_tools",
16
+ "execution_agent",
17
+ "get_execution_tools",
18
+ "critic_agent",
19
+ "verification_node",
20
+ "should_retry"
21
+ ]
src/agents/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (648 Bytes).
 
src/agents/__pycache__/critic_agent.cpython-313.pyc ADDED
Binary file (3.92 kB).
 
src/agents/__pycache__/execution_agent.cpython-313.pyc ADDED
Binary file (6.88 kB).
 
src/agents/__pycache__/plan_node.cpython-313.pyc ADDED
Binary file (3.14 kB).
 
src/agents/__pycache__/retrieval_agent.cpython-313.pyc ADDED
Binary file (12 kB).
 
src/agents/__pycache__/router_node.cpython-313.pyc ADDED
Binary file (3.79 kB).
 
src/agents/__pycache__/verification_node.cpython-313.pyc ADDED
Binary file (6.99 kB).
 
src/agents/critic_agent.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Critic Agent - Evaluates and reviews responses for quality and accuracy"""
2
+ from typing import Dict, Any
3
+ from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
4
+ from langchain_groq import ChatGroq
5
+ from src.tracing import get_langfuse_callback_handler
6
+
7
+
8
+ def load_critic_prompt() -> str:
9
+ """Load the critic prompt from file"""
10
+ try:
11
+ with open("./prompts/critic_prompt.txt", "r", encoding="utf-8") as f:
12
+ return f.read().strip()
13
+ except FileNotFoundError:
14
+ return """You are a specialized critic agent. Evaluate responses for accuracy, completeness, and quality."""
15
+
16
+
17
+ def critic_agent(state: Dict[str, Any]) -> Dict[str, Any]:
18
+ """
19
+ Critic agent that evaluates responses for quality and accuracy
20
+ """
21
+ print("Critic Agent: Evaluating response quality")
22
+
23
+ try:
24
+ # Get critic prompt
25
+ critic_prompt = load_critic_prompt()
26
+
27
+ # Initialize LLM for criticism
28
+ llm = ChatGroq(model="qwen-qwq-32b", temperature=0.2)
29
+
30
+ # Get callback handler for tracing
31
+ callback_handler = get_langfuse_callback_handler()
32
+ callbacks = [callback_handler] if callback_handler else []
33
+
34
+ # Build messages
35
+ messages = state.get("messages", [])
36
+
37
+ # Get the agent response to evaluate
38
+ agent_response = state.get("agent_response")
39
+ if not agent_response:
40
+ # Find the last AI message
41
+ for msg in reversed(messages):
42
+ if msg.type == "ai":
43
+ agent_response = msg
44
+ break
45
+
46
+ if not agent_response:
47
+ print("Critic Agent: No response to evaluate")
48
+ return {
49
+ **state,
50
+ "critic_assessment": "No response found to evaluate",
51
+ "quality_score": 0,
52
+ "current_step": "verification"
53
+ }
54
+
55
+ # Get user query for context
56
+ user_query = None
57
+ for msg in reversed(messages):
58
+ if msg.type == "human":
59
+ user_query = msg.content
60
+ break
61
+
62
+ # Build critic messages
63
+ critic_messages = [SystemMessage(content=critic_prompt)]
64
+
65
+ # Add evaluation request
66
+ evaluation_request = f"""
67
+ Please evaluate the following response:
68
+
69
+ Original Query: {user_query or "Unknown query"}
70
+
71
+ Response to Evaluate:
72
+ {agent_response.content}
73
+
74
+ Provide your evaluation following the format specified in your instructions.
75
+ """
76
+
77
+ critic_messages.append(HumanMessage(content=evaluation_request))
78
+
79
+ # Get critic evaluation
80
+ evaluation = llm.invoke(critic_messages, config={"callbacks": callbacks})
81
+
82
+ # Parse evaluation to determine if it passes
83
+ evaluation_text = evaluation.content.lower()
84
+ quality_pass = True
85
+ quality_score = 7 # Default moderate score
86
+
87
+ # Simple heuristics for quality assessment
88
+ if "fail" in evaluation_text or "poor" in evaluation_text:
89
+ quality_pass = False
90
+ quality_score = 3
91
+ elif "excellent" in evaluation_text or "outstanding" in evaluation_text:
92
+ quality_score = 9
93
+ elif "good" in evaluation_text:
94
+ quality_score = 7
95
+ elif "issues" in evaluation_text or "problems" in evaluation_text:
96
+ quality_score = 5
97
+
98
+ # Add critic evaluation to messages
99
+ updated_messages = messages + [evaluation]
100
+
101
+ return {
102
+ **state,
103
+ "messages": updated_messages,
104
+ "critic_assessment": evaluation.content,
105
+ "quality_pass": quality_pass,
106
+ "quality_score": quality_score,
107
+ "current_step": "verification"
108
+ }
109
+
110
+ except Exception as e:
111
+ print(f"Critic Agent Error: {e}")
112
+ return {
113
+ **state,
114
+ "critic_assessment": f"Error during evaluation: {e}",
115
+ "quality_pass": False,
116
+ "quality_score": 0,
117
+ "current_step": "verification"
118
+ }
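The keyword heuristics above are easy to fool (for example, "not good" still scores as good). A sketch of a stricter alternative using structured output; CriticVerdict is a hypothetical schema, and the model name mirrors the one used above:

from pydantic import BaseModel, Field
from langchain_groq import ChatGroq

class CriticVerdict(BaseModel):
    quality_pass: bool = Field(description="Whether the response is acceptable")
    quality_score: int = Field(ge=0, le=10, description="Quality score from 0 to 10")
    issues: list[str] = Field(default_factory=list, description="Specific problems found")

# Ask the critic model to fill the schema instead of returning free text.
structured_critic = ChatGroq(model="qwen-qwq-32b", temperature=0.0).with_structured_output(CriticVerdict)
# verdict = structured_critic.invoke(critic_messages)
# quality_pass, quality_score = verdict.quality_pass, verdict.quality_score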
src/agents/execution_agent.py ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Execution Agent - Handles code execution and computational tasks"""
2
+ from typing import Dict, Any, List
3
+ from langchain_core.messages import SystemMessage, HumanMessage, AIMessage, ToolMessage
4
+ from langchain_core.tools import tool
5
+ from langchain_groq import ChatGroq
6
+ from code_agent import run_agent # Import our existing code execution engine
7
+ from src.tracing import get_langfuse_callback_handler
8
+
9
+
10
+ @tool
11
+ def run_python(input: str) -> str:
12
+ """Execute Python code in a restricted sandbox (code-interpreter).
13
+
14
+ Pass **any** coding or file-manipulation task here and the agent will
15
+ compute the answer by running Python. The full standard library is not
16
+ available, and heavy networking is disabled. Suitable for: math, data-frames,
17
+ small file parsing, algorithmic questions.
18
+ """
19
+ return run_agent(input)
20
+
21
+
22
+ def load_execution_prompt() -> str:
23
+ """Load the execution prompt from file"""
24
+ try:
25
+ with open("./prompts/execution_prompt.txt", "r", encoding="utf-8") as f:
26
+ return f.read().strip()
27
+ except FileNotFoundError:
28
+ return """You are a specialized execution agent. Use the run_python tool to execute code and solve computational problems."""
29
+
30
+
31
+ def get_execution_tools() -> List:
32
+ """Get list of tools available to the execution agent"""
33
+ return [run_python]
34
+
35
+
36
+ def execute_tool_calls(tool_calls: list, tools: list) -> list:
37
+ """Execute tool calls and return results"""
38
+ tool_messages = []
39
+
40
+ # Create a mapping of tool names to tool functions
41
+ tool_map = {tool.name: tool for tool in tools}
42
+
43
+ for tool_call in tool_calls:
44
+ tool_name = tool_call['name']
45
+ tool_args = tool_call['args']
46
+ tool_call_id = tool_call['id']
47
+
48
+ if tool_name in tool_map:
49
+ try:
50
+ print(f"Execution Agent: Executing {tool_name} with args: {str(tool_args)[:200]}...")
51
+ result = tool_map[tool_name].invoke(tool_args)
52
+ tool_messages.append(
53
+ ToolMessage(
54
+ content=str(result),
55
+ tool_call_id=tool_call_id
56
+ )
57
+ )
58
+ except Exception as e:
59
+ print(f"Error executing {tool_name}: {e}")
60
+ tool_messages.append(
61
+ ToolMessage(
62
+ content=f"Error executing {tool_name}: {e}",
63
+ tool_call_id=tool_call_id
64
+ )
65
+ )
66
+ else:
67
+ tool_messages.append(
68
+ ToolMessage(
69
+ content=f"Unknown tool: {tool_name}",
70
+ tool_call_id=tool_call_id
71
+ )
72
+ )
73
+
74
+ return tool_messages
75
+
76
+
77
+ def needs_code_execution(query: str) -> bool:
78
+ """Heuristic to determine if a query requires code execution"""
79
+ code_indicators = [
80
+ "calculate", "compute", "algorithm", "fibonacci", "math", "data",
81
+ "programming", "code", "function", "sort", "csv", "json", "pandas",
82
+ "plot", "graph", "analyze", "process", "file", "manipulation"
83
+ ]
84
+ query_lower = query.lower()
85
+ return any(indicator in query_lower for indicator in code_indicators)
86
+
87
+
88
+ def execution_agent(state: Dict[str, Any]) -> Dict[str, Any]:
89
+ """
90
+ Execution agent that handles computational and code execution tasks
91
+ """
92
+ print("Execution Agent: Processing computational request")
93
+
94
+ try:
95
+ # Get execution prompt
96
+ execution_prompt = load_execution_prompt()
97
+
98
+ # Initialize LLM with tools
99
+ llm = ChatGroq(model="qwen-qwq-32b", temperature=0.1) # Lower temp for consistent code
100
+ tools = get_execution_tools()
101
+ llm_with_tools = llm.bind_tools(tools)
102
+
103
+ # Get callback handler for tracing
104
+ callback_handler = get_langfuse_callback_handler()
105
+ callbacks = [callback_handler] if callback_handler else []
106
+
107
+ # Build messages
108
+ messages = state.get("messages", [])
109
+
110
+ # Add execution system prompt
111
+ execution_messages = [SystemMessage(content=execution_prompt)]
112
+
113
+ # Get user query for analysis
114
+ user_query = None
115
+ for msg in reversed(messages):
116
+ if msg.type == "human":
117
+ user_query = msg.content
118
+ break
119
+
120
+ # If this clearly needs code execution, provide guidance
121
+ if user_query and needs_code_execution(user_query):
122
+ guidance_msg = HumanMessage(
123
+ content=f"""Task requiring code execution: {user_query}
124
+
125
+ Please analyze this computational task and use the run_python tool to solve it step by step.
126
+ Break down complex problems into smaller steps and provide clear explanations."""
127
+ )
128
+ execution_messages.append(guidance_msg)
129
+
130
+ # Add original messages (excluding system messages to avoid duplicates)
131
+ for msg in messages:
132
+ if msg.type != "system":
133
+ execution_messages.append(msg)
134
+
135
+ # Get initial response from LLM
136
+ response = llm_with_tools.invoke(execution_messages, config={"callbacks": callbacks})
137
+
138
+ # Check if the LLM wants to use tools
139
+ if response.tool_calls:
140
+ print(f"Execution Agent: LLM requested {len(response.tool_calls)} tool calls")
141
+
142
+ # Execute the tool calls
143
+ tool_messages = execute_tool_calls(response.tool_calls, tools)
144
+
145
+ # Add the response and tool messages to conversation
146
+ execution_messages.extend([response] + tool_messages)
147
+
148
+ # Get final response after tool execution
149
+ final_response = llm.invoke(execution_messages, config={"callbacks": callbacks})
150
+
151
+ return {
152
+ **state,
153
+ "messages": execution_messages + [final_response],
154
+ "agent_response": final_response,
155
+ "current_step": "verification"
156
+ }
157
+ else:
158
+ # Direct response without tools
159
+ return {
160
+ **state,
161
+ "messages": execution_messages + [response],
162
+ "agent_response": response,
163
+ "current_step": "verification"
164
+ }
165
+
166
+ except Exception as e:
167
+ print(f"Execution Agent Error: {e}")
168
+ error_response = AIMessage(content=f"I encountered an error while processing your computational request: {e}")
169
+ return {
170
+ **state,
171
+ "messages": state.get("messages", []) + [error_response],
172
+ "agent_response": error_response,
173
+ "current_step": "verification"
174
+ }
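needs_code_execution is a plain substring match over the indicator list, so its behavior is easy to check (assuming the package is importable from the repo root):

from src.agents.execution_agent import needs_code_execution

assert needs_code_execution("Calculate the 10th Fibonacci number") is True
assert needs_code_execution("Who painted the Mona Lisa?") is False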
src/agents/plan_node.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Plan Node - Initial ReAct planning loop"""
2
+ from typing import Dict, Any
3
+ from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
4
+ from langchain_groq import ChatGroq
5
+ from src.tracing import get_langfuse_callback_handler
6
+
7
+
8
+ def load_system_prompt() -> str:
9
+ """Load the system prompt from file"""
10
+ try:
11
+ with open("./prompts/system_prompt.txt", "r", encoding="utf-8") as f:
12
+ return f.read().strip()
13
+ except FileNotFoundError:
14
+ return "You are a helpful assistant tasked with answering GAIA benchmark questions."
15
+
16
+
17
+ def plan_node(state: Dict[str, Any]) -> Dict[str, Any]:
18
+ """
19
+ Initial planning node that sets up the conversation with system prompt
20
+ and prepares for agent routing
21
+ """
22
+ print("Plan Node: Processing query")
23
+
24
+ try:
25
+ # Get the system prompt
26
+ system_prompt = load_system_prompt()
27
+
28
+ # Initialize LLM for planning
29
+ llm = ChatGroq(model="qwen-qwq-32b", temperature=0.1)
30
+
31
+ # Get callback handler for tracing
32
+ callback_handler = get_langfuse_callback_handler()
33
+ callbacks = [callback_handler] if callback_handler else []
34
+
35
+ # Extract user messages
36
+ messages = state.get("messages", [])
37
+ if not messages:
38
+ return {"messages": [SystemMessage(content=system_prompt)]}
39
+
40
+ # Build message list with system prompt
41
+ plan_messages = [SystemMessage(content=system_prompt)]
42
+
43
+ # Add existing messages
44
+ for msg in messages:
45
+ if msg.type != "system": # Avoid duplicate system messages
46
+ plan_messages.append(msg)
47
+
48
+ # Add planning instruction
49
+ planning_instruction = """
50
+ Analyze this query and prepare a plan for answering it. Consider:
51
+ 1. What type of information or processing is needed?
52
+ 2. What tools or agents would be most appropriate?
53
+ 3. What is the expected output format?
54
+
55
+ Provide a brief analysis and initial plan.
56
+ """
57
+
58
+ if plan_messages and plan_messages[-1].type == "human":
59
+ # Get LLM analysis of the query
60
+ analysis_messages = plan_messages + [HumanMessage(content=planning_instruction)]
61
+
62
+ response = llm.invoke(analysis_messages, config={"callbacks": callbacks})
63
+ plan_messages.append(response)
64
+
65
+ return {
66
+ "messages": plan_messages,
67
+ "plan_complete": True,
68
+ "current_step": "routing"
69
+ }
70
+
71
+ except Exception as e:
72
+ print(f"Plan Node Error: {e}")
73
+ # Fallback with basic system message
74
+ system_prompt = load_system_prompt()
75
+ return {
76
+ "messages": [SystemMessage(content=system_prompt)] + state.get("messages", []),
77
+ "plan_complete": True,
78
+ "current_step": "routing"
79
+ }
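Used in isolation, plan_node takes a partial state and returns the fields it owns; a sketch (requires a GROQ_API_KEY in the environment for the analysis call):

from langchain_core.messages import HumanMessage
from src.agents.plan_node import plan_node

out = plan_node({"messages": [HumanMessage(content="In what year did Apollo 11 land?")]})
print(out["current_step"])  # "routing"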
src/agents/retrieval_agent.py ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Retrieval Agent - Handles information gathering and search tasks"""
2
+ import os
3
+ import requests
4
+ from typing import Dict, Any, List
5
+ from langchain_core.messages import SystemMessage, HumanMessage, AIMessage, ToolMessage
6
+ from langchain_core.tools import tool
7
+ from langchain_groq import ChatGroq
8
+ from langchain_community.tools.tavily_search import TavilySearchResults
9
+ from langchain_community.document_loaders import WikipediaLoader, ArxivLoader
10
+ from langchain.tools.retriever import create_retriever_tool
11
+ from src.memory import memory_manager
12
+ from src.tracing import get_langfuse_callback_handler
13
+
14
+
15
+ # Tool definitions (same as original)
16
+ @tool
17
+ def wiki_search(input: str) -> str:
18
+ """Search Wikipedia for a query and return maximum 2 results.
19
+
20
+ Args:
21
+ input: The search query."""
22
+ try:
23
+ search_docs = WikipediaLoader(query=input, load_max_docs=2).load()
24
+ if not search_docs:
25
+ return "No Wikipedia results found for the query."
26
+ formatted_search_docs = "\n\n---\n\n".join(
27
+ [
28
+ f'<Document source="{doc.metadata.get("source", "Unknown")}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
29
+ for doc in search_docs
30
+ ])
31
+ return formatted_search_docs
32
+ except Exception as e:
33
+ print(f"Error in wiki_search: {e}")
34
+ return f"Error searching Wikipedia: {e}"
35
+
36
+
37
+ @tool
38
+ def web_search(input: str) -> str:
39
+ """Search Tavily for a query and return maximum 3 results.
40
+
41
+ Args:
42
+ input: The search query."""
43
+ try:
44
+ search_docs = TavilySearchResults(max_results=3).invoke(input)
45
+ if not search_docs:
46
+ return "No web search results found for the query."
47
+ formatted_search_docs = "\n\n---\n\n".join(
48
+ [
49
+ f'<Document source="{doc.get("url", "Unknown")}" />\n{doc.get("content", "No content")}\n</Document>'
50
+ for doc in search_docs
51
+ ])
52
+ return formatted_search_docs
53
+ except Exception as e:
54
+ print(f"Error in web_search: {e}")
55
+ return f"Error searching web: {e}"
56
+
57
+
58
+ @tool
59
+ def arvix_search(input: str) -> str:
60
+ """Search Arxiv for a query and return maximum 3 results.
61
+
62
+ Args:
63
+ input: The search query."""
64
+ try:
65
+ search_docs = ArxivLoader(query=input, load_max_docs=3).load()
66
+ if not search_docs:
67
+ return "No Arxiv results found for the query."
68
+ formatted_search_docs = "\n\n---\n\n".join(
69
+ [
70
+ f'<Document source="{doc.metadata.get("source", "Unknown")}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
71
+ for doc in search_docs
72
+ ])
73
+ return formatted_search_docs
74
+ except Exception as e:
75
+ print(f"Error in arvix_search: {e}")
76
+ return f"Error searching Arxiv: {e}"
77
+
78
+
79
+ def load_retrieval_prompt() -> str:
80
+ """Load the retrieval prompt from file"""
81
+ try:
82
+ with open("./prompts/retrieval_prompt.txt", "r", encoding="utf-8") as f:
83
+ return f.read().strip()
84
+ except FileNotFoundError:
85
+ return """You are a specialized retrieval agent. Use available tools to search for information and provide comprehensive answers."""
86
+
87
+
88
+ def get_retrieval_tools() -> List:
89
+ """Get list of tools available to the retrieval agent"""
90
+ tools = [wiki_search, web_search, arvix_search]
91
+
92
+ # Add vector store retrieval tool if available
93
+ if memory_manager.vector_store:
94
+ try:
95
+ retrieval_tool = create_retriever_tool(
96
+ retriever=memory_manager.vector_store.as_retriever(),
97
+ name="question_search",
98
+ description="A tool to retrieve similar questions from a vector store.",
99
+ )
100
+ tools.append(retrieval_tool)
101
+ except Exception as e:
102
+ print(f"Could not create retrieval tool: {e}")
103
+
104
+ return tools
105
+
106
+
107
+ def execute_tool_calls(tool_calls: list, tools: list) -> list:
108
+ """Execute tool calls and return results"""
109
+ tool_messages = []
110
+
111
+ # Create a mapping of tool names to tool functions
112
+ tool_map = {tool.name: tool for tool in tools}
113
+
114
+ for tool_call in tool_calls:
115
+ tool_name = tool_call['name']
116
+ tool_args = tool_call['args']
117
+ tool_call_id = tool_call['id']
118
+
119
+ if tool_name in tool_map:
120
+ try:
121
+ print(f"Retrieval Agent: Executing {tool_name} with args: {tool_args}")
122
+ result = tool_map[tool_name].invoke(tool_args)
123
+ tool_messages.append(
124
+ ToolMessage(
125
+ content=str(result),
126
+ tool_call_id=tool_call_id
127
+ )
128
+ )
129
+ except Exception as e:
130
+ print(f"Error executing {tool_name}: {e}")
131
+ tool_messages.append(
132
+ ToolMessage(
133
+ content=f"Error executing {tool_name}: {e}",
134
+ tool_call_id=tool_call_id
135
+ )
136
+ )
137
+ else:
138
+ tool_messages.append(
139
+ ToolMessage(
140
+ content=f"Unknown tool: {tool_name}",
141
+ tool_call_id=tool_call_id
142
+ )
143
+ )
144
+
145
+ return tool_messages
146
+
147
+
148
+ def fetch_attachment_if_needed(query: str) -> str:
149
+ """Fetch attachment content if the query matches a known task"""
150
+ try:
151
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
152
+ resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=30)
153
+ resp.raise_for_status()
154
+ questions = resp.json()
155
+
156
+ for q in questions:
157
+ if str(q.get("question")).strip() == str(query).strip():
158
+ task_id = str(q.get("task_id"))
159
+ print(f"Retrieval Agent: Downloading attachment for task {task_id}")
160
+ file_resp = requests.get(f"{DEFAULT_API_URL}/files/{task_id}", timeout=60)
161
+ if file_resp.status_code == 200 and file_resp.content:
162
+ try:
163
+ file_text = file_resp.content.decode("utf-8", errors="replace")
164
+ except Exception:
165
+ file_text = "(binary or non-UTF8 file omitted)"
166
+ MAX_CHARS = 8000
167
+ if len(file_text) > MAX_CHARS:
168
+ file_text = file_text[:MAX_CHARS] + "\n… (truncated)"
169
+ return f"Attached file content for task {task_id}:\n```python\n{file_text}\n```"
170
+ else:
171
+ print(f"No attachment for task {task_id}")
172
+ return ""
173
+ return ""
174
+ except Exception as e:
175
+ print(f"Error fetching attachment: {e}")
176
+ return ""
177
+
178
+
179
+ def retrieval_agent(state: Dict[str, Any]) -> Dict[str, Any]:
180
+ """
181
+ Retrieval agent that handles information gathering tasks
182
+ """
183
+ print("Retrieval Agent: Processing information retrieval request")
184
+
185
+ try:
186
+ # Get retrieval prompt
187
+ retrieval_prompt = load_retrieval_prompt()
188
+
189
+ # Initialize LLM with tools
190
+ llm = ChatGroq(model="qwen-qwq-32b", temperature=0.3)
191
+ tools = get_retrieval_tools()
192
+ llm_with_tools = llm.bind_tools(tools)
193
+
194
+ # Get callback handler for tracing
195
+ callback_handler = get_langfuse_callback_handler()
196
+ callbacks = [callback_handler] if callback_handler else []
197
+
198
+ # Build messages
199
+ messages = state.get("messages", [])
200
+
201
+ # Add retrieval system prompt
202
+ retrieval_messages = [SystemMessage(content=retrieval_prompt)]
203
+
204
+ # Get user query for context and attachment fetching
205
+ user_query = None
206
+ for msg in reversed(messages):
207
+ if msg.type == "human":
208
+ user_query = msg.content
209
+ break
210
+
211
+ # Check for similar questions in memory
212
+ if user_query:
213
+ similar_qa = memory_manager.get_similar_qa(user_query)
214
+ if similar_qa:
215
+ context_msg = HumanMessage(
216
+ content=f"Here is a similar question and answer for reference:\n\n{similar_qa}"
217
+ )
218
+ retrieval_messages.append(context_msg)
219
+
220
+ # Fetch attachment if needed
221
+ attachment_content = fetch_attachment_if_needed(user_query)
222
+ if attachment_content:
223
+ attachment_msg = HumanMessage(content=attachment_content)
224
+ retrieval_messages.append(attachment_msg)
225
+
226
+ # Add original messages (excluding system messages to avoid duplicates)
227
+ for msg in messages:
228
+ if msg.type != "system":
229
+ retrieval_messages.append(msg)
230
+
231
+ # Get initial response from LLM and iterate tool calls if necessary
232
+ response = llm_with_tools.invoke(retrieval_messages, config={"callbacks": callbacks})
233
+
234
+ max_tool_iterations = 3 # safeguard to prevent infinite loops
235
+ iteration = 0
236
+
237
+ while response.tool_calls and iteration < max_tool_iterations:
238
+ iteration += 1
239
+ print(f"Retrieval Agent: LLM requested {len(response.tool_calls)} tool calls (iteration {iteration})")
240
+
241
+ # Execute the tool calls
242
+ tool_messages = execute_tool_calls(response.tool_calls, tools)
243
+
244
+ # Append the LLM response and tool results to the conversation
245
+ retrieval_messages.extend([response] + tool_messages)
246
+
247
+ # Ask the model again with the new information
248
+ response = llm_with_tools.invoke(retrieval_messages, config={"callbacks": callbacks})
249
+
250
+ # After iterating (or if no tool calls), we have our final response
251
+ retrieval_messages.append(response)
252
+
253
+ return {
254
+ **state,
255
+ "messages": retrieval_messages,
256
+ "agent_response": response,
257
+ "current_step": "verification"
258
+ }
259
+
260
+ except Exception as e:
261
+ print(f"Retrieval Agent Error: {e}")
262
+ error_response = AIMessage(content=f"I encountered an error while processing your request: {e}")
263
+ return {
264
+ **state,
265
+ "messages": state.get("messages", []) + [error_response],
266
+ "agent_response": error_response,
267
+ "current_step": "verification"
268
+ }
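The bounded tool-call loop above is the key control-flow pattern; distilled into a reusable helper (run_bounded_tool_loop is a hypothetical name; execute_tool_calls is the helper defined above):

def run_bounded_tool_loop(llm_with_tools, tools, messages, max_tool_iterations=3):
    """Invoke the model, executing requested tools until it answers or the cap is hit."""
    response = llm_with_tools.invoke(messages)
    iteration = 0
    while getattr(response, "tool_calls", None) and iteration < max_tool_iterations:
        iteration += 1
        messages.extend([response] + execute_tool_calls(response.tool_calls, tools))
        response = llm_with_tools.invoke(messages)
    return response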
src/agents/router_node.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Router Node - Decides which specialized agent to use"""
2
+ from typing import Dict, Any, Literal
3
+ from langchain_core.messages import SystemMessage, HumanMessage
4
+ from langchain_groq import ChatGroq
5
+ from src.tracing import get_langfuse_callback_handler
6
+
7
+
8
+ def load_router_prompt() -> str:
9
+ """Load the router prompt from file"""
10
+ try:
11
+ with open("./prompts/router_prompt.txt", "r", encoding="utf-8") as f:
12
+ return f.read().strip()
13
+ except FileNotFoundError:
14
+ return """You are an intelligent agent router. Analyze the query and respond with exactly one of: RETRIEVAL, EXECUTION, or CRITIC"""
15
+
16
+
17
+ def router_node(state: Dict[str, Any]) -> Dict[str, Any]:
18
+ """
19
+ Router node that analyzes the user query and determines which agent should handle it
20
+ Returns: next_agent = 'retrieval' | 'execution' | 'critic'
21
+ """
22
+ print("Router Node: Analyzing query for agent selection")
23
+
24
+ try:
25
+ # Get router prompt
26
+ router_prompt = load_router_prompt()
27
+
28
+ # Initialize LLM for routing decision
29
+ llm = ChatGroq(model="qwen-qwq-32b", temperature=0.0) # Low temperature for consistent routing
30
+
31
+ # Get callback handler for tracing
32
+ callback_handler = get_langfuse_callback_handler()
33
+ callbacks = [callback_handler] if callback_handler else []
34
+
35
+ # Extract the last human message for routing decision
36
+ messages = state.get("messages", [])
37
+ user_query = None
38
+
39
+ for msg in reversed(messages):
40
+ if msg.type == "human":
41
+ user_query = msg.content
42
+ break
43
+
44
+ if not user_query:
45
+ print("Router Node: No user query found, defaulting to retrieval")
46
+ return {
47
+ **state,
48
+ "next_agent": "retrieval",
49
+ "routing_reason": "No user query found"
50
+ }
51
+
52
+ # Build routing messages
53
+ routing_messages = [
54
+ SystemMessage(content=router_prompt),
55
+ HumanMessage(content=f"Query to route: {user_query}")
56
+ ]
57
+
58
+ # Get routing decision
59
+ response = llm.invoke(routing_messages, config={"callbacks": callbacks})
60
+ routing_decision = response.content.strip().upper()
61
+
62
+ # Map decision to next agent
63
+ next_agent = "retrieval" # Default fallback
64
+ if "RETRIEVAL" in routing_decision:
65
+ next_agent = "retrieval"
66
+ elif "EXECUTION" in routing_decision:
67
+ next_agent = "execution"
68
+ elif "CRITIC" in routing_decision:
69
+ next_agent = "critic"
70
+
71
+ print(f"Router Node: Routing to {next_agent} agent (decision: {routing_decision})")
72
+
73
+ return {
74
+ **state,
75
+ "next_agent": next_agent,
76
+ "routing_decision": routing_decision,
77
+ "routing_reason": f"Query analysis resulted in: {routing_decision}",
78
+ "current_step": next_agent
79
+ }
80
+
81
+ except Exception as e:
82
+ print(f"Router Node Error: {e}")
83
+ # Fallback to retrieval agent
84
+ return {
85
+ **state,
86
+ "next_agent": "retrieval",
87
+ "routing_reason": f"Error in routing: {e}"
88
+ }
89
+
90
+
91
+ def should_route_to_agent(state: Dict[str, Any]) -> Literal["retrieval", "execution", "critic"]:
92
+ """
93
+ Conditional edge function that determines which agent to route to
94
+ """
95
+ next_agent = state.get("next_agent", "retrieval")
96
+ print(f"Routing to: {next_agent}")
97
+ return next_agent
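The conditional edge itself stays trivial because the routing decision (and its default) is made inside router_node; for example:

from src.agents.router_node import should_route_to_agent

assert should_route_to_agent({"next_agent": "execution"}) == "execution"
assert should_route_to_agent({}) == "retrieval"  # safe default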
src/agents/verification_node.py ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Verification Node - Final quality control and output formatting"""
2
+ from typing import Dict, Any
3
+ from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
4
+ from langchain_groq import ChatGroq
5
+ from src.tracing import get_langfuse_callback_handler
6
+
7
+
8
+ def load_verification_prompt() -> str:
9
+ """Load the verification prompt from file"""
10
+ try:
11
+ with open("./prompts/verification_prompt.txt", "r", encoding="utf-8") as f:
12
+ return f.read().strip()
13
+ except FileNotFoundError:
14
+ return """You are a verification agent. Ensure responses meet quality standards and format requirements."""
15
+
16
+
17
+ def extract_final_answer(response_content: str) -> str:
18
+ """Extract and format the final answer according to system prompt requirements"""
19
+ # Remove common prefixes and suffixes
20
+ answer = response_content.strip()
21
+
22
+ # Remove markdown formatting
23
+ answer = answer.replace("**", "").replace("*", "")
24
+
25
+ # Remove common answer prefixes
26
+ prefixes_to_remove = [
27
+ "Final Answer:", "Answer:", "The answer is:", "The final answer is:",
28
+ "Result:", "Solution:", "Response:", "Output:", "Conclusion:"
29
+ ]
30
+
31
+ for prefix in prefixes_to_remove:
32
+ if answer.lower().startswith(prefix.lower()):
33
+ answer = answer[len(prefix):].strip()
34
+
35
+ # Strip quote and bracket characters from both ends of the answer
36
+ answer = answer.strip('"\'()[]{}')
37
+
38
+ # Handle lists - format with comma and space separation
39
+ if '\n' in answer and all(line.strip().startswith(('-', '*', '•')) for line in answer.split('\n') if line.strip()):
40
+ # Convert bullet list to comma-separated
41
+ items = [line.strip().lstrip('-*•').strip() for line in answer.split('\n') if line.strip()]
42
+ answer = ', '.join(items)
43
+
44
+ return answer.strip()
45
+
46
+
47
+ def verification_node(state: Dict[str, Any]) -> Dict[str, Any]:
48
+ """
49
+ Verification node that performs final quality control and formatting
50
+ """
51
+ print("Verification Node: Performing final quality control")
52
+
53
+ try:
54
+ # Get verification prompt
55
+ verification_prompt = load_verification_prompt()
56
+
57
+ # Initialize LLM for verification
58
+ llm = ChatGroq(model="qwen-qwq-32b", temperature=0.0) # Very low temp for consistent formatting
59
+
60
+ # Get callback handler for tracing
61
+ callback_handler = get_langfuse_callback_handler()
62
+ callbacks = [callback_handler] if callback_handler else []
63
+
64
+ # Get state information
65
+ messages = state.get("messages", [])
66
+ quality_pass = state.get("quality_pass", True)
67
+ quality_score = state.get("quality_score", 7)
68
+ critic_assessment = state.get("critic_assessment", "")
69
+
70
+ # Get the agent response to verify
71
+ agent_response = state.get("agent_response")
72
+ if not agent_response:
73
+ # Find the last AI message
74
+ for msg in reversed(messages):
75
+ if msg.type == "ai":
76
+ agent_response = msg
77
+ break
78
+
79
+ if not agent_response:
80
+ print("Verification Node: No response to verify")
81
+ return {
82
+ **state,
83
+ "final_answer": "No response found to verify",
84
+ "verification_status": "failed",
85
+ "current_step": "complete"
86
+ }
87
+
88
+ # Get user query for context
89
+ user_query = None
90
+ for msg in reversed(messages):
91
+ if msg.type == "human":
92
+ user_query = msg.content
93
+ break
94
+
95
+ # Determine if we should proceed or trigger fallback
96
+ failure_threshold = 4
97
+ attempt_count = state.get("attempt_count", 1)  # attempts made so far, not a maximum
98
+
99
+ if not quality_pass or quality_score < failure_threshold:
100
+ if attempt_count >= 3:
101
+ print("Verification Node: Maximum attempts reached, proceeding with fallback")
102
+ return {
103
+ **state,
104
+ "final_answer": "Unable to provide a satisfactory answer after multiple attempts",
105
+ "verification_status": "failed_max_attempts",
106
+ "current_step": "fallback"
107
+ }
108
+ else:
109
+ print(f"Verification Node: Quality check failed (score: {quality_score}), retrying")
110
+ return {
111
+ **state,
112
+ "verification_status": "failed",
113
+ "attempt_count": max_attempts + 1,
114
+ "current_step": "routing" # Retry from routing
115
+ }
116
+
117
+ # Quality passed, format the final answer
118
+ print("Verification Node: Quality check passed, formatting final answer")
119
+
120
+ # Build verification messages
121
+ verification_messages = [SystemMessage(content=verification_prompt)]
122
+
123
+ verification_request = f"""
124
+ Please verify and format the following response according to the exact-match output rules:
125
+
126
+ Original Query: {user_query or "Unknown query"}
127
+
128
+ Response to Verify:
129
+ {agent_response.content}
130
+
131
+ Quality Assessment: {critic_assessment}
132
+
133
+ Ensure the final output strictly adheres to the format requirements specified in the system prompt.
134
+ """
135
+
136
+ verification_messages.append(HumanMessage(content=verification_request))
137
+
138
+ # Get verification response
139
+ verification_response = llm.invoke(verification_messages, config={"callbacks": callbacks})
140
+
141
+ # Extract and format the final answer
142
+ final_answer = extract_final_answer(verification_response.content)
143
+
144
+ # Store the final formatted answer
145
+ return {
146
+ **state,
147
+ "messages": messages + [verification_response],
148
+ "final_answer": final_answer,
149
+ "verification_status": "passed",
150
+ "current_step": "complete"
151
+ }
152
+
153
+ except Exception as e:
154
+ print(f"Verification Node Error: {e}")
155
+ # Fallback - try to extract answer from agent response
156
+ if agent_response:
157
+ fallback_answer = extract_final_answer(agent_response.content)
158
+ else:
159
+ fallback_answer = f"Error during verification: {e}"
160
+
161
+ return {
162
+ **state,
163
+ "final_answer": fallback_answer,
164
+ "verification_status": "error",
165
+ "current_step": "complete"
166
+ }
167
+
168
+
169
+ def should_retry(state: Dict[str, Any]) -> bool:
170
+ """Determine if we should retry the process"""
171
+ verification_status = state.get("verification_status", "")
172
+ return verification_status == "failed" and state.get("attempt_count", 1) < 3
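extract_final_answer is pure string handling, so its behavior can be pinned down with examples (assuming the package is importable):

from src.agents.verification_node import extract_final_answer

assert extract_final_answer("Final Answer: **42**") == "42"
assert extract_final_answer("- apples\n- pears") == "apples, pears"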
src/langgraph_system.py ADDED
@@ -0,0 +1,231 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Main LangGraph Agent System Implementation"""
2
+ import os
3
+ from typing import Dict, Any, TypedDict, Literal
4
+ from langchain_core.messages import BaseMessage, HumanMessage
5
+ from langgraph.graph import StateGraph, END
6
+
7
+ # Import our agents and nodes
8
+ from src.agents.plan_node import plan_node
9
+ from src.agents.router_node import router_node, should_route_to_agent
10
+ from src.agents.retrieval_agent import retrieval_agent
11
+ from src.agents.execution_agent import execution_agent
12
+ from src.agents.critic_agent import critic_agent
13
+ from src.agents.verification_node import verification_node, should_retry
14
+ from src.memory import memory_manager
15
+ from src.tracing import (
16
+ get_langfuse_callback_handler,
17
+ update_trace_metadata,
18
+ trace_agent_execution,
19
+ flush_langfuse,
20
+ )
21
+
22
+
23
+ class AgentState(TypedDict):
24
+ """State schema for the agent system"""
25
+ # Core conversation
26
+ messages: list[BaseMessage]
27
+
28
+ # Planning and routing
29
+ plan_complete: bool
30
+ next_agent: str
31
+ routing_decision: str
32
+ routing_reason: str
33
+ current_step: str
34
+
35
+ # Agent responses
36
+ agent_response: BaseMessage
37
+ execution_result: str
38
+
39
+ # Quality control
40
+ critic_assessment: str
41
+ quality_pass: bool
42
+ quality_score: int
43
+ verification_status: str
44
+
45
+ # System management
46
+ attempt_count: int
47
+ final_answer: str
48
+
49
+
50
+ def create_agent_graph() -> StateGraph:
51
+ """Create the LangGraph agent system"""
52
+
53
+ # Initialize the state graph
54
+ workflow = StateGraph(AgentState)
55
+
56
+ # Add nodes
57
+ workflow.add_node("plan", plan_node)
58
+ workflow.add_node("router", router_node)
59
+ workflow.add_node("retrieval", retrieval_agent)
60
+ workflow.add_node("execution", execution_agent)
61
+ workflow.add_node("critic", critic_agent)
62
+ workflow.add_node("verification", verification_node)
63
+
64
+ # Add fallback node
65
+ def fallback_node(state: Dict[str, Any]) -> Dict[str, Any]:
66
+ """Simple fallback that returns a basic response"""
67
+ print("Fallback Node: Providing basic response")
68
+
69
+ messages = state.get("messages", [])
70
+ user_query = None
71
+
72
+ for msg in reversed(messages):
73
+ if msg.type == "human":
74
+ user_query = msg.content
75
+ break
76
+
77
+ fallback_answer = "I apologize, but I was unable to provide a satisfactory answer to your question."
78
+ if user_query:
79
+ fallback_answer += f" Your question was: {user_query}"
80
+
81
+ return {
82
+ **state,
83
+ "final_answer": fallback_answer,
84
+ "verification_status": "fallback",
85
+ "current_step": "complete"
86
+ }
87
+
88
+ workflow.add_node("fallback", fallback_node)
89
+
90
+ # Set entry point
91
+ workflow.set_entry_point("plan")
92
+
93
+ # Add edges
94
+ workflow.add_edge("plan", "router")
95
+
96
+ # Conditional routing from router to agents
97
+ workflow.add_conditional_edges(
98
+ "router",
99
+ should_route_to_agent,
100
+ {
101
+ "retrieval": "retrieval",
102
+ "execution": "execution",
103
+ "critic": "critic"
104
+ }
105
+ )
106
+
107
+ # Route agent outputs through critic for quality evaluation before final verification
108
+ workflow.add_edge("retrieval", "critic")
109
+ workflow.add_edge("execution", "critic")
110
+ # Critic (whether reached directly via routing or via other agents) proceeds to verification
111
+ workflow.add_edge("critic", "verification")
112
+
113
+ # Verification conditional logic
114
+ def verification_next(state: Dict[str, Any]) -> Literal["router", "fallback", END]:
115
+ """Determine next step after verification"""
116
+ verification_status = state.get("verification_status", "")
117
+ current_step = state.get("current_step", "")
118
+
119
+ if current_step == "complete":
120
+ return END
121
+ elif verification_status == "failed" and state.get("attempt_count", 1) < 3:
122
+ return "router" # Retry
123
+ elif verification_status == "failed_max_attempts":
124
+ return "fallback"
125
+ else:
126
+ return END
127
+
128
+ workflow.add_conditional_edges(
129
+ "verification",
130
+ verification_next,
131
+ {
132
+ "router": "router",
133
+ "fallback": "fallback",
134
+ END: END
135
+ }
136
+ )
137
+
138
+ # Fallback ends the process
139
+ workflow.add_edge("fallback", END)
140
+
141
+ return workflow
142
+
143
+
144
+ def run_agent_system(query: str, user_id: str | None = None, session_id: str | None = None) -> str:
145
+ """
146
+ Run the complete agent system with a user query
147
+
148
+ Args:
149
+ query: The user question
150
+ user_id: Optional user identifier for tracing
151
+ session_id: Optional session identifier for tracing
152
+
153
+ Returns:
154
+ The final formatted answer
155
+ """
156
+ print(f"Agent System: Processing query: {query[:100]}...")
157
+
158
+ # Open a **root** Langfuse span so that everything inside is neatly grouped
159
+ with trace_agent_execution(name="user-request", user_id=user_id, session_id=session_id):
160
+ try:
161
+ # Enrich the root span with metadata & tags
162
+ update_trace_metadata(
163
+ user_id=user_id,
164
+ session_id=session_id,
165
+ tags=["agent_system"],
166
+ )
167
+
168
+ # Create the graph
169
+ workflow = create_agent_graph()
170
+
171
+ # Compile with checkpointing
172
+ checkpointer = memory_manager.get_checkpointer()
173
+ if checkpointer:
174
+ app = workflow.compile(checkpointer=checkpointer)
175
+ else:
176
+ app = workflow.compile()
177
+
178
+ # Prepare initial state
179
+ initial_state = {
180
+ "messages": [HumanMessage(content=query)],
181
+ "plan_complete": False,
182
+ "next_agent": "",
183
+ "routing_decision": "",
184
+ "routing_reason": "",
185
+ "current_step": "planning",
186
+ "agent_response": None,
187
+ "execution_result": "",
188
+ "critic_assessment": "",
189
+ "quality_pass": True,
190
+ "quality_score": 7,
191
+ "verification_status": "",
192
+ "attempt_count": 1,
193
+ "final_answer": "",
194
+ }
195
+
196
+ # Configure execution – reuse *one* callback handler
197
+ callback_handler = get_langfuse_callback_handler()
198
+ config = {
199
+ "configurable": {"thread_id": session_id or "default"},
200
+ }
201
+ if callback_handler:
202
+ config["callbacks"] = [callback_handler]
203
+
204
+ # Run the graph
205
+ print("Agent System: Executing workflow...")
206
+ final_state = app.invoke(initial_state, config=config)
207
+
208
+ # Extract final answer
209
+ final_answer = final_state.get("final_answer", "No answer generated")
210
+
211
+ # Store in memory if appropriate
212
+ if memory_manager.should_ingest(query):
213
+ memory_manager.ingest_qa_pair(query, final_answer)
214
+
215
+ print(f"Agent System: Completed. Final answer: {final_answer[:100]}...")
216
+ return final_answer
217
+ except Exception as e:
218
+ print(f"Agent System Error: {e}")
219
+ return (
220
+ f"I apologize, but I encountered an error while processing your question: {e}"
221
+ )
222
+ finally:
223
+ # Ensure Langfuse spans are exported even in short-lived environments
224
+ try:
225
+ flush_langfuse()
226
+ except Exception:
227
+ pass
228
+
229
+
230
+ # Export the main function
231
+ __all__ = ["run_agent_system", "create_agent_graph", "AgentState"]
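End-to-end usage, assuming GROQ and Tavily (and optionally Supabase) credentials are set in the environment:

from src.langgraph_system import run_agent_system

answer = run_agent_system(
    "What is the capital of France?",
    user_id="demo_user",
    session_id="demo_session",
)
print(answer)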
src/memory.py ADDED
@@ -0,0 +1,162 @@
+ """Memory Layer Implementation for LangGraph Agent System"""
+ import os
+ import time
+ import hashlib
+ import sqlite3
+ from typing import Optional, List, Dict, Any, Tuple
+ from langchain_community.vectorstores import SupabaseVectorStore
+ from langchain_huggingface import HuggingFaceEmbeddings
+ from supabase.client import Client, create_client
+ from langgraph.checkpoint.sqlite import SqliteSaver
+ from langchain_core.messages import BaseMessage, HumanMessage
+
+
+ # Constants for memory management
+ TTL = 300  # seconds – how long we keep similarity-search results
+ SIMILARITY_THRESHOLD = 0.85  # cosine score above which we assume we already know the answer
+
+
+ class MemoryManager:
+     """Manages short-term, long-term memory and checkpointing for the agent system"""
+
+     def __init__(self):
+         self.embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
+         self.vector_store = None
+         self.checkpointer = None
+         self._sqlite_connection = None
+
+         # In-memory caches
+         self.query_cache: Dict[str, Tuple[float, List]] = {}
+         self.processed_tasks: set[str] = set()
+         self.seen_hashes: set[str] = set()
+
+         self._initialize_vector_store()
+         self._initialize_checkpointer()
+
+     def _initialize_vector_store(self) -> None:
+         """Initialize Supabase vector store for long-term memory"""
+         try:
+             supabase_url = os.environ.get("SUPABASE_URL")
+             supabase_key = os.environ.get("SUPABASE_SERVICE_KEY")
+
+             if not supabase_url or not supabase_key:
+                 print("Warning: Supabase credentials not found, vector store will be disabled")
+                 return
+
+             supabase: Client = create_client(supabase_url, supabase_key)
+             self.vector_store = SupabaseVectorStore(
+                 client=supabase,
+                 embedding=self.embeddings,
+                 table_name="documents",
+                 query_name="match_documents_langchain",
+             )
+             print("Vector store initialized successfully")
+         except Exception as e:
+             print(f"Warning: Could not initialize Supabase vector store: {e}")
+
+     def _initialize_checkpointer(self) -> None:
+         """Initialize SQLite checkpointer for short-term memory"""
+         try:
+             # Create a direct SQLite connection
+             self._sqlite_connection = sqlite3.connect(":memory:", check_same_thread=False)
+             self.checkpointer = SqliteSaver(self._sqlite_connection)
+             print("Checkpointer initialized successfully")
+         except Exception as e:
+             print(f"Warning: Could not initialize checkpointer: {e}")
+
+     def get_checkpointer(self) -> Optional[SqliteSaver]:
+         """Get the checkpointer instance"""
+         return self.checkpointer
+
+     def close_checkpointer(self) -> None:
+         """Close the checkpointer and its SQLite connection"""
+         if self._sqlite_connection:
+             try:
+                 self._sqlite_connection.close()
+                 print("SQLite connection closed")
+             except Exception as e:
+                 print(f"Warning: Error closing SQLite connection: {e}")
+
+     def similarity_search(self, query: str, k: int = 2) -> List[Any]:
+         """Search for similar questions with caching"""
+         if not self.vector_store:
+             return []
+
+         # Check cache first
+         q_hash = hashlib.sha256(query.encode()).hexdigest()
+         now = time.time()
+
+         if q_hash in self.query_cache and now - self.query_cache[q_hash][0] < TTL:
+             print("Memory: Cache hit for similarity search")
+             return self.query_cache[q_hash][1]
+
+         try:
+             print("Memory: Searching vector store for similar questions...")
+             similar_questions = self.vector_store.similarity_search_with_relevance_scores(query, k=k)
+             self.query_cache[q_hash] = (now, similar_questions)
+             return similar_questions
+         except Exception as e:
+             print(f"Memory: Vector store search error – {e}")
+             return []
+
+     def should_ingest(self, query: str) -> bool:
+         """Determine if this query/answer should be ingested to long-term memory"""
+         if not self.vector_store:
+             return False
+
+         similar_questions = self.similarity_search(query, k=1)
+         top_score = similar_questions[0][1] if similar_questions else 0.0
+         return top_score < SIMILARITY_THRESHOLD
+
+     def ingest_qa_pair(self, question: str, answer: str, attachments: str = "") -> None:
+         """Store Q/A pair in long-term memory"""
+         if not self.vector_store:
+             print("Memory: Vector store not available for ingestion")
+             return
+
+         try:
+             payload = f"Question:\n{question}\n\nAnswer:\n{answer}"
+             if attachments:
+                 payload += f"\n\n{attachments}"
+
+             hash_id = hashlib.sha256(payload.encode()).hexdigest()
+             if hash_id in self.seen_hashes:
+                 print("Memory: Duplicate payload within session – skip")
+                 return
+
+             self.seen_hashes.add(hash_id)
+             self.vector_store.add_texts(
+                 [payload],
+                 metadatas=[{"hash_id": hash_id, "timestamp": time.time()}]
+             )
+             print("Memory: Stored new Q/A pair in vector store")
+         except Exception as e:
+             print(f"Memory: Error while upserting – {e}")
+
+     def get_similar_qa(self, query: str) -> Optional[str]:
+         """Get similar Q/A for context"""
+         similar_questions = self.similarity_search(query, k=1)
+         if not similar_questions:
+             return None
+
+         example_doc = similar_questions[0][0] if isinstance(similar_questions[0], tuple) else similar_questions[0]
+         return example_doc.page_content
+
+     def add_processed_task(self, task_id: str) -> None:
+         """Mark a task as processed to avoid re-downloading attachments"""
+         self.processed_tasks.add(task_id)
+
+     def is_task_processed(self, task_id: str) -> bool:
+         """Check if a task has already been processed"""
+         return task_id in self.processed_tasks
+
+     def clear_session_cache(self) -> None:
+         """Clear session-specific caches"""
+         self.query_cache.clear()
+         self.processed_tasks.clear()
+         self.seen_hashes.clear()
+         print("Memory: Session cache cleared")
+
+
+ # Global memory manager instance
+ memory_manager = MemoryManager()
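
A short sketch of the intended call pattern for the memory layer above (names taken from the module; the gate means a Q/A pair is only written when the best stored match scores below SIMILARITY_THRESHOLD):

    from src.memory import memory_manager

    question = "What is the capital of France?"
    answer = "Paris"

    # similarity_search results are cached per query hash for TTL seconds
    context = memory_manager.get_similar_qa(question)
    print(context)

    # Ingest only when nothing sufficiently similar is stored yet
    if memory_manager.should_ingest(question):
        memory_manager.ingest_qa_pair(question, answer)
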
src/tracing.py ADDED
@@ -0,0 +1,125 @@
+ """Tracing and Observability Setup for Langfuse v3.0.0"""
+ import os
+ from typing import Optional
+ from langfuse import Langfuse, get_client
+ from langfuse.langchain import CallbackHandler
+
+
+ def initialize_langfuse() -> None:
+     """Initialize the Langfuse client with proper configuration"""
+     try:
+         # Initialize the Langfuse client
+         Langfuse(
+             public_key=os.environ.get("LANGFUSE_PUBLIC_KEY"),
+             secret_key=os.environ.get("LANGFUSE_SECRET_KEY"),
+             host=os.environ.get("LANGFUSE_HOST", "https://cloud.langfuse.com")
+         )
+         print("Langfuse client initialized successfully")
+     except Exception as e:
+         print(f"Warning: Could not initialize Langfuse client: {e}")
+
+
+ # Singleton for the Langfuse CallbackHandler to ensure a single handler per request
+ _CALLBACK_HANDLER: Optional[CallbackHandler] = None
+
+
+ def get_langfuse_callback_handler() -> Optional[CallbackHandler]:
+     """Get (or create) a singleton Langfuse callback handler for LangChain integration.
+
+     Best practice (#2): pass exactly **one** CallbackHandler into graph.invoke/stream so that
+     every nested LLM/tool span is correlated underneath the same root span. Reusing the
+     same instance avoids fragmenting traces when individual nodes try to create their own
+     handler.
+     """
+     global _CALLBACK_HANDLER  # noqa: PLW0603 – module-level singleton is intentional
+
+     try:
+         initialize_langfuse()
+         if _CALLBACK_HANDLER is None:
+             _CALLBACK_HANDLER = CallbackHandler()
+         return _CALLBACK_HANDLER
+     except Exception as e:
+         print(f"Warning: Could not create Langfuse callback handler: {e}")
+         return None
+
+
+ def trace_agent_execution(name: str, user_id: str | None = None, session_id: str | None = None):
+     """Context manager that opens a **root** span for the current user request.
+
+     Follows Langfuse best practices (rules #2 & #3):
+     • exactly one root span per request
+     • attach `user_id` and `session_id` so that follow-up calls are stitched together
+     """
+     try:
+         langfuse = get_client()
+         span_kwargs = {"name": name}
+         # Open the span as a context manager so everything inside is automatically nested
+         span_cm = langfuse.start_as_current_span(**span_kwargs)
+
+         # Wrap the CM so that we can update the trace metadata *after* it has started
+         class _TraceWrapper:
+             def __enter__(self):
+                 # Enter the span
+                 self._span = span_cm.__enter__()
+                 # Immediately enrich it with session/user information
+                 try:
+                     langfuse.update_current_trace(
+                         **{k: v for k, v in {"user_id": user_id, "session_id": session_id}.items() if v}
+                     )
+                 except Exception:
+                     # Ignore update failures – tracing must never break business logic
+                     pass
+                 return self._span
+
+             def __exit__(self, exc_type, exc_val, exc_tb):
+                 return span_cm.__exit__(exc_type, exc_val, exc_tb)
+
+         return _TraceWrapper()
+     except Exception as e:
+         print(f"Warning: Could not create trace span: {e}")
+         # Gracefully degrade – return a dummy context manager
+         from contextlib import nullcontext
+
+         return nullcontext()  # type: ignore
+
+
+ def update_trace_metadata(user_id: str | None = None, session_id: str | None = None, tags: list | None = None, **kwargs):
+     """Update the current trace with metadata"""
+     try:
+         langfuse = get_client()
+         update_args = {}
+
+         if user_id:
+             update_args["user_id"] = user_id
+         if session_id:
+             update_args["session_id"] = session_id
+         if tags:
+             update_args["tags"] = tags
+         if kwargs:
+             update_args.update(kwargs)
+
+         langfuse.update_current_trace(**update_args)
+     except Exception as e:
+         print(f"Warning: Could not update trace metadata: {e}")
+
+
+ def flush_langfuse():
+     """Flush Langfuse events (for short-lived applications)"""
+     try:
+         langfuse = get_client()
+         langfuse.flush()
+     except Exception as e:
+         print(f"Warning: Could not flush Langfuse events: {e}")
+
+
+ def shutdown_langfuse():
+     """Shutdown the Langfuse client (for application cleanup)"""
+     try:
+         langfuse = get_client()
+         langfuse.shutdown()
+     except Exception as e:
+         print(f"Warning: Could not shutdown Langfuse client: {e}")
+
+
+ # Initialize Langfuse on module import
+ initialize_langfuse()
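
Combining the helpers above mirrors the best practices from .cursor/rules/langfuse_best_practices.mdc: one root span per request, one shared CallbackHandler. A sketch, where `app` stands for an already-compiled LangGraph graph (an assumption, not part of this module):

    from src.tracing import (
        trace_agent_execution,
        get_langfuse_callback_handler,
        flush_langfuse,
    )

    handler = get_langfuse_callback_handler()  # singleton – safe to fetch repeatedly

    with trace_agent_execution("user-request", user_id="u1", session_id="s1"):
        config = {"callbacks": [handler]} if handler else {}
        result = app.invoke({"messages": []}, config=config)

    flush_langfuse()  # export spans before a short-lived process exits
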
test_new_system.py ADDED
@@ -0,0 +1,205 @@
+ #!/usr/bin/env python3
+ """
+ Test Script for New LangGraph Agent System
+ Tests the multi-agent architecture with memory, routing, and verification.
+ """
+ import os
+ import sys
+ import time
+ from dotenv import load_dotenv
+
+ # Load environment variables
+ load_dotenv()
+
+ # Add the current directory to Python path
+ sys.path.append(os.path.dirname(os.path.abspath(__file__)))
+
+
+ def test_imports():
+     """Test that all modules can be imported correctly"""
+     print("Testing imports...")
+     try:
+         # Test core imports
+         from src import run_agent_system, memory_manager
+         from src.tracing import get_langfuse_callback_handler
+
+         # Test agent imports
+         from src.agents import (
+             plan_node, router_node, retrieval_agent,
+             execution_agent, critic_agent, verification_node
+         )
+
+         print("✅ All imports successful")
+         return True
+     except ImportError as e:
+         print(f"❌ Import error: {e}")
+         return False
+
+
+ def test_memory_system():
+     """Test the memory management system"""
+     print("\nTesting memory system...")
+     try:
+         from src.memory import memory_manager
+
+         # Test basic functionality
+         test_query = "What is 2+2?"
+
+         # Test similarity search (should not crash even without vector store)
+         similar = memory_manager.similarity_search(test_query, k=1)
+         print(f"✅ Similarity search completed: {len(similar)} results")
+
+         # Test cache management
+         memory_manager.clear_session_cache()
+         print("✅ Memory cache cleared")
+
+         return True
+     except Exception as e:
+         print(f"❌ Memory system error: {e}")
+         return False
+
+
+ def test_tracing_system():
+     """Test the Langfuse tracing integration"""
+     print("\nTesting tracing system...")
+     try:
+         from src.tracing import get_langfuse_callback_handler, initialize_langfuse
+
+         # Test handler creation (should not crash even without credentials)
+         handler = get_langfuse_callback_handler()
+         print(f"✅ Langfuse handler: {type(handler)}")
+
+         return True
+     except Exception as e:
+         print(f"❌ Tracing system error: {e}")
+         return False
+
+
+ def test_individual_agents():
+     """Test each agent individually"""
+     print("\nTesting individual agents...")
+
+     # Test state structure
+     test_state = {
+         "messages": [],
+         "plan_complete": False,
+         "next_agent": "",
+         "routing_decision": "",
+         "routing_reason": "",
+         "current_step": "testing",
+         "agent_response": None,
+         "needs_tools": False,
+         "execution_result": "",
+         "critic_assessment": "",
+         "quality_pass": True,
+         "quality_score": 7,
+         "verification_status": "",
+         "attempt_count": 1,
+         "final_answer": ""
+     }
+
+     try:
+         from langchain_core.messages import HumanMessage
+         test_state["messages"] = [HumanMessage(content="Test query")]
+
+         # Test plan node
+         from src.agents.plan_node import plan_node
+         plan_result = plan_node(test_state)
+         print("✅ Plan node executed")
+
+         # Test router node
+         from src.agents.router_node import router_node
+         router_result = router_node(plan_result)
+         print("✅ Router node executed")
+
+         return True
+     except Exception as e:
+         print(f"❌ Agent testing error: {e}")
+         return False
+
+
+ def test_graph_creation():
+     """Test the main graph creation"""
+     print("\nTesting graph creation...")
+     try:
+         from src.langgraph_system import create_agent_graph
+
+         # Create the workflow
+         workflow = create_agent_graph()
+         print("✅ Graph created successfully")
+
+         # Try to compile (this might fail without proper setup, but shouldn't crash)
+         try:
+             app = workflow.compile()
+             print("✅ Graph compiled successfully")
+         except Exception as e:
+             print(f"⚠️ Graph compilation warning: {e}")
+
+         return True
+     except Exception as e:
+         print(f"❌ Graph creation error: {e}")
+         return False
+
+
+ def test_simple_query():
+     """Test a simple query through the system"""
+     print("\nTesting simple query...")
+     try:
+         from new_langraph_agent import run_agent
+
+         # Simple test query
+         test_query = "What is 1 + 1?"
+         print(f"Query: {test_query}")
+
+         start_time = time.time()
+         result = run_agent(test_query)
+         end_time = time.time()
+
+         print(f"Result: {result}")
+         print(f"Time taken: {end_time - start_time:.2f} seconds")
+         print("✅ Simple query completed")
+
+         return True
+     except Exception as e:
+         print(f"❌ Simple query error: {e}")
+         return False
+
+
+ def main():
+     """Run all tests"""
+     print("LangGraph Agent System - Test Suite")
+     print("=" * 50)
+
+     tests = [
+         test_imports,
+         test_memory_system,
+         test_tracing_system,
+         test_individual_agents,
+         test_graph_creation,
+         test_simple_query
+     ]
+
+     results = []
+     for test_func in tests:
+         try:
+             result = test_func()
+             results.append(result)
+         except Exception as e:
+             print(f"❌ Test {test_func.__name__} failed with exception: {e}")
+             results.append(False)
+
+     # Summary
+     print("\n" + "=" * 50)
+     print("Test Summary:")
+     print(f"Passed: {sum(results)}/{len(results)}")
+     print(f"Failed: {len(results) - sum(results)}/{len(results)}")
+
+     if all(results):
+         print("🎉 All tests passed!")
+         return 0
+     else:
+         print("⚠️ Some tests failed. Check the output above for details.")
+         return 1
+
+
+ if __name__ == "__main__":
+     sys.exit(main())
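
Note that `test_new_system.py` exits with a proper status code via `sys.exit(main())`, so it can run unattended (e.g. `python test_new_system.py` in CI), whereas the integration script below pauses for keyboard input between stages and is meant for interactive use only.
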
test_tools_integration.py ADDED
@@ -0,0 +1,81 @@
+ #!/usr/bin/env python3
+ """
+ Test script to verify tool integration in the LangGraph agent system
+ """
+
+ from src.langgraph_system import run_agent_system
+
+
+ def test_retrieval_tools():
+     """Test that retrieval tools (Wikipedia, web search, etc.) are working"""
+     print("=" * 60)
+     print("Testing Retrieval Tools Integration")
+     print("=" * 60)
+
+     # Test Wikipedia search
+     query = "When was Albert Einstein born?"
+     print(f"\nTesting query: {query}")
+     print("-" * 40)
+
+     result = run_agent_system(query, user_id="test_user", session_id="test_session")
+     print(f"Result: {result}")
+
+     return result
+
+
+ def test_execution_tools():
+     """Test that execution tools (Python code execution) are working"""
+     print("=" * 60)
+     print("Testing Execution Tools Integration")
+     print("=" * 60)
+
+     # Test code execution
+     query = "Calculate the first 10 numbers in the Fibonacci sequence"
+     print(f"\nTesting query: {query}")
+     print("-" * 40)
+
+     result = run_agent_system(query, user_id="test_user", session_id="test_session")
+     print(f"Result: {result}")
+
+     return result
+
+
+ def test_web_search_tools():
+     """Test web search functionality"""
+     print("=" * 60)
+     print("Testing Web Search Tools Integration")
+     print("=" * 60)
+
+     # Test web search
+     query = "What is the latest news about artificial intelligence?"
+     print(f"\nTesting query: {query}")
+     print("-" * 40)
+
+     result = run_agent_system(query, user_id="test_user", session_id="test_session")
+     print(f"Result: {result}")
+
+     return result
+
+
+ if __name__ == "__main__":
+     print("Starting Tool Integration Tests...")
+
+     try:
+         # Test retrieval tools
+         test_retrieval_tools()
+
+         print("\n" + "=" * 60)
+         input("Press Enter to continue to execution tools test...")
+
+         # Test execution tools
+         test_execution_tools()
+
+         print("\n" + "=" * 60)
+         input("Press Enter to continue to web search tools test...")
+
+         # Test web search tools
+         test_web_search_tools()
+
+         print("\n" + "=" * 60)
+         print("Tool integration tests completed!")
+
+     except Exception as e:
+         print(f"Test failed with error: {e}")
+         import traceback
+         traceback.print_exc()
uv.lock CHANGED
@@ -331,6 +331,30 @@ wheels = [
      { url = "https://files.pythonhosted.org/packages/c3/be/d0d44e092656fe7a06b55e6103cbce807cdbdee17884a5367c68c9860853/dataclasses_json-0.6.7-py3-none-any.whl", hash = "sha256:0dbf33f26c8d5305befd61b39d2b3414e8a407bedc2834dea9b8d642666fb40a", size = 28686 },
  ]
 
+ [[package]]
+ name = "datasets"
+ version = "3.6.0"
+ source = { registry = "https://pypi.org/simple" }
+ dependencies = [
+     { name = "dill" },
+     { name = "filelock" },
+     { name = "fsspec", extra = ["http"] },
+     { name = "huggingface-hub" },
+     { name = "multiprocess" },
+     { name = "numpy" },
+     { name = "packaging" },
+     { name = "pandas" },
+     { name = "pyarrow" },
+     { name = "pyyaml" },
+     { name = "requests" },
+     { name = "tqdm" },
+     { name = "xxhash" },
+ ]
+ sdist = { url = "https://files.pythonhosted.org/packages/1a/89/d3d6fef58a488f8569c82fd293ab7cbd4250244d67f425dcae64c63800ea/datasets-3.6.0.tar.gz", hash = "sha256:1b2bf43b19776e2787e181cfd329cb0ca1a358ea014780c3581e0f276375e041", size = 569336 }
+ wheels = [
+     { url = "https://files.pythonhosted.org/packages/20/34/a08b0ee99715eaba118cbe19a71f7b5e2425c2718ef96007c325944a1152/datasets-3.6.0-py3-none-any.whl", hash = "sha256:25000c4a2c0873a710df127d08a202a06eab7bf42441a6bc278b499c2f72cd1b", size = 491546 },
+ ]
+
  [[package]]
  name = "debugpy"
  version = "1.8.14"
@@ -386,6 +410,15 @@ wheels = [
      { url = "https://files.pythonhosted.org/packages/02/c3/253a89ee03fc9b9682f1541728eb66db7db22148cd94f89ab22528cd1e1b/deprecation-2.1.0-py2.py3-none-any.whl", hash = "sha256:a10811591210e1fb0e768a8c25517cabeabcba6f0bf96564f8ff45189f90b14a", size = 11178 },
  ]
 
+ [[package]]
+ name = "dill"
+ version = "0.3.8"
+ source = { registry = "https://pypi.org/simple" }
+ sdist = { url = "https://files.pythonhosted.org/packages/17/4d/ac7ffa80c69ea1df30a8aa11b3578692a5118e7cd1aa157e3ef73b092d15/dill-0.3.8.tar.gz", hash = "sha256:3ebe3c479ad625c4553aca177444d89b486b1d84982eeacded644afc0cf797ca", size = 184847 }
+ wheels = [
+     { url = "https://files.pythonhosted.org/packages/c9/7a/cef76fd8438a42f96db64ddaa85280485a9c395e7df3db8158cfec1eee34/dill-0.3.8-py3-none-any.whl", hash = "sha256:c36ca9ffb54365bdd2f8eb3eff7d2a21237f8452b57ace88b1ac615b7e815bd7", size = 116252 },
+ ]
+
  [[package]]
  name = "dirtyjson"
  version = "1.0.8"
@@ -495,6 +528,7 @@ name = "final-assignment-template"
  version = "0.1.0"
  source = { virtual = "." }
  dependencies = [
+     { name = "datasets" },
      { name = "dotenv" },
      { name = "gradio" },
      { name = "hf-xet" },
@@ -509,6 +543,8 @@
      { name = "langchain-openai" },
      { name = "langfuse" },
      { name = "langgraph" },
+     { name = "langgraph-checkpoint" },
+     { name = "langgraph-checkpoint-sqlite" },
      { name = "llama-index" },
      { name = "llama-index-core" },
      { name = "llama-index-llms-huggingface-api" },
@@ -526,6 +562,7 @@
 
  [package.metadata]
  requires-dist = [
+     { name = "datasets", specifier = ">=2.19.1" },
      { name = "dotenv", specifier = ">=0.9.9" },
      { name = "gradio", specifier = ">=5.34.1" },
      { name = "hf-xet", specifier = ">=1.1.3" },
@@ -540,6 +577,8 @@ requires-dist = [
      { name = "langchain-openai", specifier = ">=0.3.24" },
      { name = "langfuse", specifier = ">=3.0.0" },
      { name = "langgraph", specifier = ">=0.4.8" },
+     { name = "langgraph-checkpoint", specifier = ">=2.1.0" },
+     { name = "langgraph-checkpoint-sqlite", specifier = ">=2.0.10" },
      { name = "llama-index", specifier = ">=0.12.40" },
      { name = "llama-index-core", specifier = ">=0.12.40" },
      { name = "llama-index-llms-huggingface-api", specifier = ">=0.5.0" },
@@ -600,11 +639,16 @@ wheels = [
 
  [[package]]
  name = "fsspec"
- version = "2025.5.1"
+ version = "2025.3.0"
  source = { registry = "https://pypi.org/simple" }
- sdist = { url = "https://files.pythonhosted.org/packages/00/f7/27f15d41f0ed38e8fcc488584b57e902b331da7f7c6dcda53721b15838fc/fsspec-2025.5.1.tar.gz", hash = "sha256:2e55e47a540b91843b755e83ded97c6e897fa0942b11490113f09e9c443c2475", size = 303033 }
+ sdist = { url = "https://files.pythonhosted.org/packages/34/f4/5721faf47b8c499e776bc34c6a8fc17efdf7fdef0b00f398128bc5dcb4ac/fsspec-2025.3.0.tar.gz", hash = "sha256:a935fd1ea872591f2b5148907d103488fc523295e6c64b835cfad8c3eca44972", size = 298491 }
  wheels = [
-     { url = "https://files.pythonhosted.org/packages/bb/61/78c7b3851add1481b048b5fdc29067397a1784e2910592bc81bb3f608635/fsspec-2025.5.1-py3-none-any.whl", hash = "sha256:24d3a2e663d5fc735ab256263c4075f374a174c3410c0b25e5bd1970bceaa462", size = 199052 },
+     { url = "https://files.pythonhosted.org/packages/56/53/eb690efa8513166adef3e0669afd31e95ffde69fb3c52ec2ac7223ed6018/fsspec-2025.3.0-py3-none-any.whl", hash = "sha256:efb87af3efa9103f94ca91a7f8cb7a4df91af9f74fc106c9c7ea0efd7277c1b3", size = 193615 },
+ ]
+
+ [package.optional-dependencies]
+ http = [
+     { name = "aiohttp" },
  ]
 
  [[package]]
@@ -1366,6 +1410,20 @@ wheels = [
      { url = "https://files.pythonhosted.org/packages/0f/41/390a97d9d0abe5b71eea2f6fb618d8adadefa674e97f837bae6cda670bc7/langgraph_checkpoint-2.1.0-py3-none-any.whl", hash = "sha256:4cea3e512081da1241396a519cbfe4c5d92836545e2c64e85b6f5c34a1b8bc61", size = 43844 },
  ]
 
+ [[package]]
+ name = "langgraph-checkpoint-sqlite"
+ version = "2.0.10"
+ source = { registry = "https://pypi.org/simple" }
+ dependencies = [
+     { name = "aiosqlite" },
+     { name = "langgraph-checkpoint" },
+     { name = "sqlite-vec" },
+ ]
+ sdist = { url = "https://files.pythonhosted.org/packages/7b/38/5d44b91fa21e06309be8f1658ae966f5c717443401df005b20d9af91b6b5/langgraph_checkpoint_sqlite-2.0.10.tar.gz", hash = "sha256:c8a55a268b857761dc77f123df48addaf8e9a40b72c4eaddb7c551ddced1c5b6", size = 103625 }
+ wheels = [
+     { url = "https://files.pythonhosted.org/packages/c1/ff/63b16d83a513f7d7a5001bb01a40024986d330718a5315bf1962d7cc50c8/langgraph_checkpoint_sqlite-2.0.10-py3-none-any.whl", hash = "sha256:89d1d2201fe26aa52f1a9c03e1015d226635649be596b26542a5de78f8cc6c9f", size = 30973 },
+ ]
+
  [[package]]
  name = "langgraph-prebuilt"
  version = "0.2.2"
@@ -1838,6 +1896,22 @@ wheels = [
      { url = "https://files.pythonhosted.org/packages/84/5d/e17845bb0fa76334477d5de38654d27946d5b5d3695443987a094a71b440/multidict-6.4.4-py3-none-any.whl", hash = "sha256:bd4557071b561a8b3b6075c3ce93cf9bfb6182cb241805c3d66ced3b75eff4ac", size = 10481 },
  ]
 
+ [[package]]
+ name = "multiprocess"
+ version = "0.70.16"
+ source = { registry = "https://pypi.org/simple" }
+ dependencies = [
+     { name = "dill" },
+ ]
+ sdist = { url = "https://files.pythonhosted.org/packages/b5/ae/04f39c5d0d0def03247c2893d6f2b83c136bf3320a2154d7b8858f2ba72d/multiprocess-0.70.16.tar.gz", hash = "sha256:161af703d4652a0e1410be6abccecde4a7ddffd19341be0a7011b94aeb171ac1", size = 1772603 }
+ wheels = [
+     { url = "https://files.pythonhosted.org/packages/bc/f7/7ec7fddc92e50714ea3745631f79bd9c96424cb2702632521028e57d3a36/multiprocess-0.70.16-py310-none-any.whl", hash = "sha256:c4a9944c67bd49f823687463660a2d6daae94c289adff97e0f9d696ba6371d02", size = 134824 },
+     { url = "https://files.pythonhosted.org/packages/50/15/b56e50e8debaf439f44befec5b2af11db85f6e0f344c3113ae0be0593a91/multiprocess-0.70.16-py311-none-any.whl", hash = "sha256:af4cabb0dac72abfb1e794fa7855c325fd2b55a10a44628a3c1ad3311c04127a", size = 143519 },
+     { url = "https://files.pythonhosted.org/packages/0a/7d/a988f258104dcd2ccf1ed40fdc97e26c4ac351eeaf81d76e266c52d84e2f/multiprocess-0.70.16-py312-none-any.whl", hash = "sha256:fc0544c531920dde3b00c29863377f87e1632601092ea2daca74e4beb40faa2e", size = 146741 },
+     { url = "https://files.pythonhosted.org/packages/ea/89/38df130f2c799090c978b366cfdf5b96d08de5b29a4a293df7f7429fa50b/multiprocess-0.70.16-py38-none-any.whl", hash = "sha256:a71d82033454891091a226dfc319d0cfa8019a4e888ef9ca910372a446de4435", size = 132628 },
+     { url = "https://files.pythonhosted.org/packages/da/d9/f7f9379981e39b8c2511c9e0326d212accacb82f12fbfdc1aa2ce2a7b2b6/multiprocess-0.70.16-py39-none-any.whl", hash = "sha256:a0bafd3ae1b732eac64be2e72038231c1ba97724b60b09400d68f229fcc2fbf3", size = 133351 },
+ ]
+
  [[package]]
  name = "mypy-extensions"
  version = "1.1.0"
@@ -2476,6 +2550,32 @@ wheels = [
      { url = "https://files.pythonhosted.org/packages/8e/37/efad0257dc6e593a18957422533ff0f87ede7c9c6ea010a2177d738fb82f/pure_eval-0.2.3-py3-none-any.whl", hash = "sha256:1db8e35b67b3d218d818ae653e27f06c3aa420901fa7b081ca98cbedc874e0d0", size = 11842 },
  ]
 
+ [[package]]
+ name = "pyarrow"
+ version = "20.0.0"
+ source = { registry = "https://pypi.org/simple" }
+ sdist = { url = "https://files.pythonhosted.org/packages/a2/ee/a7810cb9f3d6e9238e61d312076a9859bf3668fd21c69744de9532383912/pyarrow-20.0.0.tar.gz", hash = "sha256:febc4a913592573c8d5805091a6c2b5064c8bd6e002131f01061797d91c783c1", size = 1125187 }
+ wheels = [
+     { url = "https://files.pythonhosted.org/packages/9b/aa/daa413b81446d20d4dad2944110dcf4cf4f4179ef7f685dd5a6d7570dc8e/pyarrow-20.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:a15532e77b94c61efadde86d10957950392999503b3616b2ffcef7621a002893", size = 30798501 },
+     { url = "https://files.pythonhosted.org/packages/ff/75/2303d1caa410925de902d32ac215dc80a7ce7dd8dfe95358c165f2adf107/pyarrow-20.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:dd43f58037443af715f34f1322c782ec463a3c8a94a85fdb2d987ceb5658e061", size = 32277895 },
+     { url = "https://files.pythonhosted.org/packages/92/41/fe18c7c0b38b20811b73d1bdd54b1fccba0dab0e51d2048878042d84afa8/pyarrow-20.0.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa0d288143a8585806e3cc7c39566407aab646fb9ece164609dac1cfff45f6ae", size = 41327322 },
+     { url = "https://files.pythonhosted.org/packages/da/ab/7dbf3d11db67c72dbf36ae63dcbc9f30b866c153b3a22ef728523943eee6/pyarrow-20.0.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b6953f0114f8d6f3d905d98e987d0924dabce59c3cda380bdfaa25a6201563b4", size = 42411441 },
+     { url = "https://files.pythonhosted.org/packages/90/c3/0c7da7b6dac863af75b64e2f827e4742161128c350bfe7955b426484e226/pyarrow-20.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:991f85b48a8a5e839b2128590ce07611fae48a904cae6cab1f089c5955b57eb5", size = 40677027 },
+     { url = "https://files.pythonhosted.org/packages/be/27/43a47fa0ff9053ab5203bb3faeec435d43c0d8bfa40179bfd076cdbd4e1c/pyarrow-20.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:97c8dc984ed09cb07d618d57d8d4b67a5100a30c3818c2fb0b04599f0da2de7b", size = 42281473 },
+     { url = "https://files.pythonhosted.org/packages/bc/0b/d56c63b078876da81bbb9ba695a596eabee9b085555ed12bf6eb3b7cab0e/pyarrow-20.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9b71daf534f4745818f96c214dbc1e6124d7daf059167330b610fc69b6f3d3e3", size = 42893897 },
+     { url = "https://files.pythonhosted.org/packages/92/ac/7d4bd020ba9145f354012838692d48300c1b8fe5634bfda886abcada67ed/pyarrow-20.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e8b88758f9303fa5a83d6c90e176714b2fd3852e776fc2d7e42a22dd6c2fb368", size = 44543847 },
+     { url = "https://files.pythonhosted.org/packages/9d/07/290f4abf9ca702c5df7b47739c1b2c83588641ddfa2cc75e34a301d42e55/pyarrow-20.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:30b3051b7975801c1e1d387e17c588d8ab05ced9b1e14eec57915f79869b5031", size = 25653219 },
+     { url = "https://files.pythonhosted.org/packages/95/df/720bb17704b10bd69dde086e1400b8eefb8f58df3f8ac9cff6c425bf57f1/pyarrow-20.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:ca151afa4f9b7bc45bcc791eb9a89e90a9eb2772767d0b1e5389609c7d03db63", size = 30853957 },
+     { url = "https://files.pythonhosted.org/packages/d9/72/0d5f875efc31baef742ba55a00a25213a19ea64d7176e0fe001c5d8b6e9a/pyarrow-20.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:4680f01ecd86e0dd63e39eb5cd59ef9ff24a9d166db328679e36c108dc993d4c", size = 32247972 },
+     { url = "https://files.pythonhosted.org/packages/d5/bc/e48b4fa544d2eea72f7844180eb77f83f2030b84c8dad860f199f94307ed/pyarrow-20.0.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7f4c8534e2ff059765647aa69b75d6543f9fef59e2cd4c6d18015192565d2b70", size = 41256434 },
+     { url = "https://files.pythonhosted.org/packages/c3/01/974043a29874aa2cf4f87fb07fd108828fc7362300265a2a64a94965e35b/pyarrow-20.0.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3e1f8a47f4b4ae4c69c4d702cfbdfe4d41e18e5c7ef6f1bb1c50918c1e81c57b", size = 42353648 },
+     { url = "https://files.pythonhosted.org/packages/68/95/cc0d3634cde9ca69b0e51cbe830d8915ea32dda2157560dda27ff3b3337b/pyarrow-20.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:a1f60dc14658efaa927f8214734f6a01a806d7690be4b3232ba526836d216122", size = 40619853 },
+     { url = "https://files.pythonhosted.org/packages/29/c2/3ad40e07e96a3e74e7ed7cc8285aadfa84eb848a798c98ec0ad009eb6bcc/pyarrow-20.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:204a846dca751428991346976b914d6d2a82ae5b8316a6ed99789ebf976551e6", size = 42241743 },
+     { url = "https://files.pythonhosted.org/packages/eb/cb/65fa110b483339add6a9bc7b6373614166b14e20375d4daa73483755f830/pyarrow-20.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:f3b117b922af5e4c6b9a9115825726cac7d8b1421c37c2b5e24fbacc8930612c", size = 42839441 },
+     { url = "https://files.pythonhosted.org/packages/98/7b/f30b1954589243207d7a0fbc9997401044bf9a033eec78f6cb50da3f304a/pyarrow-20.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:e724a3fd23ae5b9c010e7be857f4405ed5e679db5c93e66204db1a69f733936a", size = 44503279 },
+     { url = "https://files.pythonhosted.org/packages/37/40/ad395740cd641869a13bcf60851296c89624662575621968dcfafabaa7f6/pyarrow-20.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:82f1ee5133bd8f49d31be1299dc07f585136679666b502540db854968576faf9", size = 25944982 },
+ ]
+
  [[package]]
  name = "pyasn1"
  version = "0.6.1"
@@ -3029,6 +3129,18 @@ asyncio = [
      { name = "greenlet" },
  ]
 
+ [[package]]
+ name = "sqlite-vec"
+ version = "0.1.6"
+ source = { registry = "https://pypi.org/simple" }
+ wheels = [
+     { url = "https://files.pythonhosted.org/packages/88/ed/aabc328f29ee6814033d008ec43e44f2c595447d9cccd5f2aabe60df2933/sqlite_vec-0.1.6-py3-none-macosx_10_6_x86_64.whl", hash = "sha256:77491bcaa6d496f2acb5cc0d0ff0b8964434f141523c121e313f9a7d8088dee3", size = 164075 },
+     { url = "https://files.pythonhosted.org/packages/a7/57/05604e509a129b22e303758bfa062c19afb020557d5e19b008c64016704e/sqlite_vec-0.1.6-py3-none-macosx_11_0_arm64.whl", hash = "sha256:fdca35f7ee3243668a055255d4dee4dea7eed5a06da8cad409f89facf4595361", size = 165242 },
+     { url = "https://files.pythonhosted.org/packages/f2/48/dbb2cc4e5bad88c89c7bb296e2d0a8df58aab9edc75853728c361eefc24f/sqlite_vec-0.1.6-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7b0519d9cd96164cd2e08e8eed225197f9cd2f0be82cb04567692a0a4be02da3", size = 103704 },
+     { url = "https://files.pythonhosted.org/packages/80/76/97f33b1a2446f6ae55e59b33869bed4eafaf59b7f4c662c8d9491b6a714a/sqlite_vec-0.1.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux1_x86_64.whl", hash = "sha256:823b0493add80d7fe82ab0fe25df7c0703f4752941aee1c7b2b02cec9656cb24", size = 151556 },
+     { url = "https://files.pythonhosted.org/packages/6a/98/e8bc58b178266eae2fcf4c9c7a8303a8d41164d781b32d71097924a6bebe/sqlite_vec-0.1.6-py3-none-win_amd64.whl", hash = "sha256:c65bcfd90fa2f41f9000052bcb8bb75d38240b2dae49225389eca6c3136d3f0c", size = 281540 },
+ ]
+
  [[package]]
  name = "stack-data"
  version = "0.6.3"