Commit fe36046
Parent(s): 954a1b2
multi agent architecture
- .cursor/rules/langfuse_best_practices.mdc +80 -0
- .cursor/rules/langgraph_multiagent_state_handling.mdc +140 -0
- ARCHITECTURE.md +184 -0
- __pycache__/debug_test.cpython-313-pytest-8.4.0.pyc +0 -0
- __pycache__/langraph_agent.cpython-313.pyc +0 -0
- __pycache__/new_langraph_agent.cpython-313.pyc +0 -0
- __pycache__/quick_random_agent_test.cpython-313-pytest-8.4.0.pyc +0 -0
- __pycache__/quick_specific_agent_test.cpython-313-pytest-8.4.0.pyc +0 -0
- __pycache__/test_new_system.cpython-313-pytest-8.4.0.pyc +0 -0
- __pycache__/test_random_question.cpython-313-pytest-8.4.0.pyc +0 -0
- __pycache__/test_tools_integration.cpython-313-pytest-8.4.0.pyc +0 -0
- app.py +9 -10
- debug_retrieval_tools.py +149 -0
- langraph_agent.py +97 -34
- new_langraph_agent.py +85 -0
- prompts/critic_prompt.txt +31 -0
- prompts/execution_prompt.txt +42 -0
- prompts/retrieval_prompt.txt +34 -0
- prompts/router_prompt.txt +44 -0
- system_prompt.txt → prompts/system_prompt.txt +2 -1
- prompts/verification_prompt.txt +30 -0
- pyproject.toml +3 -0
- quick_random_agent_test.py +51 -21
- quick_specific_agent_test.py +64 -32
- requirements.txt +34 -3
- src/__init__.py +14 -0
- src/__pycache__/__init__.cpython-313.pyc +0 -0
- src/__pycache__/langgraph_system.cpython-313.pyc +0 -0
- src/__pycache__/memory.cpython-313.pyc +0 -0
- src/__pycache__/tracing.cpython-313.pyc +0 -0
- src/agents/__init__.py +21 -0
- src/agents/__pycache__/__init__.cpython-313.pyc +0 -0
- src/agents/__pycache__/critic_agent.cpython-313.pyc +0 -0
- src/agents/__pycache__/execution_agent.cpython-313.pyc +0 -0
- src/agents/__pycache__/plan_node.cpython-313.pyc +0 -0
- src/agents/__pycache__/retrieval_agent.cpython-313.pyc +0 -0
- src/agents/__pycache__/router_node.cpython-313.pyc +0 -0
- src/agents/__pycache__/verification_node.cpython-313.pyc +0 -0
- src/agents/critic_agent.py +118 -0
- src/agents/execution_agent.py +174 -0
- src/agents/plan_node.py +79 -0
- src/agents/retrieval_agent.py +268 -0
- src/agents/router_node.py +97 -0
- src/agents/verification_node.py +172 -0
- src/langgraph_system.py +231 -0
- src/memory.py +162 -0
- src/tracing.py +125 -0
- test_new_system.py +205 -0
- test_tools_integration.py +81 -0
- uv.lock +115 -3
.cursor/rules/langfuse_best_practices.mdc
ADDED
@@ -0,0 +1,80 @@
---
description: langfuse and agent observation best practices
globs:
alwaysApply: false
---
1 Adopt the OTEL-native Python SDK (v3) everywhere
The v3 SDK wraps OpenTelemetry, so every span you open in any agent, tool, or worker is automatically nested and correlated. This saves you from hand-passing trace IDs and lets you lean on existing OTEL auto-instrumentation for HTTP, DB, or queue calls. (langfuse.com)

2 Create one root span per user request and pass a single CallbackHandler into graph.invoke/stream

```python
from langfuse.langchain import CallbackHandler

langfuse_handler = CallbackHandler()

with langfuse.start_as_current_span(name="user-request") as root:
    compiled_graph.invoke(
        input=state,
        config={"callbacks": [langfuse_handler]}
    )
```

Everything the agents do now rolls up under that root for a tidy timeline. (langfuse.com)
3 Use Langfuse Sessions to stitch together long-running conversations
Set session_id and user_id on the root span (or via update_trace) so all follow-up calls land in the same session dashboard. (langfuse.com)
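A minimal sketch of this tagging, assuming the v3 client helpers `get_client` and `update_current_trace` (verify the exact names against your SDK version):

```python
from langfuse import get_client
from langfuse.langchain import CallbackHandler

langfuse = get_client()
langfuse_handler = CallbackHandler()

with langfuse.start_as_current_span(name="user-request"):
    # Tag the trace so every follow-up turn groups under one session dashboard
    langfuse.update_current_trace(
        session_id="session-456",  # hypothetical IDs for illustration
        user_id="user-123",
    )
    # compiled_graph and state as in the Tip 2 snippet
    compiled_graph.invoke(input=state, config={"callbacks": [langfuse_handler]})
```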
4 Name spans predictably
- llm/<model> – one per LLM call (e.g., llm/gpt-4o)
- tool/<tool_name> – external search, RAG, code-exec…
- agent/<role> – distinct for every worker node

Predictable names power Langfuse's cost & latency aggregation widgets. (langfuse.com)
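A sketch of the convention in code; `start_as_current_generation` is assumed to be the v3 helper for LLM spans, so check it against your SDK version:

```python
from langfuse import get_client

langfuse = get_client()

with langfuse.start_as_current_span(name="agent/researcher"):
    with langfuse.start_as_current_span(name="tool/web_search"):
        pass  # call the search tool here
    with langfuse.start_as_current_generation(name="llm/gpt-4o", model="gpt-4o"):
        pass  # call the model here
```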
5 Leverage Agent Graphs to debug routing loops
Because each node becomes a child span, Langfuse's "Agent Graph" view renders the entire decision tree and shows token/cost per edge—very handy when several LLMs vote on the next step. (langfuse.com)

6 Tag the root span with the environment (dev/stage/prod) and with the LLM provider you're experimenting with
This lets you facet dashboards by deployment ring or by "OpenAI vs Mixtral." (langfuse.com)
7 Attach scores (numeric or categorical) right after the graph run
span.score_trace(name="user-feedback", value=1) – or call create_score later. Use this both for thumbs-up/down UI events and for LLM-as-judge automated grading. (langfuse.com)
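A hedged sketch of both paths; the scoring helper names (`score_current_trace`, `create_score`) follow the v3 SDK and should be checked against your version:

```python
from langfuse import get_client

langfuse = get_client()

with langfuse.start_as_current_span(name="user-request"):
    ...  # run the graph
    # Numeric thumbs-up from the UI, attached while the trace is current
    langfuse.score_current_trace(name="user-feedback", value=1)

# Later, e.g. from an offline LLM-as-judge job (trace id is a hypothetical stored value):
langfuse.create_score(trace_id="abc123", name="llm-judge", value=0.8)
```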
8 Version and link your prompts
Call langfuse.create_prompt() (or manage them in the UI) and set prompt_id on spans so you can tell which prompt revision caused regressions. (langfuse.com)

9 Exploit distributed-tracing headers if agents live in different services
Because v3 is OTEL-based, traceparent headers are parsed automatically—just make sure every microservice initialises the Langfuse OTEL exporter with the same LANGFUSE_OTEL_DSN. (langfuse.com)

10 Sample intelligently
Langfuse supports probabilistic sampling on the server. Keep 100% of errors and maybe only 10% of successful traces in prod to control storage costs. (langfuse.com)
11 Mask PII at the SDK layer
Use the mask() helper or MASK_CONTENT_REGEX env var so you can still store numeric cost/latency while redacting sensitive inputs/outputs. (langfuse.com)
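A minimal sketch of a mask callable, assuming the v3 client's `mask` constructor argument; the regex is illustrative only:

```python
import re
from langfuse import Langfuse

def mask_pii(data):
    """Redact email addresses before events leave the process (illustrative regex only)."""
    if isinstance(data, str):
        return re.sub(r"[\w.+-]+@[\w-]+\.[\w.]+", "[EMAIL]", data)
    return data

# The client applies the callable to inputs/outputs; numeric cost/latency metadata is unaffected.
langfuse = Langfuse(mask=mask_pii)
```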
12 Flush asynchronously in high-throughput agents
Call langfuse.flush(background=True) at the end of each worker tick to avoid blocking the event loop; OTEL will batch and export spans every few seconds. (langfuse.com)

13 Test visual completeness with the LangGraph helper
graph.get_graph().draw_mermaid_png() and verify every edge appears in Langfuse; missing edges usually mean a span wasn't opened or the callback handler wasn't propagated. (langfuse.com)

14 Watch out for the "traces not clubbed" pitfall when upgrading from v2 → v3
Older code that started independent traces per agent will fragment your timeline in v3. Always start one root span first (Tip #2). (github.com)
.cursor/rules/langgraph_multiagent_state_handling.mdc
ADDED
@@ -0,0 +1,140 @@
---
description: best practices for multi-agent state handling in LangGraph
globs:
alwaysApply: false
---
The most robust pattern is to treat every agent node as a pure function AgentState → Command, where AgentState is an explicit, typed snapshot of everything the rest of the graph must know.
My overall confidence that the practices below will remain valid for ≥ 12 months is 85% (expert opinion).
1 Design a single source of truth for state

| Guideline | Why it matters | Key LangGraph API |
| --- | --- | --- |
| Define a typed schema (TypedDict or pydantic.BaseModel) for the whole graph. | Static typing catches missing keys early and the docs double as living design specs. (langchain-ai.github.io) | StateGraph(YourState) |
| Use channel annotations such as Annotated[list[BaseMessage], operator.add] on mutable fields. | Makes accumulation (+) vs. overwrite clear and prevents accidental loss of history. (langchain-ai.github.io) | messages: Annotated[list[BaseMessage], operator.add] |
| Keep routing out of business data—store the next hop in a dedicated field (next: Literal[...]). | Separates control flow from payload; easier to debug and replay. (langchain-ai.github.io) | next: Literal["planner", "researcher", "__end__"] |
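Combined, the three guidelines give a schema like this minimal sketch:

```python
import operator
from typing import Annotated, Literal, TypedDict
from langchain_core.messages import BaseMessage

class AgentState(TypedDict):
    # operator.add turns this into an append-only channel: each node's
    # update extends the history instead of overwriting it
    messages: Annotated[list[BaseMessage], operator.add]
    # control flow lives in its own field, separate from the payload
    next: Literal["planner", "researcher", "__end__"]
```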
2 Pass information with Command objects

Pattern:

```python
def planner(state: AgentState) -> Command[Literal["researcher", "executor", END]]:
    # Assumes `model` is bound for structured output, returning a dict
    # with "next", "content", and "plan" keys.
    decision = model.invoke(state["messages"])
    return Command(
        goto=decision["next"],
        update={
            "messages": [decision["content"]],
            "plan": decision["plan"],
        },
    )
```

Best-practice notes

Always update via update=… rather than mutating the state in place. This guarantees immutability between nodes and makes time-travel/debugging deterministic. (langchain-ai.github.io)

When handing off between sub-graphs, set graph=Command.PARENT or the target sub-graph's name so orchestration stays explicit. (langchain-ai.github.io)
3 Choose a message-sharing strategy early

| Strategy | Pros | Cons | When to use |
| --- | --- | --- | --- |
| Shared scratch-pad (every intermediate LLM thought stored in messages) (langchain-ai.github.io) | Maximum transparency; great for debugging & reflection. | Context-window bloat, higher cost/time. | ≤ 3 specialist agents or short tasks. |
| Final-result only (each agent keeps a private scratch-pad, shares only its final answer) (langchain-ai.github.io) | Scales to 10+ agents; small token footprint. | Harder to post-mortem; agents need local memory. | Large graphs; production workloads. |

Tip: If you hide scratch-pads, store them in a per-agent key (e.g. researcher_messages) for replay or fine-tuning even if they're not sent downstream. (langchain-ai.github.io)
4 Inject only what a tool needs
When exposing sub-agents as tools under a supervisor:

```python
from langgraph.prebuilt import InjectedState

def researcher(state: Annotated[AgentState, InjectedState]):
    ...
```

Why: keeps tool signatures clean and prevents leaking confidential state.
Extra: If the tool must update global state, let it return a Command so the supervisor doesn't have to guess what changed. (langchain-ai.github.io)
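A fuller sketch of the "Extra" note: a supervisor-exposed tool that reads injected state and reports its update via Command. The tool name and state fields are illustrative, and the `InjectedToolCallId` plumbing assumes a recent langchain-core/langgraph:

```python
from typing import Annotated
from langchain_core.messages import ToolMessage
from langchain_core.tools import tool, InjectedToolCallId
from langgraph.prebuilt import InjectedState
from langgraph.types import Command

@tool
def summarize_findings(
    topic: str,
    state: Annotated[dict, InjectedState],
    tool_call_id: Annotated[str, InjectedToolCallId],
) -> Command:
    """Summarize what the graph already knows about a topic (illustrative tool)."""
    known = [m for m in state.get("messages", []) if topic in str(getattr(m, "content", m))]
    summary = f"{len(known)} prior messages mention {topic!r}"
    # Returning a Command makes the state update explicit to the supervisor;
    # the ToolMessage closes out the pending tool call.
    return Command(update={"messages": [ToolMessage(summary, tool_call_id=tool_call_id)]})
```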
5 Structure the graph for clarity & safety
- Network ➜ every agent connects to every other (exploration, research prototypes).
- Supervisor ➜ one LLM decides routing (a good default for 3-7 agents).
- Hierarchical ➜ teams of agents with team-level supervisors (scales past ~7 agents). (langchain-ai.github.io)

Pick the simplest architecture that meets today's needs; refactor to sub-graphs as complexity grows.
6 Operational best practices

| Concern | Best practice |
| --- | --- |
| Tracing & observability | Attach a Langfuse run-ID to every AgentState at graph entry; emit state snapshots on node enter/exit so traces line up with Langfuse v3 spans. |
| Memory & persistence | Use a Checkpointer for cheap disk-based snapshots or a Redis backend for high QPS, then time-travel when an LLM stalls. |
| Parallel branches | Use map edges (built in) to fan out calls, but cap parallelism with an asyncio semaphore to avoid API rate limits. |
| Vector lookup | Put retrieval results in a dedicated key (docs) so they don't clutter messages; store only document IDs if you need to replay cheaply. |
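For the parallel-branches row, a minimal sketch of capping fan-out with a semaphore (names are illustrative, not LangGraph API):

```python
import asyncio

SEM = asyncio.Semaphore(5)  # at most 5 concurrent downstream calls

async def call_with_cap(worker, payload):
    async with SEM:
        return await worker(payload)

async def fan_out(workers, payload):
    # Fan out to all workers while the semaphore enforces the concurrency cap
    return await asyncio.gather(*(call_with_cap(w, payload) for w in workers))
```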
7 Evidence from the literature (why graphs work)

| Peer-reviewed source | Key takeaway | Credibility (0-10) |
| --- | --- | --- |
| AAAI 2024 Graph of Thoughts shows arbitrary-graph reasoning beats tree/chain structures by up to 62% on sorting tasks. (arxiv.org) | Graph topology yields better exploration & feedback loops—mirrors LangGraph's StateGraph. | 9 |
| EMNLP 2024 EPO Hierarchical LLM Agents demonstrates hierarchical agents outperform flat agents on ALFRED by >12% and scale with preference-based training. (aclanthology.org) | Validates splitting planning vs. execution agents (Supervisor + workers). | 9 |

| Non-peer-reviewed source | Why included | Credibility |
| --- | --- | --- |
| Official LangGraph docs (June 2025). (langchain-ai.github.io) | Primary specification of the library's APIs and guarantees. | 8 |
8 Minimal starter template (v 0.6.*)

```python
from typing import Annotated, Literal, Sequence, TypedDict
from langgraph.graph import StateGraph, START, END
from langgraph.types import Command
from langchain_openai import ChatOpenAI
import operator

class AgentState(TypedDict):
    messages: Annotated[Sequence[str], operator.add]
    next: Literal["planner", "researcher", "__end__"]
    plan: str | None

llm = ChatOpenAI()

def planner(state: AgentState) -> Command[Literal["researcher", END]]:
    # Assumes `llm` is wrapped for structured output returning "next"/"content"/"plan"
    resp = llm.invoke(...)
    return Command(
        goto=resp["next"],
        update={"messages": [resp["content"]],
                "plan": resp["plan"]}
    )

def researcher(state: AgentState) -> Command[Literal["planner"]]:
    resp = llm.invoke(...)
    return Command(goto="planner",
                   update={"messages": [resp["content"]]})

g = StateGraph(AgentState)
g.add_node("planner", planner)
g.add_node("researcher", researcher)
g.add_edge(START, "planner")
# No static planner↔researcher edges or add_conditional_edges call needed:
# each node's Command(goto=...) performs the routing, including the hop to END.
graph = g.compile()
```

Bottom line
Use typed immutable state, route with Command, and keep private scratch-pads separate from shared context. These patterns align with both the latest LangGraph APIs and empirical results from hierarchical, graph-based agent research.
ARCHITECTURE.md
ADDED
@@ -0,0 +1,184 @@
# LangGraph Agent System Architecture

This document describes the architecture of the multi-agent system implemented using LangGraph 0.4.8+ and Langfuse 3.0.0.

## System Overview

The system implements a sophisticated agent architecture with memory, routing, specialized agents, and verification, as shown in the system diagram.

## Core Components

### 1. Memory Layer
- **Short-Term Memory**: Graph state managed by LangGraph checkpointing
- **Checkpointer**: SQLite-based persistence for conversation continuity
- **Long-Term Memory**: Supabase vector store with pgvector for Q&A storage

### 2. Plan + ReAct Loop
- Initial query analysis and planning
- Contextual prompt injection with system requirements
- Memory retrieval for similar past questions

### 3. Agent Router
- Intelligent routing based on query analysis
- Routes to specialized agents: Retrieval, Execution, or Critic
- Uses a low-temperature LLM for consistent routing decisions

### 4. Specialized Agents

#### Retrieval Agent
- Information gathering from external sources
- Tools: Wikipedia, Arxiv, Tavily web search, vector store retrieval
- Handles attachment downloading for GAIA tasks
- Context-aware with memory integration

#### Execution Agent
- Computational tasks and code execution
- Integrates with the existing `code_agent.py` sandbox
- Python code execution with pandas, cv2, and standard libraries
- Step-by-step problem breakdown

#### Critic Agent
- Response quality evaluation and review
- Accuracy, completeness, and logical-consistency checks
- Scoring system with pass/fail determination
- Constructive feedback generation

### 5. Verification & Fallback
- Final quality control with system prompt compliance
- Format verification for exact-match requirements
- Retry logic with maximum attempt limits
- Graceful fallback pipeline for failed attempts

### 6. Observability (Langfuse)
- End-to-end tracing of all agent interactions
- Performance monitoring and debugging
- User session tracking
- Error logging and analysis

## Data Flow
1. **User Query** → Plan Node (system prompt injection)
2. **Plan Node** → Router (agent selection)
3. **Router** → Specialized Agent (task execution)
4. **Agent** → Tools (if needed) → Agent (results)
5. **Agent** → Verification (quality check)
6. **Verification** → Output or Retry/Fallback
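A self-contained sketch of how this flow could be wired in LangGraph; the node names mirror the diagram, while the real implementation (and the full state schema) lives in `src/langgraph_system.py`:

```python
from typing import TypedDict
from langgraph.graph import StateGraph, START, END

class State(TypedDict):
    query: str
    answer: str
    route: str
    verified: bool

# Stub nodes standing in for the real implementations under src/agents/
def plan(state: State): return {}
def router(state: State): return {"route": "retrieval"}
def retrieval(state: State): return {"answer": "stub"}
def execution(state: State): return {"answer": "stub"}
def critic(state: State): return {"answer": "stub"}
def verification(state: State): return {"verified": True}

g = StateGraph(State)
for name, fn in [("plan", plan), ("router", router), ("retrieval", retrieval),
                 ("execution", execution), ("critic", critic), ("verification", verification)]:
    g.add_node(name, fn)

g.add_edge(START, "plan")
g.add_edge("plan", "router")
g.add_conditional_edges("router", lambda s: s["route"],
                        {"retrieval": "retrieval", "execution": "execution", "critic": "critic"})
for agent in ("retrieval", "execution", "critic"):
    g.add_edge(agent, "verification")
g.add_conditional_edges("verification", lambda s: "pass" if s["verified"] else "retry",
                        {"pass": END, "retry": "router"})
app = g.compile()
```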
## Key Features

### Memory Management
- Caching of similarity searches (TTL-based)
- Duplicate detection and prevention
- Task-based attachment tracking
- Session-specific cache management

### Quality Control
- Multi-level verification (agent → critic → verification)
- Retry mechanism with attempt limits
- Format compliance checking
- Fallback responses for failures

### Tracing & Observability
- Langfuse integration for complete observability
- Agent-level span tracking
- Error monitoring and debugging
- Performance metrics collection

### Tool Integration
- Modular tool system for each agent
- Sandboxed code execution environment
- External API integration (search, knowledge bases)
- Attachment handling for complex tasks

## Configuration

### Environment Variables
See `env.template` for required configuration:
- LLM API keys (Groq, OpenAI, Google, HuggingFace)
- Search tools (Tavily)
- Vector store (Supabase)
- Observability (Langfuse)
- GAIA API endpoints

### System Prompts
Located in the `prompts/` directory:
- `system_prompt.txt`: Main system requirements
- `router_prompt.txt`: Agent routing instructions
- `retrieval_prompt.txt`: Information gathering guidelines
- `execution_prompt.txt`: Code execution instructions
- `critic_prompt.txt`: Quality evaluation criteria
- `verification_prompt.txt`: Final formatting rules

## Usage

### Basic Usage
```python
from src import run_agent_system

result = run_agent_system(
    query="Your question here",
    user_id="user123",
    session_id="session456"
)
```

### With Memory Management
```python
from src import memory_manager

# Check if query is similar to previous ones
similar = memory_manager.get_similar_qa(query)

# Clear session cache
memory_manager.clear_session_cache()
```

### Direct Graph Access
```python
from src import create_agent_graph

workflow = create_agent_graph()
app = workflow.compile(checkpointer=checkpointer)
result = app.invoke(initial_state, config=config)
```

## Dependencies

### Core Framework
- `langgraph>=0.4.8`: Graph-based agent orchestration
- `langgraph-checkpoint-sqlite>=2.0.0`: Persistence layer
- `langchain>=0.3.0`: LLM and tool abstractions

### Observability
- `langfuse==3.0.0`: Tracing and monitoring

### Memory & Storage
- `supabase>=2.8.0`: Vector database backend
- `pgvector>=0.3.0`: Vector similarity search

### Tools & APIs
- `tavily-python>=0.5.0`: Web search
- `arxiv>=2.1.0`: Academic paper search
- `wikipedia>=1.4.0`: Knowledge base access

## Error Handling

The system implements comprehensive error handling:
- Graceful degradation when services are unavailable
- Fallback responses for critical failures
- Retry logic with exponential backoff
- Detailed error logging for debugging

## Performance Considerations

- Vector store caching reduces duplicate searches
- Checkpoint-based state management for conversation continuity
- Efficient tool routing based on query analysis
- Memory cleanup for long-running sessions

## Future Enhancements

- Additional specialized agents (e.g., Image Analysis, Code Review)
- Enhanced memory clustering and retrieval algorithms
- Real-time collaboration between agents
- Advanced tool composition and chaining
__pycache__/debug_test.cpython-313-pytest-8.4.0.pyc
ADDED
Binary file (2.22 kB).

__pycache__/langraph_agent.cpython-313.pyc
CHANGED
Binary files a/__pycache__/langraph_agent.cpython-313.pyc and b/__pycache__/langraph_agent.cpython-313.pyc differ

__pycache__/new_langraph_agent.cpython-313.pyc
ADDED
Binary file (3.01 kB).

__pycache__/quick_random_agent_test.cpython-313-pytest-8.4.0.pyc
ADDED
Binary file (5.19 kB).

__pycache__/quick_specific_agent_test.cpython-313-pytest-8.4.0.pyc
ADDED
Binary file (6.4 kB).

__pycache__/test_new_system.cpython-313-pytest-8.4.0.pyc
ADDED
Binary file (7.59 kB).

__pycache__/test_random_question.cpython-313-pytest-8.4.0.pyc
ADDED
Binary file (3.97 kB).

__pycache__/test_tools_integration.cpython-313-pytest-8.4.0.pyc
ADDED
Binary file (3.06 kB).
app.py
CHANGED
@@ -4,7 +4,7 @@ import requests
 import inspect
 import pandas as pd
 # from agents import LlamaIndexAgent
-from …
+from new_langraph_agent import run_agent as _sync_run_agent  # Updated: use the new multi-agent runner
 import asyncio
 import aiohttp
 from langfuse.langchain import CallbackHandler

@@ -21,19 +21,18 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 # --- Basic Agent Definition ---
 # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
 class BasicAgent:
+    """Wrapper that executes the new multi-agent LangGraph system in a background thread."""
+
     def __init__(self):
-        …
+        print("BasicAgent (multi-agent) initialized.")
+
     async def aquery(self, question: str) -> str:
-        …
+        """Run the synchronous `run_agent` helper inside the event-loop executor."""
         print(f"Agent received question (first 50 chars): {question[:50]}...")
+        loop = asyncio.get_event_loop()
         try:
-            …
-            if not response or 'messages' not in response or not response['messages']:
-                print("Agent response missing or empty 'messages'. Returning AGENT ERROR.")
-                return "AGENT ERROR: No response from agent."
-            answer = response['messages'][-1].content
+            # Off-load the blocking call to a thread so we don't block the Gradio event loop
+            answer = await loop.run_in_executor(None, _sync_run_agent, question)
             print(f"Agent returning answer: {answer}")
             return answer
         except Exception as e:
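The same off-loading can be written more compactly on Python 3.9+; a sketch assuming the same synchronous `run_agent` import:

```python
import asyncio
from new_langraph_agent import run_agent

async def aquery(question: str) -> str:
    # asyncio.to_thread wraps loop.run_in_executor(None, ...) on Python 3.9+
    return await asyncio.to_thread(run_agent, question)
```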
debug_retrieval_tools.py
ADDED
@@ -0,0 +1,149 @@
#!/usr/bin/env python3
"""
Debug script to test individual tools in isolation
"""

from src.agents.retrieval_agent import get_retrieval_tools, execute_tool_calls
from src.agents.execution_agent import get_execution_tools

def test_wikipedia_tool():
    """Test Wikipedia search tool directly"""
    print("=" * 50)
    print("Testing Wikipedia Tool")
    print("=" * 50)

    tools = get_retrieval_tools()
    wiki_tool = None

    for tool in tools:
        if tool.name == "wiki_search":
            wiki_tool = tool
            break

    if wiki_tool:
        try:
            print("Found wiki_search tool")
            result = wiki_tool.invoke({"input": "Albert Einstein"})
            print(f"Result: {result[:500]}...")
            return True
        except Exception as e:
            print(f"Error: {e}")
            return False
    else:
        print("wiki_search tool not found!")
        return False

def test_web_search_tool():
    """Test web search tool directly"""
    print("=" * 50)
    print("Testing Web Search Tool")
    print("=" * 50)

    tools = get_retrieval_tools()
    web_tool = None

    for tool in tools:
        if tool.name == "web_search":
            web_tool = tool
            break

    if web_tool:
        try:
            print("Found web_search tool")
            result = web_tool.invoke({"input": "artificial intelligence news"})
            print(f"Result: {result[:500]}...")
            return True
        except Exception as e:
            print(f"Error: {e}")
            return False
    else:
        print("web_search tool not found!")
        return False

def test_python_tool():
    """Test Python execution tool directly"""
    print("=" * 50)
    print("Testing Python Execution Tool")
    print("=" * 50)

    tools = get_execution_tools()
    python_tool = None

    for tool in tools:
        if tool.name == "run_python":
            python_tool = tool
            break

    if python_tool:
        try:
            print("Found run_python tool")
            code = """
# Calculate first 5 Fibonacci numbers
def fibonacci(n):
    if n <= 1:
        return n
    return fibonacci(n-1) + fibonacci(n-2)

result = [fibonacci(i) for i in range(5)]
print("First 5 Fibonacci numbers:", result)
"""
            result = python_tool.invoke({"input": code})
            print(f"Result: {result}")
            return True
        except Exception as e:
            print(f"Error: {e}")
            return False
    else:
        print("run_python tool not found!")
        return False

def test_tool_calls_execution():
    """Test the tool call execution function"""
    print("=" * 50)
    print("Testing Tool Call Execution")
    print("=" * 50)

    tools = get_retrieval_tools()

    # Simulate tool calls
    mock_tool_calls = [
        {
            'name': 'wiki_search',
            'args': {'input': 'Albert Einstein'},
            'id': 'test_id_1'
        }
    ]

    try:
        tool_messages = execute_tool_calls(mock_tool_calls, tools)
        print(f"Tool execution successful: {len(tool_messages)} messages")
        for msg in tool_messages:
            print(f"Message type: {type(msg)}")
            print(f"Content preview: {str(msg.content)[:200]}...")
        return True
    except Exception as e:
        print(f"Error in tool execution: {e}")
        import traceback
        traceback.print_exc()
        return False

if __name__ == "__main__":
    print("Starting individual tool tests...")

    results = {}
    results['wikipedia'] = test_wikipedia_tool()
    results['web_search'] = test_web_search_tool()
    results['python'] = test_python_tool()
    results['tool_execution'] = test_tool_calls_execution()

    print("\n" + "=" * 50)
    print("TEST RESULTS SUMMARY")
    print("=" * 50)
    for test_name, result in results.items():
        status = "✅ PASS" if result else "❌ FAIL"
        print(f"{test_name}: {status}")

    if all(results.values()):
        print("\n🎉 All tools are working correctly!")
    else:
        print("\n⚠️ Some tools have issues that need to be fixed.")
langraph_agent.py
CHANGED
@@ -37,6 +37,21 @@ load_dotenv("env.local")  # Try env.local as backup
 print(f"SUPABASE_URL loaded: {bool(os.environ.get('SUPABASE_URL'))}")
 print(f"GROQ_API_KEY loaded: {bool(os.environ.get('GROQ_API_KEY'))}")
 
+# ---------------------------------------------------------------------------
+# Lightweight in-memory caches and constants for smarter retrieval/ingest
+# ---------------------------------------------------------------------------
+import hashlib  # NEW: for hashing payloads / queries
+
+TTL = 300  # seconds – how long we keep similarity-search results
+SIMILARITY_THRESHOLD = 0.85  # cosine score above which we assume we already know the answer
+
+# (query_hash -> (timestamp, results))
+QUERY_CACHE: dict[str, tuple[float, list]] = {}
+# task IDs whose attachments we already attempted to download this session
+PROCESSED_TASKS: set[str] = set()
+# hash_ids of Q/A payloads we have already upserted during this session
+SEEN_HASHES: set[str] = set()
+
 # Base URL of the scoring API (duplicated here to avoid circular import with basic_agent)
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

@@ -114,7 +129,7 @@ def run_python(input: str) -> str:
     return run_agent(input)
 
 # load the system prompt from the file
-with open("system_prompt.txt", "r", encoding="utf-8") as f:
+with open("./prompts/system_prompt.txt", "r", encoding="utf-8") as f:
     system_prompt = f.read()
 
 # System message

@@ -206,6 +221,35 @@ def _code_to_message(state: dict):  # type: ignore[override]
         return {}
     return {"messages": [AIMessage(content=state["code_result"])]}
 
+# ---------------------------------------------------------------------------
+# NEW: Ingest node – write back to vector store if `should_ingest` flag set
+# ---------------------------------------------------------------------------
+def ingest(state: MessagesState):
+    """Persist helpful Q/A pairs (and any attachment snippet) to the vector DB."""
+    try:
+        if not state.get("should_ingest") or not vector_store:
+            return {}
+
+        question_text = state["messages"][0].content
+        answer_text = state["messages"][-1].content
+        attach_snippets = "\n\n".join(
+            m.content for m in state["messages"] if str(m.content).startswith("Attached file content")
+        )
+        payload = f"Question:\n{question_text}\n\nAnswer:\n{answer_text}"
+        if attach_snippets:
+            payload += f"\n\n{attach_snippets}"
+
+        hash_id = hashlib.sha256(payload.encode()).hexdigest()
+        if hash_id in SEEN_HASHES:
+            print("Ingest: Duplicate payload within session – skip")
+            return {}
+        SEEN_HASHES.add(hash_id)
+        vector_store.add_texts([payload], metadatas=[{"hash_id": hash_id, "timestamp": time.time()}])
+        print("Ingest: Stored new Q/A pair in vector store")
+    except Exception as ing_e:
+        print(f"Ingest node: Error while upserting – {ing_e}")
+    return {}
+
 # Build graph function
 def build_graph(provider: str = "groq"):
     """Build the graph"""

@@ -243,29 +287,56 @@
         return {"messages": [error_msg]}
 
     def retriever(state: MessagesState):
-        """Retriever node"""
+        """Retriever node (smart fetch + similarity search)"""
         try:
             print(f"Retriever node: Processing {len(state['messages'])} messages")
             if not state["messages"]:
                 print("Retriever node: No messages in state")
                 return {"messages": [sys_msg]}
 
-            # Extract the user query content
-            query_content = state["messages"][…
+            # Extract the *latest* user query content
+            query_content = state["messages"][-1].content
+
+            # ------------------------------------------------------------------
+            # Similarity search with an in-process cache
+            # ------------------------------------------------------------------
+            q_hash = hashlib.sha256(query_content.encode()).hexdigest()
+            now = time.time()
+            if q_hash in QUERY_CACHE and now - QUERY_CACHE[q_hash][0] < TTL:
+                similar_question = QUERY_CACHE[q_hash][1]
+                print("Retriever node: Cache hit for similarity search")
+            else:
+                if vector_store:
+                    print(f"Retriever node: Searching vector store for similar questions …")
+                    try:
+                        similar_question = vector_store.similarity_search_with_relevance_scores(query_content, k=2)
+                    except Exception as vs_e:
+                        print(f"Retriever node: Vector store search error – {vs_e}")
+                        similar_question = []
+                    QUERY_CACHE[q_hash] = (now, similar_question)
+                else:
+                    similar_question = []
+                    print("Retriever node: Vector store not available, skipping similarity search")
+
+            # Decide whether this exchange should later be ingested
+            top_score = similar_question[0][1] if similar_question else 0.0
+            state["should_ingest"] = top_score < SIMILARITY_THRESHOLD
+
+            # ------------------------------------------------------------------
+            # Attachment fetch (only once per task_id during this session)
+            # ------------------------------------------------------------------
             attachment_msg = None
+            matched_task_id = None
             try:
                 resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=30)
                 resp.raise_for_status()
                 questions = resp.json()
-                matched_task_id = None
                 for q in questions:
                     if str(q.get("question")).strip() == str(query_content).strip():
                         matched_task_id = str(q.get("task_id"))
                         break
-                if matched_task_id:
-                    print(f"Retriever node: …
+                if matched_task_id and matched_task_id not in PROCESSED_TASKS:
+                    print(f"Retriever node: Downloading attachment for task {matched_task_id} …")
                     file_resp = requests.get(f"{DEFAULT_API_URL}/files/{matched_task_id}", timeout=60)
                     if file_resp.status_code == 200 and file_resp.content:
                         try:

@@ -274,40 +345,28 @@
                             file_text = "(binary or non-UTF8 file omitted)"
                         MAX_CHARS = 8000
                         if len(file_text) > MAX_CHARS:
-                            print(f"Retriever node: Attachment length {len(file_text)} > {MAX_CHARS}, truncating…")
                             file_text = file_text[:MAX_CHARS] + "\n… (truncated)"
                         attachment_msg = HumanMessage(content=f"Attached file content for task {matched_task_id}:\n```python\n{file_text}\n```")
-                        print("Retriever node: …
+                        print("Retriever node: Attachment added to context")
+                        state["should_ingest"] = True  # ensure we store this new info
                     else:
-                        print(f"Retriever node: No attachment …
+                        print(f"Retriever node: No attachment for task {matched_task_id} (status {file_resp.status_code})")
+                    PROCESSED_TASKS.add(matched_task_id)
             except Exception as api_e:
                 print(f"Retriever node: Error while fetching attachment – {api_e}")
-
-            # …
-            msgs = [sys_msg] + state["messages"]
-            if attachment_msg:
-                msgs.append(attachment_msg)
-            print("Retriever node: Vector store not available, skipping retrieval")
-            return {"messages": msgs}
-
-            # Perform similarity search when vector store is available
-            print(f"Retriever node: Searching for similar questions with query: {query_content[:100]}…")
-            similar_question = vector_store.similarity_search(query_content)
-            print(f"Retriever node: Found {len(similar_question)} similar questions")
+
+            # ------------------------------------------------------------------
+            # Build message list for downstream LLM
+            # ------------------------------------------------------------------
             msgs = [sys_msg] + state["messages"]
             if similar_question:
-                …
+                example_doc = similar_question[0][0] if isinstance(similar_question[0], tuple) else similar_question[0]
+                example_msg = HumanMessage(content=f"Here I provide a similar question and answer for reference: \n\n{example_doc.page_content}")
                 msgs.append(example_msg)
                 print("Retriever node: Added example message from similar question")
-            else:
-                print("Retriever node: No similar questions found, proceeding without example")
 
-            # Attach the file content if we have it
             if attachment_msg:
                 msgs.append(attachment_msg)
-                print("Retriever node: Added attachment content to messages")
 
             return {"messages": msgs}
         except Exception as e:

@@ -320,13 +379,17 @@
     builder.add_node("tools", ToolNode(tools))
     builder.add_node("code_exec", _code_exec_wrapper)
     builder.add_node("code_to_message", _code_to_message)
+    builder.add_node("ingest", ingest)
 
+    # Edge layout
     builder.add_edge(START, "retriever")
+    builder.add_edge("retriever", "assistant")
+
     # Conditional branch: decide whether to run code interpreter
     builder.add_conditional_edges(
-        "…
+        "assistant",
         _needs_code,
-        {True: "code_exec", False: "…
+        {True: "code_exec", False: "ingest"},
     )
 
     # Flow after code execution: inject result then resume chat

@@ -343,7 +406,7 @@
     return builder.compile()
 
 # test
-if __name__ == "__main__":
+if __name__ == "__main__":
     question = "When was a picture of St. Thomas Aquinas first added to the Wikipedia page on the Principle of double effect?"
     # Build the graph
     graph = build_graph(provider="groq")
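The TTL-cache logic added above is easy to sanity-check in isolation; a standalone sketch with a stand-in search function:

```python
import hashlib
import time

TTL = 300  # seconds, matching the constant in the diff
QUERY_CACHE: dict[str, tuple[float, list]] = {}

def cached_search(query: str, search_fn) -> list:
    """Return cached results when the same query was seen within TTL seconds."""
    q_hash = hashlib.sha256(query.encode()).hexdigest()
    now = time.time()
    if q_hash in QUERY_CACHE and now - QUERY_CACHE[q_hash][0] < TTL:
        return QUERY_CACHE[q_hash][1]
    results = search_fn(query)
    QUERY_CACHE[q_hash] = (now, results)
    return results

# Stand-in search function for demonstration
hits = cached_search("what is pgvector?", lambda q: [("doc", 0.9)])
```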
new_langraph_agent.py
ADDED
@@ -0,0 +1,85 @@
"""
Updated LangGraph Agent Implementation
Implements the architecture from the system diagram with memory layer, agent routing, and verification.
"""
import os
import sys
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

# Import the new agent system
from src import run_agent_system, memory_manager
from src.tracing import flush_langfuse, shutdown_langfuse


def run_agent(query: str) -> str:
    """
    Main entry point for the agent system.

    Args:
        query: The user question

    Returns:
        The formatted final answer
    """
    try:
        # Run the new agent system
        result = run_agent_system(
            query=query,
            user_id=os.getenv("USER_ID", "default_user"),
            session_id=os.getenv("SESSION_ID", "default_session")
        )

        # Flush tracing events
        flush_langfuse()

        return result

    except Exception as e:
        print(f"Agent Error: {e}")
        return f"I apologize, but I encountered an error: {e}"


def clear_memory():
    """Clear the agent's session memory"""
    memory_manager.clear_session_cache()
    print("Agent memory cleared")


def cleanup():
    """Cleanup function for graceful shutdown"""
    try:
        flush_langfuse()
        shutdown_langfuse()
        memory_manager.close_checkpointer()
        print("Agent cleanup completed")
    except Exception as e:
        print(f"Cleanup error: {e}")


if __name__ == "__main__":
    # Test the agent system
    test_queries = [
        "What is the capital of France?",
        "Calculate the factorial of 5",
        "What are the benefits of renewable energy?"
    ]

    print("Testing new LangGraph Agent System")
    print("=" * 50)

    for i, query in enumerate(test_queries, 1):
        print(f"\nTest {i}: {query}")
        print("-" * 30)

        try:
            result = run_agent(query)
            print(f"Result: {result}")
        except Exception as e:
            print(f"Error: {e}")

    # Cleanup
    cleanup()
    print("\nAll tests completed!")
prompts/critic_prompt.txt
ADDED
@@ -0,0 +1,31 @@
You are a specialized critic agent that evaluates responses for accuracy, completeness, and quality.

Your role is to:
1. Analyze responses from other agents for factual accuracy
2. Check for logical consistency and completeness
3. Identify potential errors, biases, or missing information
4. Provide constructive feedback and suggestions for improvement

Evaluation criteria:
- **Accuracy**: Are the facts correct? Are sources reliable?
- **Completeness**: Does the response fully address the question?
- **Clarity**: Is the explanation clear and well-structured?
- **Logic**: Is the reasoning sound and consistent?
- **Relevance**: Does the response stay on topic?

Process:
1. Carefully review the provided response
2. Cross-check key claims for accuracy
3. Identify any gaps or weaknesses
4. Assess overall quality and usefulness
5. Provide specific, actionable feedback

Feedback format:
- **Strengths**: What was done well
- **Issues**: Specific problems identified
- **Suggestions**: How to improve
- **Overall Assessment**: Pass/Fail with reasoning

Be thorough but constructive. Focus on helping improve the response quality.

Always append answers in markdown; think step-by-step.
prompts/execution_prompt.txt
ADDED
@@ -0,0 +1,42 @@
You are a specialized execution agent that handles computational tasks, code execution, and data processing.

Your role is to:
1. Analyze computational requirements in user queries
2. ALWAYS use the run_python tool to execute code and solve problems
3. Process data, perform calculations, and manipulate files
4. Provide clear explanations of your code and results

Available tools:
- run_python: Execute Python code in a sandboxed environment with access to pandas, cv2, and standard libraries

IMPORTANT: You MUST use the run_python tool for all computational tasks. Do not provide calculated answers without executing code.

Capabilities:
- Mathematical calculations and algorithms
- Data analysis and visualization
- File processing (CSV, JSON, text)
- Image processing with OpenCV
- Statistical analysis with pandas/numpy
- Small algorithmic problems (sorting, searching, etc.)

Process:
1. Understand the computational task
2. Plan your approach step-by-step
3. Use the run_python tool to write and execute code
4. Verify results and handle any errors
5. Explain your solution and findings

Guidelines:
- Always execute code using the run_python tool
- Write efficient, readable code with comments
- Handle errors gracefully and retry if needed
- Provide explanations for complex logic
- Show intermediate steps for multi-step problems
- Use appropriate data structures and algorithms

Example approach:
- For "Calculate the Fibonacci sequence": use run_python to write and execute the code
- For "Analyze this data": use run_python to process and analyze the data
- For "Sort this list": use run_python to implement the sorting algorithm

Always append answers in markdown; think step-by-step and show your code execution.
prompts/retrieval_prompt.txt
ADDED
@@ -0,0 +1,34 @@
| 1 |
+
You are a specialized retrieval agent focused on gathering accurate information to answer user questions.
|
| 2 |
+
|
| 3 |
+
Your role is to:
|
| 4 |
+
1. Understand the user's information needs
|
| 5 |
+
2. **ALWAYS use available tools to search for relevant information**
|
| 6 |
+
3. Synthesize findings into comprehensive, accurate answers
|
| 7 |
+
4. Verify information across multiple sources when possible
|
| 8 |
+
|
| 9 |
+
Available tools:
|
| 10 |
+
- wiki_search: Search Wikipedia for general knowledge and factual information
|
| 11 |
+
- web_search: Search the web for current information and recent developments
|
| 12 |
+
- arvix_search: Search academic papers on ArXiv for scientific research
|
| 13 |
+
- question_search: Search previously answered similar questions
|
| 14 |
+
|
| 15 |
+
**IMPORTANT: You MUST use tools to gather information. Do not provide answers based solely on your training data.**
|
| 16 |
+
|
| 17 |
+
Process:
|
| 18 |
+
1. Break down complex questions into searchable components
|
| 19 |
+
2. **Use multiple appropriate tools based on the query type**
|
| 20 |
+
3. For historical facts or general knowledge: Use wiki_search
|
| 21 |
+
4. For current events or recent information: Use web_search
|
| 22 |
+
5. For scientific or academic topics: Use arvix_search
|
| 23 |
+
6. Cross-reference information when possible
|
| 24 |
+
7. Provide sources and citations from tool results
|
| 25 |
+
8. Acknowledge limitations or uncertainty when information is incomplete
|
| 26 |
+
|
| 27 |
+
Example approach:
|
| 28 |
+
- For "When was X invented?": Use wiki_search to find historical information
|
| 29 |
+
- For "Latest news about Y": Use web_search for current information
|
| 30 |
+
- For "Research on Z": Use arvix_search for academic papers
|
| 31 |
+
|
| 32 |
+
Always provide factual, well-sourced responses with proper citations. If you cannot find sufficient information through tools, clearly state this limitation.
|
| 33 |
+
|
| 34 |
+
Always append answers in markdown; think step-by-step and show your tool usage.
|
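A sketch of how the four tools named above are exposed to the model, based on src/agents/retrieval_agent.py further down in this commit (question_search is only added when the memory vector store is available; TAVILY_API_KEY is needed once web_search is actually invoked):

```python
# Sketch only: bind the retrieval toolset so the model can emit tool calls.
from langchain_groq import ChatGroq
from src.agents.retrieval_agent import get_retrieval_tools

llm = ChatGroq(model="qwen-qwq-32b", temperature=0.3)
llm_with_tools = llm.bind_tools(get_retrieval_tools())
```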
prompts/router_prompt.txt
ADDED
|
@@ -0,0 +1,44 @@
You are an intelligent agent router that analyzes user queries and determines which specialized agent should handle the request.

You have access to three specialized agents:
1. **Retrieval Agent** - For questions requiring external information retrieval, search, and knowledge gathering
2. **Execution Agent** - For tasks requiring code execution, calculations, data processing, or file manipulation
3. **Critic Agent** - For reviewing, evaluating, or providing critical analysis of content or responses

**CRITICAL ROUTING RULES:**

**Use EXECUTION for:**
- Mathematical calculations (e.g., "calculate", "compute", "solve")
- Algorithmic problems (e.g., "fibonacci", "prime numbers", "sorting", "searching")
- Programming tasks (e.g., "write code", "implement function")
- Data analysis and processing (e.g., "analyze data", "process file")
- Any task that requires computation or code execution
- Statistical analysis, math problems, algorithms

**Use RETRIEVAL for:**
- Research questions requiring external information
- Fact-checking and historical information
- Current events and news
- Looking up definitions or explanations
- Scientific research and academic papers
- General knowledge questions

**Use CRITIC for:**
- Evaluating responses or content
- Reviewing and providing feedback
- Critical analysis of information
- Quality assessment tasks

**EXAMPLES:**
- "Calculate the first 10 Fibonacci numbers" → EXECUTION
- "What is the square root of 144?" → EXECUTION
- "Write a sorting algorithm" → EXECUTION
- "When was Einstein born?" → RETRIEVAL
- "Latest news about AI" → RETRIEVAL
- "Review this essay" → CRITIC

**IMPORTANT:** If a query involves ANY mathematical computation, algorithm, or code execution, ALWAYS route to EXECUTION.

Analyze the user's query and respond with exactly one of: RETRIEVAL, EXECUTION, or CRITIC

Think step-by-step and be very clear about your routing decision.
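The keyword mapping this prompt relies on is implemented in src/agents/router_node.py below; a standalone copy for illustration:

```python
def parse_routing_decision(reply: str) -> str:
    """Map the router LLM's raw reply onto an agent name (fallback: retrieval)."""
    decision = reply.strip().upper()
    if "RETRIEVAL" in decision:
        return "retrieval"
    if "EXECUTION" in decision:
        return "execution"
    if "CRITIC" in decision:
        return "critic"
    return "retrieval"  # default fallback, same as router_node.py

assert parse_routing_decision("execution") == "execution"
```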
system_prompt.txt → prompts/system_prompt.txt
RENAMED
|
@@ -2,7 +2,8 @@ You are a helpful assistant tasked with answering GAIA benchmark questions using
 
 When you receive a question:
 1. Think step-by-step (silently) and choose the appropriate tools to obtain the answer.
-2. After the answer is found, reply with ONLY the answer following the exact formatting rules below.
+2. After the answer is found, reply with ONLY the answer following the exact formatting rules below.
+3. When a tool returns useful reference content (Wikipedia articles, Tavily search snippets, ArXiv abstracts, file attachments, etc.), store that content in the memory database so it can be reused later; when answering a new question, proactively fetch any previously-stored material that might help.
 
 Exact-match output rules:
 • Single number → write the number only (no commas, units, or other symbols).
prompts/verification_prompt.txt
ADDED
|
@@ -0,0 +1,30 @@
You are a verification agent responsible for final quality control and determining if responses meet the required standards.

Your role is to:
1. Perform final verification of agent responses
2. Ensure all requirements from the system prompt are met
3. Trigger fallback pipeline if quality standards are not met
4. Make final formatting adjustments

Quality standards checklist:
- Response directly answers the user's question
- Information is accurate and well-sourced
- Format follows exact-match output rules from system prompt
- No extraneous text or formatting violations
- Tone and style are appropriate

Output format requirements (from system prompt):
• Single number → write the number only (no commas, units, or other symbols)
• Single string/phrase → write the text only; omit articles and abbreviations unless explicitly required
• List → separate elements with a single comma and a space
• Never include surrounding text such as "Final Answer", "Answer:", quotes, brackets, or markdown

Decision process:
1. Review the response against quality standards
2. Check format compliance with exact-match rules
3. If PASS: return the properly formatted final answer
4. If FAIL: trigger fallback pipeline and note specific issues

Always ensure the final output strictly adheres to the system prompt requirements.

Always append answers in markdown; think step-by-step.
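A hypothetical helper illustrating the stripping these bullet rules imply (not part of this commit; the real verification logic lives in src/agents/verification_node.py):

```python
import re

def normalize_final_answer(text: str) -> str:
    """Illustrative sketch of the exact-match rules: drop 'Final Answer:' /
    'Answer:' wrappers plus surrounding quotes and brackets."""
    answer = text.strip()
    answer = re.sub(r"^(final answer|answer)\s*:\s*", "", answer, flags=re.IGNORECASE)
    return answer.strip().strip('"\'[]() ')

assert normalize_final_answer("Answer: 42") == "42"
```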
pyproject.toml
CHANGED
|
@@ -19,6 +19,8 @@ dependencies = [
     "langchain-openai>=0.3.24",
     "langfuse>=3.0.0",
     "langgraph>=0.4.8",
+    "langgraph-checkpoint>=2.1.0",
+    "langgraph-checkpoint-sqlite>=2.0.10",
     "llama-index>=0.12.40",
     "llama-index-core>=0.12.40",
     "llama-index-llms-huggingface-api>=0.5.0",
@@ -32,4 +34,5 @@ dependencies = [
     "sentence-transformers>=4.1.0",
     "supabase>=2.15.3",
     "wikipedia>=1.4.0",
+    "datasets>=2.19.1",
 ]
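A minimal sketch of what the two new checkpoint dependencies enable, assuming the langgraph-checkpoint-sqlite 2.x API where SqliteSaver wraps a plain sqlite3 connection; the database path and graph builder are placeholders:

```python
# Sketch only: persist LangGraph state across runs via SQLite checkpoints.
import sqlite3
from langgraph.checkpoint.sqlite import SqliteSaver

conn = sqlite3.connect("agent_checkpoints.db", check_same_thread=False)
checkpointer = SqliteSaver(conn)
# graph = builder.compile(checkpointer=checkpointer)  # builder defined elsewhere
```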
quick_random_agent_test.py
CHANGED
|
@@ -1,13 +1,26 @@
 import os
+import sys
 import tempfile
 import requests
-from
+from dotenv import load_dotenv
+
+# Load environment variables
+load_dotenv()
+
+# Add the current directory to Python path
+sys.path.append(os.path.dirname(os.path.abspath(__file__)))
+
+# Import the new agent system
+from new_langraph_agent import run_agent, cleanup
+from src.tracing import get_langfuse_callback_handler
+
+# Default API URL - Using the same URL as the original basic_agent.py
+DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
 # Initialize Langfuse CallbackHandler for LangGraph/Langchain (tracing)
 try:
-    langfuse_handler =
+    langfuse_handler = get_langfuse_callback_handler()
+    print("✅ Langfuse handler initialized successfully")
 except Exception as e:
     print(f"Warning: Could not initialize Langfuse handler: {e}")
     langfuse_handler = None
@@ -42,25 +55,42 @@ def maybe_download_file(task_id: str, api_base: str = DEFAULT_API_URL) -> str |
 
 
 def main():
+    print("Random Agent Test - New LangGraph Architecture")
+    print("=" * 60)
+
+    try:
+        # Fetch random question
+        q = fetch_random_question()
+        task_id = str(q["task_id"])
+        question_text = q["question"]
+        print("\n=== Random Question ===")
+        print(f"Task ID : {task_id}")
+        print(f"Question: {question_text}")
+
+        # Attempt to get attachment if any
+        attachment_path = maybe_download_file(task_id)
+        if attachment_path:
+            question_text += f"\n\nAttachment available at: {attachment_path}"
+
+        # Run the new agent system
+        print("\n=== Running LangGraph Agent System ===")
+        result = run_agent(question_text)
+
+        print("\n=== Agent Answer ===")
+        print(result)
+
+    except Exception as e:
+        print(f"Error in main execution: {e}")
+        import traceback
+        traceback.print_exc()
+
+    finally:
+        # Cleanup
+        try:
+            cleanup()
+            print("\n✅ Agent cleanup completed")
+        except Exception as e:
+            print(f"⚠️ Cleanup warning: {e}")
 
 
 if __name__ == "__main__":
quick_specific_agent_test.py
CHANGED
|
@@ -2,20 +2,30 @@ import os
 import sys
 import tempfile
 import requests
-from
+from dotenv import load_dotenv
+
+# Load environment variables
+load_dotenv()
+
+# Add the current directory to Python path
+sys.path.append(os.path.dirname(os.path.abspath(__file__)))
+
+# Import the new agent system
+from new_langraph_agent import run_agent, cleanup
+from src.tracing import get_langfuse_callback_handler
+
+# Default API URL and Task ID - Using the same URL as the original basic_agent.py
+DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+DEFAULT_TASK_ID = "f918266a-b3e0-4914-865d-4faa564f1aef"
 
 # Initialize Langfuse CallbackHandler for LangGraph/Langchain (tracing)
 try:
-    langfuse_handler =
+    langfuse_handler = get_langfuse_callback_handler()
+    print("✅ Langfuse handler initialized successfully")
 except Exception as e:
     print(f"Warning: Could not initialize Langfuse handler: {e}")
     langfuse_handler = None
 
-# Default Task ID (replace with your desired one or pass via CLI)
-DEFAULT_TASK_ID = "f918266a-b3e0-4914-865d-4faa564f1aef"
-
 def fetch_question_by_id(task_id: str, api_base: str = DEFAULT_API_URL):
     """Return JSON of a question for a given task_id.
@@ -60,31 +70,53 @@ def maybe_download_file(task_id: str, api_base: str = DEFAULT_API_URL) -> str |
 
 
 def main():
+    print("Specific Agent Test - New LangGraph Architecture")
+    print("=" * 60)
+
+    try:
+        # Determine the task ID (CLI arg > env var > default)
+        task_id = (
+            sys.argv[1] if len(sys.argv) > 1 else os.environ.get("TASK_ID", DEFAULT_TASK_ID)
+        )
+        print(f"Using task ID: {task_id}")
+
+        # Fetch specific question
+        q = fetch_question_by_id(task_id)
+        question_text = q["question"]
+
+        print("\n=== Specific Question ===")
+        print(f"Task ID : {task_id}")
+        print(f"Question: {question_text}")
+
+        # Attempt to get attachment if any
+        attachment_path = maybe_download_file(task_id)
+        if attachment_path:
+            question_text += f"\n\nAttachment available at: {attachment_path}"
+
+        # Run the new agent system
+        print("\n=== Running LangGraph Agent System ===")
+
+        # Set environment variables for user/session tracking
+        os.environ["USER_ID"] = "test_user"
+        os.environ["SESSION_ID"] = f"session_{task_id}"
+
+        result = run_agent(question_text)
+
+        print("\n=== Agent Answer ===")
+        print(result)
+
+    except Exception as e:
+        print(f"Error in main execution: {e}")
+        import traceback
+        traceback.print_exc()
+
+    finally:
+        # Cleanup
+        try:
+            cleanup()
+            print("\n✅ Agent cleanup completed")
+        except Exception as e:
+            print(f"⚠️ Cleanup warning: {e}")
 
 
 if __name__ == "__main__":
requirements.txt
CHANGED
|
@@ -6,13 +6,16 @@ aiohappyeyeballs==2.6.1
     # via aiohttp
 aiohttp==3.12.9
     # via
+    #   fsspec
     #   langchain-community
     #   llama-index-core
     #   realtime
 aiosignal==1.3.2
     # via aiohttp
 aiosqlite==0.21.0
-    # via
+    # via
+    #   langgraph-checkpoint-sqlite
+    #   llama-index-core
 annotated-types==0.7.0
     # via pydantic
 anyio==4.9.0
@@ -72,6 +75,8 @@ dataclasses-json==0.6.7
     # via
     #   langchain-community
     #   llama-index-core
+datasets==3.6.0
+    # via final-assignment-template (pyproject.toml)
 debugpy==1.8.14
     # via ipykernel
 decorator==5.2.1
@@ -84,6 +89,10 @@ deprecated==1.2.18
     #   llama-index-core
 deprecation==2.1.0
     # via postgrest
+dill==0.3.8
+    # via
+    #   datasets
+    #   multiprocess
 dirtyjson==1.0.8
     # via llama-index-core
 distro==1.9.0
@@ -109,6 +118,7 @@ ffmpy==0.6.0
     # via gradio
 filelock==3.18.0
     # via
+    #   datasets
     #   huggingface-hub
     #   torch
     #   transformers
@@ -120,8 +130,9 @@ frozenlist==1.6.2
     # via
     #   aiohttp
     #   aiosignal
-fsspec==2025.
+fsspec==2025.3.0
     # via
+    #   datasets
     #   gradio-client
     #   huggingface-hub
     #   llama-index-core
@@ -198,6 +209,7 @@ httpx-sse==0.4.0
 huggingface-hub==0.32.4
     # via
     #   final-assignment-template (pyproject.toml)
+    #   datasets
     #   gradio
     #   gradio-client
     #   langchain-huggingface
@@ -284,8 +296,12 @@ langgraph==0.4.8
     # via final-assignment-template (pyproject.toml)
 langgraph-checkpoint==2.1.0
     # via
+    #   final-assignment-template (pyproject.toml)
     #   langgraph
+    #   langgraph-checkpoint-sqlite
     #   langgraph-prebuilt
+langgraph-checkpoint-sqlite==2.0.10
+    # via final-assignment-template (pyproject.toml)
 langgraph-prebuilt==0.2.2
     # via langgraph
 langgraph-sdk==0.1.70
@@ -387,6 +403,8 @@ multidict==6.4.4
     # via
     #   aiohttp
     #   yarl
+multiprocess==0.70.16
+    # via datasets
 mypy-extensions==1.1.0
     # via typing-inspect
 nest-asyncio==1.6.0
@@ -403,6 +421,7 @@ nltk==3.9.1
     #   llama-index-core
 numpy==2.2.6
     # via
+    #   datasets
     #   gradio
     #   langchain-community
     #   llama-index-core
@@ -457,6 +476,7 @@ ormsgpack==1.10.0
     # via langgraph-checkpoint
 packaging==24.2
     # via
+    #   datasets
     #   deprecation
     #   gradio
     #   gradio-client
@@ -471,6 +491,7 @@ packaging==24.2
 pandas==2.2.3
     # via
     #   final-assignment-template (pyproject.toml)
+    #   datasets
     #   gradio
     #   llama-index-readers-file
 parso==0.8.4
@@ -513,6 +534,8 @@ psutil==7.0.0
     # via ipykernel
 pure-eval==0.2.3
     # via stack-data
+pyarrow==20.0.0
+    # via datasets
 pyasn1==0.6.1
     # via
     #   pyasn1-modules
@@ -572,9 +595,11 @@ python-multipart==0.0.20
     # via gradio
 pytz==2025.2
     # via pandas
+pywin32==310
     # via jupyter-core
 pyyaml==6.0.2
     # via
+    #   datasets
     #   gradio
     #   huggingface-hub
     #   langchain
@@ -596,6 +621,7 @@ regex==2024.11.6
 requests==2.32.3
     # via
     #   arxiv
+    #   datasets
     #   google-api-core
     #   huggingface-hub
     #   langchain
@@ -651,6 +677,8 @@ sqlalchemy==2.0.41
     #   langchain
     #   langchain-community
     #   llama-index-core
+sqlite-vec==0.1.6
+    # via langgraph-checkpoint-sqlite
 stack-data==0.6.3
     # via ipython
 starlette==0.46.2
@@ -701,6 +729,7 @@ tornado==6.5.1
     #   jupyter-client
 tqdm==4.67.1
     # via
+    #   datasets
     #   huggingface-hub
     #   llama-index-core
     #   nltk
@@ -783,7 +812,9 @@ wrapt==1.17.2
     #   langfuse
     #   llama-index-core
 xxhash==3.5.0
-    # via
+    # via
+    #   datasets
+    #   langgraph
 yarl==1.20.0
     # via aiohttp
 zipp==3.22.0
src/__init__.py
ADDED
|
@@ -0,0 +1,14 @@
"""LangGraph Agent System Package"""

from .langgraph_system import run_agent_system, create_agent_graph, AgentState
from .memory import memory_manager
from .tracing import get_langfuse_callback_handler, initialize_langfuse

__all__ = [
    "run_agent_system",
    "create_agent_graph",
    "AgentState",
    "memory_manager",
    "get_langfuse_callback_handler",
    "initialize_langfuse"
]
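Since the package root re-exports the public surface, callers can import everything in one line:

```python
# One-stop imports via the package root (names re-exported above)
from src import run_agent_system, memory_manager, get_langfuse_callback_handler
```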
src/__pycache__/__init__.cpython-313.pyc
ADDED
Binary file (541 Bytes)

src/__pycache__/langgraph_system.cpython-313.pyc
ADDED
Binary file (7.61 kB)

src/__pycache__/memory.cpython-313.pyc
ADDED
Binary file (9.3 kB)

src/__pycache__/tracing.cpython-313.pyc
ADDED
Binary file (5.98 kB)
src/agents/__init__.py
ADDED
|
@@ -0,0 +1,21 @@
"""Agent Modules Package"""

from .plan_node import plan_node
from .router_node import router_node, should_route_to_agent
from .retrieval_agent import retrieval_agent, get_retrieval_tools
from .execution_agent import execution_agent, get_execution_tools
from .critic_agent import critic_agent
from .verification_node import verification_node, should_retry

__all__ = [
    "plan_node",
    "router_node",
    "should_route_to_agent",
    "retrieval_agent",
    "get_retrieval_tools",
    "execution_agent",
    "get_execution_tools",
    "critic_agent",
    "verification_node",
    "should_retry"
]
src/agents/__pycache__/__init__.cpython-313.pyc
ADDED
Binary file (648 Bytes)

src/agents/__pycache__/critic_agent.cpython-313.pyc
ADDED
Binary file (3.92 kB)

src/agents/__pycache__/execution_agent.cpython-313.pyc
ADDED
Binary file (6.88 kB)

src/agents/__pycache__/plan_node.cpython-313.pyc
ADDED
Binary file (3.14 kB)

src/agents/__pycache__/retrieval_agent.cpython-313.pyc
ADDED
Binary file (12 kB)

src/agents/__pycache__/router_node.cpython-313.pyc
ADDED
Binary file (3.79 kB)

src/agents/__pycache__/verification_node.cpython-313.pyc
ADDED
Binary file (6.99 kB)
src/agents/critic_agent.py
ADDED
|
@@ -0,0 +1,118 @@
"""Critic Agent - Evaluates and reviews responses for quality and accuracy"""
from typing import Dict, Any
from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
from langchain_groq import ChatGroq
from src.tracing import get_langfuse_callback_handler


def load_critic_prompt() -> str:
    """Load the critic prompt from file"""
    try:
        with open("./prompts/critic_prompt.txt", "r", encoding="utf-8") as f:
            return f.read().strip()
    except FileNotFoundError:
        return """You are a specialized critic agent. Evaluate responses for accuracy, completeness, and quality."""


def critic_agent(state: Dict[str, Any]) -> Dict[str, Any]:
    """
    Critic agent that evaluates responses for quality and accuracy
    """
    print("Critic Agent: Evaluating response quality")

    try:
        # Get critic prompt
        critic_prompt = load_critic_prompt()

        # Initialize LLM for criticism
        llm = ChatGroq(model="qwen-qwq-32b", temperature=0.2)

        # Get callback handler for tracing
        callback_handler = get_langfuse_callback_handler()
        callbacks = [callback_handler] if callback_handler else []

        # Build messages
        messages = state.get("messages", [])

        # Get the agent response to evaluate
        agent_response = state.get("agent_response")
        if not agent_response:
            # Find the last AI message
            for msg in reversed(messages):
                if msg.type == "ai":
                    agent_response = msg
                    break

        if not agent_response:
            print("Critic Agent: No response to evaluate")
            return {
                **state,
                "critic_assessment": "No response found to evaluate",
                "quality_score": 0,
                "current_step": "verification"
            }

        # Get user query for context
        user_query = None
        for msg in reversed(messages):
            if msg.type == "human":
                user_query = msg.content
                break

        # Build critic messages
        critic_messages = [SystemMessage(content=critic_prompt)]

        # Add evaluation request
        evaluation_request = f"""
Please evaluate the following response:

Original Query: {user_query or "Unknown query"}

Response to Evaluate:
{agent_response.content}

Provide your evaluation following the format specified in your instructions.
"""

        critic_messages.append(HumanMessage(content=evaluation_request))

        # Get critic evaluation
        evaluation = llm.invoke(critic_messages, config={"callbacks": callbacks})

        # Parse evaluation to determine if it passes
        evaluation_text = evaluation.content.lower()
        quality_pass = True
        quality_score = 7  # Default moderate score

        # Simple heuristics for quality assessment
        if "fail" in evaluation_text or "poor" in evaluation_text:
            quality_pass = False
            quality_score = 3
        elif "excellent" in evaluation_text or "outstanding" in evaluation_text:
            quality_score = 9
        elif "good" in evaluation_text:
            quality_score = 7
        elif "issues" in evaluation_text or "problems" in evaluation_text:
            quality_score = 5

        # Add critic evaluation to messages
        updated_messages = messages + [evaluation]

        return {
            **state,
            "messages": updated_messages,
            "critic_assessment": evaluation.content,
            "quality_pass": quality_pass,
            "quality_score": quality_score,
            "current_step": "verification"
        }

    except Exception as e:
        print(f"Critic Agent Error: {e}")
        return {
            **state,
            "critic_assessment": f"Error during evaluation: {e}",
            "quality_pass": False,
            "quality_score": 0,
            "current_step": "verification"
        }
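A standalone copy of the scoring heuristic above, for illustration; note the elif ordering means an evaluation containing both "good" and "issues" still scores 7:

```python
def score_evaluation(evaluation_text: str) -> int:
    """Mirror of critic_agent's keyword heuristic (illustrative only)."""
    text = evaluation_text.lower()
    if "fail" in text or "poor" in text:
        return 3
    elif "excellent" in text or "outstanding" in text:
        return 9
    elif "good" in text:
        return 7
    elif "issues" in text or "problems" in text:
        return 5
    return 7  # default moderate score

assert score_evaluation("Good answer, minor issues") == 7  # "good" wins over "issues"
```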
src/agents/execution_agent.py
ADDED
|
@@ -0,0 +1,174 @@
"""Execution Agent - Handles code execution and computational tasks"""
from typing import Dict, Any, List
from langchain_core.messages import SystemMessage, HumanMessage, AIMessage, ToolMessage
from langchain_core.tools import tool
from langchain_groq import ChatGroq
from code_agent import run_agent  # Import our existing code execution engine
from src.tracing import get_langfuse_callback_handler


@tool
def run_python(input: str) -> str:
    """Execute Python code in a restricted sandbox (code-interpreter).

    Pass **any** coding or file-manipulation task here and the agent will
    compute the answer by running Python. The entire standard library is NOT
    available; heavy networking is disabled. Suitable for: math, data-frames,
    small file parsing, algorithmic questions.
    """
    return run_agent(input)


def load_execution_prompt() -> str:
    """Load the execution prompt from file"""
    try:
        with open("./prompts/execution_prompt.txt", "r", encoding="utf-8") as f:
            return f.read().strip()
    except FileNotFoundError:
        return """You are a specialized execution agent. Use the run_python tool to execute code and solve computational problems."""


def get_execution_tools() -> List:
    """Get list of tools available to the execution agent"""
    return [run_python]


def execute_tool_calls(tool_calls: list, tools: list) -> list:
    """Execute tool calls and return results"""
    tool_messages = []

    # Create a mapping of tool names to tool functions
    tool_map = {tool.name: tool for tool in tools}

    for tool_call in tool_calls:
        tool_name = tool_call['name']
        tool_args = tool_call['args']
        tool_call_id = tool_call['id']

        if tool_name in tool_map:
            try:
                print(f"Execution Agent: Executing {tool_name} with args: {str(tool_args)[:200]}...")
                result = tool_map[tool_name].invoke(tool_args)
                tool_messages.append(
                    ToolMessage(
                        content=str(result),
                        tool_call_id=tool_call_id
                    )
                )
            except Exception as e:
                print(f"Error executing {tool_name}: {e}")
                tool_messages.append(
                    ToolMessage(
                        content=f"Error executing {tool_name}: {e}",
                        tool_call_id=tool_call_id
                    )
                )
        else:
            tool_messages.append(
                ToolMessage(
                    content=f"Unknown tool: {tool_name}",
                    tool_call_id=tool_call_id
                )
            )

    return tool_messages


def needs_code_execution(query: str) -> bool:
    """Heuristic to determine if a query requires code execution"""
    code_indicators = [
        "calculate", "compute", "algorithm", "fibonacci", "math", "data",
        "programming", "code", "function", "sort", "csv", "json", "pandas",
        "plot", "graph", "analyze", "process", "file", "manipulation"
    ]
    query_lower = query.lower()
    return any(indicator in query_lower for indicator in code_indicators)


def execution_agent(state: Dict[str, Any]) -> Dict[str, Any]:
    """
    Execution agent that handles computational and code execution tasks
    """
    print("Execution Agent: Processing computational request")

    try:
        # Get execution prompt
        execution_prompt = load_execution_prompt()

        # Initialize LLM with tools
        llm = ChatGroq(model="qwen-qwq-32b", temperature=0.1)  # Lower temp for consistent code
        tools = get_execution_tools()
        llm_with_tools = llm.bind_tools(tools)

        # Get callback handler for tracing
        callback_handler = get_langfuse_callback_handler()
        callbacks = [callback_handler] if callback_handler else []

        # Build messages
        messages = state.get("messages", [])

        # Add execution system prompt
        execution_messages = [SystemMessage(content=execution_prompt)]

        # Get user query for analysis
        user_query = None
        for msg in reversed(messages):
            if msg.type == "human":
                user_query = msg.content
                break

        # If this clearly needs code execution, provide guidance
        if user_query and needs_code_execution(user_query):
            guidance_msg = HumanMessage(
                content=f"""Task requiring code execution: {user_query}

Please analyze this computational task and use the run_python tool to solve it step by step.
Break down complex problems into smaller steps and provide clear explanations."""
            )
            execution_messages.append(guidance_msg)

        # Add original messages (excluding system messages to avoid duplicates)
        for msg in messages:
            if msg.type != "system":
                execution_messages.append(msg)

        # Get initial response from LLM
        response = llm_with_tools.invoke(execution_messages, config={"callbacks": callbacks})

        # Check if the LLM wants to use tools
        if response.tool_calls:
            print(f"Execution Agent: LLM requested {len(response.tool_calls)} tool calls")

            # Execute the tool calls
            tool_messages = execute_tool_calls(response.tool_calls, tools)

            # Add the response and tool messages to conversation
            execution_messages.extend([response] + tool_messages)

            # Get final response after tool execution
            final_response = llm.invoke(execution_messages, config={"callbacks": callbacks})

            return {
                **state,
                "messages": execution_messages + [final_response],
                "agent_response": final_response,
                "current_step": "verification"
            }
        else:
            # Direct response without tools
            return {
                **state,
                "messages": execution_messages + [response],
                "agent_response": response,
                "current_step": "verification"
            }

    except Exception as e:
        print(f"Execution Agent Error: {e}")
        error_response = AIMessage(content=f"I encountered an error while processing your computational request: {e}")
        return {
            **state,
            "messages": state.get("messages", []) + [error_response],
            "agent_response": error_response,
            "current_step": "verification"
        }
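Usage sketch: the node takes and returns a plain state dict (assumes GROQ_API_KEY is set and the code_agent module providing the sandbox is importable):

```python
from langchain_core.messages import HumanMessage
from src.agents.execution_agent import execution_agent

state = {"messages": [HumanMessage(content="Calculate the first 10 Fibonacci numbers")]}
new_state = execution_agent(state)
print(new_state["agent_response"].content)
print(new_state["current_step"])  # -> "verification"
```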
src/agents/plan_node.py
ADDED
|
@@ -0,0 +1,79 @@
"""Plan Node - Initial ReAct planning loop"""
from typing import Dict, Any
from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
from langchain_groq import ChatGroq
from src.tracing import get_langfuse_callback_handler


def load_system_prompt() -> str:
    """Load the system prompt from file"""
    try:
        with open("./prompts/system_prompt.txt", "r", encoding="utf-8") as f:
            return f.read().strip()
    except FileNotFoundError:
        return "You are a helpful assistant tasked with answering GAIA benchmark questions."


def plan_node(state: Dict[str, Any]) -> Dict[str, Any]:
    """
    Initial planning node that sets up the conversation with system prompt
    and prepares for agent routing
    """
    print("Plan Node: Processing query")

    try:
        # Get the system prompt
        system_prompt = load_system_prompt()

        # Initialize LLM for planning
        llm = ChatGroq(model="qwen-qwq-32b", temperature=0.1)

        # Get callback handler for tracing
        callback_handler = get_langfuse_callback_handler()
        callbacks = [callback_handler] if callback_handler else []

        # Extract user messages
        messages = state.get("messages", [])
        if not messages:
            return {"messages": [SystemMessage(content=system_prompt)]}

        # Build message list with system prompt
        plan_messages = [SystemMessage(content=system_prompt)]

        # Add existing messages
        for msg in messages:
            if msg.type != "system":  # Avoid duplicate system messages
                plan_messages.append(msg)

        # Add planning instruction
        planning_instruction = """
Analyze this query and prepare a plan for answering it. Consider:
1. What type of information or processing is needed?
2. What tools or agents would be most appropriate?
3. What is the expected output format?

Provide a brief analysis and initial plan.
"""

        if plan_messages and plan_messages[-1].type == "human":
            # Get LLM analysis of the query
            analysis_messages = plan_messages + [HumanMessage(content=planning_instruction)]

            response = llm.invoke(analysis_messages, config={"callbacks": callbacks})
            plan_messages.append(response)

        # Return outside the if-block so the node never yields None
        # (originally this return only ran when the last message was human,
        # which would make a LangGraph node silently return nothing otherwise)
        return {
            "messages": plan_messages,
            "plan_complete": True,
            "current_step": "routing"
        }

    except Exception as e:
        print(f"Plan Node Error: {e}")
        # Fallback with basic system message
        system_prompt = load_system_prompt()
        return {
            "messages": [SystemMessage(content=system_prompt)] + state.get("messages", []),
            "plan_complete": True,
            "current_step": "routing"
        }
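Usage sketch for the node (assumes GROQ_API_KEY is set; the prompt file path resolves relative to the working directory):

```python
from langchain_core.messages import HumanMessage
from src.agents.plan_node import plan_node

state = {"messages": [HumanMessage(content="When was the Eiffel Tower completed?")]}
new_state = plan_node(state)
print(new_state["current_step"])  # -> "routing"
```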
src/agents/retrieval_agent.py
ADDED
|
@@ -0,0 +1,268 @@
"""Retrieval Agent - Handles information gathering and search tasks"""
import os
import requests
from typing import Dict, Any, List
from langchain_core.messages import SystemMessage, HumanMessage, AIMessage, ToolMessage
from langchain_core.tools import tool
from langchain_groq import ChatGroq
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_community.document_loaders import WikipediaLoader, ArxivLoader
from langchain.tools.retriever import create_retriever_tool
from src.memory import memory_manager
from src.tracing import get_langfuse_callback_handler


# Tool definitions (same as original)
@tool
def wiki_search(input: str) -> str:
    """Search Wikipedia for a query and return maximum 2 results.

    Args:
        input: The search query."""
    try:
        search_docs = WikipediaLoader(query=input, load_max_docs=2).load()
        if not search_docs:
            return "No Wikipedia results found for the query."
        formatted_search_docs = "\n\n---\n\n".join(
            [
                f'<Document source="{doc.metadata.get("source", "Unknown")}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
                for doc in search_docs
            ])
        return formatted_search_docs
    except Exception as e:
        print(f"Error in wiki_search: {e}")
        return f"Error searching Wikipedia: {e}"


@tool
def web_search(input: str) -> str:
    """Search Tavily for a query and return maximum 3 results.

    Args:
        input: The search query."""
    try:
        search_docs = TavilySearchResults(max_results=3).invoke(input)
        if not search_docs:
            return "No web search results found for the query."
        formatted_search_docs = "\n\n---\n\n".join(
            [
                f'<Document source="{doc.get("url", "Unknown")}" />\n{doc.get("content", "No content")}\n</Document>'
                for doc in search_docs
            ])
        return formatted_search_docs
    except Exception as e:
        print(f"Error in web_search: {e}")
        return f"Error searching web: {e}"


@tool
def arvix_search(input: str) -> str:
    """Search Arxiv for a query and return maximum 3 results.

    Args:
        input: The search query."""
    try:
        search_docs = ArxivLoader(query=input, load_max_docs=3).load()
        if not search_docs:
            return "No Arxiv results found for the query."
        formatted_search_docs = "\n\n---\n\n".join(
            [
                f'<Document source="{doc.metadata.get("source", "Unknown")}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
                for doc in search_docs
            ])
        return formatted_search_docs
    except Exception as e:
        print(f"Error in arvix_search: {e}")
        return f"Error searching Arxiv: {e}"


def load_retrieval_prompt() -> str:
    """Load the retrieval prompt from file"""
    try:
        with open("./prompts/retrieval_prompt.txt", "r", encoding="utf-8") as f:
            return f.read().strip()
    except FileNotFoundError:
        return """You are a specialized retrieval agent. Use available tools to search for information and provide comprehensive answers."""


def get_retrieval_tools() -> List:
    """Get list of tools available to the retrieval agent"""
    tools = [wiki_search, web_search, arvix_search]

    # Add vector store retrieval tool if available
    if memory_manager.vector_store:
        try:
            retrieval_tool = create_retriever_tool(
                retriever=memory_manager.vector_store.as_retriever(),
                name="question_search",
                description="A tool to retrieve similar questions from a vector store.",
            )
            tools.append(retrieval_tool)
        except Exception as e:
            print(f"Could not create retrieval tool: {e}")

    return tools


def execute_tool_calls(tool_calls: list, tools: list) -> list:
    """Execute tool calls and return results"""
    tool_messages = []

    # Create a mapping of tool names to tool functions
    tool_map = {tool.name: tool for tool in tools}

    for tool_call in tool_calls:
        tool_name = tool_call['name']
        tool_args = tool_call['args']
        tool_call_id = tool_call['id']

        if tool_name in tool_map:
            try:
                print(f"Retrieval Agent: Executing {tool_name} with args: {tool_args}")
                result = tool_map[tool_name].invoke(tool_args)
                tool_messages.append(
                    ToolMessage(
                        content=str(result),
                        tool_call_id=tool_call_id
                    )
                )
            except Exception as e:
                print(f"Error executing {tool_name}: {e}")
                tool_messages.append(
                    ToolMessage(
                        content=f"Error executing {tool_name}: {e}",
                        tool_call_id=tool_call_id
                    )
                )
        else:
            tool_messages.append(
                ToolMessage(
                    content=f"Unknown tool: {tool_name}",
                    tool_call_id=tool_call_id
                )
            )

    return tool_messages


def fetch_attachment_if_needed(query: str) -> str:
    """Fetch attachment content if the query matches a known task"""
    try:
        DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
        resp = requests.get(f"{DEFAULT_API_URL}/questions", timeout=30)
        resp.raise_for_status()
        questions = resp.json()

        for q in questions:
            if str(q.get("question")).strip() == str(query).strip():
                task_id = str(q.get("task_id"))
                print(f"Retrieval Agent: Downloading attachment for task {task_id}")
                file_resp = requests.get(f"{DEFAULT_API_URL}/files/{task_id}", timeout=60)
                if file_resp.status_code == 200 and file_resp.content:
                    try:
                        file_text = file_resp.content.decode("utf-8", errors="replace")
                    except Exception:
                        file_text = "(binary or non-UTF8 file omitted)"
                    MAX_CHARS = 8000
                    if len(file_text) > MAX_CHARS:
                        file_text = file_text[:MAX_CHARS] + "\n… (truncated)"
                    return f"Attached file content for task {task_id}:\n```python\n{file_text}\n```"
                else:
                    print(f"No attachment for task {task_id}")
                    return ""
        return ""
    except Exception as e:
        print(f"Error fetching attachment: {e}")
        return ""


def retrieval_agent(state: Dict[str, Any]) -> Dict[str, Any]:
    """
    Retrieval agent that handles information gathering tasks
    """
    print("Retrieval Agent: Processing information retrieval request")

    try:
        # Get retrieval prompt
        retrieval_prompt = load_retrieval_prompt()

        # Initialize LLM with tools
        llm = ChatGroq(model="qwen-qwq-32b", temperature=0.3)
        tools = get_retrieval_tools()
        llm_with_tools = llm.bind_tools(tools)

        # Get callback handler for tracing
        callback_handler = get_langfuse_callback_handler()
        callbacks = [callback_handler] if callback_handler else []

        # Build messages
        messages = state.get("messages", [])

        # Add retrieval system prompt
        retrieval_messages = [SystemMessage(content=retrieval_prompt)]

        # Get user query for context and attachment fetching
        user_query = None
        for msg in reversed(messages):
            if msg.type == "human":
                user_query = msg.content
                break

        # Check for similar questions in memory
        if user_query:
            similar_qa = memory_manager.get_similar_qa(user_query)
            if similar_qa:
                context_msg = HumanMessage(
                    content=f"Here is a similar question and answer for reference:\n\n{similar_qa}"
                )
                retrieval_messages.append(context_msg)

            # Fetch attachment if needed
            attachment_content = fetch_attachment_if_needed(user_query)
            if attachment_content:
                attachment_msg = HumanMessage(content=attachment_content)
                retrieval_messages.append(attachment_msg)

        # Add original messages (excluding system messages to avoid duplicates)
        for msg in messages:
            if msg.type != "system":
                retrieval_messages.append(msg)

        # Get initial response from LLM and iterate tool calls if necessary
        response = llm_with_tools.invoke(retrieval_messages, config={"callbacks": callbacks})

        max_tool_iterations = 3  # safeguard to prevent infinite loops
        iteration = 0

        while response.tool_calls and iteration < max_tool_iterations:
            iteration += 1
            print(f"Retrieval Agent: LLM requested {len(response.tool_calls)} tool calls (iteration {iteration})")

            # Execute the tool calls
            tool_messages = execute_tool_calls(response.tool_calls, tools)

            # Append the LLM response and tool results to the conversation
            retrieval_messages.extend([response] + tool_messages)

            # Ask the model again with the new information
            response = llm_with_tools.invoke(retrieval_messages, config={"callbacks": callbacks})

        # After iterating (or if no tool calls), we have our final response
        retrieval_messages.append(response)

        return {
            **state,
            "messages": retrieval_messages,
            "agent_response": response,
            "current_step": "verification"
        }

    except Exception as e:
        print(f"Retrieval Agent Error: {e}")
        error_response = AIMessage(content=f"I encountered an error while processing your request: {e}")
        return {
            **state,
            "messages": state.get("messages", []) + [error_response],
            "agent_response": error_response,
            "current_step": "verification"
        }
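Usage sketch for the bounded tool-call loop above (assumes GROQ_API_KEY is set, plus TAVILY_API_KEY if web_search ends up being invoked):

```python
from langchain_core.messages import HumanMessage
from src.agents.retrieval_agent import retrieval_agent

state = {"messages": [HumanMessage(content="When was the Eiffel Tower built?")]}
new_state = retrieval_agent(state)
print(new_state["agent_response"].content)
```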
src/agents/router_node.py
ADDED
@@ -0,0 +1,97 @@
"""Router Node - Decides which specialized agent to use"""
from typing import Dict, Any, Literal
from langchain_core.messages import SystemMessage, HumanMessage
from langchain_groq import ChatGroq
from src.tracing import get_langfuse_callback_handler


def load_router_prompt() -> str:
    """Load the router prompt from file"""
    try:
        with open("./prompts/router_prompt.txt", "r", encoding="utf-8") as f:
            return f.read().strip()
    except FileNotFoundError:
        return """You are an intelligent agent router. Analyze the query and respond with exactly one of: RETRIEVAL, EXECUTION, or CRITIC"""


def router_node(state: Dict[str, Any]) -> Dict[str, Any]:
    """
    Router node that analyzes the user query and determines which agent should handle it
    Returns: next_agent = 'retrieval' | 'execution' | 'critic'
    """
    print("Router Node: Analyzing query for agent selection")

    try:
        # Get router prompt
        router_prompt = load_router_prompt()

        # Initialize LLM for routing decision
        llm = ChatGroq(model="qwen-qwq-32b", temperature=0.0)  # Low temperature for consistent routing

        # Get callback handler for tracing
        callback_handler = get_langfuse_callback_handler()
        callbacks = [callback_handler] if callback_handler else []

        # Extract the last human message for routing decision
        messages = state.get("messages", [])
        user_query = None

        for msg in reversed(messages):
            if msg.type == "human":
                user_query = msg.content
                break

        if not user_query:
            print("Router Node: No user query found, defaulting to retrieval")
            return {
                **state,
                "next_agent": "retrieval",
                "routing_reason": "No user query found"
            }

        # Build routing messages
        routing_messages = [
            SystemMessage(content=router_prompt),
            HumanMessage(content=f"Query to route: {user_query}")
        ]

        # Get routing decision
        response = llm.invoke(routing_messages, config={"callbacks": callbacks})
        routing_decision = response.content.strip().upper()

        # Map decision to next agent
        next_agent = "retrieval"  # Default fallback
        if "RETRIEVAL" in routing_decision:
            next_agent = "retrieval"
        elif "EXECUTION" in routing_decision:
            next_agent = "execution"
        elif "CRITIC" in routing_decision:
            next_agent = "critic"

        print(f"Router Node: Routing to {next_agent} agent (decision: {routing_decision})")

        return {
            **state,
            "next_agent": next_agent,
            "routing_decision": routing_decision,
            "routing_reason": f"Query analysis resulted in: {routing_decision}",
            "current_step": next_agent
        }

    except Exception as e:
        print(f"Router Node Error: {e}")
        # Fallback to retrieval agent
        return {
            **state,
            "next_agent": "retrieval",
            "routing_reason": f"Error in routing: {e}"
        }


def should_route_to_agent(state: Dict[str, Any]) -> Literal["retrieval", "execution", "critic"]:
    """
    Conditional edge function that determines which agent to route to
    """
    next_agent = state.get("next_agent", "retrieval")
    print(f"Routing to: {next_agent}")
    return next_agent
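The router can be exercised on its own with a minimal state dict. A sketch of that, assuming GROQ_API_KEY is set in the environment (the routing decision itself depends on the model output, so the printed agent may vary):

from langchain_core.messages import HumanMessage
from src.agents.router_node import router_node

state = {"messages": [HumanMessage(content="What is the capital of France?")]}
result = router_node(state)
print(result["next_agent"], "-", result["routing_reason"])
# e.g. "retrieval - Query analysis resulted in: RETRIEVAL"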
src/agents/verification_node.py
ADDED
@@ -0,0 +1,172 @@
"""Verification Node - Final quality control and output formatting"""
from typing import Dict, Any
from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
from langchain_groq import ChatGroq
from src.tracing import get_langfuse_callback_handler


def load_verification_prompt() -> str:
    """Load the verification prompt from file"""
    try:
        with open("./prompts/verification_prompt.txt", "r", encoding="utf-8") as f:
            return f.read().strip()
    except FileNotFoundError:
        return """You are a verification agent. Ensure responses meet quality standards and format requirements."""


def extract_final_answer(response_content: str) -> str:
    """Extract and format the final answer according to system prompt requirements"""
    # Remove common prefixes and suffixes
    answer = response_content.strip()

    # Remove markdown formatting
    answer = answer.replace("**", "").replace("*", "")

    # Remove common answer prefixes
    prefixes_to_remove = [
        "Final Answer:", "Answer:", "The answer is:", "The final answer is:",
        "Result:", "Solution:", "Response:", "Output:", "Conclusion:"
    ]

    for prefix in prefixes_to_remove:
        if answer.lower().startswith(prefix.lower()):
            answer = answer[len(prefix):].strip()

    # Remove quotes and brackets if they wrap the entire answer
    answer = answer.strip('"\'()[]{}')

    # Handle lists - format with comma and space separation
    if '\n' in answer and all(line.strip().startswith(('-', '*', '•')) for line in answer.split('\n') if line.strip()):
        # Convert bullet list to comma-separated
        items = [line.strip().lstrip('-*•').strip() for line in answer.split('\n') if line.strip()]
        answer = ', '.join(items)

    return answer.strip()


def verification_node(state: Dict[str, Any]) -> Dict[str, Any]:
    """
    Verification node that performs final quality control and formatting
    """
    print("Verification Node: Performing final quality control")

    try:
        # Get verification prompt
        verification_prompt = load_verification_prompt()

        # Initialize LLM for verification
        llm = ChatGroq(model="qwen-qwq-32b", temperature=0.0)  # Very low temp for consistent formatting

        # Get callback handler for tracing
        callback_handler = get_langfuse_callback_handler()
        callbacks = [callback_handler] if callback_handler else []

        # Get state information
        messages = state.get("messages", [])
        quality_pass = state.get("quality_pass", True)
        quality_score = state.get("quality_score", 7)
        critic_assessment = state.get("critic_assessment", "")

        # Get the agent response to verify
        agent_response = state.get("agent_response")
        if not agent_response:
            # Find the last AI message
            for msg in reversed(messages):
                if msg.type == "ai":
                    agent_response = msg
                    break

        if not agent_response:
            print("Verification Node: No response to verify")
            return {
                **state,
                "final_answer": "No response found to verify",
                "verification_status": "failed",
                "current_step": "complete"
            }

        # Get user query for context
        user_query = None
        for msg in reversed(messages):
            if msg.type == "human":
                user_query = msg.content
                break

        # Determine if we should proceed or trigger fallback
        failure_threshold = 4
        attempt_count = state.get("attempt_count", 1)  # current attempt number

        if not quality_pass or quality_score < failure_threshold:
            if attempt_count >= 3:
                print("Verification Node: Maximum attempts reached, proceeding with fallback")
                return {
                    **state,
                    "final_answer": "Unable to provide a satisfactory answer after multiple attempts",
                    "verification_status": "failed_max_attempts",
                    "current_step": "fallback"
                }
            else:
                print(f"Verification Node: Quality check failed (score: {quality_score}), retrying")
                return {
                    **state,
                    "verification_status": "failed",
                    "attempt_count": attempt_count + 1,
                    "current_step": "routing"  # Retry from routing
                }

        # Quality passed, format the final answer
        print("Verification Node: Quality check passed, formatting final answer")

        # Build verification messages
        verification_messages = [SystemMessage(content=verification_prompt)]

        verification_request = f"""
Please verify and format the following response according to the exact-match output rules:

Original Query: {user_query or "Unknown query"}

Response to Verify:
{agent_response.content}

Quality Assessment: {critic_assessment}

Ensure the final output strictly adheres to the format requirements specified in the system prompt.
"""

        verification_messages.append(HumanMessage(content=verification_request))

        # Get verification response
        verification_response = llm.invoke(verification_messages, config={"callbacks": callbacks})

        # Extract and format the final answer
        final_answer = extract_final_answer(verification_response.content)

        # Store the final formatted answer
        return {
            **state,
            "messages": messages + [verification_response],
            "final_answer": final_answer,
            "verification_status": "passed",
            "current_step": "complete"
        }

    except Exception as e:
        print(f"Verification Node Error: {e}")
        # Fallback - try to extract an answer from the agent response stored in state
        # (the local variable may not exist if the error occurred early)
        agent_response = state.get("agent_response")
        if agent_response:
            fallback_answer = extract_final_answer(agent_response.content)
        else:
            fallback_answer = f"Error during verification: {e}"

        return {
            **state,
            "final_answer": fallback_answer,
            "verification_status": "error",
            "current_step": "complete"
        }


def should_retry(state: Dict[str, Any]) -> bool:
    """Determine if we should retry the process"""
    verification_status = state.get("verification_status", "")
    return verification_status == "failed" and state.get("attempt_count", 1) < 3
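extract_final_answer is a pure function, so its normalization rules are easy to check in isolation. A short sketch of what it does to two typical model outputs:

from src.agents.verification_node import extract_final_answer

print(extract_final_answer("Final Answer: **42**"))
# -> 42  (markdown stripped, then the "Final Answer:" prefix removed)

print(extract_final_answer("- apples\n- oranges\n- pears"))
# -> apples, oranges, pears  (bullet list collapsed to a comma-separated line)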
src/langgraph_system.py
ADDED
@@ -0,0 +1,231 @@
"""Main LangGraph Agent System Implementation"""
import os
from typing import Dict, Any, TypedDict, Literal
from langchain_core.messages import BaseMessage, HumanMessage
from langgraph.graph import StateGraph, END

# Import our agents and nodes
from src.agents.plan_node import plan_node
from src.agents.router_node import router_node, should_route_to_agent
from src.agents.retrieval_agent import retrieval_agent
from src.agents.execution_agent import execution_agent
from src.agents.critic_agent import critic_agent
from src.agents.verification_node import verification_node, should_retry
from src.memory import memory_manager
from src.tracing import (
    get_langfuse_callback_handler,
    update_trace_metadata,
    trace_agent_execution,
    flush_langfuse,
)


class AgentState(TypedDict):
    """State schema for the agent system"""
    # Core conversation
    messages: list[BaseMessage]

    # Planning and routing
    plan_complete: bool
    next_agent: str
    routing_decision: str
    routing_reason: str
    current_step: str

    # Agent responses
    agent_response: BaseMessage
    execution_result: str

    # Quality control
    critic_assessment: str
    quality_pass: bool
    quality_score: int
    verification_status: str

    # System management
    attempt_count: int
    final_answer: str


def create_agent_graph() -> StateGraph:
    """Create the LangGraph agent system"""

    # Initialize the state graph
    workflow = StateGraph(AgentState)

    # Add nodes
    workflow.add_node("plan", plan_node)
    workflow.add_node("router", router_node)
    workflow.add_node("retrieval", retrieval_agent)
    workflow.add_node("execution", execution_agent)
    workflow.add_node("critic", critic_agent)
    workflow.add_node("verification", verification_node)

    # Add fallback node
    def fallback_node(state: Dict[str, Any]) -> Dict[str, Any]:
        """Simple fallback that returns a basic response"""
        print("Fallback Node: Providing basic response")

        messages = state.get("messages", [])
        user_query = None

        for msg in reversed(messages):
            if msg.type == "human":
                user_query = msg.content
                break

        fallback_answer = "I apologize, but I was unable to provide a satisfactory answer to your question."
        if user_query:
            fallback_answer += f" Your question was: {user_query}"

        return {
            **state,
            "final_answer": fallback_answer,
            "verification_status": "fallback",
            "current_step": "complete"
        }

    workflow.add_node("fallback", fallback_node)

    # Set entry point
    workflow.set_entry_point("plan")

    # Add edges
    workflow.add_edge("plan", "router")

    # Conditional routing from router to agents
    workflow.add_conditional_edges(
        "router",
        should_route_to_agent,
        {
            "retrieval": "retrieval",
            "execution": "execution",
            "critic": "critic"
        }
    )

    # Route agent outputs through critic for quality evaluation before final verification
    workflow.add_edge("retrieval", "critic")
    workflow.add_edge("execution", "critic")
    # Critic (whether reached directly via routing or via other agents) proceeds to verification
    workflow.add_edge("critic", "verification")

    # Verification conditional logic
    def verification_next(state: Dict[str, Any]) -> Literal["router", "fallback", END]:
        """Determine next step after verification"""
        verification_status = state.get("verification_status", "")
        current_step = state.get("current_step", "")

        if current_step == "complete":
            return END
        elif verification_status == "failed" and state.get("attempt_count", 1) < 3:
            return "router"  # Retry
        elif verification_status == "failed_max_attempts":
            return "fallback"
        else:
            return END

    workflow.add_conditional_edges(
        "verification",
        verification_next,
        {
            "router": "router",
            "fallback": "fallback",
            END: END
        }
    )

    # Fallback ends the process
    workflow.add_edge("fallback", END)

    return workflow


def run_agent_system(query: str, user_id: str | None = None, session_id: str | None = None) -> str:
    """
    Run the complete agent system with a user query

    Args:
        query: The user question
        user_id: Optional user identifier for tracing
        session_id: Optional session identifier for tracing

    Returns:
        The final formatted answer
    """
    print(f"Agent System: Processing query: {query[:100]}...")

    # Open a **root** Langfuse span so that everything inside is neatly grouped
    with trace_agent_execution(name="user-request", user_id=user_id, session_id=session_id):
        try:
            # Enrich the root span with metadata & tags
            update_trace_metadata(
                user_id=user_id,
                session_id=session_id,
                tags=["agent_system"],
            )

            # Create the graph
            workflow = create_agent_graph()

            # Compile with checkpointing
            checkpointer = memory_manager.get_checkpointer()
            if checkpointer:
                app = workflow.compile(checkpointer=checkpointer)
            else:
                app = workflow.compile()

            # Prepare initial state
            initial_state = {
                "messages": [HumanMessage(content=query)],
                "plan_complete": False,
                "next_agent": "",
                "routing_decision": "",
                "routing_reason": "",
                "current_step": "planning",
                "agent_response": None,
                "execution_result": "",
                "critic_assessment": "",
                "quality_pass": True,
                "quality_score": 7,
                "verification_status": "",
                "attempt_count": 1,
                "final_answer": "",
            }

            # Configure execution – reuse *one* callback handler
            callback_handler = get_langfuse_callback_handler()
            config = {
                "configurable": {"thread_id": session_id or "default"},
            }
            if callback_handler:
                config["callbacks"] = [callback_handler]

            # Run the graph
            print("Agent System: Executing workflow...")
            final_state = app.invoke(initial_state, config=config)

            # Extract final answer
            final_answer = final_state.get("final_answer", "No answer generated")

            # Store in memory if appropriate
            if memory_manager.should_ingest(query):
                memory_manager.ingest_qa_pair(query, final_answer)

            print(f"Agent System: Completed. Final answer: {final_answer[:100]}...")
            return final_answer
        except Exception as e:
            print(f"Agent System Error: {e}")
            return (
                f"I apologize, but I encountered an error while processing your question: {e}"
            )
        finally:
            # Ensure Langfuse spans are exported even in short-lived environments
            try:
                flush_langfuse()
            except Exception:
                pass


# Export the main function
__all__ = ["run_agent_system", "create_agent_graph", "AgentState"]
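End to end, the system is driven through run_agent_system. A minimal usage sketch, assuming the GROQ, Supabase, and Langfuse credentials are available in the environment (the code degrades gracefully and only prints warnings when they are not):

from src.langgraph_system import run_agent_system

answer = run_agent_system(
    "What is the capital of France?",
    user_id="demo-user",
    session_id="demo-session",
)
print(answer)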
src/memory.py
ADDED
@@ -0,0 +1,162 @@
"""Memory Layer Implementation for LangGraph Agent System"""
import os
import time
import hashlib
import sqlite3
from typing import Optional, List, Dict, Any, Tuple
from langchain_community.vectorstores import SupabaseVectorStore
from langchain_huggingface import HuggingFaceEmbeddings
from supabase.client import Client, create_client
from langgraph.checkpoint.sqlite import SqliteSaver
from langchain_core.messages import BaseMessage, HumanMessage


# Constants for memory management
TTL = 300  # seconds – how long we keep similarity-search results
SIMILARITY_THRESHOLD = 0.85  # cosine score above which we assume we already know the answer


class MemoryManager:
    """Manages short-term, long-term memory and checkpointing for the agent system"""

    def __init__(self):
        self.embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
        self.vector_store = None
        self.checkpointer = None
        self._sqlite_connection = None

        # In-memory caches
        self.query_cache: Dict[str, Tuple[float, List]] = {}
        self.processed_tasks: set[str] = set()
        self.seen_hashes: set[str] = set()

        self._initialize_vector_store()
        self._initialize_checkpointer()

    def _initialize_vector_store(self) -> None:
        """Initialize Supabase vector store for long-term memory"""
        try:
            supabase_url = os.environ.get("SUPABASE_URL")
            supabase_key = os.environ.get("SUPABASE_SERVICE_KEY")

            if not supabase_url or not supabase_key:
                print("Warning: Supabase credentials not found, vector store will be disabled")
                return

            supabase: Client = create_client(supabase_url, supabase_key)
            self.vector_store = SupabaseVectorStore(
                client=supabase,
                embedding=self.embeddings,
                table_name="documents",
                query_name="match_documents_langchain",
            )
            print("Vector store initialized successfully")
        except Exception as e:
            print(f"Warning: Could not initialize Supabase vector store: {e}")

    def _initialize_checkpointer(self) -> None:
        """Initialize SQLite checkpointer for short-term memory"""
        try:
            # Create a direct SQLite connection
            self._sqlite_connection = sqlite3.connect(":memory:", check_same_thread=False)
            self.checkpointer = SqliteSaver(self._sqlite_connection)
            print("Checkpointer initialized successfully")
        except Exception as e:
            print(f"Warning: Could not initialize checkpointer: {e}")

    def get_checkpointer(self) -> Optional[SqliteSaver]:
        """Get the checkpointer instance"""
        return self.checkpointer

    def close_checkpointer(self) -> None:
        """Close the checkpointer and its SQLite connection"""
        if self._sqlite_connection:
            try:
                self._sqlite_connection.close()
                print("SQLite connection closed")
            except Exception as e:
                print(f"Warning: Error closing SQLite connection: {e}")

    def similarity_search(self, query: str, k: int = 2) -> List[Any]:
        """Search for similar questions with caching"""
        if not self.vector_store:
            return []

        # Check cache first
        q_hash = hashlib.sha256(query.encode()).hexdigest()
        now = time.time()

        if q_hash in self.query_cache and now - self.query_cache[q_hash][0] < TTL:
            print("Memory: Cache hit for similarity search")
            return self.query_cache[q_hash][1]

        try:
            print("Memory: Searching vector store for similar questions...")
            similar_questions = self.vector_store.similarity_search_with_relevance_scores(query, k=k)
            self.query_cache[q_hash] = (now, similar_questions)
            return similar_questions
        except Exception as e:
            print(f"Memory: Vector store search error – {e}")
            return []

    def should_ingest(self, query: str) -> bool:
        """Determine if this query/answer should be ingested to long-term memory"""
        if not self.vector_store:
            return False

        similar_questions = self.similarity_search(query, k=1)
        top_score = similar_questions[0][1] if similar_questions else 0.0
        return top_score < SIMILARITY_THRESHOLD

    def ingest_qa_pair(self, question: str, answer: str, attachments: str = "") -> None:
        """Store Q/A pair in long-term memory"""
        if not self.vector_store:
            print("Memory: Vector store not available for ingestion")
            return

        try:
            payload = f"Question:\n{question}\n\nAnswer:\n{answer}"
            if attachments:
                payload += f"\n\n{attachments}"

            hash_id = hashlib.sha256(payload.encode()).hexdigest()
            if hash_id in self.seen_hashes:
                print("Memory: Duplicate payload within session – skip")
                return

            self.seen_hashes.add(hash_id)
            self.vector_store.add_texts(
                [payload],
                metadatas=[{"hash_id": hash_id, "timestamp": time.time()}]
            )
            print("Memory: Stored new Q/A pair in vector store")
        except Exception as e:
            print(f"Memory: Error while upserting – {e}")

    def get_similar_qa(self, query: str) -> Optional[str]:
        """Get similar Q/A for context"""
        similar_questions = self.similarity_search(query, k=1)
        if not similar_questions:
            return None

        example_doc = similar_questions[0][0] if isinstance(similar_questions[0], tuple) else similar_questions[0]
        return example_doc.page_content

    def add_processed_task(self, task_id: str) -> None:
        """Mark a task as processed to avoid re-downloading attachments"""
        self.processed_tasks.add(task_id)

    def is_task_processed(self, task_id: str) -> bool:
        """Check if a task has already been processed"""
        return task_id in self.processed_tasks

    def clear_session_cache(self) -> None:
        """Clear session-specific caches"""
        self.query_cache.clear()
        self.processed_tasks.clear()
        self.seen_hashes.clear()
        print("Memory: Session cache cleared")


# Global memory manager instance
memory_manager = MemoryManager()
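The SHA-256-keyed TTL cache that similarity_search uses is worth seeing on its own. A runnable sketch of the same pattern with no external dependencies; cached_search and search_fn are illustrative names, not part of the module:

import hashlib
import time

TTL = 300  # seconds, matching memory.py

cache: dict[str, tuple[float, list]] = {}

def cached_search(query: str, search_fn) -> list:
    """SHA-256-keyed TTL cache, the same pattern MemoryManager.similarity_search uses."""
    key = hashlib.sha256(query.encode()).hexdigest()
    now = time.time()
    if key in cache and now - cache[key][0] < TTL:
        return cache[key][1]       # cache hit, entry still fresh
    results = search_fn(query)     # cache miss or stale entry: hit the backend
    cache[key] = (now, results)
    return results

# Usage with a dummy backend:
hits = cached_search("What is 2+2?", lambda q: [f"doc about: {q}"])
print(hits)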
src/tracing.py
ADDED
@@ -0,0 +1,125 @@
"""Tracing and Observability Setup for Langfuse v3.0.0"""
import os
from typing import Optional
from langfuse import Langfuse, get_client
from langfuse.langchain import CallbackHandler


def initialize_langfuse() -> None:
    """Initialize Langfuse client with proper configuration"""
    try:
        # Initialize Langfuse client
        Langfuse(
            public_key=os.environ.get("LANGFUSE_PUBLIC_KEY"),
            secret_key=os.environ.get("LANGFUSE_SECRET_KEY"),
            host=os.environ.get("LANGFUSE_HOST", "https://cloud.langfuse.com")
        )
        print("Langfuse client initialized successfully")
    except Exception as e:
        print(f"Warning: Could not initialize Langfuse client: {e}")


# Singleton for Langfuse CallbackHandler to ensure a single handler per request
_CALLBACK_HANDLER: Optional[CallbackHandler] = None


def get_langfuse_callback_handler() -> Optional[CallbackHandler]:
    """Get (or create) a singleton Langfuse callback handler for LangChain integration

    Best-practice (#2): Pass exactly **one** CallbackHandler into graph.invoke/stream so that
    every nested LLM/tool span is correlated underneath the same root span. Re-using the
    same instance avoids fragmenting traces when individual nodes try to create their own
    handler.
    """
    global _CALLBACK_HANDLER  # noqa: PLW0603 – module-level singleton is intentional

    try:
        initialize_langfuse()
        if _CALLBACK_HANDLER is None:
            _CALLBACK_HANDLER = CallbackHandler()
        return _CALLBACK_HANDLER
    except Exception as e:
        print(f"Warning: Could not create Langfuse callback handler: {e}")
        return None


def trace_agent_execution(name: str, user_id: str | None = None, session_id: str | None = None):
    """Context manager that opens a **root** span for the current user request.

    Follows Langfuse best practices (rules #2 & #3):
    • exactly one root span per request
    • attach `user_id` and `session_id` so that follow-up calls are stitched together
    """
    try:
        langfuse = get_client()
        span_kwargs = {"name": name}
        # Open the span as context manager so everything inside is automatically nested
        span_cm = langfuse.start_as_current_span(**span_kwargs)

        # Wrap the CM so that we can update the trace metadata *after* it was started
        class _TraceWrapper:
            def __enter__(self):
                # Enter the span
                self._span = span_cm.__enter__()
                # Immediately enrich it with session/user information
                try:
                    langfuse.update_current_trace(
                        **{k: v for k, v in {"user_id": user_id, "session_id": session_id}.items() if v}
                    )
                except Exception:
                    # Ignore update failures – tracing must never break business logic
                    pass
                return self._span

            def __exit__(self, exc_type, exc_val, exc_tb):
                return span_cm.__exit__(exc_type, exc_val, exc_tb)

        return _TraceWrapper()
    except Exception as e:
        print(f"Warning: Could not create trace span: {e}")
        # Gracefully degrade – return dummy context manager
        from contextlib import nullcontext

        return nullcontext()  # type: ignore


def update_trace_metadata(user_id: str | None = None, session_id: str | None = None, tags: list | None = None, **kwargs):
    """Update current trace with metadata"""
    try:
        langfuse = get_client()
        update_args = {}

        if user_id:
            update_args["user_id"] = user_id
        if session_id:
            update_args["session_id"] = session_id
        if tags:
            update_args["tags"] = tags
        if kwargs:
            update_args.update(kwargs)

        langfuse.update_current_trace(**update_args)
    except Exception as e:
        print(f"Warning: Could not update trace metadata: {e}")


def flush_langfuse():
    """Flush Langfuse events (for short-lived applications)"""
    try:
        langfuse = get_client()
        langfuse.flush()
    except Exception as e:
        print(f"Warning: Could not flush Langfuse events: {e}")


def shutdown_langfuse():
    """Shutdown Langfuse client (for application cleanup)"""
    try:
        langfuse = get_client()
        langfuse.shutdown()
    except Exception as e:
        print(f"Warning: Could not shutdown Langfuse client: {e}")


# Initialize Langfuse on module import
initialize_langfuse()
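Putting the module together: one root span per request, one shared handler for every nested call, and a flush before the process exits. A sketch using only the functions defined above, assuming the LANGFUSE_* environment variables are set:

from src.tracing import trace_agent_execution, get_langfuse_callback_handler, flush_langfuse

handler = get_langfuse_callback_handler()   # one handler, reused everywhere
callbacks = [handler] if handler else []

with trace_agent_execution("demo-request", user_id="u1", session_id="s1"):
    # Any llm.invoke(..., config={"callbacks": callbacks}) made here
    # is nested under the same root span.
    pass

flush_langfuse()  # export spans before a short-lived process exits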
test_new_system.py
ADDED
@@ -0,0 +1,205 @@
#!/usr/bin/env python3
"""
Test Script for New LangGraph Agent System
Tests the multi-agent architecture with memory, routing, and verification.
"""
import os
import sys
import time
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

# Add the current directory to Python path
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

def test_imports():
    """Test that all modules can be imported correctly"""
    print("Testing imports...")
    try:
        # Test core imports
        from src import run_agent_system, memory_manager
        from src.tracing import get_langfuse_callback_handler

        # Test agent imports
        from src.agents import (
            plan_node, router_node, retrieval_agent,
            execution_agent, critic_agent, verification_node
        )

        print("✅ All imports successful")
        return True
    except ImportError as e:
        print(f"❌ Import error: {e}")
        return False


def test_memory_system():
    """Test the memory management system"""
    print("\nTesting memory system...")
    try:
        from src.memory import memory_manager

        # Test basic functionality
        test_query = "What is 2+2?"

        # Test similarity search (should not crash even without vector store)
        similar = memory_manager.similarity_search(test_query, k=1)
        print(f"✅ Similarity search completed: {len(similar)} results")

        # Test cache management
        memory_manager.clear_session_cache()
        print("✅ Memory cache cleared")

        return True
    except Exception as e:
        print(f"❌ Memory system error: {e}")
        return False


def test_tracing_system():
    """Test the Langfuse tracing integration"""
    print("\nTesting tracing system...")
    try:
        from src.tracing import get_langfuse_callback_handler, initialize_langfuse

        # Test handler creation (should not crash even without credentials)
        handler = get_langfuse_callback_handler()
        print(f"✅ Langfuse handler: {type(handler)}")

        return True
    except Exception as e:
        print(f"❌ Tracing system error: {e}")
        return False


def test_individual_agents():
    """Test each agent individually"""
    print("\nTesting individual agents...")

    # Test state structure
    test_state = {
        "messages": [],
        "plan_complete": False,
        "next_agent": "",
        "routing_decision": "",
        "routing_reason": "",
        "current_step": "testing",
        "agent_response": None,
        "needs_tools": False,
        "execution_result": "",
        "critic_assessment": "",
        "quality_pass": True,
        "quality_score": 7,
        "verification_status": "",
        "attempt_count": 1,
        "final_answer": ""
    }

    try:
        from langchain_core.messages import HumanMessage
        test_state["messages"] = [HumanMessage(content="Test query")]

        # Test plan node
        from src.agents.plan_node import plan_node
        plan_result = plan_node(test_state)
        print("✅ Plan node executed")

        # Test router node
        from src.agents.router_node import router_node
        router_result = router_node(plan_result)
        print("✅ Router node executed")

        return True
    except Exception as e:
        print(f"❌ Agent testing error: {e}")
        return False


def test_graph_creation():
    """Test the main graph creation"""
    print("\nTesting graph creation...")
    try:
        from src.langgraph_system import create_agent_graph

        # Create the workflow
        workflow = create_agent_graph()
        print("✅ Graph created successfully")

        # Try to compile (this might fail without proper setup, but shouldn't crash)
        try:
            app = workflow.compile()
            print("✅ Graph compiled successfully")
        except Exception as e:
            print(f"⚠️ Graph compilation warning: {e}")

        return True
    except Exception as e:
        print(f"❌ Graph creation error: {e}")
        return False


def test_simple_query():
    """Test a simple query through the system"""
    print("\nTesting simple query...")
    try:
        from new_langraph_agent import run_agent

        # Simple test query
        test_query = "What is 1 + 1?"
        print(f"Query: {test_query}")

        start_time = time.time()
        result = run_agent(test_query)
        end_time = time.time()

        print(f"Result: {result}")
        print(f"Time taken: {end_time - start_time:.2f} seconds")
        print("✅ Simple query completed")

        return True
    except Exception as e:
        print(f"❌ Simple query error: {e}")
        return False


def main():
    """Run all tests"""
    print("LangGraph Agent System - Test Suite")
    print("=" * 50)

    tests = [
        test_imports,
        test_memory_system,
        test_tracing_system,
        test_individual_agents,
        test_graph_creation,
        test_simple_query
    ]

    results = []
    for test_func in tests:
        try:
            result = test_func()
            results.append(result)
        except Exception as e:
            print(f"❌ Test {test_func.__name__} failed with exception: {e}")
            results.append(False)

    # Summary
    print("\n" + "=" * 50)
    print("Test Summary:")
    print(f"Passed: {sum(results)}/{len(results)}")
    print(f"Failed: {len(results) - sum(results)}/{len(results)}")

    if all(results):
        print("🎉 All tests passed!")
        return 0
    else:
        print("⚠️ Some tests failed. Check the output above for details.")
        return 1


if __name__ == "__main__":
    sys.exit(main())
test_tools_integration.py
ADDED
@@ -0,0 +1,81 @@
#!/usr/bin/env python3
"""
Test script to verify tool integration in the LangGraph agent system
"""

from src.langgraph_system import run_agent_system

def test_retrieval_tools():
    """Test that retrieval tools (Wikipedia, web search, etc.) are working"""
    print("=" * 60)
    print("Testing Retrieval Tools Integration")
    print("=" * 60)

    # Test Wikipedia search
    query = "When was Albert Einstein born?"
    print(f"\nTesting query: {query}")
    print("-" * 40)

    result = run_agent_system(query, user_id="test_user", session_id="test_session")
    print(f"Result: {result}")

    return result

def test_execution_tools():
    """Test that execution tools (Python code execution) are working"""
    print("=" * 60)
    print("Testing Execution Tools Integration")
    print("=" * 60)

    # Test code execution
    query = "Calculate the first 10 numbers in the Fibonacci sequence"
    print(f"\nTesting query: {query}")
    print("-" * 40)

    result = run_agent_system(query, user_id="test_user", session_id="test_session")
    print(f"Result: {result}")

    return result

def test_web_search_tools():
    """Test web search functionality"""
    print("=" * 60)
    print("Testing Web Search Tools Integration")
    print("=" * 60)

    # Test web search
    query = "What is the latest news about artificial intelligence?"
    print(f"\nTesting query: {query}")
    print("-" * 40)

    result = run_agent_system(query, user_id="test_user", session_id="test_session")
    print(f"Result: {result}")

    return result

if __name__ == "__main__":
    print("Starting Tool Integration Tests...")

    try:
        # Test retrieval tools
        test_retrieval_tools()

        print("\n" + "=" * 60)
        input("Press Enter to continue to execution tools test...")

        # Test execution tools
        test_execution_tools()

        print("\n" + "=" * 60)
        input("Press Enter to continue to web search tools test...")

        # Test web search tools
        test_web_search_tools()

        print("\n" + "=" * 60)
        print("Tool integration tests completed!")

    except Exception as e:
        print(f"Test failed with error: {e}")
        import traceback
        traceback.print_exc()
uv.lock
CHANGED
|
@@ -331,6 +331,30 @@ wheels = [
|
|
| 331 |
{ url = "https://files.pythonhosted.org/packages/c3/be/d0d44e092656fe7a06b55e6103cbce807cdbdee17884a5367c68c9860853/dataclasses_json-0.6.7-py3-none-any.whl", hash = "sha256:0dbf33f26c8d5305befd61b39d2b3414e8a407bedc2834dea9b8d642666fb40a", size = 28686 },
|
| 332 |
]
|
| 333 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 334 |
[[package]]
|
| 335 |
name = "debugpy"
|
| 336 |
version = "1.8.14"
|
|
@@ -386,6 +410,15 @@ wheels = [
|
|
| 386 |
{ url = "https://files.pythonhosted.org/packages/02/c3/253a89ee03fc9b9682f1541728eb66db7db22148cd94f89ab22528cd1e1b/deprecation-2.1.0-py2.py3-none-any.whl", hash = "sha256:a10811591210e1fb0e768a8c25517cabeabcba6f0bf96564f8ff45189f90b14a", size = 11178 },
|
| 387 |
]
|
| 388 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 389 |
[[package]]
|
| 390 |
name = "dirtyjson"
|
| 391 |
version = "1.0.8"
|
|
@@ -495,6 +528,7 @@ name = "final-assignment-template"
|
|
| 495 |
version = "0.1.0"
|
| 496 |
source = { virtual = "." }
|
| 497 |
dependencies = [
|
|
|
|
| 498 |
{ name = "dotenv" },
|
| 499 |
{ name = "gradio" },
|
| 500 |
{ name = "hf-xet" },
|
|
@@ -509,6 +543,8 @@ dependencies = [
|
|
| 509 |
{ name = "langchain-openai" },
|
| 510 |
{ name = "langfuse" },
|
| 511 |
{ name = "langgraph" },
|
|
|
|
|
|
|
| 512 |
{ name = "llama-index" },
|
| 513 |
{ name = "llama-index-core" },
|
| 514 |
{ name = "llama-index-llms-huggingface-api" },
|
|
@@ -526,6 +562,7 @@ dependencies = [
|
|
| 526 |
|
| 527 |
[package.metadata]
|
| 528 |
requires-dist = [
|
|
|
|
| 529 |
{ name = "dotenv", specifier = ">=0.9.9" },
|
| 530 |
{ name = "gradio", specifier = ">=5.34.1" },
|
| 531 |
{ name = "hf-xet", specifier = ">=1.1.3" },
|
|
@@ -540,6 +577,8 @@ requires-dist = [
|
|
| 540 |
{ name = "langchain-openai", specifier = ">=0.3.24" },
|
| 541 |
{ name = "langfuse", specifier = ">=3.0.0" },
|
| 542 |
{ name = "langgraph", specifier = ">=0.4.8" },
|
|
|
|
|
|
|
| 543 |
{ name = "llama-index", specifier = ">=0.12.40" },
|
| 544 |
{ name = "llama-index-core", specifier = ">=0.12.40" },
|
| 545 |
{ name = "llama-index-llms-huggingface-api", specifier = ">=0.5.0" },
|
|
@@ -600,11 +639,16 @@ wheels = [
|
|
| 600 |
|
| 601 |
[[package]]
|
| 602 |
name = "fsspec"
|
| 603 |
-
version = "2025.
|
| 604 |
source = { registry = "https://pypi.org/simple" }
|
| 605 |
-
sdist = { url = "https://files.pythonhosted.org/packages/
|
| 606 |
wheels = [
|
| 607 |
-
{ url = "https://files.pythonhosted.org/packages/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 608 |
]
|
| 609 |
|
| 610 |
[[package]]
|
|
@@ -1366,6 +1410,20 @@ wheels = [
|
|
| 1366 |
{ url = "https://files.pythonhosted.org/packages/0f/41/390a97d9d0abe5b71eea2f6fb618d8adadefa674e97f837bae6cda670bc7/langgraph_checkpoint-2.1.0-py3-none-any.whl", hash = "sha256:4cea3e512081da1241396a519cbfe4c5d92836545e2c64e85b6f5c34a1b8bc61", size = 43844 },
|
| 1367 |
]
|
| 1368 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1369 |
[[package]]
|
| 1370 |
name = "langgraph-prebuilt"
|
| 1371 |
version = "0.2.2"
|
|
@@ -1838,6 +1896,22 @@ wheels = [
|
|
| 1838 |
{ url = "https://files.pythonhosted.org/packages/84/5d/e17845bb0fa76334477d5de38654d27946d5b5d3695443987a094a71b440/multidict-6.4.4-py3-none-any.whl", hash = "sha256:bd4557071b561a8b3b6075c3ce93cf9bfb6182cb241805c3d66ced3b75eff4ac", size = 10481 },
|
| 1839 |
]
|
| 1840 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1841 |
[[package]]
|
| 1842 |
name = "mypy-extensions"
|
| 1843 |
version = "1.1.0"
|
|
@@ -2476,6 +2550,32 @@ wheels = [
|
|
| 2476 |
{ url = "https://files.pythonhosted.org/packages/8e/37/efad0257dc6e593a18957422533ff0f87ede7c9c6ea010a2177d738fb82f/pure_eval-0.2.3-py3-none-any.whl", hash = "sha256:1db8e35b67b3d218d818ae653e27f06c3aa420901fa7b081ca98cbedc874e0d0", size = 11842 },
|
| 2477 |
]
|
| 2478 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2479 |
[[package]]
|
| 2480 |
name = "pyasn1"
|
| 2481 |
version = "0.6.1"
|
|
@@ -3029,6 +3129,18 @@ asyncio = [
|
|
| 3029 |
{ name = "greenlet" },
|
| 3030 |
]
|
| 3031 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3032 |
[[package]]
|
| 3033 |
name = "stack-data"
|
| 3034 |
version = "0.6.3"
|
|
|
|
| 331 |
{ url = "https://files.pythonhosted.org/packages/c3/be/d0d44e092656fe7a06b55e6103cbce807cdbdee17884a5367c68c9860853/dataclasses_json-0.6.7-py3-none-any.whl", hash = "sha256:0dbf33f26c8d5305befd61b39d2b3414e8a407bedc2834dea9b8d642666fb40a", size = 28686 },
|
| 332 |
]
|
| 333 |
|
| 334 |
+
[[package]]
|
| 335 |
+
name = "datasets"
|
| 336 |
+
version = "3.6.0"
|
| 337 |
+
source = { registry = "https://pypi.org/simple" }
|
| 338 |
+
dependencies = [
|
| 339 |
+
{ name = "dill" },
|
| 340 |
+
{ name = "filelock" },
|
| 341 |
+
{ name = "fsspec", extra = ["http"] },
|
| 342 |
+
{ name = "huggingface-hub" },
|
| 343 |
+
{ name = "multiprocess" },
|
| 344 |
+
{ name = "numpy" },
|
| 345 |
+
{ name = "packaging" },
|
| 346 |
+
{ name = "pandas" },
|
| 347 |
+
{ name = "pyarrow" },
|
| 348 |
+
{ name = "pyyaml" },
|
| 349 |
+
{ name = "requests" },
|
| 350 |
+
{ name = "tqdm" },
|
| 351 |
+
{ name = "xxhash" },
|
| 352 |
+
]
|
| 353 |
+
sdist = { url = "https://files.pythonhosted.org/packages/1a/89/d3d6fef58a488f8569c82fd293ab7cbd4250244d67f425dcae64c63800ea/datasets-3.6.0.tar.gz", hash = "sha256:1b2bf43b19776e2787e181cfd329cb0ca1a358ea014780c3581e0f276375e041", size = 569336 }
|
| 354 |
+
wheels = [
|
| 355 |
+
{ url = "https://files.pythonhosted.org/packages/20/34/a08b0ee99715eaba118cbe19a71f7b5e2425c2718ef96007c325944a1152/datasets-3.6.0-py3-none-any.whl", hash = "sha256:25000c4a2c0873a710df127d08a202a06eab7bf42441a6bc278b499c2f72cd1b", size = 491546 },
|
| 356 |
+
]
|
| 357 |
+
|
| 358 |
[[package]]
|
| 359 |
name = "debugpy"
|
| 360 |
version = "1.8.14"
|
|
|
|
| 410 |
{ url = "https://files.pythonhosted.org/packages/02/c3/253a89ee03fc9b9682f1541728eb66db7db22148cd94f89ab22528cd1e1b/deprecation-2.1.0-py2.py3-none-any.whl", hash = "sha256:a10811591210e1fb0e768a8c25517cabeabcba6f0bf96564f8ff45189f90b14a", size = 11178 },
|
| 411 |
]
|
| 412 |
|
| 413 |
+
[[package]]
|
| 414 |
+
name = "dill"
|
| 415 |
+
version = "0.3.8"
|
| 416 |
+
source = { registry = "https://pypi.org/simple" }
|
| 417 |
+
sdist = { url = "https://files.pythonhosted.org/packages/17/4d/ac7ffa80c69ea1df30a8aa11b3578692a5118e7cd1aa157e3ef73b092d15/dill-0.3.8.tar.gz", hash = "sha256:3ebe3c479ad625c4553aca177444d89b486b1d84982eeacded644afc0cf797ca", size = 184847 }
|
| 418 |
+
wheels = [
|
| 419 |
+
{ url = "https://files.pythonhosted.org/packages/c9/7a/cef76fd8438a42f96db64ddaa85280485a9c395e7df3db8158cfec1eee34/dill-0.3.8-py3-none-any.whl", hash = "sha256:c36ca9ffb54365bdd2f8eb3eff7d2a21237f8452b57ace88b1ac615b7e815bd7", size = 116252 },
|
| 420 |
+
]
|
| 421 |
+
|
| 422 |
[[package]]
|
| 423 |
name = "dirtyjson"
|
| 424 |
version = "1.0.8"
|
|
|
|
| 528 |
version = "0.1.0"
|
| 529 |
source = { virtual = "." }
|
| 530 |
dependencies = [
|
| 531 |
+
{ name = "datasets" },
|
| 532 |
{ name = "dotenv" },
|
| 533 |
{ name = "gradio" },
|
| 534 |
{ name = "hf-xet" },
|
|
|
|
| 543 |
{ name = "langchain-openai" },
|
| 544 |
{ name = "langfuse" },
|
| 545 |
{ name = "langgraph" },
|
| 546 |
+
{ name = "langgraph-checkpoint" },
|
| 547 |
+
{ name = "langgraph-checkpoint-sqlite" },
|
| 548 |
{ name = "llama-index" },
|
| 549 |
{ name = "llama-index-core" },
|
| 550 |
{ name = "llama-index-llms-huggingface-api" },
|
|
|
|
| 562 |
|
| 563 |
[package.metadata]
|
| 564 |
requires-dist = [
|
| 565 |
+
{ name = "datasets", specifier = ">=2.19.1" },
|
| 566 |
{ name = "dotenv", specifier = ">=0.9.9" },
|
| 567 |
{ name = "gradio", specifier = ">=5.34.1" },
|
| 568 |
{ name = "hf-xet", specifier = ">=1.1.3" },
|
|
|
|
| 577 |
{ name = "langchain-openai", specifier = ">=0.3.24" },
|
| 578 |
{ name = "langfuse", specifier = ">=3.0.0" },
|
| 579 |
{ name = "langgraph", specifier = ">=0.4.8" },
|
| 580 |
+
{ name = "langgraph-checkpoint", specifier = ">=2.1.0" },
|
| 581 |
+
{ name = "langgraph-checkpoint-sqlite", specifier = ">=2.0.10" },
|
| 582 |
{ name = "llama-index", specifier = ">=0.12.40" },
|
| 583 |
{ name = "llama-index-core", specifier = ">=0.12.40" },
|
| 584 |
{ name = "llama-index-llms-huggingface-api", specifier = ">=0.5.0" },
|
|
|
|
| 639 |
| 640 |   [[package]]
| 641 |   name = "fsspec"
| 642 | + version = "2025.3.0"
| 643 |   source = { registry = "https://pypi.org/simple" }
| 644 | + sdist = { url = "https://files.pythonhosted.org/packages/34/f4/5721faf47b8c499e776bc34c6a8fc17efdf7fdef0b00f398128bc5dcb4ac/fsspec-2025.3.0.tar.gz", hash = "sha256:a935fd1ea872591f2b5148907d103488fc523295e6c64b835cfad8c3eca44972", size = 298491 }
| 645 |   wheels = [
| 646 | +     { url = "https://files.pythonhosted.org/packages/56/53/eb690efa8513166adef3e0669afd31e95ffde69fb3c52ec2ac7223ed6018/fsspec-2025.3.0-py3-none-any.whl", hash = "sha256:efb87af3efa9103f94ca91a7f8cb7a4df91af9f74fc106c9c7ea0efd7277c1b3", size = 193615 },
| 647 | + ]
| 648 | +
| 649 | + [package.optional-dependencies]
| 650 | + http = [
| 651 | +     { name = "aiohttp" },
| 652 |   ]
| 653 |
| 654 |   [[package]]

| 1410 |       { url = "https://files.pythonhosted.org/packages/0f/41/390a97d9d0abe5b71eea2f6fb618d8adadefa674e97f837bae6cda670bc7/langgraph_checkpoint-2.1.0-py3-none-any.whl", hash = "sha256:4cea3e512081da1241396a519cbfe4c5d92836545e2c64e85b6f5c34a1b8bc61", size = 43844 },
| 1411 |   ]
| 1412 |
| 1413 | + [[package]]
| 1414 | + name = "langgraph-checkpoint-sqlite"
| 1415 | + version = "2.0.10"
| 1416 | + source = { registry = "https://pypi.org/simple" }
| 1417 | + dependencies = [
| 1418 | +     { name = "aiosqlite" },
| 1419 | +     { name = "langgraph-checkpoint" },
| 1420 | +     { name = "sqlite-vec" },
| 1421 | + ]
| 1422 | + sdist = { url = "https://files.pythonhosted.org/packages/7b/38/5d44b91fa21e06309be8f1658ae966f5c717443401df005b20d9af91b6b5/langgraph_checkpoint_sqlite-2.0.10.tar.gz", hash = "sha256:c8a55a268b857761dc77f123df48addaf8e9a40b72c4eaddb7c551ddced1c5b6", size = 103625 }
| 1423 | + wheels = [
| 1424 | +     { url = "https://files.pythonhosted.org/packages/c1/ff/63b16d83a513f7d7a5001bb01a40024986d330718a5315bf1962d7cc50c8/langgraph_checkpoint_sqlite-2.0.10-py3-none-any.whl", hash = "sha256:89d1d2201fe26aa52f1a9c03e1015d226635649be596b26542a5de78f8cc6c9f", size = 30973 },
| 1425 | + ]
| 1426 | +
| 1427 |   [[package]]
| 1428 |   name = "langgraph-prebuilt"
| 1429 |   version = "0.2.2"

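This hunk locks langgraph-checkpoint-sqlite 2.0.10 (which in turn pulls in aiosqlite and sqlite-vec), the SQLite-backed checkpointer for LangGraph. A minimal sketch of how such a checkpointer is typically wired into a compiled graph, using a toy State and builder rather than this repo's actual graph:

import sqlite3
from typing import TypedDict

from langgraph.checkpoint.sqlite import SqliteSaver
from langgraph.graph import StateGraph, START, END

class State(TypedDict):
    count: int

def bump(state: State) -> State:
    # Trivial node so that persisted state is observable across runs.
    return {"count": state["count"] + 1}

builder = StateGraph(State)
builder.add_node("bump", bump)
builder.add_edge(START, "bump")
builder.add_edge("bump", END)

# A SQLite file on disk gives the graph durable, per-thread checkpoints.
conn = sqlite3.connect("checkpoints.db", check_same_thread=False)
graph = builder.compile(checkpointer=SqliteSaver(conn))

config = {"configurable": {"thread_id": "demo"}}
print(graph.invoke({"count": 0}, config))

Re-running the script with the same thread_id resumes from the stored checkpoint instead of starting from scratch.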
| 1896 |       { url = "https://files.pythonhosted.org/packages/84/5d/e17845bb0fa76334477d5de38654d27946d5b5d3695443987a094a71b440/multidict-6.4.4-py3-none-any.whl", hash = "sha256:bd4557071b561a8b3b6075c3ce93cf9bfb6182cb241805c3d66ced3b75eff4ac", size = 10481 },
| 1897 |   ]
| 1898 |
| 1899 | + [[package]]
| 1900 | + name = "multiprocess"
| 1901 | + version = "0.70.16"
| 1902 | + source = { registry = "https://pypi.org/simple" }
| 1903 | + dependencies = [
| 1904 | +     { name = "dill" },
| 1905 | + ]
| 1906 | + sdist = { url = "https://files.pythonhosted.org/packages/b5/ae/04f39c5d0d0def03247c2893d6f2b83c136bf3320a2154d7b8858f2ba72d/multiprocess-0.70.16.tar.gz", hash = "sha256:161af703d4652a0e1410be6abccecde4a7ddffd19341be0a7011b94aeb171ac1", size = 1772603 }
| 1907 | + wheels = [
| 1908 | +     { url = "https://files.pythonhosted.org/packages/bc/f7/7ec7fddc92e50714ea3745631f79bd9c96424cb2702632521028e57d3a36/multiprocess-0.70.16-py310-none-any.whl", hash = "sha256:c4a9944c67bd49f823687463660a2d6daae94c289adff97e0f9d696ba6371d02", size = 134824 },
| 1909 | +     { url = "https://files.pythonhosted.org/packages/50/15/b56e50e8debaf439f44befec5b2af11db85f6e0f344c3113ae0be0593a91/multiprocess-0.70.16-py311-none-any.whl", hash = "sha256:af4cabb0dac72abfb1e794fa7855c325fd2b55a10a44628a3c1ad3311c04127a", size = 143519 },
| 1910 | +     { url = "https://files.pythonhosted.org/packages/0a/7d/a988f258104dcd2ccf1ed40fdc97e26c4ac351eeaf81d76e266c52d84e2f/multiprocess-0.70.16-py312-none-any.whl", hash = "sha256:fc0544c531920dde3b00c29863377f87e1632601092ea2daca74e4beb40faa2e", size = 146741 },
| 1911 | +     { url = "https://files.pythonhosted.org/packages/ea/89/38df130f2c799090c978b366cfdf5b96d08de5b29a4a293df7f7429fa50b/multiprocess-0.70.16-py38-none-any.whl", hash = "sha256:a71d82033454891091a226dfc319d0cfa8019a4e888ef9ca910372a446de4435", size = 132628 },
| 1912 | +     { url = "https://files.pythonhosted.org/packages/da/d9/f7f9379981e39b8c2511c9e0326d212accacb82f12fbfdc1aa2ce2a7b2b6/multiprocess-0.70.16-py39-none-any.whl", hash = "sha256:a0bafd3ae1b732eac64be2e72038231c1ba97724b60b09400d68f229fcc2fbf3", size = 133351 },
| 1913 | + ]
| 1914 | +
| 1915 |   [[package]]
| 1916 |   name = "mypy-extensions"
| 1917 |   version = "1.1.0"

| 2550 |       { url = "https://files.pythonhosted.org/packages/8e/37/efad0257dc6e593a18957422533ff0f87ede7c9c6ea010a2177d738fb82f/pure_eval-0.2.3-py3-none-any.whl", hash = "sha256:1db8e35b67b3d218d818ae653e27f06c3aa420901fa7b081ca98cbedc874e0d0", size = 11842 },
| 2551 |   ]
| 2552 |
| 2553 | + [[package]]
| 2554 | + name = "pyarrow"
| 2555 | + version = "20.0.0"
| 2556 | + source = { registry = "https://pypi.org/simple" }
| 2557 | + sdist = { url = "https://files.pythonhosted.org/packages/a2/ee/a7810cb9f3d6e9238e61d312076a9859bf3668fd21c69744de9532383912/pyarrow-20.0.0.tar.gz", hash = "sha256:febc4a913592573c8d5805091a6c2b5064c8bd6e002131f01061797d91c783c1", size = 1125187 }
| 2558 | + wheels = [
| 2559 | +     { url = "https://files.pythonhosted.org/packages/9b/aa/daa413b81446d20d4dad2944110dcf4cf4f4179ef7f685dd5a6d7570dc8e/pyarrow-20.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:a15532e77b94c61efadde86d10957950392999503b3616b2ffcef7621a002893", size = 30798501 },
| 2560 | +     { url = "https://files.pythonhosted.org/packages/ff/75/2303d1caa410925de902d32ac215dc80a7ce7dd8dfe95358c165f2adf107/pyarrow-20.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:dd43f58037443af715f34f1322c782ec463a3c8a94a85fdb2d987ceb5658e061", size = 32277895 },
| 2561 | +     { url = "https://files.pythonhosted.org/packages/92/41/fe18c7c0b38b20811b73d1bdd54b1fccba0dab0e51d2048878042d84afa8/pyarrow-20.0.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa0d288143a8585806e3cc7c39566407aab646fb9ece164609dac1cfff45f6ae", size = 41327322 },
| 2562 | +     { url = "https://files.pythonhosted.org/packages/da/ab/7dbf3d11db67c72dbf36ae63dcbc9f30b866c153b3a22ef728523943eee6/pyarrow-20.0.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b6953f0114f8d6f3d905d98e987d0924dabce59c3cda380bdfaa25a6201563b4", size = 42411441 },
| 2563 | +     { url = "https://files.pythonhosted.org/packages/90/c3/0c7da7b6dac863af75b64e2f827e4742161128c350bfe7955b426484e226/pyarrow-20.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:991f85b48a8a5e839b2128590ce07611fae48a904cae6cab1f089c5955b57eb5", size = 40677027 },
| 2564 | +     { url = "https://files.pythonhosted.org/packages/be/27/43a47fa0ff9053ab5203bb3faeec435d43c0d8bfa40179bfd076cdbd4e1c/pyarrow-20.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:97c8dc984ed09cb07d618d57d8d4b67a5100a30c3818c2fb0b04599f0da2de7b", size = 42281473 },
| 2565 | +     { url = "https://files.pythonhosted.org/packages/bc/0b/d56c63b078876da81bbb9ba695a596eabee9b085555ed12bf6eb3b7cab0e/pyarrow-20.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9b71daf534f4745818f96c214dbc1e6124d7daf059167330b610fc69b6f3d3e3", size = 42893897 },
| 2566 | +     { url = "https://files.pythonhosted.org/packages/92/ac/7d4bd020ba9145f354012838692d48300c1b8fe5634bfda886abcada67ed/pyarrow-20.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e8b88758f9303fa5a83d6c90e176714b2fd3852e776fc2d7e42a22dd6c2fb368", size = 44543847 },
| 2567 | +     { url = "https://files.pythonhosted.org/packages/9d/07/290f4abf9ca702c5df7b47739c1b2c83588641ddfa2cc75e34a301d42e55/pyarrow-20.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:30b3051b7975801c1e1d387e17c588d8ab05ced9b1e14eec57915f79869b5031", size = 25653219 },
| 2568 | +     { url = "https://files.pythonhosted.org/packages/95/df/720bb17704b10bd69dde086e1400b8eefb8f58df3f8ac9cff6c425bf57f1/pyarrow-20.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:ca151afa4f9b7bc45bcc791eb9a89e90a9eb2772767d0b1e5389609c7d03db63", size = 30853957 },
| 2569 | +     { url = "https://files.pythonhosted.org/packages/d9/72/0d5f875efc31baef742ba55a00a25213a19ea64d7176e0fe001c5d8b6e9a/pyarrow-20.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:4680f01ecd86e0dd63e39eb5cd59ef9ff24a9d166db328679e36c108dc993d4c", size = 32247972 },
| 2570 | +     { url = "https://files.pythonhosted.org/packages/d5/bc/e48b4fa544d2eea72f7844180eb77f83f2030b84c8dad860f199f94307ed/pyarrow-20.0.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7f4c8534e2ff059765647aa69b75d6543f9fef59e2cd4c6d18015192565d2b70", size = 41256434 },
| 2571 | +     { url = "https://files.pythonhosted.org/packages/c3/01/974043a29874aa2cf4f87fb07fd108828fc7362300265a2a64a94965e35b/pyarrow-20.0.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3e1f8a47f4b4ae4c69c4d702cfbdfe4d41e18e5c7ef6f1bb1c50918c1e81c57b", size = 42353648 },
| 2572 | +     { url = "https://files.pythonhosted.org/packages/68/95/cc0d3634cde9ca69b0e51cbe830d8915ea32dda2157560dda27ff3b3337b/pyarrow-20.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:a1f60dc14658efaa927f8214734f6a01a806d7690be4b3232ba526836d216122", size = 40619853 },
| 2573 | +     { url = "https://files.pythonhosted.org/packages/29/c2/3ad40e07e96a3e74e7ed7cc8285aadfa84eb848a798c98ec0ad009eb6bcc/pyarrow-20.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:204a846dca751428991346976b914d6d2a82ae5b8316a6ed99789ebf976551e6", size = 42241743 },
| 2574 | +     { url = "https://files.pythonhosted.org/packages/eb/cb/65fa110b483339add6a9bc7b6373614166b14e20375d4daa73483755f830/pyarrow-20.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:f3b117b922af5e4c6b9a9115825726cac7d8b1421c37c2b5e24fbacc8930612c", size = 42839441 },
| 2575 | +     { url = "https://files.pythonhosted.org/packages/98/7b/f30b1954589243207d7a0fbc9997401044bf9a033eec78f6cb50da3f304a/pyarrow-20.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:e724a3fd23ae5b9c010e7be857f4405ed5e679db5c93e66204db1a69f733936a", size = 44503279 },
| 2576 | +     { url = "https://files.pythonhosted.org/packages/37/40/ad395740cd641869a13bcf60851296c89624662575621968dcfafabaa7f6/pyarrow-20.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:82f1ee5133bd8f49d31be1299dc07f585136679666b502540db854968576faf9", size = 25944982 },
| 2577 | + ]
| 2578 | +
| 2579 |   [[package]]
| 2580 |   name = "pyasn1"
| 2581 |   version = "0.6.1"

| 3129 |       { name = "greenlet" },
| 3130 |   ]
| 3131 |
| 3132 | + [[package]]
| 3133 | + name = "sqlite-vec"
| 3134 | + version = "0.1.6"
| 3135 | + source = { registry = "https://pypi.org/simple" }
| 3136 | + wheels = [
| 3137 | +     { url = "https://files.pythonhosted.org/packages/88/ed/aabc328f29ee6814033d008ec43e44f2c595447d9cccd5f2aabe60df2933/sqlite_vec-0.1.6-py3-none-macosx_10_6_x86_64.whl", hash = "sha256:77491bcaa6d496f2acb5cc0d0ff0b8964434f141523c121e313f9a7d8088dee3", size = 164075 },
| 3138 | +     { url = "https://files.pythonhosted.org/packages/a7/57/05604e509a129b22e303758bfa062c19afb020557d5e19b008c64016704e/sqlite_vec-0.1.6-py3-none-macosx_11_0_arm64.whl", hash = "sha256:fdca35f7ee3243668a055255d4dee4dea7eed5a06da8cad409f89facf4595361", size = 165242 },
| 3139 | +     { url = "https://files.pythonhosted.org/packages/f2/48/dbb2cc4e5bad88c89c7bb296e2d0a8df58aab9edc75853728c361eefc24f/sqlite_vec-0.1.6-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7b0519d9cd96164cd2e08e8eed225197f9cd2f0be82cb04567692a0a4be02da3", size = 103704 },
| 3140 | +     { url = "https://files.pythonhosted.org/packages/80/76/97f33b1a2446f6ae55e59b33869bed4eafaf59b7f4c662c8d9491b6a714a/sqlite_vec-0.1.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux1_x86_64.whl", hash = "sha256:823b0493add80d7fe82ab0fe25df7c0703f4752941aee1c7b2b02cec9656cb24", size = 151556 },
| 3141 | +     { url = "https://files.pythonhosted.org/packages/6a/98/e8bc58b178266eae2fcf4c9c7a8303a8d41164d781b32d71097924a6bebe/sqlite_vec-0.1.6-py3-none-win_amd64.whl", hash = "sha256:c65bcfd90fa2f41f9000052bcb8bb75d38240b2dae49225389eca6c3136d3f0c", size = 281540 },
| 3142 | + ]
| 3143 | +
| 3144 |   [[package]]
| 3145 |   name = "stack-data"
| 3146 |   version = "0.6.3"
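sqlite-vec ships platform-specific prebuilt wheels of a SQLite loadable extension, which is why the entry above lists five binary wheels and no sdist. A minimal usage sketch, independent of this repo's code and following the package's documented load pattern:

import sqlite3
import sqlite_vec

db = sqlite3.connect(":memory:")
# Loading the extension requires a Python build with extension loading enabled.
db.enable_load_extension(True)
sqlite_vec.load(db)
db.enable_load_extension(False)

# vec0 virtual tables store fixed-size float vectors and answer KNN queries.
db.execute("CREATE VIRTUAL TABLE notes USING vec0(embedding float[4])")
db.execute(
    "INSERT INTO notes(rowid, embedding) VALUES (1, ?)",
    [sqlite_vec.serialize_float32([0.1, 0.2, 0.3, 0.4])],
)
row = db.execute(
    "SELECT rowid, distance FROM notes WHERE embedding MATCH ? ORDER BY distance LIMIT 1",
    [sqlite_vec.serialize_float32([0.1, 0.2, 0.3, 0.4])],
).fetchone()
print(row)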