Rohan03 committed on
Commit
3110b12
·
verified ·
1 Parent(s): 73ecef8

Add purpose_agent/actor.py

Browse files
Files changed (1) hide show
  1. purpose_agent/actor.py +317 -0
purpose_agent/actor.py ADDED
@@ -0,0 +1,317 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Actor Module — The agent that executes actions in the environment.
3
+
4
+ Implements a ReAct-style (Reason + Act) loop where each step produces:
5
+ 1. Thought: Chain-of-thought reasoning about the current state
6
+ 2. Action: What to do next (name + params)
7
+ 3. Expected Delta: What the actor predicts will change
8
+
9
+ The Actor's system prompt is dynamically composed from:
10
+ - Base instructions (static)
11
+ - Strategic memory heuristics (updated after each task — from MUSE)
12
+ - Retrieved procedural SOPs (fetched on demand — from MUSE)
13
+ - Tool-level "muscle memory" (returned with each observation — from MUSE)
14
+
15
+ This module is intentionally stateless between tasks — all learning happens
16
+ via the memory system that feeds into the prompt.
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import json
22
+ import logging
23
+ from typing import Any
24
+
25
+ from purpose_agent.types import Action, Heuristic, MemoryTier, State
26
+ from purpose_agent.llm_backend import ChatMessage, LLMBackend
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
+
31
+ # ---------------------------------------------------------------------------
32
+ # System Prompt Templates
33
+ # ---------------------------------------------------------------------------
34
+
35
# System-level prompt template. It is rendered with ``str.format`` by
# ``Actor._build_system_prompt``, which supplies the three placeholders:
# ``{available_actions}``, ``{strategic_memory}`` and ``{procedural_memory}``.
ACTOR_SYSTEM_PROMPT = """\
You are a goal-directed agent. Your purpose is to achieve the stated goal
by taking incremental actions that each move the state closer to the goal.

## Your Decision Process
For each step, you MUST:
1. THINK: Analyze the current state. What has been achieved? What remains?
2. ACT: Choose the single best next action from available actions.
3. PREDICT: State specifically what you expect to change after this action.

## Rules
- Take ONE action per step. Never skip ahead or combine actions.
- Be specific in your predictions — name exact state fields you expect to change.
- If a previous action didn't produce the expected result, adapt your strategy.
- If you believe the goal is achieved, use action "DONE" with no parameters.

## Available Actions
{available_actions}

## Learned Strategies (from past experience)
{strategic_memory}

## Relevant Procedures
{procedural_memory}
"""
60
+
61
# Per-step user prompt template. It is rendered with ``str.format`` by
# ``Actor._build_step_prompt``, which supplies ``{purpose}``, ``{state}``,
# ``{history_window}``, ``{history}`` and ``{tool_memory}``.
# The doubled braces (``{{`` / ``}}``) in the JSON example are ``str.format``
# escapes: they render as literal ``{`` / ``}`` in the final prompt.
ACTOR_STEP_PROMPT = """\
## Current Goal
{purpose}

## Current State
{state}

## Action History (last {history_window} steps)
{history}

## Tool Tips
{tool_memory}

Based on the current state and your goal, decide your next action.

Respond in this exact JSON format:
```json
{{
"thought": "Your reasoning about the current state and what to do next",
"action": {{
"name": "action_name",
"params": {{"param1": "value1"}}
}},
"expected_delta": "Specific prediction of what will change in the state"
}}
```
"""
88
+
89
+
90
class Actor:
    """
    The Actor agent — executes actions in an environment.

    The Actor does NOT evaluate its own performance. That's the Purpose
    Function's job. The Actor just reasons, acts, and predicts.

    Architecture notes (from MUSE arxiv:2510.08002):
    - System prompt is composed dynamically from 3-tier memory
    - Strategic memory is always present (global dilemmas → strategies)
    - Procedural memory is lazy-loaded (index in prompt, details on demand)
    - Tool memory is returned per-step (dynamic instructions with observations)

    Args:
        llm: The LLM backend to use for reasoning
        available_actions: Dict of {action_name: description} the agent can take
        history_window: How many past steps to include in the prompt
        strategic_memory: List of strategic heuristics (loaded at task start)
        procedural_memory: List of procedural SOPs (indexed, fetched on demand)
        tool_memory: Dict of {action_name: dynamic_tip} (updated per-step)
    """

    def __init__(
        self,
        llm: LLMBackend,
        available_actions: dict[str, str] | None = None,
        history_window: int = 5,
        strategic_memory: list[Heuristic] | None = None,
        procedural_memory: list[Heuristic] | None = None,
        tool_memory: dict[str, str] | None = None,
    ):
        self.llm = llm
        # A missing *or empty* action table falls back to the lone DONE action.
        self.available_actions = available_actions or {"DONE": "Signal that the goal is achieved"}
        self.history_window = history_window
        self.strategic_memory = strategic_memory or []
        self.procedural_memory = procedural_memory or []
        self.tool_memory = tool_memory or {}

    # ------------------------------------------------------------------
    # Prompt Composition
    # ------------------------------------------------------------------

    def _format_actions(self) -> str:
        """Render the action table as a markdown bullet list."""
        if not self.available_actions:
            return "No specific action constraints. You may take any action."
        return "\n".join(
            f"- **{name}**: {desc}" for name, desc in self.available_actions.items()
        )

    def _format_strategic_memory(self) -> str:
        """Render strategic heuristics, highest ``q_value`` first."""
        if not self.strategic_memory:
            return "None yet — this is your first task."
        # reverse=True keeps the sort stable, so ties stay in their original order.
        ranked = sorted(self.strategic_memory, key=lambda h: h.q_value, reverse=True)
        return "\n".join(
            f"- When: {h.pattern}\n Do: {h.strategy} (confidence: {h.q_value:.2f})"
            for h in ranked
        )

    def _format_procedural_memory(self) -> str:
        """Render the SOP index (id, pattern and strategy of each entry)."""
        if not self.procedural_memory:
            return "No standard operating procedures available."
        header = ["Available SOPs (ask for details if relevant):"]
        entries = [f"- [{h.id}] {h.pattern}: {h.strategy}" for h in self.procedural_memory]
        return "\n".join(header + entries)

    def _format_tool_memory(self) -> str:
        """Render the per-action tool tips as a markdown bullet list."""
        if not self.tool_memory:
            return "No tool-specific tips available."
        return "\n".join(
            f"- **{action_name}**: {tip}" for action_name, tip in self.tool_memory.items()
        )

    def _format_history(self, history: list[dict[str, Any]]) -> str:
        """
        Render the most recent ``history_window`` steps, one line per step.

        Step numbers refer to positions in the full history (1-based), not to
        positions inside the window. Each result is stringified and truncated
        to 200 characters so non-string results (dicts, None, ...) are safe.
        """
        if not history:
            return "No actions taken yet."
        # history[-0:] is the WHOLE list, so a non-positive window must be
        # handled explicitly instead of being fed to the slice.
        if self.history_window <= 0:
            return "History omitted (history window is 0)."
        recent = history[-self.history_window:]
        first_step = len(history) - len(recent) + 1
        return "\n".join(
            f"Step {step_num}: Action={entry.get('action', 'N/A')}, "
            f"Result={str(entry.get('result', 'N/A'))[:200]}"
            for step_num, entry in enumerate(recent, start=first_step)
        )

    def _build_system_prompt(self) -> str:
        """Fill ACTOR_SYSTEM_PROMPT with the actions and strategic/procedural memory."""
        return ACTOR_SYSTEM_PROMPT.format(
            available_actions=self._format_actions(),
            strategic_memory=self._format_strategic_memory(),
            procedural_memory=self._format_procedural_memory(),
        )

    def _build_step_prompt(
        self, purpose: str, state: State, history: list[dict[str, Any]]
    ) -> str:
        """Fill ACTOR_STEP_PROMPT with the goal, state description, history and tool tips."""
        return ACTOR_STEP_PROMPT.format(
            purpose=purpose,
            state=state.describe(),
            history=self._format_history(history),
            tool_memory=self._format_tool_memory(),
            history_window=self.history_window,
        )

    # ------------------------------------------------------------------
    # Core Action Generation
    # ------------------------------------------------------------------

    @staticmethod
    def _normalize_decision(result: Any) -> dict[str, Any]:
        """
        Coerce a raw model reply into the keyword arguments for ``Action``.

        Tolerates the malformed shapes models produce in practice: a
        non-mapping reply, ``"action"`` given as a bare string (treated as the
        action name), ``"params"`` that is null or not an object, and null
        text fields. Missing or empty names become ``"UNKNOWN"``.

        Returns:
            Dict with keys ``name``, ``params``, ``thought``, ``expected_delta``.
        """

        def as_text(value: Any) -> str:
            if value is None:
                return ""
            return value if isinstance(value, str) else str(value)

        if not hasattr(result, "get"):
            result = {}

        action_data = result.get("action")
        if isinstance(action_data, str):
            action_data = {"name": action_data}
        elif not hasattr(action_data, "get"):
            action_data = {}

        name = action_data.get("name")
        if not isinstance(name, str) or not name:
            name = "UNKNOWN"

        params = action_data.get("params")
        if not isinstance(params, dict):
            params = {}

        return {
            "name": name,
            "params": params,
            "thought": as_text(result.get("thought")),
            "expected_delta": as_text(result.get("expected_delta")),
        }

    def decide(
        self,
        purpose: str,
        current_state: State,
        history: list[dict[str, Any]] | None = None,
    ) -> Action:
        """
        Given the current state and purpose, decide the next action.

        Structured output is requested first; if the backend raises, the same
        messages are sent through plain generation (temperature 0.7) and the
        text is parsed on a best-effort basis.

        Args:
            purpose: The goal the agent is trying to achieve.
            current_state: Current environment state; its ``describe()`` output
                is placed in the prompt.
            history: Past step records (dicts read via their ``action`` and
                ``result`` keys). ``None`` is treated as no history.

        Returns:
            An Action with thought, name, params, and expected_delta. The name
            is ``"UNKNOWN"`` when the reply contained no usable action name.
        """
        history = history or []

        messages = [
            ChatMessage(role="system", content=self._build_system_prompt()),
            ChatMessage(role="user", content=self._build_step_prompt(
                purpose=purpose,
                state=current_state,
                history=history,
            )),
        ]

        # Try structured output first, fall back to text parsing
        schema = {
            "type": "object",
            "properties": {
                "thought": {"type": "string"},
                "action": {
                    "type": "object",
                    "properties": {
                        "name": {"type": "string"},
                        "params": {"type": "object"},
                    },
                    "required": ["name"],
                },
                "expected_delta": {"type": "string"},
            },
            "required": ["thought", "action", "expected_delta"],
        }

        try:
            result = self.llm.generate_structured(messages, schema=schema)
        except Exception as e:  # deliberate best-effort: any backend failure triggers the fallback
            logger.warning("Structured output failed (%s), falling back to text parse", e)
            raw = self.llm.generate(messages, temperature=0.7)
            result = self._parse_action_text(raw)

        return Action(**self._normalize_decision(result))

    # ------------------------------------------------------------------
    # Memory Updates (called by Orchestrator between tasks)
    # ------------------------------------------------------------------

    def update_strategic_memory(self, heuristics: list[Heuristic]) -> None:
        """Replace strategic memory with the STRATEGIC-tier entries of ``heuristics``."""
        self.strategic_memory = [
            h for h in heuristics if h.tier == MemoryTier.STRATEGIC
        ]
        logger.info("Actor strategic memory updated: %s heuristics", len(self.strategic_memory))

    def update_procedural_memory(self, heuristics: list[Heuristic]) -> None:
        """Replace the SOP index with the PROCEDURAL-tier entries of ``heuristics``."""
        self.procedural_memory = [
            h for h in heuristics if h.tier == MemoryTier.PROCEDURAL
        ]
        logger.info("Actor procedural memory updated: %s SOPs", len(self.procedural_memory))

    def update_tool_memory(self, tips: dict[str, str]) -> None:
        """Merge ``tips`` into the per-action tool tips (existing keys are overwritten)."""
        self.tool_memory.update(tips)
        logger.info("Actor tool memory updated: %s", list(tips.keys()))

    # ------------------------------------------------------------------
    # Fallback Text Parser
    # ------------------------------------------------------------------

    @staticmethod
    def _parse_action_text(raw: str) -> dict[str, Any]:
        """
        Best-effort extraction of action JSON from free-form text.

        Every ``{`` in the text is tried as the start of a JSON value. The
        first decoded object that carries a ``"thought"`` or ``"action"`` key
        is returned as-is. This handles nested objects, code fences and
        surrounding chatter alike. If no such object exists, the ``"thought"``
        and ``"name"`` string fields are scraped with regexes and a minimal
        result is synthesised.

        Returns:
            A dict; in the scraped case it always has ``thought``, ``action``
            (``name`` plus empty ``params``) and ``expected_delta``.
        """
        import re

        text = raw if isinstance(raw, str) else ("" if raw is None else str(raw))

        # A regex cannot balance nested braces, so let the JSON decoder find
        # where each candidate object ends.
        decoder = json.JSONDecoder()
        start = text.find("{")
        while start != -1:
            try:
                candidate, _end = decoder.raw_decode(text, start)
            except json.JSONDecodeError:
                candidate = None
            if isinstance(candidate, dict) and (
                "thought" in candidate or "action" in candidate
            ):
                return candidate
            start = text.find("{", start + 1)

        # Last resort: extract key-value pairs
        thought_match = re.search(r'"thought"\s*:\s*"([^"]*)"', text)
        thought = thought_match.group(1) if thought_match else ""

        name_match = re.search(r'"name"\s*:\s*"([^"]*)"', text)
        action_name = name_match.group(1) if name_match else "UNKNOWN"

        return {
            "thought": thought or text[:200],
            "action": {"name": action_name, "params": {}},
            "expected_delta": "Unable to parse prediction",
        }