# Rohan03 / purpose-agent
"""
Actor Module — The agent that executes actions in the environment.

Implements a ReAct-style (Reason + Act) loop where each step produces:
  1. Thought: Chain-of-thought reasoning about the current state
  2. Action: What to do next (name + params)
  3. Expected Delta: What the actor predicts will change

The Actor's prompts are dynamically composed from:
  - Base instructions (static, system prompt)
  - Strategic memory heuristics (system prompt; updated after each task — from MUSE)
  - Retrieved procedural SOPs (system prompt; fetched on demand — from MUSE)
  - Tool-level "muscle memory" (per-step prompt; returned with each observation — from MUSE)

This module is intentionally stateless between tasks — all learning happens
via the memory system that feeds into the prompt.
"""
from __future__ import annotations

import json
import logging
import re
from typing import Any

from purpose_agent.llm_backend import ChatMessage, LLMBackend
from purpose_agent.types import Action, Heuristic, MemoryTier, State
# Module-level logger named after this module; handlers and levels are left
# to the application that imports it.
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# System Prompt Templates
# ---------------------------------------------------------------------------
# System prompt template, rendered with ``str.format``. Placeholders:
#   {available_actions}  - bullet list of action names and descriptions
#   {strategic_memory}   - learned strategies, or a "none yet" note
#   {procedural_memory}  - index of available SOPs, or a "none" note
# The template contains no literal braces, so no ``{{``/``}}`` escaping is
# needed here; add it if JSON or other brace-bearing text is ever included.
ACTOR_SYSTEM_PROMPT = """\
You are a goal-directed agent. Your purpose is to achieve the stated goal
by taking incremental actions that each move the state closer to the goal.
## Your Decision Process
For each step, you MUST:
1. THINK: Analyze the current state. What has been achieved? What remains?
2. ACT: Choose the single best next action from available actions.
3. PREDICT: State specifically what you expect to change after this action.
## Rules
- Take ONE action per step. Never skip ahead or combine actions.
- Be specific in your predictions — name exact state fields you expect to change.
- If a previous action didn't produce the expected result, adapt your strategy.
- If you believe the goal is achieved, use action "DONE" with no parameters.
## Available Actions
{available_actions}
## Learned Strategies (from past experience)
{strategic_memory}
## Relevant Procedures
{procedural_memory}
"""
# Per-step user prompt template, rendered with ``str.format``. Placeholders:
#   {purpose}         - the goal text
#   {state}           - textual description of the current state
#   {history_window}  - number of past steps the history section may show
#   {history}         - formatted recent action history
#   {tool_memory}     - per-action tips
# The JSON example uses doubled braces (``{{`` / ``}}``) so that ``format``
# emits literal braces instead of treating them as placeholders.
ACTOR_STEP_PROMPT = """\
## Current Goal
{purpose}
## Current State
{state}
## Action History (last {history_window} steps)
{history}
## Tool Tips
{tool_memory}
Based on the current state and your goal, decide your next action.
Respond in this exact JSON format:
```json
{{
  "thought": "Your reasoning about the current state and what to do next",
  "action": {{
    "name": "action_name",
    "params": {{"param1": "value1"}}
  }},
  "expected_delta": "Specific prediction of what will change in the state"
}}
```
"""
class Actor:
    """
    The Actor agent — decides which action to take next in an environment.

    The Actor does NOT evaluate its own performance. That's the Purpose
    Function's job. The Actor just reasons, acts, and predicts.

    Architecture notes (from MUSE arxiv:2510.08002):
    - System prompt is composed dynamically from 3-tier memory
    - Strategic memory is always present (global dilemmas → strategies)
    - Procedural memory is lazy-loaded (index in prompt, details on demand)
    - Tool memory is returned per-step (dynamic instructions with observations)

    Args:
        llm: The LLM backend to use for reasoning
        available_actions: Dict of {action_name: description} the agent can take.
            ``None`` or an empty dict falls back to a single "DONE" action.
        history_window: How many past steps to include in the prompt
        strategic_memory: List of strategic heuristics (loaded at task start)
        procedural_memory: List of procedural SOPs (indexed, fetched on demand)
        tool_memory: Dict of {action_name: dynamic_tip} (updated per-step).
            The dict is copied, so later updates never touch the caller's object.
    """

    # Action name used whenever the LLM reply does not yield a usable one.
    _UNKNOWN_ACTION = "UNKNOWN"
    # Each past result is truncated to this many characters in the prompt.
    _RESULT_PREVIEW_CHARS = 200
    # A decoded JSON object counts as a decision if it has any of these keys.
    _DECISION_KEYS = frozenset({"thought", "action", "expected_delta"})
    # Last-resort patterns for pulling single fields out of malformed JSON.
    _THOUGHT_RE = re.compile(r'"thought"\s*:\s*"([^"]*)"')
    _NAME_RE = re.compile(r'"name"\s*:\s*"([^"]*)"')

    def __init__(
        self,
        llm: LLMBackend,
        available_actions: dict[str, str] | None = None,
        history_window: int = 5,
        strategic_memory: list[Heuristic] | None = None,
        procedural_memory: list[Heuristic] | None = None,
        tool_memory: dict[str, str] | None = None,
    ):
        self.llm = llm
        self.available_actions = available_actions or {"DONE": "Signal that the goal is achieved"}
        self.history_window = history_window
        self.strategic_memory = strategic_memory or []
        self.procedural_memory = procedural_memory or []
        # Copy: update_tool_memory() mutates this dict in place and must not
        # leak those changes into the dict the caller handed in.
        self.tool_memory = dict(tool_memory) if tool_memory else {}

    # ------------------------------------------------------------------
    # Prompt Composition
    # ------------------------------------------------------------------
    def _format_actions(self) -> str:
        """Render the available actions as a markdown bullet list."""
        if not self.available_actions:
            return "No specific action constraints. You may take any action."
        return "\n".join(
            f"- **{name}**: {desc}" for name, desc in self.available_actions.items()
        )

    def _format_strategic_memory(self) -> str:
        """Render strategic heuristics, highest ``q_value`` first."""
        if not self.strategic_memory:
            return "None yet — this is your first task."
        # reverse=True keeps the sort stable, so ties stay in insertion order.
        ranked = sorted(self.strategic_memory, key=lambda h: h.q_value, reverse=True)
        return "\n".join(
            f"- When: {h.pattern}\n  Do: {h.strategy} (confidence: {h.q_value:.2f})"
            for h in ranked
        )

    def _format_procedural_memory(self) -> str:
        """Render the SOP index (id, pattern and strategy of each entry)."""
        if not self.procedural_memory:
            return "No standard operating procedures available."
        lines = ["Available SOPs (ask for details if relevant):"]
        lines.extend(f"- [{h.id}] {h.pattern}: {h.strategy}" for h in self.procedural_memory)
        return "\n".join(lines)

    def _format_tool_memory(self) -> str:
        """Render the per-action tips as a markdown bullet list."""
        if not self.tool_memory:
            return "No tool-specific tips available."
        return "\n".join(
            f"- **{action_name}**: {tip}" for action_name, tip in self.tool_memory.items()
        )

    def _format_history(self, history: list[dict[str, Any]]) -> str:
        """
        Render the last ``history_window`` entries of ``history``.

        Each entry is read through its ``"action"`` and ``"result"`` keys
        (``"N/A"`` when missing). Results of any type are converted with
        ``str`` before being truncated, so ``None`` or dict results no longer
        raise. Step numbers are 1-based positions in the full history.
        """
        if not history:
            return "No actions taken yet."
        if self.history_window <= 0:
            # history[-0:] would select the WHOLE list, not nothing, so a
            # non-positive window has to be handled explicitly.
            return "Earlier actions omitted (history window is 0)."
        recent = history[-self.history_window:]
        first_step = len(history) - len(recent) + 1
        limit = self._RESULT_PREVIEW_CHARS
        return "\n".join(
            f"Step {step_num}: Action={entry.get('action', 'N/A')}, "
            f"Result={str(entry.get('result', 'N/A'))[:limit]}"
            for step_num, entry in enumerate(recent, start=first_step)
        )

    def _build_system_prompt(self) -> str:
        """Fill ACTOR_SYSTEM_PROMPT with actions, strategic and procedural memory."""
        return ACTOR_SYSTEM_PROMPT.format(
            available_actions=self._format_actions(),
            strategic_memory=self._format_strategic_memory(),
            procedural_memory=self._format_procedural_memory(),
        )

    def _build_step_prompt(
        self, purpose: str, state: State, history: list[dict[str, Any]]
    ) -> str:
        """Fill ACTOR_STEP_PROMPT with the goal, state description, history and tool tips."""
        return ACTOR_STEP_PROMPT.format(
            purpose=purpose,
            state=state.describe(),
            history=self._format_history(history),
            tool_memory=self._format_tool_memory(),
            history_window=self.history_window,
        )

    # ------------------------------------------------------------------
    # Core Action Generation
    # ------------------------------------------------------------------
    def decide(
        self,
        purpose: str,
        current_state: State,
        history: list[dict[str, Any]] | None = None,
    ) -> Action:
        """
        Given the current state and purpose, decide the next action.

        Structured generation is tried first. If it raises, the failure is
        logged and a plain-text generation (temperature 0.7) is parsed with
        ``_parse_action_text`` instead; errors from that fallback call
        propagate to the caller.

        Args:
            purpose: The goal text placed in the step prompt.
            current_state: Current state; its ``describe()`` output is shown to the LLM.
            history: Past step records (dicts read via ``"action"``/``"result"``);
                ``None`` is treated as an empty history.

        Returns:
            An Action with thought, name, params, and expected_delta. Missing
            or malformed fields are replaced by defaults (name "UNKNOWN",
            empty params, empty strings).
        """
        history = history or []
        messages = [
            ChatMessage(role="system", content=self._build_system_prompt()),
            ChatMessage(role="user", content=self._build_step_prompt(
                purpose=purpose,
                state=current_state,
                history=history,
            )),
        ]
        # Try structured output first, fall back to text parsing
        schema = {
            "type": "object",
            "properties": {
                "thought": {"type": "string"},
                "action": {
                    "type": "object",
                    "properties": {
                        "name": {"type": "string"},
                        "params": {"type": "object"},
                    },
                    "required": ["name"],
                },
                "expected_delta": {"type": "string"},
            },
            "required": ["thought", "action", "expected_delta"],
        }
        try:
            result = self.llm.generate_structured(messages, schema=schema)
        except Exception as e:
            # Deliberately broad: any backend failure should degrade to the
            # text path rather than abort the step.
            logger.warning("Structured output failed (%s), falling back to text parse", e)
            raw = self.llm.generate(messages, temperature=0.7)
            result = self._parse_action_text(raw)
        if not isinstance(result, dict):
            logger.warning(
                "LLM returned %s instead of a JSON object; using default action",
                type(result).__name__,
            )
        return Action(**self._normalize_decision(result))

    @staticmethod
    def _normalize_decision(result: Any) -> dict[str, Any]:
        """
        Coerce a decoded LLM reply into keyword arguments for ``Action``.

        Tolerates the shapes models commonly get wrong: a non-dict reply,
        ``"action"`` given as a bare name string, ``"params"`` that is null or
        not an object, and null/non-string thought or expected_delta.

        Returns:
            Dict with keys ``name``, ``params``, ``thought``, ``expected_delta``.
        """
        def as_text(value: Any) -> str:
            if value is None:
                return ""
            return value if isinstance(value, str) else str(value)

        if not isinstance(result, dict):
            result = {}

        action = result.get("action")
        if isinstance(action, str):
            # Flattened form: "action": "DONE"
            name, params = action, {}
        elif isinstance(action, dict):
            name, params = action.get("name"), action.get("params")
        else:
            name, params = None, {}

        if not isinstance(name, str) or not name:
            name = Actor._UNKNOWN_ACTION
        if not isinstance(params, dict):
            params = {}

        return {
            "name": name,
            "params": params,
            "thought": as_text(result.get("thought")),
            "expected_delta": as_text(result.get("expected_delta")),
        }

    # ------------------------------------------------------------------
    # Memory Updates (called by Orchestrator between tasks)
    # ------------------------------------------------------------------
    def update_strategic_memory(self, heuristics: list[Heuristic]) -> None:
        """Replace strategic memory with the STRATEGIC-tier entries of ``heuristics``."""
        self.strategic_memory = [
            h for h in heuristics if h.tier == MemoryTier.STRATEGIC
        ]
        logger.info("Actor strategic memory updated: %d heuristics", len(self.strategic_memory))

    def update_procedural_memory(self, heuristics: list[Heuristic]) -> None:
        """Replace the SOP index with the PROCEDURAL-tier entries of ``heuristics``."""
        self.procedural_memory = [
            h for h in heuristics if h.tier == MemoryTier.PROCEDURAL
        ]
        logger.info("Actor procedural memory updated: %d SOPs", len(self.procedural_memory))

    def update_tool_memory(self, tips: dict[str, str]) -> None:
        """Merge ``tips`` into the per-action tool tips (existing keys are overwritten)."""
        self.tool_memory.update(tips)
        logger.info("Actor tool memory updated: %s", list(tips))

    # ------------------------------------------------------------------
    # Fallback Text Parser
    # ------------------------------------------------------------------
    @staticmethod
    def _parse_action_text(raw: str) -> dict[str, Any]:
        """
        Best-effort extraction of action JSON from free-form text.

        Strategy:
          1. Starting at every ``{`` in the text, try to decode a complete JSON
             value. The first decoded object that carries a decision key
             ("thought", "action" or "expected_delta") is returned. Unlike a
             brace-free regex this handles the nested "action" object, with
             or without a surrounding code fence.
          2. Otherwise pull "thought" and "name" out with regexes and return a
             skeleton decision with empty params.

        Args:
            raw: Text produced by the LLM. ``None`` is treated as empty text.

        Returns:
            A dict shaped like the structured reply; in the last-resort case
            ``expected_delta`` is "Unable to parse prediction" and ``thought``
            falls back to the first 200 characters of ``raw``.
        """
        if not isinstance(raw, str):
            raw = "" if raw is None else str(raw)

        decoder = json.JSONDecoder()
        start = raw.find("{")
        while start != -1:
            try:
                candidate, _ = decoder.raw_decode(raw, start)
            except json.JSONDecodeError:
                candidate = None
            if isinstance(candidate, dict) and not Actor._DECISION_KEYS.isdisjoint(candidate):
                return candidate
            start = raw.find("{", start + 1)

        # Last resort: extract key-value pairs
        thought_match = Actor._THOUGHT_RE.search(raw)
        thought = thought_match.group(1) if thought_match else ""
        name_match = Actor._NAME_RE.search(raw)
        action_name = name_match.group(1) if name_match else Actor._UNKNOWN_ACTION
        return {
            "thought": thought or raw[:200],
            "action": {"name": action_name, "params": {}},
            "expected_delta": "Unable to parse prediction",
        }