| """ |
| state_delta.py — Markovian State-Delta Evaluation. |
| |
| PROBLEM: Passing full conversation history to the Critic costs O(N²) tokens |
| as the trajectory grows. This makes long tasks unaffordable. |
| |
| SOLUTION: Recognize this IS a Markov Decision Process. The Critic only needs: |
| - The delta between state(t) and state(t+1) |
| - The action taken |
| - The purpose (constant) |
| |
| This reduces the Critic's input to ~300 tokens regardless of trajectory length. |
| |
| Mathematical basis: |
| In an MDP, the value of state s' depends only on s' itself, not on the path |
| taken to reach it. Therefore Φ(s') can be computed from s' alone. |
| |
| We compute: diff = state_diff(s_t, s_{t+1}) |
| Critic sees: (purpose, action, diff) → Φ score |
| Token cost: O(1) per step, not O(N) |
| """ |
| from __future__ import annotations |
|
|
| import difflib |
| import json |
| from dataclasses import dataclass |
| from typing import Any |
|
|
| from purpose_agent.types import State |
|
|
|
|
@dataclass
class StateDelta:
    """
    Minimal description of how one state differs from the next.

    This object is the Critic's entire view of a transition — never the
    full history — which keeps the per-step token cost roughly constant
    (~100-300 tokens) no matter how long the trajectory grows.
    """
    added_keys: dict[str, Any]                  # keys present only in the new state
    removed_keys: list[str]                     # keys dropped from the old state
    changed_keys: dict[str, tuple[Any, Any]]    # key -> (old value, new value)
    summary_diff: str                           # human-readable rendering of the diff
    token_estimate: int = 0                     # rough token count of summary_diff

    @property
    def is_empty(self) -> bool:
        """True when the transition produced no observable key-level change."""
        return not (self.added_keys or self.removed_keys or self.changed_keys)

    @property
    def change_magnitude(self) -> int:
        """Total number of touched keys — handy for spotting no-op actions."""
        return sum(
            len(group)
            for group in (self.added_keys, self.removed_keys, self.changed_keys)
        )
|
|
|
|
def compute_state_delta(s_before: State, s_after: State) -> StateDelta:
    """
    Compute the minimal diff between two states.

    This is the core O(1) optimization: instead of passing both full states
    to the Critic (which grows with trajectory), we pass only what changed.

    Keys with a leading underscore are treated as internal bookkeeping and
    excluded from the diff in both directions.

    Token cost: ~100-300 tokens regardless of state size or trajectory length.

    Args:
        s_before: State before the action. Assumes `.data` is a dict and
            `.summary` is an optional string — TODO confirm against
            `purpose_agent.types.State`.
        s_after: State after the action (same assumed shape).

    Returns:
        A StateDelta with added/removed/changed keys, a human-readable
        summary, and a rough token estimate (~4 chars per token).
    """
    d_before = s_before.data
    d_after = s_after.data

    added: dict[str, Any] = {}
    changed: dict[str, tuple[Any, Any]] = {}

    # Forward pass: new and modified keys.
    for key, val in d_after.items():
        if key.startswith("_"):
            continue
        if key not in d_before:
            added[key] = val
        elif d_before[key] != val:
            changed[key] = (d_before[key], val)

    # Reverse pass: keys that disappeared.
    removed = [
        key
        for key in d_before
        if not key.startswith("_") and key not in d_after
    ]

    # Render a compact summary. The emptiness guards in the original were
    # redundant — iterating an empty container is already a no-op — so plain
    # comprehensions suffice.
    lines = [f"+ {k} = {_truncate(v)}" for k, v in added.items()]
    lines += [f"- {k} (removed)" for k in removed]
    lines += [
        f"~ {k}: {_truncate(old)} → {_truncate(new)}"
        for k, (old, new) in changed.items()
    ]

    # Fall back to the free-text summary diff only when there were no
    # key-level changes to report.
    if s_before.summary and s_after.summary and s_before.summary != s_after.summary:
        text_diff = _text_diff(s_before.summary, s_after.summary)
        if text_diff and not lines:
            lines.append(text_diff)

    summary = "\n".join(lines) if lines else "(no observable change)"
    # Crude heuristic: ~4 characters per token.
    token_est = len(summary) // 4

    return StateDelta(
        added_keys=added,
        removed_keys=removed,
        changed_keys=changed,
        summary_diff=summary,
        token_estimate=token_est,
    )
|
|
|
|
def format_critic_input(
    purpose: str,
    action_name: str,
    action_thought: str,
    delta: StateDelta,
    max_tokens: int = 300,
) -> str:
    """
    Build the minimal prompt for the Markovian Critic.

    The Critic never sees history or full state — only purpose, action, and
    the state delta, within a budget of roughly `max_tokens` tokens. This
    works because in an MDP the value of a state depends only on the state
    itself, not on how it was reached.
    """
    # Per-field truncation keeps any single component from eating the budget.
    text = "\n".join(
        [
            f"PURPOSE: {purpose[:100]}",
            f"ACTION: {action_name}",
            f"THOUGHT: {action_thought[:80]}",
            f"STATE CHANGE:\n{delta.summary_diff[:400]}",
        ]
    )

    # Final hard cap, using the ~4-chars-per-token heuristic.
    char_budget = max_tokens * 4
    if len(text) > char_budget:
        return text[:char_budget] + "\n...(truncated)"
    return text
|
|
|
|
| def _truncate(val: Any, max_len: int = 80) -> str: |
| """Truncate a value for display.""" |
| s = str(val) |
| return s[:max_len] + "..." if len(s) > max_len else s |
|
|
|
|
| def _text_diff(before: str, after: str) -> str: |
| """Compute a concise text diff between two summaries.""" |
| if before == after: |
| return "" |
|
|
| |
| before_lines = before.split("\n") |
| after_lines = after.split("\n") |
|
|
| diff_lines = [] |
| for line in difflib.unified_diff(before_lines, after_lines, lineterm="", n=0): |
| if line.startswith("---") or line.startswith("+++") or line.startswith("@@"): |
| continue |
| if line.startswith("+"): |
| diff_lines.append(f"+ {line[1:]}") |
| elif line.startswith("-"): |
| diff_lines.append(f"- {line[1:]}") |
|
|
| return "\n".join(diff_lines[:5]) |
|
|