# purpose_agent/state_delta.py — first-principles: O(1) Markovian critic (no history needed)
"""
state_delta.py — Markovian State-Delta Evaluation.
PROBLEM: Passing full conversation history to the Critic costs O(N²) tokens
as the trajectory grows. This makes long tasks unaffordable.
SOLUTION: Recognize this IS a Markov Decision Process. The Critic only needs:
- The delta between state(t) and state(t+1)
- The action taken
- The purpose (constant)
This reduces the Critic's input to ~300 tokens regardless of trajectory length.
Mathematical basis:
In an MDP, the value of state s' depends only on s' itself, not on the path
taken to reach it. Therefore Φ(s') can be computed from s' alone.
We compute: diff = state_diff(s_t, s_{t+1})
Critic sees: (purpose, action, diff) → Φ score
Token cost: O(1) per step, not O(N)
"""
from __future__ import annotations
import difflib
import json
from dataclasses import dataclass
from typing import Any
from purpose_agent.types import State
@dataclass
class StateDelta:
    """
    Minimal record of what changed between two consecutive states.

    This — not the full history — is everything the Critic consumes,
    so the per-step token cost stays flat (~100-300 tokens) no matter
    how long the trajectory runs.
    """

    added_keys: dict[str, Any]  # keys present in s_{t+1} but absent from s_t
    removed_keys: list[str]  # keys present in s_t but gone in s_{t+1}
    changed_keys: dict[str, tuple[Any, Any]]  # key -> (old_value, new_value)
    summary_diff: str  # human-readable rendering shown to the Critic
    token_estimate: int = 0  # rough token count for this delta

    @property
    def change_magnitude(self) -> int:
        """Total number of touched keys — handy for spotting no-ops."""
        counts = (len(self.added_keys), len(self.removed_keys), len(self.changed_keys))
        return sum(counts)

    @property
    def is_empty(self) -> bool:
        """True when the transition produced no observable state change."""
        return self.change_magnitude == 0
def compute_state_delta(s_before: State, s_after: State) -> StateDelta:
    """
    Diff two states into the minimal payload the Critic needs.

    Rather than shipping both full states (whose size grows with the
    trajectory), we emit only what was added, removed, or changed.
    Token cost stays roughly constant (~100-300 tokens) regardless of
    state size or trajectory length — the core O(1) optimization.
    """
    old = s_before.data
    new = s_after.data

    # Only public (non-underscore) keys participate in the diff;
    # internal bookkeeping keys are ignored on both sides.
    added = {
        k: v
        for k, v in new.items()
        if not k.startswith("_") and k not in old
    }
    changed = {
        k: (old[k], v)
        for k, v in new.items()
        if not k.startswith("_") and k in old and old[k] != v
    }
    removed = [
        k for k in old if not k.startswith("_") and k not in new
    ]

    # Render the human-readable summary the Critic actually reads.
    lines: list[str] = []
    lines.extend(f"+ {k} = {_truncate(v)}" for k, v in added.items())
    lines.extend(f"- {k} (removed)" for k in removed)
    lines.extend(
        f"~ {k}: {_truncate(old_v)} → {_truncate(new_v)}"
        for k, (old_v, new_v) in changed.items()
    )

    # Fall back to a textual diff of the summaries only when no
    # key-level change was observed but the summaries themselves moved.
    if s_before.summary and s_after.summary and s_before.summary != s_after.summary:
        text_diff = _text_diff(s_before.summary, s_after.summary)
        if text_diff and not lines:
            lines.append(text_diff)

    summary = "\n".join(lines) if lines else "(no observable change)"
    return StateDelta(
        added_keys=added,
        removed_keys=removed,
        changed_keys=changed,
        summary_diff=summary,
        token_estimate=len(summary) // 4,  # rough heuristic: ~4 chars per token
    )
def format_critic_input(
    purpose: str,
    action_name: str,
    action_thought: str,
    delta: StateDelta,
    max_tokens: int = 300,
) -> str:
    """
    Render the minimal Markovian Critic prompt.

    The Critic never sees history or full state — only the (truncated)
    purpose, the action taken, and what changed. This keeps the input
    bounded at roughly ``max_tokens`` regardless of trajectory length:
    in an MDP, Φ(s') depends on s' alone, not on the path to it.
    """
    text = "\n".join((
        f"PURPOSE: {purpose[:100]}",
        f"ACTION: {action_name}",
        f"THOUGHT: {action_thought[:80]}",
        f"STATE CHANGE:\n{delta.summary_diff[:400]}",
    ))
    # Enforce a hard character cap (~4 chars per token).
    limit = max_tokens * 4
    if len(text) <= limit:
        return text
    return text[:limit] + "\n...(truncated)"
def _truncate(val: Any, max_len: int = 80) -> str:
"""Truncate a value for display."""
s = str(val)
return s[:max_len] + "..." if len(s) > max_len else s
def _text_diff(before: str, after: str) -> str:
"""Compute a concise text diff between two summaries."""
if before == after:
return ""
# Use unified diff for short texts
before_lines = before.split("\n")
after_lines = after.split("\n")
diff_lines = []
for line in difflib.unified_diff(before_lines, after_lines, lineterm="", n=0):
if line.startswith("---") or line.startswith("+++") or line.startswith("@@"):
continue
if line.startswith("+"):
diff_lines.append(f"+ {line[1:]}")
elif line.startswith("-"):
diff_lines.append(f"- {line[1:]}")
return "\n".join(diff_lines[:5]) # Cap at 5 diff lines