# Source: Hugging Face Space "Yash030/agentmemory-python" (status: Running)
# File size: 104,776 bytes
# Revision: e3a4f99

import os
import re
import time
import uuid
import json
import hashlib
import datetime
from typing import Dict, Any, List, Optional, Tuple, Set
from db import StateKV
from search import (
    SearchIndex,
    VectorIndex,
    GeminiEmbeddingProvider,
    HybridSearch,
    base64_to_float32,
    float32_to_base64
)

# =====================================================================
# Global Variables / Module State
# =====================================================================
# BM25 keyword index shared by the functions in this module.
_bm25_index = SearchIndex()
# Vector index; embeddings are added to it by vector_index_add_guarded().
_vector_index = VectorIndex()
# Embedding provider. It starts as None, in which case
# vector_index_add_guarded() returns False without embedding anything.
_embedding_provider = None
# NOTE(review): the meaning of the two trailing None arguments is not visible
# here — confirm against the HybridSearch constructor in search.py.
_hybrid_search = HybridSearch(_bm25_index, _vector_index, None, None)
# Index persistence helper, or None. When set, observe() and remember()
# call its schedule_save() after updating the indexes.
_index_persistence = None
_stream_broadcaster = None  # Callable: (payload) -> None

# Default scopes matching schema.ts
class KV:
    """Names of the key-value scopes used by the memory store.

    Fixed scopes are plain class attributes. Scopes that depend on an
    identifier (session, team, user, observation) are built by the static
    helper methods at the bottom of the class.
    """

    # --- core records ---
    sessions = "mem:sessions"
    memories = "mem:memories"
    summaries = "mem:summaries"
    config = "mem:config"
    metrics = "mem:metrics"
    health = "mem:health"
    bm25Index = "mem:index:bm25"
    relations = "mem:relations"
    profiles = "mem:profiles"
    claudeBridge = "mem:claude-bridge"

    # --- graph ---
    graphNodes = "mem:graph:nodes"
    graphEdges = "mem:graph:edges"
    graphSnapshot = "mem:graph:snapshot"
    graphNameIndex = "mem:graph:name-index"
    graphEdgeKey = "mem:graph:edge-key"
    graphNodeDegree = "mem:graph:node-degree"

    # --- everything else ---
    semantic = "mem:semantic"
    procedural = "mem:procedural"
    audit = "mem:audit"
    actions = "mem:actions"
    actionEdges = "mem:action-edges"
    leases = "mem:leases"
    routines = "mem:routines"
    routineRuns = "mem:routine-runs"
    signals = "mem:signals"
    checkpoints = "mem:checkpoints"
    mesh = "mem:mesh"
    sketches = "mem:sketches"
    facets = "mem:facets"
    sentinels = "mem:sentinels"
    crystals = "mem:crystals"
    lessons = "mem:lessons"
    insights = "mem:insights"
    graphEdgeHistory = "mem:graph:edge-history"
    retentionScores = "mem:retention"
    accessLog = "mem:access"
    imageRefs = "mem:image-refs"
    slots = "mem:slots"
    globalSlots = "mem:slots:global"
    commits = "mem:commits"
    recentSearches = "mem:recent-searches"

    @staticmethod
    def observations(session_id: str) -> str:
        """Return the scope that stores the observations of ``session_id``."""
        return "mem:obs:{}".format(session_id)

    @staticmethod
    def team_shared(team_id: str) -> str:
        """Return the shared scope of team ``team_id``."""
        return "mem:team:{}:shared".format(team_id)

    @staticmethod
    def team_users(team_id: str, user_id: str) -> str:
        """Return the scope of user ``user_id`` inside team ``team_id``."""
        return "mem:team:{}:users:{}".format(team_id, user_id)

    @staticmethod
    def team_profile(team_id: str) -> str:
        """Return the profile scope of team ``team_id``."""
        return "mem:team:{}:profile".format(team_id)

    @staticmethod
    def enriched_chunks(session_id: str) -> str:
        """Return the enriched-chunk scope of ``session_id``."""
        return "mem:enriched:{}".format(session_id)

    @staticmethod
    def latent_embeddings(obs_id: str) -> str:
        """Return the latent-embedding scope of observation ``obs_id``."""
        return "mem:latent:{}".format(obs_id)

# =====================================================================
# Core Helpers & Utilities
# =====================================================================

def generate_id(prefix: str) -> str:
    """Create an identifier ``<prefix>_<timestamp>_<random>``.

    The timestamp part is the current time in milliseconds written in
    base 36 (digits then lowercase letters); the random part is the first
    12 hex characters of a fresh UUID4.
    """
    alphabet = "0123456789abcdefghijklmnopqrstuvwxyz"
    remaining = int(time.time() * 1000)
    digits = []
    while remaining > 0:
        remaining, index = divmod(remaining, 36)
        digits.append(alphabet[index])
    # Digits were collected least-significant first.
    encoded = "".join(reversed(digits)) or "0"
    suffix = uuid.uuid4().hex[:12]
    return f"{prefix}_{encoded}_{suffix}"

def fingerprint_id(prefix: str, content: str) -> str:
    """Derive a stable identifier from ``content``.

    The content is stripped and lowercased, hashed with SHA-256, and the
    first 16 hex characters of the digest are appended to ``prefix``.
    """
    normalized = content.strip().lower()
    digest = hashlib.sha256(normalized.encode('utf-8')).hexdigest()
    short = digest[:16]
    return f"{prefix}_{short}"

def auto_complete_old_active_sessions(kv: StateKV, current_session_id: str) -> int:
    """Mark every active session other than the current one as completed.

    Each affected session gets ``status = "completed"``, an ``endedAt``
    timestamp if it had none, and a fresh ``updatedAt``; it is then written
    back to the sessions scope.

    Returns:
        The number of sessions that were completed.
    """
    all_sessions = kv.list(KV.sessions)
    stamp = datetime.datetime.utcnow().isoformat() + "Z"
    closed = 0
    for session in all_sessions:
        is_current = session.get("id") == current_session_id
        if is_current or session.get("status") != "active":
            continue
        session["status"] = "completed"
        # Keep an existing end time; only fill it in when missing.
        session.setdefault("endedAt", stamp)
        session["updatedAt"] = stamp
        kv.set(KV.sessions, session["id"], session)
        closed += 1
    if closed > 0:
        print(f"[session] Auto-completed {closed} dangling active sessions.")
    return closed

def jaccard_similarity(a: str, b: str) -> float:
    """Return the Jaccard similarity of the word sets of ``a`` and ``b``.

    Only whitespace-separated tokens longer than two characters count.
    Two empty token sets are treated as identical (1.0); exactly one empty
    set gives 0.0.
    """
    left = {token for token in a.split() if len(token) > 2}
    right = {token for token in b.split() if len(token) > 2}
    if not left and not right:
        return 1.0
    if not (left and right):
        return 0.0
    shared = left & right
    combined = left | right
    return len(shared) / len(combined)

# =====================================================================
# Privacy & Data Scrubbing
# =====================================================================

# Matches a <private>...</private> section, including across newlines.
PRIVATE_TAG_RE = re.compile(r'<private>[\s\S]*?</private>', re.IGNORECASE)

# Regex sources for secret-looking tokens; every one is compiled with
# re.IGNORECASE into SECRET_PATTERN_SOURCES below, in this order.
_SECRET_REGEX_TEXTS = (
    r'(?:api[_-]?key|secret|token|password|credential|auth)[\s]*[=:]\s*["\']?[A-Za-z0-9_\-/.+]{20,}["\']?',
    r'Bearer\s+[A-Za-z0-9._\-+/=]{20,}',
    r'sk-proj-[A-Za-z0-9\-_]{20,}',
    r'(?:sk|pk|rk|ak)-[A-Za-z0-9][A-Za-z0-9\-_]{19,}',
    r'sk-ant-[A-Za-z0-9\-_]{20,}',
    r'gh[pus]_[A-Za-z0-9]{36,}',
    r'github_pat_[A-Za-z0-9_]{22,}',
    r'xoxb-[A-Za-z0-9\-]+',
    r'AKIA[0-9A-Z]{16}',
    r'AIza[A-Za-z0-9\-_]{35}',
    r'eyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}',
    r'npm_[A-Za-z0-9]{36}',
    r'glpat-[A-Za-z0-9\-_]{20,}',
    r'dop_v1_[A-Za-z0-9]{64}',
)

SECRET_PATTERN_SOURCES = [
    re.compile(source, re.IGNORECASE) for source in _SECRET_REGEX_TEXTS
]


def strip_private_data(input_str: str) -> str:
    """Redact private sections and secret-looking tokens from a string.

    ``<private>`` blocks become ``[REDACTED]``; every match of a pattern in
    ``SECRET_PATTERN_SOURCES`` becomes ``[REDACTED_SECRET]``.
    """
    scrubbed = PRIVATE_TAG_RE.sub("[REDACTED]", input_str)
    for secret_re in SECRET_PATTERN_SOURCES:
        scrubbed = secret_re.sub("[REDACTED_SECRET]", scrubbed)
    return scrubbed

# =====================================================================
# Audit Log System
# =====================================================================

def record_audit(
    kv: StateKV,
    operation: str,
    function_id: str,
    target_ids: List[str],
    details: Optional[Dict[str, Any]] = None,
    quality_score: Optional[float] = None,
    user_id: Optional[str] = None,
) -> Dict[str, Any]:
    """Write one audit entry to the audit scope and return it.

    Args:
        kv: Key-value store the entry is written to.
        operation: Name of the audited operation.
        function_id: Identifier of the function that performed it.
        target_ids: Identifiers of the records the operation touched.
        details: Optional free-form details. Defaults to a fresh empty dict
            for every call, so entries never share one mutable default.
        quality_score: Optional quality score stored with the entry.
        user_id: Optional identifier of the acting user.

    Returns:
        The stored entry, including its generated ``id`` and ``timestamp``.
    """
    entry = {
        "id": generate_id("aud"),
        "timestamp": datetime.datetime.utcnow().isoformat() + "Z",
        "operation": operation,
        "userId": user_id,
        "functionId": function_id,
        "targetIds": target_ids,
        "details": {} if details is None else details,
        "qualityScore": quality_score,
    }
    kv.set(KV.audit, entry["id"], entry)
    return entry

def safe_audit(
    kv: StateKV,
    operation: str,
    function_id: str,
    target_ids: List[str],
    details: Optional[Dict[str, Any]] = None,
    quality_score: Optional[float] = None,
    user_id: Optional[str] = None,
) -> None:
    """Record an audit entry without ever raising.

    Takes the same arguments as ``record_audit``. Any exception raised
    while writing is printed and swallowed so that auditing cannot break
    the calling operation.
    """
    # Use a fresh dict per call instead of a shared mutable default.
    safe_details = {} if details is None else details
    try:
        record_audit(kv, operation, function_id, target_ids, safe_details, quality_score, user_id)
    except Exception as e:
        print(f"[audit] Failed to write audit: {e}")

def query_audit(
    kv: StateKV,
    filter_opts: Optional[Dict[str, Any]] = None
) -> List[Dict[str, Any]]:
    """Return audit entries, newest first, optionally filtered.

    Args:
        kv: Key-value store holding the audit scope.
        filter_opts: Optional filter dict. Recognised keys:
            ``operation`` (exact match), ``dateFrom`` / ``dateTo``
            (inclusive bounds, parsed with ``dateutil``), and ``limit``
            (default 100).

    Returns:
        The matching entries sorted by ``timestamp`` descending and cut to
        the limit. Without ``filter_opts`` the 100 newest entries are
        returned.

    Notes:
        A bound that cannot be parsed is ignored. When at least one bound
        is valid, entries whose timestamp is missing or unparsable are
        dropped. Time zone information is discarded (not converted) before
        comparing, for bounds and timestamps alike.
    """
    # "or ''" keeps the sort from failing on an entry whose timestamp is None.
    entries = sorted(
        kv.list(KV.audit),
        key=lambda x: x.get("timestamp") or "",
        reverse=True,
    )
    if not filter_opts:
        return entries[:100]

    op = filter_opts.get("operation")
    if op:
        entries = [e for e in entries if e.get("operation") == op]

    date_from = filter_opts.get("dateFrom")
    date_to = filter_opts.get("dateTo")
    if date_from or date_to:
        # Imported only when a date filter is requested, so queries that
        # filter by operation or limit alone do not need dateutil.
        import dateutil.parser

        def _parse_naive(value: Any) -> Optional[datetime.datetime]:
            """Parse ``value`` into a naive datetime, or None on failure."""
            try:
                return dateutil.parser.parse(value).replace(tzinfo=None)
            except Exception:
                return None

        lower = _parse_naive(date_from) if date_from else None
        upper = _parse_naive(date_to) if date_to else None

        if lower is not None or upper is not None:
            kept = []
            for e in entries:
                ts = e.get("timestamp")
                if not ts:
                    continue
                # Parse each timestamp once and test it against both bounds.
                stamp = _parse_naive(ts)
                if stamp is None:
                    continue
                if lower is not None and stamp < lower:
                    continue
                if upper is not None and stamp > upper:
                    continue
                kept.append(e)
            entries = kept

    limit = filter_opts.get("limit", 100)
    return entries[:limit]

# =====================================================================
# Image Store System
# =====================================================================

# Directory where managed image files are stored: ~/.agentmemory/images
IMAGES_DIR = os.path.join(os.path.expanduser("~"), ".agentmemory", "images")

def get_max_bytes() -> int:
    """Return the image store size limit in bytes.

    The value comes from ``AGENTMEMORY_IMAGE_STORE_MAX_BYTES``. When the
    variable is unset, or is not a valid integer, the default of 500 MiB
    is used instead of raising.
    """
    default_limit = 500 * 1024 * 1024
    configured = os.getenv("AGENTMEMORY_IMAGE_STORE_MAX_BYTES")
    if configured is None:
        return default_limit
    try:
        return int(configured)
    except ValueError:
        # A bad setting should not crash callers; report it and fall back.
        print(
            "[image store] Ignoring invalid AGENTMEMORY_IMAGE_STORE_MAX_BYTES="
            f"{configured!r}; using default {default_limit}"
        )
        return default_limit

def is_managed_image_path(file_path: str) -> bool:
    """Tell whether ``file_path`` is the image directory or lies inside it.

    Both paths are made absolute with ``os.path.abspath`` before comparing.
    An empty path is never managed.
    """
    if not file_path:
        return False
    root = os.path.abspath(IMAGES_DIR)
    candidate = os.path.abspath(file_path)
    if candidate == root:
        return True
    # The trailing separator stops sibling directories such as
    # "images-old" from matching.
    return candidate.startswith(root + os.sep)

def save_image_to_disk(base64_data: str) -> Tuple[str, int]:
    """Decode a base64 image and store it under ``IMAGES_DIR``.

    The file is named after the SHA-256 of the base64 payload, so saving
    the same image twice reuses the existing file.

    Args:
        base64_data: Raw base64 text or a ``data:image/...`` URI.

    Returns:
        ``(path, bytes_written)``. ``("", 0)`` for empty input, and
        ``(path, 0)`` when the file already existed.
    """
    if not base64_data:
        return "", 0

    if not os.path.exists(IMAGES_DIR):
        os.makedirs(IMAGES_DIR, exist_ok=True)

    payload = base64_data
    extension = "png"

    if base64_data.startswith("data:image/"):
        # Split "data:image/<type>;base64" from the payload at the first comma.
        header, comma, body = base64_data.partition(",")
        if comma:
            if "jpeg" in header or "jpg" in header:
                extension = "jpg"
            elif "webp" in header:
                extension = "webp"
            elif "gif" in header:
                extension = "gif"
            payload = body
    elif base64_data.startswith("/9j/"):
        # "/9j/" is how base64-encoded JPEG data begins.
        extension = "jpg"

    digest = hashlib.sha256(payload.encode('utf-8')).hexdigest()
    target = os.path.join(IMAGES_DIR, f"{digest}.{extension}")

    if os.path.exists(target):
        return target, 0

    import base64
    decoded = base64.b64decode(payload)
    with open(target, "wb") as handle:
        handle.write(decoded)

    return target, os.path.getsize(target)

def delete_image(file_path: Optional[str]) -> int:
    """Delete a managed image file and return the number of bytes freed.

    Returns 0 when the path is empty, is not inside the image directory,
    does not exist, or cannot be removed (the error is printed).
    """
    if not (file_path and is_managed_image_path(file_path)):
        return 0
    try:
        if not os.path.exists(file_path):
            return 0
        freed = os.path.getsize(file_path)
        os.remove(file_path)
        return freed
    except Exception as e:
        print(f"[agentmemory] Failed to delete image context: {e}")
    return 0

def touch_image(file_path: str) -> None:
    """Set the access and modification times of a managed image to now.

    Does nothing for empty or unmanaged paths and for missing files.
    Errors are deliberately ignored (best effort).
    """
    if file_path and is_managed_image_path(file_path):
        try:
            if os.path.exists(file_path):
                os.utime(file_path, None)
        except Exception:
            pass

# =====================================================================
# Index Persistence System (JSON Sharded)
# =====================================================================

class IndexPersistence:
    """Saves and restores the BM25 and vector indexes through the KV store.

    Each index is serialized to one JSON string, cut into fixed-size
    character shards stored under generation-specific scopes, and described
    by a manifest kept in ``KV.bm25Index``.
    """

    def __init__(self, kv: StateKV, bm25: SearchIndex, vector: Optional[VectorIndex]):
        """Keep references to the store and to the indexes to persist.

        Args:
            kv: Key-value store used for manifests and shards.
            bm25: BM25 index; always saved and loaded.
            vector: Optional vector index; skipped when falsy.
        """
        self.kv = kv
        self.bm25 = bm25
        self.vector = vector

    def schedule_save(self) -> None:
        """Save the indexes. Currently this calls ``save()`` immediately."""
        self.save()

    def save(self) -> None:
        """Serialize both indexes and store them as sharded JSON.

        Any exception is printed and swallowed, so a failed save never
        propagates to the caller.
        """
        try:
            self.save_sharded_index(
                json.dumps(self.bm25.serialize_data()),
                "data:manifest",
                "data",
                "mem:index:bm25:bm25:"
            )
            if self.vector:
                self.save_sharded_index(
                    json.dumps(self.vector.serialize_data()),
                    "vectors:manifest",
                    "vectors",
                    "mem:index:bm25:vectors:"
                )
        except Exception as e:
            print(f"[index persistence] failed to save index: {e}")

    def save_sharded_index(self, serialized: str, manifest_key: str, legacy_key: str, scope_prefix: str) -> None:
        """Write ``serialized`` as shards plus a manifest, then drop old shards.

        Args:
            serialized: The JSON text to store.
            manifest_key: Key in ``KV.bm25Index`` under which the manifest goes.
            legacy_key: Key in ``KV.bm25Index`` of the older single-value
                format; it is deleted after the manifest is written.
            scope_prefix: Prefix of the scopes that hold the shards.
        """
        # Remember the previous manifest so its shards can be removed below.
        previous = self.kv.get(KV.bm25Index, manifest_key)
        generation = generate_id("idx")
        chunk_chars = 2000000
        shards = []
        chunks = []

        # Cut the text into pieces of at most chunk_chars characters. Each
        # shard scope is "<scope_prefix><generation>:<5-digit shard index>".
        offset = 0
        shard_idx = 0
        while offset < len(serialized):
            scope = f"{scope_prefix}{generation}:{str(shard_idx).zfill(5)}"
            chunk = serialized[offset:offset + chunk_chars]
            shards.append({"scope": scope, "key": "data", "chars": len(chunk)})
            chunks.append(chunk)
            offset += chunk_chars
            shard_idx += 1

        # Write all shards first; the manifest is only written afterwards.
        for shard, chunk in zip(shards, chunks):
            self.kv.set(shard["scope"], shard["key"], chunk)

        next_manifest = {
            "v": 1,
            "generation": generation,
            "shards": shards,
            "chars": len(serialized)
        }

        self.kv.set(KV.bm25Index, manifest_key, next_manifest)
        self.kv.delete(KV.bm25Index, legacy_key)

        # Cleanup ALL obsolete shards starting with scope_prefix that are NOT in the current shards
        # NOTE(review): this uses the store's private _get_conn() and issues
        # raw SQL against kv_store. No explicit commit is made here —
        # presumably the connection autocommits; confirm.
        try:
            conn = self.kv._get_conn()
            try:
                with conn.cursor() as cursor:
                    cursor.execute(
                        "SELECT DISTINCT scope FROM kv_store WHERE scope LIKE %s",
                        (scope_prefix + "%",)
                    )
                    rows = cursor.fetchall()
                    current_scopes = {s["scope"] for s in shards}
                    to_delete = []
                    for row in rows:
                        scope_name = row["scope"]
                        if scope_name not in current_scopes:
                            to_delete.append(scope_name)
                    
                    if to_delete:
                        # Delete in batches of 50 scopes per statement.
                        for i in range(0, len(to_delete), 50):
                            chunk_delete = to_delete[i:i + 50]
                            format_strings = ','.join(['%s'] * len(chunk_delete))
                            cursor.execute(
                                f"DELETE FROM kv_store WHERE scope IN ({format_strings})",
                                tuple(chunk_delete)
                            )
            finally:
                conn.close()
        except Exception as ex:
            # A failed cleanup is only reported; the new index is already stored.
            print(f"[index persistence] error cleaning up obsolete shards: {ex}")

        # Also delete the shards listed in the previous manifest through the
        # KV API, skipping any that belong to the manifest just written.
        if previous and isinstance(previous, dict) and previous.get("v") == 1 and isinstance(previous.get("shards"), list):
            current_shards = {(s["scope"], s["key"]) for s in shards}
            for old_shard in previous["shards"]:
                if (old_shard["scope"], old_shard["key"]) not in current_shards:
                    self.kv.delete(old_shard["scope"], old_shard["key"])

    def load(self) -> Dict[str, Any]:
        """Restore the indexes from the store.

        Returns:
            ``{"bm25": bool, "vector": bool}`` telling which indexes were
            restored. Restore errors are printed and reported as False.
        """
        bm25_data = self.load_sharded_data("data", "data:manifest")
        bm25_loaded = False
        if bm25_data:
            try:
                self.bm25.restore_from_data(json.loads(bm25_data))
                bm25_loaded = True
            except Exception as e:
                print(f"[index persistence] failed to restore BM25: {e}")

        vector_loaded = False
        if self.vector:
            vector_data = self.load_sharded_data("vectors", "vectors:manifest")
            if vector_data:
                try:
                    self.vector.restore_from_data(json.loads(vector_data))
                    vector_loaded = True
                except Exception as e:
                    print(f"[index persistence] failed to restore vectors: {e}")

        return {"bm25": bm25_loaded, "vector": vector_loaded}

    def load_sharded_data(self, legacy_key: str, manifest_key: str) -> Optional[str]:
        """Return the stored JSON text for one index, or None.

        A version-1 manifest is preferred: its shards are read in order and
        joined. If any shard is missing the result is None. Without such a
        manifest, the value under ``legacy_key`` is returned when it is a
        string.
        """
        manifest = self.kv.get(KV.bm25Index, manifest_key)
        if manifest and isinstance(manifest, dict) and manifest.get("v") == 1:
            shards = manifest.get("shards", [])
            chunks = []
            for shard in shards:
                chunk = self.kv.get(shard["scope"], shard["key"])
                if chunk is None:
                    # An incomplete shard set cannot be reassembled.
                    return None
                chunks.append(chunk)
            return "".join(chunks)

        legacy = self.kv.get(KV.bm25Index, legacy_key)
        if isinstance(legacy, str):
            return legacy
        return None

# =====================================================================
# Vector Index / Embedding Helpers
# =====================================================================

def clip_embed_input(text: str) -> str:
    """Cut ``text`` to at most 16000 characters before embedding."""
    max_chars = 16000
    return text if len(text) <= max_chars else text[:max_chars]

def get_agent_id() -> Optional[str]:
    """Return the ``AGENT_ID`` environment variable, or None if unset or empty."""
    agent_id = os.environ.get("AGENT_ID")
    return agent_id if agent_id else None

def commit_if_enabled(kv: StateKV, message: str, agent_id: Optional[str]) -> Optional[str]:
    """Commit a version through ``kv.commit_version`` and return its result.

    A missing or empty ``agent_id`` is replaced by ``"unknown-agent"``.

    NOTE(review): despite the name, no enable/disable check happens in this
    function — presumably ``commit_version`` decides that itself; confirm.
    """
    author = agent_id if agent_id else "unknown-agent"
    return kv.commit_version(message, author)


def is_agent_scope_isolated() -> bool:
    """Return True when ``AGENTMEMORY_AGENT_SCOPE`` is exactly ``"isolated"``."""
    scope_mode = os.environ.get("AGENTMEMORY_AGENT_SCOPE")
    return scope_mode == "isolated"

def is_auto_compress_enabled() -> bool:
    """Return True when ``AGENTMEMORY_AUTO_COMPRESS`` is exactly ``"true"``."""
    flag = os.environ.get("AGENTMEMORY_AUTO_COMPRESS")
    return flag == "true"

def is_slots_enabled() -> bool:
    """Return True when ``AGENTMEMORY_SLOTS`` is exactly ``"true"``."""
    flag = os.environ.get("AGENTMEMORY_SLOTS")
    return flag == "true"

def is_reflect_enabled() -> bool:
    """Return True when ``AGENTMEMORY_REFLECT`` is exactly ``"true"``."""
    flag = os.environ.get("AGENTMEMORY_REFLECT")
    return flag == "true"

def is_graph_extraction_enabled() -> bool:
    """Return True when ``GRAPH_EXTRACTION_ENABLED`` is exactly ``"true"``."""
    flag = os.environ.get("GRAPH_EXTRACTION_ENABLED")
    return flag == "true"

def is_consolidation_enabled() -> bool:
    """Decide whether consolidation is enabled.

    ``CONSOLIDATION_ENABLED`` wins when it is ``"true"``/``"1"`` or
    ``"false"``/``"0"``. For any other value, or when it is unset,
    consolidation is enabled only if ``GEMINI_API_KEY`` or
    ``GOOGLE_API_KEY`` is set to a non-empty value.
    """
    setting = os.getenv("CONSOLIDATION_ENABLED")
    if setting in ("true", "1"):
        return True
    if setting in ("false", "0"):
        return False
    api_key = os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY")
    return bool(api_key)

def vector_index_add_guarded(
    obs_id: str,
    session_id: str,
    text: str,
    context: Dict[str, Any]
) -> bool:
    """Embed ``text`` and add it to the module's vector index, never raising.

    Args:
        obs_id: Identifier stored with the embedding.
        session_id: Session identifier stored with the embedding.
        text: Text to embed; it is clipped with ``clip_embed_input`` first.
        context: Accepted for callers but not used by this function.

    Returns:
        True when the embedding was added. False when no vector index or
        embedding provider is configured, when the embedding length differs
        from the provider's ``dimensions``, or when embedding fails.
    """
    vi = _vector_index
    ep = _embedding_provider
    # Compare with None instead of using truthiness: an index or provider
    # object that defines __len__ or __bool__ would be falsy while empty,
    # which would stop the very first add from ever happening.
    if vi is None or ep is None:
        return False
    try:
        clipped = clip_embed_input(text)
        embedding = ep.embed(clipped)
        if len(embedding) != ep.dimensions:
            print(f"[vector-index] Dimension mismatch: expected {ep.dimensions}, got {len(embedding)}")
            return False
        vi.add(obs_id, session_id, embedding)
        return True
    except Exception as e:
        print(f"[vector-index] Embed failed: {e}")
        return False

# =====================================================================
# Observation System (Observe, Synthetic Compression)
# =====================================================================

def extract_image(d: Any) -> Optional[str]:
    """Find the first image reference inside ``d``.

    A string counts as an image when it starts with ``data:image/``,
    ``iVBORw0KGgo`` or ``/9j/``. In a dict, a string value under
    ``image_data``, ``image_path``, ``imageBase64`` or ``imagePath`` is
    returned as is; otherwise the dict's values are searched recursively.
    Lists and other types are not searched.
    """
    if not d:
        return None
    if isinstance(d, str):
        looks_like_image = d.startswith(("data:image/", "iVBORw0KGgo", "/9j/"))
        return d if looks_like_image else None
    if not isinstance(d, dict):
        return None

    # Well-known keys are checked first, in this order.
    for field in ("image_data", "image_path", "imageBase64", "imagePath"):
        candidate = d.get(field)
        if isinstance(candidate, str):
            return candidate

    for nested in d.values():
        found = extract_image(nested)
        if found:
            return found
    return None

def infer_type(tool_name: Optional[str], hook_type: str) -> str:
    """Classify an observation from its hook type and tool name.

    The hook type is checked first. Otherwise the tool name is converted
    to snake_case and matched against keyword groups in a fixed order; the
    first group with a hit decides the type. Unknown or missing tool names
    give ``"other"``.
    """
    hook_rules = (
        (("post_tool_failure",), "error"),
        (("prompt_submit",), "conversation"),
        (("subagent_stop", "task_completed"), "subagent"),
        (("notification",), "notification"),
    )
    for hooks, label in hook_rules:
        if hook_type in hooks:
            return label

    if not tool_name:
        return "other"

    # camelCase -> camel_Case, then dashes/whitespace -> "_", then lowercase.
    snake = re.sub(r'([a-z])([A-Z])', r'\1_\2', tool_name)
    snake = re.sub(r'[-\s]+', '_', snake).lower()

    def mentions(word: str) -> bool:
        # A keyword matches as a prefix, as a suffix, or as a whole
        # "_"-delimited segment (an exact match is already a prefix).
        if snake.startswith(word) or snake.endswith(word):
            return True
        return re.search(rf"(^|_){word}(_|$)", snake) is not None

    keyword_rules = (
        (("fetch", "http", "web"), "web_fetch"),
        (("grep", "search", "glob", "find"), "search"),
        (("bash", "shell", "exec", "run"), "command_run"),
        (("edit", "update", "patch", "replace"), "file_edit"),
        (("write", "create"), "file_write"),
        (("read", "view"), "file_read"),
        (("task", "agent"), "subagent"),
    )
    for keywords, label in keyword_rules:
        for keyword in keywords:
            if mentions(keyword):
                return label
    return "other"

def extract_files(input_data: Any) -> List[str]:
    """Collect file-like string values from a tool input dict.

    The keys ``file_path``, ``filepath``, ``path``, ``filePath``, ``file``
    and ``pattern`` are read in that order. A value is kept when it is a
    string of 1 to 511 characters.

    Returns:
        The distinct values in the order they were first seen, so the
        result is the same on every run. An empty list is returned for
        anything that is not a non-empty dict.
    """
    if not input_data or not isinstance(input_data, dict):
        return []
    # A dict is used as an insertion-ordered set: it removes duplicates
    # without the hash-randomised ordering of set().
    seen: Dict[str, None] = {}
    for key in ("file_path", "filepath", "path", "filePath", "file", "pattern"):
        value = input_data.get(key)
        if isinstance(value, str) and 0 < len(value) < 512:
            seen[value] = None
    return list(seen)

def stringify_for_narrative(v: Any) -> str:
    """Turn any value into text for a narrative.

    Strings are returned unchanged and ``None`` becomes ``""``. Everything
    else is JSON-encoded, falling back to ``str()`` when that fails.
    """
    if isinstance(v, str):
        return v
    if v is None:
        return ""
    try:
        rendered = json.dumps(v)
    except Exception:
        rendered = str(v)
    return rendered

def _clip_with_ellipsis(text: str, limit: int) -> str:
    """Return ``text`` cut to ``limit`` characters, ending in "…" if cut."""
    if len(text) <= limit:
        return text
    return text[:limit - 1] + "\u2026"


def build_synthetic_compression(raw: Dict[str, Any]) -> Dict[str, Any]:
    """Build a compressed observation from a raw one without calling a model.

    Args:
        raw: Raw observation. ``id``, ``sessionId``, ``timestamp`` and
            ``hookType`` are required; ``toolName``, ``toolInput``,
            ``toolOutput``, ``userPrompt``, ``modality``, ``imageData`` and
            ``agentId`` are optional.

    Returns:
        A dict with a title (tool name, else hook type, else
        ``"observation"``; at most 80 characters), an optional subtitle
        (tool input, at most 120 characters), a narrative (prompt, input and
        output joined by ``" | "``, at most 400 characters), the inferred
        type, the files found in the tool input, and fixed ``importance``
        (5) and ``confidence`` (0.3).

    Raises:
        KeyError: If one of the required keys is missing from ``raw``.
    """
    tool_name = raw.get("toolName") or raw.get("hookType")
    input_str = stringify_for_narrative(raw.get("toolInput"))
    output_str = stringify_for_narrative(raw.get("toolOutput"))
    # The prompt comes from user-supplied data and may not be a string (for
    # example a list of message parts). Convert it so that the join below
    # cannot raise TypeError.
    prompt_value = raw.get("userPrompt")
    prompt_str = stringify_for_narrative(prompt_value) if prompt_value else ""

    parts = [s for s in (prompt_str, input_str, output_str) if len(s) > 0]
    narrative = _clip_with_ellipsis(" | ".join(parts), 400)

    title = _clip_with_ellipsis(tool_name or "observation", 80)

    subtitle = _clip_with_ellipsis(input_str, 120) if input_str else None

    res = {
        "id": raw["id"],
        "sessionId": raw["sessionId"],
        "timestamp": raw["timestamp"],
        "type": infer_type(raw.get("toolName"), raw["hookType"]),
        "title": title,
        "subtitle": subtitle,
        "facts": [],
        "narrative": narrative,
        "concepts": [],
        "files": extract_files(raw.get("toolInput")),
        "importance": 5,
        "confidence": 0.3,
    }
    # Copy optional fields only when the raw observation has them.
    for k in ("modality", "imageData", "agentId"):
        if raw.get(k) is not None:
            res[k] = raw[k]
    return res

def observe(kv: StateKV, payload: Dict[str, Any]) -> Dict[str, Any]:
    """Record one hook event as an observation.

    The payload data is scrubbed of private data, stored as a raw
    observation, then replaced under the same key by a synthetic compressed
    observation that is also added to the search indexes. The owning
    session is created or updated along the way.

    Args:
        kv: Key-value store to write to.
        payload: Event with required ``sessionId``, ``hookType`` and
            ``timestamp``, plus optional ``data``, ``project`` and ``cwd``.

    Returns:
        ``{"observationId": <id>}``.

    Raises:
        ValueError: If a required payload field is missing, or the session
            already holds ``MAX_OBS_PER_SESSION`` observations (default 500;
            a value of 0 or less disables the limit).
    """
    session_id = payload.get("sessionId")
    hook_type = payload.get("hookType")
    timestamp = payload.get("timestamp")

    if not session_id or not hook_type or not timestamp:
        raise ValueError("Invalid payload: sessionId, hookType, and timestamp are required")

    obs_id = generate_id("obs")
    sanitized_data = payload.get("data")
    # Scrub secrets on the JSON text and parse it back. If the data cannot
    # be encoded, or the scrubbed text is no longer valid JSON, fall back to
    # the scrubbed str() of the data.
    try:
        json_str = json.dumps(payload.get("data"))
        sanitized = strip_private_data(json_str)
        sanitized_data = json.loads(sanitized)
    except Exception:
        sanitized_data = strip_private_data(str(payload.get("data")))

    raw = {
        "id": obs_id,
        "sessionId": session_id,
        "timestamp": timestamp,
        "hookType": hook_type,
        "raw": sanitized_data,
    }

    extracted_img = extract_image(sanitized_data)
    if isinstance(sanitized_data, dict):
        # Tool fields are only read for tool hooks; a failure's "error" is
        # used as the output when there is no "tool_output".
        if hook_type in ("post_tool_use", "post_tool_failure"):
            raw["toolName"] = sanitized_data.get("tool_name")
            raw["toolInput"] = sanitized_data.get("tool_input")
            raw["toolOutput"] = sanitized_data.get("tool_output") or sanitized_data.get("error")
        if hook_type == "prompt_submit":
            raw["userPrompt"] = sanitized_data.get("prompt")
        if extracted_img:
            # "mixed" when the image comes with any text field, else "image".
            raw["modality"] = "mixed" if (raw.get("toolInput") or raw.get("toolOutput") or raw.get("userPrompt")) else "image"
    elif isinstance(sanitized_data, str) and extracted_img:
        raw["modality"] = "image"

    # Enforce the per-session limit by listing the existing observations.
    max_obs = int(os.getenv("MAX_OBS_PER_SESSION", "500"))
    if max_obs > 0:
        existing = kv.list(KV.observations(session_id))
        if len(existing) >= max_obs:
            raise ValueError(f"Session observation limit reached ({max_obs})")

    # An existing session's agentId wins; the environment value is only
    # used when the session does not exist yet.
    existing_session = kv.get(KV.sessions, session_id)
    inherited_agent_id = existing_session.get("agentId") if existing_session else get_agent_id()
    if inherited_agent_id:
        raw["agentId"] = inherited_agent_id

    # Only inline base64 images are written to disk; a plain path returned
    # by extract_image() does not match these prefixes and is skipped.
    if extracted_img and (extracted_img.startswith("data:image/") or extracted_img.startswith("iVBORw0KGgo") or extracted_img.startswith("/9j/")):
        try:
            file_path, bytes_written = save_image_to_disk(extracted_img)
            raw["imageData"] = file_path
            
            # Increment image ref count
            img_refs = kv.get(KV.imageRefs, file_path) or 0
            kv.set(KV.imageRefs, file_path, img_refs + 1)
        except Exception as ex:
            # Image storage is best effort; the observation is still saved.
            print(f"[image store] failed: {ex}")

    # Set raw observation
    kv.set(KV.observations(session_id), obs_id, raw)

    # Stream raw observation
    # NOTE(review): broadcast_stream is not defined in the visible part of
    # this file — presumably declared further down; confirm.
    broadcast_stream({
        "type": "raw_observation",
        "sessionId": session_id,
        "data": {
            "type": "raw",
            "observation": raw,
            "sessionId": session_id
        }
    })

    if existing_session:
        # Known session: bump its counters and, if still missing, record the
        # first prompt (whitespace collapsed, at most 200 characters).
        updates = [
            {"type": "set", "path": "updatedAt", "value": datetime.datetime.utcnow().isoformat() + "Z"},
            {"type": "set", "path": "observationCount", "value": (existing_session.get("observationCount") or 0) + 1}
        ]
        if not existing_session.get("firstPrompt") and isinstance(raw.get("userPrompt"), str):
            trimmed = " ".join(raw["userPrompt"].split()).strip()
            if trimmed:
                updates.append({"type": "set", "path": "firstPrompt", "value": trimmed[:200]})
        kv.update(KV.sessions, session_id, updates)
    else:
        # New session: close other sessions still marked active, then
        # create this one.
        auto_complete_old_active_sessions(kv, session_id)
        project = payload.get("project") or "unknown"
        cwd = payload.get("cwd") or os.getcwd()
        trimmed_prompt = None
        if isinstance(raw.get("userPrompt"), str):
            trimmed_prompt = " ".join(raw["userPrompt"].split()).strip()[:200]
        ts = datetime.datetime.utcnow().isoformat() + "Z"
        new_sess = {
            "id": session_id,
            "project": project,
            "cwd": cwd,
            "startedAt": payload.get("timestamp") or ts,
            "updatedAt": ts,
            "status": "active",
            "observationCount": 1,
        }
        if inherited_agent_id:
            new_sess["agentId"] = inherited_agent_id
        if trimmed_prompt:
            new_sess["firstPrompt"] = trimmed_prompt
        kv.set(KV.sessions, session_id, new_sess)

    # Perform synthetic compression (we default to synthetic)
    synthetic = build_synthetic_compression(raw)
    # Carry the raw fields over so the stored record keeps the original data.
    for k in ["hookType", "raw", "toolName", "toolInput", "toolOutput", "userPrompt"]:
        if k in raw:
            synthetic[k] = raw[k]
    # This overwrites the raw observation stored above under the same key.
    kv.set(KV.observations(session_id), obs_id, synthetic)
    _bm25_index.add(synthetic)

    comb_text = synthetic["title"] + " " + (synthetic.get("narrative") or "")
    vector_index_add_guarded(synthetic["id"], synthetic["sessionId"], comb_text, {"kind": "synthetic", "logId": synthetic["id"]})

    if _index_persistence:
        _index_persistence.schedule_save()

    # Stream compressed observation
    broadcast_stream({
        "type": "compressed_observation",
        "sessionId": session_id,
        "data": {
            "type": "compressed",
            "observation": synthetic,
            "sessionId": session_id
        }
    })

    # Commit to Dolt
    commit_if_enabled(kv, f"Observe: {synthetic.get('title', 'observation')} in session {session_id[:8]}", synthetic.get("agentId"))

    return {"observationId": obs_id}


# =====================================================================
# Memory System (Remember, Forget, Evolve)
# =====================================================================

def memory_to_observation(memory: Dict[str, Any]) -> Dict[str, Any]:
    """Convert a memory record into the observation shape used for indexing.

    The observation uses the memory's first session id (or ``"memory"``
    when it has none), type ``"decision"``, the content as both the single
    fact and the narrative, and the memory's ``strength`` (default 7) as
    importance.
    """
    session_ids = memory.get("sessionIds")
    owner_session = session_ids[0] if session_ids else "memory"
    body = memory["content"]

    observation = {
        "id": memory["id"],
        "sessionId": owner_session,
        "timestamp": memory["createdAt"],
        "type": "decision",
        "title": memory["title"],
        "facts": [body],
        "narrative": body,
        "concepts": memory.get("concepts", []),
        "files": memory.get("files", []),
        "importance": memory.get("strength", 7),
    }
    return observation

def remember(kv: StateKV, data: Dict[str, Any]) -> Dict[str, Any]:
    """Store a new memory, superseding a near-duplicate if one exists.

    The content is scrubbed of private data. The first latest memory in a
    compatible project whose lowercased content has a Jaccard similarity
    above 0.7 is marked as no longer latest and becomes the parent of the
    new memory. The new memory is added to the BM25 and vector indexes and
    a version is committed.

    Args:
        kv: Key-value store to write to.
        data: Request with required ``content`` and optional ``concepts``,
            ``files``, ``sourceObservationIds``, ``ttlDays``, ``type``,
            ``project`` and ``agentId``.

    Returns:
        ``{"success": True, "memory": <new memory dict>}``.

    Raises:
        ValueError: If ``content`` is missing or blank.
    """
    content = data.get("content")
    if not content or not content.strip():
        raise ValueError("content is required")
    content = strip_private_data(content)

    project = data.get("project")
    if project:
        project = project.strip()
    ttl_days = data.get("ttlDays")

    now = datetime.datetime.utcnow().isoformat() + "Z"

    # Look for the first latest memory that is close enough to supersede.
    prior_id = None
    prior_version = 1
    prior_memory = None
    needle = content.lower()
    for candidate in kv.list(KV.memories):
        if candidate.get("isLatest") is False:
            continue
        # A project only excludes a candidate when both sides name one.
        if project and candidate.get("project") and candidate["project"] != project:
            continue
        if jaccard_similarity(needle, candidate.get("content", "").lower()) <= 0.7:
            continue
        prior_id = candidate["id"]
        prior_version = candidate.get("version") or 1
        prior_memory = candidate
        break

    owner_agent = data.get("agentId") or get_agent_id()
    new_mem = {
        "id": generate_id("mem"),
        "createdAt": now,
        "updatedAt": now,
        "type": data.get("type") or "fact",
        "title": content[:80],
        "content": content,
        "concepts": data.get("concepts") or [],
        "files": data.get("files") or [],
        "sessionIds": [],
        "strength": 7,
        "version": prior_version + 1 if prior_id else 1,
        "parentId": prior_id,
        "supersedes": [prior_id] if prior_id else [],
        "sourceObservationIds": [
            obs for obs in (data.get("sourceObservationIds") or []) if obs
        ],
        "isLatest": True,
    }
    if owner_agent:
        new_mem["agentId"] = owner_agent
    if project:
        new_mem["project"] = project

    if ttl_days and isinstance(ttl_days, (int, float)) and ttl_days > 0:
        expires_at = datetime.datetime.utcnow() + datetime.timedelta(days=ttl_days)
        new_mem["forgetAfter"] = expires_at.isoformat() + "Z"

    # Demote the superseded memory before writing its replacement.
    if prior_memory:
        prior_memory["isLatest"] = False
        kv.set(KV.memories, prior_memory["id"], prior_memory)

    kv.set(KV.memories, new_mem["id"], new_mem)

    try:
        _bm25_index.add(memory_to_observation(new_mem))
    except Exception as ex:
        print(f"[bm25] memory add failed: {ex}")

    embed_text = new_mem["title"] + " " + new_mem["content"]
    vector_index_add_guarded(
        new_mem["id"],
        "memory",
        embed_text,
        {"kind": "memory", "logId": new_mem["id"]},
    )

    if _index_persistence:
        _index_persistence.schedule_save()

    # Commit to Dolt
    commit_if_enabled(kv, f"Remember: {new_mem.get('title', '')}", new_mem.get("agentId"))

    return {"success": True, "memory": new_mem}


def forget(kv: StateKV, data: Dict[str, Any]) -> Dict[str, Any]:
    """Remove a memory, selected observations, or an entire session.

    Keys read from ``data``: ``memoryId``, ``sessionId``,
    ``observationIds`` and ``agentId``.

    * ``memoryId`` deletes that memory record.
    * ``sessionId`` together with ``observationIds`` deletes just those
      observations.
    * ``sessionId`` alone (no memory id, no observation ids) deletes every
      observation listed for the session plus its session and summary
      records, which add 2 to the count.

    Each removed id is also dropped from the BM25 and vector indexes, and an
    image reference counter it points at is decremented. Ids are counted as
    deleted without checking that the record existed. When the count is
    non-zero an index save is scheduled, an audit entry is written and a
    commit is requested.

    Returns ``{"success": True, "deleted": <count>}``.
    """
    memory_id = data.get("memoryId")
    session_id = data.get("sessionId")
    obs_ids = data.get("observationIds") or []

    removed_memories = []
    removed_observations = []
    session_removed = False

    def release_image(ref) -> None:
        # Decrement the stored reference count, never going below zero.
        count = kv.get(KV.imageRefs, ref) or 0
        if count > 0:
            kv.set(KV.imageRefs, ref, count - 1)

    def unindex(item_id) -> None:
        # Drop the id from both search indexes.
        _bm25_index.remove(item_id)
        if _vector_index:
            _vector_index.remove(item_id)

    def observation_image(record):
        return record.get("imageData") or record.get("imageRef")

    # --- single memory ---------------------------------------------------
    if memory_id:
        record = kv.get(KV.memories, memory_id)
        kv.delete(KV.memories, memory_id)
        if record and record.get("imageRef"):
            release_image(record["imageRef"])
        unindex(memory_id)
        removed_memories.append(memory_id)

    # --- explicit observations of a session ------------------------------
    if session_id and obs_ids:
        scope = KV.observations(session_id)
        for oid in obs_ids:
            record = kv.get(scope, oid)
            kv.delete(scope, oid)
            if record:
                image = observation_image(record)
                if image:
                    release_image(image)
            unindex(oid)
            removed_observations.append(oid)

    # --- whole session ---------------------------------------------------
    if session_id and not obs_ids and not memory_id:
        scope = KV.observations(session_id)
        for record in kv.list(scope):
            oid = record["id"]
            kv.delete(scope, oid)
            image = observation_image(record)
            if image:
                release_image(image)
            unindex(oid)
            removed_observations.append(oid)
        kv.delete(KV.sessions, session_id)
        kv.delete(KV.summaries, session_id)
        session_removed = True

    # The session and summary records count as two deletions.
    deleted = len(removed_memories) + len(removed_observations)
    if session_removed:
        deleted += 2

    if deleted > 0:
        if _index_persistence:
            _index_persistence.schedule_save()
        audit_details = {
            "sessionId": session_id,
            "deleted": deleted,
            "memoriesDeleted": len(removed_memories),
            "observationsDeleted": len(removed_observations),
            "sessionDeleted": session_removed,
            "reason": "user-initiated forget"
        }
        safe_audit(
            kv,
            "forget",
            "mem::forget",
            removed_memories + removed_observations,
            audit_details,
        )

        # Commit to Dolt
        agent_id = data.get("agentId") or get_agent_id()
        commit_if_enabled(kv, f"Forget: memory_id={memory_id} session_id={session_id}", agent_id)

    return {"success": True, "deleted": deleted}


# =====================================================================
# Prompt Context Compilation System
# =====================================================================

def estimate_tokens(text: str) -> int:
    """Return a rough token estimate: one token per three characters, rounded down."""
    return len(text) // 3

def escape_xml_attr(s: str) -> str:
    """Escape ``&``, ``"``, ``<`` and ``>`` so ``s`` can sit inside a double-quoted XML attribute."""
    entities = str.maketrans({
        "&": "&amp;",
        '"': "&quot;",
        "<": "&lt;",
        ">": "&gt;",
    })
    return s.translate(entities)

def context(kv: StateKV, data: Dict[str, Any]) -> Dict[str, Any]:
    """Compile the prompt context for a project within a token budget.

    Keys read from ``data``:
        sessionId: current session; excluded from the session history.
        project:   project whose profile, lessons and sessions are used.
        budget:    token budget; when missing or falsy the ``TOKEN_BUDGET``
                   environment variable (default ``2000``) is used.

    Candidate blocks are gathered in priority order: pinned slots, project
    profile, lessons, then up to ten other sessions of the project (newest
    ``startedAt`` first). A session contributes its summary, or, when it has
    none, its five most important titled observations (importance >= 5).
    Blocks are then added greedily: a block that would exceed the budget is
    skipped and later blocks are still tried.

    Returns:
        ``{"context": str, "blocks": int, "tokens": int}``; an empty context
        with zero counts when no block fits.

    Raises:
        ValueError: if ``sessionId`` or ``project`` is missing.
    """
    session_id = data.get("sessionId")
    project = data.get("project")
    budget = data.get("budget") or int(os.getenv("TOKEN_BUDGET", "2000"))

    if not session_id or not project:
        raise ValueError("sessionId and project are required")

    # One timestamp for every block. Sampling the clock per block made the
    # recency sort timing-dependent: blocks built a millisecond later sorted
    # first, so pinned slots could end up last and be dropped by the budget.
    now_ms = int(time.time() * 1000)
    blocks: List[Dict[str, Any]] = []

    def add_block(kind: str, text: str) -> None:
        blocks.append({
            "type": kind,
            "content": text,
            "tokens": estimate_tokens(text),
            "recency": now_ms,
        })

    # 1. Pinned Slots
    slot_content = render_pinned_context(list_pinned_slots(kv))
    if slot_content:
        add_block("memory", slot_content)

    # 2. Profile
    profile = kv.get(KV.profiles, project)
    if profile:
        profile_parts = []
        if profile.get("topConcepts"):
            profile_parts.append(
                "Concepts: " + ", ".join(c["concept"] for c in profile["topConcepts"][:8])
            )
        if profile.get("topFiles"):
            profile_parts.append(
                "Key files: " + ", ".join(f["file"] for f in profile["topFiles"][:5])
            )
        if profile.get("conventions"):
            profile_parts.append("Conventions: " + "; ".join(profile["conventions"]))
        if profile.get("commonErrors"):
            profile_parts.append("Common errors: " + "; ".join(profile["commonErrors"][:3]))

        if profile_parts:
            add_block("memory", "## Project Profile\n" + "\n".join(profile_parts))

    # 3. Lessons: project-specific lessons get a 1.5x boost over global ones.
    def lesson_score(lesson: Dict[str, Any]) -> float:
        factor = 1.5 if lesson.get("project") == project else 1.0
        return factor * lesson.get("confidence", 0.5)

    relevant_lessons = [
        lesson for lesson in kv.list(KV.lessons)
        if not lesson.get("deleted") and (not lesson.get("project") or lesson["project"] == project)
    ]
    relevant_lessons.sort(key=lesson_score, reverse=True)
    relevant_lessons = relevant_lessons[:10]

    if relevant_lessons:
        items = []
        for lesson in relevant_lessons:
            # Same 0.5 default as lesson_score, so a lesson without a stored
            # confidence no longer raises KeyError while rendering.
            desc = f"- ({lesson.get('confidence', 0.5):.2f}) {lesson.get('content', '')}"
            if lesson.get("context"):
                desc += f" — {lesson['context']}"
            items.append(desc)
        add_block("memory", "## Lessons Learned\n" + "\n".join(items))

    # 4. Sessions & Summaries (records without an id cannot be looked up; skip them)
    sessions = [
        s for s in kv.list(KV.sessions)
        if s.get("project") == project and s.get("id") and s["id"] != session_id
    ]
    sessions.sort(key=lambda s: s.get("startedAt", ""), reverse=True)
    sessions = sessions[:10]

    for s in sessions:
        summary = kv.get(KV.summaries, s["id"])
        if summary:
            content = f"## {summary.get('title', 'Session summary')}\n{summary.get('narrative', '')}\n" \
                      f"Decisions: {'; '.join(summary.get('keyDecisions') or [])}\n" \
                      f"Files: {', '.join(summary.get('filesModified') or [])}"
            add_block("summary", content)
            continue

        # Fallback to important observations
        obs_list = kv.list(KV.observations(s["id"]))
        important = [o for o in obs_list if o.get("title") and o.get("importance", 0) >= 5]
        if important:
            important.sort(key=lambda o: o.get("importance", 0), reverse=True)
            items = [f"- [{o.get('type')}] {o.get('title')}: {o.get('narrative')}" for o in important[:5]]
            content = f"## Session {s['id'][:8]} ({s.get('startedAt')})\n" + "\n".join(items)
            add_block("observation", content)

    # Stable sort: with equal recency this keeps the priority order above.
    blocks.sort(key=lambda b: b.get("recency", 0), reverse=True)

    header = f'<agentmemory-context project="{escape_xml_attr(project)}">'
    footer = "</agentmemory-context>"
    used_tokens = estimate_tokens(header) + estimate_tokens(footer)

    selected = []
    for b in blocks:
        if used_tokens + b["tokens"] > budget:
            continue  # too big; a later, smaller block may still fit
        selected.append(b["content"])
        used_tokens += b["tokens"]

    if not selected:
        return {"context": "", "blocks": 0, "tokens": 0}

    res_context = f"{header}\n" + "\n\n".join(selected) + f"\n{footer}"
    return {"context": res_context, "blocks": len(selected), "tokens": used_tokens}

# =====================================================================
# Memory Slots System
# =====================================================================

# Seed data for the default slots, one row per slot:
# (label, sizeLimit, pinned, scope, description).
_DEFAULT_SLOT_ROWS = (
    (
        "persona", 1000, True, "global",
        "How the agent should see itself: role, tone, behavioural guidelines.",
    ),
    (
        "user_preferences", 2000, True, "global",
        "Coding style, tool preferences, naming conventions, and other habits the user wants preserved across sessions.",
    ),
    (
        "tool_guidelines", 1500, True, "global",
        "Rules the agent should follow when picking or sequencing tools (e.g. prefer X over Y, never run Z without confirmation).",
    ),
    (
        "project_context", 3000, True, "project",
        "Architecture decisions, codebase conventions, build/test commands, and cross-cutting constraints for the current project.",
    ),
    (
        "guidance", 1500, True, "project",
        "Active advice for the next session: what to focus on, what to avoid, open risks.",
    ),
    (
        "pending_items", 2000, True, "project",
        "Unfinished work, explicit TODOs, and promises made but not yet delivered.",
    ),
    (
        "session_patterns", 1500, False, "project",
        "Recurring behaviours and common struggles observed across recent sessions.",
    ),
    (
        "self_notes", 1500, False, "project",
        "Free-form notes the agent keeps for itself: hypotheses, dead ends, things to revisit.",
    ),
)

# Slot templates: every slot starts empty and writable.
DEFAULT_SLOTS = [
    {
        "label": label,
        "content": "",
        "sizeLimit": size_limit,
        "description": description,
        "pinned": pinned,
        "readOnly": False,
        "scope": scope,
    }
    for label, size_limit, pinned, scope, description in _DEFAULT_SLOT_ROWS
]

def seed_defaults(kv: StateKV) -> None:
    """Store each ``DEFAULT_SLOTS`` template that is not already present.

    Templates scoped ``"global"`` go to ``KV.globalSlots``, all others to
    ``KV.slots``. A label that already has a (truthy) stored value is left
    untouched; new slots get matching ``createdAt``/``updatedAt`` stamps.
    """
    stamp = datetime.datetime.utcnow().isoformat() + "Z"
    for template in DEFAULT_SLOTS:
        label = template["label"]
        store = KV.globalSlots if template["scope"] == "global" else KV.slots
        if kv.get(store, label):
            continue
        kv.set(store, label, {**template, "createdAt": stamp, "updatedAt": stamp})

def list_pinned_slots(kv: StateKV) -> List[Dict[str, Any]]:
    """Return pinned slots with non-blank content, sorted by label.

    Project slots override global slots that share a label.
    """
    project_slots = kv.list(KV.slots)
    global_slots = kv.list(KV.globalSlots)

    # Globals first, so a project slot with the same label replaces it.
    by_label = {slot["label"]: slot for slot in global_slots}
    by_label.update((slot["label"], slot) for slot in project_slots)

    return sorted(
        (
            slot for slot in by_label.values()
            if slot.get("pinned") and slot.get("content", "").strip()
        ),
        key=lambda slot: slot["label"],
    )

def render_pinned_context(slots: List[Dict[str, Any]]) -> str:
    """Render slots as a markdown section; returns ``""`` when there are none.

    Output is a top-level heading and a blank line, then for each slot a
    ``## <label>`` heading, its stripped content and a blank line.
    """
    if not slots:
        return ""
    rendered = ["# agentmemory pinned slots", ""]
    for slot in slots:
        rendered += [f"## {slot['label']}", slot["content"].strip(), ""]
    return "\n".join(rendered)

def slot_list(kv: StateKV) -> Dict[str, Any]:
    """Return all slots sorted by label; project slots shadow global ones with the same label."""
    project_slots = kv.list(KV.slots)
    global_slots = kv.list(KV.globalSlots)

    by_label = {}
    # Apply globals first so that project entries overwrite them.
    for group in (global_slots, project_slots):
        for slot in group:
            by_label[slot["label"]] = slot

    ordered = list(by_label.values())
    ordered.sort(key=lambda slot: slot["label"])
    return {"success": True, "slots": ordered}

def slot_get(kv: StateKV, label: str) -> Dict[str, Any]:
    """Look up a slot by label, trying the project scope before the global one.

    Returns ``{"success": True, "slot": ..., "scope": "project"|"global"}``
    for the first truthy hit, otherwise a ``"slot not found"`` error dict.
    """
    for scope_name, store in (("project", KV.slots), ("global", KV.globalSlots)):
        found = kv.get(store, label)
        if found:
            return {"success": True, "slot": found, "scope": scope_name}
    return {"success": False, "error": "slot not found"}

def slot_create(kv: StateKV, data: Dict[str, Any]) -> Dict[str, Any]:
    """Create a new slot in the project or global scope.

    Keys read from ``data``:
        label:       required; lowercase letter followed by ``[a-z0-9_]``.
        scope:       ``"project"`` (default) or ``"global"``.
        sizeLimit:   integer in 1..20000; missing or falsy means 2000.
        content:     initial text (passed through ``strip_private_data``);
                     must fit within ``sizeLimit``.
        description: free text, default ``""``.
        pinned:      default ``True``.
        agentId:     used for the commit; falls back to ``get_agent_id()``.

    Returns ``{"success": True, "slot": slot}``, or
    ``{"success": False, "error": ...}`` when validation fails or a slot
    with that label already exists in the chosen scope.
    """
    label = data.get("label")
    # fullmatch, not match + "$": "$" also matches just before a trailing
    # newline, which let labels such as "notes\n" through. The isinstance
    # check turns a non-string label into a validation error, not a TypeError.
    if not isinstance(label, str) or not re.fullmatch(r'[a-z][a-z0-9_]*', label):
        return {"success": False, "error": "label required (lowercase, starts with letter, [a-z0-9_])"}

    scope = data.get("scope") or "project"
    if scope not in ("project", "global"):
        return {"success": False, "error": "scope must be 'project' or 'global'"}

    limit = data.get("sizeLimit") or 2000
    # bool is a subclass of int; reject it so True is not accepted as a limit of 1.
    if isinstance(limit, bool) or not isinstance(limit, int) or limit < 1 or limit > 20000:
        return {"success": False, "error": "sizeLimit must be an integer between 1 and 20000"}

    content = strip_private_data(data.get("content") or "")
    if len(content) > limit:
        return {"success": False, "error": f"content exceeds sizeLimit ({len(content)} > {limit})"}

    description = data.get("description") or ""
    pinned = data.get("pinned", True)

    target_kv = KV.globalSlots if scope == "global" else KV.slots
    existing = kv.get(target_kv, label)
    if existing:
        return {"success": False, "error": f"slot already exists in {scope} scope"}

    now = datetime.datetime.utcnow().isoformat() + "Z"
    slot = {
        "label": label,
        "content": content,
        "sizeLimit": limit,
        "description": description,
        "pinned": pinned,
        "readOnly": False,
        "scope": scope,
        "createdAt": now,
        "updatedAt": now,
    }
    kv.set(target_kv, label, slot)
    safe_audit(kv, "slot_create", "mem::slot-create", [label], {"scope": scope, "sizeLimit": limit, "pinned": pinned})

    # Commit to Dolt
    agent_id = data.get("agentId") or get_agent_id()
    commit_if_enabled(kv, f"Create slot: {label}", agent_id)

    return {"success": True, "slot": slot}

def slot_append(kv: StateKV, label: str, text: str, agent_id: Optional[str] = None) -> Dict[str, Any]:
    """Append ``text`` (after ``strip_private_data``) to an existing slot.

    A newline is inserted first when the current content is non-empty and
    does not already end with one. Fails with an error dict when the slot is
    missing, read-only, or the result would exceed its ``sizeLimit``
    (default 2000). On success the slot is stored, audited and committed.
    """
    lookup = slot_get(kv, label)
    if not lookup.get("success"):
        return {"success": False, "error": "slot not found"}

    slot, scope = lookup["slot"], lookup["scope"]
    if slot.get("readOnly"):
        return {"success": False, "error": "slot is read-only"}

    current = slot.get("content") or ""
    needs_newline = bool(current) and not current.endswith("\n")
    combined = current + ("\n" if needs_newline else "") + strip_private_data(text)

    size_limit = slot.get("sizeLimit") or 2000
    if len(combined) > size_limit:
        return {
            "success": False,
            "error": f"append would exceed sizeLimit ({len(combined)} > {size_limit})",
            "currentSize": len(current),
            "sizeLimit": size_limit
        }

    slot["content"] = combined
    slot["updatedAt"] = datetime.datetime.utcnow().isoformat() + "Z"
    store = KV.globalSlots if scope == "global" else KV.slots
    kv.set(store, label, slot)

    audit_details = {"scope": scope, "added": len(text), "total": len(combined)}
    safe_audit(kv, "slot_append", "mem::slot-append", [label], audit_details)

    # Commit to Dolt
    commit_if_enabled(kv, f"Append slot: {label}", agent_id or get_agent_id())

    return {"success": True, "slot": slot, "size": len(combined)}

def slot_replace(kv: StateKV, label: str, content: str, agent_id: Optional[str] = None) -> Dict[str, Any]:
    """Overwrite a slot's content with ``content`` (after ``strip_private_data``).

    Fails with an error dict when the slot is missing, read-only, or the new
    content exceeds its ``sizeLimit`` (default 2000). On success the slot is
    stored, audited with the before/after lengths, and committed.
    """
    lookup = slot_get(kv, label)
    if not lookup.get("success"):
        return {"success": False, "error": "slot not found"}

    slot, scope = lookup["slot"], lookup["scope"]
    if slot.get("readOnly"):
        return {"success": False, "error": "slot is read-only"}

    replacement = strip_private_data(content)
    new_size = len(replacement)
    size_limit = slot.get("sizeLimit") or 2000
    if new_size > size_limit:
        return {
            "success": False,
            "error": f"content exceeds sizeLimit ({new_size} > {size_limit})",
            "sizeLimit": size_limit
        }

    old_size = len(slot.get("content") or "")
    slot["content"] = replacement
    slot["updatedAt"] = datetime.datetime.utcnow().isoformat() + "Z"
    store = KV.globalSlots if scope == "global" else KV.slots
    kv.set(store, label, slot)

    audit_details = {"scope": scope, "before": old_size, "after": new_size}
    safe_audit(kv, "slot_replace", "mem::slot-replace", [label], audit_details)

    # Commit to Dolt
    commit_if_enabled(kv, f"Replace slot: {label}", agent_id or get_agent_id())

    return {"success": True, "slot": slot, "size": new_size}

def slot_delete(kv: StateKV, label: str, agent_id: Optional[str] = None) -> Dict[str, Any]:
    """Delete a slot from whichever scope ``slot_get`` finds it in.

    Returns an error dict when the slot is missing or read-only; otherwise
    deletes it, writes an audit entry with the removed content length,
    requests a commit and returns ``{"success": True}``.
    """
    lookup = slot_get(kv, label)
    if not lookup.get("success"):
        return {"success": False, "error": "slot not found"}

    slot, scope = lookup["slot"], lookup["scope"]
    if slot.get("readOnly"):
        return {"success": False, "error": "slot is read-only"}

    store = KV.globalSlots if scope == "global" else KV.slots
    kv.delete(store, label)

    removed_size = len(slot.get("content") or "")
    safe_audit(kv, "slot_delete", "mem::slot-delete", [label], {"scope": scope, "size": removed_size})

    # Commit to Dolt
    commit_if_enabled(kv, f"Delete slot: {label}", agent_id or get_agent_id())

    return {"success": True}


def slot_reflect(kv: StateKV, session_id: str, max_obs: int = 50) -> Dict[str, Any]:
    """Update housekeeping slots from a session's most recent observations.

    Looks at up to ``max_obs`` observations (newest ``timestamp`` first) and:

    * appends ``- <title>`` lines to ``pending_items`` for observations whose
      title or narrative mentions "todo" (lines already present are skipped);
    * overwrites ``session_patterns`` with counts of ``error`` and
      ``command_run`` observations;
    * appends up to 20 not-yet-mentioned file paths to ``project_context``.

    A slot that does not exist is skipped. Content over the slot's
    ``sizeLimit`` (default 2000) is truncated: appended slots keep the tail,
    ``session_patterns`` keeps the head. When at least one slot changed, an
    audit entry is written and a commit is requested.

    Returns ``{"success": True, "applied": n, "observationsReviewed": m}``,
    or ``applied: 0`` with a ``reason`` when the session has no observations.
    """
    observations = kv.list(KV.observations(session_id))
    if not observations:
        return {"success": True, "applied": 0, "reason": "no observations for session"}

    recent = sorted(observations, key=lambda x: x.get("timestamp", ""), reverse=True)[:max_obs]

    pending_lines = []
    pattern_counts = {}
    files = set()

    for obs in recent:
        title = (obs.get("title") or "").lower()
        narrative = (obs.get("narrative") or "").lower()
        if "todo" in narrative or "todo" in title:
            pending_lines.append(f"- {obs.get('title') or obs['id']}")
        if obs.get("type") == "error":
            pattern_counts["errors"] = pattern_counts.get("errors", 0) + 1
        if obs.get("type") == "command_run":
            pattern_counts["commands"] = pattern_counts.get("commands", 0) + 1
        files.update(obs.get("files") or [])

    applied = 0
    now = datetime.datetime.utcnow().isoformat() + "Z"

    if pending_lines:
        res = slot_get(kv, "pending_items")
        if res.get("success"):
            slot = res["slot"]
            target_kv = KV.globalSlots if res["scope"] == "global" else KV.slots
            current = slot.get("content") or ""
            already = set(current.split("\n"))
            fresh = [line for line in pending_lines if line not in already]
            if fresh:
                sep = "\n" if current and not current.endswith("\n") else ""
                next_content = current + sep + "\n".join(fresh)
                limit = slot.get("sizeLimit") or 2000
                if len(next_content) > limit:
                    # Keep the newest lines when over the limit.
                    next_content = next_content[-limit:]
                slot["content"] = next_content
                slot["updatedAt"] = now
                kv.set(target_kv, "pending_items", slot)
                applied += 1

    if pattern_counts:
        res = slot_get(kv, "session_patterns")
        if res.get("success"):
            slot = res["slot"]
            target_kv = KV.globalSlots if res["scope"] == "global" else KV.slots
            summary = [f"last reflection: {now}"]
            for k, v in pattern_counts.items():
                summary.append(f"- {k}: {v} in last {len(recent)} observations")
            next_content = "\n".join(summary)
            limit = slot.get("sizeLimit") or 2000
            if len(next_content) > limit:
                next_content = next_content[:limit]
            slot["content"] = next_content
            slot["updatedAt"] = now
            kv.set(target_kv, "session_patterns", slot)
            applied += 1

    if files:
        res = slot_get(kv, "project_context")
        if res.get("success"):
            slot = res["slot"]
            target_kv = KV.globalSlots if res["scope"] == "global" else KV.slots
            already = slot.get("content") or ""
            # Sorted so the selection and order do not depend on set iteration.
            # A path counts as known when it appears anywhere in the content.
            fresh = [f for f in sorted(files) if f not in already][:20]
            if fresh:
                # The header is only written when the slot starts out empty.
                new_lines = [] if already else ["Files touched in recent sessions:"]
                new_lines.extend(f"- {f}" for f in fresh)
                # One line per entry. The old code joined with `sep`, which is
                # "" for an empty or newline-terminated slot, so the header
                # and every bullet ended up glued together on a single line.
                sep = "\n" if already and not already.endswith("\n") else ""
                next_content = already + sep + "\n".join(new_lines)
                limit = slot.get("sizeLimit") or 2000
                if len(next_content) > limit:
                    next_content = next_content[-limit:]
                slot["content"] = next_content
                slot["updatedAt"] = now
                kv.set(target_kv, "project_context", slot)
                applied += 1

    if applied > 0:
        safe_audit(kv, "slot_reflect", "mem::slot-reflect", [session_id], {"observationCount": len(recent), "slotsUpdated": applied})
        commit_if_enabled(kv, f"Slot reflect: updated {applied} slots in session {session_id[:8]}", "system")

    return {"success": True, "applied": applied, "observationsReviewed": len(recent)}


# =====================================================================
# Lessons Learned System
# =====================================================================

def reinforce_lesson(lesson: Dict[str, Any]) -> None:
    """Strengthen ``lesson`` in place.

    Increments ``reinforcements``, moves ``confidence`` (default 0.5) a tenth
    of the remaining distance towards 1.0 (capped at 1.0), and stamps
    ``lastReinforcedAt`` and ``updatedAt`` with the current UTC time.
    """
    stamp = datetime.datetime.utcnow().isoformat() + "Z"
    current = lesson.get("confidence", 0.5)
    lesson.update({
        "reinforcements": lesson.get("reinforcements", 0) + 1,
        "confidence": min(1.0, current + 0.1 * (1 - current)),
        "lastReinforcedAt": stamp,
        "updatedAt": stamp,
    })

def lesson_save(kv: StateKV, data: Dict[str, Any]) -> Dict[str, Any]:
    """Create a lesson, or reinforce the existing one with the same fingerprint.

    The lesson id is ``fingerprint_id("lsn", content)`` computed on the
    privacy-stripped content. If a non-deleted lesson already has that id it
    is reinforced (and gains ``context`` if it had none) and the result is
    ``action: "strengthened"``. Otherwise a new lesson is stored with
    ``action: "created"``; a ``confidence`` that is not a number, or is below
    0 or above 1, becomes 0.5.

    Returns an error dict when ``content`` is missing or blank.
    """
    raw_content = data.get("content")
    if not raw_content or not raw_content.strip():
        return {"success": False, "error": "content is required"}

    content = strip_private_data(raw_content)
    context_str = strip_private_data(data.get("context") or "")
    agent_id = data.get("agentId") or get_agent_id()
    lesson_id = fingerprint_id("lsn", content)

    prior = kv.get(KV.lessons, lesson_id)
    if prior and not prior.get("deleted"):
        # Same lesson seen again: strengthen instead of duplicating.
        reinforce_lesson(prior)
        if context_str and not prior.get("context"):
            prior["context"] = context_str
        kv.set(KV.lessons, prior["id"], prior)
        safe_audit(kv, "lesson_strengthen", "mem::lesson-save", [prior["id"]])

        # Commit to Dolt
        commit_if_enabled(kv, f"Strengthen lesson: {prior.get('content', '')[:60]}", agent_id)

        return {"success": True, "action": "strengthened", "lesson": prior}

    confidence = data.get("confidence")
    usable = isinstance(confidence, (int, float)) and not (confidence < 0 or confidence > 1)
    if not usable:
        confidence = 0.5

    created_at = datetime.datetime.utcnow().isoformat() + "Z"
    lesson = {
        "id": lesson_id,
        "content": content.strip(),
        "context": context_str.strip(),
        "confidence": confidence,
        "reinforcements": 0,
        "source": data.get("source") or "manual",
        "sourceIds": data.get("sourceIds") or [],
        "project": data.get("project"),
        "tags": data.get("tags") or [],
        "createdAt": created_at,
        "updatedAt": created_at,
        "decayRate": 0.05,
    }
    kv.set(KV.lessons, lesson_id, lesson)
    safe_audit(kv, "lesson_save", "mem::lesson-save", [lesson_id])

    # Commit to Dolt
    commit_if_enabled(kv, f"Create lesson: {lesson['content'][:60]}", agent_id)

    return {"success": True, "action": "created", "lesson": lesson}


def lesson_list(kv: StateKV, data: Dict[str, Any]) -> Dict[str, Any]:
    """List non-deleted lessons, highest confidence first.

    Optional filters in ``data``: ``minConfidence`` (default 0.0),
    ``project`` and ``source`` (exact match when given). At most ``limit``
    lessons (default 50) are returned.
    """
    limit = data.get("limit") or 50
    min_confidence = data.get("minConfidence") or 0.0
    project = data.get("project")
    source = data.get("source")

    def wanted(lesson: Dict[str, Any]) -> bool:
        if lesson.get("deleted"):
            return False
        if not lesson.get("confidence", 0.5) >= min_confidence:
            return False
        if project and lesson.get("project") != project:
            return False
        if source and lesson.get("source") != source:
            return False
        return True

    matches = sorted(
        (lesson for lesson in kv.list(KV.lessons) if wanted(lesson)),
        key=lambda lesson: lesson.get("confidence", 0.5),
        reverse=True,
    )
    return {"success": True, "lessons": matches[:limit]}

def lesson_recall(kv: StateKV, data: Dict[str, Any]) -> Dict[str, Any]:
    """Return the lessons that best match a free-text query.

    Keys read from ``data``:
        query:         required search text.
        minConfidence: lessons below this are ignored (default 0.1).
        limit:         maximum number of results (default 10).
        project:       when given, only lessons of that project are used.

    Query words longer than one character are matched as substrings against
    the lesson's content, context and tags (case-insensitive). The score is
    ``confidence * (matched terms / total terms) * recency``, where recency
    is ``1 / (1 + days * 0.01)`` measured from ``lastReinforcedAt`` or
    ``createdAt``. Each result is a copy of the lesson plus a rounded
    ``score``. The recall is audited.

    Returns an error dict when ``query`` is missing or blank.
    """
    query = data.get("query")
    if not query or not query.strip():
        return {"success": False, "error": "query is required"}

    # Imported locally as before, but once per call instead of per lesson.
    import dateutil.parser

    query_lower = query.lower()
    min_confidence = data.get("minConfidence") or 0.1
    limit = data.get("limit") or 10

    all_lessons = kv.list(KV.lessons)
    lessons = [
        l for l in all_lessons
        if not l.get("deleted") and l.get("confidence", 0.5) >= min_confidence
    ]

    project = data.get("project")
    if project:
        lessons = [l for l in lessons if l.get("project") == project]

    utc = datetime.timezone.utc
    now = datetime.datetime.now(utc)

    def age_in_days(stamp: Any) -> float:
        """Days since ``stamp``; 0 when it is missing, unparseable or in the future."""
        try:
            parsed = dateutil.parser.parse(stamp)
        except (TypeError, ValueError, OverflowError):
            # One bad timestamp should not abort the whole recall.
            return 0.0
        if parsed.tzinfo is None:
            # Naive timestamps are taken to be UTC; aware ones keep their
            # real offset instead of being relabelled as UTC.
            parsed = parsed.replace(tzinfo=utc)
        # Clamp: a future timestamp would boost the score above 1, and at
        # about 100 days ahead the recency formula would divide by zero.
        return max(0.0, (now - parsed).total_seconds() / (3600 * 24))

    scored = []
    terms = [t for t in query_lower.split() if len(t) > 1]

    for lesson in lessons:
        text = f"{lesson.get('content', '')} {lesson.get('context', '')} {' '.join(lesson.get('tags') or [])}".lower()
        match_count = sum(1 for t in terms if t in text)
        if match_count == 0:
            continue  # also covers an empty `terms`, so the division below is safe

        relevance = match_count / len(terms)
        days = age_in_days(lesson.get("lastReinforcedAt") or lesson.get("createdAt"))
        recency_boost = 1 / (1 + days * 0.01)
        score = lesson.get("confidence", 0.5) * relevance * recency_boost
        scored.append({"lesson": lesson, "score": score})

    scored.sort(key=lambda x: x["score"], reverse=True)
    results = []
    for s in scored[:limit]:
        item = dict(s["lesson"])
        item["score"] = round(s["score"], 3)
        results.append(item)

    safe_audit(kv, "lesson_recall", "mem::lesson-recall", [], {"query": query, "resultCount": len(results)})
    return {"success": True, "lessons": results}

def lesson_strengthen(kv: StateKV, lesson_id: str) -> Dict[str, Any]:
    """Reinforce the lesson stored under ``lesson_id``.

    Returns a ``"lesson not found"`` error dict when the lesson is missing or
    flagged deleted; otherwise reinforces it, stores it, writes an audit
    entry, requests a commit and returns the updated lesson.
    """
    found = kv.get(KV.lessons, lesson_id)
    missing = not found or found.get("deleted")
    if missing:
        return {"success": False, "error": "lesson not found"}

    reinforce_lesson(found)
    stored_id = found["id"]
    kv.set(KV.lessons, stored_id, found)
    safe_audit(kv, "lesson_strengthen", "mem::lesson-strengthen", [stored_id])

    # Commit to Dolt
    message = f"Strengthen lesson: {found.get('content', '')[:60]}"
    commit_if_enabled(kv, message, get_agent_id())

    return {"success": True, "lesson": found}

def lesson_decay_sweep(kv: StateKV) -> Dict[str, Any]:
    """Lower the confidence of lessons that have not been touched for a week or more.

    For every non-deleted lesson the age is measured from ``lastDecayedAt``,
    else ``lastReinforcedAt``, else ``createdAt``. Lessons younger than one
    week are left alone. Older ones lose ``decayRate`` (default 0.05) per
    week, with a floor of 0.05. A lesson that ends at or below 0.1 and has
    never been reinforced is soft-deleted (``deleted = True``). Each change
    is stored and audited; one commit is requested when anything changed.

    Returns ``{"success": True, "decayed": n, "softDeleted": m, "total": t}``.
    """
    # Imported locally as before, but once per sweep instead of per lesson.
    import dateutil.parser

    all_lessons = kv.list(KV.lessons)
    decayed = 0
    soft_deleted = 0
    utc = datetime.timezone.utc
    now_naive = datetime.datetime.utcnow()
    now = now_naive.replace(tzinfo=utc)
    timestamp = now_naive.isoformat() + "Z"

    for lesson in all_lessons:
        if lesson.get("deleted"):
            continue

        baseline_str = (
            lesson.get("lastDecayedAt")
            or lesson.get("lastReinforcedAt")
            or lesson.get("createdAt")
        )
        try:
            baseline = dateutil.parser.parse(baseline_str)
        except (TypeError, ValueError, OverflowError):
            # Age unknown: leave this lesson untouched rather than abort the
            # sweep for every other lesson.
            continue
        if baseline.tzinfo is None:
            # Naive timestamps are taken to be UTC; aware ones keep their
            # real offset instead of being relabelled as UTC.
            baseline = baseline.replace(tzinfo=utc)

        weeks = (now - baseline).total_seconds() / (3600 * 24 * 7)
        if weeks < 1.0:
            continue

        decay = lesson.get("decayRate", 0.05) * weeks
        new_conf = max(0.05, lesson.get("confidence", 0.5) - decay)
        if new_conf == lesson.get("confidence"):
            continue  # already at the floor; nothing to record

        before = lesson.get("confidence", 0.5)
        lesson["confidence"] = round(new_conf, 3)
        lesson["lastDecayedAt"] = timestamp
        lesson["updatedAt"] = timestamp

        if lesson["confidence"] <= 0.1 and lesson.get("reinforcements", 0) == 0:
            lesson["deleted"] = True
            soft_deleted += 1
        else:
            decayed += 1

        kv.set(KV.lessons, lesson["id"], lesson)
        # NOTE(review): the audit operation is "lesson_strengthen" even though
        # this is a decay; kept as-is, confirm whether that is intentional.
        safe_audit(kv, "lesson_strengthen", "mem::lesson-decay-sweep", [lesson["id"]], {
            "action": "soft-delete" if lesson.get("deleted") else "decay",
            "actor": "system",
            "reason": "decay-sweep",
            "before": {"confidence": before, "deleted": False},
            "after": {"confidence": lesson["confidence"], "deleted": bool(lesson.get("deleted"))}
        })

    if decayed > 0 or soft_deleted > 0:
        commit_if_enabled(kv, f"Lesson decay sweep: decayed {decayed}, soft-deleted {soft_deleted}", "system")

    return {"success": True, "decayed": decayed, "softDeleted": soft_deleted, "total": len(all_lessons)}


# =====================================================================
# Database Rebuilder (Index Bootstrapper)
# =====================================================================

def rebuild_index(kv: StateKV) -> int:
    """Clear the BM25 and vector indexes and refill them from stored data.

    Indexes every observation that has both a title and a narrative, then
    every memory that is not marked ``isLatest = False`` and has both a title
    and content. Schedules an index save when at least one item was indexed.

    Returns the number of items indexed.
    """
    _bm25_index.clear()
    if _vector_index:
        _vector_index.clear()

    indexed = 0

    # Backfill BM25 with observations
    for session in kv.list(KV.sessions):
        session_id = session.get("id")
        if not session_id:
            continue
        for observation in kv.list(KV.observations(session_id)):
            # Only index compressed (non-raw) observations
            if not (observation.get("title") and observation.get("narrative")):
                continue
            _bm25_index.add(observation)
            text = observation["title"] + " " + observation["narrative"]
            meta = {"kind": "observation", "logId": observation["id"]}
            vector_index_add_guarded(observation["id"], session_id, text, meta)
            indexed += 1

    # Backfill BM25 with memories
    for memory in kv.list(KV.memories):
        superseded = memory.get("isLatest") is False
        incomplete = not memory.get("title") or not memory.get("content")
        if superseded or incomplete:
            continue
        _bm25_index.add(memory_to_observation(memory))
        text = memory["title"] + " " + memory["content"]
        meta = {"kind": "memory", "logId": memory["id"]}
        vector_index_add_guarded(memory["id"], "memory", text, meta)
        indexed += 1

    if _index_persistence and indexed > 0:
        _index_persistence.schedule_save()

    return indexed

# =====================================================================
# Advanced Function Stubs / CRUD Operations
# =====================================================================

def list_sessions(kv: StateKV) -> List[Dict[str, Any]]:
    """Return all sessions, newest ``startedAt`` first.

    A session that has a stored summary gets ``title`` and ``summary`` (the
    summary's narrative) set on its record.
    """
    records = kv.list(KV.sessions)
    for record in records:
        session_id = record.get("id")
        if not session_id:
            continue
        summary = kv.get(KV.summaries, session_id)
        if not summary:
            continue
        record["title"] = summary.get("title")
        record["summary"] = summary.get("narrative")
    records.sort(key=lambda record: record.get("startedAt", ""), reverse=True)
    return records

def get_session(kv: StateKV, session_id: str) -> Optional[Dict[str, Any]]:
    """Fetch one session, adding ``title``/``summary`` from its stored summary when there is one.

    Returns whatever the store returned for the session when that value is falsy.
    """
    record = kv.get(KV.sessions, session_id)
    if not record:
        return record
    summary = kv.get(KV.summaries, session_id)
    if not summary:
        return record
    record["title"] = summary.get("title")
    record["summary"] = summary.get("narrative")
    return record

def create_session(kv: StateKV, session: Dict[str, Any]) -> Dict[str, Any]:
    """Persist a new session record and return it unchanged.

    ``auto_complete_old_active_sessions`` is invoked with the new session's
    id before the write (presumably to close other still-active sessions;
    confirm against that helper).  Raises ``KeyError`` if ``session`` has no
    ``"id"``.
    """
    new_session_id = session["id"]
    auto_complete_old_active_sessions(kv, new_session_id)
    kv.set(KV.sessions, new_session_id, session)
    return session

def end_session(kv: StateKV, session_id: str) -> bool:
    """Mark a session as completed, stamping ``endedAt`` with the current UTC time.

    Always returns ``True``; the result of the underlying ``kv.update`` call
    is not inspected.
    """
    ended_at = datetime.datetime.utcnow().isoformat() + "Z"
    patch_ops = [
        {"type": "set", "path": "endedAt", "value": ended_at},
        {"type": "set", "path": "status", "value": "completed"},
    ]
    kv.update(KV.sessions, session_id, patch_ops)
    return True

def timeline(kv: StateKV, data: Dict[str, Any]) -> Dict[str, Any]:
    """Return a window of observations around an anchor, ordered by timestamp.

    Recognised keys in ``data``:
      * ``anchor``    - an observation id, or a timestamp string.
      * ``sessionId`` - restrict to one session (takes precedence over ``project``).
      * ``project``   - restrict to the sessions of one project.
      * ``before`` / ``after`` - number of observations to include on each
        side of the anchor; a missing or falsy value means 10.

    Anchor resolution:
      1. An observation whose ``id`` equals ``anchor`` wins outright.
      2. Otherwise the first observation whose ``timestamp`` string compares
         ``>=`` the anchor is used (with no anchor this is the first
         observation).
      3. If nothing matches, the middle of the list is used.

    Returns ``{"success": True, "observations": [...], "anchorIndex": i}``
    where ``anchorIndex`` is the anchor's position inside the returned slice.
    """
    anchor = data.get("anchor")
    project = data.get("project")
    session_id = data.get("sessionId")
    before = data.get("before") or 10
    after = data.get("after") or 10

    sessions = kv.list(KV.sessions)
    if session_id:
        sessions = [s for s in sessions if s.get("id") == session_id]
    elif project:
        sessions = [s for s in sessions if s.get("project") == project]

    all_obs = []
    for s in sessions:
        all_obs.extend(kv.list(KV.observations(s["id"])))

    # Timestamps are compared as strings throughout this function.
    all_obs.sort(key=lambda x: x.get("timestamp", ""))

    # Pass 1: an exact id match must take priority.  Testing it in the same
    # pass as the timestamp comparison lets an earlier observation whose
    # timestamp string happens to sort >= the anchor id shadow the real match.
    anchor_idx = -1
    if anchor:
        anchor_idx = next(
            (idx for idx, obs in enumerate(all_obs) if obs.get("id") == anchor),
            -1,
        )

    # Pass 2: treat the anchor as a timestamp lower bound.
    if anchor_idx == -1:
        threshold = anchor or ""
        anchor_idx = next(
            (idx for idx, obs in enumerate(all_obs) if obs.get("timestamp", "") >= threshold),
            -1,
        )

    # Fallback: centre the window on the middle of the timeline.
    if anchor_idx == -1:
        anchor_idx = len(all_obs) // 2

    start = max(0, anchor_idx - before)
    end = min(len(all_obs), anchor_idx + after + 1)

    return {
        "success": True,
        "observations": all_obs[start:end],
        "anchorIndex": anchor_idx - start
    }

def get_project_profile(kv: StateKV, project: str) -> Dict[str, Any]:
    """Return the profile for ``project``, computing one when nothing useful is stored.

    A stored profile is returned as-is when it has a non-empty
    ``topConcepts`` or ``topFiles``.  In every other case (no stored profile,
    or both lists empty) the result of ``build_project_profile`` is returned.
    """
    stored = kv.get(KV.profiles, project)
    if stored and (stored.get("topConcepts") or stored.get("topFiles")):
        return stored
    return build_project_profile(kv, project)

def build_project_profile(kv: StateKV, project: str) -> Dict[str, Any]:
    """Return the project's profile, deriving concept/file rankings when absent.

    The stored profile (or a fresh skeleton when none exists) is returned
    untouched if it already has ``topConcepts`` or ``topFiles``.  Otherwise
    frequencies are tallied from the project's session observations and from
    memories tagged with the project, and the top 20 of each are written to
    ``topConcepts`` / ``topFiles`` together with ``sessionCount``.  The
    computed profile is returned but not written back to the store here.
    """
    from collections import Counter

    profile = kv.get(KV.profiles, project) or {
        "project": project,
        "topConcepts": [],
        "topFiles": [],
        "conventions": [],
        "commonErrors": [],
        "updatedAt": datetime.datetime.utcnow().isoformat() + "Z"
    }

    # Nothing to compute when the stored profile already carries rankings.
    if profile.get("topConcepts") or profile.get("topFiles"):
        return profile

    project_sessions = [
        sess for sess in kv.list(KV.sessions) if sess.get("project") == project
    ]
    concept_tally = Counter()
    file_tally = Counter()

    ignored_dirs = {"tmp", "temp", "claude", "appdata", "local", "users", "windows"}
    source_exts = ("py", "ts", "js", "jsx", "tsx", "go", "rs", "java", "cs", "cpp")
    path_keys = ("path", "file_path", "file", "filename")

    def _record_path(candidate):
        # Count the path itself, then mine directory names, the file stem and
        # a known source extension as concepts.
        if not (isinstance(candidate, str) and candidate):
            return
        file_tally[candidate] += 1
        *directories, basename = re.split(r'[\\/]', candidate)
        for directory in directories:
            token = directory.lower().strip()
            if not token or len(token) <= 2 or token in ignored_dirs:
                continue
            # Skip drive letters, dot-directories and flag-like segments.
            if re.match(r'^[a-z]:|^\.|^--', token):
                continue
            concept_tally[token] += 1
        stem, dotted_ext = os.path.splitext(basename)
        if stem and len(stem) > 2:
            concept_tally[stem.lower()] += 1
        extension = dotted_ext.lstrip(".")
        if extension in source_exts:
            concept_tally[extension] += 1

    for sess in project_sessions:
        sess_id = sess.get("id", "")
        if not sess_id:
            continue
        for observation in kv.list(KV.observations(sess_id)):
            for concept in (observation.get("concepts") or []):
                if isinstance(concept, str) and concept:
                    concept_tally[concept] += 1
            for file_path in (observation.get("files") or []):
                _record_path(file_path)

            tool_name = observation.get("toolName")
            if tool_name:
                concept_tally[tool_name] += 1

            # toolInput may be a dict or a JSON-encoded string.
            tool_input = observation.get("toolInput")
            if isinstance(tool_input, str):
                try:
                    tool_input = json.loads(tool_input)
                except Exception:
                    tool_input = {}
            if isinstance(tool_input, dict):
                for key in path_keys:
                    _record_path(tool_input.get(key, ""))

            # A narrative/raw payload that looks like JSON is mined as well.
            body = observation.get("narrative") or observation.get("raw") or ""
            if not (isinstance(body, str) and body.startswith("{")):
                continue
            try:
                decoded = json.loads(body)
                if isinstance(decoded, dict):
                    embedded_tool = decoded.get("toolName") or decoded.get("tool_name")
                    if embedded_tool:
                        concept_tally[embedded_tool] += 1
                    for key in path_keys:
                        _record_path(decoded.get(key, ""))
            except Exception:
                pass

    # Memories tagged with this project contribute too.
    for memory in kv.list(KV.memories):
        if memory.get("project") != project:
            continue
        for concept in (memory.get("concepts") or []):
            if concept:
                concept_tally[concept] += 1
        for file_path in (memory.get("files") or []):
            _record_path(file_path)

    profile["topConcepts"] = [
        {"concept": name, "frequency": count}
        for name, count in concept_tally.most_common(20)
    ]
    profile["topFiles"] = [
        {"file": name, "frequency": count}
        for name, count in file_tally.most_common(20)
    ]
    profile["sessionCount"] = len(project_sessions)
    return profile

def export_data(kv: StateKV, data: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    """Build an export payload of sessions, observations, memories and side tables.

    Options read from ``data``:
      * ``maxSessions`` - page size, clamped to 1..1000; unparsable values
        are ignored (no pagination).
      * ``offset``      - non-negative start index; only applied when
        ``maxSessions`` is in effect.

    Sessions are ordered by ``startedAt`` descending.  Observations and
    profiles are limited to the selected page of sessions; every other
    collection is exported in full.  Optional collections are included only
    when non-empty, and a ``pagination`` entry is added when paging is active.
    """
    options = {} if data is None else data

    max_sessions = None
    requested_max = options.get("maxSessions")
    if requested_max is not None:
        try:
            max_sessions = min(max(int(requested_max), 1), 1000)
        except Exception:
            pass

    offset = 0
    requested_offset = options.get("offset")
    if requested_offset is not None:
        try:
            offset = max(int(requested_offset), 0)
        except Exception:
            pass

    all_sessions = kv.list(KV.sessions)
    all_sessions.sort(key=lambda sess: sess.get("startedAt", ""), reverse=True)

    page = (
        all_sessions
        if max_sessions is None
        else all_sessions[offset:offset + max_sessions]
    )

    memories = kv.list(KV.memories)
    summaries = kv.list(KV.summaries)

    # Observations, keyed by session id, for the selected page only.
    observations = {}
    for sess in page:
        sess_id = sess.get("id")
        if not sess_id:
            continue
        rows = kv.list(KV.observations(sess_id))
        if rows:
            observations[sess_id] = rows

    # Stored profiles for the distinct projects appearing on the page.
    project_names = list({sess.get("project") for sess in page if sess.get("project")})
    profiles = []
    for name in project_names:
        stored_profile = kv.get(KV.profiles, name)
        if stored_profile:
            profiles.append(stored_profile)

    # (export key, KV scope) pairs, in the order they appear in the payload.
    optional_scopes = (
        ("graphNodes", KV.graphNodes),
        ("graphEdges", KV.graphEdges),
        ("semanticMemories", KV.semantic),
        ("proceduralMemories", KV.procedural),
        ("actions", KV.actions),
        ("actionEdges", KV.actionEdges),
        ("sentinels", KV.sentinels),
        ("sketches", KV.sketches),
        ("crystals", KV.crystals),
        ("facets", KV.facets),
        ("lessons", KV.lessons),
        ("insights", KV.insights),
        ("routines", KV.routines),
        ("signals", KV.signals),
        ("checkpoints", KV.checkpoints),
        ("accessLogs", KV.accessLog),
    )
    optional_payload = [(key, kv.list(scope)) for key, scope in optional_scopes]

    res = {
        "version": "0.9.21",
        "exportedAt": datetime.datetime.utcnow().isoformat() + "Z",
        "sessions": page,
        "observations": observations,
        "memories": memories,
        "summaries": summaries
    }
    if profiles:
        res["profiles"] = profiles
    for key, rows in optional_payload:
        if rows:
            res[key] = rows

    if max_sessions is not None:
        total = len(all_sessions)
        res["pagination"] = {
            "offset": offset,
            "limit": max_sessions,
            "total": total,
            "hasMore": offset + max_sessions < total
        }
    return res


def set_project_profile(kv: StateKV, project: str, profile: Dict[str, Any]) -> Dict[str, Any]:
    """Store ``profile`` under ``project`` and return it.

    The passed-in dict is mutated: ``updatedAt`` is set to the current UTC
    time before the write.  After storing, ``commit_if_enabled`` is called
    with a descriptive message and the current agent id.
    """
    stamp = datetime.datetime.utcnow().isoformat() + "Z"
    profile["updatedAt"] = stamp
    kv.set(KV.profiles, project, profile)

    # Commit to Dolt
    commit_message = f"Set project profile for {project}"
    commit_if_enabled(kv, commit_message, get_agent_id())

    return profile

def get_relations(kv: StateKV) -> List[Dict[str, Any]]:
    """Return every record stored in the relations scope."""
    relations = kv.list(KV.relations)
    return relations

def add_relation(kv: StateKV, data: Dict[str, Any]) -> Dict[str, Any]:
    """Create and store a relation record between two ids.

    ``data`` must provide ``sourceId``, ``targetId`` and ``type`` (a missing
    key raises ``KeyError``).  An optional ``agentId`` is used for the commit;
    otherwise ``get_agent_id()`` supplies it.  Returns the stored relation.
    """
    relation_id = generate_id("rel")
    source_id, target_id, relation_type = data["sourceId"], data["targetId"], data["type"]
    created_at = datetime.datetime.utcnow().isoformat() + "Z"
    relation = dict(
        id=relation_id,
        sourceId=source_id,
        targetId=target_id,
        type=relation_type,
        createdAt=created_at,
    )
    kv.set(KV.relations, relation_id, relation)

    # Commit to Dolt
    committer = data.get("agentId") or get_agent_id()
    commit_if_enabled(
        kv,
        f"Add relation {relation_type} between {source_id} and {target_id}",
        committer,
    )

    return relation

def evolve_memory(kv: StateKV, data: Dict[str, Any]) -> Dict[str, Any]:
    """Replace a memory's content by creating a new version that supersedes it.

    ``data`` keys:
      * ``memoryId``   - id of the memory to evolve (required).
      * ``newContent`` - content of the new version (required).
      * ``newTitle``   - optional title; defaults to the first 80 characters
        of ``newContent``.
      * ``agentId``    - optional committer id.

    The existing record is flagged ``isLatest=False`` and a copy with a new
    id, bumped ``version``, ``parentId`` and ``supersedes`` is stored as the
    latest version.  Both search indexes are updated accordingly.

    Returns ``{"success": True, "memory": <new record>}``.
    Raises ``ValueError`` when the memory does not exist and ``KeyError`` when
    a required key is missing from ``data``.
    """
    mem_id = data["memoryId"]
    new_content = data["newContent"]
    new_title = data.get("newTitle")

    existing = kv.get(KV.memories, mem_id)
    if not existing:
        raise ValueError("Memory not found")

    now = datetime.datetime.utcnow().isoformat() + "Z"

    # Build the successor completely BEFORE touching the store: if anything
    # here raises (e.g. non-sliceable content), the old record must still be
    # the latest version rather than leaving the chain with no latest entry.
    new_mem = dict(existing)
    new_mem.update({
        "id": generate_id("mem"),
        "content": new_content,
        "title": new_title or new_content[:80],
        # `or 1` also covers a stored version of None, which `.get(..., 1)` would not.
        "version": (existing.get("version") or 1) + 1,
        "parentId": existing["id"],
        "supersedes": [existing["id"]],
        "createdAt": now,
        "updatedAt": now,
        "isLatest": True,
    })

    existing["isLatest"] = False
    kv.set(KV.memories, existing["id"], existing)
    kv.set(KV.memories, new_mem["id"], new_mem)

    # Re-index.  Indexing is best-effort, but add and remove are attempted
    # independently so a failed add cannot leave the stale entry searchable,
    # and failures are reported instead of silently swallowed.
    try:
        _bm25_index.add(memory_to_observation(new_mem))
    except Exception as e:
        print(f"[evolve_memory] BM25 add failed for {new_mem['id']}: {e}")
    try:
        _bm25_index.remove(existing["id"])
    except Exception as e:
        print(f"[evolve_memory] BM25 remove failed for {existing['id']}: {e}")

    comb_text = new_mem["title"] + " " + new_mem["content"]
    vector_index_add_guarded(new_mem["id"], "memory", comb_text, {"kind": "memory", "logId": new_mem["id"]})
    if _vector_index:
        _vector_index.remove(existing["id"])

    if _index_persistence:
        _index_persistence.schedule_save()

    # Commit to Dolt
    agent_id = data.get("agentId") or get_agent_id() or new_mem.get("agentId")
    commit_if_enabled(kv, f"Evolve memory {new_mem['id']} (v{new_mem['version']}): {new_mem['title']}", agent_id)

    return {"success": True, "memory": new_mem}

def auto_forget(kv: StateKV, dry_run: bool = False) -> Dict[str, Any]:
    """Evict expired memories and old low-importance observations.

    Selection rules:
      * a memory is evicted when its ``forgetAfter`` timestamp is in the past;
      * an observation is evicted when ``importance <= 2`` and it is more
        than 180 days old.

    Records whose timestamps cannot be parsed are reported on stdout and
    left alone.  With ``dry_run=True`` nothing is deleted; the candidates are
    only returned.  Otherwise the records are deleted, image reference
    counts are decremented, both search indexes are updated, and an audit
    entry plus a commit are made when anything was evicted.

    Returns a dict with ``success``, ``evictedMemories``,
    ``evictedObservations`` (ids), ``evicted`` (total count) and ``dryRun``.
    """
    now_dt = datetime.datetime.utcnow()
    evicted_memories = []
    evicted_observations = []

    def _to_naive_utc(raw_value):
        # Parse a timestamp and normalise it to naive UTC so it can be
        # compared with `now_dt`.  Aware values are CONVERTED to UTC first;
        # merely dropping tzinfo would skew non-UTC offsets by hours.
        # dateutil is imported here (inside the callers' try blocks) so that
        # an import failure is reported per record instead of aborting.
        import dateutil.parser
        parsed = dateutil.parser.parse(raw_value)
        if parsed.tzinfo:
            parsed = parsed.astimezone(datetime.timezone.utc).replace(tzinfo=None)
        return parsed

    # 1. Evict expired memories
    memories = kv.list(KV.memories)
    for mem in memories:
        forget_after = mem.get("forgetAfter")
        if forget_after:
            try:
                if _to_naive_utc(forget_after) < now_dt:
                    evicted_memories.append(mem["id"])
            except Exception as e:
                print(f"[auto_forget] Failed to parse forgetAfter '{forget_after}': {e}")

    # 2. Evict low-value old observations (importance <= 2, age > 180 days)
    sessions = kv.list(KV.sessions)
    for sess in sessions:
        sid = sess.get("id")
        if not sid:
            continue
        obs_list = kv.list(KV.observations(sid))
        for obs in obs_list:
            importance = obs.get("importance")
            ts = obs.get("timestamp")
            if importance is not None and ts:
                try:
                    age_days = (now_dt - _to_naive_utc(ts)).days
                    if importance <= 2 and age_days > 180:
                        evicted_observations.append((sid, obs["id"]))
                except Exception as e:
                    print(f"[auto_forget] Failed to parse timestamp '{ts}': {e}")

    if not dry_run:
        for mem_id in evicted_memories:
            # Read before deleting so the image reference can be released.
            mem = kv.get(KV.memories, mem_id)
            kv.delete(KV.memories, mem_id)
            if mem and mem.get("imageRef"):
                ref = mem["imageRef"]
                refs = kv.get(KV.imageRefs, ref) or 0
                if refs > 0:
                    kv.set(KV.imageRefs, ref, refs - 1)
            _bm25_index.remove(mem_id)
            if _vector_index:
                _vector_index.remove(mem_id)

        for sid, obs_id in evicted_observations:
            obs = kv.get(KV.observations(sid), obs_id)
            kv.delete(KV.observations(sid), obs_id)
            if obs:
                img = obs.get("imageData") or obs.get("imageRef")
                if img:
                    refs = kv.get(KV.imageRefs, img) or 0
                    if refs > 0:
                        kv.set(KV.imageRefs, img, refs - 1)
            _bm25_index.remove(obs_id)
            if _vector_index:
                _vector_index.remove(obs_id)

        if evicted_memories or evicted_observations:
            if _index_persistence:
                _index_persistence.schedule_save()
            safe_audit(
                kv,
                "auto_forget",
                "mem::auto_forget",
                evicted_memories + [oid for _, oid in evicted_observations],
                {
                    "evictedMemoriesCount": len(evicted_memories),
                    "evictedObservationsCount": len(evicted_observations),
                    "dryRun": False
                }
            )
            commit_if_enabled(kv, f"Auto forget: evicted {len(evicted_memories)} memories, {len(evicted_observations)} observations", "system")

    return {
        "success": True,
        "evictedMemories": evicted_memories,
        "evictedObservations": [oid for _, oid in evicted_observations],
        "evicted": len(evicted_memories) + len(evicted_observations),
        "dryRun": dry_run
    }

def health_check(kv: StateKV) -> Dict[str, Any]:
    """Report service health based on whether a database connection can be opened.

    A connection is obtained via ``kv._get_conn()`` and closed immediately.
    Any exception during that probe yields ``databaseStatus`` of
    ``"disconnected"`` and an overall ``status`` of ``"degraded"``.
    """
    try:
        kv._get_conn().close()
    except Exception:
        db_status = "disconnected"
    else:
        db_status = "connected"

    overall = "healthy" if db_status == "connected" else "degraded"
    return {
        "status": overall,
        "service": "agentmemory",
        "version": "0.9.8",
        "database": "dolt",
        "databaseStatus": db_status
    }

def strip_xml_wrappers(raw: str) -> str:
    """Remove markdown code fences and surrounding chatter from an XML reply.

    Falsy input yields ``""``.  After stripping fences, the span from the
    first opening tag to the last closing tag is returned when one exists;
    otherwise the fence-free, trimmed text is returned unchanged.
    """
    if not raw:
        return ""

    text = raw.strip()
    # Drop the "```xml" opener (any case) first, then any remaining fences.
    fence_rules = ((r'```xml\s*\n?', re.IGNORECASE), (r'```', 0))
    for fence_pattern, fence_flags in fence_rules:
        text = re.sub(fence_pattern, '', text, flags=fence_flags)
    text = text.strip()

    root = re.search(r'(<[a-zA-Z_][a-zA-Z0-9_-]*>[\s\S]*<\/[a-zA-Z_][a-zA-Z0-9_-]*>)', text)
    return root.group(1).strip() if root else text

def get_xml_tag(text: str, tag: str) -> Optional[str]:
    """Return the trimmed inner text of the first ``<tag>...</tag>`` pair, or ``None``.

    ``text`` is first passed through ``strip_xml_wrappers``.  The match is
    non-greedy and may span lines.  ``tag`` is interpolated into the regular
    expression as-is.
    """
    body = strip_xml_wrappers(text)
    found = re.search(rf"<{tag}>(.*?)</{tag}>", body, re.DOTALL)
    if found is None:
        return None
    return found.group(1).strip()

def get_xml_children(text: str, parent_tag: str, child_tag: str) -> List[str]:
    """Return the trimmed text of every ``<child_tag>`` inside the first ``<parent_tag>``.

    An empty list is returned when the parent element is missing or empty.
    """
    section = get_xml_tag(text, parent_tag)
    if not section:
        return []
    child_re = re.compile(rf"<{child_tag}>(.*?)</{child_tag}>", re.DOTALL)
    return [hit.group(1).strip() for hit in child_re.finditer(section)]

def generate_content(system_instruction: str, prompt: str) -> str:
    """Call the Gemini ``generateContent`` REST endpoint and return the reply text.

    Configuration comes from the environment:
      * ``GEMINI_API_KEY`` or ``GOOGLE_API_KEY`` - required credential.
      * ``GEMINI_MODEL`` - model name, default ``gemini-2.5-flash``.

    Args:
        system_instruction: Text sent as the request's ``systemInstruction``.
        prompt: Text sent as the single user message.

    Returns:
        The ``text`` of the first part of the first candidate (``""`` if that
        part has no text).

    Raises:
        ValueError: no API key is configured.
        RuntimeError: the request failed or the response had no
            candidates/parts; the original exception is chained as the cause.
    """
    api_key = os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY")
    if not api_key:
        raise ValueError("No Gemini/Google API key found")
    model = os.getenv("GEMINI_MODEL", "gemini-2.5-flash")
    # The key is deliberately NOT placed in the URL query string: URLs end up
    # in proxy/server logs and tracebacks.  It travels in a header instead.
    url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent"
    payload = {
        "contents": [
            {
                "role": "user",
                "parts": [
                    {"text": prompt}
                ]
            }
        ],
        "systemInstruction": {
            "parts": [
                {"text": system_instruction}
            ]
        },
        "generationConfig": {
            "temperature": 0.2
        }
    }

    req_data = json.dumps(payload).encode("utf-8")
    import urllib.request
    req = urllib.request.Request(
        url,
        data=req_data,
        headers={
            "Content-Type": "application/json",
            "x-goog-api-key": api_key,
        },
        method="POST"
    )

    try:
        with urllib.request.urlopen(req, timeout=60.0) as response:
            resp_data = json.loads(response.read().decode("utf-8"))

        candidates = resp_data.get("candidates", [])
        if not candidates:
            raise RuntimeError("Gemini generateContent returned no candidates")

        parts = candidates[0].get("content", {}).get("parts", [])
        if not parts:
            raise RuntimeError("Gemini generateContent candidate content had no parts")

        return parts[0].get("text", "")
    except Exception as e:
        # Callers only handle RuntimeError; keep the cause for debugging.
        raise RuntimeError(f"Gemini generateContent call failed: {e}") from e

def summarize(kv: StateKV, data: Dict[str, Any]) -> Dict[str, Any]:
    """Produce and store an LLM-generated summary for one session.

    Observations of the session that have a ``title`` are split into chunks
    of 400 and each chunk is summarised via ``generate_content``.  A single
    partial summary is used directly; several are merged with a second
    "reduce" call.  The result is stored under the summaries scope, the
    session record's ``title``/``summary`` are updated, and an audit entry
    is written.

    Args:
        kv: State store.
        data: Must contain ``sessionId``.

    Returns:
        ``{"success": True, "summary": {...}}`` on success, otherwise
        ``{"success": False, "error": <reason>}``.  Failures of individual
        chunks are printed and skipped; the call only fails when no chunk
        produced a usable summary or the reduce step raised.
    """
    session_id = data.get("sessionId")
    if not session_id:
        return {"success": False, "error": "sessionId is required"}
    
    session = kv.get(KV.sessions, session_id)
    if not session:
        return {"success": False, "error": "session_not_found"}
        
    observations = kv.list(KV.observations(session_id))
    compressed = [o for o in observations if o.get("title")]
    if not compressed:
        return {"success": False, "error": "no_observations"}
        
    SUMMARY_SYSTEM = """You are a session summarization assistant. Your job is to read all raw tool executions and outcomes from a coding session and produce a high-fidelity summary.

    

    Output XML:

    <summary>

      <title>Concise title summarizing the session</title>

      <narrative>1-2 paragraphs of narrative describing what was done, what succeeded, and what failed</narrative>

      <decisions>

        <decision>Architectural decision, key insight, or choice made</decision>

      </decisions>

      <files>

        <file>path/to/modified/file</file>

      </files>

      <concepts>

        <concept>important concept, library, tool, or command used</concept>

      </concepts>

    </summary>"""

    chunk_size = 400
    chunks = [compressed[i:i + chunk_size] for i in range(0, len(compressed), chunk_size)]
    
    partial_summaries = []
    # Must be defined before the loop: chunks can be skipped without raising
    # (response lacking a <title>), and the failure path below reads it.
    last_error = "no chunk produced a parsable <title>"
    for idx, chunk in enumerate(chunks):
        obs_text = "".join(
            f"[{o.get('type')}] {o.get('title')}\n{o.get('narrative') or ''}\nFiles: {', '.join(o.get('files') or [])}\n\n"
            for o in chunk
        )
        
        prompt = f"Summarize this chunk {idx+1}/{len(chunks)} of observations:\n\n{obs_text}"
        try:
            response = generate_content(SUMMARY_SYSTEM, prompt)
            cleaned = strip_xml_wrappers(response)
            title = get_xml_tag(cleaned, "title")
            if not title:
                # A reply without a title is unusable; remember why and move on.
                last_error = f"chunk {idx+1} response had no <title>"
                continue
            partial_summaries.append({
                "title": title,
                "narrative": get_xml_tag(cleaned, "narrative") or "",
                "keyDecisions": get_xml_children(cleaned, "decisions", "decision"),
                "filesModified": get_xml_children(cleaned, "files", "file"),
                "concepts": get_xml_children(cleaned, "concepts", "concept"),
            })
        except Exception as e:
            last_error = str(e)
            print(f"[summarize] Chunk {idx+1} failed: {e}")
            
    if not partial_summaries:
        return {"success": False, "error": f"No chunks summarized successfully. Last error: {last_error}"}
        
    if len(partial_summaries) == 1:
        final_summary = {
            "sessionId": session_id,
            "project": session.get("project"),
            "createdAt": datetime.datetime.utcnow().isoformat() + "Z",
            "title": partial_summaries[0]["title"],
            "narrative": partial_summaries[0]["narrative"],
            "keyDecisions": partial_summaries[0]["keyDecisions"],
            "filesModified": partial_summaries[0]["filesModified"],
            "concepts": partial_summaries[0]["concepts"],
            "observationCount": len(compressed)
        }
    else:
        REDUCE_SYSTEM = """You are a session summarization reducer. Reduce multiple partial chunk summaries into a single final summary.

        

        Output XML:

        <summary>

          <title>Concise final title summarizing the entire session</title>

          <narrative>Comprehensive narrative describing what was done, what succeeded, and what failed</narrative>

          <decisions>

            <decision>Architectural decision, key insight, or choice made</decision>

          </decisions>

          <files>

            <file>path/to/modified/file</file>

          </files>

          <concepts>

            <concept>important concept, library, tool, or command used</concept>

          </concepts>

        </summary>"""
        
        reduce_prompt = "Reduce these partial summaries:\n\n"
        for idx, ps in enumerate(partial_summaries):
            reduce_prompt += f"[Chunk {idx+1}]\nTitle: {ps['title']}\nNarrative: {ps['narrative']}\nDecisions: {', '.join(ps['keyDecisions'])}\nFiles: {', '.join(ps['filesModified'])}\nConcepts: {', '.join(ps['concepts'])}\n\n"
            
        try:
            response = generate_content(REDUCE_SYSTEM, reduce_prompt)
            cleaned = strip_xml_wrappers(response)
            final_summary = {
                "sessionId": session_id,
                "project": session.get("project"),
                "createdAt": datetime.datetime.utcnow().isoformat() + "Z",
                # Fall back to the first chunk's title if the reducer omitted one.
                "title": get_xml_tag(cleaned, "title") or partial_summaries[0]["title"],
                "narrative": get_xml_tag(cleaned, "narrative") or "",
                "keyDecisions": get_xml_children(cleaned, "decisions", "decision"),
                "filesModified": get_xml_children(cleaned, "files", "file"),
                "concepts": get_xml_children(cleaned, "concepts", "concept"),
                "observationCount": len(compressed)
            }
        except Exception as e:
            return {"success": False, "error": f"Reduction failed: {e}"}
            
    kv.set(KV.summaries, session_id, final_summary)
    
    # Re-read the session so the title/summary update is applied to the
    # current stored record rather than the copy fetched before the LLM calls.
    session = kv.get(KV.sessions, session_id)
    if session:
        session["title"] = final_summary["title"]
        session["summary"] = final_summary["narrative"]
        kv.set(KV.sessions, session_id, session)

    safe_audit(kv, "compress", "mem::summarize", [session_id], {
        "title": final_summary["title"],
        "observationCount": len(compressed)
    })
    
    return {"success": True, "summary": final_summary}

def consolidate(kv: StateKV, project: Optional[str] = None, min_observations: int = 10) -> Dict[str, Any]:
    sessions = list_sessions(kv)
    if project:
        sessions = [s for s in sessions if s.get("project") == project]
        
    all_obs = []
    for s in sessions:
        obs_list = kv.list(KV.observations(s["id"]))
        for o in obs_list:
            if o.get("title") and o.get("importance", 5) >= 5:
                all_obs.append((o, s["id"]))
                
    if len(all_obs) < min_observations:
        return {"consolidated": 0, "reason": "insufficient_observations", "success": True}
        
    # Group observations by concepts
    concept_groups = {}
    for obs, sid in all_obs:
        concepts = obs.get("concepts") or []
        for c in concepts:
            key = c.lower().strip()
            if not key:
                continue
            if key not in concept_groups:
                concept_groups[key] = []
            concept_groups[key].append((obs, sid))
            
    # Sort groups that have >= 3 observations by size descending
    sorted_groups = sorted(
        [(k, g) for k, g in concept_groups.items() if len(g) >= 3],
        key=lambda x: len(x[1]),
        reverse=True
    )
    
    consolidated_count = 0
    existing_memories = kv.list(KV.memories)
    
    MAX_LLM_CALLS = 10
    llm_calls = 0
    
    # Prompt templates
    CONSOLIDATION_SYSTEM = """You are a memory consolidation engine. Given a set of related observations from coding sessions, synthesize them into a single long-term memory.

    

    Output XML:

    <memory>

      <type>pattern|preference|architecture|bug|workflow|fact</type>

      <title>Concise memory title (max 80 chars)</title>

      <content>2-4 sentence description of the learned insight</content>

      <concepts>

        <concept>key term</concept>

      </concepts>

      <files>

        <file>relevant/file/path</file>

      </files>

      <strength>1-10 how confident/important this memory is</strength>

    </memory>"""
    
    for concept, obs_group in sorted_groups:
        if llm_calls >= MAX_LLM_CALLS:
            break
            
        # Get top 8 by importance
        top = sorted(obs_group, key=lambda x: x[0].get("importance", 5), reverse=True)[:8]
        session_ids = list(set([x[1] for x in top]))
        obs_ids = list(set([x[0]["id"] for x in top]))
        
        prompt_parts = []
        for obs, sid in top:
            prompt_parts.append(f"[{obs.get('type')}] {obs.get('title')}\n{obs.get('narrative') or ''}\nFiles: {', '.join(obs.get('files') or [])}\nImportance: {obs.get('importance', 5)}")
        obs_prompt = "\n\n".join(prompt_parts)
        
        try:
            response = generate_content(CONSOLIDATION_SYSTEM, f"Concept: \"{concept}\"\n\nObservations:\n{obs_prompt}")
            llm_calls += 1
            
            cleaned = strip_xml_wrappers(response)
            m_type = get_xml_tag(cleaned, "type") or "fact"
            m_title = get_xml_tag(cleaned, "title")
            m_content = get_xml_tag(cleaned, "content")
            
            if not m_title or not m_content:
                continue
                
            m_strength_str = get_xml_tag(cleaned, "strength") or "5"
            try:
                m_strength = max(1, min(10, int(m_strength_str)))
            except Exception:
                m_strength = 5
                
            concepts_list = get_xml_children(cleaned, "concepts", "concept")
            files_list = get_xml_children(cleaned, "files", "file")
            
            now = datetime.datetime.utcnow().isoformat() + "Z"
            
            # Find existing memory with same title
            existing_match = None
            for mem in existing_memories:
                if mem.get("title", "").lower() == m_title.lower() and mem.get("isLatest") is not False:
                    if not project or not mem.get("project") or mem.get("project") == project:
                        existing_match = mem
                        break
                        
            if existing_match:
                existing_match["isLatest"] = False
                kv.set(KV.memories, existing_match["id"], existing_match)
                
                evolved = {
                    "id": generate_id("mem"),
                    "createdAt": now,
                    "updatedAt": now,
                    "type": m_type,
                    "title": m_title,
                    "content": m_content,
                    "concepts": concepts_list,
                    "files": files_list,
                    "sessionIds": session_ids,
                    "strength": m_strength,
                    "version": (existing_match.get("version") or 1) + 1,
                    "parentId": existing_match["id"],
                    "supersedes": [existing_match["id"]] + (existing_match.get("supersedes") or []),
                    "sourceObservationIds": obs_ids,
                    "isLatest": True
                }
                if project:
                    evolved["project"] = project
                kv.set(KV.memories, evolved["id"], evolved)
                consolidated_count += 1
            else:
                memory = {
                    "id": generate_id("mem"),
                    "createdAt": now,
                    "updatedAt": now,
                    "type": m_type,
                    "title": m_title,
                    "content": m_content,
                    "concepts": concepts_list,
                    "files": files_list,
                    "sessionIds": session_ids,
                    "strength": m_strength,
                    "version": 1,
                    "sourceObservationIds": obs_ids,
                    "isLatest": True
                }
                if project:
                    memory["project"] = project
                kv.set(KV.memories, memory["id"], memory)
                consolidated_count += 1
                
        except Exception as e:
            print(f"[consolidate] Concept '{concept}' failed: {e}")

    # === Semantic Memory Fact Merger ===
    summaries = kv.list(KV.summaries)
    new_facts_count = 0
    if len(summaries) >= 5:
        recent_summaries = sorted(
            summaries,
            key=lambda s: s.get("createdAt", ""),
            reverse=True
        )[:20]
        
        SEMANTIC_MERGE_SYSTEM = """You are a memory consolidation engine. Given overlapping episodic memories (session summaries), extract stable factual knowledge.

        

        Output format (XML):

        <facts>

          <fact confidence="0.0-1.0">Concise factual statement</fact>

        </facts>

        

        Rules:

        - Extract only facts that appear in 2+ episodes or are highly confident

        - Confidence reflects how well-supported the fact is across episodes

        - Combine overlapping information into single concise facts

        - Skip ephemeral details (specific error messages, temporary states)"""
        
        prompt_parts = []
        for i, s in enumerate(recent_summaries):
            prompt_parts.append(f"[Episode {i + 1}]\nTitle: {s.get('title')}\nNarrative: {s.get('narrative') or ''}\nConcepts: {', '.join(s.get('concepts') or [])}")
        merge_prompt = "Consolidate these episodic memories into stable facts:\n\n" + "\n\n".join(prompt_parts)
        
        try:
            response = generate_content(SEMANTIC_MERGE_SYSTEM, merge_prompt)
            fact_matches = re.findall(r'<fact\s+confidence="([^"]+)">([^<]+)</fact>', response, re.DOTALL)
            
            existing_semantic = kv.list(KV.semantic)
            now = datetime.datetime.utcnow().isoformat() + "Z"
            
            for conf_str, fact_text in fact_matches:
                fact_text = fact_text.strip()
                try:
                    confidence = float(conf_str)
                except Exception:
                    confidence = 0.5
                    
                existing = None
                for es in existing_semantic:
                    if es.get("fact", "").lower() == fact_text.lower():
                        existing = es
                        break
                        
                if existing:
                    existing["accessCount"] = (existing.get("accessCount") or 0) + 1
                    existing["lastAccessedAt"] = now
                    existing["updatedAt"] = now
                    existing["confidence"] = max(existing.get("confidence", 0.5), confidence)
                    kv.set(KV.semantic, existing["id"], existing)
                else:
                    sem = {
                        "id": generate_id("sem"),
                        "fact": fact_text,
                        "confidence": confidence,
                        "sourceSessionIds": [s["sessionId"] for s in recent_summaries if "sessionId" in s],
                        "sourceMemoryIds": [],
                        "accessCount": 1,
                        "lastAccessedAt": now,
                        "strength": confidence,
                        "createdAt": now,
                        "updatedAt": now
                    }
                    kv.set(KV.semantic, sem["id"], sem)
                    new_facts_count += 1
        except Exception as e:
            print(f"[consolidate] Semantic merge failed: {e}")

    # === Procedural Memory Extraction ===
    memories = kv.list(KV.memories)
    new_procs_count = 0
    patterns = []
    for m in memories:
        if m.get("isLatest") is not False and m.get("type") == "pattern":
            freq = len(m.get("sessionIds") or [])
            if freq >= 2:
                patterns.append({"content": m.get("content", ""), "frequency": freq})
                
    if len(patterns) >= 2:
        PROCEDURAL_EXTRACTION_SYSTEM = """You are a procedural memory extractor. Given repeated patterns and workflows observed across sessions, extract reusable procedures.

        

        Output format (XML):

        <procedures>

          <procedure name="short descriptive name" trigger="when to use this procedure">

            <step>Step 1 description</step>

            <step>Step 2 description</step>

          </procedure>

        </procedures>

        

        Rules:

        - Only extract procedures observed 2+ times

        - Steps should be concrete and actionable

        - Trigger condition should be specific enough to match automatically"""
        
        prompt_parts = []
        for i, p in enumerate(patterns):
            prompt_parts.append(f"[Pattern {i + 1}] (seen {p['frequency']}x)\n{p['content']}")
        proc_prompt = "Extract reusable procedures from these recurring patterns:\n\n" + "\n\n".join(prompt_parts)
        
        try:
            response = generate_content(PROCEDURAL_EXTRACTION_SYSTEM, proc_prompt)
            proc_matches = re.findall(r'<procedure\s+name="([^"]+)"\s+trigger="([^"]+)">([\s\S]*?)</procedure>', response, re.DOTALL)
            
            existing_procs = kv.list(KV.procedural)
            now = datetime.datetime.utcnow().isoformat() + "Z"
            
            for name, trigger, steps_block in proc_matches:
                steps = [s.strip() for s in re.findall(r'<step>([^<]+)</step>', steps_block, re.DOTALL)]
                
                existing = None
                for ep in existing_procs:
                    if ep.get("name", "").lower() == name.lower():
                        existing = ep
                        break
                        
                if existing:
                    existing["frequency"] = (existing.get("frequency") or 1) + 1
                    existing["updatedAt"] = now
                    existing["strength"] = min(1.0, (existing.get("strength") or 0.5) + 0.1)
                    kv.set(KV.procedural, existing["id"], existing)
                else:
                    proc = {
                        "id": generate_id("proc"),
                        "name": name,
                        "steps": steps,
                        "triggerCondition": trigger,
                        "frequency": 1,
                        "sourceSessionIds": [],
                        "strength": 0.5,
                        "createdAt": now,
                        "updatedAt": now
                    }
                    kv.set(KV.procedural, proc["id"], proc)
                    new_procs_count += 1
        except Exception as e:
            print(f"[consolidate] Procedural extraction failed: {e}")

    res_summary = {
        "success": True,
        "consolidated": consolidated_count,
        "totalObservations": len(all_obs),
        "semantic": {
            "newFacts": new_facts_count,
            "totalSummaries": len(summaries)
        },
        "procedural": {
            "newProcedures": new_procs_count,
            "patternsAnalyzed": len(patterns)
        }
    }
    safe_audit(kv, "consolidate", "mem::consolidate-pipeline", [], res_summary)
    commit_if_enabled(kv, f"Consolidation complete: consolidated={consolidated_count}, facts={new_facts_count}, procs={new_procs_count}", "system")
    return res_summary

# Wiring helpers: register the index persistence, embedding provider, and stream broadcaster
def set_index_persistence(persistence: IndexPersistence) -> None:
    """Register the index persistence helper for this module.

    Stores ``persistence`` in the module-level ``_index_persistence`` slot,
    replacing whatever was registered before. No validation is performed.
    """
    global _index_persistence
    _index_persistence = persistence

def set_embedding_provider(provider) -> None:
    """Install ``provider`` and rebuild the module's hybrid searcher around it.

    The provider is recorded in ``_embedding_provider`` first, so it stays
    registered even if constructing the new ``HybridSearch`` raises. The
    searcher is rebuilt over the existing BM25 and vector indexes; its fourth
    constructor argument is left as ``None``, the same value used for the
    module-level default instance.
    """
    global _embedding_provider, _hybrid_search
    _embedding_provider = provider
    # ``provider`` is the object just stored above, so passing it directly is
    # the same as passing the module-level reference.
    _hybrid_search = HybridSearch(_bm25_index, _vector_index, provider, None)

def set_stream_broadcaster(broadcaster) -> None:
    """Register the callable used to broadcast stream payloads.

    Stores ``broadcaster`` in the module-level ``_stream_broadcaster`` slot.
    ``broadcast_stream`` later invokes it with a single payload argument;
    a falsy value (e.g. ``None``) makes ``broadcast_stream`` a no-op.
    """
    global _stream_broadcaster
    _stream_broadcaster = broadcaster

def broadcast_stream(payload: Dict[str, Any]) -> None:
    """Forward ``payload`` to the registered stream broadcaster, if any.

    Does nothing when the module-level ``_stream_broadcaster`` is falsy.
    Delivery is best-effort: any ``Exception`` raised by the broadcaster is
    caught and reported with ``print`` so the caller is never interrupted.
    """
    sink = _stream_broadcaster
    if not sink:
        # No broadcaster registered; nothing to deliver.
        return

    try:
        sink(payload)
    except Exception as exc:
        print(f"[broadcaster] Failed: {exc}")