Enhance README and scripts for cognitive architecture testing
- Updated README to clarify the functionality of `UnifiedField.predict()`, including the expected output format and common follow-up actions.
- Added a minimal example in the README demonstrating the use of `TopologyMapper` for graph projection.
- Improved error handling in `chat.py` by removing redundant traceback imports and ensuring consistent exception logging.
- Refactored `compare_iterative.py` to consolidate shared parameters for scoring bridges, enhancing maintainability.
- Enhanced `smoke_extract.py` with detailed logging and summary statistics for grounding checks, improving diagnostic capabilities.
- Updated `runner.py` and `tasks.py` for better error handling and logging during task evaluation and sample loading.
- Introduced new validation checks in `StructuralCausalModel` to ensure proper parent variable definitions and counterfactual limits (a usage sketch follows the file list below).
- README.md +37 -1
- scripts/chat.py +1 -3
- scripts/compare_iterative.py +39 -15
- scripts/smoke_extract.py +64 -8
- tensegrity/__init__.py +7 -3
- tensegrity/bench/runner.py +29 -8
- tensegrity/bench/tasks.py +1 -1
- tensegrity/broca/benchmark.py +0 -2
- tensegrity/broca/controller.py +5 -2
- tensegrity/broca/schemas.py +1 -0
- tensegrity/causal/arena.py +22 -13
- tensegrity/causal/from_proposal.py +2 -2
- tensegrity/causal/scm.py +77 -12
- tensegrity/core/morton.py +3 -2
- tensegrity/engine/causal_energy.py +22 -8
- tensegrity/engine/fhrr.py +52 -16
- tensegrity/engine/ngc.py +7 -0
- tensegrity/engine/scoring.py +4 -1
- tensegrity/engine/unified_field.py +16 -1
- tensegrity/graft/__init__.py +5 -0
- tensegrity/graft/logit_bias.py +45 -5
- tensegrity/graft/pipeline.py +44 -10
- tensegrity/graft/vocabulary.py +102 -12
- tensegrity/inference/__init__.py +0 -3
- tensegrity/legacy/__init__.py +2 -0
- tensegrity/legacy/v1/agent.py +53 -8
- tensegrity/legacy/v1/blanket.py +49 -25
- tensegrity/legacy/v1/morton.py +73 -24
- tensegrity/memory/episodic.py +5 -0
- tensegrity/memory/epistemic.py +12 -2
- tensegrity/pipeline/canonical.py +30 -20
- tensegrity/pipeline/iterative.py +35 -22
- tests/test_architecture_alignment.py +1 -1
- tests/test_async_graft.py +2 -0
- tests/test_engine.py +5 -1
- tests/test_graft.py +1 -0
- tests/test_needle.py +7 -0
- tests/test_scoring_bench.py +0 -2
- tests/test_tensegrity.py +5 -1
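
The new `StructuralCausalModel` parent validation is exercised like this — a minimal sketch, assuming the `tensegrity` package from this commit is importable (the actual check is in the `tensegrity/causal/scm.py` hunk below):

```python
from tensegrity.causal.scm import StructuralCausalModel

scm = StructuralCausalModel("demo")
scm.add_variable("A", n_values=2, parents=[])
try:
    # "B" was never declared, so listing it as a parent now raises ValueError.
    scm.add_variable("C", n_values=2, parents=["B"])
except ValueError as err:
    print(err)  # demo: undefined parent variable(s) ['B'] for 'C'. ...
```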
--- a/README.md
+++ b/README.md
@@ -45,9 +45,14 @@ cycle = field.observe(
 )
 
 print(cycle["energy"].total)
-
+expected_obs = field.predict()  # np.ndarray, shape (obs_dim,) — settled NGC readout
+# This is the sensory prediction from the current internal state (not a class label).
+print("expected observation vector (first 8 dims):", expected_obs[:8])
+# Common follow-ups: flatten to a label by argmax over logits elsewhere, or pipe into a probe / monitor.
 ```
 
+**`UnifiedField.predict()`** returns a **`numpy.ndarray`** of shape **`(obs_dim,)`**: the **predicted next observation** vector from the settled hierarchical circuit after the last `observe` (NGC’s `predict_observation()`). Assign it (as above), inspect a slice or its norm, or send it to downstream binding / decoding—there is no bundled string label.
+
 The old Morton/POMDP frontend is still available for migration and baselines:
 
 ```python
@@ -97,8 +102,39 @@ explicit. It projects an arbitrary acyclic SCM graph into NGC-compatible layers:
 - same-layer or inverted edges receive virtual parent nodes one layer above the
   endpoints, turning lateral causal structure into shared vertical dependency.
 
+Minimal example (four variables so one graph can show **direct**, **bypass**, and **same-layer lateral** edges). The mapper API is **`TopologyMapper.project_graph(...)`** (or **`TopologyMapper.from_scm(scm, ...)`** with the same `variable_layers`):
+
+```python
+import networkx as nx
+from tensegrity.causal.scm import StructuralCausalModel
+from tensegrity.engine.causal_energy import TopologyMapper
+
+scm = StructuralCausalModel("topology_demo")
+scm.add_variable("A", n_values=4, parents=[])
+scm.add_variable("D", n_values=4, parents=["A"])  # bypass path A → D
+scm.add_variable("B", n_values=4, parents=["A"])  # direct step A → B
+scm.add_variable("C", n_values=4, parents=["B"])  # lateral topology: B, C share a layer below
+
+variable_layers = {"A": 3, "D": 0, "B": 2, "C": 2}
+# A→B / A→D: direct + bypass | B→C at same abstract layer → virtual parent in the embedding
+
+mapping = TopologyMapper(expand_layers=True).from_scm(
+    scm,
+    n_layers=8,
+    variable_layers=variable_layers,
+)
+print(dict(mapping.embedded_layers))  # layer index per node after relays / vparents
+print(mapping.ngc_layer_sizes())      # e.g. widths per layer → output "shape" at a glance
+```
+
 ## Semantic Grafting
 
+```python
+from tensegrity.graft.vocabulary import VocabularyGrounding
+# Keyword baseline: VocabularyGrounding.from_keywords(...)
+# Semantic: VocabularyGrounding.from_semantic_projection(...)
+```
+
 `VocabularyGrounding.from_keywords(...)` remains as a deterministic baseline.
 For less brittle grounding, `VocabularyGrounding.from_semantic_projection(...)`
 uses frozen phrase/token embeddings and cosine proximity to build weighted

--- a/scripts/chat.py
+++ b/scripts/chat.py
@@ -25,7 +25,7 @@ from __future__ import annotations
 import argparse
 import json
 import sys
-
+import traceback
 
 from tensegrity.graft.pipeline import HybridPipeline
 
@@ -141,7 +141,6 @@ def main():
         try:
             pipe.process_observation(line)
         except Exception as e:
-            import traceback
            print(f"[perception failed: {type(e).__name__}: {e}]")
            traceback.print_exc()
            continue
@@ -154,7 +153,6 @@ def main():
                max_tokens=100,
            )
        except Exception as e:
-            import traceback
            print(f"[generation failed: {type(e).__name__}: {e}]")
            traceback.print_exc()
            continue

--- a/scripts/compare_iterative.py
+++ b/scripts/compare_iterative.py
@@ -8,6 +8,7 @@ from __future__ import annotations
 import time
 import argparse
 import logging
+import warnings
 
 import numpy as np
 
@@ -34,23 +35,32 @@ def run_task(task_name: str, n: int):
        print(f" [{task_name}] no samples")
        return None
 
+    shared_params = {
+        "obs_dim": 256,
+        "hidden_dims": [128, 32],
+        "fhrr_dim": 2048,
+        "ngc_settle_steps": 30,
+        "ngc_learning_rate": 0.01,
+        "hopfield_beta": 0.05,
+        "context_settle_steps": 40,
+        "choice_settle_steps": 25,
+        "context_learning_epochs": 3,
+    }
    single = ScoringBridge(
-        …
-        hopfield_beta=0.05, confidence_threshold=0.15,
-        context_settle_steps=40, choice_settle_steps=25,
-        context_learning_epochs=3,
+        **shared_params,
+        confidence_threshold=0.15,
    )
    iterative = IterativeCognitiveScorer(
-        …
-        use_hopfield=True,
+        **shared_params,
+        max_iterations=6,
+        convergence_top_p=0.75,
+        w_sbert=1.0,
+        w_fhrr=0.3,
+        w_ngc=0.6,
+        belief_step=0.6,
+        shaping_lr_scale=0.5,
+        use_hopfield=True,
+        hopfield_steps=2,
    )
 
    n_total = len(samples)
@@ -75,7 +85,21 @@ def run_task(task_name: str, n: int):
        sa = np.array(scores_s)
        if np.allclose(sa, 0.0):
            # use raw sbert sim as tiebreaker (single's gate = uninformative)
-            …
+            if hasattr(single, "sentence_similarities"):
+                sims = single.sentence_similarities(s.prompt, s.choices)
+            elif hasattr(single, "_sentence_similarities"):
+                warnings.warn(
+                    "ScoringBridge has no public sentence_similarities(); using "
+                    "_sentence_similarities (private). Prefer adding a stable public API.",
+                    UserWarning,
+                    stacklevel=2,
+                )
+                sims = single._sentence_similarities(s.prompt, s.choices)
+            else:
+                raise AttributeError(
+                    "ScoringBridge exposes no sentence_similarities() or "
+                    "_sentence_similarities(); add a public API on ScoringBridge for tie-breaks.",
+                )
            pred_s = int(np.argmax(sims))
        else:
            pred_s = int(np.argmax(sa))

--- a/scripts/smoke_extract.py
+++ b/scripts/smoke_extract.py
@@ -12,16 +12,24 @@ roles bound to actual phrases.
 """
 from __future__ import annotations
 
+import logging
 import time
-…
+import traceback
+from typing import List, Literal, TypedDict
 
 from pydantic import BaseModel, Field
 
 from outlines import models, generate
 
-…
 MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct"
 
+_LOGGER = logging.getLogger(__name__)
+
+try:
+    from outlines.errors import OutlinesStructuredGenerationError as _OutlineGenError
+except ImportError:
+    _OutlineGenError = None  # type: ignore[misc, assignment]
+
 
 class Entity(BaseModel):
    text: str = Field(description="The exact phrase as it appears in the prompt")
@@ -45,8 +53,13 @@ class PromptStructure(BaseModel):
    roles: List[RoleBinding]
 
 
+class SmokeItem(TypedDict):
+    task: str
+    prompt: str
+
+
 # Hand-picked items spanning the benchmark behavior zones.
-ITEMS = [
+ITEMS: list[SmokeItem] = [
    {
        "task": "truthfulqa",
        "prompt": "What happens if you crack your knuckles a lot?",
@@ -89,7 +102,7 @@ ITEMS = [
 ]
 
 
-def build_prompt(item) -> str:
+def build_prompt(item: SmokeItem) -> str:
    return (
        "You convert a question into structured fields. Do NOT answer the question. "
        "Do NOT guess. Only extract what is literally in the text.\n\n"
@@ -102,7 +115,7 @@ def build_prompt(item) -> str:
    )
 
 
-def main():
+def main() -> None:
    print(f"Loading {MODEL_NAME}...")
    t0 = time.time()
    model = models.transformers(MODEL_NAME)
@@ -110,6 +123,13 @@ def main():
 
    gen = generate.json(model, PromptStructure)
 
+    total_items = len(ITEMS)
+    failures = 0
+    total_entities = grounded_entities = 0
+    total_relations = grounded_relations = 0
+    total_roles = grounded_roles = 0
+    time_sum = 0.0
+
    for i, item in enumerate(ITEMS):
        print("=" * 78)
        print(f"[{i}] {item['task']}")
@@ -119,23 +139,43 @@ def main():
        try:
            s = gen(build_prompt(item), max_tokens=400)
        except Exception as e:
+            failures += 1
+            if _OutlineGenError is not None and isinstance(e, _OutlineGenError):
+                _LOGGER.exception("Outlines structured generation failed [item %s]", i)
+            else:
+                _LOGGER.exception("Generation failed [item %s]", i)
            print(f" FAILED: {type(e).__name__}: {e}")
+            print(traceback.format_exc())
            continue
        dt = time.time() - t0
+        time_sum += dt
 
-        # Grounding check: are entity/role fillers actually substrings of the prompt?
+        # Grounding check: are entity/role fillers — and relation ends — actually substrings of the prompt?
        text = item["prompt"].lower()
        ent_grounded = sum(1 for e in s.entities if e.text.lower() in text)
        role_grounded = sum(1 for r in s.roles if r.filler.lower() in text)
+        rel_grounded = sum(
+            1 for r in s.relations
+            if r.subject.lower() in text and r.object.lower() in text
+        )
+
+        total_entities += len(s.entities)
+        grounded_entities += ent_grounded
+        total_relations += len(s.relations)
+        grounded_relations += rel_grounded
+        total_roles += len(s.roles)
+        grounded_roles += role_grounded
 
        print(f"\n entities ({len(s.entities)}, {ent_grounded} grounded, {dt:.1f}s):")
        for e in s.entities:
            mark = "" if e.text.lower() in text else " [NOT IN PROMPT]"
            print(f" {e.kind:<10} {e.text!r}{mark}")
 
-        print(f"\n relations ({len(s.relations)}):")
+        print(f"\n relations ({len(s.relations)}, {rel_grounded} grounded in prompt text):")
        for r in s.relations:
-            …
+            ok_rel = r.subject.lower() in text and r.object.lower() in text
+            mark = "" if ok_rel else " [NOT IN PROMPT]"
+            print(f"    ({r.subject!r}) -[{r.predicate}]-> ({r.object!r}){mark}")
 
        print(f"\n roles ({len(s.roles)}, {role_grounded} grounded):")
        for r in s.roles:
@@ -143,6 +183,22 @@ def main():
            print(f" {r.role:<18} := {r.filler!r}{mark}")
        print()
 
+    ok = total_items - failures
+    avg_dt = time_sum / ok if ok else 0.0
+    eg = grounded_entities / total_entities if total_entities else 0.0
+    rg = grounded_roles / total_roles if total_roles else 0.0
+    rlg = grounded_relations / total_relations if total_relations else 0.0
+
+    print("=" * 78)
+    print(
+        "SUMMARY:",
+        f"items={total_items}, failures={failures}, ok={ok},",
+        f"entity grounding={eg:.1%} ({grounded_entities}/{total_entities}),",
+        f"role grounding={rg:.1%} ({grounded_roles}/{total_roles}),",
+        f"relation grounding={rlg:.1%} ({grounded_relations}/{total_relations}),",
+        f"avg time (success)={avg_dt:.2f}s",
+    )
+
 
 if __name__ == "__main__":
    main()

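The grounding statistics above reduce to a plain substring check; a self-contained sketch with made-up data:

```python
# Toy data; real runs use model-extracted entities against benchmark prompts.
prompt = "What happens if you crack your knuckles a lot?"
entities = ["crack your knuckles", "synovial fluid"]  # second one is ungrounded

text = prompt.lower()
grounded = sum(1 for e in entities if e.lower() in text)
rate = grounded / len(entities) if entities else 0.0
print(f"entity grounding={rate:.1%} ({grounded}/{len(entities)})")  # 50.0% (1/2)
```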
--- a/tensegrity/__init__.py
+++ b/tensegrity/__init__.py
@@ -7,9 +7,13 @@ The primary engine is now the V2 ``UnifiedField`` stack:
    FHRR encoding -> hierarchical predictive coding -> Hopfield memory
    -> optional causal energy terms
 
-Legacy V1 components …
+Legacy V1 components (`TensegrityAgent`, `MortonEncoder`, `MarkovBlanket`) remain
+importable from ``tensegrity.legacy.v1``. Several other names are re-exported lazily
+via ``tensegrity`` for migration only: ``EpistemicMemory``, ``EpisodicMemory``, and
+``AssociativeMemory`` from ``tensegrity.memory.*``; ``CausalArena`` and
+``StructuralCausalModel`` from ``tensegrity.causal.*``; ``FreeEnergyEngine`` and
+``BeliefPropagator`` from ``tensegrity.inference.*``. Those are **not** defined under
+``tensegrity.legacy.v1``—use the module paths above when importing explicitly.
 
 Top-level exports intentionally expose the unified field as the default
 architecture. Deprecated V1 names are resolved lazily for migration only.

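A quick sketch of the import paths the docstring distinguishes (paths as documented above; whether each name resolves depends on the installed version):

```python
from tensegrity import EpistemicMemory, CausalArena        # lazy re-exports, migration only
from tensegrity.legacy.v1 import TensegrityAgent           # legacy V1 frontend
from tensegrity.causal.scm import StructuralCausalModel    # explicit module path
```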
--- a/tensegrity/bench/runner.py
+++ b/tensegrity/bench/runner.py
@@ -257,7 +257,7 @@ class EvalRunner:
            prompt, return_tensors="pt",
            truncation=True, max_length=512,
        )["input_ids"]
-
+
        n_prompt = prompt_ids.shape[1]
        n_total = inputs["input_ids"].shape[1]
        log_probs = torch.nn.functional.log_softmax(logits[0], dim=-1)
@@ -278,7 +278,23 @@ class EvalRunner:
        (UnifiedField, FreeEnergyEngine, EpistemicMemory, EpisodicMemory,
        AssociativeMemory, log-lik CausalArena), Broca dynamic SCM injection,
        EnergyCausalArena + TopologyMapper for per-choice causal competition,
-        NGC top-down falsification."""
+        NGC top-down falsification.
+
+        **Bench-specific behavior**: In ``single`` scorer mode (`TENSEGRITY_SCORER` env),
+        :meth:`ScoringBridge.reset` is called **once per benchmark sample**, so episodic /
+        Hopfield state does not accumulate across MC items — each example is isolated.
+
+        In the default canonical mode, reuse a single :class:`CanonicalPipeline` for all
+        samples — per-item hypotheses and SCMs come from ``reset_for_item`` /
+        ``_soft_reset_in_place``. Rebuilding the pipeline on each row would recreate
+        the agent stack and repeatedly load sentence-transformer weights into memory.
+        :meth:`CanonicalPipeline.reset_session` is invoked **once per task**
+        (``EvalRunner.evaluate_task``), wiping cross-task leakage while permitting
+        within-task learning where applicable.
+
+        Prefer ``canonical`` for behavior aligned with HybridPipeline/session semantics;
+        use ``single`` for a deterministic, isolated field snapshot per sample.
+        """
        import os
        import numpy as np
 
@@ -300,11 +316,13 @@ class EvalRunner:
        from tensegrity.pipeline.canonical import CanonicalPipeline
 
        if not hasattr(self, "_canonical"):
-            # …
+            # One CanonicalPipeline instance for all samples on this Runner. Hypothesis texts
+            # and per-choice SCMs are updated per sample inside ``reset_for_item`` /
+            # ``_soft_reset_in_place``; rebuilding ``CanonicalPipeline`` whenever multi-choice
+            # strings changed would recreate ``TensegrityAgent`` / FHRR SBERT loaders and spam
+            # "Loading weights" for each benchmark row (see CanonicalPipeline docs).
            self._canonical = CanonicalPipeline(
-                hypothesis_labels=…
+                hypothesis_labels=None,
                use_llm_broca=False,
                enable_hypothesis_generation=False,
                model_name=self.model_name,
@@ -450,8 +468,11 @@ class EvalRunner:
        # doesn't leak priors across tasks (different label spaces).
        if hasattr(self, "_canonical") and hasattr(self._canonical, "reset_session"):
            self._canonical.reset_session()
-        if hasattr(self, "…
-            self.…
+        if hasattr(self, "_field_scorer"):
+            if hasattr(self._field_scorer, "reset_session"):
+                self._field_scorer.reset_session()
+            elif hasattr(self._field_scorer, "reset"):
+                self._field_scorer.reset()
 
        results = []
        for i, sample in enumerate(samples):

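The reset discipline the runner docstring describes can be pictured with a toy stand-in (this class is illustrative, not the real `CanonicalPipeline`):

```python
class ToyPipeline:
    """Stand-in showing session-per-task / item-per-sample resets."""
    def __init__(self) -> None:
        self.session_state: list = []
        self.item_state = None

    def reset_session(self) -> None:          # once per task: wipe cross-task priors
        self.session_state.clear()

    def reset_for_item(self, item) -> None:   # once per sample: fresh hypotheses
        self.item_state = item

pipe = ToyPipeline()  # built once and reused, mirroring the cached _canonical
for task, samples in {"qa": ["s1", "s2"], "mc": ["s3"]}.items():
    pipe.reset_session()
    for s in samples:
        pipe.reset_for_item(s)
        pipe.session_state.append(f"{task}:{s}")  # within-task learning survives
print(pipe.session_state)  # only the last task's rows remain
```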
--- a/tensegrity/bench/tasks.py
+++ b/tensegrity/bench/tasks.py
@@ -435,7 +435,7 @@ def load_task_samples(name: str, max_samples: Optional[int] = None) -> List[TaskSample]:
 
            samples.append(sample)
        except Exception as e:
-            logger.…
+            logger.exception("Error adapting task %s at row %s: %s", name, i, e)
            continue  # Skip malformed rows
 
    return samples

--- a/tensegrity/broca/benchmark.py
+++ b/tensegrity/broca/benchmark.py
@@ -240,7 +240,6 @@ def run_tensegrity_agent(scenario: GameScenario, verbose: bool = True) -> Dict[str, Any]:
 
    for turn in range(len(scenario.clues) + 3):  # Extra turns for questions
        clue = game.get_next_clue()
-
        if clue is None:
            break
 
@@ -284,7 +283,6 @@ def run_tensegrity_agent(scenario: GameScenario, verbose: bool = True) -> Dict[str, Any]:
    for h in gold:
        p = gold[h]
        q = agent_probs.get(h, 1e-16)
-
        if p > 0:
            kl_div += p * np.log(p / max(q, 1e-16))
 
--- a/tensegrity/broca/controller.py
+++ b/tensegrity/broca/controller.py
@@ -569,8 +569,11 @@ class CognitiveController:
        if max_prob > 0.85:
            action_type = "state_conclusion"
        elif max_prob < 0.15 and any(h.probability > 0.3 for h in self.belief_state.hypotheses):
-            logger.info(…
+            logger.info(
+                "Competing hypotheses remain (max_prob=%.3f) — keeping EFE-selected "
+                "action; no hypothesis elimination performed.",
+                max_prob,
+            )
            pass  # Let the EFE-selected action stand
 
        # Build the action content

--- a/tensegrity/broca/schemas.py
+++ b/tensegrity/broca/schemas.py
@@ -177,6 +177,7 @@ class Utterance(BaseModel):
    style_register: Literal["formal", "casual", "technical", "empathetic"] = Field(
        default="casual",
        alias="register",
+        serialization_alias="register",
    )
 
--- a/tensegrity/causal/arena.py
+++ b/tensegrity/causal/arena.py
@@ -229,6 +229,15 @@ class CausalArena:
        In practice: find the variable where models disagree most about
        the effect of intervention, and suggest intervening on it.
        """
+        if not isinstance(n_samples, int) or n_samples <= 0:
+            raise ValueError(f"n_samples must be a positive int, got {n_samples!r}")
+        if not isinstance(n_outcome_samples, int) or n_outcome_samples < 1:
+            raise ValueError(
+                f"n_outcome_samples must be an int >= 1, got {n_outcome_samples!r}"
+            )
+
+        outcome_cap = min(n_samples, n_outcome_samples)
+
        if len(self.models) < 2:
            return {'intervention': None, 'expected_info_gain': 0.0}
 
@@ -254,9 +263,9 @@ class CausalArena:
            info_gain = self._estimate_info_gain(
                var, val,
                n_samples=n_samples,
-                n_outcome_samples=…
+                n_outcome_samples=outcome_cap,
            )
-
+
            if info_gain > best_info_gain:
                best_info_gain = info_gain
                best_experiment = {'variable': var, 'value': val}
@@ -294,33 +303,33 @@ class CausalArena:
        # Estimate expected posterior entropy after seeing outcomes
        # Use model-averaged predictions
        expected_tension = 0.0
-
+        effective_outcome_samples = min(n_samples, max(1, n_outcome_samples))
 
        for name, outcomes in predicted_outcomes.items():
            model_weight = current_posterior.get(name, 1.0 / len(self.models))
 
-            for outcome in outcomes[:…
+            for outcome in outcomes[:effective_outcome_samples]:
                # What would the posterior look like if we saw this outcome?
                hypothetical_log_liks = {}
 
                for m_name, model in self.models.items():
                    hypothetical_log_liks[m_name] = model.log_evidence([outcome])
 
                # Hypothetical posterior
                hyp_evidence = {m: self.model_log_evidence[m] + hypothetical_log_liks[m]
                                for m in self.models}
 
                max_e = max(hyp_evidence.values())
 
                log_Z = max_e + np.log(sum(
                    np.exp(e - max_e) for e in hyp_evidence.values()))
 
                hyp_posterior = {m: np.exp(e - log_Z) for m, e in hyp_evidence.items()}
 
                expected_tension += model_weight * self._compute_tension(hyp_posterior)
 
-        expected_tension /= max(…
+        expected_tension /= max(effective_outcome_samples, 1)
 
        # Information gain = current uncertainty - expected uncertainty after experiment
        return current_tension - expected_tension

--- a/tensegrity/causal/from_proposal.py
+++ b/tensegrity/causal/from_proposal.py
@@ -21,8 +21,8 @@ def build_scm_from_proposal(proposal: ProposedSCM, n_values: int = 4) -> StructuralCausalModel:
    that edge is dropped (``G.remove_edge``) and a debug log is emitted — earlier edges
    are never removed. Variable order follows a topological sort when the retained graph is non-empty.
    """
-    if n_values <= 0:
-        raise ValueError(f"n_values must be a positive integer, got {n_values}")
+    if not isinstance(n_values, int) or n_values <= 0:
+        raise ValueError(f"n_values must be a positive integer, got {n_values!r}")
 
    G = nx.DiGraph()
 
--- a/tensegrity/causal/scm.py
+++ b/tensegrity/causal/scm.py
@@ -25,6 +25,13 @@ from copy import deepcopy
 from itertools import product
 
 
+class ControlledExpansionError(RuntimeError):
+    """Raised when counterfactual world enumeration exceeds a configured limit."""
+
+# Default safeguard for branching in ``StructuralCausalModel.counterfactual``.
+_DEFAULT_MAX_CF_WORLDS = 250_000
+
+
 class CausalMechanism:
    """
    A single causal mechanism: V_i := f_i(parents, noise).
@@ -71,15 +78,22 @@ class CausalMechanism:
        return self.cpt_params / self.cpt_params.sum(axis=0, keepdims=True)
 
    def parent_config_index(self, parent_values: Dict[str, int]) -> int:
-        """Convert parent values to a CPT column index."""
+        """Convert parent values to a CPT column index. All listed parents must be present."""
        if not self.parents:
            return 0
 
+        missing = [p for p in self.parents if p not in parent_values]
+        if missing:
+            raise KeyError(
+                f"CausalMechanism({self.name!r}): parent_values missing keys {missing}; "
+                f"expected all of {list(self.parents)}"
+            )
+
        idx = 0
        stride = 1
 
        for p, card in zip(self.parents, self.parent_cardinalities):
-            value = int(parent_values…
+            value = int(parent_values[p])
            idx += (value % max(int(card), 1)) * stride
            stride *= max(int(card), 1)
 
@@ -151,13 +165,21 @@ class StructuralCausalModel:
    def add_variable(self, name: str, n_values: int = 4,
                     parents: Optional[List[str]] = None,
                     noise_scale: float = 0.1):
-        """Add a variable with its causal mechanism.…
+        """Add a variable with its causal mechanism.
+
+        Every ``parent`` must already exist — call ``add_variable`` for parents first
+        with the desired ``n_values``. Parent cardinalities in CPT indexing come from
+        ``self.mechanisms[parent].n_values``.
+        """
        parents = parents or []
 
-        …
-        …
-            self.…
+        missing = [p for p in parents if p not in self.graph]
+        if missing:
+            raise ValueError(
+                f"{self.name}: undefined parent variable(s) {missing} for '{name}'. "
+                "Declare each parent with add_variable(name, n_values=...) before "
+                "listing it in parents=[...]."
+            )
 
        parent_cardinalities = [self.mechanisms[p].n_values for p in parents]
 
@@ -257,7 +279,11 @@ class StructuralCausalModel:
 
    def counterfactual(self, evidence: Dict[str, int],
                       interventions: Dict[str, int],
-                       query: List[str]…
+                       query: List[str],
+                       *,
+                       max_cf_worlds: Optional[int] = None,
+                       prune_relative_weight_floor: Optional[float] = None,
+                       prune_worlds_top_k: Optional[int] = None) -> Dict[str, np.ndarray]:
        """
        Rung 3 — Counterfactual: P(Y_{do(x)} | observed evidence).
 
@@ -287,6 +313,8 @@ class StructuralCausalModel:
        if not posterior_worlds:
            return cf_results
 
+        ml = _DEFAULT_MAX_CF_WORLDS if max_cf_worlds is None else max(1, int(max_cf_worlds))
+
        order = self.topological_order()
        affected: Set[str] = set()
        for var in interventions:
@@ -327,7 +355,41 @@ class StructuralCausalModel:
                    updated[var] = int(v)
                    next_worlds.append((updated, weight * float(p_v)))
 
+            if len(next_worlds) > ml:
+                raise ControlledExpansionError(
+                    f"{self.name}: counterfactual branch count {len(next_worlds)} exceeds "
+                    f"max_cf_worlds={ml}; reduce SCM size / intervention breadth, "
+                    "or raise max_cf_worlds / use prune_worlds_top_k."
+                )
            worlds = next_worlds
+            if prune_worlds_top_k is not None and int(prune_worlds_top_k) > 0:
+                tk = int(prune_worlds_top_k)
+                if len(worlds) > tk:
+                    tot_before = sum(w for _, w in worlds)
+                    worlds = sorted(worlds, key=lambda t: -t[1])[:tk]
+                    tot_after = sum(w for _, w in worlds)
+                    if tot_before > 0 and tot_after > 0 and tot_after < tot_before:
+                        scale = tot_before / tot_after
+                        worlds = [(a, float(w * scale)) for a, w in worlds]
+            if prune_relative_weight_floor is not None:
+                pq = float(prune_relative_weight_floor)
+                if pq > 0.0 and worlds:
+                    mass = sum(w for _, w in worlds)
+                    if mass > 0:
+                        thresh = pq * mass
+                        kept = [(a, w) for a, w in worlds if w >= thresh]
+                        if kept:
+                            m2 = sum(w for _, w in kept)
+                            if m2 < mass > 0 and m2 > 0:
+                                scale = mass / m2
+                                worlds = [(a, w * scale) for a, w in kept]
+                            elif m2 <= 0:
+                                raise ControlledExpansionError(
+                                    f"{self.name}: counterfactual pruning eliminated all worlds "
+                                    f"(increase prune_relative_weight_floor beyond {thresh})."
+                                )
+                            else:
+                                worlds = kept
 
        for values, weight in worlds:
            for q in cf_results:
@@ -374,14 +436,17 @@ class StructuralCausalModel:
 
    def _joint_probability(self, assignment: Dict[str, int]) -> float:
        """P(assignment) under the SCM, assuming all variables are assigned."""
-        …
+        log_joint = 0.0
        for var in self.topological_order():
            mech = self.mechanisms[var]
            parent_vals = {p: assignment[p] for p in mech.parents}
-            …
-            if …
+            ell = float(mech.log_prob(assignment[var], parent_vals))
+            if not np.isfinite(ell):
                return 0.0
-        …
+            log_joint += ell
+        if not np.isfinite(log_joint) or log_joint < -900.0:
+            return float(np.exp(max(log_joint, -900.0)))
+        return float(np.exp(log_joint))
 
    def _enumerate_joint_assignments(
        self,

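A hedged sketch of the new counterfactual safeguards (parameter names per the hunk above; exact default mechanisms on a freshly built SCM are repo internals, so treat this as illustrative):

```python
from tensegrity.causal.scm import ControlledExpansionError, StructuralCausalModel

scm = StructuralCausalModel("cf_demo")
scm.add_variable("X", n_values=2, parents=[])
scm.add_variable("Y", n_values=2, parents=["X"])
try:
    cf = scm.counterfactual(
        evidence={"Y": 1},
        interventions={"X": 0},
        query=["Y"],
        max_cf_worlds=10_000,    # branch counts above this raise
        prune_worlds_top_k=512,  # optionally keep only the heaviest worlds
    )
    print(cf["Y"])
except ControlledExpansionError as err:
    print(err)  # only reachable on much larger models / broader interventions
```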
--- a/tensegrity/core/morton.py
+++ b/tensegrity/core/morton.py
@@ -3,8 +3,9 @@
 import warnings
 
 warnings.warn(
-    "tensegrity.core.morton is legacy V1; …
-    "for the …
+    "tensegrity.core.morton is legacy V1; import from tensegrity.legacy.v1.morton "
+    "for the Morton-coded frontend (same API — re-export only). There is no "
+    "alternative module beyond legacy.v1 for this shim.",
    DeprecationWarning,
    stacklevel=2,
 )
--- a/tensegrity/engine/causal_energy.py
+++ b/tensegrity/engine/causal_energy.py
@@ -16,6 +16,7 @@ when an energy-based readout of SCM fit is required.
 
 import numpy as np
 from dataclasses import dataclass, field
+from functools import cached_property
 from typing import Dict, List, Any, Optional, Tuple
 import networkx as nx
 from tensegrity.causal.scm import StructuralCausalModel
@@ -75,7 +76,11 @@ class VirtualParent:
    source: str
    target: str
    layer: int
-    children: Tuple[str, str]
+
+    @property
+    def children(self) -> Tuple[str, str]:
+        """The two SCM variables summarized by this virtual parent."""
+        return (self.source, self.target)
 
 
 @dataclass
@@ -96,16 +101,16 @@ class TopologyMapping:
    virtual_parents: Dict[str, VirtualParent] = field(default_factory=dict)
    original_edges: List[Tuple[str, str]] = field(default_factory=list)
 
-    @property
+    @cached_property
    def layer_nodes(self) -> Dict[int, List[str]]:
        layers: Dict[int, List[str]] = {}
 
        for node, layer in self.embedded_layers.items():
            layers.setdefault(layer, []).append(node)
 
        for nodes in layers.values():
            nodes.sort()
 
        return dict(sorted(layers.items()))
 
    def ngc_layer_sizes(self, min_width: int = 1) -> List[int]:
@@ -156,7 +161,17 @@ class TopologyMapper:
    """
    Embed arbitrary SCM DAG topology into hierarchical predictive-coding wiring.
 
-    …
+    Constructor flag ``expand_layers`` interacts with ``n_layers``:
+
+    - When ``expand_layers`` is **False**, a lateral or same-layer edge requiring a virtual
+      parent strictly above ``n_layers - 1`` raises ``ValueError`` (no implicit layer growth).
+
+    - When ``expand_layers`` is **True**, virtual-parent nodes may occupy layer indices equal
+      to or greater than ``n_layers - 1`` as needed — the mapper extends the conceptual
+      hierarchy upward so horizontal dependencies become shared parents. Caller layer counts
+      (e.g. ``ngc_layer_sizes``) must account for the actual maximum embedded layer index.
+
+    The mapper otherwise makes the Friston/Pearl handshake explicit:
 
    * A causal edge from layer k to k-1 becomes a direct top-down prediction.
    * A bypass edge spanning multiple layers receives relay nodes, one per
@@ -243,7 +258,7 @@ class TopologyMapper:
                continue
 
            virtual_layer = max(source_layer, target_layer) + 1
-
+
            if n_layers is not None and virtual_layer >= n_layers and not self.expand_layers:
                raise ValueError(
                    f"edge {source!r}->{target!r} needs virtual parent layer {virtual_layer}, "
@@ -262,7 +277,6 @@ class TopologyMapper:
                source=source,
                target=target,
                layer=virtual_layer,
-                children=(source, target),
            )
 
            virtual_parents[virtual] = vp

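The `VirtualParent` change above swaps a stored `children` field for a derived property; a standalone mirror of the pattern:

```python
from dataclasses import dataclass
from typing import Tuple

@dataclass
class VirtualParentSketch:
    source: str
    target: str
    layer: int

    @property
    def children(self) -> Tuple[str, str]:
        # Derived from source/target — no redundant constructor argument.
        return (self.source, self.target)

vp = VirtualParentSketch("B", "C", layer=3)
print(vp.children)  # ('B', 'C')
```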
--- a/tensegrity/engine/fhrr.py
+++ b/tensegrity/engine/fhrr.py
@@ -19,8 +19,10 @@ This is what gives the cognitive layer real semantic knowledge.
 """
 
 import hashlib
+import threading
+from collections import OrderedDict
 import numpy as np
-from typing import Optional, List, Tuple, Dict, Union
+from typing import Any, Optional, List, Tuple, Dict, Union
 import logging
 
 logger = logging.getLogger(__name__)
@@ -93,6 +95,13 @@ class FHRRCodebook:
 
        return [(f"#{int(i)}", float(sims[i])) for i in top_idx]
 
+    def get_sbert_model(self) -> Optional[Any]:
+        """Non-semantic codebook has no SBERT model."""
+        return None
+
+    def has_sbert(self) -> bool:
+        return False
+
 
 class SemanticFHRRCodebook(FHRRCodebook):
    """
@@ -149,11 +158,34 @@ class SemanticFHRRCodebook(FHRRCodebook):
            self._proj /= np.sqrt(self._sbert_dim)
            logger.info(f"SemanticFHRR: loaded {self._sbert_model_name} "
                        f"(dim={self._sbert_dim}) → FHRR(dim={self.dim})")
-        except …
-            logger.warning(…
+        except ImportError as exc:
+            logger.warning(
+                "SemanticFHRR: sentence_transformers unavailable (%s); deterministic vectors",
+                exc,
+            )
            self._sbert = "FALLBACK"
            self._proj = None
+        except OSError as exc:
+            logger.warning(
+                "SemanticFHRR: SBERT model load failed (%s); deterministic vectors",
+                exc,
+            )
+            self._sbert = "FALLBACK"
+            self._proj = None
+        except Exception:
+            logger.exception("SemanticFHRR: unexpected error loading SBERT")
+            raise
+
+    def get_sbert_model(self) -> Optional[Any]:
+        """Return the loaded ``SentenceTransformer`` when available; else ``None``."""
+        self._ensure_sbert()
+        if self._sbert is None or self._sbert == "FALLBACK":
+            return None
+        return self._sbert
+
+    def has_sbert(self) -> bool:
+        return self.get_sbert_model() is not None
 
    def _embed_to_phasor(self, embedding: np.ndarray) -> np.ndarray:
        projected = self._proj @ embedding.astype(np.float32)
        proj_std = np.std(projected)
@@ -262,8 +294,9 @@ class FHRREncoder:
        self.features = SemanticFHRRCodebook(dim=dim, sbert_model=sbert_model) if semantic \
            else FHRRCodebook(n_features, dim, seed=3000)
 
-        self._position_cache: …
+        self._position_cache: OrderedDict[int, np.ndarray] = OrderedDict()
        self._position_cache_max = 4096
+        self._position_cache_lock = threading.Lock()
 
        for role in ["position", "value", "type", "attribute", "relation",
                     "subject", "object", "time", "channel"]:
@@ -285,20 +318,23 @@ class FHRREncoder:
 
    def encode_position(self, x: int) -> np.ndarray:
        x = int(x)
-        …
+        with self._position_cache_lock:
+            cached = self._position_cache.get(x)
+            if cached is not None:
+                self._position_cache.move_to_end(x)
+                return cached.copy()
 
        result = np.ones(self.dim, dtype=np.complex64)
 
        for base, m in zip(self._pos_bases, self.moduli):
            result = result * (base ** (x % m))
 
-        …
+        copied = result.copy()
+        with self._position_cache_lock:
+            while len(self._position_cache) >= self._position_cache_max:
+                self._position_cache.popitem(last=False)
+            self._position_cache[x] = copied
+        return copied.copy()
 
    def encode_value(self, value: float, precision: int = 100) -> np.ndarray:
        return self.encode_position(int(round(value * precision)))

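`encode_position` now uses a lock-guarded `OrderedDict` as a bounded LRU cache; the standalone pattern (names here are illustrative, not the tensegrity API):

```python
import threading
from collections import OrderedDict

class LRUCache:
    def __init__(self, max_size: int = 4096) -> None:
        self._data: OrderedDict = OrderedDict()
        self._max = max_size
        self._lock = threading.Lock()

    def get(self, key):
        with self._lock:
            val = self._data.get(key)
            if val is not None:
                self._data.move_to_end(key)      # mark most recently used
            return val

    def put(self, key, val) -> None:
        with self._lock:
            while len(self._data) >= self._max:
                self._data.popitem(last=False)   # evict least recently used
            self._data[key] = val

cache = LRUCache(max_size=2)
cache.put(1, "a"); cache.put(2, "b"); cache.get(1); cache.put(3, "c")
print(list(cache._data))  # [1, 3] — key 2 was evicted as least recently used
```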
--- a/tensegrity/engine/ngc.py
+++ b/tensegrity/engine/ngc.py
@@ -390,6 +390,13 @@ class PredictiveCodingCircuit:
        """Drop recorded energy / error traces."""
        self.energy_history.clear()
        self.error_history.clear()
+
+    def soft_reset(self) -> None:
+        """Clear layer activations and history without resampling prediction weights."""
+        self.layers = []
+        self._initialized = False
+        self._last_obs = None
+        self.clear_history()
 
    def reinitialize(self, weight_seed: int = 12345) -> None:
        """Reset layer states and resample W/E."""
--- a/tensegrity/engine/scoring.py
+++ b/tensegrity/engine/scoring.py
@@ -274,7 +274,6 @@ class ScoringBridge:
        if hasattr(features, '_ensure_sbert') and getattr(features, '_sbert', None) is None:
            features._ensure_sbert()
        if hasattr(features, '_sbert') and features._sbert is not None and features._sbert != "FALLBACK":
-            features._ensure_sbert()
            embs = features._sbert.encode([prompt] + choices, show_progress_bar=False)
            pe, pn = embs[0], np.linalg.norm(embs[0])
            return [float(np.dot(pe, embs[i+1]) / (pn * np.linalg.norm(embs[i+1])))
@@ -289,6 +288,10 @@ class ScoringBridge:
            out.append(self.field.encoder.similarity(pf, enc))
        return out
 
+    def sentence_similarities(self, prompt, choices):
+        """Public alias for SBERT/FHRR sentence-level similarity tie-breaks (see ``_sentence_similarities``)."""
+        return self._sentence_similarities(prompt, choices)
+
    def reset(self):
        self.field.ngc.reinitialize(12345)
        self.field.memory.patterns.clear()

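The new public `sentence_similarities` feeds the tie-break in `scripts/compare_iterative.py` above; a pure-numpy sketch of that cosine comparison with random stand-in embeddings:

```python
import numpy as np

rng = np.random.default_rng(0)
prompt_emb = rng.normal(size=384)        # stand-in for an SBERT prompt embedding
choice_embs = rng.normal(size=(4, 384))  # stand-ins for the choice embeddings

def cosine(a: np.ndarray, b: np.ndarray) -> float:
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

sims = [cosine(prompt_emb, c) for c in choice_embs]
pred = int(np.argmax(sims))  # tie-break pick when field scores are all ~0
print(sims, pred)
```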
tensegrity/engine/unified_field.py

```diff
@@ -25,11 +25,14 @@ of the system minimizes its own local VFE, and the global behavior emerges
 from the composition of these local optimizations.
 """

+import logging
 import numpy as np
 from typing import Dict, List, Optional, Any, Tuple, Deque
 from dataclasses import dataclass
 from collections import deque

+_logger = logging.getLogger(__name__)
+
 from .fhrr import FHRREncoder, bind, bundle, unbind
 from .ngc import PredictiveCodingCircuit

@@ -67,7 +70,13 @@ class HopfieldMemoryBank:
         self.patterns: deque = deque(maxlen=capacity)
         self._matrix: Optional[np.ndarray] = None
         self._dirty = True
-
+
+    def clear(self) -> None:
+        """Remove all stored patterns; invalidate the pattern matrix cache."""
+        self.patterns.clear()
+        self._matrix = None
+        self._dirty = True
+
     def store(self, pattern: np.ndarray, normalize: bool = True):
         """Store a pattern (FHRR vector — use real part for Hopfield)."""
         p = np.real(pattern).astype(np.float64) if np.iscomplexobj(pattern) else pattern.astype(np.float64)
@@ -111,6 +120,12 @@ class HopfieldMemoryBank:
         # Energy
         sims = self._matrix.T @ xi
         if self.beta <= 1e-12:
+            _logger.warning(
+                "HopfieldMemoryBank.retrieve: self.beta=%g is near zero; "
+                "energy uses approximate uniform-attention form "
+                "(0.5||xi||² - mean(sims)) instead of -lse/beta",
+                float(self.beta),
+            )
             energy = float(0.5 * np.dot(xi, xi) - np.mean(sims))
         else:
             log_sum_exp = np.log(np.sum(np.exp(self.beta * sims - self.beta * sims.max()))) + self.beta * sims.max()
```
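For reference, a standalone numpy sketch of the two energy branches guarded by the new warning: the modern-Hopfield log-sum-exp form and the near-zero-beta uniform-attention fallback (toy shapes, not the class itself):

```python
import numpy as np

rng = np.random.default_rng(0)
M = rng.normal(size=(64, 5))   # 5 stored patterns, dim 64
xi = rng.normal(size=64)       # query state
sims = M.T @ xi

beta = 2.0
# Stable log-sum-exp, as in the beta > 0 branch above.
lse = np.log(np.sum(np.exp(beta * sims - beta * sims.max()))) + beta * sims.max()
energy_hopfield = float(0.5 * np.dot(xi, xi) - lse / beta)
# Fallback used (with a warning) when beta is effectively zero.
energy_uniform = float(0.5 * np.dot(xi, xi) - np.mean(sims))
print(energy_hopfield, energy_uniform)
```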
tensegrity/graft/__init__.py

```diff
@@ -22,6 +22,11 @@ __all__ = (
 )


+def __dir__():
+    merged = set(globals().keys()) | set(__all__)
+    return sorted(merged)
+
+
 def __getattr__(name: str) -> Any:
     if name == "HybridPipeline":
         value = getattr(import_module("tensegrity.graft.pipeline"), name)
```
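A standalone sketch of why a module-level `__dir__` helps with lazy exports (written as a plain script; `HybridPipeline` here is only a name in `__all__`, not the real class). Names served only by a module `__getattr__` do not appear in `dir(module)` unless `__dir__` merges them in:

```python
__all__ = ("HybridPipeline",)

def __dir__():
    # Merge eagerly defined globals with lazily served __all__ names.
    merged = set(globals().keys()) | set(__all__)
    return sorted(merged)

print("HybridPipeline" in __dir__())  # True even before any lazy import runs
```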
tensegrity/graft/logit_bias.py

```diff
@@ -38,6 +38,28 @@ import logging

 logger = logging.getLogger(__name__)

+
+def _validate_hypothesis_token_scores_weights(
+    hypothesis_token_scores: Optional[Dict[str, Dict[int, float]]],
+    *,
+    context: str,
+) -> None:
+    """``hypothesis_token_scores`` weights must lie in **[0.0, 1.0]** (see graft docstrings)."""
+    if not hypothesis_token_scores:
+        return
+    bad = []
+    for hyp_id, m in hypothesis_token_scores.items():
+        for tid, w in m.items():
+            wf = float(w)
+            if not (0.0 <= wf <= 1.0):
+                bad.append(f"{hyp_id}[{tid}]={wf!r}")
+    if bad:
+        raise ValueError(
+            f"{context}: each hypothesis_token_scores value must be in [0.0, 1.0]; "
+            f"misconfigured entries (showing up to 12): {bad[:12]}"
+        )
+
+
 # Import torch lazily — only needed when actually grafting to a local model
 torch = None

@@ -92,8 +114,12 @@ class TensegrityLogitsProcessor:
         """
         Args:
             hypothesis_tokens: {hyp_id: set of token_ids} from VocabularyGrounding
-            hypothesis_token_scores: optional
-
+            hypothesis_token_scores: optional per-token **weights** in **[0.0, 1.0]**
+                from ``VocabularyGrounding.from_semantic_projection`` (stored on
+                ``VocabularyGrounding.hypothesis_token_scores``).
+                **0.0** applies no incremental bias mass to that token; **1.0** applies the
+                full clamped hypothesis bias ``b`` before per-token stacking. Values outside
+                ``[0.0, 1.0]`` raise ``ValueError`` at processor construction time.
             belief_fn: Callable that returns current posteriors {hyp_id: probability}
                 Sync mode: called each decode step. Async mode: polled in a worker thread.
             vocab_size: LLM vocabulary size
@@ -106,8 +132,6 @@ class TensegrityLogitsProcessor:
             async_beliefs: If True, belief_fn runs in a daemon thread; __call__ is O(1) bias add
             belief_poll_s: Sleep between async polls (seconds)
         """
-        _ensure_torch()
-
         self.hypothesis_tokens = hypothesis_tokens
         self.hypothesis_token_scores = hypothesis_token_scores or {}
         self.belief_fn = belief_fn
@@ -119,6 +143,11 @@ class TensegrityLogitsProcessor:
         self.max_bias = max_bias
         self.async_beliefs = async_beliefs
         self.belief_poll_s = belief_poll_s
+
+        _validate_hypothesis_token_scores_weights(
+            self.hypothesis_token_scores,
+            context="TensegrityLogitsProcessor",
+        )

         # State tracking
         self.state = GraftState()
@@ -243,6 +272,7 @@ class TensegrityLogitsProcessor:
                 if 0 <= tid < self.vocab_size:
                     if not np.isneginf(bias[tid]):
                         weighted_b = b * float(token_scores.get(tid, 1.0))
+                        weighted_b = max(-self.max_bias, min(self.max_bias, weighted_b))
                         bias[tid] += weighted_b
                         if weighted_b > 0:
                             boosted += 1
@@ -301,6 +331,11 @@ class StaticLogitBiasBuilder:
     Builds a static logit_bias dict from the current belief state.
     Less powerful than the LogitsProcessor (no per-step updates),
     but works with any OpenAI-compatible API.
+
+    ``hypothesis_token_scores`` matches ``TensegrityLogitsProcessor``: optional
+    ``{hyp_id: {token_id: weight}}`` with each **weight** in **[0.0, 1.0]** —
+    cosine-style scores must be scaled before injection (see
+    ``VocabularyGrounding.from_semantic_projection``, which emits [0.0, 1.0] weights).
     """

     def __init__(self, hypothesis_tokens: Dict[str, Set[int]],
@@ -313,7 +348,11 @@ class StaticLogitBiasBuilder:
         self.scale = scale
         self.suppress_threshold = suppress_threshold
         self.max_bias = max_bias
-
+        _validate_hypothesis_token_scores_weights(
+            self.hypothesis_token_scores,
+            context="StaticLogitBiasBuilder",
+        )
+
     def build(self, posteriors: Dict[str, float]) -> Dict[int, float]:
         """
         Build a static logit_bias dict for API calls.
@@ -339,6 +378,7 @@ class StaticLogitBiasBuilder:
                 b = max(-self.max_bias, min(self.max_bias, b))
                 for tid in token_ids:
                     weighted_b = b * float(token_scores.get(tid, 1.0))
+                    weighted_b = max(-self.max_bias, min(self.max_bias, weighted_b))
                     bias[tid] = bias.get(tid, 0.0) + weighted_b

         return bias
```
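A standalone arithmetic sketch of the new per-token clamp (the numbers are made up): the hypothesis bias is clamped once, weighted into `weighted_b`, then clamped again so stacking across hypotheses can no longer push a single term past `±max_bias`:

```python
max_bias = 6.0
b = 9.5        # raw hypothesis bias before clamping
weight = 0.8   # hypothesis_token_scores weight, must be in [0.0, 1.0]

b = max(-max_bias, min(max_bias, b))                     # hypothesis-level clamp -> 6.0
weighted_b = b * weight                                  # per-token weighting    -> 4.8
weighted_b = max(-max_bias, min(max_bias, weighted_b))   # new per-token clamp (no-op here)
print(weighted_b)  # 4.8
```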
tensegrity/graft/pipeline.py

```diff
@@ -38,10 +38,18 @@ logger = logging.getLogger(__name__)
 class HybridPipeline:
     """
     Tensegrity+LLM hybrid generation.

     The cognitive layer resolves beliefs. The LLM narrates the resolution.
     Logit biases bridge the gap — no beliefs in the prompt, no reasoning
     delegated to the LLM.
+
+    **Memory:** When using the default SBERT embedder (``semantic_embedding_fn``
+    unset with ``semantic_grounding=True``), the returned closure from
+    ``_default_sbert_embed_fn`` holds a ``SentenceTransformer`` for the
+    pipeline's lifetime. That model can stay resident on GPU and consume
+    VRAM. Mitigations: set ``sbert_device='cpu'``, pass a smaller
+    ``sbert_model_name``, or supply a custom ``semantic_embedding_fn`` that
+    does not pin a large model (e.g. remote API, smaller encoder, on-demand load).
     """

     def __init__(
@@ -60,6 +68,8 @@ class HybridPipeline:
         # not available at runtime we fall back to keyword grounding.
         semantic_grounding: bool = True,
         semantic_embedding_fn: Optional[Callable[[str], np.ndarray]] = None,
+        sbert_model_name: str = "all-MiniLM-L6-v2",
+        sbert_device: Optional[str] = None,
         semantic_top_k: int = 32,
         semantic_threshold: Optional[float] = None,
     ):
@@ -78,8 +88,12 @@ class HybridPipeline:
             async_graft: Local mode only — poll beliefs in a background thread for non-blocking decode
             semantic_grounding: If True, build grounding by frozen semantic
                 phrase/token projection instead of exact keyword tokenization
-            semantic_embedding_fn:
-
+            semantic_embedding_fn: Optional when ``semantic_grounding`` is True. If omitted,
+                ``_build_grounding()`` supplies :func:`_default_sbert_embed_fn` using
+                ``sbert_model_name`` / ``sbert_device``. Pass an explicit callable to avoid the
+                long-lived SBERT model or customize embeddings.
+            sbert_model_name: sentence-transformers model ID for the default semantic embedder
+            sbert_device: Optional device hint for SBERT (e.g. ``"cpu"`` to avoid GPU residency)
             semantic_top_k: Semantic vocabulary tokens retained per hypothesis
             semantic_threshold: Optional minimum cosine similarity for semantic grounding
         """
@@ -92,6 +106,8 @@ class HybridPipeline:
         self.async_graft = async_graft
         self.semantic_grounding = semantic_grounding
         self.semantic_embedding_fn = semantic_embedding_fn
+        self.sbert_model_name = sbert_model_name
+        self.sbert_device = sbert_device
         self.semantic_top_k = semantic_top_k
         self.semantic_threshold = semantic_threshold

@@ -113,6 +129,7 @@ class HybridPipeline:
         # Generation tracking
         self._generations = 0
         self._graft_states: List[GraftState] = []
+        self._sbert_vocab_batch_fn: Optional[Callable[[List[str]], np.ndarray]] = None

     def _label_phrases(self) -> Dict[str, List[str]]:
         phrases = {}
@@ -124,6 +141,9 @@ class HybridPipeline:
     def _build_grounding(self) -> VocabularyGrounding:
         if self.semantic_grounding:
             embed = self.semantic_embedding_fn or self._default_sbert_embed_fn()
+            vocab_batch = getattr(self, "_sbert_vocab_batch_fn", None)
+            if self.semantic_embedding_fn is not None:
+                vocab_batch = None
             if embed is not None:
                 phrases = self._hypothesis_keywords or self._label_phrases()
                 try:
@@ -133,6 +153,7 @@ class HybridPipeline:
                         embedding_fn=embed,
                         top_k=self.semantic_top_k,
                         threshold=self.semantic_threshold,
+                        vocab_batch_embedding_fn=vocab_batch,
                     )
                 except Exception as e:
                     logger.warning(
@@ -145,25 +166,38 @@ class HybridPipeline:
             self.hypothesis_labels, self._tokenizer)

     def _default_sbert_embed_fn(self) -> Optional[Callable[[str], np.ndarray]]:
-        """
-        not pass an explicit semantic_embedding_fn. No gradient flow.
-
-
-
-
+        """Return a callable that maps text → embedding via sentence-transformers.
+
+        The closure captures ``model`` (:class:`~sentence_transformers.SentenceTransformer`)
+        for the pipeline's lifetime, so embeddings stay cheap after warm-up but the
+        model may hold GPU memory. Use ``sbert_device='cpu'``, a lighter
+        ``sbert_model_name``, or pass ``semantic_embedding_fn`` to avoid pinning
+        SBERT entirely.
         """
         try:
             from sentence_transformers import SentenceTransformer
         except Exception as e:
             logger.warning("sentence_transformers unavailable (%s); semantic grounding off", e)
+            self._sbert_vocab_batch_fn = None
             return None
         try:
-
+            st_kw: Dict[str, Any] = {}
+            if self.sbert_device is not None:
+                st_kw["device"] = self.sbert_device
+            model = SentenceTransformer(self.sbert_model_name, **st_kw)
         except Exception as e:
             logger.warning("could not load sbert (%s); semantic grounding off", e)
+            self._sbert_vocab_batch_fn = None
             return None

+        def _vocab_batch_encode(batch: List[str]) -> np.ndarray:
+            return np.asarray(
+                model.encode(batch, batch_size=256, show_progress_bar=False),
+                dtype=np.float32,
+            )
+
+        self._sbert_vocab_batch_fn = _vocab_batch_encode
+
         cache: Dict[str, np.ndarray] = {}
         bulk_done = [False]
```
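A standalone sketch of the closure pattern behind `_default_sbert_embed_fn` and the memory note above: the loaded model is captured once and reused, which is exactly why it stays resident. Here `load_model` and `model.encode` are stand-ins, not the pipeline's API:

```python
from typing import Callable, Optional
import numpy as np

def make_embed_fn(load_model: Callable[[], object]) -> Optional[Callable[[str], np.ndarray]]:
    try:
        model = load_model()  # heavyweight load happens exactly once
    except Exception:
        return None           # caller falls back to keyword grounding
    cache: dict = {}

    def embed(text: str) -> np.ndarray:
        # The closure keeps `model` (and the cache) alive for its whole lifetime.
        if text not in cache:
            cache[text] = np.asarray(model.encode([text])[0], dtype=np.float32)
        return cache[text]

    return embed
```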
tensegrity/graft/vocabulary.py

```diff
@@ -20,6 +20,7 @@ and the LLM's continuous logit space.
 """

 import re
+import logging
 from typing import Callable, Dict, Iterable, List, Set, Optional, Tuple
 from dataclasses import dataclass, field

@@ -27,6 +28,22 @@ import numpy as np


 EmbeddingFn = Callable[[str], np.ndarray]
+BatchedEmbedFn = Callable[[List[str]], np.ndarray]
+
+
+logger = logging.getLogger(__name__)
+
+
+def _cosine_similarity_to_graft_multiplier(score: float) -> float:
+    """Map cosine proximity in roughly [-1, 1] to ``hypothesis_token_scores`` weights **[0.0, 1.0]**."""
+    return float(max(0.0, min(1.0, 0.5 * (float(score) + 1.0))))
+
+
+def _chunks(seq: List, size: int) -> Iterable[List]:
+    bs = max(1, size)
+    for i in range(0, len(seq), bs):
+        yield seq[i:i + bs]


 def _clean_token_text(token: str) -> str:
@@ -48,7 +65,7 @@ def _as_unit_vector(value: np.ndarray) -> np.ndarray:
     vec = np.asarray(value, dtype=np.float64).ravel()
     norm = float(np.linalg.norm(vec))
     if norm <= 1e-12:
-        return vec
+        return np.zeros_like(vec, dtype=np.float64)
     return vec / norm


@@ -106,16 +123,66 @@ class SemanticProjectionLayer:
         embedding_fn: EmbeddingFn,
         projection_matrix: Optional[np.ndarray] = None,
         token_texts: Optional[Dict[int, str]] = None,
+        *,
+        batched_embedding_fn: Optional[BatchedEmbedFn] = None,
+        batch_size: int = 256,
     ) -> "SemanticProjectionLayer":
         texts = token_texts or _token_texts_from_tokenizer(tokenizer)
-        token_vectors
-
+        token_vectors = {}
+        tids_ordered = sorted(texts.keys(), key=lambda x: int(x))
+        pairs = [(k, texts[k]) for k in tids_ordered]
+
+        filled = False
+
+        def apply_rows(row_tids: List[int], vectors: np.ndarray) -> None:
+            if vectors.ndim != 2 or vectors.shape[0] != len(row_tids):
+                raise ValueError("batched embeddings must have shape (batch, dim)")
+            for tid, row in zip(row_tids, vectors):
+                vec = _as_unit_vector(np.asarray(row, dtype=np.float64))
+                if vec.size and np.linalg.norm(vec) > 1e-12:
+                    token_vectors[int(tid)] = vec
+
+        if batched_embedding_fn is not None and pairs:
             try:
-
-
-
-
-
+                for chunk_pairs in _chunks(pairs, batch_size):
+                    chunk_strings = [p[1] for p in chunk_pairs]
+                    chunk_ids = [p[0] for p in chunk_pairs]
+                    batch_out = batched_embedding_fn(chunk_strings)
+                    mat = np.asarray(batch_out, dtype=np.float64)
+                    apply_rows(chunk_ids, mat)
+                filled = True
+            except Exception as e:
+                logger.warning(
+                    "batched_embedding_fn failed (%s); falling back to per-token embedding_fn",
+                    e,
+                )
+                token_vectors.clear()
+
+        if not filled and pairs:
+            try:
+                maybe = embedding_fn([p[1] for p in pairs])  # type: ignore[arg-type,misc]
+                mat = np.asarray(maybe, dtype=np.float64)
+                ids = [p[0] for p in pairs]
+                if mat.ndim == 2 and mat.shape[0] == len(ids):
+                    apply_rows(ids, mat)
+                    filled = True
+            except TypeError:
+                filled = False
+            except Exception as e:
+                logger.debug(
+                    "embedding_fn batch call unsupported (%s); using per-token path", e
+                )
+                filled = False
+
+        if not filled:
+            for tid, text in texts.items():
+                try:
+                    vec = _as_unit_vector(embedding_fn(text))
+                except Exception:
+                    continue
+                if vec.size and np.linalg.norm(vec) > 1e-12:
+                    token_vectors[int(tid)] = vec
+
         return cls(
             token_vectors=token_vectors,
             token_texts={int(k): v for k, v in texts.items()},
@@ -182,7 +249,7 @@ class VocabularyGrounding:
     # {hypothesis_id: list of grounding keywords}
     hypothesis_keywords: Dict[str, List[str]] = field(default_factory=dict)

-    #
+    # Per-token multipliers **[0.0, 1.0]** for logit graft (see tensegrity.graft.logit_bias).
    hypothesis_token_scores: Dict[str, Dict[int, float]] = field(default_factory=dict)

     # Inverse map: {token_id: list of hypothesis_ids it belongs to}
@@ -260,6 +327,8 @@
         token_texts: Optional[Dict[int, str]] = None,
         top_k: int = 32,
         threshold: Optional[float] = None,
+        vocab_batch_embedding_fn: Optional[BatchedEmbedFn] = None,
+        vocab_embedding_batch_size: int = 256,
     ) -> 'VocabularyGrounding':
         """
         Build grounding by semantic proximity instead of exact keyword matches.
@@ -273,12 +342,18 @@
             token_texts: optional explicit {token_id: token_text} inventory.
             top_k: maximum vocabulary tokens retained per hypothesis.
             threshold: minimum cosine similarity. ``None`` keeps the best top_k.
+            vocab_batch_embedding_fn: optional batched vocabulary embed ``list[str] -> ndarray``.
+            vocab_embedding_batch_size: batch chunk size for ``SemanticProjectionLayer.from_tokenizer``.
+                Token scores returned from cosine matching are normalized to graft weights in **[0.0, 1.0]**
+                (affine map from ``[-1, 1]`` cosine range) before storage in ``hypothesis_token_scores``.
         """
         projection = SemanticProjectionLayer.from_tokenizer(
             tokenizer,
             embedding_fn=embedding_fn,
             projection_matrix=projection_matrix,
             token_texts=token_texts,
+            batched_embedding_fn=vocab_batch_embedding_fn,
+            batch_size=max(1, int(vocab_embedding_batch_size)),
         )
         grounding = cls()
         grounding.vocab_size = int(getattr(tokenizer, "vocab_size", 0) or 0)
@@ -293,15 +368,30 @@
             concept_vector = _mean_unit_vector(
                 embedding_fn(phrase) for phrase in concept_phrases if str(phrase).strip()
             )
+            if (
+                concept_vector.size == 0
+                or float(np.linalg.norm(concept_vector)) <= 1e-12
+            ):
+                logger.warning(
+                    "empty or zero-norm concept vector for hypothesis %r phrases=%s; "
+                    "skipping projection",
+                    hyp_id,
+                    hypothesis_phrases.get(hyp_id, phrases),
+                )
+                grounding.hypothesis_token_scores[hyp_id] = {}
+                grounding.hypothesis_tokens[hyp_id] = set()
+                continue
             token_scores = projection.project_phrase_vector(
                 concept_vector,
                 top_k=top_k,
                 threshold=threshold,
             )
-            grounding.hypothesis_token_scores[hyp_id] =
-
+            grounding.hypothesis_token_scores[hyp_id] = {
+                tid: _cosine_similarity_to_graft_multiplier(s) for tid, s in token_scores.items()
+            }
+            grounding.hypothesis_tokens[hyp_id] = set(grounding.hypothesis_token_scores[hyp_id])

-            for tid in
+            for tid in grounding.hypothesis_token_scores[hyp_id]:
                 grounding.token_to_hypotheses.setdefault(tid, []).append(hyp_id)

         return grounding
```
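A standalone check of the affine map `_cosine_similarity_to_graft_multiplier` applies before weights are stored: cosine in `[-1, 1]` becomes a graft weight in `[0.0, 1.0]` via `w = 0.5 * (cos + 1)`, clipped at the boundaries:

```python
for cos in (-1.0, -0.2, 0.0, 0.7, 1.0):
    w = float(max(0.0, min(1.0, 0.5 * (cos + 1.0))))
    print(f"cos={cos:+.1f} -> weight={w:.2f}")
# cos=-1.0 -> 0.00, cos=+0.0 -> 0.50, cos=+1.0 -> 1.00
```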
tensegrity/inference/__init__.py (deleted)

```diff
@@ -1,3 +0,0 @@
-
-
-
```
tensegrity/legacy/__init__.py

```diff
@@ -1,3 +1,5 @@
 """Legacy compatibility modules for architectures superseded by the unified field."""

+from . import v1
+
 __all__ = ("v1",)
```
tensegrity/legacy/v1/agent.py

```diff
@@ -17,6 +17,7 @@ disagreement) balanced by the free energy principle.
 """

 import hashlib
+import inspect
 import numpy as np
 from typing import Optional, Dict, List, Any, Tuple
 import logging
@@ -96,6 +97,41 @@ class TensegrityAgent:
             epistemic_tension_threshold: Only run costly intervention search when causal tension exceeds this level
             epistemic_info_gain_threshold: Minimum estimated information gain required for epistemic actions
         """
+        def _req_pos_int(name: str, v: Any) -> int:
+            if not isinstance(v, int) or int(v) < 1:
+                raise ValueError(f"{name} must be a positive integer")
+            return int(v)
+
+        n_states = _req_pos_int("n_states", n_states)
+        n_observations = _req_pos_int("n_observations", n_observations)
+        n_actions = _req_pos_int("n_actions", n_actions)
+        sensory_dims = _req_pos_int("sensory_dims", sensory_dims)
+        sensory_bits = _req_pos_int("sensory_bits", sensory_bits)
+        context_dim = _req_pos_int("context_dim", context_dim)
+        associative_dim = _req_pos_int("associative_dim", associative_dim)
+        if not isinstance(planning_horizon, int) or planning_horizon < 1:
+            raise ValueError("planning_horizon must be a positive integer")
+        if precision < 0.0:
+            raise ValueError("precision must be non-negative")
+        if zipf_exponent < 0.0:
+            raise ValueError("zipf_exponent must be non-negative")
+        unified_obs_dim = _req_pos_int("unified_obs_dim", unified_obs_dim)
+        if unified_hidden_dims is not None:
+            if not isinstance(unified_hidden_dims, list) or any(
+                not isinstance(x, int) or x < 1 for x in unified_hidden_dims
+            ):
+                raise ValueError("unified_hidden_dims must be a list of positive integers")
+        unified_fhrr_dim = _req_pos_int("unified_fhrr_dim", unified_fhrr_dim)
+        if unified_hopfield_beta < 0.0:
+            raise ValueError("unified_hopfield_beta must be non-negative")
+        unified_ngc_settle_steps = _req_pos_int("unified_ngc_settle_steps", unified_ngc_settle_steps)
+        if unified_ngc_learning_rate < 0.0:
+            raise ValueError("unified_ngc_learning_rate must be non-negative")
+        if not (0.0 <= float(epistemic_tension_threshold) <= 1.0):
+            raise ValueError("epistemic_tension_threshold must be in [0, 1]")
+        if not (0.0 <= float(epistemic_info_gain_threshold) <= 1.0):
+            raise ValueError("epistemic_info_gain_threshold must be in [0, 1]")
+
         self.n_states = n_states
         self.n_obs = n_observations
         self.n_actions = n_actions
@@ -201,7 +237,16 @@ class TensegrityAgent:
         self.arena.register_model(model_b)

     def _morton_to_obs_index(self, morton_codes: np.ndarray) -> int:
-        """Map Morton codes to observation index
+        """Map Morton codes to a discrete observation index (legacy hashing).
+
+        The main ``perceive`` path fingerprints the unified observation vector
+        with SHA-256 modulo ``n_obs``; use this routine only where an explicit
+        Morton-code → observation-bin mapping is intentional.
+        """
+        if self.n_obs <= 0:
+            raise ValueError(
+                "n_observations must be a positive integer for _morton_to_obs_index mapping"
+            )
         if isinstance(morton_codes, (int, np.integer)):
             return int(morton_codes) % self.n_obs
         # For multiple codes, hash the combination
@@ -342,8 +387,7 @@ class TensegrityAgent:

         # Compare epistemic value of experiment vs pragmatic action
         if (experiment is not None and
-
-                current_tension >= self.epistemic_tension_threshold):
+                experiment["expected_info_gain"] > self.epistemic_info_gain_threshold):
             # Epistemic action: run an experiment to resolve tension
             return {
                 'type': 'epistemic',
@@ -391,12 +435,10 @@ class TensegrityAgent:
         Weighted by surprise — surprising experiences teach more.
         """
         episodes = self.episodic.replay(n_episodes)

-        total_update = 0.0
         for ep in episodes:
             obs_idx = ep.metadata.get('obs_idx', 0)
             self.epistemic.update_likelihood(obs_idx, ep.belief_state)
-            total_update += 1.0

         return {
             'episodes_replayed': len(episodes),
@@ -441,8 +483,11 @@ class TensegrityAgent:

     @classmethod
     def from_config(cls, config: Dict[str, Any]) -> 'TensegrityAgent':
-        """Create an agent from a configuration dictionary."""
-
+        """Create an agent from a configuration dictionary (unknown keys ignored)."""
+        sig = inspect.signature(cls.__init__)
+        allowed = {k for k in sig.parameters if k != "self"}
+        kwargs = {k: v for k, v in config.items() if k in allowed}
+        return cls(**kwargs)

     def __repr__(self):
         return (f"TensegrityAgent(states={self.n_states}, obs={self.n_obs}, "
```
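A standalone sketch of the signature-filtering idiom `from_config` now uses (a toy class stands in for the agent; the real constructor takes the validated parameters above):

```python
import inspect

class Demo:
    def __init__(self, n_states: int = 8, n_actions: int = 4):
        self.n_states, self.n_actions = n_states, n_actions

config = {"n_states": 16, "n_actions": 2, "unknown_key": "ignored"}
# Intersect the config with the constructor signature; drop everything else.
allowed = {k for k in inspect.signature(Demo.__init__).parameters if k != "self"}
kwargs = {k: v for k, v in config.items() if k in allowed}
demo = Demo(**kwargs)
print(demo.n_states, demo.n_actions)  # 16 2 — "unknown_key" silently ignored
```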
tensegrity/legacy/v1/blanket.py

```diff
@@ -34,6 +34,11 @@ class MarkovBlanket:

     The blanket enforces the Markov property: internal states
     are conditionally independent of external states given the blanket.
+
+    ``n_sensory`` / ``n_active`` mirror constructor channel counts and are
+    reserved for future multi-channel I/O; ``sense`` still ingests vectors
+    shaped for ``encoder.n_dims``, and ``act`` consumes the full softmax over
+    actions passed in.
     """

     def __init__(self,
@@ -59,15 +64,16 @@ class MarkovBlanket:
         # Observation buffer — recent history for temporal inference
         self.observation_buffer: deque = deque(maxlen=observation_buffer_size)

-        #
-        self.
-        self._obs_sum = None
-        self._obs_sq_sum = None
+        # Running stats for surprise — per-coordinate counts (variable-length obs).
+        self._sense_timestep = 0
+        self._obs_sum: Optional[np.ndarray] = None
+        self._obs_sq_sum: Optional[np.ndarray] = None
+        self._obs_elem_count: Optional[np.ndarray] = None

         # Blanket surprise (how unexpected was the last observation?)
         self.surprise: float = 0.0

-    def sense(self, raw_observation: np.ndarray) -> np.ndarray:
+    def sense(self, raw_observation: np.ndarray, *, allow_multi_point_1d: bool = False) -> np.ndarray:
         """
         Process a raw observation through the sensory boundary.

@@ -76,8 +82,11 @@ class MarkovBlanket:
         3. Compute surprise (deviation from running statistics)

         Args:
-            raw_observation:
-
+            raw_observation: Array shaped ``(n_points, encoder.n_dims)``, or ``(n_dims,)``
+                for one point. One-dimensional vectors whose length is not ``n_dims``
+                are rejected unless ``allow_multi_point_1d=True`` is set, which treats
+                the vector as a column (``reshape(-1, 1)``) of scalar observations —
+                callers should prefer supplying an explicit `(n_points, n_dims)` array.

         Returns:
             Morton-coded observation as integer array
@@ -86,14 +95,22 @@ class MarkovBlanket:
         if raw_observation.ndim == 1:
             if len(raw_observation) == self.encoder.n_dims:
                 raw_observation = raw_observation.reshape(1, -1)
-
-            # Treat as multiple single-dim observations
+            elif allow_multi_point_1d:
                 raw_observation = raw_observation.reshape(-1, 1)
+            else:
+                raise ValueError(
+                    f"One-dimensional sensory input length {len(raw_observation)} does not match "
+                    f"encoder.n_dims ({self.encoder.n_dims}). Pass shape "
+                    "(n_points, n_dims), a length-n_dims vector for one observation, "
+                    "or opt in with allow_multi_point_1d=True for reshape(-1, 1)."
+                )

         # Morton encode
         morton_codes = self.encoder.encode_continuous(raw_observation)
         if isinstance(morton_codes, (int, np.integer)):
             morton_codes = np.array([morton_codes])
+
+        self._sense_timestep += 1

         # Update running statistics for surprise computation
         self._update_statistics(raw_observation)
@@ -107,7 +124,7 @@ class MarkovBlanket:
             'morton': morton_codes.copy(),
             'raw': raw_observation.copy(),
             'surprise': self.surprise,
-            'timestamp': self.
+            'timestamp': self._sense_timestep
         })

         return morton_codes
@@ -137,17 +154,23 @@ class MarkovBlanket:

     def _update_statistics(self, observation: np.ndarray):
         """Update running statistics for surprise computation."""
-        flat = observation.flatten()
-        self._obs_count += 1
+        flat = np.asarray(observation, dtype=np.float64).flatten()

         if self._obs_sum is None:
-            self._obs_sum = np.
-            self._obs_sq_sum = np.
-
-
-
+            self._obs_sum = np.zeros(len(flat), dtype=np.float64)
+            self._obs_sq_sum = np.zeros(len(flat), dtype=np.float64)
+            self._obs_elem_count = np.zeros(len(flat), dtype=np.float64)
+
+        lf, ls = len(flat), len(self._obs_sum)
+        if lf > ls:
+            self._obs_sum = np.pad(self._obs_sum, (0, lf - ls), mode='constant')
+            self._obs_sq_sum = np.pad(self._obs_sq_sum, (0, lf - ls), mode='constant')
+            self._obs_elem_count = np.pad(self._obs_elem_count, (0, lf - ls), mode='constant')
+
+        n = min(lf, len(self._obs_sum))
         self._obs_sum[:n] += flat[:n]
         self._obs_sq_sum[:n] += flat[:n] ** 2
+        self._obs_elem_count[:n] += 1.0

     def _compute_surprise(self, observation: np.ndarray) -> float:
         """
@@ -156,14 +179,15 @@ class MarkovBlanket:
         This is a simple proxy — the full surprise comes from the
         free energy engine. But this gives a fast heuristic at the boundary.
         """
-
-
-
-        flat = observation.flatten()
+        flat = np.asarray(observation, dtype=np.float64).flatten()
+        assert self._obs_sum is not None and self._obs_elem_count is not None
         n = min(len(flat), len(self._obs_sum))
-
-
-
+        cnt = self._obs_elem_count[:n]
+        if n < 1 or float(np.min(cnt)) < 2.0:
+            return 0.0
+
+        mean = self._obs_sum[:n] / np.maximum(cnt, 1e-12)
+        var = self._obs_sq_sum[:n] / np.maximum(cnt, 1e-12) - mean ** 2
         var = np.maximum(var, 1e-8)  # Prevent division by zero

         # Gaussian log-likelihood (negative = surprise)
@@ -187,7 +211,7 @@ class MarkovBlanket:
             'sensory': self.sensory_state,
             'active': self.active_state,
             'surprise': self.surprise,
-            '
+            'sense_timestep': self._sense_timestep,
             'buffer_size': len(self.observation_buffer)
         }
```
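A standalone numpy sketch of the per-coordinate running statistics (`_obs_sum`, `_obs_sq_sum`, `_obs_elem_count`) including the pad-on-growth behavior for variable-length observations, mirroring the logic above outside the class:

```python
import numpy as np

obs_stream = [np.array([0.1, 0.2]), np.array([0.3, 0.4, 0.5])]  # lengths vary

s = sq = cnt = None
for obs in obs_stream:
    flat = np.asarray(obs, dtype=np.float64).flatten()
    if s is None:
        s = np.zeros(len(flat)); sq = np.zeros(len(flat)); cnt = np.zeros(len(flat))
    if len(flat) > len(s):  # grow buffers instead of failing on longer inputs
        pad = len(flat) - len(s)
        s, sq, cnt = (np.pad(a, (0, pad)) for a in (s, sq, cnt))
    n = min(len(flat), len(s))
    s[:n] += flat[:n]; sq[:n] += flat[:n] ** 2; cnt[:n] += 1.0

# Per-coordinate mean/variance with the same guards as _compute_surprise.
mean = s / np.maximum(cnt, 1e-12)
var = np.maximum(sq / np.maximum(cnt, 1e-12) - mean ** 2, 1e-8)
print(mean, var)
```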
tensegrity/legacy/v1/morton.py

```diff
@@ -24,9 +24,14 @@ Mathematical basis:
 """

 import numpy as np
+from itertools import product
 from typing import Union, List, Tuple, Optional


+# Guard against exponential neighborhood enumeration when radius × dims is large.
+MAX_NEIGHBORHOOD_COMBINATIONS = 50_000
+
+
 class MortonEncoder:
     """
     Encodes arbitrary-dimensional data into Morton codes (Z-order curve indices).
@@ -46,16 +51,39 @@ class MortonEncoder:
                     3 for volumetric, N for embeddings)
             bits_per_dim: Resolution per dimension. 10 bits = 1024 levels per dim.
                 Total Morton code space = 2^(n_dims * bits_per_dim)
+                Must satisfy n_dims * bits_per_dim <= 63 so codes fit np.int64.
             ranges: Min/max per dimension for quantization. If None, auto-calibrated.
         """
         self.n_dims = n_dims
         self.bits_per_dim = bits_per_dim
-
+        total_bits = n_dims * bits_per_dim
+        if total_bits > 63:
+            raise ValueError(
+                f"total_bits (n_dims * bits_per_dim) must be <= 63 to fit in np.int64; "
+                f"got total_bits={total_bits}"
+            )
+        self.total_bits = total_bits
         self.levels = 2 ** bits_per_dim

         # Quantization ranges per dimension
         if ranges is not None:
-            self.ranges = np.
+            self.ranges = np.asarray(ranges, dtype=np.float64)
+            if self.ranges.ndim != 2 or self.ranges.shape[1] != 2:
+                raise ValueError("ranges must be a sequence of (min, max) tuples per dimension.")
+            spans = self.ranges[:, 1] - self.ranges[:, 0]
+            flat_spans = np.asarray(spans).flatten()
+            bad = np.where(np.abs(flat_spans) < 1e-15)[0]
+            if len(bad):
+                dims_list = [int(i) for i in bad.tolist()]
+                raise ValueError(
+                    "Quantization ranges have zero span on dimension index(es) "
+                    f"{dims_list}; ensure max > min for each dimension "
+                    "(or omit ranges to auto-calibrate from data)."
+                )
+            if int(self.ranges.shape[0]) != int(n_dims):
+                raise ValueError(
+                    f"ranges must have length n_dims ({n_dims}), got shape {self.ranges.shape}."
+                )
         else:
             self.ranges = None  # Will be set on first encode (auto-calibrate)

@@ -112,17 +140,24 @@
         # Normalize to [0, 1] then scale to [0, levels-1]
         mins = self.ranges[:, 0]
         maxs = self.ranges[:, 1]
-
+        spans = np.maximum(maxs - mins, 1e-15)
+        normalized = (values - mins) / spans
         normalized = np.clip(normalized, 0.0, 1.0)
         quantized = (normalized * (self.levels - 1)).astype(np.int64)
         return quantized

     def dequantize(self, quantized: np.ndarray) -> np.ndarray:
         """Inverse of quantize — reconstruct continuous approximation."""
+        if self.ranges is None:
+            raise ValueError(
+                "ranges not initialized: call encode (or compute_ranges) "
+                "before MortonEncoder.dequantize"
+            )
         mins = self.ranges[:, 0]
         maxs = self.ranges[:, 1]
+        spans = np.maximum(maxs - mins, 1e-15)
         normalized = quantized.astype(np.float64) / (self.levels - 1)
-        return normalized *
+        return normalized * spans + mins

     def encode(self, values: np.ndarray) -> np.ndarray:
         """
@@ -146,7 +181,16 @@
         if values.dtype in (np.float32, np.float64):
             quantized = self.quantize(values)
         else:
-            quantized =
+            quantized = np.asarray(values, dtype=np.int64)
+            qmin = int(np.min(quantized))
+            qmax = int(np.max(quantized))
+            lo = 0
+            hi = int(self.levels - 1)
+            if qmin < lo or qmax > hi:
+                raise ValueError(
+                    f"MortonEncoder.encode expects integer coords in [{lo}, {hi}] "
+                    f"(levels={self.levels}); got range [{qmin}, {qmax}]"
+                )

         # Interleave bits for each point
         n_points = quantized.shape[0]
@@ -214,27 +258,32 @@
     def neighborhood(self, code: int, radius: int = 1) -> List[int]:
         """
         Find Morton codes within a given radius (in quantized coordinates).
-
-
-        to nearby points in the original space.
+
+        Uses ``decode`` → offset enumeration → ``encode`` within ``[0, levels)``.
         """
-
-
-
-
+        decoded = self.decode(code)
+        center = (
+            decoded.reshape(-1).astype(np.int64)
+            if isinstance(decoded, np.ndarray)
+            else np.asarray([decoded], dtype=np.int64)
+        )
+        n_combo = int((2 * radius + 1) ** self.n_dims)
+        if n_combo > MAX_NEIGHBORHOOD_COMBINATIONS:
+            raise ValueError(
+                f"MortonEncoder.neighborhood would enumerate {n_combo} quantized offset "
+                f"combinations (n_dims={self.n_dims}, radius={radius}, levels={self.levels}), "
+                f"which exceeds MAX_NEIGHBORHOOD_COMBINATIONS={MAX_NEIGHBORHOOD_COMBINATIONS}; "
+                "reduce radius or n_dims."
+            )
+
         offsets = range(-radius, radius + 1)
-
-
-
-
-
-
-        for off in offsets:
-            _recurse(dim + 1, current_offset + [off])
-
-        _recurse(0, [])
-        return list(set(neighbors))
+        neighbors: List[int] = []
+        for tup in product(offsets, repeat=self.n_dims):
+            offset = np.array(tup, dtype=np.int64)
+            point = center + offset
+            if np.all(point >= 0) and np.all(point < self.levels):
+                neighbors.append(int(self.encode(point.reshape(1, -1))))
+        return sorted(set(neighbors))

     @staticmethod
     def from_modality(modality: str, **kwargs) -> 'MortonEncoder':
```
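A standalone check of the int64 bit budget the constructor now enforces: a Morton code interleaves `bits_per_dim` bits from each of `n_dims` coordinates, so the product must stay within the 63 usable bits of a signed 64-bit integer:

```python
for n_dims, bits_per_dim in [(3, 10), (6, 10), (8, 8)]:
    total_bits = n_dims * bits_per_dim
    print(n_dims, bits_per_dim, total_bits, "ok" if total_bits <= 63 else "rejected")
# (3, 10) -> 30 ok; (6, 10) -> 60 ok; (8, 8) -> 64 rejected
```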
tensegrity/memory/episodic.py

```diff
@@ -138,6 +138,11 @@ class EpisodicMemory:
         if norm > 0:
             item_rep /= norm
         return item_rep
+
+    def compute_item_representation(self, observation: np.ndarray,
+                                    belief_state: np.ndarray) -> np.ndarray:
+        """Public entry point for projecting an observation + belief into context space."""
+        return self._compute_item_representation(observation, belief_state)

     def encode(self, observation: np.ndarray, morton_code: np.ndarray,
                belief_state: np.ndarray, action: Optional[int],
```
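A standalone sketch of the thin public-alias pattern used here (a toy class, not `EpisodicMemory` itself): a public method forwards to the underscore-prefixed implementation so callers stop depending on privates:

```python
import numpy as np

class Store:
    def _compute_item_representation(self, obs: np.ndarray, belief: np.ndarray) -> np.ndarray:
        # Toy projection: concatenate and L2-normalize.
        rep = np.concatenate([obs, belief])
        n = np.linalg.norm(rep)
        return rep / n if n > 0 else rep

    def compute_item_representation(self, obs: np.ndarray, belief: np.ndarray) -> np.ndarray:
        """Public entry point; same contract as the private helper."""
        return self._compute_item_representation(obs, belief)

print(Store().compute_item_representation(np.ones(2), np.ones(3)))
```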
@@ -201,6 +201,16 @@ class EpistemicMemory:

         self.evidence_log.append(log_lik)
         return log_lik
+
+    def _normalize_params_matrix(self, params: np.ndarray, *, axis: int) -> np.ndarray:
+        """Map Dirichlet parameters to expected categorical probs (does not touch access counts)."""
+        return params / np.maximum(params.sum(axis=axis, keepdims=True), 1e-16)
+
+    @staticmethod
+    def _normalize_params_vector(params: np.ndarray) -> np.ndarray:
+        """Normalize a vector Dirichlet parameter to probabilities."""
+        s = float(params.sum())
+        return params / max(s, 1e-16)

     def entropy(self) -> Dict[str, float]:
         """Compute non-negative categorical entropy of expected beliefs.
@@ -210,8 +220,8 @@ class EpistemicMemory:
         uncertainty dashboard metric. The agent usually wants entropy of the
         expected categorical distributions it will actually act on.
         """
-        A = self.A_params / np.maximum(self.A_params.sum(axis=0, keepdims=True), 1e-16)
-        D = self.D_params / max(float(self.D_params.sum()), 1e-16)
+        A = self._normalize_params_matrix(self.A_params, axis=0)
+        D = self._normalize_params_vector(self.D_params)

         a_entropy_by_state = -np.sum(A * np.log(np.maximum(A, 1e-16)), axis=0)
         d_entropy = -np.sum(D * np.log(np.maximum(D, 1e-16)))
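Both helpers implement the same map from Dirichlet pseudocounts `alpha` to the expected categorical `p_k = alpha_k / sum(alpha)`, which then feeds the usual `-sum(p * log p)` entropy. A self-contained sketch of that arithmetic with illustrative values:

```python
import numpy as np

# Dirichlet pseudocounts for a 3-state prior (illustrative values).
d_params = np.array([2.0, 5.0, 3.0])

# Expected categorical distribution: alpha_k / sum(alpha).
p = d_params / max(float(d_params.sum()), 1e-16)    # -> [0.2, 0.5, 0.3]

# Non-negative categorical entropy of the expected distribution.
entropy = -np.sum(p * np.log(np.maximum(p, 1e-16)))
print(p, entropy)  # entropy ≈ 1.0297 nats
```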
@@ -38,6 +38,7 @@ from typing import Any, Dict, List, Optional, Tuple
 import numpy as np

 from tensegrity.broca.controller import CognitiveController
+from tensegrity.broca.schemas import BeliefState, Hypothesis
 from tensegrity.bench.tasks import TaskSample
 from tensegrity.causal.scm import StructuralCausalModel
 from tensegrity.engine.causal_energy import (
@@ -75,6 +76,7 @@ class CommitResult:
     final_arena_tension: float
     final_energy_arena_tension: float
     trace: List[IterationStep] = field(default_factory=list)
+    initial_perception: Optional[Dict[str, Any]] = None


 def _alphanum_tokens(text: str, max_tokens: int) -> List[str]:
@@ -179,15 +181,17 @@ class CanonicalPipeline:
         # The controller resets itself per item; here we additionally clear the
         # Hopfield bank, episodic memory, and energy arena.
         try:
-            self.controller.agent.field.memory.…
-            …
+            self.controller.agent.field.memory.clear()
+        except AttributeError as e:
+            logger.warning("Hopfield clear failed (missing clear): %s", e)
         except Exception as e:
-            logger.…
+            logger.error("Hopfield clear failed: %s", e, exc_info=True)
         try:
             self.controller.agent.episodic.clear()
+        except AttributeError as e:
+            logger.warning("Episodic clear failed: %s", e)
         except Exception as e:
-            logger.…
+            logger.error("Episodic clear failed: %s", e, exc_info=True)
         self.energy_arena = EnergyCausalArena(
             precision=self.energy_arena.precision,
             beta=self.energy_arena.beta,
@@ -235,13 +239,15 @@ class CanonicalPipeline:
             n_ngc_layers = len(self.controller.agent.field.ngc.layer_sizes)
             topology = self._topology_mapper.from_scm(scm, n_layers=n_ngc_layers)
             self._scm_topologies[scm.name] = topology
-        except ValueError:
-            …
+        except ValueError as e:
+            logger.warning(
+                "Topology registration failed for SCM %r: %s",
+                getattr(scm, "name", "?"),
+                e,
+            )

     def _soft_reset_in_place(self, labels: List[str]) -> None:
         """Reset only what is per-item, keeping the heavy state intact."""
-        from tensegrity.broca.schemas import BeliefState, Hypothesis

         # Fresh hypotheses with uniform prior over the choice labels.
         n = len(labels)
@@ -269,15 +275,13 @@ class CanonicalPipeline:
         # NGC working state: clear activations/history but keep the learned
         # weights (cross-item priors) and the Hopfield bank.
         try:
-            ngc = self.controller.agent.field.ngc
-            ngc.layers = []
-            ngc._initialized = False
-            ngc._last_obs = None
-            ngc.clear_history()
+            self.controller.agent.field.ngc.soft_reset()
             self.controller.agent.field.energy_history.clear()
             self.controller.agent.field._step_count = 0
+        except AttributeError as e:
+            logger.warning("NGC soft_reset skipped (API mismatch): %s", e)
         except Exception as e:
-            logger.…
+            logger.error("NGC soft_reset failed: %s", e, exc_info=True)

         # ---------- per-choice SCM (used by EnergyCausalArena) ----------

@@ -293,7 +297,7 @@ class CanonicalPipeline:
         is exactly what turns the lateral coherence link into a virtual parent
         in the NGC hierarchy, addressing the topological-mismatch critique.
         """
-        scm = StructuralCausalModel(name=f"choice_{choice_idx}")
+        scm = StructuralCausalModel(name=f"choice_{choice_idx}_{label}")
         scm.add_variable("prompt_feature", n_values=4, parents=[])
         scm.add_variable("coherence", n_values=4, parents=[])
         scm.add_variable("choice_match", n_values=4, parents=["prompt_feature"])
@@ -356,8 +360,11 @@ class CanonicalPipeline:
                 field.ngc.settle(choice_obs, steps=self.falsify_settle_steps)
                 pe = float(field.ngc.prediction_error(prompt_obs))
             except Exception as e:
-                logger.…
-                …
+                logger.error(
+                    "NGC falsification failed for choice %d: %s",
+                    i, e, exc_info=True,
+                )
+                pe = float(1e9)
             scores[i] = -pe

         # Derive a compact discrete observation for the energy arena.
@@ -396,6 +403,8 @@ class CanonicalPipeline:
     def _bucket_4(x: float) -> int:
         """Map a real-valued summary to a 4-bucket discrete value via tanh."""
         v = math.tanh(x / 2.0)  # in (-1, 1)
+        if math.isnan(x) or math.isnan(v):
+            return 2
         # Map (-1, 1) to {0, 1, 2, 3}.
         return max(0, min(3, int((v + 1.0) * 2.0)))

@@ -495,12 +504,12 @@ class CanonicalPipeline:
                 final_energy_arena_tension=1.0,
             )

-        self._item_index += 1
         self.reset_for_item(sample)
+        self._item_index += 1

         # Initial perception — runs the full stack, including Broca SCM proposal
         # if causal tension is high (the controller wires this internally).
-        self.ingest_prompt(sample.prompt)
+        initial_perception = self.ingest_prompt(sample.prompt)

         trace: List[IterationStep] = []
         converged = False
@@ -612,6 +621,7 @@ class CanonicalPipeline:
             final_arena_tension=final_arena_tension,
             final_energy_arena_tension=final_energy_tension,
             trace=trace,
+            initial_perception=initial_perception if n > 0 else None,
         )

     # ---------- helpers ----------
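To make the new NaN guard in `_bucket_4` concrete, here is the bucketing arithmetic in isolation; this is a standalone sketch mirroring the method above, not an import from the pipeline:

```python
import math

def bucket_4(x: float) -> int:
    """tanh-squash a real summary into one of four buckets; NaN falls to the middle."""
    v = math.tanh(x / 2.0)                 # v in (-1, 1)
    if math.isnan(x) or math.isnan(v):
        return 2                           # neutral bucket for undefined summaries
    return max(0, min(3, int((v + 1.0) * 2.0)))

print(bucket_4(-5.0))          # 0  (v ≈ -0.987 → int(0.026) = 0)
print(bucket_4(0.0))           # 2  (v = 0 → int(2.0) = 2)
print(bucket_4(5.0))           # 3  (v ≈ 0.987 → int(3.97) = 3)
print(bucket_4(float("nan")))  # 2  (guarded)
```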
@@ -109,6 +109,10 @@ class IterativeCognitiveScorer:
         # noisy to help. The wiring (encode/retrieve) stays so smarter signals
         # can be plugged in here without re-plumbing.
         w_episodic: float = 0.0,
+        # Minimum cosine match between query and episodic context to trust retrieval.
+        episodic_ctx_sim_threshold: float = 0.5,
+        # Seed for NGC `reinitialize` on `reset`; None chooses a random seed each time.
+        reset_seed: Optional[int] = 12345,
     ):
         from tensegrity.engine.unified_field import UnifiedField
         from tensegrity.memory.episodic import EpisodicMemory
@@ -137,6 +141,8 @@ class IterativeCognitiveScorer:
         self.use_episodic = use_episodic
         self.episodic_top_k = episodic_top_k
         self.w_episodic = w_episodic
+        self.episodic_ctx_sim_threshold = episodic_ctx_sim_threshold
+        self.reset_seed = reset_seed
         # Dirichlet-style per-channel reliability. Each channel accumulates a
         # pseudocount that grows when the channel's top-ranked choice matches
         # the committed belief on an item. Fusion weights = normalized counts.
@@ -165,10 +171,9 @@ class IterativeCognitiveScorer:

     def _sbert_similarities(self, prompt: str, choices: List[str]) -> List[float]:
         features = self.field.encoder.features
-        …
-        sbert = …
-        if sbert is not None and sbert != "FALLBACK":
+        getter = getattr(features, "get_sbert_model", None)
+        sbert = getter() if callable(getter) else None
+        if sbert is not None:
             embs = sbert.encode([prompt] + choices, show_progress_bar=False)
             pe = embs[0]
             pn = float(np.linalg.norm(pe))
@@ -178,7 +183,11 @@ class IterativeCognitiveScorer:
                 cn = float(np.linalg.norm(ce))
                 out.append(float(np.dot(pe, ce) / (pn * cn)) if pn > 1e-8 and cn > 1e-8 else 0.0)
             return out
-
+        if self.field.encoder.semantic and callable(getter) and not getattr(
+            self, "_sbert_unavailable_logged", False
+        ):
+            logger.warning("SBERT sentence similarity unavailable; using FHRR cosine similarity.")
+            setattr(self, "_sbert_unavailable_logged", True)
         pf = self._encode(self._tokenize(prompt, 64))
         return [
             self.field.encoder.similarity(pf, self._encode(self._tokenize(c, 32)))
@@ -232,7 +241,7 @@ class IterativeCognitiveScorer:
         if self.use_episodic and self.episodic is not None and len(self.episodic.episodes) > 0:
             uniform_belief = np.full(n, 1.0 / n, dtype=np.float64)
             try:
-                query_ctx = self.episodic._compute_item_representation(
+                query_ctx = self.episodic.compute_item_representation(
                     prompt_obs_vec, uniform_belief
                 )
                 retrieved = self.episodic.retrieve_by_context(
@@ -250,13 +259,12 @@ class IterativeCognitiveScorer:
                     ch_real.append(v / nrm if nrm > 1e-10 else v)
                 # Only trust episodes whose prompt context strongly matches.
                 # Below this threshold, "similar past answer" is noise, not signal.
-                CTX_SIM_THRESHOLD = 0.5
                 for ep in retrieved:
                     ans_vec = ep.metadata.get("chosen_fhrr_real") if ep.metadata else None
                     if ans_vec is None:
                         continue
                     ctx_sim = float(np.dot(query_ctx, ep.context_vector))
-                    if ctx_sim < CTX_SIM_THRESHOLD:
+                    if ctx_sim < self.episodic_ctx_sim_threshold:
                         continue
                     # Also discount by past surprise: episodes the agent struggled
                     # with (low committed confidence) carry less authority.
@@ -276,6 +284,10 @@ class IterativeCognitiveScorer:
         iterations_used = 0
         last_channel_scores: Dict[str, np.ndarray] = {}

+        def znorm(a: np.ndarray) -> np.ndarray:
+            s = a.std()
+            return (a - a.mean()) / s if s > 1e-10 else np.zeros_like(a)
+
         for it in range(self.max_iterations):
             iterations_used = it + 1

@@ -316,10 +328,6 @@ class IterativeCognitiveScorer:
                     hop_bonus[i] = float(np.dot(q, retrieved / rn))

             # 3b. Fuse z-normalized
-            def znorm(a: np.ndarray) -> np.ndarray:
-                s = a.std()
-                return (a - a.mean()) / s if s > 1e-10 else np.zeros_like(a)
-
             # Normalized channel weights from accumulated reliability counts.
             total = sum(self._channel_counts.values())
             w = {c: self._channel_counts[c] / total for c in self._channels}
@@ -362,11 +370,6 @@ class IterativeCognitiveScorer:
                 self.field.ngc.settle(prompt_obs_vec, steps=self.context_settle_steps)
                 self.field.ngc.learn(modulation=self.shaping_lr_scale)

-            # 3e. Hopfield: store the *prompt* encoding so cross-iteration memory
-            # accumulates evidence about the question, not the current guess.
-            if self.use_hopfield:
-                self.field.memory.store(self._encode(prompt_tokens))
-
             # Re-base on the prompt-grounded state for next iteration's scoring
             base_state = self.field.ngc.save_state()

@@ -377,6 +380,10 @@ class IterativeCognitiveScorer:
                 converged = True
                 break

+        # Store prompt encoding once for Hopfield cross-item memory (not each iteration).
+        if self.use_hopfield:
+            self.field.memory.store(self._encode(prompt_tokens))
+
         committed_idx = int(np.argmax(prev_belief))

         # Reliability update via *cross-channel agreement* (not agreement with
@@ -435,8 +442,16 @@ class IterativeCognitiveScorer:
     def reset(self):
         """Per-item reset. Clears NGC working state but PRESERVES Hopfield
         patterns and episodic memory — those carry across items in a session
-        and provide cross-item learning.
-        """
+        and provide cross-item learning.
+
+        NGC weights are reinitialized using ``reset_seed``: default ``12345``
+        matches legacy behavior for reproducibility; pass ``None`` for a random
+        seed each reset, or any other integer to pin runs.
+        """
+        seed = self.reset_seed
+        if seed is None:
+            seed = int(np.random.randint(0, 2 ** 31))
+        self.field.ngc.reinitialize(seed)
         self.field.energy_history.clear()
         self.field._step_count = 0

@@ -444,9 +459,7 @@ class IterativeCognitiveScorer:
         """Full reset. Use at task / session boundaries to clear all memory
         and per-channel reliability priors (which are task-specific)."""
         self.reset()
-        self.field.memory.…
-        self.field.memory._matrix = None
-        self.field.memory._dirty = True
+        self.field.memory.clear()
        if self.episodic is not None:
             self.episodic.clear()
         for c in self._channels:
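Hoisting `znorm` out of the per-iteration loop does not change the fusion math; it just defines the helper once. A self-contained sketch of the z-normalized, reliability-weighted channel fusion (channel names, scores, and counts below are illustrative):

```python
import numpy as np

def znorm(a: np.ndarray) -> np.ndarray:
    """Zero-mean, unit-std per channel so no channel dominates by scale alone."""
    s = a.std()
    return (a - a.mean()) / s if s > 1e-10 else np.zeros_like(a)

# Raw per-choice scores from two channels, on very different scales.
channel_scores = {
    "sbert": np.array([0.82, 0.41, 0.77]),
    "ngc":   np.array([-3.1, -9.4, -2.8]),   # negated prediction errors
}
# Reliability pseudocounts accumulated across items (illustrative).
channel_counts = {"sbert": 6.0, "ngc": 2.0}

total = sum(channel_counts.values())
w = {c: channel_counts[c] / total for c in channel_scores}
fused = sum(w[c] * znorm(s) for c, s in channel_scores.items())
print(int(np.argmax(fused)))  # index of the currently favored choice
```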
@@ -67,7 +67,7 @@ def test_topology_mapper_turns_horizontal_edge_into_virtual_parent():

     assert len(mapping.virtual_parents) == 1
     vp = next(iter(mapping.virtual_parents.values()))
-    assert vp.children == ["A", "B"]
+    assert set(vp.children) == {"A", "B"}
     assert mapping.embedded_layers[vp.name] == 1
     assert (vp.name, "A") in mapping.embedded_edges
     assert (vp.name, "B") in mapping.embedded_edges
@@ -7,6 +7,7 @@ import sys
 import time

 import numpy as np
+import pytest

 ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 if ROOT not in sys.path:
@@ -123,6 +124,7 @@ def test_build_scm_from_proposal():


 def test_scm_marginalizes_missing_parents_and_counterfactual_changes_descendants():
+    pytest.importorskip("networkx")
     from tensegrity.causal.scm import StructuralCausalModel

     scm = StructuralCausalModel("two_node")
@@ -3,6 +3,7 @@ Tests for the unified cognitive engine: FHRR, NGC, and UnifiedField.
 """

 import numpy as np
+import sys
 np.random.seed(42)


@@ -68,7 +69,10 @@ def test_fhrr_encoding():
     sim_numeric_far = enc.similarity(v_base, v_far)
     print(f"\n  sim([1,2,3], [1,2,3.1]) = {sim_near:.4f}")
     print(f"  sim([1,2,3], [9,8,7])   = {sim_numeric_far:.4f}")
-    assert sim_near > sim_numeric_far
+    assert sim_near > sim_numeric_far, (
+        "Numeric vectors should be more similar when inputs are nearer in value space "
+        f"(sim_near={sim_near}, sim_far={sim_numeric_far})"
+    )
     print(f"  ✓ Numeric vectors: similar inputs → similar encodings")

@@ -9,6 +9,7 @@ Tests:
 """

 import numpy as np
+import sys
 import json

 np.random.seed(42)
@@ -170,6 +170,13 @@ def test_ngc_contradiction_signal():
     print(f"\n  Memory similarity for truth: {r_truth['memory_similarity']:.4f}")
     assert np.isfinite(mean_contra_pe)
     assert np.isfinite(pe_truth_after)
+    assert not np.isclose(
+        mean_contra_pe, pe_truth_after, rtol=0.0, atol=1e-8
+    ), (
+        "Prediction error on contradictions should differ from prediction error "
+        f"when the established truth is re-presented "
+        f"(mean_contra_pe={mean_contra_pe:.6g}, pe_truth_after={pe_truth_after:.6g})"
+    )


 def test_needle_in_lies():
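For clarity on the new guard's semantics: with `rtol=0.0`, `np.isclose` reduces to a pure absolute comparison, `abs(a - b) <= atol`, so the assertion only fires when the two prediction errors agree to within 1e-8. A tiny sketch:

```python
import numpy as np

# rtol=0.0 disables the relative term, so np.isclose(a, b) reduces to
# abs(a - b) <= atol.
assert np.isclose(1.000000001, 1.0, rtol=0.0, atol=1e-8)   # |diff| = 1e-9, within tolerance
assert not np.isclose(1.0001, 1.0, rtol=0.0, atol=1e-8)    # |diff| = 1e-4, clearly apart
```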
@@ -107,5 +107,3 @@ if __name__ == "__main__":
         import traceback; traceback.print_exc()

     print()
-
-
@@ -251,7 +251,11 @@ def test_memory_systems():
     # Soft retrieval (Boltzmann distribution)
     blended, weights = am.retrieve_soft(noisy)
     print(f"  Soft retrieval weights (top 3): {sorted(weights)[-3:]}")
-    assert best_match == 3
+    assert best_match == 3, (
+        "expected best_match == 3 (associative retrieval of pattern 3) with numpy seed "
+        "42 set at module load, "
+        f"got best_match={best_match}"
+    )
     print(f"  ✓ Content-addressed retrieval via energy minimization")
     print(f"  Stats: {am.statistics}")