feat: complete MLE engine implementation
Browse files
- mle/__pycache__/__init__.cpython-312.pyc +0 -0
- mle/__pycache__/demo.cpython-312.pyc +0 -0
- mle/binding/__init__.py +1 -0
- mle/binding/__pycache__/__init__.cpython-312.pyc +0 -0
- mle/binding/__pycache__/semantic_binding.cpython-312.pyc +0 -0
- mle/binding/semantic_binding.py +350 -0
- mle/demo.py +292 -0
- mle/energy/__init__.py +1 -0
- mle/energy/__pycache__/__init__.cpython-312.pyc +0 -0
- mle/energy/__pycache__/energy_model.cpython-312.pyc +0 -0
- mle/energy/energy_model.py +494 -0
- mle/inference/__init__.py +1 -0
- mle/inference/__pycache__/__init__.cpython-312.pyc +0 -0
- mle/inference/__pycache__/reasoning_engine.cpython-312.pyc +0 -0
- mle/inference/reasoning_engine.py +555 -0
- mle/memory/__init__.py +1 -0
- mle/memory/__pycache__/__init__.cpython-312.pyc +0 -0
- mle/memory/__pycache__/sparse_address_table.cpython-312.pyc +0 -0
- mle/memory/sparse_address_table.py +378 -0
- mle/routing/__init__.py +1 -0
- mle/routing/__pycache__/__init__.cpython-312.pyc +0 -0
- mle/routing/__pycache__/recursive_jit_router.cpython-312.pyc +0 -0
- mle/routing/recursive_jit_router.py +242 -0
- mle/tests/__pycache__/test_full_system.cpython-312.pyc +0 -0
- mle/tests/test_full_system.py +904 -0
- mle/utils/__init__.py +7 -0
- mle/utils/__pycache__/__init__.cpython-312.pyc +0 -0
- mle/utils/__pycache__/simd_ops.cpython-312.pyc +0 -0
- mle/utils/simd_ops.py +404 -0
mle/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (1.59 kB). View file
|
|
|
mle/__pycache__/demo.cpython-312.pyc
ADDED
|
Binary file (13.1 kB). View file
|
|
|
mle/binding/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
from .semantic_binding import HRRBinding, BinaryBinding, BindingEngine
|
mle/binding/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (230 Bytes). View file
|
|
|
mle/binding/__pycache__/semantic_binding.cpython-312.pyc
ADDED
|
Binary file (22.5 kB). View file
|
|
|
mle/binding/semantic_binding.py
ADDED
|
@@ -0,0 +1,350 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
MLE Binding Module: Semantic Binding Operations
|
| 3 |
+
=================================================
|
| 4 |
+
Implements circular convolution-based binding for composing and
|
| 5 |
+
decomposing semantic relations between hyperdimensional vectors.
|
| 6 |
+
|
| 7 |
+
Two implementations:
|
| 8 |
+
1. FFT-based (HRR): High precision, O(N log N), works on real-valued vectors
|
| 9 |
+
2. Binary (BSC): O(N/64) via XOR, works directly on packed uint64 vectors
|
| 10 |
+
|
| 11 |
+
The binding operation creates a new vector C = bind(A, B) such that:
|
| 12 |
+
- C is quasi-orthogonal to both A and B
|
| 13 |
+
- unbind(C, B) β A (recoverable)
|
| 14 |
+
- bind is commutative and associative
|
| 15 |
+
|
| 16 |
+
This enables representing structured relations:
|
| 17 |
+
- "cat IS_A animal" β bind(cat, IS_A) stores a trace recoverable with animal
|
| 18 |
+
- Analogies: unbind(bind(king, male), bind(queen, female)) β identity
|
| 19 |
+
"""
|
| 20 |
+
|
| 21 |
+
import numpy as np
|
| 22 |
+
from typing import Optional, Tuple, List
|
| 23 |
+
import logging
|
| 24 |
+
|
| 25 |
+
from ..utils.simd_ops import (
|
| 26 |
+
N_BITS, N_WORDS,
|
| 27 |
+
xor_vectors, random_binary_vector, random_binary_vectors,
|
| 28 |
+
hamming_distance, hamming_similarity, majority_vote, popcount
|
| 29 |
+
)
|
| 30 |
+
|
| 31 |
+
logger = logging.getLogger(__name__)
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 35 |
+
# FFT-based Circular Convolution (Holographic Reduced Representations)
|
| 36 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 37 |
+
|
| 38 |
+
class HRRBinding:
    """
    Holographic Reduced Representations (Plate-style) over real vectors.

    Binding is circular convolution, evaluated in the frequency domain with
    ``numpy.fft`` for O(N log N) cost; unbinding is circular correlation.
    A bound pair is quasi-orthogonal to both operands, yet the filler is
    approximately recoverable given the role, and binding commutes and
    associates — enabling structured relations and analogy arithmetic.
    """

    def __init__(self, dim: int = N_BITS):
        # Dimensionality of the real-valued hypervectors handled here.
        self.dim = dim
        # Full-length FFT; kept as an attribute for parity with possible
        # truncated-transform variants.
        self._fft_len = dim

    @staticmethod
    def random_vector(dim: int = N_BITS) -> np.ndarray:
        """Draw a Gaussian vector and rescale it to unit length (float32)."""
        sample = np.random.randn(dim).astype(np.float32)
        sample /= np.linalg.norm(sample)
        return sample

    @staticmethod
    def bind(a: np.ndarray, b: np.ndarray) -> np.ndarray:
        """Bind by circular convolution: IFFT(FFT(a) * FFT(b))."""
        spectrum = np.fft.rfft(a) * np.fft.rfft(b)
        return np.fft.irfft(spectrum, n=len(a)).astype(np.float32)

    @staticmethod
    def unbind(c: np.ndarray, b: np.ndarray) -> np.ndarray:
        """Approximately invert bind via circular correlation.

        Recovers A from C = bind(A, B): IFFT(FFT(c) * conj(FFT(b))).
        """
        spectrum = np.fft.rfft(c) * np.conj(np.fft.rfft(b))
        return np.fft.irfft(spectrum, n=len(c)).astype(np.float32)

    @staticmethod
    def bundle(*vectors: np.ndarray) -> np.ndarray:
        """Superpose vectors by summation, renormalizing unless near-zero."""
        total = np.stack(vectors).sum(axis=0)
        magnitude = np.linalg.norm(total)
        if magnitude > 1e-8:
            total /= magnitude
        return total.astype(np.float32)

    @staticmethod
    def similarity(a: np.ndarray, b: np.ndarray) -> float:
        """Cosine similarity; returns 0.0 when either vector is near-zero."""
        norm_a = np.linalg.norm(a)
        norm_b = np.linalg.norm(b)
        if norm_a < 1e-8 or norm_b < 1e-8:
            return 0.0
        return float(np.dot(a, b) / (norm_a * norm_b))

    @staticmethod
    def permute(v: np.ndarray, shift: int = 1) -> np.ndarray:
        """Cyclically rotate components (positional tag for sequences)."""
        return np.roll(v, shift).astype(np.float32)

    @staticmethod
    def inverse_permute(v: np.ndarray, shift: int = 1) -> np.ndarray:
        """Undo ``permute`` by rotating in the opposite direction."""
        return np.roll(v, -shift).astype(np.float32)

    @classmethod
    def bind_sequence(cls, vectors: List[np.ndarray]) -> np.ndarray:
        """Order-preserving encoding: normalized sum of position-permuted items.

        S = sum_i permute(V_i, i); the rotation acts as a positional tag.
        """
        acc = np.zeros_like(vectors[0])
        for position, vec in enumerate(vectors):
            acc = acc + cls.permute(vec, position)
        length = np.linalg.norm(acc)
        if length > 1e-8:
            acc /= length
        return acc.astype(np.float32)

    @classmethod
    def encode_pair(cls, role: np.ndarray, filler: np.ndarray) -> np.ndarray:
        """Encode a role-filler pair as bind(role, filler)."""
        return cls.bind(role, filler)

    @classmethod
    def decode_filler(cls, structure: np.ndarray, role: np.ndarray) -> np.ndarray:
        """Recover the filler for *role* from a bound structure."""
        return cls.unbind(structure, role)

    @classmethod
    def encode_triple(cls, subject: np.ndarray, relation: np.ndarray,
                      obj: np.ndarray) -> np.ndarray:
        """Encode a knowledge triple (s, r, o) as bind(bind(s, r), o)."""
        return cls.bind(cls.bind(subject, relation), obj)
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 144 |
+
# Binary Binding (BSC - Binary Spatter Codes)
|
| 145 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 146 |
+
|
| 147 |
+
class BinaryBinding:
    """
    Binary Spatter Codes: XOR binding over packed uint64 hypervectors.

    Operates directly on the packed representation (e.g. 4096 bits as 64
    uint64 words), so binding costs one XOR per word.  XOR is its own
    inverse, so unbinding recovers the bound operand exactly; bundling is
    a per-bit majority vote and similarity is normalized Hamming agreement.
    """

    @staticmethod
    def bind(a: np.ndarray, b: np.ndarray) -> np.ndarray:
        """XOR the operands; applying the same key twice restores the input."""
        return xor_vectors(a, b)

    @staticmethod
    def unbind(c: np.ndarray, b: np.ndarray) -> np.ndarray:
        """Recover the bound partner — identical to ``bind`` since XOR is self-inverse."""
        return xor_vectors(c, b)

    @staticmethod
    def bundle(*vectors: np.ndarray) -> np.ndarray:
        """Per-bit majority vote (pass an odd count to avoid tie-breaking)."""
        if len(vectors) == 1:
            return vectors[0].copy()
        stacked = np.ascontiguousarray(np.stack(vectors))
        return majority_vote(stacked)

    @staticmethod
    def similarity(a: np.ndarray, b: np.ndarray) -> float:
        """Fraction of agreeing bits, normalized to [0, 1]."""
        return hamming_similarity(a, b)

    @staticmethod
    def permute(v: np.ndarray, shift: int = 1) -> np.ndarray:
        """Cyclic rotation of the whole bitstring by *shift* positions."""
        unpacked = np.unpackbits(v.view(np.uint8))
        return np.packbits(np.roll(unpacked, shift)).view(np.uint64).copy()

    @staticmethod
    def inverse_permute(v: np.ndarray, shift: int = 1) -> np.ndarray:
        """Rotate the bitstring back by *shift* positions."""
        unpacked = np.unpackbits(v.view(np.uint8))
        return np.packbits(np.roll(unpacked, -shift)).view(np.uint64).copy()

    @classmethod
    def bind_sequence(cls, vectors: List[np.ndarray]) -> np.ndarray:
        """Order-aware bundling: each item is position-tagged by rotation.

        S = bundle(permute(V_0, 0), permute(V_1, 1), ..., permute(V_n, n))
        """
        tagged = [cls.permute(item, pos) for pos, item in enumerate(vectors)]
        return cls.bundle(*tagged)

    @classmethod
    def encode_pair(cls, role: np.ndarray, filler: np.ndarray) -> np.ndarray:
        """Encode a role-filler pair: bind(role, filler)."""
        return cls.bind(role, filler)

    @classmethod
    def decode_filler(cls, structure: np.ndarray, role: np.ndarray) -> np.ndarray:
        """Extract the filler for *role* out of a bound structure."""
        return cls.unbind(structure, role)

    @classmethod
    def encode_triple(cls, subject: np.ndarray, relation: np.ndarray,
                      obj: np.ndarray) -> np.ndarray:
        """Encode a knowledge triple (s, r, o) as bind(bind(s, r), o)."""
        return cls.bind(cls.bind(subject, relation), obj)

    @classmethod
    def create_analogy_query(cls, a: np.ndarray, b: np.ndarray,
                             c: np.ndarray) -> np.ndarray:
        """Build the query vector for the analogy a : b :: c : ?.

        XOR of a and b captures their bit-difference (the 'relation');
        XOR-ing that difference onto c applies the same transformation.
        """
        return cls.bind(c, cls.bind(a, b))
|
| 233 |
+
|
| 234 |
+
|
| 235 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 236 |
+
# Hybrid Binding Engine
|
| 237 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 238 |
+
|
| 239 |
+
class BindingEngine:
    """
    Unified binding engine bridging binary (BSC) and real-valued (HRR) modes.

    Maintains a concept codebook (name -> packed binary vector) and converts
    between the binary and real domains when the FFT-based HRR path is used.
    Dispatch is by dtype: uint64 inputs take the XOR fast path when
    ``use_binary`` is set; everything else goes through HRR.
    """

    def __init__(self, dim: int = N_BITS, use_binary: bool = True):
        self.dim = dim
        self.use_binary = use_binary
        self.binary = BinaryBinding()
        self.hrr = HRRBinding(dim)

        # Concept codebook: name -> packed binary vector (uint64 words).
        self._codebook: dict = {}

    def register_concept(self, name: str, vector: Optional[np.ndarray] = None) -> np.ndarray:
        """Register a named concept with a binary vector.

        A fresh random vector is drawn when none is supplied.  The codebook
        stores a private copy, so later mutation of the argument is harmless.
        Returns the vector that was registered.
        """
        if vector is None:
            vector = random_binary_vector()
        self._codebook[name] = vector.copy()
        return vector

    def get_concept(self, name: str) -> Optional[np.ndarray]:
        """Return the binary vector for a named concept, or None if absent."""
        return self._codebook.get(name)

    def bind(self, a: np.ndarray, b: np.ndarray) -> np.ndarray:
        """Bind two vectors using the method matching their dtype."""
        if self.use_binary and a.dtype == np.uint64:
            return self.binary.bind(a, b)
        return self.hrr.bind(a, b)

    def unbind(self, c: np.ndarray, b: np.ndarray) -> np.ndarray:
        """Unbind: recover A from C = bind(A, B) given B."""
        if self.use_binary and c.dtype == np.uint64:
            return self.binary.unbind(c, b)
        return self.hrr.unbind(c, b)

    def bundle(self, *vectors: np.ndarray) -> np.ndarray:
        """Bundle (superpose) one or more vectors.

        Raises:
            ValueError: if called with no vectors (previously surfaced as a
                bare IndexError from the dtype dispatch below).
        """
        if not vectors:
            raise ValueError("bundle() requires at least one vector")
        if self.use_binary and vectors[0].dtype == np.uint64:
            return self.binary.bundle(*vectors)
        return self.hrr.bundle(*vectors)

    def similarity(self, a: np.ndarray, b: np.ndarray) -> float:
        """Similarity between vectors (Hamming for binary, cosine for HRR)."""
        if self.use_binary and a.dtype == np.uint64:
            return self.binary.similarity(a, b)
        return self.hrr.similarity(a, b)

    def encode_relation(self, subject: str, relation: str, obj: str) -> np.ndarray:
        """Encode a semantic triple between named concepts.

        Unknown concepts are auto-registered with fresh random vectors.
        """
        for name in (subject, relation, obj):
            if name not in self._codebook:
                self.register_concept(name)

        s = self._codebook[subject]
        r = self._codebook[relation]
        o = self._codebook[obj]

        if self.use_binary:
            return self.binary.encode_triple(s, r, o)
        return self.hrr.encode_triple(
            self._to_real(s), self._to_real(r), self._to_real(o)
        )

    def solve_analogy(self, a: str, b: str, c: str,
                      candidates: Optional[List[str]] = None) -> List[Tuple[str, float]]:
        """Solve the analogy a:b :: c:? over the codebook.

        Returns candidates ranked by similarity to the analogy query,
        best first.

        Raises:
            ValueError: if a, b, c, or any explicitly supplied candidate
                name is not registered in the codebook.
        """
        va = self._codebook.get(a)
        vb = self._codebook.get(b)
        vc = self._codebook.get(c)
        if va is None or vb is None or vc is None:
            raise ValueError(f"Unknown concept(s): {a}, {b}, {c}")

        if self.use_binary:
            query = self.binary.create_analogy_query(va, vb, vc)
        else:
            query = self.hrr.unbind(
                self.hrr.bind(self._to_real(vb), self._to_real(vc)),
                self._to_real(va)
            )

        # Search candidates (defaults to the whole codebook).
        search_names = candidates or list(self._codebook.keys())
        results = []
        for name in search_names:
            vec = self._codebook.get(name)
            if vec is None:
                # Consistency fix: an unknown explicit candidate used to
                # leak a bare KeyError here; raise the same exception type
                # as the a/b/c validation above with a clear message.
                raise ValueError(f"Unknown candidate concept: {name}")
            if self.use_binary:
                sim = self.binary.similarity(query, vec)
            else:
                sim = self.hrr.similarity(query, self._to_real(vec))
            results.append((name, sim))

        results.sort(key=lambda x: x[1], reverse=True)
        return results

    def _to_real(self, binary_vec: np.ndarray) -> np.ndarray:
        """Convert a packed binary vector to a real-valued +/-1 float32 vector."""
        bits = np.unpackbits(binary_vec.view(np.uint8)).astype(np.float32)
        return 2.0 * bits - 1.0  # {0,1} -> {-1,+1}

    def _to_binary(self, real_vec: np.ndarray) -> np.ndarray:
        """Convert a real-valued vector to packed binary (threshold at 0)."""
        bits = (real_vec > 0).astype(np.uint8)
        return np.packbits(bits).view(np.uint64).copy()
|
mle/demo.py
ADDED
|
@@ -0,0 +1,292 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
MLE β Morpho-Logic Engine β Interactive Demo
|
| 4 |
+
==============================================
|
| 5 |
+
Demonstrates the full reasoning pipeline:
|
| 6 |
+
1. Knowledge base construction
|
| 7 |
+
2. Query injection β routing β binding β energy minimization β response
|
| 8 |
+
3. Analogy solving
|
| 9 |
+
4. Concept composition
|
| 10 |
+
5. Multi-hop reasoning
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
import numpy as np
|
| 14 |
+
import time
|
| 15 |
+
import sys
|
| 16 |
+
import os
|
| 17 |
+
|
| 18 |
+
sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
|
| 19 |
+
|
| 20 |
+
from mle import MorphoLogicEngine
|
| 21 |
+
from mle.binding import BinaryBinding
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def banner(text):
    """Print *text* framed above and below by a 70-character rule."""
    rule = 'β' * 70
    print(f"\n{rule}")
    print(f" {text}")
    print(rule)
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def main():
    """Drive the full MLE demo pipeline, printing all results to stdout.

    Sections: engine init, knowledge-base construction, concept queries,
    association, analogy solving, composition, structured (role-filler)
    queries, multi-step reasoning, and a 100-query latency benchmark.
    """
    # Splash screen.  The box-drawing art is runtime output and is left
    # untouched (it appears mojibake'd in this source view).
    print("""
    ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
    β β
    β ββββ ββββ βββ ββββββββ β
    β βββββ βββββ βββ ββββββββ β
    β βββββββββββ βββ ββββββ Morpho-Logic Engine β
    β βββββββββββ βββ ββββββ v0.1.0 β
    β βββ βββ βββ ββββββββββββββββ β
    β βββ βββ ββββββββββββββββ Energy-Based Reasoning AI β
    β β
    β 4096-bit Hyperdimensional Vectors Β· SIMD-Optimized Β· CPU-Native β
    β Sparse Distributed Memory Β· Circular Convolution Binding β
    β Hopfield Energy Dynamics Β· Gradient-Free Reasoning β
    β β
    ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
    """)

    # Fixed seed -> deterministic concept vectors and reproducible output.
    np.random.seed(42)

    # ----------------------------------------------------------------------
    # 1. Initialize Engine
    # ----------------------------------------------------------------------
    banner("1. INITIALIZING ENGINE")

    # NOTE(review): keyword meanings assumed from their names — confirm
    # against MorphoLogicEngine's constructor signature.
    engine = MorphoLogicEngine(
        beam_width=500,
        max_routing_depth=3,
        max_reasoning_steps=5,
        energy_mode='hybrid',
        hopfield_beta=8.0,
        relaxation_iterations=30,
    )
    print(" β Engine initialized (beam=500, hybrid energy, Hopfield Ξ²=8.0)")

    # ----------------------------------------------------------------------
    # 2. Build Knowledge Base
    # ----------------------------------------------------------------------
    banner("2. BUILDING KNOWLEDGE BASE")

    # Concepts organized by category; each category lists concept names
    # plus (subject, relation, object) triples.
    knowledge = {
        'Animals': {
            'concepts': ['cat', 'dog', 'fish', 'bird', 'horse', 'eagle', 'dolphin', 'snake'],
            'relations': [
                ('cat', 'is_a', 'animal'), ('dog', 'is_a', 'animal'),
                ('fish', 'is_a', 'animal'), ('bird', 'is_a', 'animal'),
                ('horse', 'is_a', 'animal'), ('eagle', 'is_a', 'bird'),
                ('dolphin', 'is_a', 'animal'), ('snake', 'is_a', 'animal'),
                ('cat', 'is_a', 'pet'), ('dog', 'is_a', 'pet'),
                ('fish', 'lives_in', 'water'), ('dolphin', 'lives_in', 'water'),
                ('bird', 'has', 'wing'), ('bird', 'can', 'fly'),
                ('eagle', 'can', 'fly'), ('fish', 'can', 'swim'),
                ('dolphin', 'can', 'swim'), ('horse', 'can', 'run'),
                ('snake', 'can', 'crawl'),
            ]
        },
        'Nature': {
            'concepts': ['water', 'ocean', 'river', 'sky', 'forest', 'mountain',
                         'tree', 'leaf', 'flower', 'rain'],
            'relations': [
                ('tree', 'has', 'leaf'), ('tree', 'in', 'forest'),
                ('flower', 'has', 'color'), ('rain', 'from', 'sky'),
                ('river', 'contains', 'water'), ('ocean', 'contains', 'water'),
                ('mountain', 'has', 'peak'),
            ]
        },
        'Royalty': {
            'concepts': ['king', 'queen', 'prince', 'princess',
                         'man', 'woman', 'child', 'crown', 'throne'],
            'relations': [
                ('king', 'is_a', 'man'), ('queen', 'is_a', 'woman'),
                ('prince', 'is_a', 'man'), ('princess', 'is_a', 'woman'),
                ('king', 'married_to', 'queen'),
                ('king', 'has', 'crown'), ('king', 'sits_on', 'throne'),
                ('prince', 'child_of', 'king'),
                ('princess', 'child_of', 'queen'),
            ]
        },
        'Vehicles': {
            'concepts': ['car', 'boat', 'airplane', 'bicycle',
                         'wheel', 'engine', 'road', 'wing'],
            'relations': [
                ('car', 'has', 'wheel'), ('car', 'has', 'engine'),
                ('car', 'on', 'road'), ('boat', 'on', 'water'),
                ('airplane', 'has', 'wing'), ('airplane', 'can', 'fly'),
                ('bicycle', 'has', 'wheel'),
            ]
        },
        'Abstract': {
            'concepts': ['animal', 'pet', 'color', 'peak', 'speed', 'size'],
            'relations': []
        }
    }

    # Meta-relations: relation names registered as first-class concepts below.
    meta_relations = ['is_a', 'has', 'can', 'lives_in', 'in', 'on',
                      'married_to', 'child_of', 'from', 'contains', 'sits_on']

    total_concepts = 0
    total_relations = 0
    for category, data in knowledge.items():
        for c in data['concepts']:
            engine.add_concept(c)
            total_concepts += 1
        for s, r, o in data['relations']:
            engine.add_relation(s, r, o)
            total_relations += 1

    # Register any relation name not already present in the codebook.
    for r in meta_relations:
        if engine.binding.get_concept(r) is None:
            engine.add_concept(r)
            total_concepts += 1

    stats = engine.stats()
    print(f" β {total_concepts} concepts loaded")
    print(f" β {total_relations} relations stored")
    print(f" β Memory: {stats['memory']['size']} entries, "
          f"{stats['memory']['memory_mb']:.2f} MB")

    # ----------------------------------------------------------------------
    # 3. Simple Concept Queries
    # ----------------------------------------------------------------------
    banner("3. CONCEPT QUERIES")

    queries = ["cat", "ocean", "king", "airplane"]
    for q in queries:
        result = engine.reason(q, max_steps=3)
        top3 = result['response']['nearest_concepts'][:3]
        # Keep only the finite energies along the reasoning chain
        # (inf marks steps where no energy was evaluated).
        energies = [s.energy for s in result['reasoning_chain']
                    if s.energy != float('inf')]
        e_str = " β ".join(f"{e:.4f}" for e in energies) if energies else "N/A"
        print(f"\n Query: '{q}'")
        print(f" Nearest: {[(n, f'{s:.3f}') for n, s in top3]}")
        print(f" Energy: {e_str}")
        print(f" Steps: {result['num_steps']}, Latency: {result['latency_ms']:.0f}ms")

    # ----------------------------------------------------------------------
    # 4. Association
    # ----------------------------------------------------------------------
    banner("4. ASSOCIATION QUERIES")

    for concept in ["cat", "water", "king"]:
        assocs = engine.associate(concept, top_k=5)
        print(f"\n '{concept}' β {[(n, f'{s:.3f}') for n, s in assocs[:5]]}")

    # ----------------------------------------------------------------------
    # 5. Analogy Solving
    # ----------------------------------------------------------------------
    banner("5. ANALOGY SOLVING")

    analogies = [
        ("king", "man", "queen"),  # king:man :: queen:? -> woman
        ("bird", "fly", "fish"),   # bird:fly :: fish:? -> swim
        ("car", "road", "boat"),   # car:road :: boat:? -> water
    ]

    for a, b, c in analogies:
        result = engine.solve_analogy(a, b, c)
        ranking = result['codebook_ranking'][:5]
        print(f"\n {a} : {b} :: {c} : ?")
        print(f" Top-5: {[(n, f'{s:.3f}') for n, s in ranking]}")
        print(f" Latency: {result['latency_ms']:.0f}ms")

    # ----------------------------------------------------------------------
    # 6. Concept Composition
    # ----------------------------------------------------------------------
    banner("6. CONCEPT COMPOSITION")

    compositions = [
        ("water", "animal"),  # expected to surface fish/dolphin
        ("sky", "animal"),    # expected to surface bird/eagle
        ("man", "crown"),     # expected to surface king
    ]

    for concepts in compositions:
        result = engine.compose(*concepts)
        top5 = result['response']['nearest_concepts'][:5]
        print(f"\n {' + '.join(concepts)} β ?")
        print(f" Top-5: {[(n, f'{s:.3f}') for n, s in top5]}")

    # ----------------------------------------------------------------------
    # 7. Structured Queries (Role-Filler)
    # ----------------------------------------------------------------------
    banner("7. STRUCTURED QUERIES")

    struct_queries = [
        ({"subject": "bird", "relation": "can"}, ["subject", "relation"]),
        ({"subject": "king", "relation": "has"}, ["subject", "relation"]),
    ]

    for query_dict, roles in struct_queries:
        result = engine.reason(query_dict, max_steps=2, roles=roles)
        print(f"\n Query: {query_dict}")
        # role_fillers may be absent or empty; print only when provided.
        if result['response'].get('role_fillers'):
            for role, fillers in result['response']['role_fillers'].items():
                print(f" {role} β {[(n, f'{s:.3f}') for n, s in fillers[:3]]}")

    # ----------------------------------------------------------------------
    # 8. Multi-Step Reasoning Chain
    # ----------------------------------------------------------------------
    banner("8. MULTI-STEP REASONING (Energy Trajectory)")

    result = engine.reason("forest", max_steps=5)
    chain = result['reasoning_chain']

    print(f"\n Query: 'forest'")
    print(f" Steps: {len(chain)}")
    for i, step in enumerate(chain):
        routing = step.routing_result
        # A step may have no routing result; report zero candidates then.
        n_candidates = len(routing.indices) if routing else 0
        print(f" Step {i}: E={step.energy:.4f}, "
              f"candidates={n_candidates}, "
              f"t={step.timestamp:.3f}s")

    print(f"\n Total energy reduction: {result['total_energy_reduction']:.4f}")
    print(f" Total latency: {result['latency_ms']:.0f}ms")
    print(f"\n Final response:")
    top5 = result['response']['nearest_concepts'][:5]
    print(f" Nearest: {[(n, f'{s:.3f}') for n, s in top5]}")

    # ----------------------------------------------------------------------
    # 9. Performance Summary
    # ----------------------------------------------------------------------
    banner("9. PERFORMANCE SUMMARY")

    # Benchmark: 100 random queries drawn from the registered codebook.
    latencies = []
    for _ in range(100):
        concept = np.random.choice(list(engine.binding._codebook.keys()))
        t0 = time.perf_counter()
        engine.reason(concept, max_steps=2)
        latencies.append((time.perf_counter() - t0) * 1000)

    print(f"""
 Query Performance (100 queries, 2 reasoning steps):
 Average latency: {np.mean(latencies):>8.1f} ms
 Median latency: {np.median(latencies):>8.1f} ms
 P95 latency: {np.percentile(latencies, 95):>8.1f} ms
 P99 latency: {np.percentile(latencies, 99):>8.1f} ms
 Throughput: {1000/np.mean(latencies):>8.1f} queries/sec

 Memory Footprint:
 Entries: {stats['memory']['size']:>8d}
 Memory: {stats['memory']['memory_mb']:>8.2f} MB
 Per entry: {stats['memory']['memory_mb']*1024*1024/max(stats['memory']['size'],1):>8.0f} bytes

 Architecture:
 Vector dimension: {4096:>8d} bits
 Storage per vec: {512:>8d} bytes (64 Γ uint64)
 Beam width: {engine.router.beam_width:>8d}
 LSH tables: {engine.memory.lsh.n_tables:>8d}
 LSH projections: {engine.memory.lsh.n_projections:>8d}
 """)

    print("β" * 70)
    print(" MLE Demo Complete β")
    print("β" * 70)
|
| 289 |
+
|
| 290 |
+
|
| 291 |
+
# Script entry point: run the full demo when executed directly
# (e.g. `python mle/demo.py`), but not on import.
if __name__ == '__main__':
    main()
|
mle/energy/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
from .energy_model import EnergyFunction, RelaxationDynamics, HopfieldDynamics, EnergyModel
|
mle/energy/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (259 Bytes). View file
|
|
|
mle/energy/__pycache__/energy_model.cpython-312.pyc
ADDED
|
Binary file (22.9 kB). View file
|
|
|
mle/energy/energy_model.py
ADDED
|
@@ -0,0 +1,494 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
MLE Energy Module: Energy-Based Reasoning Dynamics
|
| 3 |
+
====================================================
|
| 4 |
+
Replaces backpropagation with local energy functions and relaxation dynamics.
|
| 5 |
+
|
| 6 |
+
The energy landscape guides reasoning:
|
| 7 |
+
- Low energy states = coherent, consistent representations
|
| 8 |
+
- High energy states = contradictions, ambiguity
|
| 9 |
+
- Reasoning = trajectory from high to low energy (relaxation)
|
| 10 |
+
|
| 11 |
+
Energy components:
|
| 12 |
+
1. Compatibility energy: measures how well activated memories agree
|
| 13 |
+
2. Binding coherence: measures structural consistency of relations
|
| 14 |
+
3. Sparsity energy: encourages focused activation (not everything at once)
|
| 15 |
+
4. Temporal smoothness: penalizes erratic state changes
|
| 16 |
+
|
| 17 |
+
The system uses iterative relaxation (not gradient descent):
|
| 18 |
+
- Each step flips/adjusts the state component that most reduces energy
|
| 19 |
+
- Convergence is guaranteed for bounded energy functions
|
| 20 |
+
- No gradients stored β O(1) memory per parameter
|
| 21 |
+
"""
|
| 22 |
+
|
| 23 |
+
import numpy as np
|
| 24 |
+
from typing import Optional, Tuple, List, Dict, Any
|
| 25 |
+
import logging
|
| 26 |
+
|
| 27 |
+
from ..utils.simd_ops import (
|
| 28 |
+
N_BITS, N_WORDS,
|
| 29 |
+
hamming_distance, hamming_batch, hamming_similarity,
|
| 30 |
+
xor_vectors, popcount, random_binary_vector
|
| 31 |
+
)
|
| 32 |
+
|
| 33 |
+
logger = logging.getLogger(__name__)
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
class EnergyFunction:
    """
    Composite energy function for the MLE system.

    E(state) = α·E_compat + β·E_binding + γ·E_sparse + δ·E_smooth

    Components:
    - E_compat: negated (weighted) normalized Hamming similarity between the
      state and the activated context vectors — lower when they agree.
    - E_binding: mean normalized unbinding error over bound structures —
      lower when role/filler bindings are intact.
    - E_sparse: L1 norm of the activation vector — encourages focus.
    - E_smooth: normalized Hamming distance from the previous state —
      penalizes erratic jumps.
    """

    def __init__(
        self,
        alpha: float = 1.0,   # compatibility weight
        beta: float = 0.5,    # binding coherence weight
        gamma: float = 0.1,   # sparsity weight
        delta: float = 0.05,  # smoothness weight
        temperature: float = 1.0,
    ):
        self.alpha = alpha
        self.beta = beta
        self.gamma = gamma
        self.delta = delta
        # NOTE(review): temperature is stored but never read by this class;
        # the annealing temperature used during search lives in the dynamics.
        self.temperature = temperature

    def compatibility_energy(self, state_vector: np.ndarray,
                             context_vectors: np.ndarray,
                             weights: Optional[np.ndarray] = None) -> float:
        """Negated similarity of ``state_vector`` to the context set.

        E_compat = -Σ_i w_i · sim(state, context_i) with normalized Hamming
        similarity; unweighted form uses the mean. Lower = more compatible.
        Returns 0.0 for an empty context.
        """
        if len(context_vectors) == 0:
            return 0.0

        distances = hamming_batch(state_vector, np.ascontiguousarray(context_vectors))
        sims = 1.0 - distances.astype(np.float64) / N_BITS

        # Negated because the system minimizes energy.
        if weights is None:
            return -sims.mean()
        return -(sims * weights).sum()

    def binding_coherence_energy(self, bindings: List[Tuple[np.ndarray, np.ndarray, np.ndarray]]) -> float:
        """Mean normalized unbinding error over (bound, role, filler) triples.

        For each triple, unbind via XOR with the role and measure the
        normalized Hamming distance to the expected filler.
        Lower = bindings are more intact. Returns 0.0 when no bindings given.
        """
        if not bindings:
            return 0.0

        errors = [
            hamming_distance(xor_vectors(bound_vec, role), expected_filler) / N_BITS
            for bound_vec, role, expected_filler in bindings
        ]
        return sum(errors) / len(errors)

    def sparsity_energy(self, activations: np.ndarray) -> float:
        """L1 penalty on activations to encourage focused reasoning.

        E_sparse = ||activations||_1
        """
        return float(np.abs(activations).sum())

    def smoothness_energy(self, current_state: np.ndarray,
                          previous_state: Optional[np.ndarray] = None) -> float:
        """Normalized Hamming distance to the previous state (0.0 if none).

        Penalizes large per-step state changes (temporal smoothness).
        """
        if previous_state is None:
            return 0.0
        return hamming_distance(current_state, previous_state) / N_BITS

    def total_energy(self, state_vector: np.ndarray,
                     context_vectors: np.ndarray,
                     activations: np.ndarray,
                     bindings: Optional[List] = None,
                     previous_state: Optional[np.ndarray] = None,
                     weights: Optional[np.ndarray] = None) -> Dict[str, float]:
        """Compute the total energy and each weighted component.

        Returns a dict keyed 'total', 'compatibility', 'binding_coherence',
        'sparsity', 'smoothness'.
        """
        terms = {
            'compatibility': self.alpha * self.compatibility_energy(
                state_vector, context_vectors, weights
            ),
            'binding_coherence': self.beta * self.binding_coherence_energy(bindings or []),
            'sparsity': self.gamma * self.sparsity_energy(activations),
            'smoothness': self.delta * self.smoothness_energy(state_vector, previous_state),
        }
        return {'total': sum(terms.values()), **terms}
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
class RelaxationDynamics:
    """
    Iterative relaxation engine for energy minimization over binary states.

    At each step:
    1. Generate ``n_candidates`` random perturbations of the current state
       (uniform bit flips, optionally biased toward supplied attractors).
    2. Pick the lowest-energy candidate.
    3. Accept it greedily if it lowers energy, or probabilistically via the
       Metropolis rule exp(-ΔE/T) if it raises it (simulated annealing).
    4. Cool the temperature and repeat until improvement falls below
       ``convergence_threshold`` or ``max_iterations`` is reached.

    The best state seen so far is tracked separately, so an accepted uphill
    move can never worsen the returned result.
    """

    def __init__(
        self,
        energy_fn: EnergyFunction,
        max_iterations: int = 50,
        n_candidates: int = 32,       # perturbations to try per step
        flip_fraction: float = 0.05,  # fraction of bits to flip per perturbation
        cooling_rate: float = 0.95,   # multiplicative temperature decay per step
        initial_temperature: float = 1.0,
        convergence_threshold: float = 1e-4,
    ):
        self.energy_fn = energy_fn
        self.max_iterations = max_iterations
        self.n_candidates = n_candidates
        self.flip_fraction = flip_fraction
        self.cooling_rate = cooling_rate
        self.initial_temp = initial_temperature
        self.convergence_threshold = convergence_threshold

    def _perturb(self, state: np.ndarray, n_flips: int) -> np.ndarray:
        """Return a copy of ``state`` with ``n_flips`` random bits flipped."""
        bits = np.unpackbits(state.view(np.uint8)).copy()
        flip_positions = np.random.choice(N_BITS, n_flips, replace=False)
        bits[flip_positions] ^= 1
        return np.packbits(bits).view(np.uint64).copy()

    def _perturb_toward(self, state: np.ndarray, target: np.ndarray,
                        strength: float = 0.1) -> np.ndarray:
        """Return a copy of ``state`` with a fraction of its differing bits
        set to ``target``'s values (biased flip toward an attractor)."""
        state_bits = np.unpackbits(state.view(np.uint8))
        target_bits = np.unpackbits(target.view(np.uint8))

        # Only bits that differ can move the state toward the target.
        diff = np.where(state_bits != target_bits)[0]
        if len(diff) == 0:
            return state.copy()

        n_flip = max(1, int(len(diff) * strength))
        to_flip = np.random.choice(diff, min(n_flip, len(diff)), replace=False)

        new_bits = state_bits.copy()
        new_bits[to_flip] = target_bits[to_flip]
        return np.packbits(new_bits).view(np.uint64).copy()

    def relax(self, initial_state: np.ndarray,
              context_vectors: np.ndarray,
              activations: np.ndarray,
              bindings: Optional[List] = None,
              attractors: Optional[np.ndarray] = None,
              weights: Optional[np.ndarray] = None) -> Dict[str, Any]:
        """
        Run relaxation dynamics to minimize energy.

        Args:
            initial_state: starting state vector (N_WORDS,) uint64
            context_vectors: activated memory contents (M, N_WORDS) uint64
            activations: activation strengths (M,) float64
            bindings: list of (bound, role, filler) tuples to check coherence
            attractors: target vectors to bias perturbations toward
            weights: per-context weights for compatibility

        Returns:
            Dict with final_state, final_energy, trajectory (energy dicts for
            the initial state and each accepted move), iterations, converged,
            and energy_reduction.
        """
        state = initial_state.copy()
        n_flips = max(1, int(N_BITS * self.flip_fraction))
        temperature = self.initial_temp

        # Energy of the starting state (no previous state yet).
        energy_info = self.energy_fn.total_energy(
            state, context_vectors, activations, bindings, None, weights
        )
        current_energy = energy_info['total']

        trajectory = [energy_info]
        best_state = state.copy()
        best_energy = current_energy
        prev_state = None
        iteration = -1  # guards against UnboundLocalError when max_iterations == 0

        for iteration in range(self.max_iterations):
            # Generate candidate perturbations.
            candidates = []
            for _ in range(self.n_candidates):
                if attractors is not None and len(attractors) > 0 and np.random.random() < 0.5:
                    # Biased perturbation toward a randomly chosen attractor.
                    target_idx = np.random.randint(len(attractors))
                    candidate = self._perturb_toward(
                        state, attractors[target_idx],
                        strength=self.flip_fraction
                    )
                else:
                    candidate = self._perturb(state, n_flips)
                candidates.append(candidate)

            # Evaluate all candidates and keep the lowest-energy one.
            # FIX: previously this was initialized to current_energy, so only
            # strictly-improving candidates were ever proposed and the
            # annealing (uphill acceptance) branch below was dead code.
            best_candidate = None
            best_candidate_energy = np.inf
            best_candidate_info = None

            for candidate in candidates:
                e = self.energy_fn.total_energy(
                    candidate, context_vectors, activations,
                    bindings, prev_state, weights
                )
                if e['total'] < best_candidate_energy:
                    best_candidate = candidate
                    best_candidate_energy = e['total']
                    best_candidate_info = e

            # Metropolis accept/reject (simulated annealing): always accept
            # downhill moves; accept uphill moves with prob exp(-ΔE/T).
            if best_candidate is not None:
                delta_e = best_candidate_energy - current_energy
                if delta_e < 0 or np.random.random() < np.exp(-delta_e / max(temperature, 1e-8)):
                    prev_state = state.copy()
                    state = best_candidate
                    current_energy = best_candidate_energy
                    trajectory.append(best_candidate_info)

                    # Track the global best independently of SA acceptance.
                    if current_energy < best_energy:
                        best_state = state.copy()
                        best_energy = current_energy

            # Cool down the annealing temperature.
            temperature *= self.cooling_rate

            # Convergence: stop when the last accepted move barely helped.
            if len(trajectory) >= 2:
                improvement = abs(trajectory[-2]['total'] - trajectory[-1]['total'])
                if improvement < self.convergence_threshold:
                    logger.debug(f"Relaxation converged at iteration {iteration}")
                    break

        return {
            'final_state': best_state,
            'final_energy': best_energy,
            'trajectory': trajectory,
            'iterations': iteration + 1,
            'converged': iteration < self.max_iterations - 1,
            'energy_reduction': trajectory[0]['total'] - best_energy,
        }
|
| 301 |
+
|
| 302 |
+
|
| 303 |
+
class HopfieldDynamics:
    """
    Modern Hopfield Network dynamics for continuous-state energy minimization.

    Update rule (Ramsauer et al. 2020):
        ξ_new = X @ softmax(β · X^T @ ξ)

    where X holds stored patterns as columns, ξ is the current state and
    β is the inverse temperature (higher = sharper attention).

    Energy: E(ξ) = -lse(β, X^T @ ξ) + 0.5·||ξ||²

    Binary vectors are handled by converting the packed uint64 form to a
    ±1 real-valued (bipolar) representation and back.
    """

    def __init__(
        self,
        beta: float = 8.0,
        max_iterations: int = 20,
        convergence_eps: float = 1e-6,
    ):
        self.beta = beta
        self.max_iterations = max_iterations
        self.convergence_eps = convergence_eps

    def _to_bipolar(self, packed: np.ndarray) -> np.ndarray:
        """Expand packed binary words into a ±1 float vector."""
        unpacked = np.unpackbits(packed.view(np.uint8)).astype(np.float64)
        return unpacked * 2.0 - 1.0

    def _to_packed(self, bipolar: np.ndarray) -> np.ndarray:
        """Threshold a real vector at 0 and repack into uint64 words."""
        return np.packbits((bipolar > 0).astype(np.uint8)).view(np.uint64).copy()

    def _attention(self, xi: np.ndarray, X: np.ndarray) -> np.ndarray:
        """Softmax attention over stored patterns for the current state.

        Shifts scores by their max for numerical stability; the small
        epsilon in the denominator guards against an all-zero numerator.
        """
        scores = self.beta * (X.T @ xi)
        scores = scores - scores.max()
        weights = np.exp(scores)
        return weights / (weights.sum() + 1e-12)

    def energy(self, xi: np.ndarray, X: np.ndarray) -> float:
        """Hopfield energy of state ``xi`` (D,) given patterns ``X`` (D, N).

        E = -logsumexp(β · X^T @ ξ) / β + 0.5·||ξ||²
        """
        scores = self.beta * (X.T @ xi)
        peak = scores.max()
        lse = peak + np.log(np.sum(np.exp(scores - peak)))
        return 0.5 * np.dot(xi, xi) - lse / self.beta

    def update(self, xi: np.ndarray, X: np.ndarray) -> np.ndarray:
        """One Hopfield step: ξ_new = X @ softmax(β · X^T @ ξ)."""
        return X @ self._attention(xi, X)

    def relax(self, query_packed: np.ndarray,
              patterns_packed: np.ndarray,
              activations: Optional[np.ndarray] = None) -> Dict[str, Any]:
        """
        Run Hopfield relaxation from a query toward the stored patterns.

        Args:
            query_packed: (N_WORDS,) uint64 initial state
            patterns_packed: (M, N_WORDS) uint64 stored patterns
            activations: (M,) optional per-pattern weights

        Returns:
            Dict with final_state (packed), final_state_real, final_energy,
            energy_trajectory, attention_weights, iterations, converged.
        """
        xi = self._to_bipolar(query_packed)
        # Patterns become the columns of X: shape (D, M).
        X = np.stack([self._to_bipolar(p) for p in patterns_packed]).T

        if activations is not None:
            # Scale each pattern column by its activation strength.
            X = X * activations[np.newaxis, :]

        trajectory = []
        attention_history = []

        for iteration in range(self.max_iterations):
            trajectory.append(self.energy(xi, X))
            # Attention is recorded for the pre-update state.
            attention_history.append(self._attention(xi, X))

            xi_next = self.update(xi, X)
            step_size = np.linalg.norm(xi_next - xi)
            xi = xi_next
            if step_size < self.convergence_eps:
                break

        final_packed = self._to_packed(xi)
        final_energy = self.energy(xi, X)
        trajectory.append(final_energy)

        return {
            'final_state': final_packed,
            'final_state_real': xi,
            'final_energy': final_energy,
            'energy_trajectory': trajectory,
            'attention_weights': attention_history[-1] if attention_history else None,
            'iterations': iteration + 1,
            'converged': iteration < self.max_iterations - 1,
        }
|
| 417 |
+
|
| 418 |
+
|
| 419 |
+
class EnergyModel:
    """
    Unified energy model combining a local energy function with relaxation.

    Modes:
    - 'binary': direct bit-flip search (greedy + simulated annealing)
    - 'hopfield': continuous-state Hopfield updates, then discretize
    - 'hybrid': Hopfield first for fast coarse convergence, then binary
      refinement of the discretized result

    The energy model drives reasoning: the query activates memories (a high
    energy, incoherent state), relaxation reduces energy (resolving
    conflicts), and the final low-energy configuration is the "answer".
    """

    def __init__(
        self,
        mode: str = 'hybrid',  # 'binary', 'hopfield', or 'hybrid'
        energy_fn: Optional[EnergyFunction] = None,
        binary_dynamics: Optional[RelaxationDynamics] = None,
        hopfield_dynamics: Optional[HopfieldDynamics] = None,
    ):
        self.mode = mode
        # Default collaborators are built lazily so callers may inject
        # pre-configured instances.
        self.energy_fn = energy_fn or EnergyFunction()
        self.binary_dynamics = binary_dynamics or RelaxationDynamics(self.energy_fn)
        self.hopfield_dynamics = hopfield_dynamics or HopfieldDynamics()

    def minimize(self, initial_state: np.ndarray,
                 context_vectors: np.ndarray,
                 activations: np.ndarray,
                 bindings: Optional[List] = None,
                 **kwargs) -> Dict[str, Any]:
        """
        Minimize energy from an initial state; dispatches on ``self.mode``.

        In 'hybrid' mode, Hopfield relaxation supplies a coarse solution
        which binary relaxation then refines, using the first contexts
        (up to 10) as attractors. ``kwargs`` are forwarded to the binary
        relaxation only.
        """
        if self.mode == 'hopfield':
            return self.hopfield_dynamics.relax(
                initial_state, context_vectors, activations
            )

        if self.mode == 'binary':
            return self.binary_dynamics.relax(
                initial_state, context_vectors, activations, bindings, **kwargs
            )

        # hybrid: coarse continuous pass, then discrete refinement.
        coarse = self.hopfield_dynamics.relax(
            initial_state, context_vectors, activations
        )

        fine = self.binary_dynamics.relax(
            coarse['final_state'],
            context_vectors, activations, bindings,
            attractors=context_vectors[:min(10, len(context_vectors))],
            **kwargs
        )

        hop_traj = coarse['energy_trajectory']
        return {
            'final_state': fine['final_state'],
            'final_energy': fine['final_energy'],
            'hopfield_energy_trajectory': hop_traj,
            'binary_energy_trajectory': [step['total'] for step in fine['trajectory']],
            'hopfield_attention': coarse.get('attention_weights'),
            'total_iterations': coarse['iterations'] + fine['iterations'],
            'converged': fine['converged'],
            # Reduction measured from the first Hopfield energy to the
            # refined result; 0.0 if the trajectory is empty.
            'energy_reduction': (
                hop_traj[0] - fine['final_energy']
            ) if hop_traj else 0.0,
        }
|
mle/inference/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
from .reasoning_engine import ReasoningEngine, QueryEncoder, ResponseDecoder
|
mle/inference/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (238 Bytes). View file
|
|
|
mle/inference/__pycache__/reasoning_engine.cpython-312.pyc
ADDED
|
Binary file (25.5 kB). View file
|
|
|
mle/inference/reasoning_engine.py
ADDED
|
@@ -0,0 +1,555 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
MLE Inference Module: Reasoning Engine
|
| 3 |
+
========================================
|
| 4 |
+
The complete reasoning pipeline that integrates all modules:
|
| 5 |
+
|
| 6 |
+
Query β Encoding β Routing β Binding β Energy Minimization β Response
|
| 7 |
+
|
| 8 |
+
The "chain of thought" is an internal trajectory through state space:
|
| 9 |
+
1. ENCODE: Transform input query into a binary vector
|
| 10 |
+
2. ROUTE: Activate relevant memory regions (top-500 beam)
|
| 11 |
+
3. BIND: Construct composite representations from activated memories
|
| 12 |
+
4. RELAX: Minimize energy to find coherent state
|
| 13 |
+
5. DECODE: Extract response from final state
|
| 14 |
+
|
| 15 |
+
Multiple reasoning iterations are possible (iterative deepening):
|
| 16 |
+
each iteration uses the previous result as context for the next.
|
| 17 |
+
"""
|
| 18 |
+
|
| 19 |
+
import numpy as np
|
| 20 |
+
from typing import Optional, List, Dict, Any, Tuple
|
| 21 |
+
import logging
|
| 22 |
+
import time
|
| 23 |
+
import hashlib
|
| 24 |
+
|
| 25 |
+
from ..memory.sparse_address_table import SparseAddressTable
|
| 26 |
+
from ..routing.recursive_jit_router import RecursiveJITRouter, RoutingResult
|
| 27 |
+
from ..binding.semantic_binding import BindingEngine, BinaryBinding
|
| 28 |
+
from ..energy.energy_model import EnergyModel, EnergyFunction, RelaxationDynamics, HopfieldDynamics
|
| 29 |
+
from ..utils.simd_ops import (
|
| 30 |
+
N_BITS, N_WORDS,
|
| 31 |
+
random_binary_vector, random_binary_vectors,
|
| 32 |
+
hamming_distance, hamming_similarity,
|
| 33 |
+
xor_vectors, majority_vote
|
| 34 |
+
)
|
| 35 |
+
|
| 36 |
+
logger = logging.getLogger(__name__)
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
class QueryEncoder:
    """
    Encode text/symbolic queries into binary vectors.

    Encoding strategies:
    1. Named concept lookup (exact match in the binding engine's codebook)
    2. Hash-based encoding (deterministic binary vector from a string)
    3. Composite encoding (bundle several concepts together)
    4. Structure encoding (bundle role⊗filler bindings)
    """

    def __init__(self, binding_engine: BindingEngine):
        # Codebook lookups and bind/bundle primitives come from here.
        self.binding = binding_engine

    def encode(self, query: Any) -> np.ndarray:
        """Encode a query into a binary vector.

        Supports:
        - np.ndarray: pass-through copy (already a vector)
        - str: codebook lookup, falling back to hash encoding
        - list/tuple of str: composite bundling of concepts
        - dict: role-filler structure encoding

        Raises:
            TypeError: for unsupported query types.
            ValueError: for an empty list/tuple or dict.
        """
        if isinstance(query, np.ndarray):
            return query.copy()
        if isinstance(query, str):
            return self._encode_string(query)
        if isinstance(query, (list, tuple)):
            return self._encode_composite(query)
        if isinstance(query, dict):
            return self._encode_structure(query)
        raise TypeError(f"Cannot encode query of type {type(query)}")

    def _encode_string(self, text: str) -> np.ndarray:
        """Encode a string: codebook lookup first, hash fallback otherwise."""
        vec = self.binding.get_concept(text)
        if vec is not None:
            return vec.copy()
        return self._hash_to_vector(text)

    def _hash_to_vector(self, text: str) -> np.ndarray:
        """Deterministic hash-based encoding.

        Chains SHA-512 digests of "text:i" until N_BITS bits are available,
        then packs the bytes into uint64 words. Deterministic per text.
        """
        # Ceiling division: exactly enough 512-bit digests to cover N_BITS
        # (previously computed one surplus digest when N_BITS % 512 == 0;
        # output bytes are unchanged since the result is sliced anyway).
        n_hashes = (N_BITS + 511) // 512
        chunks = []
        for i in range(n_hashes):
            digest = hashlib.sha512(f"{text}:{i}".encode()).digest()
            chunks.append(np.frombuffer(digest, dtype=np.uint8))
        all_bytes = np.concatenate(chunks)[:N_BITS // 8]
        return all_bytes.view(np.uint64).copy()

    def _encode_composite(self, concepts: List[str]) -> np.ndarray:
        """Encode a composite query by bundling the encoded concepts.

        Raises:
            ValueError: if ``concepts`` is empty (bundle() has no identity
            element to fall back on).
        """
        if not concepts:
            raise ValueError("Cannot encode an empty concept list")
        vectors = [self._encode_string(c) for c in concepts]
        if len(vectors) == 1:
            return vectors[0]
        return BinaryBinding.bundle(*vectors)

    def _encode_structure(self, structure: Dict[str, str]) -> np.ndarray:
        """Encode a structured query (role: filler pairs).

        Example: {"subject": "cat", "relation": "is_a"} →
            bundle(bind(enc("subject"), enc("cat")),
                   bind(enc("relation"), enc("is_a")))

        Raises:
            ValueError: if ``structure`` is empty (previously raised a bare
            IndexError on ``pairs[0]``).
        """
        if not structure:
            raise ValueError("Cannot encode an empty role-filler structure")
        pairs = [
            BinaryBinding.bind(self._encode_string(role), self._encode_string(filler))
            for role, filler in structure.items()
        ]
        return BinaryBinding.bundle(*pairs) if len(pairs) > 1 else pairs[0]
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
class ResponseDecoder:
    """
    Decode final state vector into interpretable response.

    Strategies:
    1. Nearest neighbor: find closest named concept(s) in codebook
    2. Unbinding: recover specific fillers for given roles
    3. Activation readout: return most-activated memory entries
    """

    def __init__(self, memory: SparseAddressTable, binding_engine: BindingEngine):
        self.memory = memory
        self.binding = binding_engine

    def _rank_codebook(self, vector: np.ndarray) -> List[Tuple[str, float]]:
        """Rank all named codebook concepts by similarity to *vector*, best first.

        Extracted helper: the same scan was previously duplicated inline for
        both the state vector and each recovered filler.
        """
        scores = [
            (name, float(hamming_similarity(vector, vec)))
            for name, vec in self.binding._codebook.items()
        ]
        scores.sort(key=lambda x: x[1], reverse=True)
        return scores

    def decode(self, state: np.ndarray, top_k: int = 5,
               roles: Optional[List[str]] = None) -> Dict[str, Any]:
        """Decode state vector into response.

        Args:
            state: (N_WORDS,) uint64 packed state vector.
            top_k: number of nearest concepts/memories to report.
            roles: optional role names whose fillers should be unbound.

        Returns dict with:
            - nearest_concepts: closest named concepts with similarities
            - nearest_memories: closest memory entries
            - role_fillers: decoded role-filler pairs (if roles provided)
            - confidence: overall decoding confidence
        """
        result: Dict[str, Any] = {}

        # 1. Nearest concepts in codebook
        concept_scores = self._rank_codebook(state)
        result['nearest_concepts'] = concept_scores[:top_k]

        # 2. Nearest memory entries
        memory_results = self.memory.query_nearest(state, k=top_k, use_lsh=True)
        nearest_memories = []
        for idx, dist in memory_results:
            meta = self.memory._metadata[idx]
            name = meta.get('name', f'entry_{idx}') if meta else f'entry_{idx}'
            nearest_memories.append({
                'index': idx,
                'name': name,
                'distance': dist,
                'similarity': 1.0 - dist / N_BITS,
                'metadata': meta,
            })
        result['nearest_memories'] = nearest_memories

        # 3. Role-filler decoding
        if roles:
            # One fallback encoder for all roles (the old code constructed a
            # fresh QueryEncoder inside the loop for every unknown role).
            fallback_encoder = QueryEncoder(self.binding)
            role_fillers = {}
            for role_name in roles:
                role_vec = self.binding.get_concept(role_name)
                if role_vec is None:
                    role_vec = fallback_encoder._hash_to_vector(role_name)
                filler = BinaryBinding.unbind(state, role_vec)
                # Nearest concepts to the recovered filler vector.
                role_fillers[role_name] = self._rank_codebook(filler)[:3]
            result['role_fillers'] = role_fillers

        # 4. Confidence (based on how close the best match is)
        result['confidence'] = concept_scores[0][1] if concept_scores else 0.0

        return result
|
| 190 |
+
|
| 191 |
+
|
| 192 |
+
class ReasoningStep:
    """Snapshot of a single iteration of the inference chain."""
    __slots__ = ['step_num', 'state', 'energy', 'energy_components',
                 'routing_result', 'decoded', 'timestamp']

    def __init__(self, step_num: int):
        self.step_num = step_num
        # Seconds since the reasoning call started; filled by the engine.
        self.timestamp: float = 0.0
        # Result fields, populated as the step executes.
        self.state: Optional[np.ndarray] = None
        self.energy: float = float('inf')
        self.energy_components: Dict[str, float] = {}
        self.routing_result: Optional[RoutingResult] = None
        self.decoded: Optional[Dict] = None
|
| 205 |
+
|
| 206 |
+
|
| 207 |
+
class ReasoningEngine:
    """
    The MLE Reasoning Engine -- core inference loop.

    Pipeline: Query Encoder -> Router (beam) -> Binder (compose)
              -> Energy (relax) -> Decode -> Response.

    Reasoning loop:
    1. Encode query -> binary vector
    2. Route -> find relevant memories (beam search)
    3. Activate memories, construct bindings
    4. Minimize energy (Hopfield + binary relaxation)
    5. Decode final state -> response
    6. [Optional] Use result as new query -> iterate
    """

    def __init__(
        self,
        memory: Optional[SparseAddressTable] = None,
        beam_width: int = 500,
        max_routing_depth: int = 3,
        max_reasoning_steps: int = 5,
        energy_mode: str = 'hybrid',
        hopfield_beta: float = 8.0,
        relaxation_iterations: int = 50,
    ):
        # Memory backing store (created fresh when none is supplied).
        self.memory = memory or SparseAddressTable()

        # Binding engine (binary XOR algebra).
        self.binding = BindingEngine(use_binary=True)

        # Beam-search router over the memory's address space.
        self.router = RecursiveJITRouter(
            memory=self.memory,
            beam_width=beam_width,
            max_depth=max_routing_depth,
        )

        # Energy model combining binary relaxation and Hopfield dynamics.
        energy_fn = EnergyFunction()
        self.energy_model = EnergyModel(
            mode=energy_mode,
            energy_fn=energy_fn,
            binary_dynamics=RelaxationDynamics(
                energy_fn,
                max_iterations=relaxation_iterations,
                n_candidates=32,
            ),
            hopfield_dynamics=HopfieldDynamics(
                beta=hopfield_beta,
                max_iterations=20,
            ),
        )

        # Encoder/Decoder
        self.encoder = QueryEncoder(self.binding)
        self.decoder = ResponseDecoder(self.memory, self.binding)

        # Config
        self.max_reasoning_steps = max_reasoning_steps

    # -- Knowledge Management ------------------------------------------------

    def add_concept(self, name: str, metadata: Optional[Dict] = None) -> np.ndarray:
        """Add a named concept to both memory and binding codebook."""
        vec = self.binding.register_concept(name)
        self.memory.store_concept(name, content=vec, metadata=metadata)
        return vec

    def add_relation(self, subject: str, relation: str, obj: str,
                     metadata: Optional[Dict] = None) -> int:
        """Store a semantic relation as a bound vector in memory.

        Returns the memory entry index of the stored relation.
        """
        # Ensure all concepts exist
        for name in (subject, relation, obj):
            if self.binding.get_concept(name) is None:
                self.add_concept(name)

        # Create bound representation
        s = self.binding.get_concept(subject)
        r = self.binding.get_concept(relation)
        o = self.binding.get_concept(obj)
        bound = BinaryBinding.encode_triple(s, r, o)

        # Copy before updating: the old code called meta.update(...) on the
        # caller's dict, silently injecting relation fields into it.
        meta = dict(metadata) if metadata else {}
        meta.update({
            'type': 'relation',
            'subject': subject,
            'relation': relation,
            'object': obj,
            'name': f"{subject}_{relation}_{obj}",
        })

        # Address = blend of subject, relation and object (for routing)
        address = BinaryBinding.bundle(s, r, o)
        return self.memory.store(address, bound, metadata=meta,
                                 name=f"{subject}_{relation}_{obj}")

    # -- Core Reasoning ------------------------------------------------------

    def reason(self, query: Any, max_steps: Optional[int] = None,
               roles: Optional[List[str]] = None,
               verbose: bool = False) -> Dict[str, Any]:
        """
        Execute full reasoning pipeline.

        Args:
            query: input query (string, vector, list of strings, or dict)
            max_steps: override max reasoning iterations
            roles: roles to decode in response
            verbose: print step-by-step info

        Returns:
            Dict with:
            - response: decoded response dict
            - reasoning_chain: list of ReasoningStep objects
            - total_energy_reduction: cumulative energy decrease
            - latency_ms: total time
            - num_steps: number of steps executed
            - final_state: the final packed state vector
        """
        t0 = time.perf_counter()
        steps = max_steps or self.max_reasoning_steps
        chain: List[ReasoningStep] = []

        # -- Step 0: Encode query ------------------------------------------
        query_vec = self.encoder.encode(query)
        current_state = query_vec.copy()

        if verbose:
            logger.info(f"Query encoded. Starting reasoning with max {steps} steps.")

        for step_num in range(steps):
            step = ReasoningStep(step_num)
            step.timestamp = time.perf_counter() - t0

            # -- Step 1: Route -> find relevant memories ---------------
            routing = self.router.route_and_activate(current_state)
            step.routing_result = routing

            if verbose:
                logger.info(
                    f"Step {step_num}: Routed to {len(routing.indices)} candidates "
                    f"(depth={routing.depth}, latency={routing.latency_ms:.1f}ms)"
                )

            if len(routing.indices) == 0:
                # Nothing routed: record the unchanged state and stop.
                step.state = current_state
                chain.append(step)
                break

            # -- Step 2: Gather context from activated memories --------
            active_indices = routing.indices[:min(100, len(routing.indices))]
            context_vectors = np.ascontiguousarray(
                self.memory._contents[active_indices]
            )
            act_strengths = routing.activations[:len(active_indices)]

            # -- Step 3: Construct bindings for coherence checking -----
            bindings = self._extract_bindings(active_indices)

            # -- Step 4: Energy minimization ----------------------------
            energy_result = self.energy_model.minimize(
                current_state,
                context_vectors,
                act_strengths,
                bindings=bindings,
            )

            new_state = energy_result['final_state']
            step.state = new_state
            step.energy = energy_result['final_energy']

            if verbose:
                logger.info(
                    f"Step {step_num}: Energy {step.energy:.4f} "
                    f"(iterations={energy_result.get('total_iterations', '?')})"
                )

            chain.append(step)

            # -- Check convergence --------------------------------------
            if step_num > 0:
                state_change = hamming_distance(current_state, new_state) / N_BITS
                energy_change = abs(chain[-2].energy - step.energy)
                if state_change < 0.01 and energy_change < 1e-4:
                    if verbose:
                        logger.info(f"Reasoning converged at step {step_num}")
                    break

            current_state = new_state

        # -- Decode final state ---------------------------------------------
        final_state = chain[-1].state if chain else query_vec
        response = self.decoder.decode(final_state, top_k=10, roles=roles)

        # Attach decoding to the last executed step
        if chain:
            chain[-1].decoded = response

        total_time = (time.perf_counter() - t0) * 1000

        # Energy trajectory over the finite-energy steps
        energies = [s.energy for s in chain if s.energy != float('inf')]
        energy_reduction = (energies[0] - energies[-1]) if len(energies) >= 2 else 0.0

        return {
            'response': response,
            'reasoning_chain': chain,
            'total_energy_reduction': energy_reduction,
            'latency_ms': total_time,
            'num_steps': len(chain),
            'final_state': final_state,
        }

    def _extract_bindings(self, indices: np.ndarray) -> List[Tuple]:
        """Extract binding triples from activated relation entries.

        Each triple is (bound, role_composed, object_vec); for a coherent
        relation, unbind(bound, role_composed) should approximate the object.
        Capped at 20 triples for performance -- we now break early instead of
        scanning every index and truncating afterwards (same output).
        """
        bindings: List[Tuple] = []
        for idx in indices:
            meta = self.memory._metadata[idx]
            if not (meta and meta.get('type') == 'relation'):
                continue
            subj = meta.get('subject')
            rel = meta.get('relation')
            obj = meta.get('object')
            if not (subj and rel and obj):
                continue
            s_vec = self.binding.get_concept(subj)
            r_vec = self.binding.get_concept(rel)
            o_vec = self.binding.get_concept(obj)
            if s_vec is None or r_vec is None or o_vec is None:
                continue
            bound = self.memory._contents[idx]
            role_composed = BinaryBinding.bind(s_vec, r_vec)
            bindings.append((bound, role_composed, o_vec))
            if len(bindings) >= 20:
                break
        return bindings

    # -- Analogy Solving -------------------------------------------------

    def solve_analogy(self, a: str, b: str, c: str,
                      candidates: Optional[List[str]] = None) -> Dict[str, Any]:
        """
        Solve analogy: a is to b as c is to ?

        Uses binding-based approach:
        1. Compute relation R = bind(a, b) [XOR extracts difference]
        2. Apply: query = bind(c, R)
        3. Route query through memory
        4. Decode nearest match
        """
        t0 = time.perf_counter()

        # Get or create concept vectors
        for name in (a, b, c):
            if self.binding.get_concept(name) is None:
                self.add_concept(name)

        va = self.binding.get_concept(a)
        vb = self.binding.get_concept(b)
        vc = self.binding.get_concept(c)

        # Compute analogy query
        relation = BinaryBinding.bind(va, vb)
        query = BinaryBinding.bind(vc, relation)

        # Route and find matches
        result = self.reason(query, max_steps=2)

        # Also do direct codebook comparison
        codebook_results = self.binding.solve_analogy(a, b, c, candidates)

        return {
            'query': f"{a} : {b} :: {c} : ?",
            'codebook_ranking': codebook_results[:10],
            'memory_response': result['response'],
            'latency_ms': (time.perf_counter() - t0) * 1000,
        }

    # -- Association -----------------------------------------------------

    def associate(self, concept: str, top_k: int = 5) -> List[Tuple[str, float]]:
        """Find up to *top_k* named memory entries most associated with *concept*."""
        vec = self.binding.get_concept(concept)
        if vec is None:
            vec = self.encoder.encode(concept)

        # Route through memory
        routing = self.router.route(vec, beam_width=100)
        if len(routing.indices) == 0:
            return []

        # Pair each candidate with *its own* routing distance.  The old code
        # indexed distances by len(associations), which drifted out of sync
        # with the current entry as soon as any candidate was skipped.
        # (Assumes routing.distances is parallel to routing.indices, as the
        # rest of this module does.)
        associations: List[Tuple[str, float]] = []
        seen = set()
        for idx, dist in zip(routing.indices, routing.distances):
            meta = self.memory._metadata[idx]
            if not meta:
                continue
            name = meta.get('name', '')
            if name and name != concept and name not in seen:
                associations.append((name, float(1.0 - dist / N_BITS)))
                seen.add(name)
                if len(associations) >= top_k:
                    break

        return associations

    # -- Composition -----------------------------------------------------

    def compose(self, *concepts: str) -> Dict[str, Any]:
        """Compose multiple concepts and find what the composition represents."""
        vectors = []
        for c in concepts:
            vec = self.binding.get_concept(c)
            if vec is None:
                self.add_concept(c)
                vec = self.binding.get_concept(c)
            vectors.append(vec)

        # Bundle all concepts (single concept composes to itself)
        if len(vectors) == 1:
            composed = vectors[0]
        else:
            composed = BinaryBinding.bundle(*vectors)

        # Decode what this composition means
        result = self.reason(composed, max_steps=2)
        return {
            'input_concepts': concepts,
            'response': result['response'],
            'latency_ms': result['latency_ms'],
        }

    # -- Stats -------------------------------------------------------------

    def stats(self) -> Dict[str, Any]:
        """Return engine statistics."""
        return {
            'memory': self.memory.stats(),
            'codebook_size': len(self.binding._codebook),
            'beam_width': self.router.beam_width,
            'energy_mode': self.energy_model.mode,
        }
|
mle/memory/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
from .sparse_address_table import SparseAddressTable, HammingLSH, MemoryEntry
|
mle/memory/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (236 Bytes). View file
|
|
|
mle/memory/__pycache__/sparse_address_table.cpython-312.pyc
ADDED
|
Binary file (22 kB). View file
|
|
|
mle/memory/sparse_address_table.py
ADDED
|
@@ -0,0 +1,378 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
MLE Memory Module: Sparse Address Table
|
| 3 |
+
========================================
|
| 4 |
+
Distributed memory indexed by 4096-bit binary vectors.
|
| 5 |
+
Semantic proximity is encoded via Hamming distance.
|
| 6 |
+
|
| 7 |
+
Features:
|
| 8 |
+
- Bit-packed storage (512 bytes/vector) with cache-aligned layout
|
| 9 |
+
- LSH index for sub-linear approximate nearest neighbor search
|
| 10 |
+
- Multi-resolution indexing (coarse + fine search)
|
| 11 |
+
- Metadata/payload attachment per entry
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
import numpy as np
|
| 15 |
+
from collections import defaultdict
|
| 16 |
+
from typing import List, Tuple, Optional, Dict, Any
|
| 17 |
+
import logging
|
| 18 |
+
|
| 19 |
+
from ..utils.simd_ops import (
|
| 20 |
+
N_BITS, N_WORDS, N_BYTES,
|
| 21 |
+
random_binary_vector, random_binary_vectors,
|
| 22 |
+
hamming_distance, hamming_batch, hamming_topk,
|
| 23 |
+
xor_vectors, popcount, majority_vote, hamming_similarity
|
| 24 |
+
)
|
| 25 |
+
|
| 26 |
+
logger = logging.getLogger(__name__)
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
class HammingLSH:
    """Locality-sensitive hashing for Hamming space.

    The hash family is random bit sampling:
        h_i(v) = v[bit_index_i],  so  P(h(a) == h(b)) = 1 - hamming(a, b) / n.

    Several independent tables, each with a K-bit signature, amplify the
    collision probability for near neighbors.
    """

    def __init__(
        self,
        n_bits: int = N_BITS,
        n_tables: int = 32,
        n_projections: int = 8,
        seed: int = 42
    ):
        self.n_bits = n_bits
        self.n_tables = n_tables
        self.n_projections = n_projections

        rng = np.random.RandomState(seed)
        # For every table: which bit positions form its signature.
        self.bit_indices = [
            rng.choice(n_bits, n_projections, replace=False)
            for _ in range(n_tables)
        ]

        # One bucket map per table: signature bytes -> list of entry indices.
        self.tables: List[Dict[bytes, List[int]]] = [
            defaultdict(list) for _ in range(n_tables)
        ]
        self.n_indexed = 0

    def _compute_hash(self, bits_unpacked: np.ndarray, table_idx: int) -> bytes:
        """Pack the sampled signature bits into a hashable bytes key."""
        signature = bits_unpacked[self.bit_indices[table_idx]]
        return np.packbits(signature).tobytes()

    def _unpack_vector(self, packed: np.ndarray) -> np.ndarray:
        """Expand a packed uint64 vector into an array of individual bits."""
        return np.unpackbits(packed.view(np.uint8))

    def add(self, packed_vector: np.ndarray, idx: int):
        """Index one vector under its signature in every table."""
        bits = self._unpack_vector(packed_vector)
        for table_idx, table in enumerate(self.tables):
            table[self._compute_hash(bits, table_idx)].append(idx)
        self.n_indexed += 1

    def add_batch(self, packed_vectors: np.ndarray, start_idx: int = 0):
        """Index a batch of vectors with ids start_idx, start_idx + 1, ..."""
        for offset in range(len(packed_vectors)):
            self.add(packed_vectors[offset], start_idx + offset)

    def query_candidates(self, packed_query: np.ndarray, max_candidates: int = 2000) -> np.ndarray:
        """Collect candidate indices via LSH (before exact reranking).

        Returns deduplicated candidate indices, capped at max_candidates.
        """
        bits = self._unpack_vector(packed_query)
        found = set()
        for table_idx, table in enumerate(self.tables):
            found.update(table.get(self._compute_hash(bits, table_idx), []))
            if len(found) >= max_candidates:
                break
        return np.array(list(found)[:max_candidates], dtype=np.int64)

    def query_multi_probe(self, packed_query: np.ndarray, n_probes: int = 3,
                          max_candidates: int = 2000) -> np.ndarray:
        """Multi-probe LSH: additionally inspect buckets reachable by flipping
        one or two signature bits, trading extra lookups for recall.
        """
        bits = self._unpack_vector(packed_query)
        found = set()

        for t, table in enumerate(self.tables):
            # Exact bucket.
            found.update(table.get(self._compute_hash(bits, t), []))

            # Single-bit flips over the first few projections of this table.
            probe = bits.copy()
            flip_limit = min(n_probes, self.n_projections)
            for k in range(flip_limit):
                pos = self.bit_indices[t][k]
                probe[pos] ^= 1
                found.update(table.get(self._compute_hash(probe, t), []))
                probe[pos] ^= 1  # undo the flip before the next probe

            # Two-bit flips among those same leading projections.
            if n_probes >= 2 and self.n_projections >= 2:
                for i in range(flip_limit):
                    for j in range(i + 1, flip_limit):
                        probe = bits.copy()
                        probe[self.bit_indices[t][i]] ^= 1
                        probe[self.bit_indices[t][j]] ^= 1
                        found.update(table.get(self._compute_hash(probe, t), []))

            if len(found) >= max_candidates:
                break

        return np.array(list(found)[:max_candidates], dtype=np.int64)
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
class MemoryEntry:
    """One (address, content, metadata) record of the Sparse Address Table."""
    __slots__ = ['address', 'content', 'metadata', 'activation', 'timestamp']

    def __init__(self, address: np.ndarray, content: np.ndarray,
                 metadata: Optional[Dict[str, Any]] = None):
        # Index key and payload: (N_WORDS,) uint64 packed vectors.
        self.address = address
        self.content = content
        self.metadata = metadata or {}   # arbitrary attached metadata
        # Dynamic bookkeeping used by the activation/energy machinery.
        self.activation = 0.0            # current activation level
        self.timestamp = 0               # last access time
|
| 151 |
+
|
| 152 |
+
|
| 153 |
+
class SparseAddressTable:
|
| 154 |
+
"""
|
| 155 |
+
Distributed memory indexed by 4096-bit binary vectors.
|
| 156 |
+
|
| 157 |
+
Architecture:
|
| 158 |
+
- Primary storage: contiguous (N, N_WORDS) uint64 matrix for SIMD batch ops
|
| 159 |
+
- LSH index: multi-table bit-sampling for sub-linear ANN search
|
| 160 |
+
- Content storage: separate matrix (decoupled address/content)
|
| 161 |
+
- Activation tracking: for energy-based dynamics
|
| 162 |
+
|
| 163 |
+
Memory layout is Structure of Arrays (SoA) for cache locality
|
| 164 |
+
during batch Hamming distance computation.
|
| 165 |
+
"""
|
| 166 |
+
|
| 167 |
+
    def __init__(
        self,
        capacity: int = 100_000,
        lsh_tables: int = 32,
        lsh_projections: int = 8,
        lsh_seed: int = 42
    ):
        """Allocate fixed-capacity SoA storage plus the LSH index.

        Args:
            capacity: initial number of entry slots (backing arrays grow on demand).
            lsh_tables: number of LSH hash tables.
            lsh_projections: signature bits sampled per table.
            lsh_seed: RNG seed for the LSH bit sampling.
        """
        self.capacity = capacity
        self.size = 0  # number of slots currently in use

        # SoA layout: addresses and contents as contiguous matrices
        # (row-per-entry, so batch Hamming ops scan cache-friendly memory)
        self._addresses = np.zeros((capacity, N_WORDS), dtype=np.uint64)
        self._contents = np.zeros((capacity, N_WORDS), dtype=np.uint64)

        # Metadata and activation stored separately
        # (slots are None until written by store())
        self._metadata: List[Dict[str, Any]] = [None] * capacity
        self._activations = np.zeros(capacity, dtype=np.float64)
        self._timestamps = np.zeros(capacity, dtype=np.int64)

        # LSH index -- use short signatures (8-bit) with many tables (32)
        # for high recall on 4096-bit vectors
        self.lsh = HammingLSH(
            n_bits=N_BITS,
            n_tables=lsh_tables,
            n_projections=lsh_projections,
            seed=lsh_seed
        )

        # Global step counter for timestamps
        self._step = 0

        # Symbol table: name -> index mapping for named concepts
        self._symbol_table: Dict[str, int] = {}
|
| 200 |
+
|
| 201 |
+
    @property
    def addresses(self) -> np.ndarray:
        """Active address vectors (numpy view, no copy). Shape: (size, N_WORDS)."""
        return self._addresses[:self.size]
|
| 205 |
+
|
| 206 |
+
    @property
    def contents(self) -> np.ndarray:
        """Active content vectors (numpy view, no copy). Shape: (size, N_WORDS)."""
        return self._contents[:self.size]
|
| 210 |
+
|
| 211 |
+
    @property
    def activations(self) -> np.ndarray:
        """Active activation levels (numpy view, no copy). Shape: (size,)."""
        return self._activations[:self.size]
|
| 215 |
+
|
| 216 |
+
def store(self, address: np.ndarray, content: np.ndarray,
|
| 217 |
+
metadata: Optional[Dict[str, Any]] = None,
|
| 218 |
+
name: Optional[str] = None) -> int:
|
| 219 |
+
"""Store a new entry. Returns the entry index."""
|
| 220 |
+
if self.size >= self.capacity:
|
| 221 |
+
self._grow()
|
| 222 |
+
|
| 223 |
+
idx = self.size
|
| 224 |
+
self._addresses[idx] = address
|
| 225 |
+
self._contents[idx] = content
|
| 226 |
+
self._metadata[idx] = metadata or {}
|
| 227 |
+
self._timestamps[idx] = self._step
|
| 228 |
+
self._step += 1
|
| 229 |
+
|
| 230 |
+
# Index in LSH
|
| 231 |
+
self.lsh.add(address, idx)
|
| 232 |
+
|
| 233 |
+
if name:
|
| 234 |
+
self._symbol_table[name] = idx
|
| 235 |
+
|
| 236 |
+
self.size += 1
|
| 237 |
+
return idx
|
| 238 |
+
|
| 239 |
+
def store_concept(self, name: str, content: Optional[np.ndarray] = None,
|
| 240 |
+
metadata: Optional[Dict[str, Any]] = None) -> int:
|
| 241 |
+
"""Store a named concept with auto-generated address."""
|
| 242 |
+
address = random_binary_vector()
|
| 243 |
+
if content is None:
|
| 244 |
+
content = random_binary_vector()
|
| 245 |
+
meta = metadata or {}
|
| 246 |
+
meta['name'] = name
|
| 247 |
+
return self.store(address, content, metadata=meta, name=name)
|
| 248 |
+
|
| 249 |
+
def get_by_name(self, name: str) -> Optional[Tuple[np.ndarray, np.ndarray, Dict]]:
|
| 250 |
+
"""Retrieve entry by symbolic name."""
|
| 251 |
+
idx = self._symbol_table.get(name)
|
| 252 |
+
if idx is None:
|
| 253 |
+
return None
|
| 254 |
+
return (self._addresses[idx].copy(),
|
| 255 |
+
self._contents[idx].copy(),
|
| 256 |
+
self._metadata[idx])
|
| 257 |
+
|
| 258 |
+
def get_address_by_name(self, name: str) -> Optional[np.ndarray]:
|
| 259 |
+
"""Get the address vector for a named concept."""
|
| 260 |
+
idx = self._symbol_table.get(name)
|
| 261 |
+
if idx is None:
|
| 262 |
+
return None
|
| 263 |
+
return self._addresses[idx].copy()
|
| 264 |
+
|
| 265 |
+
def get_content_by_name(self, name: str) -> Optional[np.ndarray]:
|
| 266 |
+
"""Get the content vector for a named concept."""
|
| 267 |
+
idx = self._symbol_table.get(name)
|
| 268 |
+
if idx is None:
|
| 269 |
+
return None
|
| 270 |
+
return self._contents[idx].copy()
|
| 271 |
+
|
| 272 |
+
    def query_nearest(self, query: np.ndarray, k: int = 10,
                      use_lsh: bool = True) -> List[Tuple[int, int]]:
        """Find k nearest entries by Hamming distance to query address.

        Args:
            query: (N_WORDS,) uint64 query vector
            k: number of results
            use_lsh: if True, use LSH pre-filter; if False, exact scan

        Returns:
            List of (index, distance) tuples, sorted by distance ascending.
        """
        if self.size == 0:
            return []

        # LSH is only used above 1000 entries: below that an exact scan is
        # already cheap and avoids any recall loss from the pre-filter.
        if use_lsh and self.size > 1000:
            # LSH pre-filter -> exact rerank
            candidates = self.lsh.query_multi_probe(query, max_candidates=max(k * 10, 2000))
            if len(candidates) == 0:
                # Fallback to exact scan over every stored entry
                candidates = np.arange(self.size, dtype=np.int64)
            candidate_vecs = np.ascontiguousarray(self._addresses[candidates])
            dists = hamming_batch(query, candidate_vecs)
            # argpartition selects the k smallest (unordered) in O(n)...
            if k < len(candidates):
                top_local = np.argpartition(dists, k)[:k]
            else:
                top_local = np.arange(len(candidates))
            # ...then only those k are fully sorted.
            order = np.argsort(dists[top_local])
            sorted_local = top_local[order]
            return [(int(candidates[i]), int(dists[i])) for i in sorted_local]
        else:
            # Exact search over the active address slice
            indices, distances = hamming_topk(query, self.addresses, k=k)
            return [(int(idx), int(dist)) for idx, dist in zip(indices, distances)]
|
| 306 |
+
|
| 307 |
+
def query_radius(self, query: np.ndarray, radius: int) -> List[Tuple[int, int]]:
|
| 308 |
+
"""Find all entries within Hamming radius of query."""
|
| 309 |
+
if self.size == 0:
|
| 310 |
+
return []
|
| 311 |
+
dists = hamming_batch(query, self.addresses)
|
| 312 |
+
mask = dists <= radius
|
| 313 |
+
indices = np.where(mask)[0]
|
| 314 |
+
return [(int(i), int(dists[i])) for i in indices]
|
| 315 |
+
|
| 316 |
+
    def activate(self, indices: np.ndarray, strengths: np.ndarray):
        """Set activation levels for the specified entries.

        Overwrites (does not accumulate onto) the previous activation of
        each listed entry; entries not listed are left untouched.
        """
        self._activations[indices] = strengths
|
| 319 |
+
|
| 320 |
+
def decay_activations(self, factor: float = 0.95):
|
| 321 |
+
"""Exponential decay of all activations."""
|
| 322 |
+
self._activations[:self.size] *= factor
|
| 323 |
+
|
| 324 |
+
def get_active(self, threshold: float = 0.1) -> np.ndarray:
|
| 325 |
+
"""Get indices of entries with activation above threshold."""
|
| 326 |
+
return np.where(self._activations[:self.size] > threshold)[0]
|
| 327 |
+
|
| 328 |
+
def read_activated(self, threshold: float = 0.1) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
|
| 329 |
+
"""Read contents of activated entries.
|
| 330 |
+
Returns: (indices, content_vectors, activation_strengths)
|
| 331 |
+
"""
|
| 332 |
+
active_idx = self.get_active(threshold)
|
| 333 |
+
if len(active_idx) == 0:
|
| 334 |
+
return (np.array([], dtype=np.int64),
|
| 335 |
+
np.zeros((0, N_WORDS), dtype=np.uint64),
|
| 336 |
+
np.array([], dtype=np.float64))
|
| 337 |
+
return (active_idx,
|
| 338 |
+
self._contents[active_idx],
|
| 339 |
+
self._activations[active_idx])
|
| 340 |
+
|
| 341 |
+
def _grow(self, factor: float = 1.5):
|
| 342 |
+
"""Grow internal storage when capacity is exceeded."""
|
| 343 |
+
new_cap = int(self.capacity * factor)
|
| 344 |
+
logger.info(f"Growing SparseAddressTable from {self.capacity} to {new_cap}")
|
| 345 |
+
|
| 346 |
+
new_addr = np.zeros((new_cap, N_WORDS), dtype=np.uint64)
|
| 347 |
+
new_cont = np.zeros((new_cap, N_WORDS), dtype=np.uint64)
|
| 348 |
+
new_act = np.zeros(new_cap, dtype=np.float64)
|
| 349 |
+
new_ts = np.zeros(new_cap, dtype=np.int64)
|
| 350 |
+
|
| 351 |
+
new_addr[:self.size] = self._addresses[:self.size]
|
| 352 |
+
new_cont[:self.size] = self._contents[:self.size]
|
| 353 |
+
new_act[:self.size] = self._activations[:self.size]
|
| 354 |
+
new_ts[:self.size] = self._timestamps[:self.size]
|
| 355 |
+
|
| 356 |
+
self._addresses = new_addr
|
| 357 |
+
self._contents = new_cont
|
| 358 |
+
self._activations = new_act
|
| 359 |
+
self._timestamps = new_ts
|
| 360 |
+
self._metadata.extend([None] * (new_cap - self.capacity))
|
| 361 |
+
self.capacity = new_cap
|
| 362 |
+
|
| 363 |
+
def stats(self) -> Dict[str, Any]:
|
| 364 |
+
"""Return memory statistics."""
|
| 365 |
+
mem_bytes = self.size * N_BYTES * 2 # addresses + contents
|
| 366 |
+
return {
|
| 367 |
+
'size': self.size,
|
| 368 |
+
'capacity': self.capacity,
|
| 369 |
+
'memory_mb': mem_bytes / (1024 * 1024),
|
| 370 |
+
'lsh_tables': self.lsh.n_tables,
|
| 371 |
+
'lsh_projections': self.lsh.n_projections,
|
| 372 |
+
'active_entries': int((self._activations[:self.size] > 0.1).sum()),
|
| 373 |
+
'named_symbols': len(self._symbol_table),
|
| 374 |
+
}
|
| 375 |
+
|
| 376 |
+
def __repr__(self):
|
| 377 |
+
return (f"SparseAddressTable(size={self.size}, capacity={self.capacity}, "
|
| 378 |
+
f"symbols={len(self._symbol_table)})")
|
mle/routing/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
from .recursive_jit_router import RecursiveJITRouter, RoutingResult
|
mle/routing/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (218 Bytes). View file
|
|
|
mle/routing/__pycache__/recursive_jit_router.cpython-312.pyc
ADDED
|
Binary file (11.9 kB). View file
|
|
|
mle/routing/recursive_jit_router.py
ADDED
|
@@ -0,0 +1,242 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
MLE Routing Module: Recursive JIT Routing
|
| 3 |
+
==========================================
|
| 4 |
+
Dynamic beam-search routing through memory space.
|
| 5 |
+
|
| 6 |
+
Given a query encoded as a 4096-bit binary vector, the router:
|
| 7 |
+
1. Uses LSH to find initial candidate set
|
| 8 |
+
2. Recursively refines by expanding neighborhoods of top candidates
|
| 9 |
+
3. Maintains a beam of top-K (default 500) candidates at each step
|
| 10 |
+
4. Terminates when beam stabilizes or max depth reached
|
| 11 |
+
|
| 12 |
+
The routing is "JIT" because it lazily explores only promising regions,
|
| 13 |
+
avoiding full memory scans. Each recursion level enriches the candidate
|
| 14 |
+
set with neighbors of the current best matches.
|
| 15 |
+
"""
|
| 16 |
+
|
| 17 |
+
import numpy as np
|
| 18 |
+
from typing import List, Tuple, Optional, Dict, Any
|
| 19 |
+
import logging
|
| 20 |
+
import time
|
| 21 |
+
|
| 22 |
+
from ..memory.sparse_address_table import SparseAddressTable
|
| 23 |
+
from ..utils.simd_ops import (
|
| 24 |
+
N_WORDS, N_BITS,
|
| 25 |
+
hamming_batch, hamming_topk, hamming_distance,
|
| 26 |
+
xor_vectors, random_binary_vector
|
| 27 |
+
)
|
| 28 |
+
|
| 29 |
+
logger = logging.getLogger(__name__)
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
class RoutingResult:
    """Container for the outcome of a single routing query."""
    __slots__ = ('indices', 'distances', 'activations', 'depth', 'latency_ms',
                 'candidates_explored', 'beam_history')

    def __init__(self):
        # Empty, correctly typed defaults so an unfilled result is safe to use.
        self.indices: np.ndarray = np.array([], dtype=np.int64)
        self.distances: np.ndarray = np.array([], dtype=np.int32)
        self.activations: np.ndarray = np.array([], dtype=np.float64)
        self.depth: int = 0
        self.latency_ms: float = 0.0
        self.candidates_explored: int = 0
        self.beam_history: List[float] = []  # mean beam distance per depth

    def top_k(self, k: int) -> Tuple[np.ndarray, np.ndarray]:
        """Return the best min(k, available) (indices, distances)."""
        limit = min(k, len(self.indices))
        return self.indices[:limit], self.distances[:limit]
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
class RecursiveJITRouter:
    """
    Recursive JIT Routing Engine.

    Search strategy:
    1. INIT: LSH query -> initial candidate set (~2000 candidates)
    2. REFINE: For each recursion level:
       a. Compute exact Hamming distances for all candidates
       b. Select top-K beam (default 500)
       c. Expand: for each beam member, find its LSH neighbors
       d. Merge new candidates into pool (deduplicated)
    3. TERMINATE: When beam doesn't change or max_depth reached

    The expansion step is key: it allows the router to "walk" through
    memory space, following semantic chains. Two concepts connected
    via intermediate nodes can be found even if they're not directly
    similar to the query.
    """

    def __init__(
        self,
        memory: SparseAddressTable,
        beam_width: int = 500,
        max_depth: int = 3,
        expansion_factor: int = 5,
        convergence_threshold: float = 0.01,
        lsh_candidates_per_probe: int = 2000,
    ):
        self.memory = memory
        self.beam_width = beam_width
        self.max_depth = max_depth
        self.expansion_factor = expansion_factor
        self.convergence_threshold = convergence_threshold
        self.lsh_candidates = lsh_candidates_per_probe

    def route(self, query: np.ndarray, beam_width: Optional[int] = None,
              max_depth: Optional[int] = None) -> RoutingResult:
        """Execute recursive JIT routing for a query vector.

        Args:
            query: (N_WORDS,) uint64 binary vector
            beam_width: override default beam width (None -> use default)
            max_depth: override default max depth (None -> use default)

        Returns:
            RoutingResult with candidates sorted by ascending Hamming distance.
        """
        t0 = time.perf_counter()
        # Only None means "use the default": the previous `x or default`
        # idiom silently replaced an explicit 0 with the default.
        bw = self.beam_width if beam_width is None else beam_width
        md = self.max_depth if max_depth is None else max_depth
        result = RoutingResult()

        if self.memory.size == 0:
            result.latency_ms = (time.perf_counter() - t0) * 1000
            return result

        # -- Phase 1: Initial candidate set via LSH ---------------------------
        candidate_set = set()
        lsh_candidates = self.memory.lsh.query_multi_probe(
            query, n_probes=3, max_candidates=self.lsh_candidates
        )
        candidate_set.update(lsh_candidates.tolist())

        # If LSH returns too few, top up with random samples so the beam
        # has material to refine.
        if len(candidate_set) < bw and self.memory.size > bw:
            random_fill = np.random.choice(
                self.memory.size,
                min(bw * 2, self.memory.size),
                replace=False
            )
            candidate_set.update(random_fill.tolist())

        # -- Phase 2: Recursive refinement ------------------------------------
        prev_beam_mean = float('inf')
        # Pre-initialize beam state so Phase 3 is well-defined even when
        # md <= 0 or the candidate pool is empty (the previous version
        # raised NameError on `beam_dists`/`depth` in those cases).
        depth = -1
        beam_indices = np.array([], dtype=np.int64)
        beam_dists = np.array([], dtype=np.int64)

        for depth in range(md):
            # Snapshot the candidate pool as a sorted array.
            candidates = np.array(sorted(candidate_set), dtype=np.int64)
            if len(candidates) == 0:
                # Nothing to refine; keep the (empty) beam and stop.
                break

            # Exact Hamming distances for every pooled candidate.
            candidate_vecs = np.ascontiguousarray(
                self.memory._addresses[candidates]
            )
            dists = hamming_batch(query, candidate_vecs)

            # Select the top-K beam: O(n) partition, then sort only the beam.
            actual_k = min(bw, len(candidates))
            if actual_k < len(candidates):
                beam_local_idx = np.argpartition(dists, actual_k)[:actual_k]
            else:
                beam_local_idx = np.arange(len(candidates))

            beam_order = np.argsort(dists[beam_local_idx])
            beam_local_idx = beam_local_idx[beam_order]

            beam_indices = candidates[beam_local_idx]
            beam_dists = dists[beam_local_idx]

            # Track convergence via the mean beam distance.
            beam_mean = float(beam_dists.mean()) if len(beam_dists) > 0 else float('inf')
            result.beam_history.append(beam_mean)

            # Stop once refinement no longer improves the beam meaningfully.
            improvement = (prev_beam_mean - beam_mean) / max(prev_beam_mean, 1.0)
            if improvement < self.convergence_threshold and depth > 0:
                logger.debug(f"Routing converged at depth {depth}, improvement={improvement:.4f}")
                break
            prev_beam_mean = beam_mean

            # -- Expansion: enrich the pool with neighbors of top beam members
            if depth < md - 1:
                n_expand = min(self.expansion_factor, len(beam_indices))
                for i in range(n_expand):
                    idx = beam_indices[i]
                    # Use the beam member's address as a new probe point.
                    member_addr = self.memory._addresses[idx]
                    new_candidates = self.memory.lsh.query_multi_probe(
                        member_addr,
                        n_probes=2,
                        max_candidates=self.lsh_candidates // self.expansion_factor
                    )
                    candidate_set.update(new_candidates.tolist())

        # -- Phase 3: Compute final activations --------------------------------
        # Activation = temperature-scaled softmax of normalized similarity.
        # All operations below are safe on an empty beam (empty arrays).
        max_dist = N_BITS
        similarities = 1.0 - beam_dists.astype(np.float64) / max_dist
        temperature = 0.1
        exp_sim = np.exp(similarities / temperature)
        activations = exp_sim / (exp_sim.sum() + 1e-12)

        # Populate result
        result.indices = beam_indices
        result.distances = beam_dists
        result.activations = activations
        result.depth = depth + 1
        result.candidates_explored = len(candidate_set)
        result.latency_ms = (time.perf_counter() - t0) * 1000

        return result

    def route_and_activate(self, query: np.ndarray, **kwargs) -> RoutingResult:
        """Route, then decay and overwrite memory activations with the result."""
        result = self.route(query, **kwargs)
        if len(result.indices) > 0:
            # Decay existing activations before imposing the new ones.
            self.memory.decay_activations(factor=0.9)
            self.memory.activate(result.indices, result.activations)
        return result

    def multi_hop_route(self, query: np.ndarray, hops: int = 2,
                        beam_width: Optional[int] = None) -> List[RoutingResult]:
        """Multi-hop routing: each hop uses the centroid of previous results as query.

        This enables "chain of thought" routing through semantic space:
        Query -> Hop1 results -> Centroid -> Hop2 results -> ...
        """
        bw = self.beam_width if beam_width is None else beam_width
        results = []
        current_query = query.copy()

        for hop in range(hops):
            result = self.route(current_query, beam_width=bw)
            results.append(result)

            if len(result.indices) == 0:
                break

            # Weighted centroid of the top results becomes the next query;
            # more activated entries get more influence.
            top_k = min(10, len(result.indices))
            top_contents = self.memory._contents[result.indices[:top_k]]
            weights = result.activations[:top_k]
            current_query = self._weighted_centroid(top_contents, weights)

        return results

    def _weighted_centroid(self, vectors: np.ndarray, weights: np.ndarray) -> np.ndarray:
        """Compute weighted centroid of binary vectors via weighted majority vote."""
        # Unpack uint64 words to individual bits. NOTE(review): unpackbits/
        # packbits use big-endian bit order within each byte; the round trip
        # here is self-consistent, but bit positions only align with other
        # modules if they use the same byte view — confirm against simd_ops.
        bits = np.unpackbits(vectors.view(np.uint8), axis=1).astype(np.float64)
        # Weighted per-bit vote...
        weighted = (bits * weights[:, np.newaxis]).sum(axis=0)
        # ...thresholded at half the total weight (strict majority).
        threshold = weights.sum() / 2.0
        centroid_bits = (weighted > threshold).astype(np.uint8)
        return np.packbits(centroid_bits).view(np.uint64).copy()
|
mle/tests/__pycache__/test_full_system.cpython-312.pyc
ADDED
|
Binary file (37.4 kB). View file
|
|
|
mle/tests/test_full_system.py
ADDED
|
@@ -0,0 +1,904 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
MLE Comprehensive Test Suite
|
| 3 |
+
===============================
|
| 4 |
+
Tests covering:
|
| 5 |
+
1. SIMD operations correctness & performance
|
| 6 |
+
2. Memory storage & retrieval
|
| 7 |
+
3. LSH indexing quality
|
| 8 |
+
4. Routing latency & scalability
|
| 9 |
+
5. Binding operations (binary & HRR)
|
| 10 |
+
6. Energy convergence
|
| 11 |
+
7. Reasoning capabilities (association, analogy, composition)
|
| 12 |
+
8. End-to-end integration
|
| 13 |
+
"""
|
| 14 |
+
|
| 15 |
+
import numpy as np
|
| 16 |
+
import time
|
| 17 |
+
import sys
|
| 18 |
+
import os
|
| 19 |
+
|
| 20 |
+
# Add project root to path
|
| 21 |
+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
|
| 22 |
+
|
| 23 |
+
from mle.utils.simd_ops import (
|
| 24 |
+
N_BITS, N_WORDS,
|
| 25 |
+
hamming_distance, hamming_batch, hamming_topk,
|
| 26 |
+
hamming_similarity, xor_vectors, popcount,
|
| 27 |
+
majority_vote, random_binary_vector, random_binary_vectors,
|
| 28 |
+
normalize_density, get_native_lib
|
| 29 |
+
)
|
| 30 |
+
from mle.memory.sparse_address_table import SparseAddressTable, HammingLSH
|
| 31 |
+
from mle.routing.recursive_jit_router import RecursiveJITRouter
|
| 32 |
+
from mle.binding.semantic_binding import HRRBinding, BinaryBinding, BindingEngine
|
| 33 |
+
from mle.energy.energy_model import EnergyFunction, RelaxationDynamics, HopfieldDynamics, EnergyModel
|
| 34 |
+
from mle.inference.reasoning_engine import ReasoningEngine
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def header(title):
    """Print a 70-column banner announcing a test section."""
    rule = '=' * 70
    print(f"\n{rule}")
    print(f" {title}")
    print(f"{rule}")
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def check(condition, message):
    """Print a pass/fail marker plus *message*, and pass the condition through."""
    mark = "β" if condition else "β"
    print(f" [{mark}] {message}")
    return condition
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 50 |
+
# 1. SIMD OPERATIONS
|
| 51 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 52 |
+
|
| 53 |
+
def test_simd_operations():
    """Exercise the SIMD primitives: correctness checks plus a small benchmark.

    Returns True when every check passed, False otherwise.
    """
    header("1. SIMD Operations")
    all_pass = True
    # Fixed seed so distance expectations below are reproducible.
    np.random.seed(42)

    # Check native lib is available (falls back to pure NumPy otherwise).
    lib = get_native_lib()
    all_pass &= check(lib is not None, f"Native SIMD library compiled: {lib is not None}")

    # Basic Hamming distance: two random 4096-bit vectors differ in ~half
    # their bits, so the distance concentrates around N_BITS / 2 = 2048.
    a = random_binary_vector()
    b = random_binary_vector()
    dist = hamming_distance(a, b)
    all_pass &= check(
        1800 < dist < 2200,
        f"Random vector Hamming distance β N/2: {dist} (expected ~2048)"
    )

    # Self-distance = 0
    all_pass &= check(
        hamming_distance(a, a) == 0,
        "Self-distance = 0"
    )

    # XOR identity: dist(a, a^b) should relate to popcount(b)
    xor_ab = xor_vectors(a, b)
    d1 = hamming_distance(a, xor_ab)
    d2 = popcount(b)
    # d1 should equal popcount(a XOR (a XOR b)) = popcount(b)
    all_pass &= check(
        d1 == d2,
        f"XOR identity: dist(a, aβb) = popcount(b): {d1} == {d2}"
    )

    # Batch Hamming distance: one query against a 1000-vector corpus.
    corpus = random_binary_vectors(1000)
    dists = hamming_batch(a, corpus)
    all_pass &= check(
        dists.shape == (1000,),
        f"Batch Hamming shape: {dists.shape}"
    )
    all_pass &= check(
        np.all(dists >= 0) and np.all(dists <= N_BITS),
        f"Batch Hamming range: [{dists.min()}, {dists.max()}]"
    )

    # Top-K: results must come back sorted by ascending distance.
    indices, distances = hamming_topk(a, corpus, k=10)
    all_pass &= check(
        len(indices) == 10,
        f"Top-10 returned: {len(indices)}"
    )
    all_pass &= check(
        np.all(np.diff(distances) >= 0),
        f"Top-K sorted ascending: {distances[:5]}..."
    )

    # Verify top-K correctness against a full argsort reference.
    full_sort_idx = np.argsort(dists)[:10]
    full_sort_dist = dists[full_sort_idx]
    all_pass &= check(
        np.array_equal(distances, full_sort_dist),
        f"Top-K matches full sort: {np.array_equal(distances, full_sort_dist)}"
    )

    # Majority vote over five vectors yields one vector of the same layout.
    vecs = random_binary_vectors(5)
    mv = majority_vote(np.ascontiguousarray(vecs))
    all_pass &= check(
        mv.shape == (N_WORDS,) and mv.dtype == np.uint64,
        f"Majority vote shape/dtype: {mv.shape}, {mv.dtype}"
    )

    # Normalize density: result should have ~50% of its bits set.
    v = random_binary_vector()
    v_norm = normalize_density(v, 0.5)
    actual_density = popcount(v_norm) / N_BITS
    all_pass &= check(
        abs(actual_density - 0.5) < 0.01,
        f"Density normalization: {actual_density:.4f} (target 0.5)"
    )

    # -- Performance benchmark (timings only; no pass/fail) -------------------
    print()
    corpus_sizes = [1_000, 10_000, 100_000]
    for n in corpus_sizes:
        corpus = random_binary_vectors(n)
        query = random_binary_vector()

        # Batch Hamming: average of 10 runs, reported in ms and vecs/s.
        t0 = time.perf_counter()
        for _ in range(10):
            hamming_batch(query, corpus)
        elapsed = (time.perf_counter() - t0) / 10 * 1000
        throughput = n / elapsed * 1000
        print(f" β± Batch Hamming ({n:>7d} vecs): {elapsed:>7.2f} ms"
              f" ({throughput/1e6:.1f}M vecs/s)")

        # Top-500: average of 10 runs.
        t0 = time.perf_counter()
        for _ in range(10):
            hamming_topk(query, corpus, k=500)
        elapsed = (time.perf_counter() - t0) / 10 * 1000
        print(f" β± Top-500 ({n:>7d} vecs): {elapsed:>7.2f} ms")

    return all_pass
|
| 159 |
+
|
| 160 |
+
|
| 161 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 162 |
+
# 2. MEMORY & LSH
|
| 163 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 164 |
+
|
| 165 |
+
def test_memory_and_lsh():
    """Exercise SparseAddressTable storage, exact/LSH retrieval, activation
    dynamics, and an LSH recall benchmark on clustered near-duplicates.

    Returns True when every check passed.

    NOTE(review): statement order matters — a single global np.random seed
    feeds every random draw below, so reordering changes the test data.
    """
    header("2. Memory & LSH Indexing")
    all_pass = True
    np.random.seed(42)

    # Create memory — lsh_tables/lsh_projections semantics live in
    # SparseAddressTable; presumably tables x projections LSH. TODO confirm.
    mem = SparseAddressTable(capacity=10000, lsh_tables=16, lsh_projections=24)
    all_pass &= check(mem.size == 0, f"Empty memory: size={mem.size}")

    # Store concepts: random 4096-bit address/content pairs with metadata.
    n_concepts = 5000
    addresses = random_binary_vectors(n_concepts)
    contents = random_binary_vectors(n_concepts)

    t0 = time.perf_counter()
    for i in range(n_concepts):
        mem.store(addresses[i], contents[i],
                  metadata={'name': f'concept_{i}', 'index': i})
    store_time = (time.perf_counter() - t0) * 1000

    all_pass &= check(
        mem.size == n_concepts,
        f"Stored {n_concepts} concepts in {store_time:.1f}ms"
    )

    # Exact (brute-force) search: a stored address must retrieve itself at
    # Hamming distance 0.
    query = addresses[42].copy()
    results = mem.query_nearest(query, k=5, use_lsh=False)
    all_pass &= check(
        results[0][0] == 42 and results[0][1] == 0,
        f"Exact retrieval: found correct entry (dist=0)"
    )

    # LSH search: approximate index should still surface the exact match.
    results_lsh = mem.query_nearest(query, k=5, use_lsh=True)
    found_exact = any(idx == 42 for idx, _ in results_lsh)
    all_pass &= check(
        found_exact,
        f"LSH retrieval: found exact match in top-5"
    )

    # Near-duplicate search: perturb a stored address and query via LSH.
    near = addresses[42].copy()
    bits = np.unpackbits(near.view(np.uint8))
    # Flip 50 random bits (~1.2% of 4096)
    flip_pos = np.random.choice(N_BITS, 50, replace=False)
    bits[flip_pos] ^= 1
    near_modified = np.packbits(bits).view(np.uint64).copy()

    results_near = mem.query_nearest(near_modified, k=10, use_lsh=True)
    all_pass &= check(
        results_near[0][1] <= 100,
        f"Near-duplicate found: best distance = {results_near[0][1]} (flipped 50 bits)"
    )

    # Named concept round-trip. NOTE(review): cat_idx is unused.
    cat_idx = mem.store_concept("cat", metadata={'category': 'animal'})
    retrieved = mem.get_by_name("cat")
    all_pass &= check(
        retrieved is not None,
        f"Named concept 'cat' stored and retrieved"
    )

    # Activation: set levels 0.9/0.5/0.3; threshold 0.4 should keep two.
    mem.activate(np.array([0, 1, 2]), np.array([0.9, 0.5, 0.3]))
    active = mem.get_active(threshold=0.4)
    all_pass &= check(
        len(active) == 2,
        f"Activation: {len(active)} entries above threshold 0.4"
    )

    # Decay by 0.5 -> levels 0.45/0.25/0.15; only one survives 0.4.
    mem.decay_activations(0.5)
    active_after = mem.get_active(threshold=0.4)
    all_pass &= check(
        len(active_after) == 1,
        f"After decay: {len(active_after)} entries above threshold 0.4"
    )

    # Stats: 5000 bulk entries + the named "cat" concept.
    stats = mem.stats()
    all_pass &= check(
        stats['size'] == n_concepts + 1,
        f"Memory stats: {stats}"
    )

    # ── LSH Recall benchmark ──
    # Test with near-duplicates (meaningful LSH scenario).
    # Create clusters: for 100 base vectors, create 5 near-duplicates each
    # (100 bits flipped per variant — NB: not 50 as an older comment said).
    print()
    mem2 = SparseAddressTable(capacity=2000, lsh_tables=32, lsh_projections=8)
    base_vecs = random_binary_vectors(100)
    cluster_map = {}  # stored-entry index -> cluster id
    next_idx = 0
    for cid in range(100):
        mem2.store(base_vecs[cid], base_vecs[cid])
        cluster_map[next_idx] = cid
        next_idx += 1
        for _ in range(5):
            bits = np.unpackbits(base_vecs[cid].view(np.uint8)).copy()
            flips = np.random.choice(N_BITS, 100, replace=False)
            bits[flips] ^= 1
            variant = np.packbits(bits).view(np.uint64).copy()
            mem2.store(variant, variant)
            cluster_map[next_idx] = cid
            next_idx += 1

    # For each base vector, check if LSH finds its cluster members.
    recall_tests = 100
    total_recall = 0
    for cid in range(recall_tests):
        query = base_vecs[cid]
        lsh_results = mem2.query_nearest(query, k=10, use_lsh=True)
        # Count how many results are from the same cluster
        lsh_ids = [idx for idx, _ in lsh_results]
        same_cluster = sum(1 for idx in lsh_ids if cluster_map.get(idx) == cid)
        # Each cluster has 6 members; top-10 should find most
        total_recall += same_cluster / min(6, 10)
    avg_recall = total_recall / recall_tests
    all_pass &= check(
        avg_recall > 0.3,
        f"LSH Cluster Recall@10: {avg_recall:.2%} (near-duplicates, 100 clusters)"
    )

    # Also verify that exact self-lookup always works via LSH.
    exact_recall = 0
    for cid in range(recall_tests):
        query = base_vecs[cid]
        lsh_results = mem2.query_nearest(query, k=1, use_lsh=True)
        if lsh_results and lsh_results[0][1] == 0:
            exact_recall += 1
    all_pass &= check(
        exact_recall == recall_tests,
        f"LSH Exact self-lookup: {exact_recall}/{recall_tests}"
    )

    return all_pass
|
| 301 |
+
|
| 302 |
+
|
| 303 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 304 |
+
# 3. ROUTING
|
| 305 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 306 |
+
|
| 307 |
+
def test_routing():
    """Exercise RecursiveJITRouter: basic and random-query routing, beam
    width/sort invariants, activation side effects, multi-hop routing, and a
    latency scaling benchmark.

    Returns True when every check passed.
    """
    header("3. Recursive JIT Routing")
    all_pass = True
    np.random.seed(42)

    # Build memory with 10K entries
    mem = SparseAddressTable(capacity=20000)
    n = 10000
    addresses = random_binary_vectors(n)
    contents = random_binary_vectors(n)
    for i in range(n):
        mem.store(addresses[i], contents[i], metadata={'name': f'v_{i}'})

    router = RecursiveJITRouter(
        memory=mem,
        beam_width=500,
        max_depth=3,
        expansion_factor=5,
    )

    # Basic routing: a stored address must come back first at distance 0.
    query = addresses[100].copy()
    result = router.route(query)
    all_pass &= check(
        len(result.indices) > 0,
        f"Routing returned {len(result.indices)} results"
    )
    all_pass &= check(
        result.distances[0] == 0,
        f"Exact match found at distance 0"
    )
    all_pass &= check(
        result.latency_ms < 1000,
        f"Routing latency: {result.latency_ms:.1f}ms (target < 1000ms)"
    )

    # Random query routing: beam width fills up and results stay sorted.
    random_q = random_binary_vector()
    result_rnd = router.route(random_q)
    all_pass &= check(
        len(result_rnd.indices) == 500,
        f"Beam width respected: {len(result_rnd.indices)} (target 500)"
    )
    all_pass &= check(
        np.all(np.diff(result_rnd.distances) >= 0),
        "Results sorted by distance"
    )

    # Beam convergence (distances should decrease across depth);
    # beam_history presumably holds per-depth mean distances. TODO confirm.
    all_pass &= check(
        len(result_rnd.beam_history) > 0,
        f"Beam history recorded: {len(result_rnd.beam_history)} depths, "
        f"means={[f'{m:.0f}' for m in result_rnd.beam_history]}"
    )

    # Route and activate: routing should raise activation levels in memory.
    # NOTE(review): result_act is unused; only the side effect is checked.
    result_act = router.route_and_activate(random_q)
    active = mem.get_active(threshold=0.001)
    all_pass &= check(
        len(active) > 0,
        f"Route-and-activate: {len(active)} entries activated"
    )

    # Multi-hop routing: one result object per hop.
    results_multi = router.multi_hop_route(random_q, hops=2)
    all_pass &= check(
        len(results_multi) == 2,
        f"Multi-hop routing: {len(results_multi)} hops completed"
    )

    # ── Scalability benchmark ──
    print()
    for n_test in [1_000, 10_000, 50_000]:
        mem_test = SparseAddressTable(capacity=n_test + 1000)
        addrs = random_binary_vectors(n_test)
        conts = random_binary_vectors(n_test)
        for i in range(n_test):
            mem_test.store(addrs[i], conts[i])
        r_test = RecursiveJITRouter(mem_test, beam_width=500, max_depth=3)

        latencies = []
        for _ in range(10):
            q = random_binary_vector()
            res = r_test.route(q)
            latencies.append(res.latency_ms)

        avg_lat = np.mean(latencies)
        # NOTE(review): p99 over only 10 samples is effectively the max.
        p99_lat = np.percentile(latencies, 99)
        # NOTE(review): `res` here is the last route() result of the inner
        # loop, so only the final query's exploration count is reported.
        print(f" β± Routing ({n_test:>6d} entries): "
              f"avg={avg_lat:.1f}ms, p99={p99_lat:.1f}ms, "
              f"explored={res.candidates_explored}")

    return all_pass
|
| 400 |
+
|
| 401 |
+
|
| 402 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 403 |
+
# 4. BINDING OPERATIONS
|
| 404 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 405 |
+
|
| 406 |
+
def test_binding():
    """Exercise the binding layer: binary (BSC/XOR) bind/unbind/bundle/
    permute/triple ops, HRR circular-convolution binding, the BindingEngine
    concept registry, and throughput benchmarks.

    Returns True when every check passed.
    """
    header("4. Binding Operations")
    all_pass = True
    np.random.seed(42)

    # ── Binary binding (BSC) ──
    print(" --- Binary Binding (BSC/XOR) ---")
    a = random_binary_vector()
    b = random_binary_vector()

    # Bind + unbind = identity (XOR is its own inverse).
    bound = BinaryBinding.bind(a, b)
    recovered = BinaryBinding.unbind(bound, b)
    all_pass &= check(
        hamming_distance(a, recovered) == 0,
        "XOR bind+unbind = exact recovery"
    )

    # Bound vector is quasi-orthogonal to both inputs (similarity ~0.5).
    sim_ab = hamming_similarity(bound, a)
    sim_bb = hamming_similarity(bound, b)
    all_pass &= check(
        abs(sim_ab - 0.5) < 0.05 and abs(sim_bb - 0.5) < 0.05,
        f"Bound quasi-orthogonal to inputs: sim(C,A)={sim_ab:.3f}, sim(C,B)={sim_bb:.3f}"
    )

    # Bundle (majority vote): the superposition stays similar to each input.
    c = random_binary_vector()
    bundled = BinaryBinding.bundle(a, b, c)
    sim_a = hamming_similarity(bundled, a)
    sim_b = hamming_similarity(bundled, b)
    sim_c = hamming_similarity(bundled, c)
    all_pass &= check(
        sim_a > 0.55 and sim_b > 0.55 and sim_c > 0.55,
        f"Bundle preserves similarity: {sim_a:.3f}, {sim_b:.3f}, {sim_c:.3f}"
    )

    # Permutation: invertible, and the permuted vector is dissimilar.
    perm_a = BinaryBinding.permute(a, 1)
    inv_perm_a = BinaryBinding.inverse_permute(perm_a, 1)
    all_pass &= check(
        hamming_distance(a, inv_perm_a) == 0,
        "Permutation + inverse = identity"
    )
    all_pass &= check(
        hamming_similarity(a, perm_a) < 0.55,
        f"Permuted is dissimilar: sim={hamming_similarity(a, perm_a):.3f}"
    )

    # Triple encoding (subject, relation, object).
    s, r, o = random_binary_vector(), random_binary_vector(), random_binary_vector()
    triple = BinaryBinding.encode_triple(s, r, o)
    # Decode object: unbind(unbind(triple, s), r)
    decoded_o = BinaryBinding.unbind(BinaryBinding.unbind(triple, s), r)
    all_pass &= check(
        hamming_distance(o, decoded_o) == 0,
        "Triple encode/decode: exact recovery of object"
    )

    # ── HRR binding (circular convolution) ──
    print(" --- HRR Binding (Circular Convolution) ---")
    dim = 4096
    ha = HRRBinding.random_vector(dim)
    hb = HRRBinding.random_vector(dim)

    # Bind + unbind is only APPROXIMATE for HRR, hence a similarity check.
    hbound = HRRBinding.bind(ha, hb)
    hrecovered = HRRBinding.unbind(hbound, hb)
    hrr_sim = HRRBinding.similarity(ha, hrecovered)
    all_pass &= check(
        hrr_sim > 0.3,
        f"HRR bind+unbind similarity: {hrr_sim:.3f} (should be >> 0, indicating recovery)"
    )

    # Bound is quasi-orthogonal to its inputs.
    hrr_orth = HRRBinding.similarity(hbound, ha)
    all_pass &= check(
        abs(hrr_orth) < 0.2,
        f"HRR bound quasi-orthogonal: sim={hrr_orth:.3f}"
    )

    # Bundle preserves components.
    hc = HRRBinding.random_vector(dim)
    hbundled = HRRBinding.bundle(ha, hb, hc)
    all_pass &= check(
        HRRBinding.similarity(hbundled, ha) > 0.2,
        f"HRR bundle preserves components: sim={HRRBinding.similarity(hbundled, ha):.3f}"
    )

    # ── Binding Engine ──
    print(" --- Binding Engine ---")
    engine = BindingEngine(use_binary=True)
    engine.register_concept("king")
    engine.register_concept("queen")
    engine.register_concept("man")
    engine.register_concept("woman")

    # Self-similarity is exact; independent random concepts sit near 0.5.
    sim_kk = engine.similarity(engine.get_concept("king"), engine.get_concept("king"))
    sim_kq = engine.similarity(engine.get_concept("king"), engine.get_concept("queen"))
    all_pass &= check(
        sim_kk == 1.0,
        f"Self-similarity = 1.0: {sim_kk}"
    )
    all_pass &= check(
        abs(sim_kq - 0.5) < 0.05,
        f"Random concept similarity β 0.5: {sim_kq:.3f}"
    )

    # ── Performance ──
    print()
    n_ops = 10000
    t0 = time.perf_counter()
    for _ in range(n_ops):
        BinaryBinding.bind(a, b)
    elapsed = (time.perf_counter() - t0) * 1000
    print(f" β± Binary bind: {n_ops} ops in {elapsed:.1f}ms "
          f"({n_ops/elapsed*1000:.0f} ops/s)")

    t0 = time.perf_counter()
    for _ in range(n_ops):
        HRRBinding.bind(ha, hb)
    elapsed = (time.perf_counter() - t0) * 1000
    print(f" β± HRR bind: {n_ops} ops in {elapsed:.1f}ms "
          f"({n_ops/elapsed*1000:.0f} ops/s)")

    return all_pass
|
| 532 |
+
|
| 533 |
+
|
| 534 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 535 |
+
# 5. ENERGY & CONVERGENCE
|
| 536 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 537 |
+
|
| 538 |
+
def test_energy_convergence():
    """Exercise the energy model: EnergyFunction components, stochastic
    binary relaxation, Hopfield dynamics, and the combined hybrid minimizer.
    Each phase asserts that energy does not increase.

    Returns True when every check passed.
    """
    header("5. Energy Model & Convergence")
    all_pass = True
    np.random.seed(42)

    # Create some context vectors with Dirichlet-distributed activations
    # (non-negative, summing to 1).
    n_context = 20
    context = random_binary_vectors(n_context)
    activations = np.random.dirichlet(np.ones(n_context))

    # ── Energy function ──
    # alpha/beta/gamma/delta weight the individual energy terms;
    # exact semantics live in EnergyFunction. TODO confirm term names.
    efn = EnergyFunction(alpha=1.0, beta=0.5, gamma=0.1, delta=0.05)

    # A random state should yield a well-formed component breakdown.
    state = random_binary_vector()
    e = efn.total_energy(state, context, activations)
    all_pass &= check(
        'total' in e and 'compatibility' in e,
        f"Energy components computed: {list(e.keys())}"
    )
    all_pass &= check(
        isinstance(e['total'], float),
        f"Total energy: {e['total']:.4f}"
    )

    # ── Binary relaxation ──
    print(" --- Binary Relaxation ---")
    dynamics = RelaxationDynamics(
        efn, max_iterations=30, n_candidates=16, flip_fraction=0.05
    )
    result = dynamics.relax(state, context, activations)

    initial_e = result['trajectory'][0]['total']
    final_e = result['final_energy']
    all_pass &= check(
        final_e <= initial_e + 0.01,  # allow tiny float imprecision
        f"Energy decreased: {initial_e:.4f} β {final_e:.4f} "
        f"(Ξ = {initial_e - final_e:.4f})"
    )
    all_pass &= check(
        result['iterations'] > 0,
        f"Iterations: {result['iterations']}"
    )

    # Check trajectory is generally (not strictly) decreasing.
    traj_energies = [t['total'] for t in result['trajectory']]
    decreasing_steps = sum(1 for i in range(1, len(traj_energies))
                           if traj_energies[i] <= traj_energies[i-1] + 0.001)
    pct_decreasing = decreasing_steps / max(len(traj_energies) - 1, 1)
    all_pass &= check(
        pct_decreasing > 0.5,
        f"Trajectory mostly decreasing: {pct_decreasing:.0%}"
    )

    # ── Hopfield relaxation ──
    print(" --- Hopfield Dynamics ---")
    hopfield = HopfieldDynamics(beta=8.0, max_iterations=20)
    h_result = hopfield.relax(state, context, activations)

    h_traj = h_result['energy_trajectory']
    all_pass &= check(
        len(h_traj) > 1,
        f"Hopfield trajectory: {len(h_traj)} steps"
    )
    all_pass &= check(
        h_traj[-1] <= h_traj[0] + 0.01,
        f"Hopfield energy decreased: {h_traj[0]:.4f} β {h_traj[-1]:.4f}"
    )

    # Attention should be concentrated (above the uniform 1/n level).
    att = h_result.get('attention_weights')
    if att is not None:
        max_att = att.max()
        all_pass &= check(
            max_att > 1.0 / n_context,
            f"Hopfield attention concentrated: max={max_att:.4f} (uniform={1/n_context:.4f})"
        )

    # ── Hybrid model ──
    print(" --- Hybrid Energy Model ---")
    model = EnergyModel(mode='hybrid')
    hybrid_result = model.minimize(state, context, activations)
    all_pass &= check(
        'final_state' in hybrid_result,
        f"Hybrid model produced final state"
    )
    all_pass &= check(
        hybrid_result['converged'] or hybrid_result['total_iterations'] > 0,
        f"Hybrid: {hybrid_result['total_iterations']} total iterations, "
        f"converged={hybrid_result['converged']}"
    )

    return all_pass
|
| 631 |
+
|
| 632 |
+
|
| 633 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 634 |
+
# 6. REASONING CAPABILITIES
|
| 635 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 636 |
+
|
| 637 |
+
def test_reasoning():
    """Exercise the ReasoningEngine end to end on a small hand-built
    knowledge base: association, concept query, energy convergence, analogy,
    composition, structured (role/filler) query, and multi-step reasoning.

    Returns True when every check passed.
    """
    header("6. Reasoning Capabilities")
    all_pass = True
    np.random.seed(42)

    engine = ReasoningEngine(
        beam_width=200,
        max_routing_depth=2,
        max_reasoning_steps=3,
        energy_mode='hybrid',
        relaxation_iterations=20,
    )

    # ── Build knowledge base ──
    print(" --- Building Knowledge Base ---")
    concepts = [
        "cat", "dog", "animal", "pet",
        "fish", "water", "ocean", "river",
        "bird", "sky", "wing", "fly",
        "car", "road", "wheel", "engine",
        "tree", "leaf", "forest", "green",
        "sun", "moon", "star", "night",
        "king", "queen", "prince", "princess",
        "man", "woman", "child", "person",
    ]

    for c in concepts:
        engine.add_concept(c)

    # (subject, relation, object) triples over the concepts above.
    relations = [
        ("cat", "is_a", "animal"),
        ("dog", "is_a", "animal"),
        ("cat", "is_a", "pet"),
        ("dog", "is_a", "pet"),
        ("fish", "lives_in", "water"),
        ("fish", "is_a", "animal"),
        ("bird", "has", "wing"),
        ("bird", "can", "fly"),
        ("bird", "is_a", "animal"),
        ("car", "has", "wheel"),
        ("car", "on", "road"),
        ("tree", "has", "leaf"),
        ("tree", "in", "forest"),
        ("leaf", "is", "green"),
        ("king", "is_a", "man"),
        ("queen", "is_a", "woman"),
        ("prince", "is_a", "man"),
        ("princess", "is_a", "woman"),
        ("king", "married_to", "queen"),
        ("sun", "in", "sky"),
        ("moon", "in", "sky"),
        ("star", "in", "sky"),
    ]

    for s, r, o in relations:
        engine.add_relation(s, r, o)

    stats = engine.stats()
    all_pass &= check(
        stats['codebook_size'] >= len(concepts),
        f"Knowledge base: {stats['codebook_size']} concepts, "
        f"{stats['memory']['size']} memory entries"
    )

    # ── Test 1: Association ──
    print(" --- Association ---")
    assoc_cat = engine.associate("cat", top_k=10)
    all_pass &= check(
        len(assoc_cat) > 0,
        f"Association for 'cat': {len(assoc_cat)} results"
    )
    if assoc_cat:
        print(f" Top associations: {assoc_cat[:5]}")

    # ── Test 2: Concept Query ──
    print(" --- Concept Query ---")
    result = engine.reason("cat", max_steps=2)
    all_pass &= check(
        result['response'] is not None,
        f"Reasoning on 'cat': {result['num_steps']} steps, "
        f"{result['latency_ms']:.1f}ms"
    )
    if result['response']['nearest_concepts']:
        top_concept = result['response']['nearest_concepts'][0]
        print(f" Nearest concept: {top_concept[0]} (sim={top_concept[1]:.3f})")

    # ── Test 3: Energy convergence during reasoning ──
    print(" --- Energy Convergence ---")
    # inf energies mark steps where no energy was computed; exclude them.
    energies = [s.energy for s in result['reasoning_chain'] if s.energy != float('inf')]
    if len(energies) >= 2:
        all_pass &= check(
            energies[-1] <= energies[0] + 0.01,
            f"Energy decreased during reasoning: {energies[0]:.4f} β {energies[-1]:.4f}"
        )
        print(f" Energy trajectory: {[f'{e:.4f}' for e in energies]}")

    # ── Test 4: Analogy ──
    print(" --- Analogy ---")
    analogy_result = engine.solve_analogy("king", "man", "queen")
    # NOTE(review): the f-string below indexes analogy_result BEFORE the
    # `is not None` check runs — if solve_analogy ever returned None this
    # would raise TypeError instead of failing the check. Same pattern in
    # Tests 5 and 6.
    all_pass &= check(
        analogy_result is not None,
        f"Analogy 'king:man :: queen:?': completed in {analogy_result['latency_ms']:.1f}ms"
    )
    if analogy_result['codebook_ranking']:
        top_answer = analogy_result['codebook_ranking'][0]
        print(f" Top answer: {top_answer[0]} (sim={top_answer[1]:.3f})")
        top_5 = [(n, f"{s:.3f}") for n, s in analogy_result['codebook_ranking'][:5]]
        print(f" Top-5: {top_5}")

    # ── Test 5: Composition ──
    print(" --- Composition ---")
    comp_result = engine.compose("water", "animal")
    all_pass &= check(
        comp_result is not None,
        f"Composition 'water + animal': {comp_result['latency_ms']:.1f}ms"
    )
    if comp_result['response']['nearest_concepts']:
        top = comp_result['response']['nearest_concepts'][:5]
        print(f" Nearest to 'water+animal': {[(n, f'{s:.3f}') for n, s in top]}")

    # ── Test 6: Structured query ──
    print(" --- Structured Query ---")
    struct_result = engine.reason(
        {"subject": "bird", "relation": "can"},
        max_steps=2,
        roles=["subject", "relation"]
    )
    all_pass &= check(
        struct_result is not None,
        f"Structured query completed: {struct_result['latency_ms']:.1f}ms"
    )
    if struct_result['response'].get('role_fillers'):
        for role, fillers in struct_result['response']['role_fillers'].items():
            print(f" Role '{role}': {fillers[:3]}")

    # ── Test 7: Multi-step reasoning convergence ──
    print(" --- Multi-step Convergence ---")
    deep_result = engine.reason("forest", max_steps=5)
    chain = deep_result['reasoning_chain']
    all_pass &= check(
        len(chain) > 0,
        f"Multi-step reasoning: {len(chain)} steps, {deep_result['latency_ms']:.1f}ms"
    )
    step_energies = [s.energy for s in chain if s.energy != float('inf')]
    if step_energies:
        print(f" Step energies: {[f'{e:.4f}' for e in step_energies]}")

    return all_pass
|
| 785 |
+
|
| 786 |
+
|
| 787 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 788 |
+
# 7. END-TO-END INTEGRATION
|
| 789 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 790 |
+
|
| 791 |
+
def test_integration():
    """End-to-end smoke test: build a 505-concept knowledge base, run the
    full reasoning pipeline, benchmark 50 queries, and check per-entry
    memory footprint.

    Returns True when every check passed.
    """
    header("7. End-to-End Integration")
    all_pass = True
    np.random.seed(42)

    # Build a larger knowledge base
    engine = ReasoningEngine(
        beam_width=500,
        max_routing_depth=3,
        max_reasoning_steps=3,
        energy_mode='hybrid',
    )

    # Create 500 random concepts, each linked round-robin to one of five
    # category concepts via an "is_a" relation.
    n_base = 500
    categories = ["animal", "plant", "vehicle", "tool", "place"]
    for cat in categories:
        engine.add_concept(cat)

    for i in range(n_base):
        name = f"concept_{i}"
        engine.add_concept(name)
        cat = categories[i % len(categories)]
        engine.add_relation(name, "is_a", cat)

    stats = engine.stats()
    print(f" Knowledge base: {stats}")

    # Test full pipeline (single reasoned query, wall-clock timed).
    t0 = time.perf_counter()
    result = engine.reason("concept_42", max_steps=3)
    total_ms = (time.perf_counter() - t0) * 1000

    all_pass &= check(
        result['response'] is not None,
        f"Full pipeline completed in {total_ms:.1f}ms"
    )

    # Test batch queries.
    print()
    print(" --- Batch Query Benchmark ---")
    latencies = []
    for i in range(50):
        query = f"concept_{np.random.randint(n_base)}"
        t0 = time.perf_counter()
        # NOTE(review): `r` is unused — only the latency is recorded.
        r = engine.reason(query, max_steps=2)
        latencies.append((time.perf_counter() - t0) * 1000)

    avg_lat = np.mean(latencies)
    p50_lat = np.percentile(latencies, 50)
    p99_lat = np.percentile(latencies, 99)
    # NOTE(review): check(True, ...) can never fail — this line exists only
    # to print the benchmark numbers in the standard check format.
    all_pass &= check(
        True,
        f"50 queries: avg={avg_lat:.1f}ms, p50={p50_lat:.1f}ms, p99={p99_lat:.1f}ms"
    )

    # Memory efficiency
    mem_stats = engine.memory.stats()
    print(f" Memory usage: {mem_stats['memory_mb']:.2f} MB "
          f"for {mem_stats['size']} entries")
    bytes_per_entry = mem_stats['memory_mb'] * 1024 * 1024 / max(mem_stats['size'], 1)
    all_pass &= check(
        bytes_per_entry < 2048,  # Should be ~1024 bytes (512 addr + 512 content)
        f"Memory efficiency: {bytes_per_entry:.0f} bytes/entry (target β€ 1024)"
    )

    return all_pass
|
| 858 |
+
|
| 859 |
+
|
| 860 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 861 |
+
# MAIN
|
| 862 |
+
# ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 863 |
+
|
| 864 |
+
def main():
    """Drive every test group in order and print an aggregate summary.

    A group that raises is recorded as a failure rather than aborting the
    run. Returns 0 when all groups pass, 1 otherwise (usable as a process
    exit code).
    """
    banner = "β" * 70
    print("\n" + banner)
    print(" MLE β Morpho-Logic Engine β Comprehensive Test Suite")
    print(banner)

    suites = (
        ("SIMD Operations", test_simd_operations),
        ("Memory & LSH", test_memory_and_lsh),
        ("Routing", test_routing),
        ("Binding", test_binding),
        ("Energy Convergence", test_energy_convergence),
        ("Reasoning", test_reasoning),
        ("Integration", test_integration),
    )

    results = {}
    for title, runner in suites:
        try:
            results[title] = runner()
        except Exception as exc:  # a crashing suite counts as a failure
            print(f"\n βββ {title} FAILED with exception: {exc}")
            import traceback
            traceback.print_exc()
            results[title] = False

    # Summary
    header("TEST SUMMARY")
    total = len(results)
    passed = sum(1 for outcome in results.values() if outcome)
    for title, outcome in results.items():
        print(f" [{'PASS β' if outcome else 'FAIL β'}] {title}")

    print(f"\n Total: {passed}/{total} test groups passed")
    print(banner)

    return 0 if passed == total else 1
|
| 901 |
+
|
| 902 |
+
|
| 903 |
+
if __name__ == '__main__':
    # Use sys.exit rather than the interactive-helper builtin exit():
    # exit() is injected by the `site` module and is absent under
    # `python -S` and in frozen/embedded interpreters.
    import sys

    sys.exit(main())
|
mle/utils/__init__.py
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Public API of ``mle.utils``: re-exports the SIMD bit-vector primitives
(constants, Hamming-distance kernels, and binary-vector helpers) from
``simd_ops`` so callers can import them directly from the package."""
from .simd_ops import (
    N_BITS, N_WORDS, N_BYTES,
    hamming_distance, hamming_batch, hamming_topk,
    hamming_similarity, xor_vectors, popcount,
    majority_vote, random_binary_vector, random_binary_vectors,
    normalize_density,
)
|
mle/utils/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (477 Bytes). View file
|
|
|
mle/utils/__pycache__/simd_ops.cpython-312.pyc
ADDED
|
Binary file (20 kB). View file
|
|
|
mle/utils/simd_ops.py
ADDED
|
@@ -0,0 +1,404 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
MLE SIMD-Optimized Bitwise Operations
|
| 3 |
+
=====================================
|
| 4 |
+
Hardware-accelerated Hamming distance, popcount, and batch XOR operations.
|
| 5 |
+
Uses ctypes to call GCC-compiled C with -march=native for automatic SIMD
|
| 6 |
+
vectorization (AVX-512 VPOPCNTQ / AVX2 POPCNT / SSE4.2 POPCNT).
|
| 7 |
+
|
| 8 |
+
Fallback: pure NumPy LUT-based popcount for portability.
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
import numpy as np
|
| 12 |
+
import ctypes
|
| 13 |
+
import tempfile
|
| 14 |
+
import subprocess
|
| 15 |
+
import os
|
| 16 |
+
import logging
|
| 17 |
+
from pathlib import Path
|
| 18 |
+
|
| 19 |
+
logger = logging.getLogger(__name__)
|
| 20 |
+
|
| 21 |
+
# ── Constants ──────────────────────────────────────────────────────────────────
# Fixed hypervector geometry used throughout this module.
N_BITS = 4096            # total bits per binary hypervector
N_WORDS = N_BITS // 64   # 64 uint64 words = 512 bytes per vector
N_BYTES = N_BITS // 8    # 512 bytes
|
| 25 |
+
|
| 26 |
+
# ── Compile native SIMD library ───────────────────────────────────────────────
# C kernels embedded as a raw string and compiled at runtime by
# _compile_native() with `gcc -O3 -march=native`, so the popcount loops get
# auto-vectorized for whatever ISA the host CPU exposes.
# NOTE(review): the C `majority_vote` computes `n_bits` but never uses it —
# harmless dead code inside the compiled unit.
_NATIVE_C_SOURCE = r"""
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

/* Single-pair Hamming distance: XOR + popcount over N uint64 words */
int hamming_single(const uint64_t *a, const uint64_t *b, int n_words) {
    int cnt = 0;
    for (int i = 0; i < n_words; i++)
        cnt += __builtin_popcountll(a[i] ^ b[i]);
    return cnt;
}

/* Batch Hamming: query (1 x n_words) vs corpus (n_vecs x n_words)
   Results written to out[n_vecs]. Layout: corpus is row-major contiguous. */
void hamming_batch(const uint64_t *query, const uint64_t *corpus,
                   int n_words, int n_vecs, int *out) {
    for (int v = 0; v < n_vecs; v++) {
        int cnt = 0;
        const uint64_t *row = corpus + (long)v * n_words;
        for (int w = 0; w < n_words; w++)
            cnt += __builtin_popcountll(query[w] ^ row[w]);
        out[v] = cnt;
    }
}

/* Batch Hamming with top-K selection (partial sort).
   Returns indices of top_k smallest distances.
   Uses a simple max-heap of size top_k for O(N log K). */
static void swap_int(int *a, int *b) { int t = *a; *a = *b; *b = t; }

static void sift_down_max(int *heap_dist, int *heap_idx, int size, int i) {
    while (1) {
        int largest = i, l = 2*i+1, r = 2*i+2;
        if (l < size && heap_dist[l] > heap_dist[largest]) largest = l;
        if (r < size && heap_dist[r] > heap_dist[largest]) largest = r;
        if (largest == i) break;
        swap_int(&heap_dist[i], &heap_dist[largest]);
        swap_int(&heap_idx[i], &heap_idx[largest]);
        i = largest;
    }
}

void hamming_topk(const uint64_t *query, const uint64_t *corpus,
                  int n_words, int n_vecs, int top_k,
                  int *out_indices, int *out_dists) {
    /* Initialize heap with first top_k elements */
    int heap_size = (top_k < n_vecs) ? top_k : n_vecs;
    for (int v = 0; v < heap_size; v++) {
        int cnt = 0;
        const uint64_t *row = corpus + (long)v * n_words;
        for (int w = 0; w < n_words; w++)
            cnt += __builtin_popcountll(query[w] ^ row[w]);
        out_dists[v] = cnt;
        out_indices[v] = v;
    }
    /* Build max-heap */
    for (int i = heap_size/2 - 1; i >= 0; i--)
        sift_down_max(out_dists, out_indices, heap_size, i);

    /* Process remaining vectors */
    for (int v = heap_size; v < n_vecs; v++) {
        int cnt = 0;
        const uint64_t *row = corpus + (long)v * n_words;
        for (int w = 0; w < n_words; w++)
            cnt += __builtin_popcountll(query[w] ^ row[w]);
        if (cnt < out_dists[0]) {
            out_dists[0] = cnt;
            out_indices[0] = v;
            sift_down_max(out_dists, out_indices, heap_size, 0);
        }
    }
}

/* Popcount of a single vector (count of 1-bits) */
int popcount_vec(const uint64_t *a, int n_words) {
    int cnt = 0;
    for (int i = 0; i < n_words; i++)
        cnt += __builtin_popcountll(a[i]);
    return cnt;
}

/* Batch XOR: out[i] = a[i] ^ b[i] for vectors of n_words */
void xor_vectors(const uint64_t *a, const uint64_t *b, uint64_t *out, int n_words) {
    for (int i = 0; i < n_words; i++)
        out[i] = a[i] ^ b[i];
}

/* Batch majority vote: given n_vecs vectors of n_words uint64,
   compute per-bit majority. Result in out[n_words]. */
void majority_vote(const uint64_t *vecs, int n_vecs, int n_words, uint64_t *out) {
    int n_bits = n_words * 64;
    int threshold = n_vecs / 2;
    /* Count per-bit using word-level iteration */
    for (int w = 0; w < n_words; w++) {
        uint64_t result = 0;
        for (int b = 0; b < 64; b++) {
            int count = 0;
            uint64_t mask = (uint64_t)1 << b;
            for (int v = 0; v < n_vecs; v++)
                count += ((vecs[(long)v * n_words + w] & mask) != 0);
            if (count > threshold)
                result |= mask;
        }
        out[w] = result;
    }
}
"""

# Lazily-initialized handle to the compiled shared library (None until
# _compile_native succeeds) and the path of the generated .so file.
_lib = None
_lib_path = None
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
def _compile_native():
    """Compile and load the native SIMD helper library (idempotent).

    Writes the embedded C source to the system temp directory, compiles it
    with ``gcc -O3 -march=native`` into a shared object, loads it via ctypes,
    and declares argument/return types for every exported function.

    Returns:
        The loaded ``ctypes.CDLL`` on success, or ``None`` when gcc is
        unavailable or compilation fails (callers then use NumPy fallbacks).
    """
    global _lib, _lib_path
    if _lib is not None:
        return _lib
    # Cache failure as well: without this flag, every public operation would
    # re-spawn gcc (an expensive subprocess) only to fail again.
    if getattr(_compile_native, "_failed", False):
        return None

    src_path = os.path.join(tempfile.gettempdir(), "mle_simd_ops.c")
    lib_path = os.path.join(tempfile.gettempdir(), "mle_simd_ops.so")
    _lib_path = lib_path

    try:
        # The write lives inside the try so an unwritable temp dir degrades
        # to the NumPy fallback instead of crashing import-time callers.
        with open(src_path, "w") as f:
            f.write(_NATIVE_C_SOURCE)

        subprocess.run(
            ["gcc", "-O3", "-march=native", "-shared", "-fPIC",
             "-o", lib_path, src_path],
            check=True, capture_output=True, text=True
        )
        lib = ctypes.CDLL(lib_path)

        u64_p = ctypes.POINTER(ctypes.c_uint64)
        i32_p = ctypes.POINTER(ctypes.c_int)

        # int hamming_single(const uint64_t*, const uint64_t*, int)
        lib.hamming_single.restype = ctypes.c_int
        lib.hamming_single.argtypes = [u64_p, u64_p, ctypes.c_int]

        # void hamming_batch(query, corpus, n_words, n_vecs, out)
        lib.hamming_batch.restype = None
        lib.hamming_batch.argtypes = [u64_p, u64_p,
                                      ctypes.c_int, ctypes.c_int, i32_p]

        # void hamming_topk(query, corpus, n_words, n_vecs, top_k,
        #                   out_indices, out_dists)
        lib.hamming_topk.restype = None
        lib.hamming_topk.argtypes = [u64_p, u64_p,
                                     ctypes.c_int, ctypes.c_int, ctypes.c_int,
                                     i32_p, i32_p]

        # int popcount_vec(const uint64_t*, int)
        lib.popcount_vec.restype = ctypes.c_int
        lib.popcount_vec.argtypes = [u64_p, ctypes.c_int]

        # void xor_vectors(a, b, out, n_words)
        lib.xor_vectors.restype = None
        lib.xor_vectors.argtypes = [u64_p, u64_p, u64_p, ctypes.c_int]

        # void majority_vote(vecs, n_vecs, n_words, out)
        lib.majority_vote.restype = None
        lib.majority_vote.argtypes = [u64_p, ctypes.c_int, ctypes.c_int, u64_p]

        _lib = lib
        logger.info("Native SIMD library compiled successfully with -march=native")
        return lib
    except Exception as e:
        _compile_native._failed = True
        logger.warning(f"Failed to compile native SIMD library: {e}. Using NumPy fallback.")
        return None
|
| 218 |
+
|
| 219 |
+
|
| 220 |
+
def get_native_lib():
    """Return the ctypes handle to the native SIMD library.

    Compiles it on first use; yields ``None`` when native compilation is
    unavailable on this host.
    """
    return _compile_native()
|
| 223 |
+
|
| 224 |
+
|
| 225 |
+
# ββ NumPy Fallback Operations βββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 226 |
+
|
| 227 |
+
# LUT for byte-level popcount (256 entries)
|
| 228 |
+
_POPCOUNT_LUT = np.array([bin(i).count('1') for i in range(256)], dtype=np.int32)
|
| 229 |
+
|
| 230 |
+
|
| 231 |
+
def _np_hamming_single(a: np.ndarray, b: np.ndarray) -> int:
|
| 232 |
+
"""Pure NumPy Hamming distance between two packed uint64 vectors."""
|
| 233 |
+
xor = np.bitwise_xor(a, b).view(np.uint8)
|
| 234 |
+
return int(_POPCOUNT_LUT[xor].sum())
|
| 235 |
+
|
| 236 |
+
|
| 237 |
+
def _np_hamming_batch(query: np.ndarray, corpus: np.ndarray) -> np.ndarray:
|
| 238 |
+
"""Pure NumPy batch Hamming distance. query: (N_WORDS,), corpus: (M, N_WORDS)."""
|
| 239 |
+
xor = np.bitwise_xor(query[np.newaxis, :], corpus) # (M, N_WORDS)
|
| 240 |
+
xor_bytes = xor.view(np.uint8) # (M, N_BYTES)
|
| 241 |
+
return _POPCOUNT_LUT[xor_bytes].reshape(len(corpus), -1).sum(axis=1)
|
| 242 |
+
|
| 243 |
+
|
| 244 |
+
# ββ Public API (auto-selects native or fallback) βββββββββββββββββββββββββββββ
|
| 245 |
+
|
| 246 |
+
def _as_ptr64(arr: np.ndarray):
|
| 247 |
+
"""Get ctypes pointer to uint64 array."""
|
| 248 |
+
return arr.ctypes.data_as(ctypes.POINTER(ctypes.c_uint64))
|
| 249 |
+
|
| 250 |
+
|
| 251 |
+
def _as_ptr32(arr: np.ndarray):
|
| 252 |
+
"""Get ctypes pointer to int32 array."""
|
| 253 |
+
return arr.ctypes.data_as(ctypes.POINTER(ctypes.c_int))
|
| 254 |
+
|
| 255 |
+
|
| 256 |
+
def hamming_distance(a: np.ndarray, b: np.ndarray) -> int:
    """Compute the Hamming distance between two packed binary vectors.

    Args:
        a, b: packed bit vectors as uint64 arrays (normally shape (N_WORDS,)).

    Returns:
        Number of differing bits as a plain int.
    """
    # Non-contiguous views would silently yield garbage through the raw
    # ctypes pointer, so normalize layout/dtype first (no-op when already
    # contiguous uint64).
    a = np.ascontiguousarray(a, dtype=np.uint64)
    b = np.ascontiguousarray(b, dtype=np.uint64)
    lib = get_native_lib()
    if lib is not None:
        # Pass the actual word count instead of assuming N_WORDS so shorter
        # or longer vectors are handled fully (backward compatible for the
        # standard 4096-bit case).
        return lib.hamming_single(_as_ptr64(a), _as_ptr64(b), int(a.size))
    return _np_hamming_single(a, b)
|
| 264 |
+
|
| 265 |
+
|
| 266 |
+
def hamming_batch(query: np.ndarray, corpus: np.ndarray) -> np.ndarray:
    """Compute Hamming distances from a query to every corpus vector.

    Args:
        query: (n_words,) uint64 packed vector.
        corpus: (M, n_words) uint64 matrix.

    Returns:
        (M,) int32 array of distances.
    """
    # Normalize layout instead of asserting: `assert` is stripped under -O,
    # and strided views would otherwise feed garbage to the C kernel.
    query = np.ascontiguousarray(query, dtype=np.uint64)
    corpus = np.ascontiguousarray(corpus, dtype=np.uint64)
    n_vecs, n_words = corpus.shape
    lib = get_native_lib()
    if lib is not None:
        out = np.empty(n_vecs, dtype=np.int32)
        # Use the real row width rather than the module-wide N_WORDS so
        # non-default widths do not cause out-of-bounds reads in C.
        lib.hamming_batch(
            _as_ptr64(query), _as_ptr64(corpus),
            int(n_words), int(n_vecs), _as_ptr32(out)
        )
        return out
    return _np_hamming_batch(query, corpus).astype(np.int32)
|
| 283 |
+
|
| 284 |
+
|
| 285 |
+
def hamming_topk(query: np.ndarray, corpus: np.ndarray, k: int = 500):
    """Find the top-k nearest corpus vectors by Hamming distance.

    Args:
        query: (n_words,) uint64 packed vector.
        corpus: (M, n_words) uint64 matrix.
        k: number of neighbours to return (clamped to M).

    Returns:
        (indices, distances) int32 arrays of shape (min(k, M),), sorted
        ascending by distance. Uses an O(N log K) max-heap in native code.
    """
    # Normalize layout/dtype up front (replaces the -O-strippable assert and
    # protects the raw-pointer path from strided views).
    query = np.ascontiguousarray(query, dtype=np.uint64)
    corpus = np.ascontiguousarray(corpus, dtype=np.uint64)
    n_vecs, n_words = corpus.shape
    actual_k = min(k, n_vecs)
    if actual_k <= 0:
        # k <= 0 or an empty corpus previously reached undefined behaviour
        # in the C heap (it reads out_dists[0] of a zero-sized heap).
        return np.empty(0, dtype=np.int32), np.empty(0, dtype=np.int32)

    lib = get_native_lib()
    if lib is not None:
        out_idx = np.empty(actual_k, dtype=np.int32)
        out_dist = np.empty(actual_k, dtype=np.int32)
        lib.hamming_topk(
            _as_ptr64(query), _as_ptr64(corpus),
            int(n_words), int(n_vecs), int(actual_k),
            _as_ptr32(out_idx), _as_ptr32(out_dist)
        )
        # The C routine leaves a max-heap (unordered); sort ascending here.
        order = np.argsort(out_dist)
        return out_idx[order], out_dist[order]

    dists = _np_hamming_batch(query, corpus)
    if actual_k < n_vecs:
        idx = np.argpartition(dists, actual_k)[:actual_k]
    else:
        idx = np.arange(n_vecs)
    order = np.argsort(dists[idx])
    sorted_idx = idx[order]
    return sorted_idx.astype(np.int32), dists[sorted_idx].astype(np.int32)
|
| 315 |
+
|
| 316 |
+
|
| 317 |
+
def xor_vectors(a: np.ndarray, b: np.ndarray) -> np.ndarray:
    """Bitwise XOR of two packed uint64 vectors (element-wise).

    Returns a new uint64 array; the inputs are not modified.
    """
    lib = get_native_lib()
    if lib is not None:
        # Guard the raw-pointer path against strided views, and size the
        # call by the actual element count rather than assuming N_WORDS
        # (avoids out-of-bounds reads on shorter inputs).
        a = np.ascontiguousarray(a, dtype=np.uint64)
        b = np.ascontiguousarray(b, dtype=np.uint64)
        out = np.empty(a.size, dtype=np.uint64)
        lib.xor_vectors(_as_ptr64(a), _as_ptr64(b), _as_ptr64(out), int(a.size))
        return out
    return np.bitwise_xor(a, b)
|
| 325 |
+
|
| 326 |
+
|
| 327 |
+
def popcount(a: np.ndarray) -> int:
    """Count the number of 1-bits in a packed uint64 vector."""
    # `.view(np.uint8)` (fallback) and the raw ctypes pointer (native) both
    # require a contiguous buffer; normalize once here.
    a = np.ascontiguousarray(a, dtype=np.uint64)
    lib = get_native_lib()
    if lib is not None:
        # Use the real element count instead of assuming N_WORDS so vectors
        # of any length are counted fully.
        return lib.popcount_vec(_as_ptr64(a), int(a.size))
    return int(_POPCOUNT_LUT[a.view(np.uint8)].sum())
|
| 333 |
+
|
| 334 |
+
|
| 335 |
+
def majority_vote(vectors: np.ndarray) -> np.ndarray:
    """Bitwise majority vote across multiple packed uint64 vectors.

    Args:
        vectors: (M, n_words) uint64 matrix (made C-contiguous if needed).

    Returns:
        (n_words,) uint64 vector where each bit is 1 iff strictly more than
        half of the inputs have that bit set; ties (even M) yield 0 in both
        the native path (integer threshold M/2, `count > threshold`) and the
        NumPy path (`summed > M / 2`).
    """
    # Normalize layout instead of asserting (assert is stripped under -O).
    vectors = np.ascontiguousarray(vectors, dtype=np.uint64)
    n_vecs, n_words = vectors.shape
    lib = get_native_lib()
    if lib is not None:
        out = np.empty(n_words, dtype=np.uint64)
        # Pass the actual row width rather than the module-wide N_WORDS so
        # non-default widths do not cause out-of-bounds reads in C.
        lib.majority_vote(_as_ptr64(vectors), int(n_vecs), int(n_words), _as_ptr64(out))
        return out
    # NumPy fallback: unpack to individual bits, sum per position, threshold.
    bits = np.unpackbits(vectors.view(np.uint8), axis=1)  # (M, n_bits)
    summed = bits.astype(np.int32).sum(axis=0)
    majority = (summed > n_vecs / 2).astype(np.uint8)
    return np.packbits(majority).view(np.uint64)
|
| 352 |
+
|
| 353 |
+
|
| 354 |
+
# ββ Vector Generation βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 355 |
+
|
| 356 |
+
def random_binary_vector(n_words: int = N_WORDS) -> np.ndarray:
    """Draw one random packed bit vector (each bit iid Bernoulli(0.5)).

    Returned as ``n_words`` uint64 words, i.e. 64 * n_words bits, so the
    expected bit density is balanced at 0.5.
    """
    upper = np.iinfo(np.uint64).max + 1  # exclusive bound: full 64-bit range
    return np.random.randint(0, upper, size=n_words, dtype=np.uint64)
|
| 364 |
+
|
| 365 |
+
|
| 366 |
+
def random_binary_vectors(n: int, n_words: int = N_WORDS) -> np.ndarray:
    """Draw ``n`` random packed bit vectors as a C-contiguous (n, n_words) array."""
    upper = np.iinfo(np.uint64).max + 1  # exclusive bound: full 64-bit range
    words = np.random.randint(0, upper, size=(n, n_words), dtype=np.uint64)
    return np.ascontiguousarray(words)
|
| 374 |
+
|
| 375 |
+
|
| 376 |
+
def normalize_density(v: np.ndarray, target_density: float = 0.5) -> np.ndarray:
    """Return a copy of *v* whose fraction of 1-bits equals *target_density*.

    Randomly chosen bits are flipped (1→0 or 0→1) until the vector holds
    exactly ``int(target_density * n_bits)`` set bits. The input is never
    modified.

    Args:
        v: packed uint64 bit vector.
        target_density: desired proportion of set bits, in [0, 1].

    Returns:
        New packed uint64 vector with the requested density.
    """
    bits = np.unpackbits(v.view(np.uint8))
    target_ones = int(target_density * len(bits))
    current_ones = int(bits.sum())

    if current_ones == target_ones:
        return v.copy()

    if current_ones > target_ones:
        # Too dense: clear a random subset of the set bits.
        one_positions = np.where(bits == 1)[0]
        to_flip = np.random.choice(one_positions, current_ones - target_ones, replace=False)
        bits[to_flip] = 0
    else:
        # Too sparse: set a random subset of the clear bits.
        zero_positions = np.where(bits == 0)[0]
        to_flip = np.random.choice(zero_positions, target_ones - current_ones, replace=False)
        bits[to_flip] = 1

    return np.packbits(bits).view(np.uint64).copy()
|
| 400 |
+
|
| 401 |
+
|
| 402 |
+
def hamming_similarity(a: np.ndarray, b: np.ndarray) -> float:
    """Normalized Hamming similarity in [0, 1]; 1.0 means identical vectors."""
    dist = hamming_distance(a, b)
    return 1.0 - dist / N_BITS
|