Harry00 committed on
Commit ebaf2ce · verified · 1 Parent(s): fdbd59c

feat: complete MLE engine implementation

mle/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (1.59 kB)
 
mle/__pycache__/demo.cpython-312.pyc ADDED
Binary file (13.1 kB)
 
mle/binding/__init__.py ADDED
@@ -0,0 +1 @@
+ from .semantic_binding import HRRBinding, BinaryBinding, BindingEngine
mle/binding/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (230 Bytes)
 
mle/binding/__pycache__/semantic_binding.cpython-312.pyc ADDED
Binary file (22.5 kB)
 
mle/binding/semantic_binding.py ADDED
@@ -0,0 +1,350 @@
+ """
+ MLE Binding Module: Semantic Binding Operations
+ =================================================
+ Implements circular convolution-based binding for composing and
+ decomposing semantic relations between hyperdimensional vectors.
+
+ Two implementations:
+ 1. FFT-based (HRR): High precision, O(N log N), works on real-valued vectors
+ 2. Binary (BSC): O(N/64) via XOR, works directly on packed uint64 vectors
+
+ The binding operation creates a new vector C = bind(A, B) such that:
+ - C is quasi-orthogonal to both A and B
+ - unbind(C, B) ≈ A (recoverable)
+ - bind is commutative and associative
+
+ This enables representing structured relations:
+ - "cat IS_A animal" → bind(cat, IS_A) stores a trace recoverable with animal
+ - Analogies: unbind(bind(king, male), bind(queen, female)) ≈ identity
+ """
+
+ import numpy as np
+ from typing import Optional, Tuple, List
+ import logging
+
+ from ..utils.simd_ops import (
+     N_BITS, N_WORDS,
+     xor_vectors, random_binary_vector, random_binary_vectors,
+     hamming_distance, hamming_similarity, majority_vote, popcount
+ )
+
+ logger = logging.getLogger(__name__)
+
+
+ # ══════════════════════════════════════════════════════════════════════════════
+ # FFT-based Circular Convolution (Holographic Reduced Representations)
+ # ══════════════════════════════════════════════════════════════════════════════
+
+ class HRRBinding:
+     """
+     Holographic Reduced Representations via circular convolution.
+
+     Operates on real-valued vectors of dimension N.
+     Uses numpy.fft for O(N log N) binding/unbinding.
+
+     Properties:
+     - bind(A, B) = circular_conv(A, B) via IFFT(FFT(A) * FFT(B))
+     - unbind(C, B) = circular_corr(C, B) via IFFT(FFT(C) * conj(FFT(B)))
+     - Similarity-preserving: cos(A, B) relates to cos(bind(A,X), bind(B,X))
+     """
+
+     def __init__(self, dim: int = N_BITS):
+         self.dim = dim
+         # Pre-allocate FFT workspace
+         self._fft_len = dim  # Use full-length FFT
+
+     @staticmethod
+     def random_vector(dim: int = N_BITS) -> np.ndarray:
+         """Generate a random unit-length HRR vector."""
+         v = np.random.randn(dim).astype(np.float32)
+         v /= np.linalg.norm(v)
+         return v
+
+     @staticmethod
+     def bind(a: np.ndarray, b: np.ndarray) -> np.ndarray:
+         """Circular convolution: bind two vectors.
+         C = IFFT(FFT(A) ⊙ FFT(B))
+         """
+         A = np.fft.rfft(a)
+         B = np.fft.rfft(b)
+         return np.fft.irfft(A * B, n=len(a)).astype(np.float32)
+
+     @staticmethod
+     def unbind(c: np.ndarray, b: np.ndarray) -> np.ndarray:
+         """Circular correlation: recover A from C = bind(A, B).
+         A ≈ IFFT(FFT(C) ⊙ conj(FFT(B)))
+         """
+         C = np.fft.rfft(c)
+         B = np.fft.rfft(b)
+         return np.fft.irfft(C * np.conj(B), n=len(c)).astype(np.float32)
+
+     @staticmethod
+     def bundle(*vectors: np.ndarray) -> np.ndarray:
+         """Superposition (sum + normalize) of multiple HRR vectors."""
+         s = np.sum(vectors, axis=0)
+         norm = np.linalg.norm(s)
+         if norm > 1e-8:
+             s /= norm
+         return s.astype(np.float32)
+
+     @staticmethod
+     def similarity(a: np.ndarray, b: np.ndarray) -> float:
+         """Cosine similarity between HRR vectors."""
+         dot = np.dot(a, b)
+         na = np.linalg.norm(a)
+         nb = np.linalg.norm(b)
+         if na < 1e-8 or nb < 1e-8:
+             return 0.0
+         return float(dot / (na * nb))
+
+     @staticmethod
+     def permute(v: np.ndarray, shift: int = 1) -> np.ndarray:
+         """Cyclic permutation (for positional encoding / sequence ordering)."""
+         return np.roll(v, shift).astype(np.float32)
+
+     @staticmethod
+     def inverse_permute(v: np.ndarray, shift: int = 1) -> np.ndarray:
+         """Inverse cyclic permutation."""
+         return np.roll(v, -shift).astype(np.float32)
+
+     @classmethod
+     def bind_sequence(cls, vectors: List[np.ndarray]) -> np.ndarray:
+         """Bind a sequence with positional encoding via permutation.
+         S = Σ_i permute(V_i, i)
+         Preserves order information.
+         """
+         result = np.zeros_like(vectors[0])
+         for i, v in enumerate(vectors):
+             result += cls.permute(v, i)
+         norm = np.linalg.norm(result)
+         if norm > 1e-8:
+             result /= norm
+         return result.astype(np.float32)
+
+     @classmethod
+     def encode_pair(cls, role: np.ndarray, filler: np.ndarray) -> np.ndarray:
+         """Encode a role-filler pair: bind(role, filler)."""
+         return cls.bind(role, filler)
+
+     @classmethod
+     def decode_filler(cls, structure: np.ndarray, role: np.ndarray) -> np.ndarray:
+         """Extract filler from structure given role: unbind(structure, role)."""
+         return cls.unbind(structure, role)
+
+     @classmethod
+     def encode_triple(cls, subject: np.ndarray, relation: np.ndarray,
+                       obj: np.ndarray) -> np.ndarray:
+         """Encode a knowledge triple (s, r, o).
+         T = bind(bind(subject, relation), object)
+         """
+         return cls.bind(cls.bind(subject, relation), obj)
+
+
+ # ══════════════════════════════════════════════════════════════════════════════
+ # Binary Binding (BSC - Binary Spatter Codes)
+ # ══════════════════════════════════════════════════════════════════════════════
+
+ class BinaryBinding:
+     """
+     Binary Spatter Code binding via XOR.
+
+     Operates directly on packed uint64 vectors (512 bytes for 4096 bits).
+     Extremely fast on CPU: single XOR instruction per 64-bit word.
+
+     Properties:
+     - bind(A, B) = A ⊕ B (XOR)
+     - unbind(C, B) = C ⊕ B = A (XOR is self-inverse → exact recovery!)
+     - bundle = majority vote
+     - similarity = normalized Hamming similarity
+     """
+
+     @staticmethod
+     def bind(a: np.ndarray, b: np.ndarray) -> np.ndarray:
+         """Binary binding via XOR. Self-inverse: bind(bind(a,b), b) = a."""
+         return xor_vectors(a, b)
+
+     @staticmethod
+     def unbind(c: np.ndarray, b: np.ndarray) -> np.ndarray:
+         """Binary unbinding = XOR (since XOR is its own inverse)."""
+         return xor_vectors(c, b)
+
+     @staticmethod
+     def bundle(*vectors: np.ndarray) -> np.ndarray:
+         """Majority-vote bundling. Requires odd number of vectors for tie-breaking."""
+         if len(vectors) == 1:
+             return vectors[0].copy()
+         vecs = np.stack(vectors)
+         return majority_vote(np.ascontiguousarray(vecs))
+
+     @staticmethod
+     def similarity(a: np.ndarray, b: np.ndarray) -> float:
+         """Normalized Hamming similarity [0, 1]."""
+         return hamming_similarity(a, b)
+
+     @staticmethod
+     def permute(v: np.ndarray, shift: int = 1) -> np.ndarray:
+         """Bit-level cyclic permutation for sequence encoding.
+         Shifts all bits by `shift` positions cyclically.
+         """
+         bits = np.unpackbits(v.view(np.uint8))
+         shifted = np.roll(bits, shift)
+         return np.packbits(shifted).view(np.uint64).copy()
+
+     @staticmethod
+     def inverse_permute(v: np.ndarray, shift: int = 1) -> np.ndarray:
+         """Inverse bit-level cyclic permutation."""
+         bits = np.unpackbits(v.view(np.uint8))
+         shifted = np.roll(bits, -shift)
+         return np.packbits(shifted).view(np.uint64).copy()
+
+     @classmethod
+     def bind_sequence(cls, vectors: List[np.ndarray]) -> np.ndarray:
+         """Bind a sequence with positional encoding.
+         S = bundle(permute(V_0, 0), permute(V_1, 1), ..., permute(V_n, n))
+         """
+         positioned = [cls.permute(v, i) for i, v in enumerate(vectors)]
+         return cls.bundle(*positioned)
+
+     @classmethod
+     def encode_pair(cls, role: np.ndarray, filler: np.ndarray) -> np.ndarray:
+         """Encode role-filler: bind(role, filler)."""
+         return cls.bind(role, filler)
+
+     @classmethod
+     def decode_filler(cls, structure: np.ndarray, role: np.ndarray) -> np.ndarray:
+         """Decode filler from structure given role."""
+         return cls.unbind(structure, role)
+
+     @classmethod
+     def encode_triple(cls, subject: np.ndarray, relation: np.ndarray,
+                       obj: np.ndarray) -> np.ndarray:
+         """Encode knowledge triple (s, r, o) = bind(bind(s, r), o)."""
+         return cls.bind(cls.bind(subject, relation), obj)
+
+     @classmethod
+     def create_analogy_query(cls, a: np.ndarray, b: np.ndarray,
+                              c: np.ndarray) -> np.ndarray:
+         """Create analogy query: a:b :: c:?
+         Relation R = bind(a, b)   [XOR extracts the difference]
+         Query     = bind(c, R)    [apply the same relation to c]
+         """
+         relation = cls.bind(a, b)
+         return cls.bind(c, relation)
+
+
+ # ══════════════════════════════════════════════════════════════════════════════
+ # Hybrid Binding Engine
+ # ══════════════════════════════════════════════════════════════════════════════
+
+ class BindingEngine:
+     """
+     Unified binding engine that supports both binary and real-valued operations.
+
+     The engine maintains a concept codebook (binary vectors for fast routing)
+     and can convert between binary and real domains for FFT operations.
+     """
+
+     def __init__(self, dim: int = N_BITS, use_binary: bool = True):
+         self.dim = dim
+         self.use_binary = use_binary
+         self.binary = BinaryBinding()
+         self.hrr = HRRBinding(dim)
+
+         # Concept codebook: name → binary vector
+         self._codebook: dict = {}
+
+     def register_concept(self, name: str, vector: Optional[np.ndarray] = None) -> np.ndarray:
+         """Register a named concept with a binary vector."""
+         if vector is None:
+             vector = random_binary_vector()
+         self._codebook[name] = vector.copy()
+         return vector
+
+     def get_concept(self, name: str) -> Optional[np.ndarray]:
+         """Get binary vector for a named concept."""
+         return self._codebook.get(name)
+
+     def bind(self, a: np.ndarray, b: np.ndarray) -> np.ndarray:
+         """Bind two vectors using the configured method."""
+         if self.use_binary and a.dtype == np.uint64:
+             return self.binary.bind(a, b)
+         return self.hrr.bind(a, b)
+
+     def unbind(self, c: np.ndarray, b: np.ndarray) -> np.ndarray:
+         """Unbind: recover A from C = bind(A, B) given B."""
+         if self.use_binary and c.dtype == np.uint64:
+             return self.binary.unbind(c, b)
+         return self.hrr.unbind(c, b)
+
+     def bundle(self, *vectors: np.ndarray) -> np.ndarray:
+         """Bundle multiple vectors."""
+         if self.use_binary and vectors[0].dtype == np.uint64:
+             return self.binary.bundle(*vectors)
+         return self.hrr.bundle(*vectors)
+
+     def similarity(self, a: np.ndarray, b: np.ndarray) -> float:
+         """Compute similarity between vectors."""
+         if self.use_binary and a.dtype == np.uint64:
+             return self.binary.similarity(a, b)
+         return self.hrr.similarity(a, b)
+
+     def encode_relation(self, subject: str, relation: str, obj: str) -> np.ndarray:
+         """Encode a semantic relation between named concepts.
+         Auto-registers unknown concepts.
+         """
+         for name in [subject, relation, obj]:
+             if name not in self._codebook:
+                 self.register_concept(name)
+
+         s = self._codebook[subject]
+         r = self._codebook[relation]
+         o = self._codebook[obj]
+
+         if self.use_binary:
+             return self.binary.encode_triple(s, r, o)
+         return self.hrr.encode_triple(
+             self._to_real(s), self._to_real(r), self._to_real(o)
+         )
+
+     def solve_analogy(self, a: str, b: str, c: str,
+                       candidates: Optional[List[str]] = None) -> List[Tuple[str, float]]:
+         """Solve analogy a:b :: c:?
+         Returns ranked candidates with similarity scores.
+         """
+         va = self._codebook.get(a)
+         vb = self._codebook.get(b)
+         vc = self._codebook.get(c)
+         if va is None or vb is None or vc is None:
+             raise ValueError(f"Unknown concept(s): {a}, {b}, {c}")
+
+         if self.use_binary:
+             query = self.binary.create_analogy_query(va, vb, vc)
+         else:
+             query = self.hrr.unbind(
+                 self.hrr.bind(self._to_real(vb), self._to_real(vc)),
+                 self._to_real(va)
+             )
+
+         # Search candidates
+         search_names = candidates or list(self._codebook.keys())
+         results = []
+         for name in search_names:
+             vec = self._codebook[name]
+             if self.use_binary:
+                 sim = self.binary.similarity(query, vec)
+             else:
+                 sim = self.hrr.similarity(query, self._to_real(vec))
+             results.append((name, sim))
+
+         results.sort(key=lambda x: x[1], reverse=True)
+         return results
+
+     def _to_real(self, binary_vec: np.ndarray) -> np.ndarray:
+         """Convert packed binary vector to real-valued ±1 vector."""
+         bits = np.unpackbits(binary_vec.view(np.uint8)).astype(np.float32)
+         return (2.0 * bits - 1.0)  # {0,1} → {-1, +1}
+
+     def _to_binary(self, real_vec: np.ndarray) -> np.ndarray:
+         """Convert real-valued vector to packed binary (threshold at 0)."""
+         bits = (real_vec > 0).astype(np.uint8)
+         return np.packbits(bits).view(np.uint64).copy()
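
The two recoverability claims above (approximate for HRR, bit-exact for BSC) are easy to sanity-check. A minimal sketch, assuming the package in this commit is importable as `mle` (the `random_binary_vector` helper comes from the repo's `mle.utils.simd_ops`, per the imports above):

import numpy as np
from mle.binding import HRRBinding, BinaryBinding
from mle.utils.simd_ops import random_binary_vector

# HRR: unbinding recovers a noisy but recognizable copy of A
a = HRRBinding.random_vector(4096)
b = HRRBinding.random_vector(4096)
c = HRRBinding.bind(a, b)
a_hat = HRRBinding.unbind(c, b)
print(HRRBinding.similarity(a_hat, a))  # well above chance, but below 1.0
print(HRRBinding.similarity(c, a))      # near 0: C is quasi-orthogonal to A

# BSC: XOR unbinding is exact
x = random_binary_vector()
y = random_binary_vector()
z = BinaryBinding.bind(x, y)
assert np.array_equal(BinaryBinding.unbind(z, y), x)  # exact recovery
assert BinaryBinding.similarity(z, x) < 0.6           # z looks random w.r.t. x
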
mle/demo.py ADDED
@@ -0,0 +1,292 @@
+ #!/usr/bin/env python3
+ """
+ MLE — Morpho-Logic Engine — Interactive Demo
+ ==============================================
+ Demonstrates the full reasoning pipeline:
+ 1. Knowledge base construction
+ 2. Query injection → routing → binding → energy minimization → response
+ 3. Analogy solving
+ 4. Concept composition
+ 5. Multi-hop reasoning
+ """
+
+ import numpy as np
+ import time
+ import sys
+ import os
+
+ sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
+
+ from mle import MorphoLogicEngine
+ from mle.binding import BinaryBinding
+
+
+ def banner(text):
+     w = 70
+     print(f"\n{'━'*w}")
+     print(f" {text}")
+     print(f"{'━'*w}")
+
+
+ def main():
+     print("""
+     ╔══════════════════════════════════════════════════════════════════════╗
+     ║                                                                      ║
+     ║   ███╗   ███╗ ██╗      ███████╗                                      ║
+     ║   ████╗ ████║ ██║      ██╔════╝                                      ║
+     ║   ██╔████╔██║ ██║      █████╗     Morpho-Logic Engine                ║
+     ║   ██║╚██╔╝██║ ██║      ██╔══╝     v0.1.0                             ║
+     ║   ██║ ╚═╝ ██║ ███████╗ ███████╗                                      ║
+     ║   ╚═╝     ╚═╝ ╚══════╝ ╚══════╝   Energy-Based Reasoning AI          ║
+     ║                                                                      ║
+     ║   4096-bit Hyperdimensional Vectors · SIMD-Optimized · CPU-Native    ║
+     ║   Sparse Distributed Memory · Circular Convolution Binding           ║
+     ║   Hopfield Energy Dynamics · Gradient-Free Reasoning                 ║
+     ║                                                                      ║
+     ╚══════════════════════════════════════════════════════════════════════╝
+     """)
+
+     np.random.seed(42)
+
+     # ══════════════════════════════════════════════════════════════════════
+     # 1. Initialize Engine
+     # ══════════════════════════════════════════════════════════════════════
+     banner("1. INITIALIZING ENGINE")
+
+     engine = MorphoLogicEngine(
+         beam_width=500,
+         max_routing_depth=3,
+         max_reasoning_steps=5,
+         energy_mode='hybrid',
+         hopfield_beta=8.0,
+         relaxation_iterations=30,
+     )
+     print("   ✓ Engine initialized (beam=500, hybrid energy, Hopfield β=8.0)")
+
+     # ══════════════════════════════════════════════════════════════════════
+     # 2. Build Knowledge Base
+     # ══════════════════════════════════════════════════════════════════════
+     banner("2. BUILDING KNOWLEDGE BASE")
+
+     # Concepts organized by category
+     knowledge = {
+         'Animals': {
+             'concepts': ['cat', 'dog', 'fish', 'bird', 'horse', 'eagle', 'dolphin', 'snake'],
+             'relations': [
+                 ('cat', 'is_a', 'animal'), ('dog', 'is_a', 'animal'),
+                 ('fish', 'is_a', 'animal'), ('bird', 'is_a', 'animal'),
+                 ('horse', 'is_a', 'animal'), ('eagle', 'is_a', 'bird'),
+                 ('dolphin', 'is_a', 'animal'), ('snake', 'is_a', 'animal'),
+                 ('cat', 'is_a', 'pet'), ('dog', 'is_a', 'pet'),
+                 ('fish', 'lives_in', 'water'), ('dolphin', 'lives_in', 'water'),
+                 ('bird', 'has', 'wing'), ('bird', 'can', 'fly'),
+                 ('eagle', 'can', 'fly'), ('fish', 'can', 'swim'),
+                 ('dolphin', 'can', 'swim'), ('horse', 'can', 'run'),
+                 ('snake', 'can', 'crawl'),
+             ]
+         },
+         'Nature': {
+             'concepts': ['water', 'ocean', 'river', 'sky', 'forest', 'mountain',
+                          'tree', 'leaf', 'flower', 'rain'],
+             'relations': [
+                 ('tree', 'has', 'leaf'), ('tree', 'in', 'forest'),
+                 ('flower', 'has', 'color'), ('rain', 'from', 'sky'),
+                 ('river', 'contains', 'water'), ('ocean', 'contains', 'water'),
+                 ('mountain', 'has', 'peak'),
+             ]
+         },
+         'Royalty': {
+             'concepts': ['king', 'queen', 'prince', 'princess',
+                          'man', 'woman', 'child', 'crown', 'throne'],
+             'relations': [
+                 ('king', 'is_a', 'man'), ('queen', 'is_a', 'woman'),
+                 ('prince', 'is_a', 'man'), ('princess', 'is_a', 'woman'),
+                 ('king', 'married_to', 'queen'),
+                 ('king', 'has', 'crown'), ('king', 'sits_on', 'throne'),
+                 ('prince', 'child_of', 'king'),
+                 ('princess', 'child_of', 'queen'),
+             ]
+         },
+         'Vehicles': {
+             'concepts': ['car', 'boat', 'airplane', 'bicycle',
+                          'wheel', 'engine', 'road', 'wing'],
+             'relations': [
+                 ('car', 'has', 'wheel'), ('car', 'has', 'engine'),
+                 ('car', 'on', 'road'), ('boat', 'on', 'water'),
+                 ('airplane', 'has', 'wing'), ('airplane', 'can', 'fly'),
+                 ('bicycle', 'has', 'wheel'),
+             ]
+         },
+         'Abstract': {
+             'concepts': ['animal', 'pet', 'color', 'peak', 'speed', 'size'],
+             'relations': []
+         }
+     }
+
+     # Meta-relations
+     meta_relations = ['is_a', 'has', 'can', 'lives_in', 'in', 'on',
+                       'married_to', 'child_of', 'from', 'contains', 'sits_on']
+
+     total_concepts = 0
+     total_relations = 0
+     for category, data in knowledge.items():
+         for c in data['concepts']:
+             engine.add_concept(c)
+             total_concepts += 1
+         for s, r, o in data['relations']:
+             engine.add_relation(s, r, o)
+             total_relations += 1
+
+     for r in meta_relations:
+         if engine.binding.get_concept(r) is None:
+             engine.add_concept(r)
+             total_concepts += 1
+
+     stats = engine.stats()
+     print(f"   ✓ {total_concepts} concepts loaded")
+     print(f"   ✓ {total_relations} relations stored")
+     print(f"   ✓ Memory: {stats['memory']['size']} entries, "
+           f"{stats['memory']['memory_mb']:.2f} MB")
+
+     # ══════════════════════════════════════════════════════════════════════
+     # 3. Simple Concept Queries
+     # ══════════════════════════════════════════════════════════════════════
+     banner("3. CONCEPT QUERIES")
+
+     queries = ["cat", "ocean", "king", "airplane"]
+     for q in queries:
+         result = engine.reason(q, max_steps=3)
+         top3 = result['response']['nearest_concepts'][:3]
+         energies = [s.energy for s in result['reasoning_chain']
+                     if s.energy != float('inf')]
+         e_str = " → ".join(f"{e:.4f}" for e in energies) if energies else "N/A"
+         print(f"\n   Query: '{q}'")
+         print(f"   Nearest: {[(n, f'{s:.3f}') for n, s in top3]}")
+         print(f"   Energy: {e_str}")
+         print(f"   Steps: {result['num_steps']}, Latency: {result['latency_ms']:.0f}ms")
+
+     # ══════════════════════════════════════════════════════════════════════
+     # 4. Association
+     # ══════════════════════════════════════════════════════════════════════
+     banner("4. ASSOCIATION QUERIES")
+
+     for concept in ["cat", "water", "king"]:
+         assocs = engine.associate(concept, top_k=5)
+         print(f"\n   '{concept}' → {[(n, f'{s:.3f}') for n, s in assocs[:5]]}")
+
+     # ══════════════════════════════════════════════════════════════════════
+     # 5. Analogy Solving
+     # ══════════════════════════════════════════════════════════════════════
+     banner("5. ANALOGY SOLVING")
+
+     analogies = [
+         ("king", "man", "queen"),   # king:man :: queen:? → woman
+         ("bird", "fly", "fish"),    # bird:fly :: fish:? → swim
+         ("car", "road", "boat"),    # car:road :: boat:? → water
+     ]
+
+     for a, b, c in analogies:
+         result = engine.solve_analogy(a, b, c)
+         ranking = result['codebook_ranking'][:5]
+         print(f"\n   {a} : {b} :: {c} : ?")
+         print(f"   Top-5: {[(n, f'{s:.3f}') for n, s in ranking]}")
+         print(f"   Latency: {result['latency_ms']:.0f}ms")
+
+     # ══════════════════════════════════════════════════════════════════════
+     # 6. Concept Composition
+     # ══════════════════════════════════════════════════════════════════════
+     banner("6. CONCEPT COMPOSITION")
+
+     compositions = [
+         ("water", "animal"),   # → fish/dolphin
+         ("sky", "animal"),     # → bird/eagle
+         ("man", "crown"),      # → king
+     ]
+
+     for concepts in compositions:
+         result = engine.compose(*concepts)
+         top5 = result['response']['nearest_concepts'][:5]
+         print(f"\n   {' + '.join(concepts)} → ?")
+         print(f"   Top-5: {[(n, f'{s:.3f}') for n, s in top5]}")
+
+     # ══════════════════════════════════════════════════════════════════════
+     # 7. Structured Queries (Role-Filler)
+     # ══════════════════════════════════════════════════════════════════════
+     banner("7. STRUCTURED QUERIES")
+
+     struct_queries = [
+         ({"subject": "bird", "relation": "can"}, ["subject", "relation"]),
+         ({"subject": "king", "relation": "has"}, ["subject", "relation"]),
+     ]
+
+     for query_dict, roles in struct_queries:
+         result = engine.reason(query_dict, max_steps=2, roles=roles)
+         print(f"\n   Query: {query_dict}")
+         if result['response'].get('role_fillers'):
+             for role, fillers in result['response']['role_fillers'].items():
+                 print(f"   {role} → {[(n, f'{s:.3f}') for n, s in fillers[:3]]}")
+
+     # ══════════════════════════════════════════════════════════════════════
+     # 8. Multi-Step Reasoning Chain
+     # ══════════════════════════════════════════════════════════════════════
+     banner("8. MULTI-STEP REASONING (Energy Trajectory)")
+
+     result = engine.reason("forest", max_steps=5)
+     chain = result['reasoning_chain']
+
+     print(f"\n   Query: 'forest'")
+     print(f"   Steps: {len(chain)}")
+     for i, step in enumerate(chain):
+         routing = step.routing_result
+         n_candidates = len(routing.indices) if routing else 0
+         print(f"   Step {i}: E={step.energy:.4f}, "
+               f"candidates={n_candidates}, "
+               f"t={step.timestamp:.3f}s")
+
+     print(f"\n   Total energy reduction: {result['total_energy_reduction']:.4f}")
+     print(f"   Total latency: {result['latency_ms']:.0f}ms")
+     print(f"\n   Final response:")
+     top5 = result['response']['nearest_concepts'][:5]
+     print(f"   Nearest: {[(n, f'{s:.3f}') for n, s in top5]}")
+
+     # ══════════════════════════════════════════════════════════════════════
+     # 9. Performance Summary
+     # ══════════════════════════════════════════════════════════════════════
+     banner("9. PERFORMANCE SUMMARY")
+
+     # Benchmark: 100 random queries
+     latencies = []
+     for _ in range(100):
+         concept = np.random.choice(list(engine.binding._codebook.keys()))
+         t0 = time.perf_counter()
+         engine.reason(concept, max_steps=2)
+         latencies.append((time.perf_counter() - t0) * 1000)
+
+     print(f"""
+     Query Performance (100 queries, 2 reasoning steps):
+         Average latency:  {np.mean(latencies):>8.1f} ms
+         Median latency:   {np.median(latencies):>8.1f} ms
+         P95 latency:      {np.percentile(latencies, 95):>8.1f} ms
+         P99 latency:      {np.percentile(latencies, 99):>8.1f} ms
+         Throughput:       {1000/np.mean(latencies):>8.1f} queries/sec
+
+     Memory Footprint:
+         Entries:          {stats['memory']['size']:>8d}
+         Memory:           {stats['memory']['memory_mb']:>8.2f} MB
+         Per entry:        {stats['memory']['memory_mb']*1024*1024/max(stats['memory']['size'],1):>8.0f} bytes
+
+     Architecture:
+         Vector dimension: {4096:>8d} bits
+         Storage per vec:  {512:>8d} bytes (64 × uint64)
+         Beam width:       {engine.router.beam_width:>8d}
+         LSH tables:       {engine.memory.lsh.n_tables:>8d}
+         LSH projections:  {engine.memory.lsh.n_projections:>8d}
+     """)
+
+     print("━" * 70)
+     print(" MLE Demo Complete ✓")
+     print("━" * 70)
+
+
+ if __name__ == '__main__':
+     main()
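
Since the script prepends the repository root to sys.path and guards on __main__, it should run directly from a checkout, assuming the remaining mle modules (memory, routing, utils) referenced by this commit are present:

python mle/demo.py

The fixed seed (np.random.seed(42)) makes the randomly drawn concept vectors, and hence the printed similarities and rankings, reproducible across runs; the reported latencies will of course vary by machine.
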
mle/energy/__init__.py ADDED
@@ -0,0 +1 @@
+ from .energy_model import EnergyFunction, RelaxationDynamics, HopfieldDynamics, EnergyModel
mle/energy/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (259 Bytes)
 
mle/energy/__pycache__/energy_model.cpython-312.pyc ADDED
Binary file (22.9 kB)
 
mle/energy/energy_model.py ADDED
@@ -0,0 +1,494 @@
+ """
+ MLE Energy Module: Energy-Based Reasoning Dynamics
+ ====================================================
+ Replaces backpropagation with local energy functions and relaxation dynamics.
+
+ The energy landscape guides reasoning:
+ - Low energy states = coherent, consistent representations
+ - High energy states = contradictions, ambiguity
+ - Reasoning = trajectory from high to low energy (relaxation)
+
+ Energy components:
+ 1. Compatibility energy: measures how well activated memories agree
+ 2. Binding coherence: measures structural consistency of relations
+ 3. Sparsity energy: encourages focused activation (not everything at once)
+ 4. Temporal smoothness: penalizes erratic state changes
+
+ The system uses iterative relaxation (not gradient descent):
+ - Each step flips/adjusts the state component that most reduces energy
+ - Convergence is guaranteed for bounded energy functions
+ - No gradients stored → O(1) memory per parameter
+ """
+
+ import numpy as np
+ from typing import Optional, Tuple, List, Dict, Any
+ import logging
+
+ from ..utils.simd_ops import (
+     N_BITS, N_WORDS,
+     hamming_distance, hamming_batch, hamming_similarity,
+     xor_vectors, popcount, random_binary_vector
+ )
+
+ logger = logging.getLogger(__name__)
+
+
+ class EnergyFunction:
+     """
+     Composite energy function for the MLE system.
+
+     E(state) = α·E_compat + β·E_binding + γ·E_sparse + δ·E_smooth
+
+     where:
+     - E_compat: pairwise compatibility between activated vectors
+     - E_binding: coherence of bound structures
+     - E_sparse: activation sparsity (L1 penalty on activations)
+     - E_smooth: temporal smoothness (distance from previous state)
+     """
+
+     def __init__(
+         self,
+         alpha: float = 1.0,    # compatibility weight
+         beta: float = 0.5,     # binding coherence weight
+         gamma: float = 0.1,    # sparsity weight
+         delta: float = 0.05,   # smoothness weight
+         temperature: float = 1.0,
+     ):
+         self.alpha = alpha
+         self.beta = beta
+         self.gamma = gamma
+         self.delta = delta
+         self.temperature = temperature
+
+     def compatibility_energy(self, state_vector: np.ndarray,
+                              context_vectors: np.ndarray,
+                              weights: Optional[np.ndarray] = None) -> float:
+         """Measure how compatible the state is with context vectors.
+         Lower = more compatible.
+
+         E_compat = -Σ_i w_i · sim(state, context_i)
+
+         Uses normalized Hamming similarity.
+         """
+         if len(context_vectors) == 0:
+             return 0.0
+
+         dists = hamming_batch(state_vector, np.ascontiguousarray(context_vectors))
+         similarities = 1.0 - dists.astype(np.float64) / N_BITS
+
+         if weights is not None:
+             weighted_sim = (similarities * weights).sum()
+         else:
+             weighted_sim = similarities.mean()
+
+         return -weighted_sim  # negative because we minimize energy
+
+     def binding_coherence_energy(self, bindings: List[Tuple[np.ndarray, np.ndarray, np.ndarray]]) -> float:
+         """Measure coherence of bound structures.
+         Each binding is (bound_vec, role, expected_filler).
+         E_binding = Σ_i hamming(unbind(bound_i, role_i), filler_i) / N
+
+         Lower = bindings are more intact.
+         """
+         if not bindings:
+             return 0.0
+
+         total = 0.0
+         for bound_vec, role, expected_filler in bindings:
+             # Unbind: XOR with role
+             recovered = xor_vectors(bound_vec, role)
+             dist = hamming_distance(recovered, expected_filler)
+             total += dist / N_BITS
+
+         return total / len(bindings)
+
+     def sparsity_energy(self, activations: np.ndarray) -> float:
+         """L1 penalty on activations to encourage focused reasoning.
+         E_sparse = ||activations||_1
+         """
+         return float(np.abs(activations).sum())
+
+     def smoothness_energy(self, current_state: np.ndarray,
+                           previous_state: Optional[np.ndarray] = None) -> float:
+         """Penalize large state changes (temporal smoothness).
+         E_smooth = hamming(current, previous) / N
+         """
+         if previous_state is None:
+             return 0.0
+         return hamming_distance(current_state, previous_state) / N_BITS
+
+     def total_energy(self, state_vector: np.ndarray,
+                      context_vectors: np.ndarray,
+                      activations: np.ndarray,
+                      bindings: Optional[List] = None,
+                      previous_state: Optional[np.ndarray] = None,
+                      weights: Optional[np.ndarray] = None) -> Dict[str, float]:
+         """Compute total energy and its components.
+
+         Returns dict with individual terms and total.
+         """
+         e_compat = self.alpha * self.compatibility_energy(
+             state_vector, context_vectors, weights
+         )
+         e_binding = self.beta * self.binding_coherence_energy(bindings or [])
+         e_sparse = self.gamma * self.sparsity_energy(activations)
+         e_smooth = self.delta * self.smoothness_energy(state_vector, previous_state)
+
+         total = e_compat + e_binding + e_sparse + e_smooth
+
+         return {
+             'total': total,
+             'compatibility': e_compat,
+             'binding_coherence': e_binding,
+             'sparsity': e_sparse,
+             'smoothness': e_smooth,
+         }
+
+
+ class RelaxationDynamics:
+     """
+     Iterative relaxation engine for energy minimization.
+
+     At each step:
+     1. Compute energy of current state
+     2. Generate candidate perturbations (bit flips, vector swaps)
+     3. Accept the perturbation that most reduces energy
+     4. Repeat until convergence or max iterations
+
+     This implements a greedy local search with optional stochasticity
+     (simulated annealing schedule).
+     """
+
+     def __init__(
+         self,
+         energy_fn: EnergyFunction,
+         max_iterations: int = 50,
+         n_candidates: int = 32,        # perturbations to try per step
+         flip_fraction: float = 0.05,   # fraction of bits to flip per perturbation
+         cooling_rate: float = 0.95,
+         initial_temperature: float = 1.0,
+         convergence_threshold: float = 1e-4,
+     ):
+         self.energy_fn = energy_fn
+         self.max_iterations = max_iterations
+         self.n_candidates = n_candidates
+         self.flip_fraction = flip_fraction
+         self.cooling_rate = cooling_rate
+         self.initial_temp = initial_temperature
+         self.convergence_threshold = convergence_threshold
+
+     def _perturb(self, state: np.ndarray, n_flips: int) -> np.ndarray:
+         """Create a perturbation by flipping random bits."""
+         bits = np.unpackbits(state.view(np.uint8)).copy()
+         flip_positions = np.random.choice(N_BITS, n_flips, replace=False)
+         bits[flip_positions] ^= 1
+         return np.packbits(bits).view(np.uint64).copy()
+
+     def _perturb_toward(self, state: np.ndarray, target: np.ndarray,
+                         strength: float = 0.1) -> np.ndarray:
+         """Perturb state toward a target vector (biased flip)."""
+         state_bits = np.unpackbits(state.view(np.uint8))
+         target_bits = np.unpackbits(target.view(np.uint8))
+
+         # Find differing positions
+         diff = np.where(state_bits != target_bits)[0]
+         if len(diff) == 0:
+             return state.copy()
+
+         # Flip a fraction of differing bits toward target
+         n_flip = max(1, int(len(diff) * strength))
+         to_flip = np.random.choice(diff, min(n_flip, len(diff)), replace=False)
+
+         new_bits = state_bits.copy()
+         new_bits[to_flip] = target_bits[to_flip]
+         return np.packbits(new_bits).view(np.uint64).copy()
+
+     def relax(self, initial_state: np.ndarray,
+               context_vectors: np.ndarray,
+               activations: np.ndarray,
+               bindings: Optional[List] = None,
+               attractors: Optional[np.ndarray] = None,
+               weights: Optional[np.ndarray] = None) -> Dict[str, Any]:
+         """
+         Run relaxation dynamics to minimize energy.
+
+         Args:
+             initial_state: starting state vector (N_WORDS,) uint64
+             context_vectors: activated memory contents (M, N_WORDS) uint64
+             activations: activation strengths (M,) float64
+             bindings: list of (bound, role, filler) tuples to check coherence
+             attractors: target vectors to bias perturbations toward
+             weights: per-context weights for compatibility
+
+         Returns:
+             Dict with final_state, energy_trajectory, iterations, etc.
+         """
+         state = initial_state.copy()
+         n_flips = max(1, int(N_BITS * self.flip_fraction))
+         temperature = self.initial_temp
+
+         # Compute initial energy
+         energy_info = self.energy_fn.total_energy(
+             state, context_vectors, activations, bindings, None, weights
+         )
+         current_energy = energy_info['total']
+
+         trajectory = [energy_info]
+         best_state = state.copy()
+         best_energy = current_energy
+         prev_state = None
+
+         for iteration in range(self.max_iterations):
+             # Generate candidate perturbations
+             candidates = []
+             for _ in range(self.n_candidates):
+                 if attractors is not None and len(attractors) > 0 and np.random.random() < 0.5:
+                     # Biased perturbation toward a random attractor
+                     target_idx = np.random.randint(len(attractors))
+                     candidate = self._perturb_toward(
+                         state, attractors[target_idx],
+                         strength=self.flip_fraction
+                     )
+                 else:
+                     candidate = self._perturb(state, n_flips)
+                 candidates.append(candidate)
+
+             # Evaluate all candidates
+             best_candidate = None
+             best_candidate_energy = current_energy
+
+             for candidate in candidates:
+                 e = self.energy_fn.total_energy(
+                     candidate, context_vectors, activations,
+                     bindings, prev_state, weights
+                 )
+                 if e['total'] < best_candidate_energy:
+                     best_candidate = candidate
+                     best_candidate_energy = e['total']
+                     best_candidate_info = e
+
+             # Accept or reject (simulated annealing)
+             if best_candidate is not None:
+                 delta_e = best_candidate_energy - current_energy
+                 if delta_e < 0 or np.random.random() < np.exp(-delta_e / max(temperature, 1e-8)):
+                     prev_state = state.copy()
+                     state = best_candidate
+                     current_energy = best_candidate_energy
+                     trajectory.append(best_candidate_info)
+
+             if current_energy < best_energy:
+                 best_state = state.copy()
+                 best_energy = current_energy
+
+             # Cool down
+             temperature *= self.cooling_rate
+
+             # Check convergence
+             if len(trajectory) >= 2:
+                 improvement = abs(trajectory[-2]['total'] - trajectory[-1]['total'])
+                 if improvement < self.convergence_threshold:
+                     logger.debug(f"Relaxation converged at iteration {iteration}")
+                     break
+
+         return {
+             'final_state': best_state,
+             'final_energy': best_energy,
+             'trajectory': trajectory,
+             'iterations': iteration + 1,
+             'converged': iteration < self.max_iterations - 1,
+             'energy_reduction': trajectory[0]['total'] - best_energy,
+         }
+
+
+ class HopfieldDynamics:
+     """
+     Modern Hopfield Network dynamics for continuous-state energy minimization.
+
+     Uses the update rule from Ramsauer et al. 2020:
+         ξ_new = X @ softmax(β · X^T @ ξ)
+
+     where X contains stored patterns as columns, ξ is the current state,
+     and β is inverse temperature.
+
+     Energy: E(ξ) = -lse(β, X^T @ ξ) + 0.5·||ξ||²
+
+     Adapted for binary vectors by converting to ±1 real-valued representation.
+     """
+
+     def __init__(
+         self,
+         beta: float = 8.0,   # inverse temperature (higher = sharper attention)
+         max_iterations: int = 20,
+         convergence_eps: float = 1e-6,
+     ):
+         self.beta = beta
+         self.max_iterations = max_iterations
+         self.convergence_eps = convergence_eps
+
+     def _to_bipolar(self, packed: np.ndarray) -> np.ndarray:
+         """Convert packed binary to ±1 float."""
+         bits = np.unpackbits(packed.view(np.uint8)).astype(np.float64)
+         return 2.0 * bits - 1.0
+
+     def _to_packed(self, bipolar: np.ndarray) -> np.ndarray:
+         """Convert ±1 float to packed binary."""
+         bits = (bipolar > 0).astype(np.uint8)
+         return np.packbits(bits).view(np.uint64).copy()
+
+     def energy(self, xi: np.ndarray, X: np.ndarray) -> float:
+         """Compute Hopfield energy.
+         xi: (D,) current state
+         X: (D, N) stored patterns as columns
+         E = -logsumexp(β · X^T @ ξ) / β + 0.5·||ξ||²
+         """
+         scores = self.beta * (X.T @ xi)
+         max_score = scores.max()
+         lse = max_score + np.log(np.sum(np.exp(scores - max_score)))
+         return -lse / self.beta + 0.5 * np.dot(xi, xi)
+
+     def update(self, xi: np.ndarray, X: np.ndarray) -> np.ndarray:
+         """Single Hopfield update step.
+         ξ_new = X @ softmax(β · X^T @ ξ)
+         """
+         scores = self.beta * (X.T @ xi)
+         # Numerically stable softmax
+         scores -= scores.max()
+         exp_scores = np.exp(scores)
+         attention = exp_scores / (exp_scores.sum() + 1e-12)
+         return X @ attention
+
+     def relax(self, query_packed: np.ndarray,
+               patterns_packed: np.ndarray,
+               activations: Optional[np.ndarray] = None) -> Dict[str, Any]:
+         """
+         Run Hopfield relaxation from query toward stored patterns.
+
+         Args:
+             query_packed: (N_WORDS,) uint64 initial state
+             patterns_packed: (M, N_WORDS) uint64 stored patterns
+             activations: (M,) optional weights for patterns
+
+         Returns:
+             Dict with final state, energy trajectory, attention weights, etc.
+         """
+         # Convert to bipolar
+         xi = self._to_bipolar(query_packed)
+         X = np.stack([self._to_bipolar(p) for p in patterns_packed]).T  # (D, M)
+
+         if activations is not None:
+             # Weight patterns by activation
+             X = X * activations[np.newaxis, :]
+
+         trajectory = []
+         attention_history = []
+
+         for iteration in range(self.max_iterations):
+             e = self.energy(xi, X)
+             trajectory.append(e)
+
+             # Update
+             xi_new = self.update(xi, X)
+
+             # Track attention
+             scores = self.beta * (X.T @ xi)
+             scores -= scores.max()
+             attention = np.exp(scores) / (np.exp(scores).sum() + 1e-12)
+             attention_history.append(attention.copy())
+
+             # Check convergence
+             diff = np.linalg.norm(xi_new - xi)
+             xi = xi_new
+             if diff < self.convergence_eps:
+                 break
+
+         final_packed = self._to_packed(xi)
+         final_energy = self.energy(xi, X)
+         trajectory.append(final_energy)
+
+         return {
+             'final_state': final_packed,
+             'final_state_real': xi,
+             'final_energy': final_energy,
+             'energy_trajectory': trajectory,
+             'attention_weights': attention_history[-1] if attention_history else None,
+             'iterations': iteration + 1,
+             'converged': iteration < self.max_iterations - 1,
+         }
+
+
+ class EnergyModel:
+     """
+     Unified energy model combining local energy function with relaxation dynamics.
+
+     Supports two modes:
+     1. Binary relaxation: direct bit-flip search (greedy + SA)
+     2. Hopfield relaxation: continuous-state update → discretize
+
+     The energy model drives the reasoning process:
+     - Query activates memories (high energy, incoherent state)
+     - Relaxation reduces energy (resolves conflicts, strengthens coherence)
+     - Final state represents the "answer" (lowest energy configuration)
+     """
+
+     def __init__(
+         self,
+         mode: str = 'hybrid',   # 'binary', 'hopfield', or 'hybrid'
+         energy_fn: Optional[EnergyFunction] = None,
+         binary_dynamics: Optional[RelaxationDynamics] = None,
+         hopfield_dynamics: Optional[HopfieldDynamics] = None,
+     ):
+         self.mode = mode
+         self.energy_fn = energy_fn or EnergyFunction()
+         self.binary_dynamics = binary_dynamics or RelaxationDynamics(self.energy_fn)
+         self.hopfield_dynamics = hopfield_dynamics or HopfieldDynamics()
+
+     def minimize(self, initial_state: np.ndarray,
+                  context_vectors: np.ndarray,
+                  activations: np.ndarray,
+                  bindings: Optional[List] = None,
+                  **kwargs) -> Dict[str, Any]:
+         """
+         Minimize energy from initial state.
+
+         In hybrid mode:
+         1. First run Hopfield (fast convergence, continuous)
+         2. Then refine with binary relaxation (exact, discrete)
+         """
+         if self.mode == 'hopfield':
+             return self.hopfield_dynamics.relax(
+                 initial_state, context_vectors, activations
+             )
+         elif self.mode == 'binary':
+             return self.binary_dynamics.relax(
+                 initial_state, context_vectors, activations, bindings, **kwargs
+             )
+         else:  # hybrid
+             # Phase 1: Hopfield for fast coarse convergence
+             hopfield_result = self.hopfield_dynamics.relax(
+                 initial_state, context_vectors, activations
+             )
+
+             # Phase 2: Binary refinement
+             binary_result = self.binary_dynamics.relax(
+                 hopfield_result['final_state'],
+                 context_vectors, activations, bindings,
+                 attractors=context_vectors[:min(10, len(context_vectors))],
+                 **kwargs
+             )
+
+             # Combine results
+             return {
+                 'final_state': binary_result['final_state'],
+                 'final_energy': binary_result['final_energy'],
+                 'hopfield_energy_trajectory': hopfield_result['energy_trajectory'],
+                 'binary_energy_trajectory': [t['total'] for t in binary_result['trajectory']],
+                 'hopfield_attention': hopfield_result.get('attention_weights'),
+                 'total_iterations': (
+                     hopfield_result['iterations'] + binary_result['iterations']
+                 ),
+                 'converged': binary_result['converged'],
+                 'energy_reduction': (
+                     hopfield_result['energy_trajectory'][0] -
+                     binary_result['final_energy']
+                 ) if hopfield_result['energy_trajectory'] else 0.0,
+             }
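
The retrieval behaviour behind HopfieldDynamics is easy to check in isolation. A self-contained numpy sketch of the same Ramsauer-style update (the toy sizes D=256, M=8 are illustrative choices, not values from the module):

import numpy as np

rng = np.random.default_rng(0)
D, M, beta = 256, 8, 8.0
X = np.sign(rng.standard_normal((D, M)))      # M random ±1 patterns as columns
xi = X[:, 3] + 0.4 * rng.standard_normal(D)   # noisy cue for pattern 3

for _ in range(10):
    scores = beta * (X.T @ xi)
    scores -= scores.max()                    # numerically stable softmax
    attention = np.exp(scores) / np.exp(scores).sum()
    xi = X @ attention                        # ξ_new = X @ softmax(β · X^T @ ξ)

print(int(np.argmax(X.T @ xi)))               # 3: the cue snaps to its stored pattern

With β this large the softmax is nearly one-hot after the first update, which is why the hybrid mode uses Hopfield relaxation for fast coarse convergence before the discrete bit-flip refinement.
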
mle/inference/__init__.py ADDED
@@ -0,0 +1 @@
+ from .reasoning_engine import ReasoningEngine, QueryEncoder, ResponseDecoder
mle/inference/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (238 Bytes)
 
mle/inference/__pycache__/reasoning_engine.cpython-312.pyc ADDED
Binary file (25.5 kB)
 
mle/inference/reasoning_engine.py ADDED
@@ -0,0 +1,555 @@
1
+ """
2
+ MLE Inference Module: Reasoning Engine
3
+ ========================================
4
+ The complete reasoning pipeline that integrates all modules:
5
+
6
+ Query β†’ Encoding β†’ Routing β†’ Binding β†’ Energy Minimization β†’ Response
7
+
8
+ The "chain of thought" is an internal trajectory through state space:
9
+ 1. ENCODE: Transform input query into a binary vector
10
+ 2. ROUTE: Activate relevant memory regions (top-500 beam)
11
+ 3. BIND: Construct composite representations from activated memories
12
+ 4. RELAX: Minimize energy to find coherent state
13
+ 5. DECODE: Extract response from final state
14
+
15
+ Multiple reasoning iterations are possible (iterative deepening):
16
+ each iteration uses the previous result as context for the next.
17
+ """
18
+
19
+ import numpy as np
20
+ from typing import Optional, List, Dict, Any, Tuple
21
+ import logging
22
+ import time
23
+ import hashlib
24
+
25
+ from ..memory.sparse_address_table import SparseAddressTable
26
+ from ..routing.recursive_jit_router import RecursiveJITRouter, RoutingResult
27
+ from ..binding.semantic_binding import BindingEngine, BinaryBinding
28
+ from ..energy.energy_model import EnergyModel, EnergyFunction, RelaxationDynamics, HopfieldDynamics
29
+ from ..utils.simd_ops import (
30
+ N_BITS, N_WORDS,
31
+ random_binary_vector, random_binary_vectors,
32
+ hamming_distance, hamming_similarity,
33
+ xor_vectors, majority_vote
34
+ )
35
+
36
+ logger = logging.getLogger(__name__)
37
+
38
+
39
+ class QueryEncoder:
40
+ """
41
+ Encode text/symbolic queries into binary vectors.
42
+
43
+ Encoding strategies:
44
+ 1. Named concept lookup (exact match in codebook)
45
+ 2. Hash-based encoding (deterministic binary vector from string)
46
+ 3. Composite encoding (bind multiple concepts together)
47
+ """
48
+
49
+ def __init__(self, binding_engine: BindingEngine):
50
+ self.binding = binding_engine
51
+
52
+ def encode(self, query: Any) -> np.ndarray:
53
+ """Encode a query into a binary vector.
54
+
55
+ Supports:
56
+ - str: hash-based or codebook lookup
57
+ - np.ndarray: pass-through (already a vector)
58
+ - list of str: composite binding of concepts
59
+ - dict: role-filler structure encoding
60
+ """
61
+ if isinstance(query, np.ndarray):
62
+ return query.copy()
63
+ elif isinstance(query, str):
64
+ return self._encode_string(query)
65
+ elif isinstance(query, (list, tuple)):
66
+ return self._encode_composite(query)
67
+ elif isinstance(query, dict):
68
+ return self._encode_structure(query)
69
+ else:
70
+ raise TypeError(f"Cannot encode query of type {type(query)}")
71
+
72
+ def _encode_string(self, text: str) -> np.ndarray:
73
+ """Encode a string to binary vector.
74
+ First tries codebook lookup, then falls back to hash.
75
+ """
76
+ # Try codebook lookup
77
+ vec = self.binding.get_concept(text)
78
+ if vec is not None:
79
+ return vec.copy()
80
+
81
+ # Hash-based encoding: deterministic binary vector from string
82
+ return self._hash_to_vector(text)
83
+
84
+ def _hash_to_vector(self, text: str) -> np.ndarray:
85
+ """Deterministic hash-based encoding.
86
+ Uses SHA-512 repeatedly to fill 4096 bits.
87
+ """
88
+ n_hashes = N_BITS // 512 + 1 # SHA-512 = 512 bits
89
+ bits = []
90
+ for i in range(n_hashes):
91
+ h = hashlib.sha512(f"{text}:{i}".encode()).digest()
92
+ bits.append(np.frombuffer(h, dtype=np.uint8))
93
+ all_bytes = np.concatenate(bits)[:N_BITS // 8]
94
+ return all_bytes.view(np.uint64).copy()
95
+
96
+ def _encode_composite(self, concepts: List[str]) -> np.ndarray:
97
+ """Encode a composite query by bundling encoded concepts."""
98
+ vectors = [self._encode_string(c) for c in concepts]
99
+ if len(vectors) == 1:
100
+ return vectors[0]
101
+ return BinaryBinding.bundle(*vectors)
102
+
103
+ def _encode_structure(self, structure: Dict[str, str]) -> np.ndarray:
104
+ """Encode a structured query (role:filler pairs).
105
+ Example: {"subject": "cat", "relation": "is_a"} β†’
106
+ bind(encode("subject"), encode("cat")) βŠ• bind(encode("relation"), encode("is_a"))
107
+ """
108
+ pairs = []
109
+ for role, filler in structure.items():
110
+ role_vec = self._encode_string(role)
111
+ filler_vec = self._encode_string(filler)
112
+ pairs.append(BinaryBinding.bind(role_vec, filler_vec))
113
+ return BinaryBinding.bundle(*pairs) if len(pairs) > 1 else pairs[0]
114
+
115
+
116
+ class ResponseDecoder:
117
+ """
118
+ Decode final state vector into interpretable response.
119
+
120
+ Strategies:
121
+ 1. Nearest neighbor: find closest named concept(s) in codebook
122
+ 2. Unbinding: recover specific fillers for given roles
123
+ 3. Activation readout: return most-activated memory entries
124
+ """
125
+
126
+ def __init__(self, memory: SparseAddressTable, binding_engine: BindingEngine):
127
+ self.memory = memory
128
+ self.binding = binding_engine
129
+
130
+ def decode(self, state: np.ndarray, top_k: int = 5,
131
+ roles: Optional[List[str]] = None) -> Dict[str, Any]:
132
+ """Decode state vector into response.
133
+
134
+ Returns dict with:
135
+ - nearest_concepts: closest named concepts with similarities
136
+ - nearest_memories: closest memory entries
137
+ - role_fillers: decoded role-filler pairs (if roles provided)
138
+ - confidence: overall decoding confidence
139
+ """
140
+ result = {}
141
+
142
+ # 1. Nearest concepts in codebook
143
+ concept_scores = []
144
+ for name, vec in self.binding._codebook.items():
145
+ sim = hamming_similarity(state, vec)
146
+ concept_scores.append((name, float(sim)))
147
+ concept_scores.sort(key=lambda x: x[1], reverse=True)
148
+ result['nearest_concepts'] = concept_scores[:top_k]
149
+
150
+ # 2. Nearest memory entries
151
+ memory_results = self.memory.query_nearest(state, k=top_k, use_lsh=True)
152
+ nearest_memories = []
153
+ for idx, dist in memory_results:
154
+ meta = self.memory._metadata[idx]
155
+ name = meta.get('name', f'entry_{idx}') if meta else f'entry_{idx}'
156
+ sim = 1.0 - dist / N_BITS
157
+ nearest_memories.append({
158
+ 'index': idx,
159
+ 'name': name,
160
+ 'distance': dist,
161
+ 'similarity': sim,
162
+ 'metadata': meta,
163
+ })
164
+ result['nearest_memories'] = nearest_memories
165
+
166
+ # 3. Role-filler decoding
167
+ if roles:
168
+ role_fillers = {}
169
+ for role_name in roles:
170
+ role_vec = self.binding.get_concept(role_name)
171
+ if role_vec is None:
172
+ role_vec = QueryEncoder(self.binding)._hash_to_vector(role_name)
173
+ filler = BinaryBinding.unbind(state, role_vec)
174
+ # Find nearest concept to recovered filler
175
+ filler_scores = []
176
+ for name, vec in self.binding._codebook.items():
177
+ sim = hamming_similarity(filler, vec)
178
+ filler_scores.append((name, float(sim)))
179
+ filler_scores.sort(key=lambda x: x[1], reverse=True)
180
+ role_fillers[role_name] = filler_scores[:3]
181
+ result['role_fillers'] = role_fillers
182
+
183
+ # 4. Confidence (based on how close the best match is)
184
+ if concept_scores:
185
+ result['confidence'] = concept_scores[0][1]
186
+ else:
187
+ result['confidence'] = 0.0
188
+
189
+ return result
190
+
191
+
192
+ class ReasoningStep:
193
+ """Record of a single reasoning step in the inference chain."""
194
+ __slots__ = ['step_num', 'state', 'energy', 'energy_components',
195
+ 'routing_result', 'decoded', 'timestamp']
196
+
197
+ def __init__(self, step_num: int):
198
+ self.step_num = step_num
199
+ self.state: Optional[np.ndarray] = None
200
+ self.energy: float = float('inf')
201
+ self.energy_components: Dict[str, float] = {}
202
+ self.routing_result: Optional[RoutingResult] = None
203
+ self.decoded: Optional[Dict] = None
204
+ self.timestamp: float = 0.0
205
+
206
+
207
+ class ReasoningEngine:
208
+ """
209
+ The MLE Reasoning Engine β€” core inference loop.
210
+
211
+ Architecture:
212
+ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”
213
+ β”‚ Query │────▢│ Router │────▢│ Binder │────▢│ Energy β”‚
214
+ β”‚ Encoder β”‚ β”‚ (Beam) β”‚ β”‚ (Compose)β”‚ β”‚ (Relax) β”‚
215
+ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜
216
+ β”‚ β”‚
217
+ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚
218
+ └───────────│ Response │◀────│ Decode β”‚β—€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜
219
+ β”‚ β”‚ β”‚ β”‚
220
+ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜
221
+
222
+ Reasoning loop:
223
+ 1. Encode query β†’ binary vector
224
+ 2. Route β†’ find relevant memories (beam search)
225
+ 3. Activate memories, construct bindings
226
+ 4. Minimize energy (Hopfield + binary relaxation)
227
+ 5. Decode final state β†’ response
228
+ 6. [Optional] Use result as new query β†’ iterate
229
+ """
230
+
+     def __init__(
+         self,
+         memory: Optional[SparseAddressTable] = None,
+         beam_width: int = 500,
+         max_routing_depth: int = 3,
+         max_reasoning_steps: int = 5,
+         energy_mode: str = 'hybrid',
+         hopfield_beta: float = 8.0,
+         relaxation_iterations: int = 50,
+     ):
+         # Memory
+         self.memory = memory or SparseAddressTable()
+
+         # Binding engine
+         self.binding = BindingEngine(use_binary=True)
+
+         # Router
+         self.router = RecursiveJITRouter(
+             memory=self.memory,
+             beam_width=beam_width,
+             max_depth=max_routing_depth,
+         )
+
+         # Energy model
+         energy_fn = EnergyFunction()
+         self.energy_model = EnergyModel(
+             mode=energy_mode,
+             energy_fn=energy_fn,
+             binary_dynamics=RelaxationDynamics(
+                 energy_fn,
+                 max_iterations=relaxation_iterations,
+                 n_candidates=32,
+             ),
+             hopfield_dynamics=HopfieldDynamics(
+                 beta=hopfield_beta,
+                 max_iterations=20,
+             ),
+         )
+
+         # Encoder/Decoder
+         self.encoder = QueryEncoder(self.binding)
+         self.decoder = ResponseDecoder(self.memory, self.binding)
+
+         # Config
+         self.max_reasoning_steps = max_reasoning_steps
+
277
+ # ── Knowledge Management ──────────────────────────────────────────────
278
+
279
+ def add_concept(self, name: str, metadata: Optional[Dict] = None) -> np.ndarray:
280
+ """Add a named concept to both memory and binding codebook."""
281
+ vec = self.binding.register_concept(name)
282
+ self.memory.store_concept(name, content=vec, metadata=metadata)
283
+ return vec
284
+
285
+ def add_relation(self, subject: str, relation: str, obj: str,
286
+ metadata: Optional[Dict] = None) -> int:
287
+ """Store a semantic relation as a bound vector in memory."""
288
+ # Ensure all concepts exist
289
+ for name in [subject, relation, obj]:
290
+ if self.binding.get_concept(name) is None:
291
+ self.add_concept(name)
292
+
293
+ # Create bound representation
294
+ s = self.binding.get_concept(subject)
295
+ r = self.binding.get_concept(relation)
296
+ o = self.binding.get_concept(obj)
297
+ bound = BinaryBinding.encode_triple(s, r, o)
298
+
299
+ # Store in memory
300
+ meta = metadata or {}
301
+ meta.update({
302
+ 'type': 'relation',
303
+ 'subject': subject,
304
+ 'relation': relation,
305
+ 'object': obj,
306
+ 'name': f"{subject}_{relation}_{obj}",
307
+ })
308
+
309
+ # Address = blend of subject and relation (for routing)
310
+ address = BinaryBinding.bundle(s, r, o)
311
+ return self.memory.store(address, bound, metadata=meta,
312
+ name=f"{subject}_{relation}_{obj}")
313
+
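The round trip that add_relation relies on is worth seeing in isolation. A sketch under the assumption (borne out by the test suite's exact-recovery check) that encode_triple is XOR composition, so unbinding subject and relation recovers the object exactly; the vectors are illustrative:

import numpy as np

rng = np.random.default_rng(1)
s, r, o = (rng.integers(0, 2**63, 64, dtype=np.uint64) for _ in range(3))
triple = s ^ r ^ o                           # encode_triple(s, r, o)
assert np.array_equal((triple ^ s) ^ r, o)   # the object is recovered exactly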
+     # ── Core Reasoning ────────────────────────────────────────────────────
+
+     def reason(self, query: Any, max_steps: Optional[int] = None,
+                roles: Optional[List[str]] = None,
+                verbose: bool = False) -> Dict[str, Any]:
+         """
+         Execute the full reasoning pipeline.
+
+         Args:
+             query: input query (string, vector, list of strings, or dict)
+             max_steps: override max reasoning iterations
+             roles: roles to decode in the response
+             verbose: print step-by-step info
+
+         Returns:
+             Dict with:
+                 - response: decoded response dict
+                 - reasoning_chain: list of ReasoningStep objects
+                 - total_energy_reduction: cumulative energy decrease
+                 - latency_ms: total time
+         """
+         t0 = time.perf_counter()
+         steps = max_steps or self.max_reasoning_steps
+         chain: List[ReasoningStep] = []
+
+         # ── Step 0: Encode query ──────────────────────────────────────
+         query_vec = self.encoder.encode(query)
+         current_state = query_vec.copy()
+
+         if verbose:
+             logger.info(f"Query encoded. Starting reasoning with max {steps} steps.")
+
+         for step_num in range(steps):
+             step = ReasoningStep(step_num)
+             step.timestamp = time.perf_counter() - t0
+
+             # ── Step 1: Route: find relevant memories ─────────────────
+             routing = self.router.route_and_activate(current_state)
+             step.routing_result = routing
+
+             if verbose:
+                 logger.info(
+                     f"Step {step_num}: Routed to {len(routing.indices)} candidates "
+                     f"(depth={routing.depth}, latency={routing.latency_ms:.1f}ms)"
+                 )
+
+             if len(routing.indices) == 0:
+                 step.state = current_state
+                 chain.append(step)
+                 break
+
+             # ── Step 2: Gather context from activated memories ────────
+             active_indices = routing.indices[:min(100, len(routing.indices))]
+             context_vectors = np.ascontiguousarray(
+                 self.memory._contents[active_indices]
+             )
+             act_strengths = routing.activations[:len(active_indices)]
+
+             # ── Step 3: Construct bindings for coherence checking ─────
+             bindings = self._extract_bindings(active_indices)
+
+             # ── Step 4: Energy minimization ───────────────────────────
+             energy_result = self.energy_model.minimize(
+                 current_state,
+                 context_vectors,
+                 act_strengths,
+                 bindings=bindings,
+             )
+
+             new_state = energy_result['final_state']
+             step.state = new_state
+             step.energy = energy_result['final_energy']
+
+             if verbose:
+                 logger.info(
+                     f"Step {step_num}: Energy {step.energy:.4f} "
+                     f"(iterations={energy_result.get('total_iterations', '?')})"
+                 )
+
+             chain.append(step)
+
+             # ── Check convergence ─────────────────────────────────────
+             if step_num > 0:
+                 state_change = hamming_distance(current_state, new_state) / N_BITS
+                 energy_change = abs(chain[-2].energy - step.energy)
+                 if state_change < 0.01 and energy_change < 1e-4:
+                     if verbose:
+                         logger.info(f"Reasoning converged at step {step_num}")
+                     break
+
+             current_state = new_state
+
+         # ── Decode the final state ────────────────────────────────────
+         final_state = chain[-1].state if chain else query_vec
+         response = self.decoder.decode(final_state, top_k=10, roles=roles)
+
+         # Attach the decoding to the last step
+         if chain:
+             chain[-1].decoded = response
+
+         total_time = (time.perf_counter() - t0) * 1000
+
+         # Energy trajectory
+         energies = [s.energy for s in chain if s.energy != float('inf')]
+         energy_reduction = (energies[0] - energies[-1]) if len(energies) >= 2 else 0.0
+
+         return {
+             'response': response,
+             'reasoning_chain': chain,
+             'total_energy_reduction': energy_reduction,
+             'latency_ms': total_time,
+             'num_steps': len(chain),
+             'final_state': final_state,
+         }
+
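A hypothetical way to drive this method and inspect the chain it returns, using only the APIs defined in this file (concept names are illustrative):

engine = ReasoningEngine(beam_width=200, max_reasoning_steps=3)
engine.add_relation("cat", "is_a", "animal")
engine.add_relation("dog", "is_a", "animal")
result = engine.reason("cat", roles=["is_a"])
for step in result['reasoning_chain']:
    print(step.step_num, f"E={step.energy:.4f}", f"t={step.timestamp * 1000:.1f}ms")
print("energy reduction:", result['total_energy_reduction'])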
+     def _extract_bindings(self, indices: np.ndarray) -> List[Tuple]:
+         """Extract binding triples from activated memory entries for coherence checking."""
+         bindings = []
+         for idx in indices:
+             meta = self.memory._metadata[idx]
+             if meta and meta.get('type') == 'relation':
+                 subj = meta.get('subject')
+                 rel = meta.get('relation')
+                 obj = meta.get('object')
+                 if subj and rel and obj:
+                     s_vec = self.binding.get_concept(subj)
+                     r_vec = self.binding.get_concept(rel)
+                     o_vec = self.binding.get_concept(obj)
+                     if s_vec is not None and r_vec is not None and o_vec is not None:
+                         bound = self.memory._contents[idx]
+                         # Check: unbind(bound, bind(s, r)) should ≈ o
+                         role_composed = BinaryBinding.bind(s_vec, r_vec)
+                         bindings.append((bound, role_composed, o_vec))
+         return bindings[:20]  # Limit for performance
+
+     # ── Analogy Solving ───────────────────────────────────────────────
+
+     def solve_analogy(self, a: str, b: str, c: str,
+                       candidates: Optional[List[str]] = None) -> Dict[str, Any]:
+         """
+         Solve an analogy: a is to b as c is to ?
+
+         Uses a binding-based approach:
+             1. Compute the relation R = bind(a, b)  [XOR extracts the difference]
+             2. Apply it: query = bind(c, R)
+             3. Route the query through memory
+             4. Decode the nearest match
+         """
+         t0 = time.perf_counter()
+
+         # Get or create concept vectors
+         for name in [a, b, c]:
+             if self.binding.get_concept(name) is None:
+                 self.add_concept(name)
+
+         va = self.binding.get_concept(a)
+         vb = self.binding.get_concept(b)
+         vc = self.binding.get_concept(c)
+
+         # Compute the analogy query
+         relation = BinaryBinding.bind(va, vb)
+         query = BinaryBinding.bind(vc, relation)
+
+         # Route and find matches
+         result = self.reason(query, max_steps=2)
+
+         # Also do a direct codebook comparison
+         codebook_results = self.binding.solve_analogy(a, b, c, candidates)
+
+         return {
+             'query': f"{a} : {b} :: {c} : ?",
+             'codebook_ranking': codebook_results[:10],
+             'memory_response': result['response'],
+             'latency_ms': (time.perf_counter() - t0) * 1000,
+         }
+
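The algebra behind step 2 is a one-liner under the XOR assumption: if the a→b transformation is the same bit-flip pattern as c→d, then a⊕b = c⊕d, hence d = c⊕(a⊕b). A sketch with an illustrative flip pattern:

import numpy as np

rng = np.random.default_rng(2)
a, c, flips = (rng.integers(0, 2**63, 64, dtype=np.uint64) for _ in range(3))
b = a ^ flips                        # b differs from a by some relation pattern
d = c ^ flips                        # d differs from c by the same pattern
assert np.array_equal(c ^ (a ^ b), d)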
+     # ── Association ───────────────────────────────────────────────────
+
+     def associate(self, concept: str, top_k: int = 5) -> List[Tuple[str, float]]:
+         """Find the concepts most associated with the given concept."""
+         vec = self.binding.get_concept(concept)
+         if vec is None:
+             vec = self.encoder.encode(concept)
+
+         # Route through memory
+         routing = self.router.route(vec, beam_width=100)
+         if len(routing.indices) == 0:
+             return []
+
+         # Collect associated concepts
+         associations = []
+         seen = set()
+         for pos, idx in enumerate(routing.indices):
+             meta = self.memory._metadata[idx]
+             if meta:
+                 name = meta.get('name', '')
+                 if name and name != concept and name not in seen:
+                     # Similarity derives from this entry's own routing distance
+                     # (indexing by the beam position, not by the output length)
+                     sim = 1.0 - routing.distances[pos] / N_BITS
+                     associations.append((name, float(sim)))
+                     seen.add(name)
+                     if len(associations) >= top_k:
+                         break
+
+         return associations
+
+     # ── Composition ───────────────────────────────────────────────────
+
+     def compose(self, *concepts: str) -> Dict[str, Any]:
+         """Compose multiple concepts and find what the composition represents."""
+         vectors = []
+         for c in concepts:
+             vec = self.binding.get_concept(c)
+             if vec is None:
+                 self.add_concept(c)
+                 vec = self.binding.get_concept(c)
+             vectors.append(vec)
+
+         # Bundle all concepts
+         if len(vectors) == 1:
+             composed = vectors[0]
+         else:
+             composed = BinaryBinding.bundle(*vectors)
+
+         # Decode what this composition means
+         result = self.reason(composed, max_steps=2)
+         return {
+             'input_concepts': concepts,
+             'response': result['response'],
+             'latency_ms': result['latency_ms'],
+         }
+
+     # ── Stats ─────────────────────────────────────────────────────────
+
+     def stats(self) -> Dict[str, Any]:
+         """Return engine statistics."""
+         return {
+             'memory': self.memory.stats(),
+             'codebook_size': len(self.binding._codebook),
+             'beam_width': self.router.beam_width,
+             'energy_mode': self.energy_model.mode,
+         }
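Putting the higher-level entry points together, a hypothetical end-to-end sketch (concept names illustrative; only APIs defined above are used):

engine = ReasoningEngine(beam_width=200, max_routing_depth=2)
for s, r, o in [("king", "is_a", "man"), ("queen", "is_a", "woman")]:
    engine.add_relation(s, r, o)

print(engine.solve_analogy("king", "man", "queen")['codebook_ranking'][:3])
print(engine.associate("king", top_k=3))
print(engine.compose("king", "queen")['response'].get('confidence'))
print(engine.stats())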
mle/memory/__init__.py ADDED
@@ -0,0 +1 @@
+ from .sparse_address_table import SparseAddressTable, HammingLSH, MemoryEntry
mle/memory/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (236 Bytes). View file
 
mle/memory/__pycache__/sparse_address_table.cpython-312.pyc ADDED
Binary file (22 kB). View file
 
mle/memory/sparse_address_table.py ADDED
@@ -0,0 +1,378 @@
+ """
+ MLE Memory Module: Sparse Address Table
+ ========================================
+ Distributed memory indexed by 4096-bit binary vectors.
+ Semantic proximity is encoded via Hamming distance.
+
+ Features:
+ - Bit-packed storage (512 bytes/vector) with cache-aligned layout
+ - LSH index for sub-linear approximate nearest neighbor search
+ - Multi-resolution indexing (coarse + fine search)
+ - Metadata/payload attachment per entry
+ """
+
+ import numpy as np
+ from collections import defaultdict
+ from typing import List, Tuple, Optional, Dict, Any
+ import logging
+
+ from ..utils.simd_ops import (
+     N_BITS, N_WORDS, N_BYTES,
+     random_binary_vector, random_binary_vectors,
+     hamming_distance, hamming_batch, hamming_topk,
+     xor_vectors, popcount, majority_vote, hamming_similarity
+ )
+
+ logger = logging.getLogger(__name__)
+
+
+ class HammingLSH:
+     """Locality-Sensitive Hashing for Hamming space.
+
+     Uses random bit sampling as the LSH family:
+         h_i(v) = v[bit_index_i]
+         P(h(a) == h(b)) = 1 - hamming(a, b) / n
+
+     Multiple hash tables with K-bit signatures for amplification.
+     """
+
+     def __init__(
+         self,
+         n_bits: int = N_BITS,
+         n_tables: int = 32,
+         n_projections: int = 8,
+         seed: int = 42
+     ):
+         self.n_bits = n_bits
+         self.n_tables = n_tables
+         self.n_projections = n_projections
+
+         rng = np.random.RandomState(seed)
+         # Random bit indices for each table: which bits to sample
+         self.bit_indices = [
+             rng.choice(n_bits, n_projections, replace=False)
+             for _ in range(n_tables)
+         ]
+
+         # Hash tables: table_idx -> {hash_key -> list of vector indices}
+         self.tables: List[Dict[bytes, List[int]]] = [
+             defaultdict(list) for _ in range(n_tables)
+         ]
+         self.n_indexed = 0
+
+     def _compute_hash(self, bits_unpacked: np.ndarray, table_idx: int) -> bytes:
+         """Extract a hash signature from an unpacked bit array."""
+         sig = bits_unpacked[self.bit_indices[table_idx]]
+         return np.packbits(sig).tobytes()
+
+     def _unpack_vector(self, packed: np.ndarray) -> np.ndarray:
+         """Unpack a uint64 vector to a bit array."""
+         return np.unpackbits(packed.view(np.uint8))
+
+     def add(self, packed_vector: np.ndarray, idx: int):
+         """Add a single vector to all hash tables."""
+         bits = self._unpack_vector(packed_vector)
+         for t in range(self.n_tables):
+             h = self._compute_hash(bits, t)
+             self.tables[t][h].append(idx)
+         self.n_indexed += 1
+
+     def add_batch(self, packed_vectors: np.ndarray, start_idx: int = 0):
+         """Add multiple vectors to all hash tables."""
+         for i in range(len(packed_vectors)):
+             self.add(packed_vectors[i], start_idx + i)
+
+     def query_candidates(self, packed_query: np.ndarray, max_candidates: int = 2000) -> np.ndarray:
+         """Find candidate indices via LSH (before exact reranking).
+         Returns deduplicated candidate indices.
+         """
+         bits = self._unpack_vector(packed_query)
+         candidates = set()
+         for t in range(self.n_tables):
+             h = self._compute_hash(bits, t)
+             bucket = self.tables[t].get(h, [])
+             candidates.update(bucket)
+             if len(candidates) >= max_candidates:
+                 break
+         return np.array(list(candidates)[:max_candidates], dtype=np.int64)
+
+     def query_multi_probe(self, packed_query: np.ndarray, n_probes: int = 3,
+                           max_candidates: int = 2000) -> np.ndarray:
+         """Multi-probe LSH: also check neighboring buckets by flipping bits.
+         Increases recall at the cost of more bucket lookups.
+         For short signatures (n_projections <= 12), we can flip multiple
+         bits combinatorially.
+         """
+         bits = self._unpack_vector(packed_query)
+         candidates = set()
+
+         for t in range(self.n_tables):
+             # Original bucket
+             h = self._compute_hash(bits, t)
+             candidates.update(self.tables[t].get(h, []))
+
+             # Probe neighboring buckets: flip each single projection bit
+             probe_bits = bits.copy()
+             n_probe_bits = min(n_probes, self.n_projections)
+             for probe in range(n_probe_bits):
+                 bit_pos = self.bit_indices[t][probe]
+                 probe_bits[bit_pos] ^= 1
+                 h2 = self._compute_hash(probe_bits, t)
+                 candidates.update(self.tables[t].get(h2, []))
+                 probe_bits[bit_pos] ^= 1  # restore
+
+             # Also probe 2-bit flips for the first few bits
+             if n_probes >= 2 and self.n_projections >= 2:
+                 for i in range(min(n_probes, self.n_projections)):
+                     for j in range(i + 1, min(n_probes, self.n_projections)):
+                         probe_bits = bits.copy()
+                         probe_bits[self.bit_indices[t][i]] ^= 1
+                         probe_bits[self.bit_indices[t][j]] ^= 1
+                         h3 = self._compute_hash(probe_bits, t)
+                         candidates.update(self.tables[t].get(h3, []))
+
+             if len(candidates) >= max_candidates:
+                 break
+
+         return np.array(list(candidates)[:max_candidates], dtype=np.int64)
+
+
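A back-of-envelope check of the bit-sampling parameters used below (K=8 projections, T=32 tables): a single sampled bit agrees with probability p = 1 - d/n, a K-bit signature matches with p^K, and with T independent tables the chance of at least one collision is roughly 1 - (1 - p^K)^T. A sketch:

def lsh_recall(d: int, n: int = 4096, k: int = 8, t: int = 32) -> float:
    # Probability that a point at Hamming distance d lands in at least one
    # of the query's buckets, assuming independent random bit samples.
    p = 1.0 - d / n
    return 1.0 - (1.0 - p ** k) ** t

print(f"{lsh_recall(100):.3f}")   # near-duplicate (d=100): ~1.000
print(f"{lsh_recall(2048):.3f}")  # unrelated (d=n/2):      ~0.118

Unrelated vectors still collide about 12% of the time, which is why the table reranks LSH candidates by exact Hamming distance rather than trusting the buckets alone.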
+ class MemoryEntry:
+     """A single entry in the Sparse Address Table."""
+     __slots__ = ['address', 'content', 'metadata', 'activation', 'timestamp']
+
+     def __init__(self, address: np.ndarray, content: np.ndarray,
+                  metadata: Optional[Dict[str, Any]] = None):
+         self.address = address          # (N_WORDS,) uint64 - the index key
+         self.content = content          # (N_WORDS,) uint64 - stored data
+         self.metadata = metadata or {}  # arbitrary metadata
+         self.activation = 0.0           # current activation level
+         self.timestamp = 0              # last access time
+
+
+ class SparseAddressTable:
+     """
+     Distributed memory indexed by 4096-bit binary vectors.
+
+     Architecture:
+     - Primary storage: contiguous (N, N_WORDS) uint64 matrix for SIMD batch ops
+     - LSH index: multi-table bit-sampling for sub-linear ANN search
+     - Content storage: separate matrix (decoupled address/content)
+     - Activation tracking: for energy-based dynamics
+
+     Memory layout is Structure of Arrays (SoA) for cache locality
+     during batch Hamming distance computation.
+     """
+
+     def __init__(
+         self,
+         capacity: int = 100_000,
+         lsh_tables: int = 32,
+         lsh_projections: int = 8,
+         lsh_seed: int = 42
+     ):
+         self.capacity = capacity
+         self.size = 0
+
+         # SoA layout: addresses and contents as contiguous matrices
+         self._addresses = np.zeros((capacity, N_WORDS), dtype=np.uint64)
+         self._contents = np.zeros((capacity, N_WORDS), dtype=np.uint64)
+
+         # Metadata and activation stored separately (unused slots hold None)
+         self._metadata: List[Optional[Dict[str, Any]]] = [None] * capacity
+         self._activations = np.zeros(capacity, dtype=np.float64)
+         self._timestamps = np.zeros(capacity, dtype=np.int64)
+
+         # LSH index: short signatures (8-bit) with many tables (32)
+         # for high recall on 4096-bit vectors
+         self.lsh = HammingLSH(
+             n_bits=N_BITS,
+             n_tables=lsh_tables,
+             n_projections=lsh_projections,
+             seed=lsh_seed
+         )
+
+         # Global step counter for timestamps
+         self._step = 0
+
+         # Symbol table: name -> index mapping for named concepts
+         self._symbol_table: Dict[str, int] = {}
+
+     @property
+     def addresses(self) -> np.ndarray:
+         """Active address vectors. Shape: (size, N_WORDS)."""
+         return self._addresses[:self.size]
+
+     @property
+     def contents(self) -> np.ndarray:
+         """Active content vectors. Shape: (size, N_WORDS)."""
+         return self._contents[:self.size]
+
+     @property
+     def activations(self) -> np.ndarray:
+         """Active activation levels. Shape: (size,)."""
+         return self._activations[:self.size]
+
+     def store(self, address: np.ndarray, content: np.ndarray,
+               metadata: Optional[Dict[str, Any]] = None,
+               name: Optional[str] = None) -> int:
+         """Store a new entry. Returns the entry index."""
+         if self.size >= self.capacity:
+             self._grow()
+
+         idx = self.size
+         self._addresses[idx] = address
+         self._contents[idx] = content
+         self._metadata[idx] = metadata or {}
+         self._timestamps[idx] = self._step
+         self._step += 1
+
+         # Index in LSH
+         self.lsh.add(address, idx)
+
+         if name:
+             self._symbol_table[name] = idx
+
+         self.size += 1
+         return idx
+
+     def store_concept(self, name: str, content: Optional[np.ndarray] = None,
+                       metadata: Optional[Dict[str, Any]] = None) -> int:
+         """Store a named concept with an auto-generated address."""
+         address = random_binary_vector()
+         if content is None:
+             content = random_binary_vector()
+         meta = metadata or {}
+         meta['name'] = name
+         return self.store(address, content, metadata=meta, name=name)
+
+     def get_by_name(self, name: str) -> Optional[Tuple[np.ndarray, np.ndarray, Dict]]:
+         """Retrieve an entry by symbolic name."""
+         idx = self._symbol_table.get(name)
+         if idx is None:
+             return None
+         return (self._addresses[idx].copy(),
+                 self._contents[idx].copy(),
+                 self._metadata[idx])
+
+     def get_address_by_name(self, name: str) -> Optional[np.ndarray]:
+         """Get the address vector for a named concept."""
+         idx = self._symbol_table.get(name)
+         if idx is None:
+             return None
+         return self._addresses[idx].copy()
+
+     def get_content_by_name(self, name: str) -> Optional[np.ndarray]:
+         """Get the content vector for a named concept."""
+         idx = self._symbol_table.get(name)
+         if idx is None:
+             return None
+         return self._contents[idx].copy()
+
+     def query_nearest(self, query: np.ndarray, k: int = 10,
+                       use_lsh: bool = True) -> List[Tuple[int, int]]:
+         """Find the k nearest entries by Hamming distance to the query address.
+
+         Args:
+             query: (N_WORDS,) uint64 query vector
+             k: number of results
+             use_lsh: if True, use the LSH pre-filter; if False, exact scan
+
+         Returns:
+             List of (index, distance) tuples, sorted by distance ascending.
+         """
+         if self.size == 0:
+             return []
+
+         if use_lsh and self.size > 1000:
+             # LSH pre-filter → exact rerank
+             candidates = self.lsh.query_multi_probe(query, max_candidates=max(k * 10, 2000))
+             if len(candidates) == 0:
+                 # Fall back to an exact scan
+                 candidates = np.arange(self.size, dtype=np.int64)
+             candidate_vecs = np.ascontiguousarray(self._addresses[candidates])
+             dists = hamming_batch(query, candidate_vecs)
+             if k < len(candidates):
+                 top_local = np.argpartition(dists, k)[:k]
+             else:
+                 top_local = np.arange(len(candidates))
+             order = np.argsort(dists[top_local])
+             sorted_local = top_local[order]
+             return [(int(candidates[i]), int(dists[i])) for i in sorted_local]
+         else:
+             # Exact search
+             indices, distances = hamming_topk(query, self.addresses, k=k)
+             return [(int(idx), int(dist)) for idx, dist in zip(indices, distances)]
+
+     def query_radius(self, query: np.ndarray, radius: int) -> List[Tuple[int, int]]:
+         """Find all entries within a Hamming radius of the query."""
+         if self.size == 0:
+             return []
+         dists = hamming_batch(query, self.addresses)
+         mask = dists <= radius
+         indices = np.where(mask)[0]
+         return [(int(i), int(dists[i])) for i in indices]
+
+     def activate(self, indices: np.ndarray, strengths: np.ndarray):
+         """Set activation levels for the specified entries."""
+         self._activations[indices] = strengths
+
+     def decay_activations(self, factor: float = 0.95):
+         """Exponentially decay all activations."""
+         self._activations[:self.size] *= factor
+
+     def get_active(self, threshold: float = 0.1) -> np.ndarray:
+         """Get indices of entries with activation above the threshold."""
+         return np.where(self._activations[:self.size] > threshold)[0]
+
+     def read_activated(self, threshold: float = 0.1) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+         """Read the contents of activated entries.
+         Returns: (indices, content_vectors, activation_strengths)
+         """
+         active_idx = self.get_active(threshold)
+         if len(active_idx) == 0:
+             return (np.array([], dtype=np.int64),
+                     np.zeros((0, N_WORDS), dtype=np.uint64),
+                     np.array([], dtype=np.float64))
+         return (active_idx,
+                 self._contents[active_idx],
+                 self._activations[active_idx])
+
+     def _grow(self, factor: float = 1.5):
+         """Grow internal storage when capacity is exceeded."""
+         new_cap = int(self.capacity * factor)
+         logger.info(f"Growing SparseAddressTable from {self.capacity} to {new_cap}")
+
+         new_addr = np.zeros((new_cap, N_WORDS), dtype=np.uint64)
+         new_cont = np.zeros((new_cap, N_WORDS), dtype=np.uint64)
+         new_act = np.zeros(new_cap, dtype=np.float64)
+         new_ts = np.zeros(new_cap, dtype=np.int64)
+
+         new_addr[:self.size] = self._addresses[:self.size]
+         new_cont[:self.size] = self._contents[:self.size]
+         new_act[:self.size] = self._activations[:self.size]
+         new_ts[:self.size] = self._timestamps[:self.size]
+
+         self._addresses = new_addr
+         self._contents = new_cont
+         self._activations = new_act
+         self._timestamps = new_ts
+         self._metadata.extend([None] * (new_cap - self.capacity))
+         self.capacity = new_cap
+
+     def stats(self) -> Dict[str, Any]:
+         """Return memory statistics."""
+         mem_bytes = self.size * N_BYTES * 2  # addresses + contents
+         return {
+             'size': self.size,
+             'capacity': self.capacity,
+             'memory_mb': mem_bytes / (1024 * 1024),
+             'lsh_tables': self.lsh.n_tables,
+             'lsh_projections': self.lsh.n_projections,
+             'active_entries': int((self._activations[:self.size] > 0.1).sum()),
+             'named_symbols': len(self._symbol_table),
+         }
+
+     def __repr__(self):
+         return (f"SparseAddressTable(size={self.size}, capacity={self.capacity}, "
+                 f"symbols={len(self._symbol_table)})")
mle/routing/__init__.py ADDED
@@ -0,0 +1 @@
+ from .recursive_jit_router import RecursiveJITRouter, RoutingResult
mle/routing/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (218 Bytes). View file
 
mle/routing/__pycache__/recursive_jit_router.cpython-312.pyc ADDED
Binary file (11.9 kB). View file
 
mle/routing/recursive_jit_router.py ADDED
@@ -0,0 +1,242 @@
+ """
+ MLE Routing Module: Recursive JIT Routing
+ ==========================================
+ Dynamic beam-search routing through memory space.
+
+ Given a query encoded as a 4096-bit binary vector, the router:
+ 1. Uses LSH to find an initial candidate set
+ 2. Recursively refines it by expanding the neighborhoods of top candidates
+ 3. Maintains a beam of the top-K (default 500) candidates at each step
+ 4. Terminates when the beam stabilizes or max depth is reached
+
+ The routing is "JIT" because it lazily explores only promising regions,
+ avoiding full memory scans. Each recursion level enriches the candidate
+ set with neighbors of the current best matches.
+ """
+
+ import numpy as np
+ from typing import List, Tuple, Optional, Dict, Any
+ import logging
+ import time
+
+ from ..memory.sparse_address_table import SparseAddressTable
+ from ..utils.simd_ops import (
+     N_WORDS, N_BITS,
+     hamming_batch, hamming_topk, hamming_distance,
+     xor_vectors, random_binary_vector
+ )
+
+ logger = logging.getLogger(__name__)
+
+
+ class RoutingResult:
+     """Result of a routing query."""
+     __slots__ = ['indices', 'distances', 'activations', 'depth', 'latency_ms',
+                  'candidates_explored', 'beam_history']
+
+     def __init__(self):
+         self.indices: np.ndarray = np.array([], dtype=np.int64)
+         self.distances: np.ndarray = np.array([], dtype=np.int32)
+         self.activations: np.ndarray = np.array([], dtype=np.float64)
+         self.depth: int = 0
+         self.latency_ms: float = 0.0
+         self.candidates_explored: int = 0
+         self.beam_history: List[float] = []  # mean distance per depth
+
+     def top_k(self, k: int) -> Tuple[np.ndarray, np.ndarray]:
+         """Get the top-k results."""
+         k = min(k, len(self.indices))
+         return self.indices[:k], self.distances[:k]
+
+
+ class RecursiveJITRouter:
+     """
+     Recursive JIT Routing Engine.
+
+     Search strategy:
+     1. INIT: LSH query → initial candidate set (~2000 candidates)
+     2. REFINE: For each recursion level:
+        a. Compute exact Hamming distances for all candidates
+        b. Select the top-K beam (default 500)
+        c. Expand: for each beam member, find its LSH neighbors
+        d. Merge new candidates into the pool (deduplicated)
+     3. TERMINATE: when the beam stops changing or max_depth is reached
+
+     The expansion step is key: it allows the router to "walk" through
+     memory space, following semantic chains. Two concepts connected
+     via intermediate nodes can be found even if they're not directly
+     similar to the query.
+     """
+
+     def __init__(
+         self,
+         memory: SparseAddressTable,
+         beam_width: int = 500,
+         max_depth: int = 3,
+         expansion_factor: int = 5,
+         convergence_threshold: float = 0.01,
+         lsh_candidates_per_probe: int = 2000,
+     ):
+         self.memory = memory
+         self.beam_width = beam_width
+         self.max_depth = max_depth
+         self.expansion_factor = expansion_factor
+         self.convergence_threshold = convergence_threshold
+         self.lsh_candidates = lsh_candidates_per_probe
+
+     def route(self, query: np.ndarray, beam_width: Optional[int] = None,
+               max_depth: Optional[int] = None) -> RoutingResult:
+         """Execute recursive JIT routing for a query vector.
+
+         Args:
+             query: (N_WORDS,) uint64 binary vector
+             beam_width: override the default beam width
+             max_depth: override the default max depth
+
+         Returns:
+             RoutingResult with sorted candidates
+         """
+         t0 = time.perf_counter()
+         bw = beam_width or self.beam_width
+         md = max_depth or self.max_depth
+         result = RoutingResult()
+
+         if self.memory.size == 0:
+             result.latency_ms = (time.perf_counter() - t0) * 1000
+             return result
+
+         # ── Phase 1: Initial candidate set via LSH ────────────────────────
+         candidate_set = set()
+         lsh_candidates = self.memory.lsh.query_multi_probe(
+             query, n_probes=3, max_candidates=self.lsh_candidates
+         )
+         candidate_set.update(lsh_candidates.tolist())
+
+         # If LSH returns too few, add random samples
+         if len(candidate_set) < bw and self.memory.size > bw:
+             random_fill = np.random.choice(
+                 self.memory.size,
+                 min(bw * 2, self.memory.size),
+                 replace=False
+             )
+             candidate_set.update(random_fill.tolist())
+
+         # ── Phase 2: Recursive refinement ─────────────────────────────────
+         prev_beam_mean = float('inf')
+
+         for depth in range(md):
+             # Convert the candidate set to an array
+             candidates = np.array(sorted(candidate_set), dtype=np.int64)
+
+             # Compute exact Hamming distances
+             candidate_vecs = np.ascontiguousarray(
+                 self.memory._addresses[candidates]
+             )
+             dists = hamming_batch(query, candidate_vecs)
+
+             # Select the top-K beam
+             actual_k = min(bw, len(candidates))
+             if actual_k < len(candidates):
+                 beam_local_idx = np.argpartition(dists, actual_k)[:actual_k]
+             else:
+                 beam_local_idx = np.arange(len(candidates))
+
+             beam_order = np.argsort(dists[beam_local_idx])
+             beam_local_idx = beam_local_idx[beam_order]
+
+             beam_indices = candidates[beam_local_idx]
+             beam_dists = dists[beam_local_idx]
+
+             # Track convergence
+             beam_mean = float(beam_dists.mean()) if len(beam_dists) > 0 else float('inf')
+             result.beam_history.append(beam_mean)
+
+             # Check convergence
+             improvement = (prev_beam_mean - beam_mean) / max(prev_beam_mean, 1.0)
+             if improvement < self.convergence_threshold and depth > 0:
+                 logger.debug(f"Routing converged at depth {depth}, improvement={improvement:.4f}")
+                 break
+             prev_beam_mean = beam_mean
+
+             # ── Expansion: find neighbors of the top beam members ─────
+             if depth < md - 1:
+                 n_expand = min(self.expansion_factor, len(beam_indices))
+                 for i in range(n_expand):
+                     idx = beam_indices[i]
+                     # Use the beam member's address as a new query
+                     member_addr = self.memory._addresses[idx]
+                     new_candidates = self.memory.lsh.query_multi_probe(
+                         member_addr,
+                         n_probes=2,
+                         max_candidates=self.lsh_candidates // self.expansion_factor
+                     )
+                     candidate_set.update(new_candidates.tolist())
+
+         # ── Phase 3: Compute final activations ────────────────────────────
+         # Activation = inverse normalized Hamming distance (softmax-like)
+         max_dist = N_BITS
+         similarities = 1.0 - beam_dists.astype(np.float64) / max_dist
+         # Temperature-scaled softmax for activations
+         temperature = 0.1
+         exp_sim = np.exp(similarities / temperature)
+         activations = exp_sim / (exp_sim.sum() + 1e-12)
+
+         # Populate the result
+         result.indices = beam_indices
+         result.distances = beam_dists
+         result.activations = activations
+         result.depth = depth + 1
+         result.candidates_explored = len(candidate_set)
+         result.latency_ms = (time.perf_counter() - t0) * 1000
+
+         return result
+
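A worked instance of the Phase 3 activation formula, with illustrative distances. At temperature 0.1, a 100-bit gap out of 4096 already shifts the weight noticeably toward the closer match, and a far entry is essentially zeroed out:

import numpy as np

dists = np.array([400, 500, 2000], dtype=np.float64)
sims = 1.0 - dists / 4096.0          # [0.902, 0.878, 0.512]
w = np.exp(sims / 0.1)               # temperature-scaled softmax
w /= w.sum()
print(np.round(w, 3))                # roughly [0.554, 0.434, 0.011]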
+     def route_and_activate(self, query: np.ndarray, **kwargs) -> RoutingResult:
+         """Route and update memory activations."""
+         result = self.route(query, **kwargs)
+         if len(result.indices) > 0:
+             # Decay existing activations
+             self.memory.decay_activations(factor=0.9)
+             # Set new activations
+             self.memory.activate(result.indices, result.activations)
+         return result
+
+     def multi_hop_route(self, query: np.ndarray, hops: int = 2,
+                         beam_width: Optional[int] = None) -> List[RoutingResult]:
+         """Multi-hop routing: each hop uses the centroid of the previous results as its query.
+
+         This enables "chain of thought" routing through semantic space:
+             Query → Hop1 results → Centroid → Hop2 results → ...
+         """
+         bw = beam_width or self.beam_width
+         results = []
+         current_query = query.copy()
+
+         for hop in range(hops):
+             result = self.route(current_query, beam_width=bw)
+             results.append(result)
+
+             if len(result.indices) == 0:
+                 break
+
+             # Compute the weighted centroid of the top results for the next hop
+             top_k = min(10, len(result.indices))
+             top_contents = self.memory._contents[result.indices[:top_k]]
+             weights = result.activations[:top_k]
+
+             # Weighted majority vote as the centroid
+             # Weight by activation: more activated = more influence
+             current_query = self._weighted_centroid(top_contents, weights)
+
+         return results
+
+     def _weighted_centroid(self, vectors: np.ndarray, weights: np.ndarray) -> np.ndarray:
+         """Compute a weighted centroid of binary vectors via weighted majority vote."""
+         # Unpack to bits
+         bits = np.unpackbits(vectors.view(np.uint8), axis=1).astype(np.float64)
+         # Weighted sum
+         weighted = (bits * weights[:, np.newaxis]).sum(axis=0)
+         # Threshold at the weighted midpoint
+         threshold = weights.sum() / 2.0
+         centroid_bits = (weighted > threshold).astype(np.uint8)
+         return np.packbits(centroid_bits).view(np.uint64).copy()
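The weighted majority vote on a toy 8-bit example: a centroid bit is set when the activation mass voting 1 exceeds half of the total mass (values here are illustrative, not from the source):

import numpy as np

bits = np.array([[1, 1, 0, 0, 1, 0, 1, 0],
                 [1, 0, 0, 1, 1, 0, 0, 0],
                 [0, 1, 1, 1, 1, 0, 0, 0]], dtype=np.float64)
w = np.array([0.5, 0.3, 0.2])
centroid = ((bits * w[:, None]).sum(axis=0) > w.sum() / 2).astype(np.uint8)
print(centroid)  # [1 1 0 0 1 0 0 0]: ties (columns 3 and 6 at exactly 0.5) resolve to 0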
mle/tests/__pycache__/test_full_system.cpython-312.pyc ADDED
Binary file (37.4 kB). View file
 
mle/tests/test_full_system.py ADDED
@@ -0,0 +1,904 @@
+ """
+ MLE Comprehensive Test Suite
+ ===============================
+ Tests covering:
+ 1. SIMD operations correctness & performance
+ 2. Memory storage & retrieval
+ 3. LSH indexing quality
+ 4. Routing latency & scalability
+ 5. Binding operations (binary & HRR)
+ 6. Energy convergence
+ 7. Reasoning capabilities (association, analogy, composition)
+ 8. End-to-end integration
+ """
+
+ import numpy as np
+ import time
+ import sys
+ import os
+
+ # Add the project root to the path
+ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
+
+ from mle.utils.simd_ops import (
+     N_BITS, N_WORDS,
+     hamming_distance, hamming_batch, hamming_topk,
+     hamming_similarity, xor_vectors, popcount,
+     majority_vote, random_binary_vector, random_binary_vectors,
+     normalize_density, get_native_lib
+ )
+ from mle.memory.sparse_address_table import SparseAddressTable, HammingLSH
+ from mle.routing.recursive_jit_router import RecursiveJITRouter
+ from mle.binding.semantic_binding import HRRBinding, BinaryBinding, BindingEngine
+ from mle.energy.energy_model import EnergyFunction, RelaxationDynamics, HopfieldDynamics, EnergyModel
+ from mle.inference.reasoning_engine import ReasoningEngine
+
+
+ def header(title):
+     print(f"\n{'=' * 70}")
+     print(f" {title}")
+     print(f"{'=' * 70}")
+
+
+ def check(condition, message):
+     status = "✓" if condition else "✗"
+     print(f"  [{status}] {message}")
+     return condition
+
+
+ # ══════════════════════════════════════════════════════════════════════════════
+ # 1. SIMD OPERATIONS
+ # ══════════════════════════════════════════════════════════════════════════════
+
+ def test_simd_operations():
+     header("1. SIMD Operations")
+     all_pass = True
+     np.random.seed(42)
+
+     # Check the native lib
+     lib = get_native_lib()
+     all_pass &= check(lib is not None, f"Native SIMD library compiled: {lib is not None}")
+
+     # Basic Hamming distance
+     a = random_binary_vector()
+     b = random_binary_vector()
+     dist = hamming_distance(a, b)
+     all_pass &= check(
+         1800 < dist < 2200,
+         f"Random vector Hamming distance ≈ N/2: {dist} (expected ~2048)"
+     )
+
+     # Self-distance = 0
+     all_pass &= check(
+         hamming_distance(a, a) == 0,
+         "Self-distance = 0"
+     )
+
+     # XOR identity: dist(a, a⊕b) should relate to popcount(b)
+     xor_ab = xor_vectors(a, b)
+     d1 = hamming_distance(a, xor_ab)
+     d2 = popcount(b)
+     # d1 should equal popcount(a XOR (a XOR b)) = popcount(b)
+     all_pass &= check(
+         d1 == d2,
+         f"XOR identity: dist(a, a⊕b) = popcount(b): {d1} == {d2}"
+     )
+
+     # Batch Hamming distance
+     corpus = random_binary_vectors(1000)
+     dists = hamming_batch(a, corpus)
+     all_pass &= check(
+         dists.shape == (1000,),
+         f"Batch Hamming shape: {dists.shape}"
+     )
+     all_pass &= check(
+         np.all(dists >= 0) and np.all(dists <= N_BITS),
+         f"Batch Hamming range: [{dists.min()}, {dists.max()}]"
+     )
+
+     # Top-K
+     indices, distances = hamming_topk(a, corpus, k=10)
+     all_pass &= check(
+         len(indices) == 10,
+         f"Top-10 returned: {len(indices)}"
+     )
+     all_pass &= check(
+         np.all(np.diff(distances) >= 0),
+         f"Top-K sorted ascending: {distances[:5]}..."
+     )
+
+     # Verify top-K correctness against a full sort
+     full_sort_idx = np.argsort(dists)[:10]
+     full_sort_dist = dists[full_sort_idx]
+     all_pass &= check(
+         np.array_equal(distances, full_sort_dist),
+         f"Top-K matches full sort: {np.array_equal(distances, full_sort_dist)}"
+     )
+
+     # Majority vote
+     vecs = random_binary_vectors(5)
+     mv = majority_vote(np.ascontiguousarray(vecs))
+     all_pass &= check(
+         mv.shape == (N_WORDS,) and mv.dtype == np.uint64,
+         f"Majority vote shape/dtype: {mv.shape}, {mv.dtype}"
+     )
+
+     # Normalize density
+     v = random_binary_vector()
+     v_norm = normalize_density(v, 0.5)
+     actual_density = popcount(v_norm) / N_BITS
+     all_pass &= check(
+         abs(actual_density - 0.5) < 0.01,
+         f"Density normalization: {actual_density:.4f} (target 0.5)"
+     )
+
+     # ── Performance benchmark ──
+     print()
+     corpus_sizes = [1_000, 10_000, 100_000]
+     for n in corpus_sizes:
+         corpus = random_binary_vectors(n)
+         query = random_binary_vector()
+
+         # Batch Hamming
+         t0 = time.perf_counter()
+         for _ in range(10):
+             hamming_batch(query, corpus)
+         elapsed = (time.perf_counter() - t0) / 10 * 1000
+         throughput = n / elapsed * 1000
+         print(f"  ⏱ Batch Hamming ({n:>7d} vecs): {elapsed:>7.2f} ms"
+               f" ({throughput / 1e6:.1f}M vecs/s)")
+
+         # Top-500
+         t0 = time.perf_counter()
+         for _ in range(10):
+             hamming_topk(query, corpus, k=500)
+         elapsed = (time.perf_counter() - t0) / 10 * 1000
+         print(f"  ⏱ Top-500       ({n:>7d} vecs): {elapsed:>7.2f} ms")
+
+     return all_pass
+
+
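On why the 1800..2200 bound in the first check is safe: the distance between two independent random 4096-bit vectors is Binomial(n=4096, p=0.5), so the margin sits several standard deviations out and false failures are vanishingly rare. A quick check:

import math

n = 4096
mean = n * 0.5                   # 2048
sigma = math.sqrt(n * 0.25)      # 32.0
print((2200 - mean) / sigma)     # 4.75: the bound is about 4.75 sigma from the mean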
+ # ══════════════════════════════════════════════════════════════════════════════
+ # 2. MEMORY & LSH
+ # ══════════════════════════════════════════════════════════════════════════════
+
+ def test_memory_and_lsh():
+     header("2. Memory & LSH Indexing")
+     all_pass = True
+     np.random.seed(42)
+
+     # Create memory
+     mem = SparseAddressTable(capacity=10000, lsh_tables=16, lsh_projections=24)
+     all_pass &= check(mem.size == 0, f"Empty memory: size={mem.size}")
+
+     # Store concepts
+     n_concepts = 5000
+     addresses = random_binary_vectors(n_concepts)
+     contents = random_binary_vectors(n_concepts)
+
+     t0 = time.perf_counter()
+     for i in range(n_concepts):
+         mem.store(addresses[i], contents[i],
+                   metadata={'name': f'concept_{i}', 'index': i})
+     store_time = (time.perf_counter() - t0) * 1000
+
+     all_pass &= check(
+         mem.size == n_concepts,
+         f"Stored {n_concepts} concepts in {store_time:.1f}ms"
+     )
+
+     # Exact search
+     query = addresses[42].copy()
+     results = mem.query_nearest(query, k=5, use_lsh=False)
+     all_pass &= check(
+         results[0][0] == 42 and results[0][1] == 0,
+         "Exact retrieval: found the correct entry (dist=0)"
+     )
+
+     # LSH search
+     results_lsh = mem.query_nearest(query, k=5, use_lsh=True)
+     found_exact = any(idx == 42 for idx, _ in results_lsh)
+     all_pass &= check(
+         found_exact,
+         "LSH retrieval: found the exact match in the top-5"
+     )
+
+     # Near-duplicate search
+     near = addresses[42].copy()
+     bits = np.unpackbits(near.view(np.uint8))
+     # Flip 50 random bits (~1.2% difference)
+     flip_pos = np.random.choice(N_BITS, 50, replace=False)
+     bits[flip_pos] ^= 1
+     near_modified = np.packbits(bits).view(np.uint64).copy()
+
+     results_near = mem.query_nearest(near_modified, k=10, use_lsh=True)
+     all_pass &= check(
+         results_near[0][1] <= 100,
+         f"Near-duplicate found: best distance = {results_near[0][1]} (flipped 50 bits)"
+     )
+
+     # Named concept
+     cat_idx = mem.store_concept("cat", metadata={'category': 'animal'})
+     retrieved = mem.get_by_name("cat")
+     all_pass &= check(
+         retrieved is not None,
+         "Named concept 'cat' stored and retrieved"
+     )
+
+     # Activation
+     mem.activate(np.array([0, 1, 2]), np.array([0.9, 0.5, 0.3]))
+     active = mem.get_active(threshold=0.4)
+     all_pass &= check(
+         len(active) == 2,
+         f"Activation: {len(active)} entries above threshold 0.4"
+     )
+
+     mem.decay_activations(0.5)
+     active_after = mem.get_active(threshold=0.4)
+     all_pass &= check(
+         len(active_after) == 1,
+         f"After decay: {len(active_after)} entries above threshold 0.4"
+     )
+
+     # Stats
+     stats = mem.stats()
+     all_pass &= check(
+         stats['size'] == n_concepts + 1,
+         f"Memory stats: {stats}"
+     )
+
+     # ── LSH Recall benchmark ──
+     # Test with near-duplicates (a meaningful LSH scenario).
+     # Create clusters: for 100 base vectors, create 5 near-duplicates each (100 bits flipped)
+     print()
+     mem2 = SparseAddressTable(capacity=2000, lsh_tables=32, lsh_projections=8)
+     base_vecs = random_binary_vectors(100)
+     cluster_map = {}  # idx -> cluster_id
+     next_idx = 0
+     for cid in range(100):
+         mem2.store(base_vecs[cid], base_vecs[cid])
+         cluster_map[next_idx] = cid
+         next_idx += 1
+         for _ in range(5):
+             bits = np.unpackbits(base_vecs[cid].view(np.uint8)).copy()
+             flips = np.random.choice(N_BITS, 100, replace=False)
+             bits[flips] ^= 1
+             variant = np.packbits(bits).view(np.uint64).copy()
+             mem2.store(variant, variant)
+             cluster_map[next_idx] = cid
+             next_idx += 1
+
+     # For each base vector, check whether LSH finds its cluster members
+     recall_tests = 100
+     total_recall = 0
+     for cid in range(recall_tests):
+         query = base_vecs[cid]
+         lsh_results = mem2.query_nearest(query, k=10, use_lsh=True)
+         # Count how many results come from the same cluster
+         lsh_ids = [idx for idx, _ in lsh_results]
+         same_cluster = sum(1 for idx in lsh_ids if cluster_map.get(idx) == cid)
+         # Each cluster has 6 members; the top-10 should find most of them
+         total_recall += same_cluster / min(6, 10)
+     avg_recall = total_recall / recall_tests
+     all_pass &= check(
+         avg_recall > 0.3,
+         f"LSH Cluster Recall@10: {avg_recall:.2%} (near-duplicates, 100 clusters)"
+     )
+
+     # Also verify that exact self-lookup always works via LSH
+     exact_recall = 0
+     for cid in range(recall_tests):
+         query = base_vecs[cid]
+         lsh_results = mem2.query_nearest(query, k=1, use_lsh=True)
+         if lsh_results and lsh_results[0][1] == 0:
+             exact_recall += 1
+     all_pass &= check(
+         exact_recall == recall_tests,
+         f"LSH exact self-lookup: {exact_recall}/{recall_tests}"
+     )
+
+     return all_pass
+
+
+ # ══════════════════════════════════════════════════════════════════════════════
+ # 3. ROUTING
+ # ══════════════════════════════════════════════════════════════════════════════
+
+ def test_routing():
+     header("3. Recursive JIT Routing")
+     all_pass = True
+     np.random.seed(42)
+
+     # Build a memory with 10K entries
+     mem = SparseAddressTable(capacity=20000)
+     n = 10000
+     addresses = random_binary_vectors(n)
+     contents = random_binary_vectors(n)
+     for i in range(n):
+         mem.store(addresses[i], contents[i], metadata={'name': f'v_{i}'})
+
+     router = RecursiveJITRouter(
+         memory=mem,
+         beam_width=500,
+         max_depth=3,
+         expansion_factor=5,
+     )
+
+     # Basic routing
+     query = addresses[100].copy()
+     result = router.route(query)
+     all_pass &= check(
+         len(result.indices) > 0,
+         f"Routing returned {len(result.indices)} results"
+     )
+     all_pass &= check(
+         result.distances[0] == 0,
+         "Exact match found at distance 0"
+     )
+     all_pass &= check(
+         result.latency_ms < 1000,
+         f"Routing latency: {result.latency_ms:.1f}ms (target < 1000ms)"
+     )
+
+     # Random query routing
+     random_q = random_binary_vector()
+     result_rnd = router.route(random_q)
+     all_pass &= check(
+         len(result_rnd.indices) == 500,
+         f"Beam width respected: {len(result_rnd.indices)} (target 500)"
+     )
+     all_pass &= check(
+         np.all(np.diff(result_rnd.distances) >= 0),
+         "Results sorted by distance"
+     )
+
+     # Beam convergence (mean distances should decrease across depths)
+     all_pass &= check(
+         len(result_rnd.beam_history) > 0,
+         f"Beam history recorded: {len(result_rnd.beam_history)} depths, "
+         f"means={[f'{m:.0f}' for m in result_rnd.beam_history]}"
+     )
+
+     # Route and activate
+     result_act = router.route_and_activate(random_q)
+     active = mem.get_active(threshold=0.001)
+     all_pass &= check(
+         len(active) > 0,
+         f"Route-and-activate: {len(active)} entries activated"
+     )
+
+     # Multi-hop routing
+     results_multi = router.multi_hop_route(random_q, hops=2)
+     all_pass &= check(
+         len(results_multi) == 2,
+         f"Multi-hop routing: {len(results_multi)} hops completed"
+     )
+
+     # ── Scalability benchmark ──
+     print()
+     for n_test in [1_000, 10_000, 50_000]:
+         mem_test = SparseAddressTable(capacity=n_test + 1000)
+         addrs = random_binary_vectors(n_test)
+         conts = random_binary_vectors(n_test)
+         for i in range(n_test):
+             mem_test.store(addrs[i], conts[i])
+         r_test = RecursiveJITRouter(mem_test, beam_width=500, max_depth=3)
+
+         latencies = []
+         for _ in range(10):
+             q = random_binary_vector()
+             res = r_test.route(q)
+             latencies.append(res.latency_ms)
+
+         avg_lat = np.mean(latencies)
+         p99_lat = np.percentile(latencies, 99)
+         print(f"  ⏱ Routing ({n_test:>6d} entries): "
+               f"avg={avg_lat:.1f}ms, p99={p99_lat:.1f}ms, "
+               f"explored={res.candidates_explored}")
+
+     return all_pass
+
+
+ # ══════════════════════════════════════════════════════════════════════════════
+ # 4. BINDING OPERATIONS
+ # ══════════════════════════════════════════════════════════════════════════════
+
+ def test_binding():
+     header("4. Binding Operations")
+     all_pass = True
+     np.random.seed(42)
+
+     # ── Binary binding (BSC) ──
+     print("  --- Binary Binding (BSC/XOR) ---")
+     a = random_binary_vector()
+     b = random_binary_vector()
+
+     # Bind + unbind = identity
+     bound = BinaryBinding.bind(a, b)
+     recovered = BinaryBinding.unbind(bound, b)
+     all_pass &= check(
+         hamming_distance(a, recovered) == 0,
+         "XOR bind+unbind = exact recovery"
+     )
+
+     # The bound vector is quasi-orthogonal to its inputs
+     sim_ab = hamming_similarity(bound, a)
+     sim_bb = hamming_similarity(bound, b)
+     all_pass &= check(
+         abs(sim_ab - 0.5) < 0.05 and abs(sim_bb - 0.5) < 0.05,
+         f"Bound quasi-orthogonal to inputs: sim(C,A)={sim_ab:.3f}, sim(C,B)={sim_bb:.3f}"
+     )
+
+     # Bundle (majority vote)
+     c = random_binary_vector()
+     bundled = BinaryBinding.bundle(a, b, c)
+     sim_a = hamming_similarity(bundled, a)
+     sim_b = hamming_similarity(bundled, b)
+     sim_c = hamming_similarity(bundled, c)
+     all_pass &= check(
+         sim_a > 0.55 and sim_b > 0.55 and sim_c > 0.55,
+         f"Bundle preserves similarity: {sim_a:.3f}, {sim_b:.3f}, {sim_c:.3f}"
+     )
+
+     # Permutation
+     perm_a = BinaryBinding.permute(a, 1)
+     inv_perm_a = BinaryBinding.inverse_permute(perm_a, 1)
+     all_pass &= check(
+         hamming_distance(a, inv_perm_a) == 0,
+         "Permutation + inverse = identity"
+     )
+     all_pass &= check(
+         hamming_similarity(a, perm_a) < 0.55,
+         f"Permuted is dissimilar: sim={hamming_similarity(a, perm_a):.3f}"
+     )
+
+     # Triple encoding
+     s, r, o = random_binary_vector(), random_binary_vector(), random_binary_vector()
+     triple = BinaryBinding.encode_triple(s, r, o)
+     # Decode the object: unbind(unbind(triple, s), r)
+     decoded_o = BinaryBinding.unbind(BinaryBinding.unbind(triple, s), r)
+     all_pass &= check(
+         hamming_distance(o, decoded_o) == 0,
+         "Triple encode/decode: exact recovery of the object"
+     )
+
+     # ── HRR binding (circular convolution) ──
+     print("  --- HRR Binding (Circular Convolution) ---")
+     dim = 4096
+     ha = HRRBinding.random_vector(dim)
+     hb = HRRBinding.random_vector(dim)
+
+     # Bind + unbind ≈ identity (approximate for HRR)
+     hbound = HRRBinding.bind(ha, hb)
+     hrecovered = HRRBinding.unbind(hbound, hb)
+     hrr_sim = HRRBinding.similarity(ha, hrecovered)
+     all_pass &= check(
+         hrr_sim > 0.3,
+         f"HRR bind+unbind similarity: {hrr_sim:.3f} (should be >> 0, indicating recovery)"
+     )
+
+     # The bound vector is quasi-orthogonal
+     hrr_orth = HRRBinding.similarity(hbound, ha)
+     all_pass &= check(
+         abs(hrr_orth) < 0.2,
+         f"HRR bound quasi-orthogonal: sim={hrr_orth:.3f}"
+     )
+
+     # Bundling preserves components
+     hc = HRRBinding.random_vector(dim)
+     hbundled = HRRBinding.bundle(ha, hb, hc)
+     all_pass &= check(
+         HRRBinding.similarity(hbundled, ha) > 0.2,
+         f"HRR bundle preserves components: sim={HRRBinding.similarity(hbundled, ha):.3f}"
+     )
+
+     # ── Binding Engine ──
+     print("  --- Binding Engine ---")
+     engine = BindingEngine(use_binary=True)
+     engine.register_concept("king")
+     engine.register_concept("queen")
+     engine.register_concept("man")
+     engine.register_concept("woman")
+
+     sim_kk = engine.similarity(engine.get_concept("king"), engine.get_concept("king"))
+     sim_kq = engine.similarity(engine.get_concept("king"), engine.get_concept("queen"))
+     all_pass &= check(
+         sim_kk == 1.0,
+         f"Self-similarity = 1.0: {sim_kk}"
+     )
+     all_pass &= check(
+         abs(sim_kq - 0.5) < 0.05,
+         f"Random concept similarity ≈ 0.5: {sim_kq:.3f}"
+     )
+
+     # ── Performance ──
+     print()
+     n_ops = 10000
+     t0 = time.perf_counter()
+     for _ in range(n_ops):
+         BinaryBinding.bind(a, b)
+     elapsed = (time.perf_counter() - t0) * 1000
+     print(f"  ⏱ Binary bind: {n_ops} ops in {elapsed:.1f}ms "
+           f"({n_ops / elapsed * 1000:.0f} ops/s)")
+
+     t0 = time.perf_counter()
+     for _ in range(n_ops):
+         HRRBinding.bind(ha, hb)
+     elapsed = (time.perf_counter() - t0) * 1000
+     print(f"  ⏱ HRR bind:    {n_ops} ops in {elapsed:.1f}ms "
+           f"({n_ops / elapsed * 1000:.0f} ops/s)")
+
+     return all_pass
+
+
+ # ══════════════════════════════════════════════════════════════════════════════
+ # 5. ENERGY & CONVERGENCE
+ # ══════════════════════════════════════════════════════════════════════════════
+
+ def test_energy_convergence():
+     header("5. Energy Model & Convergence")
+     all_pass = True
+     np.random.seed(42)
+
+     # Create some context vectors
+     n_context = 20
+     context = random_binary_vectors(n_context)
+     activations = np.random.dirichlet(np.ones(n_context))
+
+     # ── Energy function ──
+     efn = EnergyFunction(alpha=1.0, beta=0.5, gamma=0.1, delta=0.05)
+
+     # A random state should have moderate energy
+     state = random_binary_vector()
+     e = efn.total_energy(state, context, activations)
+     all_pass &= check(
+         'total' in e and 'compatibility' in e,
+         f"Energy components computed: {list(e.keys())}"
+     )
+     all_pass &= check(
+         isinstance(e['total'], float),
+         f"Total energy: {e['total']:.4f}"
+     )
+
+     # ── Binary relaxation ──
+     print("  --- Binary Relaxation ---")
+     dynamics = RelaxationDynamics(
+         efn, max_iterations=30, n_candidates=16, flip_fraction=0.05
+     )
+     result = dynamics.relax(state, context, activations)
+
+     initial_e = result['trajectory'][0]['total']
+     final_e = result['final_energy']
+     all_pass &= check(
+         final_e <= initial_e + 0.01,  # allow tiny float imprecision
+         f"Energy decreased: {initial_e:.4f} → {final_e:.4f} "
+         f"(Δ = {initial_e - final_e:.4f})"
+     )
+     all_pass &= check(
+         result['iterations'] > 0,
+         f"Iterations: {result['iterations']}"
+     )
+
+     # Check that the trajectory is generally decreasing
+     traj_energies = [t['total'] for t in result['trajectory']]
+     decreasing_steps = sum(1 for i in range(1, len(traj_energies))
+                            if traj_energies[i] <= traj_energies[i - 1] + 0.001)
+     pct_decreasing = decreasing_steps / max(len(traj_energies) - 1, 1)
+     all_pass &= check(
+         pct_decreasing > 0.5,
+         f"Trajectory mostly decreasing: {pct_decreasing:.0%}"
+     )
+
+     # ── Hopfield relaxation ──
+     print("  --- Hopfield Dynamics ---")
+     hopfield = HopfieldDynamics(beta=8.0, max_iterations=20)
+     h_result = hopfield.relax(state, context, activations)
+
+     h_traj = h_result['energy_trajectory']
+     all_pass &= check(
+         len(h_traj) > 1,
+         f"Hopfield trajectory: {len(h_traj)} steps"
+     )
+     all_pass &= check(
+         h_traj[-1] <= h_traj[0] + 0.01,
+         f"Hopfield energy decreased: {h_traj[0]:.4f} → {h_traj[-1]:.4f}"
+     )
+
+     # Attention should be concentrated
+     att = h_result.get('attention_weights')
+     if att is not None:
+         max_att = att.max()
+         all_pass &= check(
+             max_att > 1.0 / n_context,
+             f"Hopfield attention concentrated: max={max_att:.4f} (uniform={1 / n_context:.4f})"
+         )
+
+     # ── Hybrid model ──
+     print("  --- Hybrid Energy Model ---")
+     model = EnergyModel(mode='hybrid')
+     hybrid_result = model.minimize(state, context, activations)
+     all_pass &= check(
+         'final_state' in hybrid_result,
+         "Hybrid model produced a final state"
+     )
+     all_pass &= check(
+         hybrid_result['converged'] or hybrid_result['total_iterations'] > 0,
+         f"Hybrid: {hybrid_result['total_iterations']} total iterations, "
+         f"converged={hybrid_result['converged']}"
+     )
+
+     return all_pass
+
+
633
+ # ══════════════════════════════════════════════════════════════════════════════
634
+ # 6. REASONING CAPABILITIES
635
+ # ══════════════════════════════════════════════════════════════════════════════
636
+
637
+ def test_reasoning():
638
+ header("6. Reasoning Capabilities")
639
+ all_pass = True
640
+ np.random.seed(42)
641
+
642
+ engine = ReasoningEngine(
643
+ beam_width=200,
644
+ max_routing_depth=2,
645
+ max_reasoning_steps=3,
646
+ energy_mode='hybrid',
647
+ relaxation_iterations=20,
648
+ )
649
+
650
+ # ── Build knowledge base ──
651
+ print(" --- Building Knowledge Base ---")
652
+ concepts = [
653
+ "cat", "dog", "animal", "pet",
654
+ "fish", "water", "ocean", "river",
655
+ "bird", "sky", "wing", "fly",
656
+ "car", "road", "wheel", "engine",
657
+ "tree", "leaf", "forest", "green",
658
+ "sun", "moon", "star", "night",
659
+ "king", "queen", "prince", "princess",
660
+ "man", "woman", "child", "person",
661
+ ]
662
+
663
+ for c in concepts:
664
+ engine.add_concept(c)
665
+
666
+ relations = [
667
+ ("cat", "is_a", "animal"),
668
+ ("dog", "is_a", "animal"),
669
+ ("cat", "is_a", "pet"),
670
+ ("dog", "is_a", "pet"),
671
+ ("fish", "lives_in", "water"),
672
+ ("fish", "is_a", "animal"),
673
+ ("bird", "has", "wing"),
674
+ ("bird", "can", "fly"),
675
+ ("bird", "is_a", "animal"),
676
+ ("car", "has", "wheel"),
677
+ ("car", "on", "road"),
678
+ ("tree", "has", "leaf"),
679
+ ("tree", "in", "forest"),
680
+ ("leaf", "is", "green"),
681
+ ("king", "is_a", "man"),
682
+ ("queen", "is_a", "woman"),
683
+ ("prince", "is_a", "man"),
684
+ ("princess", "is_a", "woman"),
685
+ ("king", "married_to", "queen"),
686
+ ("sun", "in", "sky"),
687
+ ("moon", "in", "sky"),
688
+ ("star", "in", "sky"),
689
+ ]
690
+
691
+ for s, r, o in relations:
692
+ engine.add_relation(s, r, o)
693
+
694
+ stats = engine.stats()
695
+ all_pass &= check(
696
+ stats['codebook_size'] >= len(concepts),
697
+ f"Knowledge base: {stats['codebook_size']} concepts, "
698
+ f"{stats['memory']['size']} memory entries"
699
+ )
700
+
701
+ # ── Test 1: Association ──
702
+ print(" --- Association ---")
703
+ assoc_cat = engine.associate("cat", top_k=10)
704
+ all_pass &= check(
705
+ len(assoc_cat) > 0,
706
+ f"Association for 'cat': {len(assoc_cat)} results"
707
+ )
708
+ if assoc_cat:
709
+ print(f" Top associations: {assoc_cat[:5]}")
710
+
711
+ # ── Test 2: Concept Query ──
712
+ print(" --- Concept Query ---")
713
+ result = engine.reason("cat", max_steps=2)
714
+ all_pass &= check(
715
+ result['response'] is not None,
716
+ f"Reasoning on 'cat': {result['num_steps']} steps, "
717
+ f"{result['latency_ms']:.1f}ms"
718
+ )
719
+ if result['response']['nearest_concepts']:
720
+ top_concept = result['response']['nearest_concepts'][0]
721
+ print(f" Nearest concept: {top_concept[0]} (sim={top_concept[1]:.3f})")
722
+
723
+ # ── Test 3: Energy convergence during reasoning ──
724
+ print(" --- Energy Convergence ---")
725
+ energies = [s.energy for s in result['reasoning_chain'] if s.energy != float('inf')]
726
+ if len(energies) >= 2:
727
+ all_pass &= check(
728
+ energies[-1] <= energies[0] + 0.01,
729
+ f"Energy decreased during reasoning: {energies[0]:.4f} β†’ {energies[-1]:.4f}"
730
+ )
731
+ print(f" Energy trajectory: {[f'{e:.4f}' for e in energies]}")
732
+
733
+ # ── Test 4: Analogy ──
734
+ print(" --- Analogy ---")
735
+ analogy_result = engine.solve_analogy("king", "man", "queen")
736
+ all_pass &= check(
737
+ analogy_result is not None,
738
+ f"Analogy 'king:man :: queen:?': completed in {analogy_result['latency_ms']:.1f}ms"
739
+ )
740
+ if analogy_result['codebook_ranking']:
741
+ top_answer = analogy_result['codebook_ranking'][0]
742
+ print(f" Top answer: {top_answer[0]} (sim={top_answer[1]:.3f})")
743
+ top_5 = [(n, f"{s:.3f}") for n, s in analogy_result['codebook_ranking'][:5]]
744
+ print(f" Top-5: {top_5}")
745
+
746
+ # ── Test 5: Composition ──
747
+ print(" --- Composition ---")
748
+ comp_result = engine.compose("water", "animal")
749
+ all_pass &= check(
750
+ comp_result is not None,
751
+ f"Composition 'water + animal': {comp_result['latency_ms']:.1f}ms"
752
+ )
753
+ if comp_result['response']['nearest_concepts']:
754
+ top = comp_result['response']['nearest_concepts'][:5]
755
+ print(f" Nearest to 'water+animal': {[(n, f'{s:.3f}') for n, s in top]}")
756
+
757
+ # ── Test 6: Structured query ──
758
+ print(" --- Structured Query ---")
759
+ struct_result = engine.reason(
760
+ {"subject": "bird", "relation": "can"},
761
+ max_steps=2,
762
+ roles=["subject", "relation"]
763
+ )
764
+ all_pass &= check(
765
+ struct_result is not None,
766
+ f"Structured query completed: {struct_result['latency_ms']:.1f}ms"
767
+ )
768
+ if struct_result['response'].get('role_fillers'):
769
+ for role, fillers in struct_result['response']['role_fillers'].items():
770
+ print(f" Role '{role}': {fillers[:3]}")
771
+
772
+ # ── Test 7: Multi-step reasoning convergence ──
773
+ print(" --- Multi-step Convergence ---")
774
+ deep_result = engine.reason("forest", max_steps=5)
775
+ chain = deep_result['reasoning_chain']
776
+ all_pass &= check(
777
+ len(chain) > 0,
778
+ f"Multi-step reasoning: {len(chain)} steps, {deep_result['latency_ms']:.1f}ms"
779
+ )
780
+ step_energies = [s.energy for s in chain if s.energy != float('inf')]
781
+ if step_energies:
782
+ print(f" Step energies: {[f'{e:.4f}' for e in step_energies]}")
783
+
784
+ return all_pass
785
+
786
+
787
+ # ══════════════════════════════════════════════════════════════════════════════
788
+ # 7. END-TO-END INTEGRATION
789
+ # ══════════════════════════════════════════════════════════════════════════════
790
+
791
+ def test_integration():
792
+ header("7. End-to-End Integration")
793
+ all_pass = True
794
+ np.random.seed(42)
795
+
796
+ # Build a larger knowledge base
797
+ engine = ReasoningEngine(
798
+ beam_width=500,
799
+ max_routing_depth=3,
800
+ max_reasoning_steps=3,
801
+ energy_mode='hybrid',
802
+ )
803
+
804
+ # Create 1000 random concepts with some structure
805
+ n_base = 500
806
+ categories = ["animal", "plant", "vehicle", "tool", "place"]
807
+ for cat in categories:
808
+ engine.add_concept(cat)
809
+
810
+ for i in range(n_base):
811
+ name = f"concept_{i}"
812
+ engine.add_concept(name)
813
+ cat = categories[i % len(categories)]
814
+ engine.add_relation(name, "is_a", cat)
815
+
816
+ stats = engine.stats()
817
+ print(f" Knowledge base: {stats}")
818
+
819
+ # Test full pipeline
820
+ t0 = time.perf_counter()
821
+ result = engine.reason("concept_42", max_steps=3)
822
+ total_ms = (time.perf_counter() - t0) * 1000
823
+
824
+ all_pass &= check(
825
+ result['response'] is not None,
826
+ f"Full pipeline completed in {total_ms:.1f}ms"
827
+ )
828
+
829
+ # Test batch queries
830
+ print()
831
+ print(" --- Batch Query Benchmark ---")
832
+ latencies = []
833
+ for i in range(50):
834
+ query = f"concept_{np.random.randint(n_base)}"
835
+ t0 = time.perf_counter()
836
+ r = engine.reason(query, max_steps=2)
837
+ latencies.append((time.perf_counter() - t0) * 1000)
838
+
839
+ avg_lat = np.mean(latencies)
840
+ p50_lat = np.percentile(latencies, 50)
841
+ p99_lat = np.percentile(latencies, 99)
842
+ all_pass &= check(
843
+ True,
844
+ f"50 queries: avg={avg_lat:.1f}ms, p50={p50_lat:.1f}ms, p99={p99_lat:.1f}ms"
845
+ )
846
+
847
+ # Memory efficiency
848
+ mem_stats = engine.memory.stats()
849
+ print(f" Memory usage: {mem_stats['memory_mb']:.2f} MB "
850
+ f"for {mem_stats['size']} entries")
851
+ bytes_per_entry = mem_stats['memory_mb'] * 1024 * 1024 / max(mem_stats['size'], 1)
852
+ all_pass &= check(
853
+ bytes_per_entry < 2048, # Should be ~1024 bytes (512 addr + 512 content)
854
+ f"Memory efficiency: {bytes_per_entry:.0f} bytes/entry (target ≀ 1024)"
855
+ )
856
+
857
+ return all_pass
858
+
859
+
860
+ # ══════════════════════════════════════════════════════════════════════════════
861
+ # MAIN
862
+ # ══════════════════════════════════════════════════════════════════════════════
863
+
864
+ def main():
865
+ print("\n" + "β–ˆ" * 70)
866
+ print(" MLE β€” Morpho-Logic Engine β€” Comprehensive Test Suite")
867
+ print("β–ˆ" * 70)
868
+
869
+ results = {}
870
+ tests = [
871
+ ("SIMD Operations", test_simd_operations),
872
+ ("Memory & LSH", test_memory_and_lsh),
873
+ ("Routing", test_routing),
874
+ ("Binding", test_binding),
875
+ ("Energy Convergence", test_energy_convergence),
876
+ ("Reasoning", test_reasoning),
877
+ ("Integration", test_integration),
878
+ ]
879
+
880
+ for name, test_fn in tests:
881
+ try:
882
+ results[name] = test_fn()
883
+ except Exception as e:
884
+ print(f"\n βœ—βœ—βœ— {name} FAILED with exception: {e}")
885
+ import traceback
886
+ traceback.print_exc()
887
+ results[name] = False
888
+
889
+ # Summary
890
+ header("TEST SUMMARY")
891
+ total = len(results)
892
+ passed = sum(1 for v in results.values() if v)
893
+ for name, result in results.items():
894
+ status = "PASS βœ“" if result else "FAIL βœ—"
895
+ print(f" [{status}] {name}")
896
+
897
+ print(f"\n Total: {passed}/{total} test groups passed")
898
+ print("β–ˆ" * 70)
899
+
900
+ return 0 if passed == total else 1
901
+
902
+
903
+ if __name__ == '__main__':
904
+ exit(main())
mle/utils/__init__.py ADDED
@@ -0,0 +1,7 @@
1
+ from .simd_ops import (
2
+ N_BITS, N_WORDS, N_BYTES,
3
+ hamming_distance, hamming_batch, hamming_topk,
4
+ hamming_similarity, xor_vectors, popcount,
5
+ majority_vote, random_binary_vector, random_binary_vectors,
6
+ normalize_density,
7
+ )
mle/utils/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (477 Bytes). View file
 
mle/utils/__pycache__/simd_ops.cpython-312.pyc ADDED
Binary file (20 kB). View file
 
mle/utils/simd_ops.py ADDED
@@ -0,0 +1,404 @@
1
+ """
2
+ MLE SIMD-Optimized Bitwise Operations
3
+ =====================================
4
+ Hardware-accelerated Hamming distance, popcount, and batch XOR operations.
5
+ Uses ctypes to call GCC-compiled C with -march=native for automatic SIMD
6
+ vectorization (AVX-512 VPOPCNTQ where available, otherwise the scalar SSE4.2-era POPCNT).
7
+
8
+ Fallback: pure NumPy LUT-based popcount for portability.
9
+ """
10
+
11
+ import numpy as np
12
+ import ctypes
13
+ import tempfile
14
+ import subprocess
15
+ import os
16
+ import logging
17
+ from pathlib import Path
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+ # ── Constants ──────────────────────────────────────────────────────────────────
22
+ N_BITS = 4096
23
+ N_WORDS = N_BITS // 64 # 64 uint64 words = 512 bytes per vector
24
+ N_BYTES = N_BITS // 8 # 512 bytes
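+
+ # Illustrative sketch of the public API defined below (not executed here;
+ # the distances quoted are expectations for random vectors, not guarantees):
+ #
+ #   a = random_binary_vector()    # (N_WORDS,) uint64 = 4096 bits
+ #   b = random_binary_vector()
+ #   hamming_distance(a, a)        # -> 0
+ #   hamming_distance(a, b)        # -> ~2048, i.e. ~N_BITS / 2
+ #   c = xor_vectors(a, b)         # XOR bind; xor_vectors(c, b) recovers a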
25
+
26
+ # ── Compile native SIMD library ───────────────────────────────────────────────
27
+
28
+ _NATIVE_C_SOURCE = r"""
29
+ #include <stdint.h>
30
+ #include <stdlib.h>
31
+ #include <string.h>
32
+
33
+ /* Single-pair Hamming distance: XOR + popcount over N uint64 words */
34
+ int hamming_single(const uint64_t *a, const uint64_t *b, int n_words) {
35
+ int cnt = 0;
36
+ for (int i = 0; i < n_words; i++)
37
+ cnt += __builtin_popcountll(a[i] ^ b[i]);
38
+ return cnt;
39
+ }
40
+
41
+ /* Batch Hamming: query (1 x n_words) vs corpus (n_vecs x n_words)
42
+ Results written to out[n_vecs]. Layout: corpus is row-major contiguous. */
43
+ void hamming_batch(const uint64_t *query, const uint64_t *corpus,
44
+ int n_words, int n_vecs, int *out) {
45
+ for (int v = 0; v < n_vecs; v++) {
46
+ int cnt = 0;
47
+ const uint64_t *row = corpus + (long)v * n_words;
48
+ for (int w = 0; w < n_words; w++)
49
+ cnt += __builtin_popcountll(query[w] ^ row[w]);
50
+ out[v] = cnt;
51
+ }
52
+ }
53
+
54
+ /* Batch Hamming with top-K selection (partial sort).
55
+ Returns indices of top_k smallest distances.
56
+ Uses a simple max-heap of size top_k for O(N log K). */
57
+ static void swap_int(int *a, int *b) { int t = *a; *a = *b; *b = t; }
58
+
59
+ static void sift_down_max(int *heap_dist, int *heap_idx, int size, int i) {
60
+ while (1) {
61
+ int largest = i, l = 2*i+1, r = 2*i+2;
62
+ if (l < size && heap_dist[l] > heap_dist[largest]) largest = l;
63
+ if (r < size && heap_dist[r] > heap_dist[largest]) largest = r;
64
+ if (largest == i) break;
65
+ swap_int(&heap_dist[i], &heap_dist[largest]);
66
+ swap_int(&heap_idx[i], &heap_idx[largest]);
67
+ i = largest;
68
+ }
69
+ }
70
+
71
+ void hamming_topk(const uint64_t *query, const uint64_t *corpus,
72
+ int n_words, int n_vecs, int top_k,
73
+ int *out_indices, int *out_dists) {
74
+ /* Initialize heap with first top_k elements */
75
+ int heap_size = (top_k < n_vecs) ? top_k : n_vecs;
76
+ for (int v = 0; v < heap_size; v++) {
77
+ int cnt = 0;
78
+ const uint64_t *row = corpus + (long)v * n_words;
79
+ for (int w = 0; w < n_words; w++)
80
+ cnt += __builtin_popcountll(query[w] ^ row[w]);
81
+ out_dists[v] = cnt;
82
+ out_indices[v] = v;
83
+ }
84
+ /* Build max-heap */
85
+ for (int i = heap_size/2 - 1; i >= 0; i--)
86
+ sift_down_max(out_dists, out_indices, heap_size, i);
87
+
88
+ /* Process remaining vectors */
89
+ for (int v = heap_size; v < n_vecs; v++) {
90
+ int cnt = 0;
91
+ const uint64_t *row = corpus + (long)v * n_words;
92
+ for (int w = 0; w < n_words; w++)
93
+ cnt += __builtin_popcountll(query[w] ^ row[w]);
94
+ if (cnt < out_dists[0]) {
95
+ out_dists[0] = cnt;
96
+ out_indices[0] = v;
97
+ sift_down_max(out_dists, out_indices, heap_size, 0);
98
+ }
99
+ }
100
+ }
101
+
102
+ /* Popcount of a single vector (count of 1-bits) */
103
+ int popcount_vec(const uint64_t *a, int n_words) {
104
+ int cnt = 0;
105
+ for (int i = 0; i < n_words; i++)
106
+ cnt += __builtin_popcountll(a[i]);
107
+ return cnt;
108
+ }
109
+
110
+ /* Batch XOR: out[i] = a[i] ^ b[i] for vectors of n_words */
111
+ void xor_vectors(const uint64_t *a, const uint64_t *b, uint64_t *out, int n_words) {
112
+ for (int i = 0; i < n_words; i++)
113
+ out[i] = a[i] ^ b[i];
114
+ }
115
+
116
+ /* Batch majority vote: given n_vecs vectors of n_words uint64,
117
+ compute per-bit majority. Result in out[n_words]. */
118
+ void majority_vote(const uint64_t *vecs, int n_vecs, int n_words, uint64_t *out) {
120
+ int threshold = n_vecs / 2;
121
+ /* Count per-bit using word-level iteration */
122
+ for (int w = 0; w < n_words; w++) {
123
+ uint64_t result = 0;
124
+ for (int b = 0; b < 64; b++) {
125
+ int count = 0;
126
+ uint64_t mask = (uint64_t)1 << b;
127
+ for (int v = 0; v < n_vecs; v++)
128
+ count += ((vecs[(long)v * n_words + w] & mask) != 0);
129
+ if (count > threshold)
130
+ result |= mask;
131
+ }
132
+ out[w] = result;
133
+ }
134
+ }
135
+ """
136
+
137
+ _lib = None
138
+ _lib_path = None
139
+
140
+
141
+ def _compile_native():
142
+ """Compile the C library with native SIMD optimization."""
143
+ global _lib, _lib_path
144
+ if _lib is not None:
145
+ return _lib
146
+
147
+ src_path = os.path.join(tempfile.gettempdir(), "mle_simd_ops.c")
148
+ lib_path = os.path.join(tempfile.gettempdir(), "mle_simd_ops.so")
149
+ _lib_path = lib_path
150
+
151
+ with open(src_path, "w") as f:
152
+ f.write(_NATIVE_C_SOURCE)
153
+
154
+ try:
155
+ subprocess.run(
156
+ ["gcc", "-O3", "-march=native", "-shared", "-fPIC",
157
+ "-o", lib_path, src_path],
158
+ check=True, capture_output=True, text=True
159
+ )
160
+ lib = ctypes.CDLL(lib_path)
161
+
162
+ # hamming_single
163
+ lib.hamming_single.restype = ctypes.c_int
164
+ lib.hamming_single.argtypes = [
165
+ ctypes.POINTER(ctypes.c_uint64),
166
+ ctypes.POINTER(ctypes.c_uint64),
167
+ ctypes.c_int
168
+ ]
169
+
170
+ # hamming_batch
171
+ lib.hamming_batch.restype = None
172
+ lib.hamming_batch.argtypes = [
173
+ ctypes.POINTER(ctypes.c_uint64),
174
+ ctypes.POINTER(ctypes.c_uint64),
175
+ ctypes.c_int, ctypes.c_int,
176
+ ctypes.POINTER(ctypes.c_int)
177
+ ]
178
+
179
+ # hamming_topk
180
+ lib.hamming_topk.restype = None
181
+ lib.hamming_topk.argtypes = [
182
+ ctypes.POINTER(ctypes.c_uint64),
183
+ ctypes.POINTER(ctypes.c_uint64),
184
+ ctypes.c_int, ctypes.c_int, ctypes.c_int,
185
+ ctypes.POINTER(ctypes.c_int),
186
+ ctypes.POINTER(ctypes.c_int)
187
+ ]
188
+
189
+ # popcount_vec
190
+ lib.popcount_vec.restype = ctypes.c_int
191
+ lib.popcount_vec.argtypes = [
192
+ ctypes.POINTER(ctypes.c_uint64), ctypes.c_int
193
+ ]
194
+
195
+ # xor_vectors
196
+ lib.xor_vectors.restype = None
197
+ lib.xor_vectors.argtypes = [
198
+ ctypes.POINTER(ctypes.c_uint64),
199
+ ctypes.POINTER(ctypes.c_uint64),
200
+ ctypes.POINTER(ctypes.c_uint64),
201
+ ctypes.c_int
202
+ ]
203
+
204
+ # majority_vote
205
+ lib.majority_vote.restype = None
206
+ lib.majority_vote.argtypes = [
207
+ ctypes.POINTER(ctypes.c_uint64),
208
+ ctypes.c_int, ctypes.c_int,
209
+ ctypes.POINTER(ctypes.c_uint64)
210
+ ]
211
+
212
+ _lib = lib
213
+ logger.info("Native SIMD library compiled successfully with -march=native")
214
+ return lib
215
+ except Exception as e:
216
+ logger.warning(f"Failed to compile native SIMD library: {e}. Using NumPy fallback.")
217
+ return None
218
+
219
+
220
+ def get_native_lib():
221
+ """Get the compiled native library (lazy initialization)."""
222
+ return _compile_native()
223
+
224
+
225
+ # ── NumPy Fallback Operations ─────────────────────────────────────────────────
226
+
227
+ # LUT for byte-level popcount (256 entries)
228
+ _POPCOUNT_LUT = np.array([bin(i).count('1') for i in range(256)], dtype=np.int32)
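+ # Each byte value indexes its precomputed 1-bit count, e.g. 0b10110010
+ # (178) -> 4, so popcounting any uint8 view is one fancy-index plus a sum.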
229
+
230
+
231
+ def _np_hamming_single(a: np.ndarray, b: np.ndarray) -> int:
232
+ """Pure NumPy Hamming distance between two packed uint64 vectors."""
233
+ xor = np.bitwise_xor(a, b).view(np.uint8)
234
+ return int(_POPCOUNT_LUT[xor].sum())
235
+
236
+
237
+ def _np_hamming_batch(query: np.ndarray, corpus: np.ndarray) -> np.ndarray:
238
+ """Pure NumPy batch Hamming distance. query: (N_WORDS,), corpus: (M, N_WORDS)."""
239
+ xor = np.bitwise_xor(query[np.newaxis, :], corpus) # (M, N_WORDS)
240
+ xor_bytes = xor.view(np.uint8) # (M, N_BYTES)
241
+ return _POPCOUNT_LUT[xor_bytes].reshape(len(corpus), -1).sum(axis=1)
242
+
243
+
244
+ # ── Public API (auto-selects native or fallback) ─────────────────────────────
245
+
246
+ def _as_ptr64(arr: np.ndarray):
247
+ """Get ctypes pointer to uint64 array."""
248
+ return arr.ctypes.data_as(ctypes.POINTER(ctypes.c_uint64))
249
+
250
+
251
+ def _as_ptr32(arr: np.ndarray):
252
+ """Get ctypes pointer to int32 array."""
253
+ return arr.ctypes.data_as(ctypes.POINTER(ctypes.c_int))
254
+
255
+
256
+ def hamming_distance(a: np.ndarray, b: np.ndarray) -> int:
257
+ """Compute Hamming distance between two 4096-bit packed vectors.
258
+ a, b: np.ndarray of shape (N_WORDS,) dtype=uint64.
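+
+ Illustrative (a vector is always at distance 0 from itself):
+ >>> v = random_binary_vector()
+ >>> hamming_distance(v, v)
+ 0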
259
+ """
260
+ lib = get_native_lib()
261
+ if lib is not None:
262
+ return lib.hamming_single(_as_ptr64(a), _as_ptr64(b), N_WORDS)
263
+ return _np_hamming_single(a, b)
264
+
265
+
266
+ def hamming_batch(query: np.ndarray, corpus: np.ndarray) -> np.ndarray:
267
+ """Compute Hamming distances from query to all corpus vectors.
268
+ query: (N_WORDS,) uint64
269
+ corpus: (M, N_WORDS) uint64, C-contiguous
270
+ Returns: (M,) int32 array of distances.
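+
+ Illustrative (the query's own corpus row is at distance 0):
+ >>> corpus = random_binary_vectors(8)
+ >>> int(hamming_batch(corpus[0], corpus)[0])
+ 0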
271
+ """
272
+ assert corpus.flags['C_CONTIGUOUS'], "Corpus must be C-contiguous for SIMD"
273
+ n_vecs = corpus.shape[0]
274
+ lib = get_native_lib()
275
+ if lib is not None:
276
+ out = np.empty(n_vecs, dtype=np.int32)
277
+ lib.hamming_batch(
278
+ _as_ptr64(query), _as_ptr64(corpus),
279
+ N_WORDS, n_vecs, _as_ptr32(out)
280
+ )
281
+ return out
282
+ return _np_hamming_batch(query, corpus).astype(np.int32)
283
+
284
+
285
+ def hamming_topk(query: np.ndarray, corpus: np.ndarray, k: int = 500):
286
+ """Find top-k nearest vectors by Hamming distance.
287
+ Returns: (indices, distances) each of shape (k,), sorted ascending by distance.
288
+ Uses O(N log K) max-heap in native code.
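+
+ Illustrative (the query's own row ranks first at distance 0):
+ >>> corpus = random_binary_vectors(100)
+ >>> idx, dist = hamming_topk(corpus[7], corpus, k=3)
+ >>> int(idx[0]), int(dist[0])
+ (7, 0)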
289
+ """
290
+ assert corpus.flags['C_CONTIGUOUS'], "Corpus must be C-contiguous"
291
+ n_vecs = corpus.shape[0]
292
+ actual_k = min(k, n_vecs)
293
+ lib = get_native_lib()
294
+
295
+ if lib is not None:
296
+ out_idx = np.empty(actual_k, dtype=np.int32)
297
+ out_dist = np.empty(actual_k, dtype=np.int32)
298
+ lib.hamming_topk(
299
+ _as_ptr64(query), _as_ptr64(corpus),
300
+ N_WORDS, n_vecs, actual_k,
301
+ _as_ptr32(out_idx), _as_ptr32(out_dist)
302
+ )
303
+ # Sort by distance (heap output is unordered)
304
+ order = np.argsort(out_dist)
305
+ return out_idx[order], out_dist[order]
306
+ else:
307
+ dists = _np_hamming_batch(query, corpus)
308
+ if actual_k < n_vecs:
309
+ idx = np.argpartition(dists, actual_k)[:actual_k]
310
+ else:
311
+ idx = np.arange(n_vecs)
312
+ order = np.argsort(dists[idx])
313
+ sorted_idx = idx[order]
314
+ return sorted_idx.astype(np.int32), dists[sorted_idx].astype(np.int32)
315
+
316
+
317
+ def xor_vectors(a: np.ndarray, b: np.ndarray) -> np.ndarray:
318
+ """Bitwise XOR of two packed uint64 vectors."""
319
+ lib = get_native_lib()
320
+ if lib is not None:
321
+ out = np.empty(N_WORDS, dtype=np.uint64)
322
+ lib.xor_vectors(_as_ptr64(a), _as_ptr64(b), _as_ptr64(out), N_WORDS)
323
+ return out
324
+ return np.bitwise_xor(a, b)
325
+
326
+
327
+ def popcount(a: np.ndarray) -> int:
328
+ """Count number of 1-bits in packed uint64 vector."""
329
+ lib = get_native_lib()
330
+ if lib is not None:
331
+ return lib.popcount_vec(_as_ptr64(a), N_WORDS)
332
+ return int(_POPCOUNT_LUT[a.view(np.uint8)].sum())
333
+
334
+
335
+ def majority_vote(vectors: np.ndarray) -> np.ndarray:
336
+ """Bitwise majority vote across multiple packed uint64 vectors.
337
+ vectors: (M, N_WORDS) uint64, C-contiguous.
338
+ Returns: (N_WORDS,) uint64.
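+
+ Illustrative (two copies outvote one outlier bit-by-bit):
+ >>> v, w = random_binary_vector(), random_binary_vector()
+ >>> stack = np.ascontiguousarray(np.stack([v, v, w]))
+ >>> bool(np.array_equal(majority_vote(stack), v))
+ True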
339
+ """
340
+ assert vectors.flags['C_CONTIGUOUS']
341
+ n_vecs = vectors.shape[0]
342
+ lib = get_native_lib()
343
+ if lib is not None:
344
+ out = np.empty(N_WORDS, dtype=np.uint64)
345
+ lib.majority_vote(_as_ptr64(vectors), n_vecs, N_WORDS, _as_ptr64(out))
346
+ return out
347
+ # NumPy fallback: unpack, sum, threshold
348
+ bits = np.unpackbits(vectors.view(np.uint8), axis=1) # (M, N_BITS)
349
+ summed = bits.astype(np.int32).sum(axis=0)
350
+ majority = (summed > n_vecs / 2).astype(np.uint8)
351
+ return np.packbits(majority).view(np.uint64)
352
+
353
+
354
+ # ── Vector Generation ─────────────────────────────────────────────────────────
355
+
356
+ def random_binary_vector(n_words: int = N_WORDS) -> np.ndarray:
357
+ """Generate a random 4096-bit vector, stored as packed uint64.
358
+ Each bit is iid Bernoulli(0.5) → balanced density.
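+
+ Illustrative (density concentrates tightly near 0.5 at 4096 bits):
+ >>> 0.4 < popcount(random_binary_vector()) / N_BITS < 0.6
+ True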
359
+ """
360
+ return np.random.randint(
361
+ 0, np.iinfo(np.uint64).max + 1,
362
+ size=n_words, dtype=np.uint64
363
+ )
364
+
365
+
366
+ def random_binary_vectors(n: int, n_words: int = N_WORDS) -> np.ndarray:
367
+ """Generate n random 4096-bit vectors. Shape: (n, N_WORDS), C-contiguous."""
368
+ return np.ascontiguousarray(
369
+ np.random.randint(
370
+ 0, np.iinfo(np.uint64).max + 1,
371
+ size=(n, n_words), dtype=np.uint64
372
+ )
373
+ )
374
+
375
+
376
+ def normalize_density(v: np.ndarray, target_density: float = 0.5) -> np.ndarray:
377
+ """Normalize a binary vector to target bit density.
378
+ Randomly flips bits to reach the desired proportion of 1-bits.
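+
+ Illustrative (int(0.1 * 4096) = 409 one-bits after normalization):
+ >>> sparse = normalize_density(random_binary_vector(), target_density=0.1)
+ >>> popcount(sparse)
+ 409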
379
+ """
380
+ bits = np.unpackbits(v.view(np.uint8))
382
+ target_ones = int(target_density * len(bits))
383
+ current_ones = int(bits.sum())
384
+
385
+ if current_ones == target_ones:
386
+ return v.copy()
387
+
388
+ if current_ones > target_ones:
389
+ # Flip some 1s to 0s
390
+ one_positions = np.where(bits == 1)[0]
391
+ to_flip = np.random.choice(one_positions, current_ones - target_ones, replace=False)
392
+ bits[to_flip] = 0
393
+ else:
394
+ # Flip some 0s to 1s
395
+ zero_positions = np.where(bits == 0)[0]
396
+ to_flip = np.random.choice(zero_positions, target_ones - current_ones, replace=False)
397
+ bits[to_flip] = 1
398
+
399
+ return np.packbits(bits).view(np.uint64).copy()
400
+
401
+
402
+ def hamming_similarity(a: np.ndarray, b: np.ndarray) -> float:
403
+ """Normalized Hamming similarity in [0, 1]. 1.0 = identical."""
404
+ return 1.0 - hamming_distance(a, b) / N_BITS
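+
+
+ # Note: two independent random 4096-bit vectors score ~0.5 here, so 0.5
+ # (not 0.0) is the "unrelated" baseline that the demo suite's similarity
+ # checks assume.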