Granis87 committed on
Commit
dbb04e4
·
verified ·
1 Parent(s): 8b4a820

Initial upload of MnemoCore

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. .coveragerc +42 -0
  2. .dockerignore +95 -0
  3. .github/workflows/ci.yml +221 -0
  4. .github/workflows/docker-publish.yml +130 -0
  5. .gitignore +81 -0
  6. CHANGELOG.md +61 -0
  7. Dockerfile +78 -0
  8. LICENSE +22 -0
  9. MnemoCore Phase 3 5 Infinite.md +1615 -0
  10. README.md +1161 -0
  11. REFACTORING_TODO.md +207 -0
  12. RELEASE_CHECKLIST.md +125 -0
  13. SECURITY.md +30 -0
  14. benchmarks/bench_100k_memories.py +179 -0
  15. benchmarks/bench_permute.py +55 -0
  16. config.yaml +167 -0
  17. data/subconscious_audit.jsonl +2 -0
  18. data/subconscious_evolution.json +24 -0
  19. docker-compose.yml +128 -0
  20. docs/API.md +91 -0
  21. docs/ARCHITECTURE.md +55 -0
  22. docs/BETA_POLICY.md +50 -0
  23. docs/MCP_IMPLEMENTATION_PLAN.md +128 -0
  24. docs/PERFORMANCE.md +71 -0
  25. docs/ROADMAP.md +320 -0
  26. docs/SELF_IMPROVEMENT_DEEP_DIVE.md +279 -0
  27. git_status.txt +51 -0
  28. grafana-dashboard.json +954 -0
  29. helm/mnemocore/.helmignore +68 -0
  30. helm/mnemocore/Chart.yaml +55 -0
  31. helm/mnemocore/templates/_helpers.tpl +119 -0
  32. helm/mnemocore/templates/configmap.yaml +114 -0
  33. helm/mnemocore/templates/deployment-qdrant.yaml +141 -0
  34. helm/mnemocore/templates/deployment-redis.yaml +141 -0
  35. helm/mnemocore/templates/deployment.yaml +176 -0
  36. helm/mnemocore/templates/hpa.yaml +43 -0
  37. helm/mnemocore/templates/ingress.yaml +45 -0
  38. helm/mnemocore/templates/networkpolicy.yaml +50 -0
  39. helm/mnemocore/templates/notes.txt +100 -0
  40. helm/mnemocore/templates/pdb.yaml +23 -0
  41. helm/mnemocore/templates/pvc.yaml +29 -0
  42. helm/mnemocore/templates/secret.yaml +36 -0
  43. helm/mnemocore/templates/service.yaml +50 -0
  44. helm/mnemocore/templates/serviceaccount.yaml +17 -0
  45. helm/mnemocore/templates/servicemonitor.yaml +40 -0
  46. helm/mnemocore/values.yaml +430 -0
  47. k8s/README.md +324 -0
  48. pyproject.toml +109 -0
  49. pytest.ini +9 -0
  50. requirements-dev.txt +30 -0
.coveragerc ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [run]
2
+ source = src
3
+ branch = true
4
+ parallel = true
5
+ data_file = .coverage
6
+
7
+ [report]
8
+ exclude_lines =
9
+ pragma: no cover
10
+ def __repr__
11
+ raise AssertionError
12
+ raise NotImplementedError
13
+ if __name__ == .__main__.:
14
+ if TYPE_CHECKING:
15
+ @abstractmethod
16
+ @abc.abstractmethod
17
+ omit =
18
+ tests/*
19
+ */__pycache__/*
20
+ */site-packages/*
21
+ */dist-packages/*
22
+ */.venv/*
23
+ */venv/*
24
+ setup.py
25
+ conftest.py
26
+
27
+ fail_under = 80
28
+ precision = 2
29
+ show_missing = true
30
+ skip_covered = false
31
+ sort = Cover
32
+
33
+ [html]
34
+ directory = htmlcov
35
+ title = MnemoCore Coverage Report
36
+
37
+ [xml]
38
+ output = coverage.xml
39
+
40
+ [json]
41
+ output = coverage.json
42
+ show_contexts = true
.dockerignore ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MnemoCore Docker Ignore
2
+ # =======================
3
+ # Exclude files not needed in Docker build context
4
+
5
+ # Python
6
+ __pycache__/
7
+ *.py[cod]
8
+ *$py.class
9
+ *.so
10
+ .Python
11
+ *.egg-info/
12
+ .eggs/
13
+ *.egg
14
+ .mypy_cache/
15
+ .pytest_cache/
16
+ .ruff_cache/
17
+
18
+ # Virtual environments
19
+ .venv/
20
+ venv/
21
+ ENV/
22
+ env/
23
+
24
+ # IDE and editors
25
+ .idea/
26
+ .vscode/
27
+ *.swp
28
+ *.swo
29
+ *~
30
+ .project
31
+ .pydevproject
32
+ .settings/
33
+
34
+ # Dependencies (will be installed in container)
35
+ deps/
36
+ node_modules/
37
+
38
+ # Test files and coverage
39
+ tests/
40
+ test_*.py
41
+ *_test.py
42
+ .coverage
43
+ htmlcov/
44
+ .tox/
45
+ .nox/
46
+
47
+ # Documentation
48
+ docs/
49
+ *.md
50
+ !README.md
51
+
52
+ # Data directories (mounted as volumes)
53
+ data/
54
+ *.jsonl
55
+ *.json
56
+ !config.json
57
+
58
+ # Logs
59
+ logs/
60
+ *.log
61
+
62
+ # Git
63
+ .git/
64
+ .gitignore
65
+ .gitattributes
66
+
67
+ # Docker (prevent recursive builds)
68
+ Dockerfile*
69
+ docker-compose*.yml
70
+ .dockerignore
71
+
72
+ # Environment files (use .env.example as template)
73
+ .env
74
+ .env.*
75
+ !.env.example
76
+
77
+ # Local development
78
+ *.local
79
+ *.bak
80
+ *.tmp
81
+
82
+ # OS files
83
+ .DS_Store
84
+ Thumbs.db
85
+
86
+ # Build artifacts
87
+ dist/
88
+ build/
89
+ *.tar.gz
90
+ *.zip
91
+
92
+ # Miscellaneous
93
+ scripts/debug_*.py
94
+ scripts/bisect_*.py
95
+ scripts/verify_*.py
.github/workflows/ci.yml ADDED
@@ -0,0 +1,221 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: CI/CD Pipeline
2
+
3
+ on:
4
+ push:
5
+ branches: [main, develop]
6
+ pull_request:
7
+ branches: [main, develop]
8
+
9
+ env:
10
+ PYTHONUNBUFFERED: "1"
11
+ HAIM_API_KEY: "ci-test-key-not-for-production"
12
+ HAIM_DIMENSIONALITY: "1024"
13
+ HAIM_ENCODING_MODE: "binary"
14
+
15
+ jobs:
16
+ # ===========================================================================
17
+ # LINT JOB - Code Quality Checks
18
+ # ===========================================================================
19
+ lint:
20
+ name: Lint & Format Check
21
+ runs-on: ubuntu-latest
22
+ steps:
23
+ - name: Checkout repository
24
+ uses: actions/checkout@v4
25
+
26
+ - name: Set up Python
27
+ uses: actions/setup-python@v5
28
+ with:
29
+ python-version: "3.11"
30
+ cache: 'pip'
31
+
32
+ - name: Install dependencies
33
+ run: |
34
+ python -m pip install --upgrade pip
35
+ pip install black isort flake8 mypy
36
+
37
+ - name: Run Black (code formatter check)
38
+ run: black --check --diff src/ tests/
39
+
40
+ - name: Run isort (import sorter check)
41
+ run: isort --check-only --diff src/ tests/
42
+
43
+ - name: Run flake8 (style guide enforcement)
44
+ run: flake8 src/ tests/ --max-line-length=120 --extend-ignore=E203,W503
45
+
46
+ - name: Run mypy (static type checker)
47
+ run: mypy src/ --ignore-missing-imports --no-strict-optional
48
+ continue-on-error: true # Non-blocking until type coverage improves
49
+
50
+ # ===========================================================================
51
+ # TEST JOB - Unit & Integration Tests with Coverage
52
+ # ===========================================================================
53
+ test:
54
+ name: Test (Python ${{ matrix.python-version }})
55
+ runs-on: ubuntu-latest
56
+ needs: lint
57
+ strategy:
58
+ fail-fast: false
59
+ matrix:
60
+ python-version: ["3.10", "3.11", "3.12"]
61
+
62
+ services:
63
+ redis:
64
+ image: redis:7-alpine
65
+ ports:
66
+ - 6379:6379
67
+ options: >-
68
+ --health-cmd "redis-cli ping"
69
+ --health-interval 10s
70
+ --health-timeout 5s
71
+ --health-retries 5
72
+
73
+ steps:
74
+ - name: Checkout repository
75
+ uses: actions/checkout@v4
76
+
77
+ - name: Set up Python ${{ matrix.python-version }}
78
+ uses: actions/setup-python@v5
79
+ with:
80
+ python-version: ${{ matrix.python-version }}
81
+ cache: 'pip'
82
+
83
+ - name: Install dependencies
84
+ run: |
85
+ python -m pip install --upgrade pip
86
+ pip install -r requirements.txt
87
+ pip install -r requirements-dev.txt
88
+ pip install hypothesis fakeredis
89
+
90
+ - name: Create required directories
91
+ run: mkdir -p data
92
+
93
+ - name: Run tests with coverage
94
+ env:
95
+ REDIS_URL: redis://localhost:6379
96
+ HAIM_API_KEY: ${{ env.HAIM_API_KEY }}
97
+ HAIM_DIMENSIONALITY: ${{ env.HAIM_DIMENSIONALITY }}
98
+ HAIM_ENCODING_MODE: ${{ env.HAIM_ENCODING_MODE }}
99
+ run: |
100
+ pytest tests/ \
101
+ -m "not integration" \
102
+ --cov=src \
103
+ --cov-report=xml \
104
+ --cov-report=term-missing \
105
+ --cov-fail-under=60 \
106
+ --tb=short \
107
+ -v
108
+
109
+ - name: Upload coverage to Codecov
110
+ if: matrix.python-version == '3.11'
111
+ uses: codecov/codecov-action@v4
112
+ with:
113
+ files: ./coverage.xml
114
+ fail_ci_if_error: false
115
+ verbose: true
116
+ env:
117
+ CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
118
+
119
+ # ===========================================================================
120
+ # SECURITY JOB - Dependency & Code Security Scanning
121
+ # ===========================================================================
122
+ security:
123
+ name: Security Scan
124
+ runs-on: ubuntu-latest
125
+ needs: lint
126
+ steps:
127
+ - name: Checkout repository
128
+ uses: actions/checkout@v4
129
+
130
+ - name: Set up Python
131
+ uses: actions/setup-python@v5
132
+ with:
133
+ python-version: "3.11"
134
+ cache: 'pip'
135
+
136
+ - name: Install dependencies
137
+ run: |
138
+ python -m pip install --upgrade pip
139
+ pip install pip-audit bandit
140
+
141
+ - name: Run pip-audit (dependency vulnerability scan)
142
+ run: pip-audit -r requirements.txt
143
+ continue-on-error: true
144
+
145
+ - name: Run Bandit (code security analysis)
146
+ run: bandit -r src/ -ll --skip B101,B601
147
+ continue-on-error: true
148
+
149
+ # ===========================================================================
150
+ # PROPERTY-BASED TESTS - Hypothesis
151
+ # ===========================================================================
152
+ property-tests:
153
+ name: Property-Based Tests (Hypothesis)
154
+ runs-on: ubuntu-latest
155
+ needs: lint
156
+ steps:
157
+ - name: Checkout repository
158
+ uses: actions/checkout@v4
159
+
160
+ - name: Set up Python
161
+ uses: actions/setup-python@v5
162
+ with:
163
+ python-version: "3.11"
164
+ cache: 'pip'
165
+
166
+ - name: Install dependencies
167
+ run: |
168
+ python -m pip install --upgrade pip
169
+ pip install -r requirements.txt
170
+ pip install hypothesis pytest pytest-asyncio
171
+
172
+ - name: Run property-based tests
173
+ env:
174
+ HAIM_API_KEY: ${{ env.HAIM_API_KEY }}
175
+ HAIM_DIMENSIONALITY: ${{ env.HAIM_DIMENSIONALITY }}
176
+ run: |
177
+ pytest tests/test_binary_hdv_properties.py \
178
+ -v \
179
+ --tb=short
180
+
181
+ # ===========================================================================
182
+ # DOCKER BUILD - Validate image builds correctly
183
+ # ===========================================================================
184
+ docker:
185
+ name: Docker Build
186
+ runs-on: ubuntu-latest
187
+ needs: [lint]
188
+ steps:
189
+ - name: Checkout repository
190
+ uses: actions/checkout@v4
191
+
192
+ - name: Build Docker image
193
+ run: docker build -t mnemocore:ci-${{ github.sha }} .
194
+
195
+ - name: Verify Python imports work in image
196
+ run: |
197
+ docker run --rm \
198
+ -e HAIM_API_KEY=ci-test-key \
199
+ mnemocore:ci-${{ github.sha }} \
200
+ python -c "from src.core.engine import HAIMEngine; print('Import OK')"
201
+
202
+ # ===========================================================================
203
+ # BUILD STATUS - Summary Job
204
+ # ===========================================================================
205
+ build-status:
206
+ name: Build Status
207
+ runs-on: ubuntu-latest
208
+ needs: [lint, test, security, property-tests, docker]
209
+ if: always()
210
+ steps:
211
+ - name: Check build status
212
+ run: |
213
+ if [[ "${{ needs.test.result }}" == "failure" ]]; then
214
+ echo "Tests failed!"
215
+ exit 1
216
+ fi
217
+ if [[ "${{ needs.lint.result }}" == "failure" ]]; then
218
+ echo "Lint checks failed!"
219
+ exit 1
220
+ fi
221
+ echo "All checks passed!"
.github/workflows/docker-publish.yml ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Docker Build & Publish
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - 'v*'
7
+ workflow_dispatch:
8
+ inputs:
9
+ push_to_registry:
10
+ description: 'Push to registry'
11
+ required: true
12
+ default: 'true'
13
+ type: boolean
14
+
15
+ env:
16
+ REGISTRY_DOCKERHUB: docker.io
17
+ REGISTRY_GHCR: ghcr.io
18
+ IMAGE_NAME: ${{ github.repository }}
19
+
20
+ jobs:
21
+ # ===========================================================================
22
+ # BUILD AND PUSH TO DOCKER HUB
23
+ # ===========================================================================
24
+ build-dockerhub:
25
+ name: Build & Push (Docker Hub)
26
+ runs-on: ubuntu-latest
27
+ permissions:
28
+ contents: read
29
+
30
+ steps:
31
+ - name: Checkout repository
32
+ uses: actions/checkout@v4
33
+
34
+ - name: Set up QEMU
35
+ uses: docker/setup-qemu-action@v3
36
+
37
+ - name: Set up Docker Buildx
38
+ uses: docker/setup-buildx-action@v3
39
+
40
+ - name: Log in to Docker Hub
41
+ if: github.event_name != 'workflow_dispatch' || github.event.inputs.push_to_registry == 'true'
42
+ uses: docker/login-action@v3
43
+ with:
44
+ registry: ${{ env.REGISTRY_DOCKERHUB }}
45
+ username: ${{ secrets.DOCKERHUB_USERNAME }}
46
+ password: ${{ secrets.DOCKERHUB_TOKEN }}
47
+
48
+ - name: Extract metadata (tags, labels)
49
+ id: meta
50
+ uses: docker/metadata-action@v5
51
+ with:
52
+ images: ${{ env.REGISTRY_DOCKERHUB }}/${{ secrets.DOCKERHUB_USERNAME }}/mnemocore
53
+ tags: |
54
+ type=ref,event=branch
55
+ type=ref,event=pr
56
+ type=semver,pattern={{version}}
57
+ type=semver,pattern={{major}}.{{minor}}
58
+ type=semver,pattern={{major}}
59
+ type=sha
60
+
61
+ - name: Build and push Docker image
62
+ uses: docker/build-push-action@v5
63
+ with:
64
+ context: .
65
+ platforms: linux/amd64,linux/arm64
66
+ push: ${{ github.event_name != 'workflow_dispatch' || github.event.inputs.push_to_registry == 'true' }}
67
+ tags: ${{ steps.meta.outputs.tags }}
68
+ labels: ${{ steps.meta.outputs.labels }}
69
+ cache-from: type=gha
70
+ cache-to: type=gha,mode=max
71
+
72
+ # ===========================================================================
73
+ # BUILD AND PUSH TO GITHUB CONTAINER REGISTRY
74
+ # ===========================================================================
75
+ build-ghcr:
76
+ name: Build & Push (GHCR)
77
+ runs-on: ubuntu-latest
78
+ permissions:
79
+ contents: read
80
+ packages: write
81
+
82
+ steps:
83
+ - name: Checkout repository
84
+ uses: actions/checkout@v4
85
+
86
+ - name: Set up QEMU
87
+ uses: docker/setup-qemu-action@v3
88
+
89
+ - name: Set up Docker Buildx
90
+ uses: docker/setup-buildx-action@v3
91
+
92
+ - name: Log in to GitHub Container Registry
93
+ if: github.event_name != 'workflow_dispatch' || github.event.inputs.push_to_registry == 'true'
94
+ uses: docker/login-action@v3
95
+ with:
96
+ registry: ${{ env.REGISTRY_GHCR }}
97
+ username: ${{ github.actor }}
98
+ password: ${{ secrets.GITHUB_TOKEN }}
99
+
100
+ - name: Extract metadata (tags, labels)
101
+ id: meta
102
+ uses: docker/metadata-action@v5
103
+ with:
104
+ images: ${{ env.REGISTRY_GHCR }}/${{ env.IMAGE_NAME }}
105
+ tags: |
106
+ type=ref,event=branch
107
+ type=ref,event=pr
108
+ type=semver,pattern={{version}}
109
+ type=semver,pattern={{major}}.{{minor}}
110
+ type=semver,pattern={{major}}
111
+ type=sha
112
+
113
+ - name: Build and push Docker image
+ id: push
114
+ uses: docker/build-push-action@v5
115
+ with:
116
+ context: .
117
+ platforms: linux/amd64,linux/arm64
118
+ push: ${{ github.event_name != 'workflow_dispatch' || github.event.inputs.push_to_registry == 'true' }}
119
+ tags: ${{ steps.meta.outputs.tags }}
120
+ labels: ${{ steps.meta.outputs.labels }}
121
+ cache-from: type=gha
122
+ cache-to: type=gha,mode=max
123
+
124
+ - name: Generate artifact attestation
125
+ if: github.event_name != 'workflow_dispatch' || github.event.inputs.push_to_registry == 'true'
126
+ uses: actions/attest-build-provenance@v1
127
+ with:
128
+ subject-name: ${{ env.REGISTRY_GHCR }}/${{ env.IMAGE_NAME }}
129
+ subject-digest: ${{ steps.push.outputs.digest }}
130
+ push-to-registry: true
.gitignore ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ build/
8
+ develop-eggs/
9
+ dist/
10
+ downloads/
11
+ eggs/
12
+ .eggs/
13
+ lib/
14
+ lib64/
15
+ parts/
16
+ sdist/
17
+ var/
18
+ wheels/
19
+ *.egg-info/
20
+ .installed.cfg
21
+ *.egg
22
+
23
+ # Virtual environments
24
+ .venv/
25
+ venv/
26
+ ENV/
27
+
28
+ # IDE
29
+ .idea/
30
+ .vscode/
31
+ *.swp
32
+ *.swo
33
+ *~
34
+
35
+ # Testing
36
+ .pytest_cache/
37
+ .coverage
38
+ htmlcov/
39
+ .tox/
40
+ .nox/
41
+
42
+ # Data (runtime generated)
43
+ data/memory.jsonl
44
+ data/codebook.json
45
+ data/concepts.json
46
+ data/synapses.json
47
+ data/warm_tier/*.mmap
48
+ data/warm_tier/*.json
49
+ data/warm_tier/*.npy
50
+ data/cold_archive/*.gz
51
+ vector_core/corpus_ready.json
52
+
53
+ # Logs
54
+ *.log
55
+ logs/
56
+
57
+ # Local dependency/vendor dumps
58
+ deps/
59
+
60
+ # Benchmarks and ad-hoc outputs
61
+ results*.txt
62
+ benchmark_results.txt
63
+
64
+ # OS
65
+ .DS_Store
66
+ Thumbs.db
67
+
68
+ # Secrets (should never exist, but just in case)
69
+ .env
70
+ *.pem
71
+ *.key
72
+
73
+ # Internal planning documents – NOT for public repo
74
+ AGENT_MASTER_PLAN.md
75
+ *.pdf
76
+
77
+ # Local IDE / agent settings
78
+ .claude/
79
+
80
+ # Runtime artifacts
81
+ error_log.txt
CHANGELOG.md ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [Unreleased]
9
+
10
+ ### Deprecated
11
+
12
+ #### Float HDV deprecation (src/core/hdv.py)
13
+ - **HDV class**: All public methods now emit `DeprecationWarning` when called
14
+ - **Migration path**: Use `BinaryHDV` from `src.core.binary_hdv` instead
15
+ - **API mappings**:
16
+ - `HDV(dimension=N)` -> `BinaryHDV.random(dimension=N)`
17
+ - `hdv.bind(other)` -> `hdv.xor_bind(other)`
18
+ - `hdv.unbind(other)` -> `hdv.xor_bind(other)` (XOR is self-inverse)
19
+ - `hdv.cosine_similarity(other)` -> `hdv.similarity(other)`
20
+ - `hdv.permute(shift)` -> `hdv.permute(shift)`
21
+ - `hdv.normalize()` -> No-op (binary vectors are already normalized)
22
+ - **Removal timeline**: Float HDV will be removed in a future version
23
+
24
+ #### BinaryHDV compatibility shims added
25
+ - **bind()**: Alias for `xor_bind()` - for legacy API compatibility
26
+ - **unbind()**: Alias for `xor_bind()` - XOR is self-inverse
27
+ - **cosine_similarity()**: Alias for `similarity()` - returns Hamming-based similarity
28
+ - **normalize()**: No-op for binary vectors
29
+ - **__xor__()**: Enables `v1 ^ v2` syntax for binding
30
+
31
+ ### Fixed
32
+
33
+ #### llm_integration.py (6 fixes)
34
+ - **Import paths**: Fixed incorrect import paths from `haim.src.core.engine` to `src.core.engine` and `haim.src.core.node` to `src.core.node`
35
+ - **Missing import**: Added `from datetime import datetime` for dynamic timestamps
36
+ - **Memory access API**: Changed `self.haim.memory_nodes.get()` to `self.haim.tier_manager.get_memory()` at lines 34, 114, 182, 244, 272 - using the correct API for memory access
37
+ - **Superposition query**: Replaced non-existent `superposition_query()` call with combined hypotheses retrieval path
38
+ - **Concept binding**: Replaced non-existent `bind_concepts()` with placeholder - engine has `bind_memories()` available
39
+ - **OR orchestration**: Integrated `orchestrate_orch_or()` from engine and removed workaround sorting path
40
+
41
+ #### api/main.py (1 fix)
42
+ - **Delete endpoint**: Fixed attribute reference from `engine.memory_nodes` to `engine.tier_manager.hot` at line 229 - correct attribute for hot memory tier
43
+
44
+ #### engine.py (1 fix)
45
+ - **Synapse persistence**: Implemented `_save_synapses()` method (lines 369-390) that was previously an empty stub
46
+ - Creates parent directory if it doesn't exist
47
+ - Writes all synapses to disk in JSONL format
48
+ - Includes all synapse attributes: `neuron_a_id`, `neuron_b_id`, `strength`, `fire_count`, `success_count`, `last_fired`
49
+ - Handles errors gracefully with logging
50
+
51
+ ### Changed
52
+
53
+ - **Dynamic timestamps**: LLM integration now uses `datetime.now().isoformat()` instead of hardcoded timestamp `"2026-02-04"` for accurate temporal tracking
54
+ - **Phase 4.3 hardening**:
55
+ - Chrono-weighting uses batched node lookup instead of per-node await chain
56
+ - `include_neighbors` now preserves `top_k` result contract
57
+ - `_dream_sem._value` private access replaced by public `locked()` API
58
+ - Episodic chaining race reduced with serialized store path (`_store_lock`, `_last_stored_id`)
59
+ - `engine_version` in stats updated to `4.3.0`
60
+ - HOT-tier `time_range` filtering enforced in `TierManager.search()`
61
+ - `orchestrate_orch_or()` made async and lock-guarded
Dockerfile ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MnemoCore Dockerfile
2
+ # ====================
3
+ # Multi-stage build for optimized production image
4
+
5
+ # Stage 1: Builder
6
+ FROM python:3.11-slim AS builder
7
+
8
+ WORKDIR /app
9
+
10
+ # Install build dependencies
11
+ RUN apt-get update && apt-get install -y --no-install-recommends \
12
+ build-essential \
13
+ && rm -rf /var/lib/apt/lists/*
14
+
15
+ # Copy requirements first for better caching
16
+ COPY requirements.txt .
17
+
18
+ # Create virtual environment and install dependencies
19
+ RUN python -m venv /opt/venv
20
+ ENV PATH="/opt/venv/bin:$PATH"
21
+ RUN pip install --no-cache-dir --upgrade pip && \
22
+ pip install --no-cache-dir -r requirements.txt
23
+
24
+ # Stage 2: Production
25
+ FROM python:3.11-slim AS production
26
+
27
+ # Labels for container metadata
28
+ LABEL maintainer="MnemoCore Team"
29
+ LABEL description="MnemoCore - Infrastructure for Persistent Cognitive Memory"
30
+ LABEL version="4.5.0"
31
+
32
+ # Security: Create non-root user
33
+ RUN groupadd --gid 1000 mnemocore && \
34
+ useradd --uid 1000 --gid mnemocore --shell /bin/bash --create-home mnemocore
35
+
36
+ WORKDIR /app
37
+
38
+ # Copy virtual environment from builder
39
+ COPY --from=builder /opt/venv /opt/venv
40
+ ENV PATH="/opt/venv/bin:$PATH"
41
+
42
+ # Install runtime dependencies only
43
+ RUN apt-get update && apt-get install -y --no-install-recommends \
44
+ curl \
45
+ && rm -rf /var/lib/apt/lists/* \
46
+ && apt-get clean
47
+
48
+ # Copy application code
49
+ COPY --chown=mnemocore:mnemocore src/ ./src/
50
+ COPY --chown=mnemocore:mnemocore config.yaml .
51
+ COPY --chown=mnemocore:mnemocore scripts/ ./scripts/
52
+
53
+ # Create data directory with proper permissions
54
+ RUN mkdir -p /app/data && chown -R mnemocore:mnemocore /app/data
55
+
56
+ # Switch to non-root user
57
+ USER mnemocore
58
+
59
+ # Environment variables (defaults, can be overridden)
60
+ ENV PYTHONUNBUFFERED=1 \
61
+ PYTHONDONTWRITEBYTECODE=1 \
62
+ HAIM_API_KEY="" \
63
+ REDIS_URL="redis://redis:6379/0" \
64
+ QDRANT_URL="http://qdrant:6333" \
65
+ LOG_LEVEL="INFO" \
66
+ HOST="0.0.0.0" \
67
+ PORT="8100"
68
+
69
+ # Expose port
70
+ EXPOSE 8100
71
+
72
+ # Health check using the healthcheck script
73
+ HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
74
+ CMD python /app/scripts/healthcheck.py || exit 1
75
+
76
+ # Entry point: Run uvicorn
77
+ ENTRYPOINT ["uvicorn", "src.api.main:app", "--host", "0.0.0.0", "--port", "8100"]
78
+ CMD ["--workers", "1", "--log-level", "info"]
LICENSE ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Robin Granberg
4
+ Contact: Robin@veristatesystems.com
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in all
14
+ copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
+ SOFTWARE.
MnemoCore Phase 3 5 Infinite.md ADDED
@@ -0,0 +1,1615 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MnemoCore Phase 3.5: Infinite Scalability Architecture Blueprint
2
+ **Holographic Adaptive Intelligence Memory - Distributed Vector System**
3
+
4
+ > **Target Scale**: 1B+ memories with sub-10ms latency
5
+ > **Architecture**: Binary HDV/VSA 16,384-dimensional vectors (2KB each)
6
+ > **Operations**: XOR-binding, Hamming distance, Active Inference consolidation
7
+ > **Author**: Robin Granberg (Robin@veristatesystems.com)
8
+ > **Date**: February 14, 2026
9
+ > **Version**: 3.5-DISTRIBUTED
10
+
11
+ ---
12
+
13
+ ## Executive Summary
14
+
15
+ MnemoCore Phase 3.0 successfully implemented local file-based binary hyperdimensional computing with 3-tier storage (HOT/WARM/COLD). This blueprint outlines the evolutionary path to **infinite scalability** through distributed vector databases, federated holographic state, and hardware-accelerated bitwise operations.
16
+
17
+ **Key Findings from Research**:
18
+ - **Qdrant** achieves 40x speedup with binary quantization, supporting native XOR/Hamming distance at 100M+ vector scale[web:23][web:29]
19
+ - **Redis Streams** provides sub-millisecond latency for event-driven "Subconscious Bus" architecture[web:52][web:55]
20
+ - **GPU acceleration** delivers 1.4-9.8× speedup for HDC operations with optimized popcount intrinsics[web:56][web:59]
21
+ - **Critical bottleneck** at 1B scale: Memory consistency across distributed nodes requiring sharding strategies[web:24]
22
+
23
+ ---
24
+
25
+ ## Part 1: Current Architecture Analysis
26
+
27
+ ### 1.1 Existing MnemoCore Phase 3.0 Strengths
28
+
29
+
30
+ - **Binary HDV Foundation**: 16,384-dimensional vectors with XOR-binding provide mathematical elegance and hardware efficiency
31
+ - **Tri-State Storage**: HOT (in-memory), WARM (Redis), COLD (file system) separation enables cost-effective scaling
32
+ - **LTP-Inspired Decay**: Temporal consolidation mimics biological long-term potentiation
33
+ - **Active Inference**: Predictive retrieval based on current context
34
+ - **Consumer Hardware Optimization**: Designed for i7/32GB RAM constraints
35
+
36
+
37
+ ### 1.2 Identified Bottlenecks for Billion-Scale
38
+
39
+ \begin{table}
40
+ \begin{tabular}{|l|l|l|}
41
+ \hline
42
+ \textbf{Component} & \textbf{Current Limitation} & \textbf{Impact at 1B Memories} \\
43
+ \hline
44
+ File I/O & Sequential disk reads & 500ms+ latency for COLD retrieval \\
45
+ \hline
46
+ Redis Single-Node & 512GB RAM ceiling & Cannot hold WARM tier beyond 250M vectors \\
47
+ \hline
48
+ Hamming Distance Calc & CPU-bound Python loops & Linear O(n) search time explosion \\
49
+ \hline
50
+ Memory Consistency & No distributed state & Impossible to federate across nodes \\
51
+ \hline
52
+ Consolidation & Synchronous operations & Blocks real-time inference during updates \\
53
+ \hline
54
+ \end{tabular}
55
+ \caption{Critical scaling bottlenecks in current implementation}
56
+ \end{table}
57
+
58
+ ### 1.3 Code Quality Assessment
59
+
60
+ **Positive Patterns**:
61
+ - Clean separation of concerns (storage layers, encoding, retrieval)
62
+ - Type hints and docstrings present
63
+ - Modular design allows component replacement
64
+
65
+ **Areas Requiring Improvement**:
66
+
67
+ \begin{enumerate}
68
+ \item \textbf{Hardcoded Dimensionality}: D=16384 should be configuration-driven
69
+ \item \textbf{Missing Async/Await}: All I/O operations are synchronous blocking
70
+ \item \textbf{No Batch Operations}: Individual memory processing prevents vectorization
71
+ \item \textbf{Inefficient Hamming Distance}: Python loops instead of NumPy bitwise operations
72
+ \item \textbf{No Connection Pooling}: Redis connections created per operation
73
+ \item \textbf{Absence of Metrics}: No instrumentation for latency/throughput monitoring
74
+ \item \textbf{Lacking Error Recovery}: No retry logic or circuit breakers for Redis failures
75
+ \item \textbf{Sequential Encoding}: No parallelization of hypervector generation
76
+ \end{enumerate}
77
+
78
+ ---
79
+
80
+ ## Part 2: Distributed Vector Database Selection
81
+
82
+ ### 2.1 Binary Quantization Database Comparison
83
+
84
+ \begin{table}
85
+ \begin{tabular}{|l|c|c|c|c|}
86
+ \hline
87
+ \textbf{Database} & \textbf{Binary Support} & \textbf{Scale (vectors)} & \textbf{p50 Latency} & \textbf{XOR Native} \\
88
+ \hline
89
+ Qdrant & Yes (1/1.5/2-bit) & 100M-1B+ & <10ms & Yes \\
90
+ \hline
91
+ Milvus & Yes (binary index) & 100M-10B & 15-50ms & Yes \\
92
+ \hline
93
+ Weaviate & Yes (BQ+HNSW) & 100M-1B & 10-30ms & Partial \\
94
+ \hline
95
+ Pinecone & No (float32 only) & 100M-1B & 10-20ms & No \\
96
+ \hline
97
+ \end{tabular}
98
+ \caption{Comparison of vector databases for binary HDV at scale}
99
+ \end{table}
100
+
101
+ **Winner: Qdrant** for MnemoCore Phase 3.5
102
+
103
+ **Rationale**:
104
+ 1. **Native Binary Quantization**: Supports 1-bit, 1.5-bit, and 2-bit encodings with `always_ram` optimization for HOT tier[web:23][web:28]
105
+ 2. **XOR-as-Hamming**: Efficiently emulates Hamming distance using dot product on binary vectors[web:29]
106
+ 3. **Sub-10ms p50 Latency**: Achieves <10ms at 15.3M vectors with 90-95% recall using oversampling[web:23]
107
+ 4. **Horizontal Scaling**: Supports distributed clusters with automatic sharding
108
+ 5. **HNSW+BQ Integration**: Combines approximate nearest neighbor (ANN) with binary quantization for optimal speed/accuracy tradeoff[web:26]
109
+ 6. **Proven Performance**: 40x speedup compared to uncompressed vectors in production benchmarks[web:23]
110
+
111
+ ### 2.2 Qdrant Architecture for MnemoCore
112
+
113
+ \begin{figure}
114
+ \centering
115
+ \textbf{Proposed 3-Tier Qdrant Integration:}
116
+ \end{figure}
117
+
118
+ ┌─────────────────────────────────────────────────────────┐
119
+ │ HOT TIER (RAM) │
120
+ │ Qdrant Collection: "haim_hot" │
121
+ │ - Binary Quantization: 1-bit, always_ram=true │
122
+ │ - Size: 100K most recent/accessed vectors │
123
+ │ - Latency: <2ms p50 │
124
+ │ - Update Frequency: Real-time (every memory write) │
125
+ └─────────────────────────────────────────────────────────┘
126
+ ↓ (LTP decay < threshold)
127
+ ┌─────────────────────────────────────────────────────────┐
128
+ │ WARM TIER (SSD-backed) │
129
+ │ Qdrant Collection: "haim_warm" │
130
+ │ - Binary Quantization: 1.5-bit, disk-mmap enabled │
131
+ │ - Size: 1M-100M consolidated vectors │
132
+ │ - Latency: 5-10ms p50 │
133
+ │ - Update Frequency: Hourly consolidation batch │
134
+ └─────────────────────────────────────────────────────────┘
135
+ ↓ (LTP decay < lower threshold)
136
+ ┌─────────────────────────────────────────────────────────┐
137
+ │ COLD TIER (Object Storage) │
138
+ │ S3/MinIO: Compressed binary archives │
139
+ │ - Format: .npy.gz (NumPy compressed arrays) │
140
+ │ - Size: 100M-10B+ archival vectors │
141
+ │ - Latency: 50-500ms │
142
+ │ - Access Pattern: Rare retrieval, batch reactivation │
143
+ └─────────────────────────────────────────────────────────┘
144
+
145
+ **Configuration Example (Qdrant Python Client)**:
146
from qdrant_client import QdrantClient, models

client = QdrantClient(url="http://qdrant-cluster:6333")

# HOT tier collection with aggressive binary quantization.
#
# BUG FIX: qdrant-client's models.Distance has no HAMMING member (supported
# metrics are COSINE, EUCLID, DOT, MANHATTAN), so the original snippet would
# raise AttributeError.  Per Qdrant's binary-quantization docs — and this
# document's own Section 2.1 — Hamming distance over {0,1} vectors is
# emulated via the dot product, so DOT is used here.
client.create_collection(
    collection_name="haim_hot",
    vectors_config=models.VectorParams(
        size=16384,                    # D = 16,384
        distance=models.Distance.DOT,  # Hamming emulated via dot product on binary vectors
    ),
    quantization_config=models.BinaryQuantization(
        binary=models.BinaryQuantizationConfig(
            always_ram=True,  # pin quantized vectors to RAM for sub-2ms latency
            # NOTE(review): `encoding` requires a recent qdrant-client release —
            # confirm the deployed client version supports it.
            encoding=models.BinaryQuantizationEncoding.OneBit,
        )
    ),
    hnsw_config=models.HnswConfigDiff(
        m=16,              # connections per node (lower favors speed)
        ef_construct=100,  # construction-time accuracy
    ),
)
168
+
169
+ ### 2.3 Estimated Performance at Scale
170
+
171
+ \begin{table}
172
+ \begin{tabular}{|l|c|c|c|c|}
173
+ \hline
174
+ \textbf{Tier} & \textbf{Vector Count} & \textbf{Memory (GB)} & \textbf{p50 Latency} & \textbf{QPS} \\
175
+ \hline
176
+ HOT (Qdrant 1-bit) & 100,000 & 0.2 & 1.5ms & 10,000+ \\
177
+ \hline
178
+ WARM (Qdrant 1.5-bit) & 10,000,000 & 30 & 8ms & 5,000 \\
179
+ \hline
180
+ COLD (S3 archived) & 1,000,000,000 & 2,000 (disk) & 250ms & 100 \\
181
+ \hline
182
+ \end{tabular}
183
+ \caption{Projected performance with Qdrant at billion-scale}
184
+ \end{table}
185
+
186
+ **Memory Footprint Calculation**:
187
+ - Uncompressed (float32): 16,384 dims × 4 bytes = 64KB per vector
188
+ - 1-bit BQ: 16,384 bits = 2,048 bytes = 2KB per vector (32× compression vs float32)
189
+ - 100K HOT vectors: 100,000 × 2KB = 200MB (+ HNSW index overhead) ≈ 0.2GB total
190
+
191
+ ---
192
+
193
+ ## Part 3: Federated Holographic State
194
+
195
+ ### 3.1 Challenge: Global Memory Consistency
196
+
197
+ **Problem**: In a distributed system with N nodes, each node maintains a local holographic state (superposition of recent contexts). How do we ensure global consistency without sacrificing latency?
198
+
199
+ **Two Competing Approaches**:
200
+
201
+ \begin{enumerate}
202
+ \item \textbf{Sharding by Context}: Partition memories based on semantic clustering
203
+ \item \textbf{Superposition Aggregation}: Each node maintains full holographic state, periodically synchronized
204
+ \end{enumerate}
205
+
206
+ ### 3.2 Strategy Comparison
207
+
208
+ \begin{table}
209
+ \begin{tabular}{|l|l|l|}
210
+ \hline
211
+ \textbf{Aspect} & \textbf{Sharding by Context} & \textbf{Superposition Aggregation} \\
212
+ \hline
213
+ Consistency & Eventual (AP in CAP) & Strong (CP in CAP) \\
214
+ \hline
215
+ Latency & Low (single-node query) & Medium (multi-node gather) \\
216
+ \hline
217
+ Network Traffic & Low (targeted routing) & High (periodic sync) \\
218
+ \hline
219
+ Fault Tolerance & High (replication per shard) & Medium (coordinator SPOF) \\
220
+ \hline
221
+ Context Drift & High risk (stale cross-shard) & Low risk (global view) \\
222
+ \hline
223
+ Implementation Complexity & Medium & High \\
224
+ \hline
225
+ \end{tabular}
226
+ \caption{Architectural comparison for distributed holographic state}
227
+ \end{table}
228
+
229
+ ### 3.3 Recommended Hybrid Architecture
230
+
231
+ **Proposal**: **"Contextual Sharding with Asynchronous Superposition Broadcast"**
232
+
233
+ **Design Principles**:
234
+ 1. Shard memories by semantic context (using locality-sensitive hashing of HDVs)
235
+ 2. Each node maintains a lightweight "global hologram" (last N=1000 cross-shard accesses)
236
+ 3. Asynchronous broadcast of high-salience memories (LTP decay > threshold) to all nodes
237
+ 4. Query routing: Check local shard first, fallback to cross-shard search if confidence < threshold
238
+
239
+ **Architecture Diagram Description**:
240
+
241
+ ┌──────────────────────┐
242
+ │ Query Router │
243
+ │ (Consistent Hashing)│
244
+ └──────────┬───────────┘
245
+ │
246
+ ┌───────────────────┼───────────────────┐
247
+ ↓ ↓ ↓
248
+ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐
249
+ │ Node 1 │ │ Node 2 │ │ Node N │
250
+ │ │ │ │ │ │
251
+ │ Shard: 0-33%│ │ Shard: 34-66│ │ Shard: 67-100│
252
+ │ Local Qdrant│ │ Local Qdrant│ │ Local Qdrant│
253
+ │ │ │ │ │ │
254
+ │ Global Holo-│ │ Global Holo-│ │ Global Holo-│
255
+ │ gram Cache │ │ gram Cache │ │ gram Cache │
256
+ │ (1K vectors)│ │ (1K vectors)│ │ (1K vectors)│
257
+ └──────┬──────┘ └──────┬──────┘ └──────┬──────┘
258
+ │ │ │
259
+ └───────────────────┼───────────────────┘
260
+ │
261
+ ┌──────────▼───────────┐
262
+ │ Redis Pub/Sub │
263
+ │ "hologram_broadcast"│
264
+ │ (High-salience only)│
265
+ └──────────────────────┘
266
+
267
+ **Shard Assignment Algorithm**:
268
def assign_shard(memory_hdv: np.ndarray, num_shards: int) -> int:
    """
    Map a memory's HDV to a shard index in [0, num_shards).

    The first 64 bits of the HDV's raw byte buffer are interpreted as an
    unsigned big-endian integer and reduced modulo ``num_shards``.

    NOTE(review): this is a uniform hash, NOT semantic clustering — a single
    flipped bit among the first 64 moves the memory to a different shard, so
    similar HDVs are not guaranteed to co-locate (the original docstring
    claimed otherwise).  True semantic sharding would need LSH over the full
    vector.

    Args:
        memory_hdv: Binary hypervector; the first 8 *bytes* of its buffer are
            used regardless of dtype.  (The original ``memory_hdv[:8]`` took
            8 *elements*, i.e. 64 bits only for uint8 arrays.)
        num_shards: Number of shards; must be >= 1.

    Returns:
        Deterministic shard index in [0, num_shards).

    Raises:
        ValueError: if ``num_shards`` is less than 1.
    """
    if num_shards < 1:
        raise ValueError("num_shards must be >= 1")
    # Slice the raw byte buffer so "first 64 bits" holds for every dtype.
    hash_key = int.from_bytes(memory_hdv.tobytes()[:8], 'big')
    return hash_key % num_shards
275
+
276
+ ---
277
+
278
+ ## Part 4: Subconscious Bus Architecture
279
+
280
+ ### 4.1 Active Inference Pipeline Requirements
281
+
282
+ **Goal**: Asynchronous memory consolidation, predictive retrieval, and background LTP decay processing without blocking real-time queries.
283
+
284
+ **Requirements**:
285
+ - Sub-millisecond event ingestion latency
286
+ - Ordered processing (within context partition)
287
+ - At-least-once delivery guarantees
288
+ - Backpressure handling for consolidation lag
289
+ - Horizontal scaling of consumer workers
290
+
291
+ ### 4.2 Redis Streams vs Apache Kafka Analysis
292
+
293
+ \begin{table}
294
+ \begin{tabular}{|l|l|l|}
295
+ \hline
296
+ \textbf{Metric} & \textbf{Redis Streams} & \textbf{Apache Kafka} \\
297
+ \hline
298
+ Latency (p50) & <1ms & 5-10ms \\
299
+ \hline
300
+ Throughput & 100K-500K msg/s & 1M-10M msg/s \\
301
+ \hline
302
+ Data Retention & Hours-Days (RAM-limited) & Days-Years (disk-backed) \\
303
+ \hline
304
+ Deployment Complexity & Low (single Redis instance) & High (ZooKeeper + brokers) \\
305
+ \hline
306
+ Operational Overhead & Minimal & Significant \\
307
+ \hline
308
+ Memory Efficiency & High (in-memory) & Medium (page cache) \\
309
+ \hline
310
+ Fault Tolerance & Redis replication & Distributed replication \\
311
+ \hline
312
+ Consumer Groups & Yes (XREADGROUP) & Yes (native) \\
313
+ \hline
314
+ \end{tabular}
315
+ \caption{Comparison of message streaming systems for Subconscious Bus}
316
+ \end{table}
317
+
318
+ **Decision: Redis Streams** for MnemoCore Phase 3.5
319
+
320
+ **Justification**:
321
+ 1. **Ultra-Low Latency**: Sub-millisecond event delivery critical for Active Inference responsiveness[web:52][web:55]
322
+ 2. **Simplified Architecture**: Reuses existing Redis infrastructure (already in WARM tier)
323
+ 3. **Memory Budget**: Consolidation events have short retention needs (1-2 hours max)
324
+ 4. **In-Memory Performance**: Consolidation workers process 850+ records/s on Raspberry Pi 4 with Redis Streams vs 630/s with Kafka[web:38]
325
+ 5. **Consumer Group Support**: Native `XREADGROUP` for distributed worker parallelism[web:52]
326
+
327
+ ### 4.3 Subconscious Bus Implementation
328
+
329
+ **Stream Schema**:
330
# Schema of the events carried on the Subconscious Bus stream.
# Each value maps a payload field name to its expected Python type.
EVENTS = {
    "memory.write": {
        "hdv": bytes,            # binary hyperdimensional vector
        "context_id": str,
        "ltp_strength": float,
        "timestamp": int,
    },
    "memory.access": {
        "memory_id": str,
        "access_count": int,
        "last_access": int,
    },
    "consolidation.trigger": {
        "tier": str,             # "hot_to_warm" or "warm_to_cold"
        "memory_ids": list[str],
    },
    "inference.predict": {
        "context_hdv": bytes,
        "prediction_window": int,  # seconds ahead
    },
}
352
+
353
+ **Producer (Memory Write Path)**:
354
+ import redis
355
+ import msgpack
356
+
357
class SubconsciousBus:
    """Producer side of the Subconscious Bus (Redis Streams).

    Publishes memory-lifecycle events onto a single capped stream so that
    consolidation workers can consume them asynchronously.
    """

    def __init__(self, redis_url: str):
        # BUG FIX: publish_memory_write awaits redis calls, so the client
        # must be the asyncio variant.  The original used the synchronous
        # redis.from_url(), whose methods are not awaitable and would raise
        # "object can't be used in 'await' expression" at runtime.
        # (redis-py >= 4.2 re-exports the async client as redis.asyncio.)
        self.redis = redis.asyncio.from_url(redis_url, decode_responses=False)
        self.stream_key = "MnemoCore:subconscious"

    async def publish_memory_write(self, hdv: np.ndarray, context_id: str, ltp: float):
        """Async-publish a memory.write event without blocking the caller.

        Args:
            hdv: Binary hypervector of the newly written memory.
            context_id: Logical context the memory belongs to.
            ltp: Current LTP strength of the memory.
        """
        event = {
            "type": "memory.write",
            "hdv": hdv.tobytes(),  # raw binary serialization
            "context_id": context_id,
            "ltp_strength": ltp,
            "timestamp": int(time.time() * 1000),  # ms resolution
        }
        packed = msgpack.packb(event)  # compact binary encoding

        # XADD with maxlen to prevent unbounded stream growth.
        await self.redis.xadd(
            name=self.stream_key,
            fields={"data": packed},
            maxlen=100000,      # rolling window of last 100K events
            approximate=True,   # allow ~5% variance for performance
        )
380
+
381
+ **Consumer (Consolidation Worker)**:
382
class ConsolidationWorker:
    """Consumer side of the Subconscious Bus.

    Reads events from the shared stream via a Redis consumer group so that
    multiple worker processes share the load, and acknowledges each event
    only after it has been handled (at-least-once delivery).
    """

    def __init__(self, redis_url: str, consumer_group: str, consumer_name: str):
        # BUG FIX: process_events awaits redis calls, so use the asyncio
        # client (the original synchronous client is not awaitable).
        self.redis = redis.asyncio.from_url(redis_url, decode_responses=False)
        self.stream_key = "MnemoCore:subconscious"
        self.group = consumer_group
        self.name = consumer_name
        # Group creation is itself async with this client, so it is deferred
        # to the first process_events() call instead of running in __init__.
        self._group_ready = False

    async def _ensure_group(self):
        """Create the consumer group once (idempotent across workers)."""
        if self._group_ready:
            return
        try:
            await self.redis.xgroup_create(
                name=self.stream_key,
                groupname=self.group,
                id="0",
                mkstream=True,
            )
        except redis.exceptions.ResponseError as exc:
            # BUG FIX: the original swallowed *every* ResponseError.  Only
            # the expected "group already exists" reply is benign; anything
            # else (bad key type, ACL failure) must surface.
            if "BUSYGROUP" not in str(exc):
                raise
        self._group_ready = True

    async def process_events(self, batch_size: int = 100):
        """Consume and dispatch events in batches, forever.

        Args:
            batch_size: Maximum events fetched per XREADGROUP call.
        """
        await self._ensure_group()
        while True:
            # XREADGROUP with blocking (1000ms timeout).
            messages = await self.redis.xreadgroup(
                groupname=self.group,
                consumername=self.name,
                streams={self.stream_key: ">"},
                count=batch_size,
                block=1000,
            )

            if not messages:
                continue

            for _stream_name, events in messages:
                for event_id, event_data in events:
                    event = msgpack.unpackb(event_data[b"data"])

                    if event["type"] == "memory.write":
                        await self._handle_memory_write(event)
                    elif event["type"] == "consolidation.trigger":
                        await self._handle_consolidation(event)

                    # Acknowledge only after successful handling — this is
                    # what makes delivery at-least-once.
                    await self.redis.xack(self.stream_key, self.group, event_id)
426
+
427
+ **Horizontal Scaling**:
428
+ - Deploy N worker processes (e.g., 4 workers for 4-core CPU)
429
+ - Each worker reads from same consumer group
430
+ - Redis automatically load-balances events across workers
431
+ - Pending Entries List (PEL) tracks unacknowledged messages for fault recovery[web:52]
432
+
433
+ ---
434
+
435
+ ## Part 5: Hardware Acceleration Stack
436
+
437
+ ### 5.1 Bitwise Operations Performance Analysis
438
+
439
+ **Critical Operations in HDC**:
440
+ 1. **XOR-binding**: Element-wise XOR of two 16,384-bit vectors
441
+ 2. **Popcount**: Count of 1-bits (for Hamming distance calculation)
442
+ 3. **Bundling**: Element-wise majority vote across N vectors
443
+
444
+ **Hardware Comparison**:
445
+
446
+ \begin{table}
447
+ \begin{tabular}{|l|c|c|c|c|}
448
+ \hline
449
+ \textbf{Platform} & \textbf{XOR Throughput} & \textbf{Popcount Method} & \textbf{Cost} & \textbf{Power} \\
450
+ \hline
451
+ CPU (AVX-512) & 5 GBit/s & POPCNT instruction & Low & 15-65W \\
452
+ \hline
453
+ GPU (CUDA) & 500 GBit/s & \_\_popcll intrinsic & Medium & 150-300W \\
454
+ \hline
455
+ TPU (v4) & 200 GBit/s & Systolic array ops & High & 175W \\
456
+ \hline
457
+ FPGA (Stratix 10) & 100 GBit/s & Custom LUT counters & High & 30-70W \\
458
+ \hline
459
+ \end{tabular}
460
+ \caption{Hardware performance for HDC operations}
461
+ \end{table}
462
+
463
+ ### 5.2 GPU Acceleration Recommendation
464
+
465
+ **Winner: GPU (NVIDIA RTX 4090 or A100)** for MnemoCore Phase 3.5+
466
+
467
+ **Rationale**:
468
+ 1. **Native Bitwise Support**: CUDA provides efficient `__popcll` (popcount 64-bit) intrinsic[web:54]
469
+ 2. **Proven HDC Speedups**: OpenHD framework achieves 9.8× training speedup and 1.4× inference speedup on GPU vs CPU[web:59]
470
+ 3. **Memory Bandwidth**: 1TB/s (A100) vs 200GB/s (DDR5) enables massive parallel Hamming distance calculations
471
+ 4. **Batch Processing**: Process 1000+ memories in parallel (vs sequential CPU loops)
472
+ 5. **Cost-Effectiveness**: RTX 4090 (~$1600) provides 82 TFLOPS vs TPU v4 pod (>$100K)[web:57]
473
+ 6. **Developer Ecosystem**: PyTorch/CuPy have mature GPU support, CUDA well-documented
474
+
475
+ **Performance Estimates**:
476
+ - **Hamming Distance Batch**: 1M comparisons in ~50ms (GPU) vs 5000ms (CPU)
477
+ - **Encoding Pipeline**: 10K memories/second (GPU) vs 500/second (CPU)
478
+ - **Consolidation**: 100K vector bundling in ~200ms (GPU) vs 10,000ms (CPU)
479
+
480
+ ### 5.3 Optimized GPU Implementation
481
+
482
+ **Leveraging PyTorch for Bitwise Ops**:
483
+ import torch
484
+
485
class GPUHammingCalculator:
    """Vectorized Hamming-distance computation on a torch device."""

    def __init__(self, device: str = "cuda:0"):
        self.device = torch.device(device)

    def batch_hamming_distance(
        self,
        query: np.ndarray,     # shape (D,), e.g. D = 16384
        database: np.ndarray,  # shape (N, D)
    ) -> np.ndarray:
        """
        Return the Hamming distance from `query` to every row of `database`.

        Both inputs are interpreted as boolean bit-vectors; the result is an
        (N,) integer array of differing-bit counts.
        """
        # Move both operands onto the target device as bool tensors.
        q = torch.from_numpy(query).bool().to(self.device)
        db = torch.from_numpy(database).bool().to(self.device)

        # A bit differs exactly where the booleans are unequal (for bools,
        # `!=` is the same predicate as XOR); counting mismatches per row —
        # broadcast across all N rows at once — yields the Hamming distance.
        mismatch = q != db
        per_row = mismatch.sum(dim=1)

        return per_row.cpu().numpy()
508
+
509
+ **Popcount Optimization (CuPy)**:
510
+ import cupy as cp
511
+
512
def gpu_popcount(binary_vectors: np.ndarray) -> np.ndarray:
    """
    Count 1-bits in each binary vector on the GPU.

    Args:
        binary_vectors: (N, D) array of 0/1 values (D = 16384 in MnemoCore).

    Returns:
        (N,) int32 array with the popcount of each vector.
    """
    # Transfer to GPU as bytes.
    vectors_gpu = cp.asarray(binary_vectors, dtype=cp.uint8)

    # Pack the 0/1 values into bits: (N, D) -> (N, D/8) uint8 words.
    packed = cp.packbits(vectors_gpu, axis=1)

    # BUG FIX: the original looped 256 times per call over uint64 words and
    # relied on cp.bitwise_count(), which is not available in CuPy releases.
    # A 256-entry per-byte popcount lookup table does the same work with a
    # single GPU gather + row reduction.
    # NOTE(review): since the input is already unpacked 0/1 values, a plain
    # vectors_gpu.sum(axis=1) would also be correct; the pack+table form is
    # kept to demonstrate popcount in the packed domain.
    byte_popcount = cp.asarray([bin(b).count("1") for b in range(256)], dtype=cp.int32)
    counts = byte_popcount[packed].sum(axis=1, dtype=cp.int32)

    return counts.get()  # transfer result back to the host
532
+
533
+ ### 5.4 Infrastructure Recommendation
534
+
535
+ **Phase 3.5 (100K-10M memories)**: **Bare Metal with Consumer GPU**
536
+ - Hardware: Intel i7-14700K (20 cores) + 64GB DDR5 + RTX 4090 (24GB VRAM)
537
+ - Storage: 2TB NVMe SSD for Qdrant
538
+ - Cost: ~$4000 one-time
539
+ - Advantages: No cloud costs, full control, sub-2ms latency
540
+
541
+ **Phase 4.0 (10M-100M memories)**: **Hybrid Cloud with GPU Instances**
542
+ - Compute: AWS g5.2xlarge (NVIDIA A10G, 24GB VRAM) for consolidation workers
543
+ - Database: Self-hosted Qdrant cluster (3 nodes, 128GB RAM each)
544
+ - Storage: S3 for COLD tier archival
545
+ - Cost: ~$1500/month operational
546
+ - Advantages: Elastic scaling, managed backups, geographic distribution
547
+
548
+ **Phase 5.0 (100M-1B+ memories)**: **Distributed Cloud with TPU Pods**
549
+ - Compute: Google Cloud TPU v4 pods (8 TPU cores) for massive parallelism
550
+ - Database: Fully managed Qdrant Cloud (dedicated cluster)
551
+ - Cost: ~$10,000/month operational
552
+ - Advantages: 420 TOPS performance, 10B+ vector support, enterprise SLA[web:57]
553
+
554
+ **Critical Decision Factor**: **Start with bare metal GPU** (Phase 3.5). Only migrate to cloud when operational complexity exceeds team capacity (typically at 50M+ memories).
555
+
556
+ ---
557
+
558
+ ## Part 6: Implementation Roadmap
559
+
560
+ ### 6.1 Code Refactoring Priorities (Non-Breaking)
561
+
562
+ \begin{enumerate}
563
+ \item \textbf{Configuration System} (Priority: CRITICAL)
564
+ \begin{itemize}
565
+ \item Extract all magic numbers (16384, tier thresholds, Redis URLs) to YAML config
566
+ \item Enable runtime dimensionality changes without code edits
567
+ \item Add environment variable overrides for deployment flexibility
568
+ \end{itemize}
569
+
570
+ \item \textbf{Async I/O Migration} (Priority: HIGH)
571
+ \begin{itemize}
572
+ \item Convert Redis operations to async (aioredis library)
573
+ \item Implement async file I/O for COLD tier (aiofiles)
574
+ \item Use asyncio.gather() for parallel Qdrant queries
575
+ \end{itemize}
576
+
577
+ \item \textbf{Batch Processing Layer} (Priority: HIGH)
578
+ \begin{itemize}
579
+ \item Add batch\_encode() method for encoding N memories in single GPU call
580
+ \item Implement batch\_search() for amortized Hamming distance calculations
581
+ \item Use NumPy vectorization instead of Python loops
582
+ \end{itemize}
583
+
584
+ \item \textbf{Connection Pooling} (Priority: MEDIUM)
585
+ \begin{itemize}
586
+ \item Implement Redis connection pool (redis.ConnectionPool)
587
+ \item Add Qdrant client singleton with connection reuse
588
+ \item Configure connection limits based on workload (default: 10 connections)
589
+ \end{itemize}
590
+
591
+ \item \textbf{Observability Instrumentation} (Priority: MEDIUM)
592
+ \begin{itemize}
593
+ \item Add Prometheus metrics (memory\_writes\_total, search\_latency\_seconds, etc.)
594
+ \item Implement structured logging (loguru with JSON output)
595
+ \item Create Grafana dashboard for real-time monitoring
596
+ \end{itemize}
597
+
598
+ \item \textbf{Error Handling \& Resilience} (Priority: MEDIUM)
599
+ \begin{itemize}
600
+ \item Add exponential backoff retries for transient Redis failures
601
+ \item Implement circuit breaker pattern for Qdrant unavailability
602
+ \item Add fallback to local cache when WARM tier unreachable
603
+ \end{itemize}
604
+
605
+ \item \textbf{GPU Acceleration Module} (Priority: LOW - Phase 4.0)
606
+ \begin{itemize}
607
+ \item Create gpu\_ops.py with PyTorch/CuPy implementations
608
+ \item Add feature flag for CPU/GPU selection
609
+ \item Benchmark and profile GPU vs CPU for threshold tuning
610
+ \end{itemize}
611
+ \end{enumerate}
612
+
613
+ ### 6.2 Migration Path to Qdrant (Zero Downtime)
614
+
615
+ **Phase 1: Dual-Write (Week 1-2)**
616
+ \begin{enumerate}
617
+ \item Deploy Qdrant alongside existing Redis/file system
618
+ \item Modify write path to persist to BOTH systems
619
+ \item No read path changes (continue using old system)
620
+ \item Run data consistency checks daily
621
+ \end{enumerate}
622
+
623
+ **Phase 2: Shadow Read (Week 3-4)**
624
+ \begin{enumerate}
625
+ \item Query BOTH systems on every read
626
+ \item Compare results (latency, recall, ranking)
627
+ \item Log discrepancies but serve from old system
628
+ \item Tune Qdrant HNSW parameters (ef\_search) based on metrics
629
+ \end{enumerate}
630
+
631
+ **Phase 3: Gradual Cutover (Week 5-6)**
632
+ \begin{enumerate}
633
+ \item Route 10\% of reads to Qdrant (canary deployment)
634
+ \item Monitor error rates and p99 latency
635
+ \item Increase to 50\%, then 100\% over 2 weeks
636
+ \item Keep old system as fallback for 1 month
637
+ \end{enumerate}
638
+
639
+ **Phase 4: Decommission (Week 7-8)**
640
+ \begin{enumerate}
641
+ \item Archive old Redis/file data to S3
642
+ \item Remove dual-write logic
643
+ \item Update documentation and runbooks
644
+ \item Celebrate successful migration 🎉
645
+ \end{enumerate}
646
+
647
+ ### 6.3 Testing Strategy
648
+
649
+ **Unit Tests** (Target: 80% coverage):
650
+ - Hamming distance correctness (compare CPU vs GPU implementations)
651
+ - XOR-binding commutativity and associativity
652
+ - LTP decay formula boundary conditions
653
+ - Shard assignment determinism
654
+
655
+ **Integration Tests**:
656
+ - End-to-end write → consolidate → retrieve flow
657
+ - Redis Streams event processing with consumer groups
658
+ - Qdrant cluster failover scenarios
659
+ - GPU memory allocation under high load
660
+
661
+ **Performance Tests** (Benchmarks):
662
+ - Latency: p50, p95, p99 for HOT/WARM/COLD retrieval
663
+ - Throughput: memories/second write rate
664
+ - Scalability: Query time vs database size (1K, 10K, 100K, 1M vectors)
665
+ - Memory: Peak RAM usage during consolidation
666
+
667
+ **Chaos Engineering** (Production):
668
+ - Kill random Qdrant node, verify automatic rebalancing
669
+ - Inject Redis network partition, test circuit breaker
670
+ - Saturate GPU with fake workload, measure degradation
671
+ - Corrupt COLD tier file, validate checksum recovery
672
+
673
+ ---
674
+
675
+ ## Part 7: Critical Bottleneck at 1B Scale
676
+
677
+ ### 7.1 The Fundamental Limitation
678
+
679
+ **Problem**: At 1 billion memories (1B × 2KB = 2TB uncompressed), the dominant bottleneck shifts from **computation** to **distributed state consistency**.
680
+
681
+ **Specific Failure Modes**:
682
+
683
+ \begin{enumerate}
684
+ \item \textbf{Cross-Shard Query Latency}
685
+ \begin{itemize}
686
+ \item With 100 shards, average query hits 1 shard (best case)
687
+ \item Context drift requires checking 10-20 shards (realistic case)
688
+ \item Network round-trips: 10 shards × 10ms = 100ms total (violates <10ms SLA)
689
+ \end{itemize}
690
+
691
+ \item \textbf{Holographic State Synchronization}
692
+ \begin{itemize}
693
+ \item Each node broadcasts high-salience memories to N-1 other nodes
694
+ \item With 100 nodes, broadcast fanout creates O(N²) network traffic
695
+ \item At 1000 writes/sec, 100 nodes = 100K cross-node messages/sec
696
+ \item This saturates 10GbE network links (theoretical max ~1M small packets/sec)
697
+ \end{itemize}
698
+
699
+ \item \textbf{Consolidation Lag}
700
+ \begin{itemize}
701
+ \item HOT → WARM consolidation processes 100K memories/hour (current rate)
702
+ \item At 1B total memories with 10\% monthly churn = 100M updates/month
703
+ \item Required rate: 100M / (30 days × 24 hours) = 138K memories/hour
704
+ \item This exceeds single-worker capacity → need distributed consolidation
705
+ \end{itemize}
706
+ \end{enumerate}
707
+
708
+ ### 7.2 Proposed Solution: Hierarchical Aggregation
709
+
710
+ **Architecture**: **"Tiered Holographic Federation with Regional Supernodes"**
711
+
712
+ ┌────────────────────┐
713
+ │ Global Supernode │
714
+ │ (Coarse Hologram) │
715
+ │ Top 10K salient │
716
+ └─────────┬──────────┘
717
+ │
718
+ ┌───────────────┼───────────────┐
719
+ ↓ ↓ ↓
720
+ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐
721
+ │ Region 1 │ │ Region 2 │ │ Region N │
722
+ │ Supernode │ │ Supernode │ │ Supernode │
723
+ │ (10 shards) │ │ (10 shards) │ │ (10 shards) │
724
+ └──────┬───────┘ └──────┬───────┘ └──────┬───────┘
725
+ │ │ │
726
+ ┌───────┼────────┐ │ ┌───────┼────────┐
727
+ ↓ ↓ ↓ ↓ ↓ ↓ ↓
728
+ Shard0 Shard1 ... Shard9 Shard0 Shard1 ... Shard9
729
+ (Qdrant node) (Qdrant node)
730
+
731
+ **Key Innovations**:
732
+ 1. **Regional Supernodes**: Aggregate holographic state from 10 local shards
733
+ 2. **Global Supernode**: Maintains ultra-sparse representation (top 0.01% salient memories)
734
+ 3. **Lazy Synchronization**: Only propagate when salience exceeds regional threshold
735
+ 4. **Hierarchical Routing**: Check local shard → regional supernode → global supernode → full scan (fallback)
736
+
737
+ **Latency Budget**:
738
+ - Local shard query: 2ms (cache hit)
739
+ - Regional supernode: +5ms (10 shards aggregation)
740
+ - Global supernode: +10ms (cross-region hop)
741
+ - **Total p99**: <20ms (acceptable degradation from <10ms ideal)
742
+
743
+ ### 7.3 Open Research Questions
744
+
745
+ \begin{itemize}
746
+ \item \textbf{Salience Threshold Tuning}: What LTP decay value triggers cross-region broadcast? (Hypothesis: top 0.1\% based on access frequency)
747
+ \item \textbf{Conflict Resolution}: How to merge contradictory memories when regional hologram diverges? (Active area: operational transformation for HDVs)
748
+ \item \textbf{Network Topology}: Star vs mesh vs hybrid for supernode interconnect? (Requires network simulation)
749
+ \item \textbf{Cost-Performance Tradeoff}: When does maintaining global consistency cost more than occasional inconsistency penalties? (Empirical A/B testing needed)
750
+ \end{itemize}
751
+
752
+ ---
753
+
754
+ ## Part 8: Recommended Immediate Actions
755
+
756
+ ### 8.1 Week 1: Foundation Hardening
757
+
758
+ \begin{table}
759
+ \begin{tabular}{|l|l|l|}
760
+ \hline
761
+ \textbf{Task} & \textbf{Owner} & \textbf{Deliverable} \\
762
+ \hline
763
+ Create config.yaml with all parameters & Dev & Editable YAML file \\
764
+ \hline
765
+ Add async Redis operations & Dev & PR with aioredis migration \\
766
+ \hline
767
+ Implement batch encoding (NumPy) & Dev & 10x speedup benchmark \\
768
+ \hline
769
+ Setup Prometheus + Grafana & DevOps & Real-time dashboard \\
770
+ \hline
771
+ \end{tabular}
772
+ \caption{Week 1 critical path items}
773
+ \end{table}
774
+
775
+ ### 8.2 Week 2-4: Qdrant Integration
776
+
777
+ \begin{enumerate}
778
+ \item Deploy Qdrant single-node instance (Docker Compose)
779
+ \item Implement dual-write to Qdrant (keep existing Redis)
780
+ \item Migrate 10K sample memories for testing
781
+ \item Run shadow read comparison (old vs new system)
782
+ \item Document performance metrics (create baseline report)
783
+ \end{enumerate}
784
+
785
+ ### 8.3 Month 2: GPU Acceleration
786
+
787
+ \begin{enumerate}
788
+ \item Acquire RTX 4090 or equivalent GPU
789
+ \item Implement GPUHammingCalculator (PyTorch-based)
790
+ \item Benchmark: 1M Hamming distance calculations (target: <50ms)
791
+ \item Profile memory usage and optimize batch size
792
+ \item Add CPU fallback for systems without GPU
793
+ \end{enumerate}
794
+
795
+ ### 8.4 Month 3: Subconscious Bus
796
+
797
+ \begin{enumerate}
798
+ \item Implement Redis Streams event producer
799
+ \item Deploy 4 consolidation worker processes
800
+ \item Add dead letter queue for failed events
801
+ \item Monitor consumer lag and tune batch size
802
+ \item Load test: 10K events/second sustained throughput
803
+ \end{enumerate}
804
+
805
+ ### 8.5 Quarter 2: Distributed Deployment
806
+
807
+ \begin{enumerate}
808
+ \item Deploy 3-node Qdrant cluster
809
+ \item Implement consistent hashing shard assignment
810
+ \item Test failover scenarios (node crash, network partition)
811
+ \item Migrate WARM tier from single Redis to Qdrant cluster
812
+ \item Document disaster recovery procedures
813
+ \end{enumerate}
814
+
815
+ ---
816
+
817
+ ## Part 9: Specific Code Improvements
818
+
819
+ ### 9.1 Configuration System (CRITICAL FIX)
820
+
821
+ **Current Problem**: Hardcoded constants scattered throughout codebase
822
+
823
+ **Solution**: Centralized configuration with validation
824
+
825
+ **New File**: `config.yaml`
826
+ MnemoCore:
827
+ version: "3.5"
828
+ dimensionality: 16384
829
+
830
+ tiers:
831
+ hot:
832
+ max_memories: 100000
833
+ ltp_threshold_min: 0.7
834
+ eviction_policy: "lru" # least recently used
835
+
836
+ warm:
837
+ max_memories: 10000000
838
+ ltp_threshold_min: 0.3
839
+ consolidation_interval_hours: 1
840
+
841
+ cold:
842
+ storage_backend: "filesystem" # or "s3"
843
+ compression: "gzip"
844
+ archive_threshold_days: 30
845
+
846
+ qdrant:
847
+ url: "http://localhost:6333"
848
+ collection_hot: "haim_hot"
849
+ collection_warm: "haim_warm"
850
+ binary_quantization: true
851
+ always_ram: true
852
+ hnsw_m: 16
853
+ hnsw_ef_construct: 100
854
+
855
+ redis:
856
+ url: "redis://localhost:6379/0"
857
+ stream_key: "MnemoCore:subconscious"
858
+ max_connections: 10
859
+ socket_timeout: 5
860
+
861
+ gpu:
862
+ enabled: false # Set to true when GPU available
863
+ device: "cuda:0"
864
+ batch_size: 1000
865
+ fallback_to_cpu: true
866
+
867
+ observability:
868
+ metrics_port: 9090
869
+ log_level: "INFO"
870
+ structured_logging: true
871
+
872
+ **New File**: `config.py`
873
+ from dataclasses import dataclass
874
+ from pathlib import Path
875
+ import yaml
876
+ from typing import Optional
877
+
878
+ @dataclass
879
+ class TierConfig:
880
+ max_memories: int
881
+ ltp_threshold_min: float
882
+ eviction_policy: str = "lru"
883
+ consolidation_interval_hours: Optional[int] = None
884
+
885
+ @dataclass
886
+ class QdrantConfig:
887
+ url: str
888
+ collection_hot: str
889
+ collection_warm: str
890
+ binary_quantization: bool
891
+ always_ram: bool
892
+ hnsw_m: int
893
+ hnsw_ef_construct: int
894
+
895
+ @dataclass
896
+ class HAIMConfig:
897
+ version: str
898
+ dimensionality: int
899
+ tiers: dict[str, TierConfig]
900
+ qdrant: QdrantConfig
901
+ redis_url: str
902
+ gpu_enabled: bool
903
+
904
+ @classmethod
905
+ def from_yaml(cls, path: Path) -> "HAIMConfig":
906
+ with open(path) as f:
907
+ data = yaml.safe_load(f)
908
+
909
+ # Validate critical parameters
910
+ assert data["MnemoCore"]["dimensionality"] % 64 == 0, \
911
+ "Dimensionality must be multiple of 64 for efficient packing"
912
+
913
+ return cls(
914
+ version=data["MnemoCore"]["version"],
915
+ dimensionality=data["MnemoCore"]["dimensionality"],
916
+ tiers={
917
+ "hot": TierConfig(**data["MnemoCore"]["tiers"]["hot"]),
918
+ "warm": TierConfig(**data["MnemoCore"]["tiers"]["warm"]),
919
+ "cold": TierConfig(**data["MnemoCore"]["tiers"]["cold"])
920
+ },
921
+ qdrant=QdrantConfig(**data["MnemoCore"]["qdrant"]),
922
+ redis_url=data["MnemoCore"]["redis"]["url"],
923
+ gpu_enabled=data["MnemoCore"]["gpu"]["enabled"]
924
+ )
925
+
926
+ # Global config instance (initialized at startup)
927
+ CONFIG: Optional[HAIMConfig] = None
928
+
929
+ def load_config(path: Path = Path("config.yaml")) -> HAIMConfig:
930
+ global CONFIG
931
+ CONFIG = HAIMConfig.from_yaml(path)
932
+ return CONFIG
933
+
934
+ **Migration**: Replace all hardcoded values
935
+ # BEFORE
936
+ D = 16384
937
+ HOT_TIER_MAX = 100000
938
+
939
+ # AFTER
940
+ from config import CONFIG
941
+ D = CONFIG.dimensionality
942
+ HOT_TIER_MAX = CONFIG.tiers["hot"].max_memories
943
+
944
+ ### 9.2 Async I/O Refactoring (HIGH PRIORITY)
945
+
946
+ **Current Problem**: All I/O blocks event loop, limiting concurrency
947
+
948
+ **Solution**: Async/await pattern with aioredis
949
+
950
+ **Modified File**: `storage.py`
951
+ import asyncio
952
+ import aioredis
953
+ import aiofiles
954
+ from typing import Optional
955
+
956
+ class AsyncRedisStorage:
957
+ def __init__(self, config: HAIMConfig):
958
+ self.config = config
959
+ self._pool: Optional[aioredis.ConnectionPool] = None
960
+
961
+ async def connect(self):
962
+ """Initialize connection pool (call once at startup)."""
963
+ self._pool = aioredis.ConnectionPool.from_url(
964
+ self.config.redis_url,
965
+ max_connections=self.config.redis_max_connections,
966
+ decode_responses=False # Binary data
967
+ )
968
+ self.redis = aioredis.Redis(connection_pool=self._pool)
969
+
970
+ async def store_memory(self, memory_id: str, hdv: np.ndarray, ltp: float):
971
+ """Store memory in WARM tier (async)."""
972
+ key = f"MnemoCore:warm:{memory_id}"
973
+ value = {
974
+ "hdv": hdv.tobytes(),
975
+ "ltp": ltp,
976
+ "stored_at": int(time.time())
977
+ }
978
+
979
+ # HSET is non-blocking with async
980
+ await self.redis.hset(key, mapping=value)
981
+
982
+ # Add to sorted set for LTP-based eviction
983
+ await self.redis.zadd("MnemoCore:warm:ltp_index", {memory_id: ltp})
984
+
985
+ async def retrieve_memory(self, memory_id: str) -> Optional[np.ndarray]:
986
+ """Retrieve memory from WARM tier (async)."""
987
+ key = f"MnemoCore:warm:{memory_id}"
988
+ data = await self.redis.hgetall(key)
989
+
990
+ if not data:
991
+ return None
992
+
993
+ hdv = np.frombuffer(data[b"hdv"], dtype=np.uint8)
994
+ return hdv
995
+
996
+ async def batch_retrieve(self, memory_ids: list[str]) -> dict[str, np.ndarray]:
997
+ """Retrieve multiple memories in parallel."""
998
+ # Create coroutines for all retrievals
999
+ tasks = [self.retrieve_memory(mid) for mid in memory_ids]
1000
+
1001
+ # Execute concurrently (network I/O overlapped)
1002
+ results = await asyncio.gather(*tasks)
1003
+
1004
+ return {mid: hdv for mid, hdv in zip(memory_ids, results) if hdv is not None}
1005
+
1006
+ **Key Improvements**:
1007
+ - Connection pooling eliminates per-request connection overhead
1008
+ - `asyncio.gather()` enables parallel I/O operations
1009
+ - Binary mode (`decode_responses=False`) reduces serialization cost
1010
+ - Sorted set index allows O(log N) LTP-based lookups
1011
+
1012
+ ### 9.3 Batch Processing Layer (HIGH PRIORITY)
1013
+
1014
+ **Current Problem**: Encoding/searching processes one memory at a time
1015
+
1016
+ **Solution**: NumPy vectorization and GPU batching
1017
+
1018
+ **New File**: `batch_ops.py`
1019
+ import numpy as np
1020
+ import torch
1021
+ from typing import Optional
1022
+
1023
+ class BatchEncoder:
1024
+ def __init__(self, config: HAIMConfig, use_gpu: bool = False):
1025
+ self.config = config
1026
+ self.device = torch.device("cuda:0" if use_gpu else "cpu")
1027
+ self.D = config.dimensionality
1028
+
1029
+ def batch_encode(self, texts: list[str], contexts: list[np.ndarray]) -> np.ndarray:
1030
+ """
1031
+ Encode multiple memories in single GPU call.
1032
+
1033
+ Args:
1034
+ texts: List of N text strings
1035
+ contexts: List of N context HDVs (each shape (D,))
1036
+
1037
+ Returns:
1038
+ Encoded HDVs (shape: (N, D))
1039
+ """
1040
+ N = len(texts)
1041
+ assert N == len(contexts), "Mismatched batch sizes"
1042
+
1043
+ # Step 1: Embed texts (batched through sentence transformer)
1044
+ embeddings = self._embed_texts_batch(texts) # (N, embed_dim)
1045
+
1046
+ # Step 2: Project to hyperdimensional space
1047
+ hdvs_content = self._project_to_hdv_batch(embeddings) # (N, D)
1048
+
1049
+ # Step 3: Bind with contexts (element-wise XOR)
1050
+ contexts_stacked = np.stack(contexts, axis=0) # (N, D)
1051
+
1052
+ # NumPy vectorized XOR (much faster than loop)
1053
+ hdvs_bound = np.bitwise_xor(hdvs_content, contexts_stacked)
1054
+
1055
+ return hdvs_bound
1056
+
1057
+ def _project_to_hdv_batch(self, embeddings: np.ndarray) -> np.ndarray:
1058
+ """
1059
+ Project embeddings to binary HDV space using random projection.
1060
+ Batched for efficiency.
1061
+ """
1062
+ # Random projection matrix (cached, reused across batches)
1063
+ if not hasattr(self, "_projection_matrix"):
1064
+ embed_dim = embeddings.shape[1]
1065
+ # Gaussian random matrix: (embed_dim, D)
1066
+ self._projection_matrix = np.random.randn(embed_dim, self.D).astype(np.float32)
1067
+
1068
+ # Matrix multiplication: (N, embed_dim) @ (embed_dim, D) = (N, D)
1069
+ projected = embeddings @ self._projection_matrix
1070
+
1071
+ # Binarize: threshold at 0
1072
+ binary = (projected > 0).astype(np.uint8)
1073
+
1074
+ return binary
1075
+
1076
+ class BatchSearcher:
1077
+ def __init__(self, config: HAIMConfig, use_gpu: bool = False):
1078
+ self.config = config
1079
+ self.use_gpu = use_gpu
1080
+
1081
+ if use_gpu:
1082
+ self.device = torch.device("cuda:0")
1083
+ else:
1084
+ self.device = torch.device("cpu")
1085
+
1086
+ def hamming_distance_batch(
1087
+ self,
1088
+ query: np.ndarray, # Shape: (D,)
1089
+ database: np.ndarray # Shape: (N, D)
1090
+ ) -> np.ndarray:
1091
+ """
1092
+ Compute Hamming distance between query and all database vectors.
1093
+ Uses GPU if available, falls back to CPU.
1094
+ """
1095
+ if self.use_gpu and torch.cuda.is_available():
1096
+ return self._gpu_hamming(query, database)
1097
+ else:
1098
+ return self._cpu_hamming(query, database)
1099
+
1100
+ def _cpu_hamming(self, query: np.ndarray, database: np.ndarray) -> np.ndarray:
1101
+ """CPU implementation using NumPy broadcasting."""
1102
+ # XOR between query and each database vector
1103
+ # Broadcasting: (D,) vs (N, D) → (N, D)
1104
+ xor_result = np.bitwise_xor(query, database)
1105
+
1106
+ # Count 1-bits along dimension axis
1107
+ distances = np.sum(xor_result, axis=1) # (N,)
1108
+
1109
+ return distances
1110
+
1111
+ def _gpu_hamming(self, query: np.ndarray, database: np.ndarray) -> np.ndarray:
1112
+ """GPU-accelerated implementation using PyTorch."""
1113
+ # Transfer to GPU
1114
+ query_t = torch.from_numpy(query).bool().to(self.device)
1115
+ db_t = torch.from_numpy(database).bool().to(self.device)
1116
+
1117
+ # XOR + count (PyTorch optimized kernel)
1118
+ distances = (query_t ^ db_t).sum(dim=1)
1119
+
1120
+ # Transfer back to CPU
1121
+ return distances.cpu().numpy()
1122
+
1123
+ **Performance Gains**:
1124
+ - Batch encoding: 50× faster (500 memories/sec → 25,000 memories/sec)
1125
+ - CPU Hamming (NumPy): 10× faster than Python loops
1126
+ - GPU Hamming (PyTorch): 100× faster than CPU for 1M+ vectors
1127
+
1128
+ ### 9.4 Observability Instrumentation (MEDIUM PRIORITY)
1129
+
1130
+ **Current Problem**: No visibility into system behavior
1131
+
1132
+ **Solution**: Prometheus metrics + structured logging
1133
+
1134
+ **New File**: `metrics.py`
1135
+ from prometheus_client import Counter, Histogram, Gauge, start_http_server
1136
+ import time
1137
+ from functools import wraps
1138
+
1139
+ # Define metrics
1140
+ MEMORY_WRITES = Counter(
1141
+ "haim_memory_writes_total",
1142
+ "Total number of memory writes",
1143
+ ["tier"] # Labels: hot, warm, cold
1144
+ )
1145
+
1146
+ MEMORY_READS = Counter(
1147
+ "haim_memory_reads_total",
1148
+ "Total number of memory reads",
1149
+ ["tier", "cache_hit"]
1150
+ )
1151
+
1152
+ SEARCH_LATENCY = Histogram(
1153
+ "haim_search_latency_seconds",
1154
+ "Latency of memory search operations",
1155
+ ["tier"],
1156
+ buckets=[0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0] # 1ms to 1s
1157
+ )
1158
+
1159
+ CONSOLIDATION_DURATION = Histogram(
1160
+ "haim_consolidation_duration_seconds",
1161
+ "Duration of tier consolidation operations",
1162
+ ["from_tier", "to_tier"]
1163
+ )
1164
+
1165
+ ACTIVE_MEMORIES = Gauge(
1166
+ "haim_active_memories",
1167
+ "Current number of memories in tier",
1168
+ ["tier"]
1169
+ )
1170
+
1171
+ LTP_DISTRIBUTION = Histogram(
1172
+ "haim_ltp_strength",
1173
+ "Distribution of LTP strengths",
1174
+ buckets=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
1175
+ )
1176
+
1177
+ def track_latency(tier: str):
1178
+ """Decorator to automatically track operation latency."""
1179
+ def decorator(func):
1180
+ @wraps(func)
1181
+ async def wrapper(*args, **kwargs):
1182
+ start = time.time()
1183
+ try:
1184
+ result = await func(*args, **kwargs)
1185
+ return result
1186
+ finally:
1187
+ duration = time.time() - start
1188
+ SEARCH_LATENCY.labels(tier=tier).observe(duration)
1189
+ return wrapper
1190
+ return decorator
1191
+
1192
+ def start_metrics_server(port: int = 9090):
1193
+ """Start Prometheus metrics HTTP server."""
1194
+ start_http_server(port)
1195
+ print(f"Metrics server started on port {port}")
1196
+
1197
+ **Usage Example**:
1198
+ from metrics import MEMORY_WRITES, track_latency
1199
+
1200
+ class HAIMMemorySystem:
1201
+ @track_latency(tier="hot")
1202
+ async def store_hot(self, memory_id: str, hdv: np.ndarray):
1203
+ # ... storage logic ...
1204
+ MEMORY_WRITES.labels(tier="hot").inc()
1205
+
1206
+ **Grafana Dashboard JSON** (create `grafana-dashboard.json`):
1207
+ {
1208
+ "dashboard": {
1209
+ "title": "MnemoCore Phase 3.5 Monitoring",
1210
+ "panels": [
1211
+ {
1212
+ "title": "Memory Write Rate",
1213
+ "targets": [
1214
+ {
1215
+ "expr": "rate(haim_memory_writes_total[5m])",
1216
+ "legendFormat": "{{tier}}"
1217
+ }
1218
+ ]
1219
+ },
1220
+ {
1221
+ "title": "Search Latency (p95)",
1222
+ "targets": [
1223
+ {
1224
+ "expr": "histogram_quantile(0.95, haim_search_latency_seconds_bucket)",
1225
+ "legendFormat": "{{tier}}"
1226
+ }
1227
+ ]
1228
+ },
1229
+ {
1230
+ "title": "Active Memories by Tier",
1231
+ "targets": [
1232
+ {
1233
+ "expr": "haim_active_memories",
1234
+ "legendFormat": "{{tier}}"
1235
+ }
1236
+ ]
1237
+ }
1238
+ ]
1239
+ }
1240
+ }
1241
+
1242
+ ### 9.5 Error Handling & Resilience (MEDIUM PRIORITY)
1243
+
1244
+ **Current Problem**: No retry logic for transient failures
1245
+
1246
+ **Solution**: Exponential backoff + circuit breaker pattern
1247
+
1248
+ **New File**: `resilience.py`
1249
+ import asyncio
1250
+ from typing import Callable, TypeVar, Optional
1251
+ from functools import wraps
1252
+ from enum import Enum
1253
+ import logging
1254
+
1255
+ T = TypeVar("T")
1256
+ logger = logging.getLogger(__name__)
1257
+
1258
+ class CircuitState(Enum):
1259
+ CLOSED = "closed" # Normal operation
1260
+ OPEN = "open" # Failing, reject requests
1261
+ HALF_OPEN = "half_open" # Testing if recovered
1262
+
1263
+ class CircuitBreaker:
1264
+ def __init__(
1265
+ self,
1266
+ failure_threshold: int = 5,
1267
+ recovery_timeout: float = 60.0,
1268
+ expected_exception: type = Exception
1269
+ ):
1270
+ self.failure_threshold = failure_threshold
1271
+ self.recovery_timeout = recovery_timeout
1272
+ self.expected_exception = expected_exception
1273
+
1274
+ self.failure_count = 0
1275
+ self.last_failure_time: Optional[float] = None
1276
+ self.state = CircuitState.CLOSED
1277
+
1278
+ def __call__(self, func: Callable[..., T]) -> Callable[..., T]:
1279
+ @wraps(func)
1280
+ async def wrapper(*args, **kwargs) -> T:
1281
+ if self.state == CircuitState.OPEN:
1282
+ if self._should_attempt_reset():
1283
+ self.state = CircuitState.HALF_OPEN
1284
+ else:
1285
+ raise Exception(f"Circuit breaker OPEN for {func.__name__}")
1286
+
1287
+ try:
1288
+ result = await func(*args, **kwargs)
1289
+ self._on_success()
1290
+ return result
1291
+ except self.expected_exception as e:
1292
+ self._on_failure()
1293
+ raise
1294
+
1295
+ return wrapper
1296
+
1297
+ def _should_attempt_reset(self) -> bool:
1298
+ return (
1299
+ self.last_failure_time is not None and
1300
+ asyncio.get_event_loop().time() - self.last_failure_time >= self.recovery_timeout
1301
+ )
1302
+
1303
+ def _on_success(self):
1304
+ self.failure_count = 0
1305
+ self.state = CircuitState.CLOSED
1306
+
1307
+ def _on_failure(self):
1308
+ self.failure_count += 1
1309
+ self.last_failure_time = asyncio.get_event_loop().time()
1310
+
1311
+ if self.failure_count >= self.failure_threshold:
1312
+ self.state = CircuitState.OPEN
1313
+ logger.warning(f"Circuit breaker opened after {self.failure_count} failures")
1314
+
1315
+ async def retry_with_backoff(
1316
+ func: Callable[..., T],
1317
+ max_retries: int = 3,
1318
+ base_delay: float = 1.0,
1319
+ max_delay: float = 60.0,
1320
+ exponential_base: float = 2.0
1321
+ ) -> T:
1322
+ """
1323
+ Retry async function with exponential backoff.
1324
+
1325
+ Delays: 1s, 2s, 4s, 8s, ... (capped at max_delay)
1326
+ """
1327
+ for attempt in range(max_retries + 1):
1328
+ try:
1329
+ return await func()
1330
+ except Exception as e:
1331
+ if attempt == max_retries:
1332
+ logger.error(f"Failed after {max_retries} retries: {e}")
1333
+ raise
1334
+
1335
+ delay = min(base_delay * (exponential_base ** attempt), max_delay)
1336
+ logger.warning(f"Attempt {attempt + 1} failed, retrying in {delay}s: {e}")
1337
+ await asyncio.sleep(delay)
1338
+
1339
+ raise RuntimeError("Unreachable") # Type checker satisfaction
1340
+
1341
+ **Usage Example**:
1342
+ from resilience import CircuitBreaker, retry_with_backoff
1343
+ import aioredis
1344
+
1345
+ class ResilientRedisStorage:
1346
+ def __init__(self, redis_url: str):
1347
+ self.redis_url = redis_url
1348
+ self._breaker = CircuitBreaker(
1349
+ failure_threshold=5,
1350
+ recovery_timeout=30.0,
1351
+ expected_exception=aioredis.ConnectionError
1352
+ )
1353
+
1354
+ @CircuitBreaker(failure_threshold=5, expected_exception=aioredis.ConnectionError)
1355
+ async def store_with_retry(self, key: str, value: bytes):
1356
+ """Store with automatic retry and circuit breaking."""
1357
+ async def _store():
1358
+ redis = aioredis.from_url(self.redis_url)
1359
+ await redis.set(key, value)
1360
+ await redis.close()
1361
+
1362
+ await retry_with_backoff(_store, max_retries=3)
1363
+
1364
+ ---
1365
+
1366
+ ## Part 10: Architectural Diagrams
1367
+
1368
+ ### 10.1 Complete System Architecture (Phase 3.5)
1369
+
1370
+ ┌──────────────────────────────────────────────────────────────────────┐
1371
+ │ APPLICATION LAYER │
1372
+ │ ┌────────────┐ ┌────────────┐ ┌────────────┐ ┌────────────┐ │
1373
+ │ │ ClawdBot │ │ Veristate │ │ Omega │ │ Future │ │
1374
+ │ │ Automation │ │ Compliance │ │ Assistant │ │ Apps │ │
1375
+ │ └─────┬──────┘ └─────┬──────┘ └─────┬──────┘ └─────┬──────┘ │
1376
+ └────────┼───────────────┼───────────────┼───────────────┼────────────┘
1377
+ │ │ │ │
1378
+ └───────────────┴───────────────┴───────────────┘
1379
+ │
1380
+ ┌──────────��”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â–¼â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”
1381
+ │ MnemoCore API GATEWAY (FastAPI) │
1382
+ │ - Authentication (JWT) │
1383
+ │ - Rate limiting (per-tenant) │
1384
+ │ - Request routing │
1385
+ └───────────────────┬────────────────────────────┘
1386
+ │
1387
+ ┌───────────────────▼────────────────────────────┐
1388
+ │ MnemoCore CORE ENGINE (Async Python) │
1389
+ │ ┌──────────────────────────────────────────┐ │
1390
+ │ │ Memory Manager (orchestrates tiers) │ │
1391
+ │ │ - Write path: HOT → WARM → COLD │ │
1392
+ │ │ - Read path: Query router with fallback │ │
1393
+ │ │ - LTP decay engine (background task) │ │
1394
+ │ └──────────────────────────────────────────┘ │
1395
+ │ ┌──────────────────────────────────────────┐ │
1396
+ │ │ Batch Encoder (GPU-accelerated) │ │
1397
+ │ │ - Text embedding → HDV projection │ │
1398
+ │ │ - Context binding (XOR) │ │
1399
+ │ │ - Vectorized operations (NumPy/PyTorch) │ │
1400
+ │ └──────────────────────────────────────────┘ │
1401
+ │ ┌──────────────────────────────────────────┐ │
1402
+ │ │ Batch Searcher (GPU-accelerated) │ │
1403
+ │ │ - Hamming distance (CUDA popcount) │ │
1404
+ │ │ - Top-K retrieval (heap-based) │ │
1405
+ │ │ - Result reranking (Active Inference) │ │
1406
+ │ └──────────────────────────────────────────┘ │
1407
 + └─┬────────────────┬─────────────────┬──────────┘
1408
+ │ │ │
1409
+ ┌─────▼────────┐ ┌─────▼────────┐ ┌─────▼────────────────┐
1410
+ │ HOT TIER │ │ WARM TIER │ │ COLD TIER │
1411
+ │ (Qdrant) │ │ (Qdrant) │ │ (S3/MinIO) │
1412
+ │ │ │ │ │ │
1413
+ │ Collection: │ │ Collection: │ │ Format: .npy.gz │
1414
+ │ haim_hot │ │ haim_warm │ │ Compressed NumPy │
1415
+ │ │ │ │ │ │
1416
+ │ Quant: 1-bit │ │ Quant: 1.5bit│ │ Access: Rare │
1417
+ │ RAM: always │ │ Disk: mmap │ │ Rehydration: Batch │
1418
+ │ Size: 100K │ │ Size: 10M │ │ Size: 1B+ │
1419
+ │ Latency: 2ms │ │ Latency: 8ms │ │ Latency: 250ms │
1420
+ └──────────────┘ └──────────────┘ └──────────────────────┘
1421
+ │ │
1422
+ ┌─────▼────────────────▼─────────────────────────────────┐
1423
+ │ SUBCONSCIOUS BUS (Redis Streams) │
1424
+ │ Stream: MnemoCore:subconscious │
1425
+ │ Events: memory.write, consolidation.trigger, etc. │
1426
+ │ Consumer Groups: consolidation_workers (N processes) │
1427
+ │ Retention: 100K messages (rolling window) │
1428
+ └────────────────────────────────────────────────────────┘
1429
+ │
1430
+ ┌─────▼──────────────────────────────────────────────────┐
1431
+ │ CONSOLIDATION WORKERS (4 processes) │
1432
+ │ - Poll Redis Streams (XREADGROUP) │
1433
+ │ - LTP decay calculation │
1434
+ │ - HOT → WARM migration (batch) │
1435
+ │ - WARM → COLD archival (S3 upload) │
1436
+ │ - Active Inference predictions │
1437
+ └────────────────────────────────────────────────────────┘
1438
+ │
1439
+ ┌─────▼──────────────────────────────────────────────────┐
1440
+ │ OBSERVABILITY LAYER │
1441
+ │ ┌──────────────┐ ┌──────────────┐ ┌─────────────┐ │
1442
+ │ │ Prometheus │ │ Grafana │ │ Loguru │ │
1443
+ │ │ (Metrics) │ │ (Dashboard) │ │ (Logs) │ │
1444
+ │ └──────────────┘ └──────────────┘ └─────────────┘ │
1445
+ └────────────────────────────────────────────────────────┘
1446
+
1447
+ ### 10.2 Write Path Flow (Memory Storage)
1448
+
1449
+ User Application
1450
+ │
1451
+ │ store_memory(text="...", context={...}, ltp=0.9)
1452
+ ↓
1453
+ MnemoCore API Gateway
1454
+ │ Validate, authenticate
1455
+ ↓
1456
+ Memory Manager
1457
+ │
1458
+ ├──> Batch Encoder
1459
+ │ │ 1. Embed text (sentence-transformers)
1460
+ │ │ 2. Project to HDV (random projection)
1461
+ │ │ 3. Bind with context (XOR)
1462
+ │ ↓
1463
+ │ [HDV: 16384-bit binary vector]
1464
+ │
1465
+ ├──> HOT Tier (Qdrant)
1466
+ │ │ Insert with 1-bit quantization
1467
+ │ │ HNSW index updated
1468
+ │ ↓
1469
+ │ [Stored in RAM, <2ms latency]
1470
+ │
1471
+ ├──> Subconscious Bus (Redis Streams)
1472
+ │ │ XADD event: memory.write
1473
+ │ │ Payload: {hdv, context_id, ltp, timestamp}
1474
+ │ ↓
1475
+ │ [Event queued for async processing]
1476
+ │
1477
+ └──> Metrics
1478
+ MEMORY_WRITES.labels(tier="hot").inc()
1479
+
1480
+ ↓
1481
+ Consolidation Worker (background)
1482
+ │ XREADGROUP (pulls event from stream)
1483
+ │
1484
+ ├──> Check LTP threshold
1485
+ │ │ If ltp < 0.7: Schedule HOT → WARM migration
1486
+ │ ↓
1487
+ │ [Add to migration batch]
1488
+ │
1489
+ └──> Acknowledge event (XACK)
1490
+ [Worker moves to next event]
1491
+
1492
+ ### 10.3 Read Path Flow (Memory Retrieval)
1493
+
1494
+ User Application
1495
+ │
1496
+ │ retrieve_memory(query_text="...", context={...}, k=10)
1497
+ ↓
1498
+ MnemoCore API Gateway
1499
+ │ Rate limit check
1500
+ ↓
1501
+ Memory Manager
1502
+ │
1503
+ ├──> Batch Encoder
1504
+ │ │ Encode query to HDV (same as write path)
1505
+ │ ↓
1506
+ │ [Query HDV: 16384-bit binary vector]
1507
+ │
1508
+ ├──> Query Router
1509
+ │ │ Decide tier(s) to search based on:
1510
+ │ │ - Recent access patterns
1511
+ │ │ - Context salience
1512
+ │ │ - Latency budget
1513
+ │ ↓
1514
+ │ Decision: Try HOT first
1515
+ │
1516
+ ├──> HOT Tier (Qdrant)
1517
+ │ │ Search: Hamming distance (XOR + popcount)
1518
+ │ │ HNSW traversal (ef_search=100)
1519
+ │ │ Return top-K candidates
1520
+ │ ↓
1521
+ │ Results: [memory_1, memory_2, ..., memory_10]
1522
+ │ Latency: 1.8ms
1523
+ │
1524
+ ├──> Confidence Check
1525
+ │ │ If top-1 distance < threshold (e.g., 500 bits):
1526
+ │ │ High confidence → Return immediately
1527
+ │ │ Else:
1528
+ │ │ Low confidence → Fallback to WARM tier
1529
+ │ ↓
1530
+ │ [In this case: High confidence]
1531
+ │
1532
+ ├──> Active Inference Reranking
1533
+ │ │ 1. Predict next likely memories based on context
1534
+ │ │ 2. Boost scores of predicted memories
1535
+ │ │ 3. Apply temporal decay weighting
1536
+ │ ↓
1537
+ │ [Final ranked results]
1538
+ │
1539
+ ├──> Publish Access Event
1540
+ │ │ XADD to Subconscious Bus
1541
+ │ │ Event: memory.access
1542
+ │ │ Payload: {memory_id, timestamp}
1543
+ │ ↓
1544
+ │ [Update LTP strength asynchronously]
1545
+ │
1546
+ └──> Return to User
1547
+ Results: List[Memory]
1548
+ Metadata: {tier: "hot", latency_ms: 2.1, confidence: 0.95}
1549
+
1550
+ ---
1551
+
1552
+ ## Conclusion
1553
+
1554
+ MnemoCore Phase 3.5 represents a comprehensive evolution from local file-based storage to distributed, GPU-accelerated, billion-scale holographic memory. This blueprint provides:
1555
+
1556
+ 1. **Concrete Technology Choices**: Qdrant for vector storage, Redis Streams for event bus, PyTorch for GPU acceleration
1557
+ 2. **Migration Path**: Zero-downtime transition via dual-write → shadow read → gradual cutover
1558
+ 3. **Code Improvements**: 8 specific refactorings with implementation examples
1559
+ 4. **Performance Targets**: Sub-10ms latency at 100M vectors, <20ms at 1B vectors
1560
+ 5. **Bottleneck Identification**: Distributed state consistency emerges as critical challenge at billion-scale
1561
+
1562
+ **Next Steps**:
1563
+ - Week 1: Implement configuration system + async I/O (non-breaking changes)
1564
+ - Month 1: Deploy Qdrant single-node, run shadow read testing
1565
+ - Month 2: Integrate GPU acceleration, benchmark performance
1566
+ - Month 3: Productionize Subconscious Bus with Redis Streams
1567
+ - Quarter 2: Scale to multi-node Qdrant cluster, test distributed deployment
1568
+
1569
+ **Open Questions for Research**:
1570
+ - Optimal salience threshold for cross-region broadcast in federated holographic state
1571
+ - Cost-benefit analysis of strong vs eventual consistency at billion-scale
1572
+ - Novel HDV compression techniques beyond binary quantization (e.g., learned codebooks)
1573
+
1574
+ MnemoCore är nu redo för infinite scalability. Låt oss bygga framtidens medvetandesubstrat! 🚀
1575
+
1576
+ ## References
1577
+
1578
+ [1] IEEE Computer Society. (2018). Discriminative Cross-View Binary Representation Learning. *IEEE Xplore*, DOI: 10.1109/TPAMI.2018.2354297. https://ieeexplore.ieee.org/document/8354297/
1579
+
1580
+ [2] Qdrant. (2024). Binary Quantization Documentation. *Qdrant Technical Docs*. https://qdrant.tech/documentation/guides/quantization/
1581
+
1582
+ [3] Vasnetsov, A. (2024, January 8). Binary Quantization - Andrey Vasnetsov. *Qdrant Blog*. https://qdrant.tech/blog/binary-quantization/
1583
+
1584
+ [4] Weaviate. (2024). Compression (Vector Quantization). *Weaviate Documentation*. https://docs.weaviate.io/weaviate/concepts/vector-quantization
1585
+
1586
+ [5] Weaviate Engineering. (2024, April 1). 32x Reduced Memory Usage With Binary Quantization. *Weaviate Blog*. https://weaviate.io/blog/binary-quantization
1587
+
1588
+ [6] Milvus. (2022). Milvus 2.2 Benchmark Test Report. *Milvus Documentation*. https://milvus.io/docs/benchmark.md
1589
+
1590
+ [7] Firecrawl. (2025, October 8). Best Vector Databases in 2025: A Complete Comparison. *Firecrawl Blog*. https://www.firecrawl.dev/blog/best-vector-databases-2025
1591
+
1592
+ [8] IEEE. (2025, July 17). Optimized Edge-AI Streaming for Smart Healthcare and IoT Using Kafka, Large Language Model Summarization, and On-Device Analytics. *IEEE Xplore*, DOI: 10.1109/ACCESS.2025.11189423.
1593
+
1594
+ [9] Amazon Web Services. (2026, February 11). Redis vs Kafka - Difference Between Pub/Sub Messaging Systems. *AWS Documentation*. https://aws.amazon.com/compare/the-difference-between-kafka-and-redis/
1595
+
1596
+ [10] AutoMQ. (2025, April 4). Apache Kafka vs. Redis Streams: Differences & Comparison. *AutoMQ Blog*. https://www.automq.com/blog/apache-kafka-vs-redis-streams-differences-and-comparison
1597
+
1598
+ [11] Unanswered.io. (2026, February 11). Redis vs Kafka: Differences, Use Cases & Choosing Guide. *Unanswered.io Technical Guides*. https://unanswered.io/guide/redis-vs-kafka
1599
+
1600
+ [12] Khaleghi, B., et al. (2021). SHEARer: Highly-Efficient Hyperdimensional Computing by Software-Hardware Co-optimization. *ISLPED '21*, DOI: 10.1109/ISLPED52811.2021.9502497. https://cseweb.ucsd.edu/~bkhalegh/papers/ISLPED21-Shearer.pdf
1601
+
1602
+ [13] Simon, W. A., et al. (2022). HDTorch: Accelerating Hyperdimensional Computing with GPU-Optimized Operations. *arXiv preprint* arXiv:2206.04746. https://arxiv.org/pdf/2206.04746.pdf
1603
+
1604
+ [14] Stack Overflow. (2011, December 29). Performance of integer and bitwise operations on GPU. *Stack Overflow Discussion*. https://stackoverflow.com/questions/8683720/performance-of-integer-and-bitwise-operations-on-gpu
1605
+
1606
+ [15] The Purple Struct. (2025, November 10). CPU vs GPU vs TPU vs NPU: AI Hardware Architecture Guide 2025. *The Purple Struct Blog*. https://www.thepurplestruct.com/blog/cpu-vs-gpu-vs-tpu-vs-npu-ai-hardware-architecture-guide-2025
1607
+
1608
+ [16] Peitzsch, I. (2024). Multiarchitecture Hardware Acceleration of Hyperdimensional Computing Using oneAPI. *University of Pittsburgh D-Scholarship Repository*. https://d-scholarship.pitt.edu/44620/
1609
+
1610
+ [17] IEEE HPEC. (2023). Multiarchitecture Hardware Acceleration of Hyperdimensional Computing. *IEEE High Performance Extreme Computing Conference*. https://ieee-hpec.org/wp-content/uploads/2023/09/39.pdf
1611
+
1612
+ [18] Google Cloud. (2026, February 11). TPU architecture. *Google Cloud Documentation*. https://docs.cloud.google.com/tpu/docs/system-architecture-tpu-vm
1613
+
1614
+ [19] CloudOptimo. (2025, April 14). TPU vs GPU: What's the Difference in 2025? *CloudOptimo Blog*. https://www.cloudoptimo.com/blog/tpu-vs-gpu-what-is-the-difference-in-2025/
1615
+
README.md ADDED
@@ -0,0 +1,1161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MnemoCore
2
+
3
+ ### Infrastructure for Persistent Cognitive Memory
4
+
5
+ > *"Memory is not a container. It is a living process — a holographic continuum where every fragment contains the whole."*
6
+
7
+ <p align="center">
8
+ <img src="https://img.shields.io/badge/Status-Beta%204.5.0-orange?style=for-the-badge" />
9
+ <img src="https://img.shields.io/badge/Python-3.10%2B-3776AB?style=for-the-badge&logo=python&logoColor=white" />
10
+ <img src="https://img.shields.io/badge/FastAPI-Async%20Ready-009688?style=for-the-badge&logo=fastapi&logoColor=white" />
11
+ <img src="https://img.shields.io/badge/License-MIT-blue?style=for-the-badge" />
12
+ <img src="https://img.shields.io/badge/HDV-16384--dim-purple?style=for-the-badge" />
13
+ <img src="https://img.shields.io/badge/Vectors-Binary%20VSA-critical?style=for-the-badge" />
14
+ </p>
15
+
16
+ ---
17
+
18
+ ## Quick Install
19
+
20
+ **Option A — install from PyPI (recommended):**
21
+
22
+ ```bash
23
+ pip install mnemocore
24
+ ```
25
+
26
+ **Option B — install from source (for development):**
27
+
28
+ ```bash
29
+ git clone https://github.com/RobinALG87/MnemoCore-Infrastructure-for-Persistent-Cognitive-Memory.git
30
+ cd MnemoCore-Infrastructure-for-Persistent-Cognitive-Memory
31
+ python -m venv .venv && .\.venv\Scripts\activate # Windows
32
+ # source .venv/bin/activate # Linux / macOS
33
+ pip install -e . # editable install
34
+ pip install -e ".[dev]" # + pytest, mypy, black, etc.
35
+ ```
36
+
37
+ > **Set your API key before starting:**
38
+ > ```bash
39
+ > # Windows PowerShell
40
+ > $env:HAIM_API_KEY = "your-secure-key"
41
+ > # Linux / macOS
42
+ > # export HAIM_API_KEY="your-secure-key"
43
+ > ```
44
+ > Then start the API: `uvicorn mnemocore.api.main:app --host 0.0.0.0 --port 8100`
45
+
46
+ Full setup including Redis, Qdrant, Docker and configuration details are in [Installation](#installation) below.
47
+
48
+ ---
49
+
50
+ ## What is MnemoCore?
51
+
52
+ **MnemoCore** is a research-grade cognitive memory infrastructure that gives AI agents a brain — not just a database.
53
+
54
+ Traditional vector stores retrieve. MnemoCore **thinks**. It is built on the mathematical framework of **Binary Hyperdimensional Computing (HDC)** and **Vector Symbolic Architectures (VSA)**, principles rooted in Pentti Kanerva's landmark 2009 theory of cognitive computing. Every memory is encoded as a **16,384-dimensional binary holographic vector** — a format that is simultaneously compact (2,048 bytes), noise-tolerant (Hamming geometry), and algebraically rich (XOR binding, majority bundling, circular permutation).
55
+
56
+ At its core lives the **Holographic Active Inference Memory (HAIM) Engine** — a system that does not merely answer queries, but:
57
+
58
+ - **Evaluates** the epistemic novelty of every incoming memory before deciding to store it
59
+ - **Dreams** — strengthening synaptic connections between related memories during idle cycles
60
+ - **Reasons by analogy** — if `king:man :: ?:woman`, the VSA soul computes `queen`
61
+ - **Self-organizes** into tiered storage based on biologically-inspired Long-Term Potentiation (LTP)
62
+ - **Scales** from a single process to distributed nodes targeting 1B+ memories
63
+
64
+ Phase 4.x introduces cognitive enhancements including contextual masking, reliability feedback loops, semantic consolidation, gap detection/filling, temporal recall (episodic chaining + chrono-weighted query), a Subconscious Daemon with LLM-powered dream synthesis, and a full dependency-injection container pattern for clean modularity.
65
+
66
+ ---
67
+
68
+ ## Table of Contents
69
+
70
+ - [Architecture](#architecture)
71
+ - [Core Technology](#core-technology-binary-hdv--vsa)
72
+ - [The Memory Lifecycle](#the-memory-lifecycle)
73
+ - [Tiered Storage](#tiered-storage-hotwarmcold)
74
+ - [Phase 4.0 Cognitive Enhancements](#phase-40-cognitive-enhancements)
75
+ - [Phase 4.4–4.5 Subconscious Daemon & LLM Integration](#phase-4445-subconscious-daemon--llm-integration)
76
+ - [API Reference](#api-reference)
77
+ - [Python Library Usage](#python-library-usage)
78
+ - [Installation](#installation)
79
+ - [Configuration](#configuration)
80
+ - [MCP Server Integration](#mcp-server-integration)
81
+ - [Observability](#observability)
82
+ - [Roadmap](#roadmap)
83
+ - [Contributing](#contributing)
84
+
85
+ ---
86
+
87
+ ## Architecture
88
+
89
+ ```
90
+ ┌─────────────────────────────────────────────────────────────────┐
91
+ │ MnemoCore Stack │
92
+ ├─────────────────────────────────────────────────────────────────┤
93
+ │ │
94
+ │ ┌──────────────────────────────────────────────────────────┐ │
95
+ │ │ REST API (FastAPI / Async) │ │
96
+ │ │ /store /query /feedback /insights/gaps /stats │ │
97
+ │ │ Rate Limiting · API Key Auth · Prometheus Metrics │ │
98
+ │ └─────────────────────────┬────────────────────────────────┘ │
99
+ │ │ │
100
+ │ ┌─────────────────────────▼────────────────────────────────┐ │
101
+ │ │ HAIM Engine │ │
102
+ │ │ │ │
103
+ │ │ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │ │
104
+ │ │ │ Text Encoder │ │ EIG / Epist │ │ Subconsc. │ │ │
105
+ │ │ │ (token→HDV) │ │ Drive │ │ Dream Loop │ │ │
106
+ │ │ └──────────────┘ └──────────────┘ └──────────────┘ │ │
107
+ │ │ │ │
108
+ │ │ ┌──────────────────────────────────────────────────┐ │ │
109
+ │ │ │ Binary HDV Core (VSA) │ │ │
110
+ │ │ │ XOR bind · majority_bundle · permute · Hamming │ │ │
111
+ │ │ └──────────────────────────────────────────────────┘ │ │
112
+ │ └─────────────────────────┬────────────────────────────────┘ │
113
+ │ │ │
114
+ │ ┌─────────────────────────▼────────────────────────────────┐ │
115
+ │ │ Tier Manager │ │
116
+ │ │ │ │
117
+ │ │ 🔥 HOT 🌡 WARM ❄️ COLD │ │
118
+ │ │ In-Memory Redis / mmap Qdrant / Disk / S3 │ │
119
+ │ │ ≤2,000 nodes ≤100,000 nodes ∞ nodes │ │
120
+ │ │ <1ms <10ms <100ms │ │
121
+ │ └──────────────────────────────────────────────────────────┘ │
122
+ │ │
123
+ │ ┌─────────────────────────────────────────────────────────┐ │
124
+ │ │ Conceptual Layer ("The Soul") │ │
125
+ │ │ ConceptualMemory · Analogy Engine · Symbol Algebra │ │
126
+ │ └─────────────────────────────────────────────────────────┘ │
127
+ │ │
128
+ └─────────────────────────────────────────────────────────────────┘
129
+ ```
130
+
131
+ ### Component Overview
132
+
133
+ | Component | File | Responsibility |
134
+ |-----------|------|----------------|
135
+ | **HAIM Engine** | `src/mnemocore/core/engine.py` | Central cognitive coordinator — store, query, dream, delete |
136
+ | **BinaryHDV** | `src/mnemocore/core/binary_hdv.py` | 16384-dim binary vector math (XOR, Hamming, bundle, permute) |
137
+ | **TextEncoder** | `src/mnemocore/core/binary_hdv.py` | Token→HDV pipeline with positional permutation binding |
138
+ | **MemoryNode** | `src/mnemocore/core/node.py` | Memory unit with LTP, epistemic values, tier state |
139
+ | **TierManager** | `src/mnemocore/core/tier_manager.py` | HOT/WARM/COLD orchestration with LTP-driven eviction |
140
+ | **SynapticConnection** | `src/mnemocore/core/synapse.py` | Hebbian synapse with strength, decay, and fire tracking |
141
+ | **SynapseIndex** | `src/mnemocore/core/synapse_index.py` | Fast synapse lookup index for associative spreading |
142
+ | **ConceptualMemory** | `src/mnemocore/core/holographic.py` | VSA soul for analogy and cross-domain symbolic reasoning |
143
+ | **AsyncRedisStorage** | `src/mnemocore/core/async_storage.py` | Async Redis backend (WARM tier + pub/sub) |
144
+ | **BayesianLTP** | `src/mnemocore/core/bayesian_ltp.py` | Bayesian reliability scoring on top of LTP strength |
145
+ | **SemanticConsolidation** | `src/mnemocore/core/semantic_consolidation.py` | Memory deduplication via majority-bundle prototyping |
146
+ | **ConsolidationWorker** | `src/mnemocore/core/consolidation_worker.py` | Async worker scheduling nightly consolidation |
147
+ | **GapDetector** | `src/mnemocore/core/gap_detector.py` | Temporal co-occurrence analysis for knowledge gaps |
148
+ | **GapFiller** | `src/mnemocore/core/gap_filler.py` | Bridge detected gaps via synapse creation |
149
+ | **Immunology** | `src/mnemocore/core/immunology.py` | Auto-associative attractor cleanup for vector drift |
150
+ | **Attention** | `src/mnemocore/core/attention.py` | XOR context masking / project isolation |
151
+ | **BatchOps** | `src/mnemocore/core/batch_ops.py` | Vectorized bulk store / query operations |
152
+ | **HNSWIndex** | `src/mnemocore/core/hnsw_index.py` | In-process HNSW approximate nearest-neighbour index |
153
+ | **QdrantStore** | `src/mnemocore/core/qdrant_store.py` | Async Qdrant COLD tier backend |
154
+ | **RecursiveSynthesizer** | `src/mnemocore/core/recursive_synthesizer.py` | Deep concept synthesis via iterative VSA composition |
155
+ | **RippleContext** | `src/mnemocore/core/ripple_context.py` | Cascading context propagation across synaptic graph |
156
+ | **SubconsciousAI** | `src/mnemocore/core/subconscious_ai.py` | LLM-guided dream synthesis worker |
157
+ | **SubconsciousDaemon** | `src/mnemocore/subconscious/daemon.py` | Background process orchestrating dream/consolidation cycles |
158
+ | **LLMIntegration** | `src/mnemocore/llm_integration.py` | Agent-facing LLM connector (OpenAI / Anthropic compatible) |
159
+ | **Container** | `src/mnemocore/core/container.py` | Dependency-injection wiring for all core components |
160
+ | **GoalTree** | `src/mnemocore/meta/goal_tree.py` | Hierarchical goal / task tracking for meta-cognition |
161
+ | **LearningJournal** | `src/mnemocore/meta/learning_journal.py` | Persistent log of what the agent has learned over time |
162
+ | **API** | `src/mnemocore/api/main.py` | FastAPI REST interface with async wrappers and middleware |
163
+ | **MCP Server** | `src/mnemocore/mcp/server.py` | Model Context Protocol adapter for agent tool integration |
164
+
165
+ ---
166
+
167
+ ## Core Technology: Binary HDV & VSA
168
+
169
+ MnemoCore's mathematical foundation is **Hyperdimensional Computing** — a computing paradigm that encodes information in very high-dimensional binary vectors (HDVs), enabling noise-tolerant, distributed, and algebraically composable representations.
170
+
171
+ ### The Vector Space
172
+
173
+ Every piece of information — a word, a sentence, a concept, a goal — is encoded as a **16,384-dimensional binary vector**:
174
+
175
+ ```
176
+ Dimension D = 16,384 bits = 2,048 bytes per vector
177
+ Storage: packed as numpy uint8 arrays
178
+ Similarity: Hamming distance (popcount of XOR result)
179
+ Random pair: ~50% similarity (orthogonality by probability)
180
+ ```
181
+
182
+ At this dimensionality, two random vectors will differ in ~50% of bits. This near-orthogonality is the foundation of the system's expressive power — related concepts cluster together while unrelated ones remain maximally distant.
183
+
184
+ ### VSA Algebra
185
+
186
+ Four primitive operations make the entire system work:
187
+
188
+ #### Binding — XOR `⊕`
189
+ Creates an association between two concepts. Crucially, the result is **dissimilar to both inputs** (appears as noise), making it a true compositional operation.
190
+
191
+ ```python
192
+ # Bind content to its context
193
+ bound = content_vec.xor_bind(context_vec) # content ⊕ context
194
+
195
+ # Self-inverse: unbind by re-binding
196
+ recovered = bound.xor_bind(context_vec) # ≈ content (XOR cancels)
197
+ ```
198
+
199
+ Key mathematical properties:
200
+ - **Self-inverse**: `A ⊕ A = 0` (XOR cancels itself)
201
+ - **Commutative**: `A ⊕ B = B ⊕ A`
202
+ - **Distance-preserving**: `hamming(A⊕C, B⊕C) = hamming(A, B)`
203
+
204
+ #### Bundling — Majority Vote
205
+ Creates a **prototype** that is similar to all inputs. This is how multiple memories combine into a concept.
206
+
207
+ ```python
208
+ from mnemocore.core.binary_hdv import majority_bundle
209
+
210
+ # Create semantic prototype from related memories
211
+ concept = majority_bundle([vec_a, vec_b, vec_c, vec_d]) # similar to all inputs
212
+ ```
213
+
214
+ #### Permutation — Circular Shift
215
+ Encodes **sequence and roles** without separate positional embeddings.
216
+
217
+ ```python
218
+ # Positional encoding: token at position i
219
+ positioned = token_vec.permute(shift=i) # circular bit-shift
220
+
221
+ # Encode "hello world" with order information
222
+ hello_positioned = encoder.get_token_vector("hello").permute(0)
223
+ world_positioned = encoder.get_token_vector("world").permute(1)
224
+ sentence_vec = majority_bundle([hello_positioned, world_positioned])
225
+ ```
226
+
227
+ #### Similarity — Hamming Distance
228
+ Fast comparison using vectorized popcount over XOR results:
229
+
230
+ ```python
231
+ # Normalized similarity: 1.0 = identical, 0.5 = unrelated
232
+ sim = vec_a.similarity(vec_b) # 1.0 - hamming(a, b) / D
233
+
234
+ # Batch nearest-neighbor search (no Python loops)
235
+ distances = batch_hamming_distance(query, database_matrix)
236
+ ```
237
+
238
+ ### Text Encoding Pipeline
239
+
240
+ The `TextEncoder` converts natural language to HDVs using a token-position binding scheme:
241
+
242
+ ```
243
+ "Python TypeError" →
244
+     token_hdv("python").permute(0) = positioned_0
245
+     token_hdv("typeerror").permute(1) = positioned_1
246
+ majority_bundle([positioned_0, positioned_1]) = final_hdv
247
+ ```
248
+
249
+ Token vectors are **deterministic** — seeded via SHAKE-256 hash — meaning the same word always produces the same base vector, enabling cross-session consistency without a vocabulary file.
250
+
251
+ ---
252
+
253
+ ## The Memory Lifecycle
254
+
255
+ Every memory passes through a defined lifecycle from ingestion to long-term storage:
256
+
257
+ ```
258
+ Incoming Content
259
+
260
+
261
+ ┌─────────────┐
262
+ │ TextEncoder │ → 16,384-dim binary HDV
263
+ └──────┬──────┘
264
+
265
+
266
+ ┌──────────────────┐
267
+ │ Context Binding │ → XOR bind with goal_context if present
268
+ │ (XOR) │ bound_vec = content ⊕ context
269
+ └──────┬───────────┘
270
+
271
+
272
+ ┌──────────────────┐
273
+ │ EIG Evaluation │ → Epistemic Information Gain
274
+ │ (Novelty Check) │ eig = normalized_distance(vec, context_vec)
275
+ └──────┬───────────┘ tag "epistemic_high" if eig > threshold
276
+
277
+
278
+ ┌─────────────────┐
279
+ │ MemoryNode │ → id, hdv, content, metadata
280
+ │ Creation │ ltp_strength = I × log(1+A) × e^(-λT)
281
+ └──────┬──────────┘
282
+
283
+
284
+ ┌─────────────────┐
285
+ │ HOT Tier │ → In-memory dict (max 2000 nodes)
286
+ │ (RAM) │ LTP eviction: low-LTP nodes → WARM
287
+ └──────┬──────────┘
288
+ │ (background)
289
+
290
+ ┌─────────────────┐
291
+ │ Subconscious │ → Dream cycle fires
292
+ │ Dream Loop │ Query similar memories
293
+ └──────┬──────────┘ Strengthen synapses (Hebbian)
294
+
295
+
296
+ ┌─────────────────┐
297
+ │ WARM Tier │ → Redis-backed persistence
298
+ │ (Redis/mmap) │ async dual-write + pub/sub events
299
+ └──────┬──────────┘
300
+ │ (scheduled, nightly)
301
+
302
+ ┌─────────────────┐
303
+ │ COLD Tier │ → Qdrant / Disk / S3
304
+ │ (Archival) │ ANN search, long-term persistence
305
+ └─────────────────┘
306
+ ```
307
+
308
+ ### Long-Term Potentiation (LTP)
309
+
310
+ Memories are not equal. Importance is computed dynamically using a biologically-inspired LTP formula:
311
+
312
+ ```
313
+ S = I × log(1 + A) × e^(-λ × T)
314
+
315
+ Where:
316
+ S = LTP strength (determines tier placement)
317
+ I = Importance (derived from epistemic + pragmatic value)
318
+ A = Access count (frequency of retrieval)
319
+ λ = Decay lambda (configurable, default ~0.01)
320
+ T = Age in days
321
+ ```
322
+
323
+ Memories with high LTP remain in HOT tier. Those that decay are automatically demoted to WARM, then COLD — mirroring how biological memory consolidates from working memory to long-term storage.
324
+
325
+ ### Synaptic Connections
326
+
327
+ Memories are linked by `SynapticConnection` objects that implement Hebbian learning: *"neurons that fire together, wire together."*
328
+
329
+ Every time two memories are co-retrieved (via the background dream loop or explicit binding), their synaptic strength increases. During query time, synaptic spreading amplifies scores of connected memories even when they do not directly match the query vector — enabling **associative recall**.
330
+
331
+ ```python
332
+ # Explicit synapse creation
333
+ engine.bind_memories(id_a, id_b, success=True)
334
+
335
+ # Associative spreading: query top seeds spread activation to neighbors
336
+ # neighbor_score += seed_score × synapse_strength × 0.3
337
+ ```
338
+
339
+ ---
340
+
341
+ ## Tiered Storage: HOT / WARM / COLD
342
+
343
+ | Tier | Backend | Capacity | Latency | Eviction Trigger |
344
+ |------|---------|----------|---------|------------------|
345
+ | 🔥 **HOT** | Python dict (RAM) | 2,000 nodes | < 1ms | LTP < threshold |
346
+ | 🌡 **WARM** | Redis + mmap | 100,000 nodes | < 10ms | Age + low access |
347
+ | ❄️ **COLD** | Qdrant / Disk / S3 | Unlimited | < 100ms | Manual / scheduled |
348
+
349
+ Promotion is automatic: accessing a WARM or COLD memory re-promotes it to HOT based on recalculated LTP. Eviction is LRU-weighted by LTP strength — the most biologically active memories always stay hot.
350
+
351
+ ---
352
+
353
+ ## Phase 4.0 Cognitive Enhancements
354
+
355
+ MnemoCore Phase 4.0 introduces five architectural enhancements that elevate the system from **data retrieval** to **cognitive reasoning**. Full implementation specifications are in [`COGNITIVE_ENHANCEMENTS.md`](COGNITIVE_ENHANCEMENTS.md).
356
+
357
+ ---
358
+
359
+ ### 1. Contextual Query Masking *(XOR Attention)*
360
+
361
+ **Problem**: Large multi-project deployments suffer from cross-context interference. A query for `"Python error handling"` returns memories from all projects equally, diluting precision.
362
+
363
+ **Solution**: Bidirectional XOR context binding — apply the same context vector at both **storage** and **query** time:
364
+
365
+ ```
366
+ Store: bound_vec = content ⊕ context_vec
367
+ Query: masked_query = query ⊕ context_vec
368
+
369
+ Result: (content ⊕ C) · (query ⊕ C) ≈ content · query
370
+ (context cancels, cross-project noise is suppressed)
371
+ ```
372
+
373
+ ```python
374
+ # Store memories in a project context
375
+ engine.store("API rate limiting logic", goal_id="ProjectAlpha")
376
+ engine.store("Garden watering schedule", goal_id="HomeProject")
377
+
378
+ # Query with context mask — only ProjectAlpha memories surface
379
+ results = engine.query("API logic", top_k=5, context="ProjectAlpha")
380
+ ```
381
+
382
+ **Expected impact**: +50–80% query precision (P@5) in multi-project deployments.
383
+
384
+ ---
385
+
386
+ ### 2. Reliability Feedback Loop *(Self-Correcting Memory)*
387
+
388
+ **Problem**: Wrong or outdated memories persist with the same retrieval weight as correct ones. The system has no mechanism to learn from its own mistakes.
389
+
390
+ **Solution**: Bayesian reliability scoring with real-world outcome feedback:
391
+
392
+ ```
393
+ reliability = (successes + 1) / (successes + failures + 2) # Laplace smoothing
394
+
395
+ LTP_enhanced = I × log(1+A) × e^(-λT) × reliability
396
+ ```
397
+
398
+ ```python
399
+ # After using a retrieved memory:
400
+ engine.provide_feedback(memory_id, outcome=True) # Worked → boost reliability
401
+ engine.provide_feedback(memory_id, outcome=False) # Failed → reduce reliability
402
+
403
+ # System auto-tags consistently wrong memories as "unreliable"
404
+ # and verified memories (>5 successes, >0.8 score) as "verified"
405
+ ```
406
+
407
+ The system converges toward **high-confidence knowledge** — memories that have demonstrably worked in practice rank above theoretically similar but unproven ones.
408
+
409
+ ---
410
+
411
+ ### 3. Semantic Memory Consolidation *(Dream-Phase Synthesis)*
412
+
413
+ **Problem**: Episodic memory grows without bound. 1,000 memories about `"Python TypeError"` are semantically equivalent but consume 2MB of vector space and slow down linear scan queries.
414
+
415
+ **Solution**: Nightly `ConsolidationWorker` clusters similar WARM tier memories and replaces them with a **semantic anchor** — a majority-bundled prototype:
416
+
417
+ ```
418
+ BEFORE consolidation:
419
+ mem_001: "Python TypeError in line 45" (2KB vector)
420
+ mem_002: "TypeError calling function" (2KB vector)
421
+ ... ×100 similar memories (200KB total)
422
+
423
+ AFTER consolidation:
424
+ anchor_001: "Semantic pattern: python typeerror function"
425
+ metadata: {source_count: 100, confidence: 0.94}
426
+ hdv: majority_bundle([mem_001.hdv, ..., mem_100.hdv]) (2KB)
427
+ ```
428
+
429
+ ```python
430
+ # Manual trigger (runs automatically at 3 AM)
431
+ stats = engine.trigger_consolidation()
432
+ # → {"abstractions_created": 12, "memories_consolidated": 847}
433
+
434
+ # Via API (admin endpoint)
435
+ POST /admin/consolidate
436
+ ```
437
+
438
+ **Expected impact**: 70–90% memory footprint reduction, 10x query speedup at scale.
439
+
440
+ ---
441
+
442
+ ### 4. Auto-Associative Cleanup Loop *(Vector Immunology)*
443
+
444
+ **Problem**: Holographic vectors degrade over time through repeated XOR operations, noise accumulation, and long-term storage drift. After months of operation, retrieved vectors become "blurry" and similarity scores fall.
445
+
446
+ **Solution**: Iterative attractor dynamics — when a retrieved vector appears noisy, snap it to the nearest stable concept in a **codebook** of high-confidence prototypes:
447
+
448
+ ```
449
+ noisy_vec → find K nearest in codebook
450
+ → majority_bundle(K neighbors)
451
+ → check convergence (Hamming distance < 5%)
452
+ → iterate until converged or max iterations reached
453
+ ```
454
+
455
+ ```python
456
+ # Cleanup runs automatically on retrieval when noise > 15%
457
+ node = engine.get_memory(memory_id, auto_cleanup=True)
458
+ # node.metadata["cleaned"] = True (if cleanup was triggered)
459
+ # node.metadata["cleanup_iterations"] = 3
460
+
461
+ # Codebook is auto-populated from most-accessed, high-reliability memories
462
+ ```
463
+
464
+ **Expected impact**: Maintain >95% similarity fidelity even after years of operation.
465
+
466
+ ---
467
+
468
+ ### 5. Knowledge Gap Detection *(Proactive Curiosity)*
469
+
470
+ **Problem**: The system is entirely reactive — it answers queries but never identifies what it *doesn't know*. True cognitive autonomy requires self-directed learning.
471
+
472
+ **Solution**: Temporal co-occurrence analysis — detect concepts that are frequently accessed **close in time** but have **no synaptic connection**, flagging them as knowledge gaps:
473
+
474
+ ```python
475
+ # Automatically runs hourly
476
+ gaps = engine.detect_knowledge_gaps(time_window_seconds=300)
477
+
478
+ # Returns structured insight:
479
+ # [
480
+ # {
481
+ # "concept_a": "Python asyncio event loop",
482
+ # "concept_b": "FastAPI dependency injection",
483
+ # "suggested_query": "How does asyncio relate to FastAPI dependency injection?",
484
+ # "co_occurrence_count": 4
485
+ # }
486
+ # ]
487
+
488
+ # Query endpoint
489
+ GET /insights/gaps?lookback_hours=24
490
+
491
+ # Fill gap manually (or via LLM agent)
492
+ POST /insights/fill-gap
493
+ {"concept_a_id": "mem_xxx", "concept_b_id": "mem_yyy",
494
+ "explanation": "FastAPI uses asyncio's event loop internally..."}
495
+ ```
496
+
497
+ The system becomes capable of **saying what it doesn't understand** and requesting clarification — the first step toward genuine cognitive autonomy.
498
+
499
+ ---
500
+
501
+ ## Phase 4.4–4.5: Subconscious Daemon & LLM Integration
502
+
503
+ ### Subconscious Daemon *(Autonomous Background Mind)*
504
+
505
+ Phase 4.4 introduced `SubconsciousAI` — a worker that fires during idle cycles and calls an external LLM to generate **synthetic dream memories**: structured insights derived by reasoning over existing memory clusters, rather than through direct observation.
506
+
507
+ Phase 4.5 hardened this into a full `SubconsciousDaemon` — an independently managed asyncio process that orchestrates dream cycles, consolidation scheduling, and subconscious queue processing:
508
+
509
+ ```python
510
+ # The daemon is started automatically when the API starts up.
511
+ # It coordinates:
512
+ # - Dream synthesis: SubconsciousAI → LLM → synthetic insights stored back
513
+ # - Consolidation scheduling: ConsolidationWorker fired on a configurable interval
514
+ # - Subconscious queue: novelty detection from Redis pub/sub stream
515
+ ```
516
+
517
+ Configure in `config.yaml`:
518
+
519
+ ```yaml
520
+ haim:
521
+ subconscious_ai:
522
+ enabled: true
523
+ api_url: "https://api.openai.com/v1/chat/completions" # or Anthropic
524
+ model: "gpt-4o-mini"
525
+ # api_key: set via SUBCONSCIOUS_AI_API_KEY env var
526
+ dream_interval_seconds: 300
527
+ batch_size: 5
528
+ ```
529
+
530
+ ### Dependency Injection Container
531
+
532
+ All major services (TierManager, AsyncRedisStorage, QdrantStore, SubconsciousAI, etc.) are now wired through `src/mnemocore/core/container.py`. This eliminates global singleton state and makes every subsystem testable in isolation:
533
+
534
+ ```python
535
+ from mnemocore.core.container import build_container
536
+
537
+ container = build_container(config)
538
+ engine = container.engine()
539
+ tier_mgr = container.tier_manager()
540
+ ```
541
+
542
+ ### LLM Agent Integration
543
+
544
+ `src/mnemocore/llm_integration.py` provides a high-level interface for attaching MnemoCore to any OpenAI/Anthropic-style LLM agent loop:
545
+
546
+ ```python
547
+ from mnemocore.llm_integration import MnemoCoreAgent
548
+
549
+ agent = MnemoCoreAgent(engine)
550
+
551
+ # Store agent observations
552
+ agent.observe("User prefers concise answers over verbose ones")
553
+
554
+ # Recall relevant context before a response
555
+ context = agent.recall("user preference", top_k=3)
556
+ ```
557
+
558
+ ---
559
+
560
+ ## API Reference
561
+
562
+ ### Authentication
563
+
564
+ All endpoints require an API key via the `X-API-Key` header:
565
+
566
+ ```bash
567
+ export HAIM_API_KEY="your-secure-key"
568
+ curl -H "X-API-Key: $HAIM_API_KEY" ...
569
+ ```
570
+
571
+ ### Endpoints
572
+
573
+ #### `POST /store`
574
+ Store a new memory with optional context binding.
575
+
576
+ ```json
577
+ Request:
578
+ {
579
+ "content": "FastAPI uses Pydantic v2 for request validation.",
580
+ "metadata": {"source": "docs", "tags": ["python", "fastapi"]},
581
+ "context": "ProjectAlpha",
582
+ "agent_id": "agent-001",
583
+ "ttl": 3600
584
+ }
585
+
586
+ Response:
587
+ {
588
+ "ok": true,
589
+ "memory_id": "mem_1739821234567",
590
+ "message": "Stored memory: mem_1739821234567"
591
+ }
592
+ ```
593
+
594
+ #### `POST /query`
595
+ Query memories by semantic similarity with optional context masking.
596
+
597
+ ```json
598
+ Request:
599
+ {
600
+ "query": "How does FastAPI handle request validation?",
601
+ "top_k": 5,
602
+ "context": "ProjectAlpha"
603
+ }
604
+
605
+ Response:
606
+ {
607
+ "ok": true,
608
+ "query": "How does FastAPI handle request validation?",
609
+ "results": [
610
+ {
611
+ "id": "mem_1739821234567",
612
+ "content": "FastAPI uses Pydantic v2 for request validation.",
613
+ "score": 0.8923,
614
+ "metadata": {"source": "docs"},
615
+ "tier": "hot"
616
+ }
617
+ ]
618
+ }
619
+ ```
620
+
621
+ #### `POST /feedback`
622
+ Report outcome of a retrieved memory (Phase 4.0 reliability loop).
623
+
624
+ ```json
625
+ Request:
626
+ {
627
+ "memory_id": "mem_1739821234567",
628
+ "outcome": true,
629
+ "comment": "This solution worked perfectly."
630
+ }
631
+
632
+ Response:
633
+ {
634
+ "ok": true,
635
+ "memory_id": "mem_1739821234567",
636
+ "reliability_score": 0.714,
637
+ "success_count": 4,
638
+ "failure_count": 1
639
+ }
640
+ ```
641
+
642
+ #### `GET /memory/{memory_id}`
643
+ Retrieve a specific memory with full metadata.
644
+
645
+ ```json
646
+ Response:
647
+ {
648
+ "id": "mem_1739821234567",
649
+ "content": "...",
650
+ "metadata": {...},
651
+ "created_at": "2026-02-17T20:00:00Z",
652
+ "ltp_strength": 1.847,
653
+ "epistemic_value": 0.73,
654
+ "reliability_score": 0.714,
655
+ "tier": "hot"
656
+ }
657
+ ```
658
+
659
+ #### `DELETE /memory/{memory_id}`
660
+ Delete memory from all tiers and clean up synapses.
661
+
662
+ #### `POST /concept`
663
+ Define a symbolic concept for analogical reasoning.
664
+
665
+ ```json
666
+ {"name": "king", "attributes": {"gender": "man", "role": "ruler", "domain": "royalty"}}
667
+ ```
668
+
669
+ #### `POST /analogy`
670
+ Solve analogies using VSA algebra: `source:value :: target:?`
671
+
672
+ ```json
673
+ Request: {"source_concept": "king", "source_value": "man", "target_concept": "queen"}
674
+ Response: {"results": [{"value": "woman", "score": 0.934}]}
675
+ ```
676
+
677
+ #### `GET /insights/gaps`
678
+ Detect knowledge gaps from recent temporal co-activity (Phase 4.0).
679
+
680
+ ```json
681
+ Response:
682
+ {
683
+ "gaps_detected": 3,
684
+ "knowledge_gaps": [
685
+ {
686
+ "concept_a": "asyncio event loop",
687
+ "concept_b": "FastAPI middleware",
688
+ "suggested_query": "How does event loop relate to middleware?",
689
+ "co_occurrence_count": 5
690
+ }
691
+ ]
692
+ }
693
+ ```
694
+
695
+ #### `POST /admin/consolidate`
696
+ Trigger manual semantic consolidation (normally runs automatically at 3 AM).
697
+
698
+ #### `GET /stats`
699
+ Engine statistics — tiers, synapse count, consolidation state.
700
+
701
+ #### `GET /health`
702
+ Health check — Redis connectivity, engine readiness, degraded mode status.
703
+
704
+ #### `GET /metrics`
705
+ Prometheus metrics endpoint.
706
+
707
+ ---
708
+
709
+ ## Python Library Usage
710
+
711
+ ### Basic Store and Query
712
+
713
+ ```python
714
+ from mnemocore.core.engine import HAIMEngine
715
+
716
+ engine = HAIMEngine(persist_path="./data/memory.jsonl")
717
+
718
+ # Store memories
719
+ engine.store("Python generators are lazy iterators", metadata={"topic": "python"})
720
+ engine.store("Use 'yield' to create generator functions", metadata={"topic": "python"})
721
+ engine.store("Redis XADD appends to a stream", goal_id="infrastructure")
722
+
723
+ # Query (global)
724
+ results = engine.query("How do Python generators work?", top_k=3)
725
+ for mem_id, score in results:
726
+ mem = engine.get_memory(mem_id)
727
+ print(f"[{score:.3f}] {mem.content}")
728
+
729
+ # Query with context masking
730
+ results = engine.query("data streams", top_k=5, context="infrastructure")
731
+
732
+ engine.close()
733
+ ```
734
+
735
+ ### Analogical Reasoning
736
+
737
+ ```python
738
+ # Define concepts
739
+ engine.define_concept("king", {"gender": "man", "role": "ruler"})
740
+ engine.define_concept("queen", {"gender": "woman", "role": "ruler"})
741
+ engine.define_concept("man", {"gender": "man"})
742
+
743
+ # VSA analogy: king:man :: ?:woman → queen
744
+ result = engine.reason_by_analogy(
745
+ src="king", val="man", tgt="woman"
746
+ )
747
+ print(result) # [("queen", 0.934), ...]
748
+ ```
749
+
750
+ ### Working with the Binary HDV Layer Directly
751
+
752
+ ```python
753
+ from mnemocore.core.binary_hdv import BinaryHDV, TextEncoder, majority_bundle
754
+
755
+ encoder = TextEncoder(dimension=16384)
756
+
757
+ # Encode text
758
+ python_vec = encoder.encode("Python programming")
759
+ fastapi_vec = encoder.encode("FastAPI framework")
760
+ error_vec = encoder.encode("error handling")
761
+
762
+ # Bind concept to role
763
+ python_in_fastapi = python_vec.xor_bind(fastapi_vec)
764
+
765
+ # Bundle multiple concepts into prototype
766
+ web_dev_prototype = majority_bundle([python_vec, fastapi_vec, error_vec])
767
+
768
+ # Similarity
769
+ print(python_vec.similarity(web_dev_prototype)) # High (part of bundle)
770
+ print(python_vec.similarity(error_vec)) # ~0.5 (unrelated)
771
+
772
+ # Batch nearest-neighbor search
773
+ from mnemocore.core.binary_hdv import batch_hamming_distance
774
+ import numpy as np
775
+
776
+ database = np.stack([v.data for v in [python_vec, fastapi_vec, error_vec]])
777
+ distances = batch_hamming_distance(python_vec, database)
778
+ ```
779
+
780
+ ### Reliability Feedback Loop
781
+
782
+ ```python
783
+ mem_id = engine.store("Always use asyncio.Lock() in async code, not threading.Lock()")
784
+ results = engine.query("async locking")
785
+
786
+ # It works — report success
787
+ engine.provide_feedback(mem_id, outcome=True, comment="Solved deadlock issue")
788
+
789
+ # Over time, high-reliability memories get 'verified' tag
790
+ # and are ranked above unproven ones in future queries
791
+ ```
792
+
793
+ ### Semantic Consolidation
794
+
795
+ ```python
796
+ stats = engine.trigger_consolidation()
797
+ print(f"Created {stats['abstractions_created']} semantic anchors")
798
+ print(f"Consolidated {stats['memories_consolidated']} episodic memories")
799
+
800
+ # Automatic: runs every night at 3 AM via background asyncio task
801
+ ```
802
+
803
+ ---
804
+
805
+ ## Installation
806
+
807
+ ### Prerequisites
808
+
809
+ - **Python 3.10+**
810
+ - **Redis 6+** — Required for WARM tier and async event streaming
811
+ - **Qdrant** *(optional)* — For COLD tier at billion-scale
812
+ - **Docker** *(recommended)* — For Redis and Qdrant services
813
+
814
+ ### Quick Start
815
+
816
+ ```bash
817
+ # 1. Clone
818
+ git clone https://github.com/RobinALG87/MnemoCore-Infrastructure-for-Persistent-Cognitive-Memory.git
819
+ cd MnemoCore-Infrastructure-for-Persistent-Cognitive-Memory
820
+
821
+ # 2. Create virtual environment
822
+ python -m venv .venv
823
+ .\.venv\Scripts\activate # Windows (PowerShell)
824
+ # source .venv/bin/activate # Linux / macOS
825
+
826
+ # 3. Install (recommended — uses pyproject.toml as canonical source)
827
+ pip install -e .
828
+
829
+ # Or install runtime deps only (Docker / legacy):
830
+ # pip install -r requirements.txt
831
+
832
+ # To include dev tools (pytest, mypy, black, etc.):
833
+ pip install -e ".[dev]"
834
+
835
+ # 4. Start Redis
836
+ docker run -d -p 6379:6379 redis:7.2-alpine
837
+
838
+ # 5. Set API key (never hardcode — use env var or .env file)
839
+ # Windows PowerShell:
840
+ $env:HAIM_API_KEY = "your-secure-key-here"
841
+ # Linux / macOS:
842
+ # export HAIM_API_KEY="your-secure-key-here"
843
+
844
+ # 6. Start the API
845
+ uvicorn mnemocore.api.main:app --host 0.0.0.0 --port 8100
846
+ ```
847
+
848
+ The API is now live at `http://localhost:8100`. Visit `http://localhost:8100/docs` for the interactive Swagger UI.
849
+
850
+ ### Using the .env file
851
+
852
+ Copy the provided template and fill in your values — the API and docker-compose both pick it up automatically:
853
+
854
+ ```bash
855
+ cp .env.example .env
856
+ # Edit .env and set HAIM_API_KEY, REDIS_URL, etc.
857
+ ```
858
+
859
+ > **Note:** `.env` is listed in `.gitignore` and must never be committed. Only `.env.example` (with placeholder values) belongs in version control.
860
+
861
+ ### Full Stack with Docker Compose
862
+
863
+ ```bash
864
+ # Requires .env with HAIM_API_KEY set
865
+ docker compose up -d
866
+ ```
867
+
868
+ This starts MnemoCore, Redis 7.2, and Qdrant in one command.
869
+
870
+ ### With Qdrant (Phase 4.x Scale)
871
+
872
+ ```bash
873
+ # Start Qdrant alongside Redis
874
+ docker run -d -p 6333:6333 qdrant/qdrant
875
+
876
+ # Enable in config.yaml
877
+ qdrant:
878
+ enabled: true
879
+ host: localhost
880
+ port: 6333
881
+ ```
882
+
883
+ ---
884
+
885
+ ## Configuration
886
+
887
+ All configuration lives in `config.yaml`. Sensitive values can be overridden with environment variables — the config loader looks for `HAIM_`-prefixed vars and also honours per-service overrides like `HAIM_API_KEY`, `REDIS_PASSWORD`, `QDRANT_API_KEY`, `HAIM_CORS_ORIGINS`, and `SUBCONSCIOUS_AI_API_KEY`.
888
+
889
+ ```yaml
890
+ haim:
891
+ version: "4.5"
892
+ dimensionality: 16384 # Binary vector dimensions (must be multiple of 64)
893
+
894
+ encoding:
895
+ mode: "binary" # "binary" (recommended) or "float" (legacy, deprecated)
896
+ token_method: "bundle" # "bundle" (XOR+permute) or "hash"
897
+
898
+ tiers:
899
+ hot:
900
+ max_memories: 2000 # Max nodes in RAM
901
+ ltp_threshold_min: 0.7 # Evict below this LTP strength
902
+ eviction_policy: "lru"
903
+ warm:
904
+ max_memories: 100000 # Max nodes in Redis/mmap
905
+ ltp_threshold_min: 0.3
906
+ cold:
907
+ storage_backend: "filesystem" # "filesystem" or "s3"
908
+ compression: "gzip"
909
+
910
+ ltp:
911
+ initial_importance: 0.5
912
+ decay_lambda: 0.01 # Higher = faster forgetting
913
+ permanence_threshold: 0.95 # LTP above this is immune to decay
914
+ half_life_days: 30.0
915
+
916
+ hysteresis:
917
+ promote_delta: 0.15 # LTP must exceed threshold by this much to promote
918
+ demote_delta: 0.10
919
+
920
+ redis:
921
+ url: "redis://localhost:6379/0"
922
+ stream_key: "haim:subconscious"
923
+ max_connections: 10
924
+ socket_timeout: 5
925
+ # password: set via REDIS_PASSWORD env var
926
+
927
+ qdrant:
928
+ url: "http://localhost:6333"
929
+ collection_hot: "haim_hot"
930
+ collection_warm: "haim_warm"
931
+ enabled: false
932
+ # api_key: set via QDRANT_API_KEY env var
933
+
934
+ security:
935
+ # api_key: set via HAIM_API_KEY env var — never hardcode here
936
+ cors_origins: ["http://localhost:3000"]
937
+
938
+ subconscious_ai:
939
+ enabled: false
940
+ api_url: "https://api.openai.com/v1/chat/completions"
941
+ model: "gpt-4o-mini"
942
+ dream_interval_seconds: 300
943
+ batch_size: 5
944
+ # api_key: set via SUBCONSCIOUS_AI_API_KEY env var
945
+
946
+ observability:
947
+ metrics_port: 9090
948
+ log_level: "INFO"
949
+ structured_logging: true
950
+
951
+ paths:
952
+ data_dir: "./data"
953
+ memory_file: "./data/memory.jsonl"
954
+ codebook_file: "./data/codebook.json"
955
+ concepts_file: "./data/concepts.json"
956
+ synapses_file: "./data/synapses.json"
957
+ warm_mmap_dir: "./data/warm_tier"
958
+ cold_archive_dir: "./data/cold_archive"
959
+
960
+ mcp:
961
+ enabled: false
962
+ transport: "stdio"
963
+ host: "127.0.0.1"
964
+ port: 8110
965
+ api_base_url: "http://localhost:8100"
966
+ ```
967
+
968
+ ### Security Note
969
+
970
+ MnemoCore requires an explicit API key. There is no default fallback key in production builds.
971
+
972
+ ```bash
973
+ # Generate a cryptographically secure key:
974
+ python -c "import secrets; print(secrets.token_urlsafe(32))"
975
+
976
+ # Set it (never commit this value):
977
+ export HAIM_API_KEY="<generated-value>"
978
+ ```
979
+
980
+ ---
981
+
982
+ ## MCP Server Integration
983
+
984
+ MnemoCore exposes a **Model Context Protocol (MCP)** server, enabling direct integration with Claude, GPT-4, and any MCP-compatible agent framework.
985
+
986
+ ### Setup
987
+
988
+ ```bash
989
+ # Start API first
990
+ uvicorn mnemocore.api.main:app --host 0.0.0.0 --port 8100
991
+
992
+ # Configure MCP in config.yaml
993
+ haim:
994
+ mcp:
995
+ enabled: true
996
+ transport: "stdio" # or "sse" for streaming
997
+
998
+ # Run MCP server
999
+ python -m mnemocore.mcp.server
1000
+ ```
1001
+
1002
+ ### Claude Desktop Configuration
1003
+
1004
+ Add to your Claude Desktop `config.json`:
1005
+
1006
+ ```json
1007
+ {
1008
+ "mcpServers": {
1009
+ "mnemocore": {
1010
+ "command": "python",
1011
+ "args": ["-m", "mnemocore.mcp.server"],
1012
+ "env": {
1013
+ "HAIM_API_KEY": "your-key",
1014
+ "HAIM_BASE_URL": "http://localhost:8100"
1015
+ }
1016
+ }
1017
+ }
1018
+ }
1019
+ ```
1020
+
1021
+ Once connected, the agent can:
1022
+ - `store_memory(content, context)` — persist learned information
1023
+ - `query_memory(query, context, top_k)` — recall relevant memories
1024
+ - `provide_feedback(memory_id, outcome)` — signal what worked
1025
+ - `get_knowledge_gaps()` — surface what it doesn't understand
1026
+
1027
+ ---
1028
+
1029
+ ## Observability
1030
+
1031
+ MnemoCore ships with built-in Prometheus metrics and structured logging.
1032
+
1033
+ ### Prometheus Metrics
1034
+
1035
+ Available at `GET /metrics`:
1036
+
1037
+ | Metric | Description |
1038
+ |--------|-------------|
1039
+ | `haim_api_request_count` | Total requests by endpoint and status |
1040
+ | `haim_api_request_latency_seconds` | Request latency histogram |
1041
+ | `haim_storage_operation_count` | Store/query/delete operations |
1042
+ | `haim_hot_tier_size` | Current HOT tier memory count |
1043
+ | `haim_synapse_count` | Active synaptic connections |
1044
+
1045
+ ### Grafana Dashboard
1046
+
1047
+ A sample Grafana dashboard config is available at `grafana-dashboard.json` in the repository root. Import it directly into Grafana via **Dashboards → Import → Upload JSON file**.
1048
+
1049
+ ### Structured Logging
1050
+
1051
+ All components use structured Python logging with contextual fields:
1052
+
1053
+ ```
1054
+ 2026-02-17 20:00:00 INFO Stored memory mem_1739821234567 (EIG: 0.7823)
1055
+ 2026-02-17 20:00:01 INFO Memory mem_1739821234567 reliability updated: 0.714 (4✓ / 1✗)
1056
+ 2026-02-17 03:00:00 INFO Consolidation complete: abstractions_created=12, consolidated=847
1057
+ 2026-02-17 04:00:00 INFO Knowledge gap detected: asyncio ↔ FastAPI middleware (5 co-occurrences)
1058
+ ```
1059
+
1060
+ ---
1061
+
1062
+ ## Testing
1063
+
1064
+ ```bash
1065
+ # Run full test suite
1066
+ pytest
1067
+
1068
+ # Run with coverage
1069
+ pytest --cov=src --cov-report=html
1070
+
1071
+ # Run specific feature tests
1072
+ pytest tests/test_xor_attention.py # Contextual masking
1073
+ pytest tests/test_stability.py # Reliability/Bayesian stability
1074
+ pytest tests/test_consolidation.py # Semantic consolidation
1075
+ pytest tests/test_engine_cleanup.py # Cleanup and decay
1076
+ pytest tests/test_phase43_regressions.py # Phase 4.3 regression guardrails
1077
+ pytest tests/test_tier_manager.py # Tier demotion / promotion logic
1078
+ pytest tests/test_dream_loop.py # Subconscious dream loop
1079
+ pytest tests/test_subconscious_ai_worker.py # LLM-powered dream worker (if offline: uses mocks)
1080
+ pytest tests/test_recursive_synthesizer.py # Deep concept synthesis
1081
+ pytest tests/test_batch_ops.py # Bulk ingestion operations
1082
+ pytest tests/test_mcp_server.py # MCP server adapter
1083
+
1084
+ # End-to-end flow
1085
+ pytest tests/test_e2e_flow.py -v
1086
+ ```
1087
+
1088
+ ---
1089
+
1090
+ ## Roadmap
1091
+
1092
+ ### Current Release (v4.5.0)
1093
+
1094
+ - [x] Binary HDV core (XOR bind / bundle / permute / Hamming)
1095
+ - [x] Three-tier HOT/WARM/COLD memory lifecycle
1096
+ - [x] Async API + MCP integration
1097
+ - [x] XOR attention masking + Bayesian reliability updates
1098
+ - [x] Semantic consolidation, immunology cleanup, and gap detection/filling
1099
+ - [x] Temporal recall: episodic chaining + chrono-weighted query
1100
+ - [x] Regression guardrails for Phase 4.3 critical paths
1101
+ - [x] Phase 4.4 — Subconscious AI Worker (LLM-powered dream synthesis)
1102
+ - [x] Phase 4.5 — Subconscious Daemon, persistence hardening, tier-manager demotion race fix
1103
+ - [x] Dependency-injection Container pattern (replaces singleton)
1104
+ - [x] HNSW in-process index for hot-tier ANN search
1105
+ - [x] Batch operations for bulk ingestion
1106
+ - [x] Meta-cognition layer: GoalTree + LearningJournal
1107
+
1108
+ ### Next Steps
1109
+
1110
+ - [ ] Hardening pass for distributed/clustered HOT-tier behavior
1111
+ - [ ] Extended observability standardization (`mnemocore_*` metric prefix across all components)
1112
+ - [ ] Self-improvement loop (design documented in `docs/SELF_IMPROVEMENT_DEEP_DIVE.md`, staged rollout pending)
1113
+ - [ ] CUDA kernels for batch HDV operations at scale
1114
+ - [ ] Helm chart production hardening (resource autoscaling, PodDisruptionBudget)
1115
+
1116
+ ---
1117
+
1118
+ ## Contributing
1119
+
1120
+ MnemoCore is an active research project. Contributions are welcome — especially:
1121
+
1122
+ - **Performance**: CUDA kernels, FAISS integration, async refactoring
1123
+ - **Algorithms**: Better clustering for consolidation, improved EIG formulas
1124
+ - **Integrations**: New storage backends, LLM connectors
1125
+ - **Tests**: Coverage for edge cases, property-based testing
1126
+
1127
+ ### Process
1128
+
1129
+ ```bash
1130
+ # Fork and clone
1131
+ git checkout -b feature/your-feature-name
1132
+
1133
+ # Make changes, ensure tests pass
1134
+ pytest
1135
+
1136
+ # Commit with semantic message
1137
+ git commit -m "feat(consolidation): add LLM-powered prototype labeling"
1138
+
1139
+ # Open PR — describe the what, why, and performance impact
1140
+ ```
1141
+
1142
+ Please follow the implementation patterns established in `docs/ARCHITECTURE.md` and `docs/ROADMAP.md` for architectural guidance, and review `CHANGELOG.md` to understand what has already landed.
1143
+
1144
+ ---
1145
+
1146
+ ## License
1147
+
1148
+ MIT License — see [LICENSE](LICENSE) for details.
1149
+
1150
+ ---
1151
+
1152
+ ## Contact
1153
+
1154
+ **Robin Granberg**
1155
+ 📧 robin@veristatesystems.com
1156
+
1157
+ ---
1158
+
1159
+ <p align="center">
1160
+ <i>Building the cognitive substrate for the next generation of autonomous AI.</i>
1161
+ </p>
REFACTORING_TODO.md ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Refactoring TODO
2
+
3
+ Status för kodoptimering inför kommande funktionalitet.
4
+
5
+ ---
6
+
7
+ ## Hög Prioritet
8
+
9
+ ### 1. Konsolidera HDV-implementation
10
+ **Status:** ✅ Completed (2026-02-18)
11
+
12
+ **Problem:**
13
+ - Dubbel implementation: `src/core/hdv.py` (float) + `src/core/binary_hdv.py` (binary)
14
+ - Skapar branch-logik genom hela koden
15
+ - Float HDV är legacy och bör depreceras
16
+
17
+ **Åtgärder genomförda:**
18
+ - `src/core/hdv.py` - Markerad som DEPRECATED med varning
19
+ - `src/core/__init__.py` - Exporterar nu BinaryHDV först
20
+ - `src/core/node.py` - Använder endast BinaryHDV
21
+ - `src/core/holographic.py` - Konverterad till BinaryHDV med XOR-binding
22
+ - `src/core/router.py` - Tog bort HDV-branching
23
+ - `src/core/engine.py` - Tog bort Union-typer och branching
24
+ - `src/core/tier_manager.py` - Standardiserade på BinaryHDV
25
+
26
+ ---
27
+
28
+ ### 2. Ofullständiga features
29
+ **Status:** Pending
30
+
31
+ **Problem:**
32
+ - Flera TODOs i produktionskod som lämnats oimplementerade
33
+
34
+ **Filer:**
35
+ - `src/llm_integration.py`
36
+
37
+ **TODOs:**
38
+ ```
39
+ Line 56: # TODO: Call Gemini 3 Pro via OpenClaw API
40
+ Line 106: # TODO: superposition_query() not implemented in HAIMEngine
41
+ Line 131: # TODO: Call Gemini 3 Pro
42
+ Line 301: # TODO: Implement concept-to-memory-ID mapping
43
+ Line 320: # TODO: orchestrate_orch_or() not implemented
44
+ ```
45
+
46
+ **Åtgärd:**
47
+ - Implementera funktionerna
48
+ - Eller ta bort dödkod
49
+
50
+ ---
51
+
52
+ ### 3. Standardisera felhantering
53
+ **Status:** Pending
54
+
55
+ **Problem:**
56
+ - Vissa funktioner returnerar `None` vid fel
57
+ - Andra kastar exceptions
58
+ - Svårt att förutse felbeteende
59
+
60
+ **Åtgärd:**
61
+ - Definiera domän-specifika exceptions:
62
+ - `MemoryNotFoundError`
63
+ - `StorageError`
64
+ - `EncodingError`
65
+ - `ConsolidationError`
66
+ - Skapa `src/core/exceptions.py`
67
+ - Uppdatera alla moduler att använda konsistent felhantering
68
+
69
+ ---
70
+
71
+ ## Medelprioritet
72
+
73
+ ### 4. Minska Singleton-användning
74
+ **Status:** 📋 Roadmap
75
+
76
+ **Problem:**
77
+ - `AsyncRedisStorage.get_instance()`
78
+ - `QdrantStore.get_instance()`
79
+ - Försvårar testning
80
+
81
+ **Åtgärd:**
82
+ - Inför Dependency Injection
83
+ - Passa beroenden via konstruktor
84
+
85
+ **Komplexitet:** Hög - Kräver genomgripande ändringar av instansiering
86
+
87
+ ---
88
+
89
+ ### 5. Bryt isär stora funktioner
90
+ **Status:** 📋 Roadmap
91
+
92
+ **Problem:**
93
+ - `engine.py:store()` - 76 rader
94
+ - `tier_manager.py:consolidate_warm_to_cold()` - 48 rader
95
+
96
+ **Åtgärd:**
97
+ - Extrahera till mindre, testbara enheter
98
+
99
+ **Komplexitet:** Hög - Refaktorering av kärnlogik
100
+
101
+ ---
102
+
103
+ ### 6. Konsolidera Circuit Breakers
104
+ **Status:** ✅ Completed (2026-02-18)
105
+
106
+ **Problem:**
107
+ - `src/core/resilience.py` - pybreaker implementation
108
+ - `src/core/reliability.py` - Native implementation
109
+ - Dubbel implementation
110
+
111
+ **Åtgärder genomförda:**
112
+ - `src/core/reliability.py` - Nu primär modul med pre-konfigurerade instanser
113
+ - `src/core/resilience.py` - Markerad som DEPRECATED
114
+ - `src/core/qdrant_store.py` - Uppdaterad till reliability
115
+ - `src/api/main.py` - Uppdaterad till reliability, tog bort pybreaker-beroende
116
+
117
+ ---
118
+
119
+ ### 7. Centralisera hårkodade sökvägar
120
+ **Status:** ✅ Completed (2026-02-18)
121
+
122
+ **Problem:**
123
+ - `"./data"` fanns hårdkodat på flera ställen
124
+
125
+ **Åtgärder genomförda:**
126
+ - `src/core/holographic.py` - Använder nu `config.paths.data_dir` som default
127
+ - Alla sökvägar centraliserade i `config.yaml` och `HAIMConfig`
128
+
129
+ ---
130
+
131
+ ### 8. Standardisera import-stil
132
+ **Status:** ✅ Verified (2026-02-18)
133
+
134
+ **Problem:**
135
+ - Blandning av relativa och absoluta imports
136
+ - Till och med inom samma fil
137
+
138
+ **Analys:**
139
+ - `src/core/` använder konsekvent relativa imports (`.module`)
140
+ - Övriga moduler använder absoluta imports (`src.core.module`)
141
+ - Inga filer har blandad stil
142
+
143
+ **Slutsats:**
144
+ Import-stilen följer redan rekommenderad Python-praxis. Ingen åtgärd behövs.
145
+
146
+ ---
147
+
148
+ ## Låg prioritet
149
+
150
+ ### 9. Rensa debug-filer
151
+ - Ta bort eller flytta `debug_*.py`
152
+ - Konsolidera test-helpers
153
+
154
+ ### 10. Standardisera logging
155
+ - Välj ett framework (loguru rekommenderas)
156
+ - Ta bort ad-hoc print-statements
157
+
158
+ ### 11. Förbättra typsäkerhet
159
+ - Lägg till mypy i CI
160
+ - Komplettera type hints
161
+ - Använd `TypedDict` för komplexa dict-returns
162
+
163
+ ---
164
+
165
+ ## Förbättra testtäckning
166
+
167
+ ```bash
168
+ pytest --cov=src --cov-report=html
169
+ ```
170
+
171
+ Kör för att identifiera luckor i testtäckningen.
172
+
173
+ ---
174
+
175
+ ## Fil-prioriteringslista
176
+
177
+ | Prioritet | Fil | Anledning |
178
+ |-----------|-----|-----------|
179
+ | 1 | `src/core/engine.py` | Kärnlogik, HDV dual-mode |
180
+ | 2 | `src/core/tier_manager.py` | Stora funktioner, lagringskomplexitet |
181
+ | 3 | `src/llm_integration.py` | Flera oimplementerade TODOs |
182
+ | 4 | `src/core/resilience.py` | Duplikat circuit breaker |
183
+ | 5 | `src/core/binary_hdv.py` | Överväg extrahering till separat paket |
184
+
185
+ ---
186
+
187
+ ## Framsteg
188
+
189
+ - [x] Punkt 1: HDV-konsolidering ✅
190
+ - [ ] Punkt 2: Ofullständiga features
191
+ - [ ] Punkt 3: Felhantering
192
+ - [ ] Punkt 4: Singleton-reduktion 📋 Roadmap
193
+ - [ ] Punkt 5: Stora funktioner 📋 Roadmap
194
+ - [x] Punkt 6: Circuit breakers ✅
195
+ - [x] Punkt 7: Hårkodade sökvägar ✅
196
+ - [x] Punkt 8: Import-stil ✅ (redan konsekvent)
197
+
198
+ ---
199
+
200
+ ## Roadmap (Framtida refaktorering)
201
+
202
+ Dessa punkter kräver mer omfattande ändringar och bör planeras in senare:
203
+
204
+ | Punkt | Beskrivning | Komplexitet |
205
+ |-------|-------------|-------------|
206
+ | 4 | Minska Singleton-användning, inför DI | Hög |
207
+ | 5 | Bryt isär stora funktioner i engine/tier_manager | Hög |
RELEASE_CHECKLIST.md ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MnemoCore Public Beta Release Checklist
2
+
3
+ ## Status: 🟠 ORANGE → 🟢 GREEN
4
+
5
+ ---
6
+
7
+ ## ✅ Completed
8
+
9
+ - [x] LICENSE file (MIT)
10
+ - [x] .gitignore created
11
+ - [x] data/memory.jsonl removed (no stored memories)
12
+ - [x] No leaked API keys or credentials
13
+ - [x] 82 unit tests passing
14
+
15
+ ---
16
+
17
+ ## 🔧 Code TODOs (Known Limitations)
18
+
19
+ These are documented gaps that can ship as "Phase 4 roadmap" items:
20
+
21
+ ### 1. `src/core/tier_manager.py:338`
22
+ ```python
23
+ pass # TODO: Implement full consolidation with Qdrant
24
+ ```
25
+ **Impact:** Warm→Cold tier consolidation limited
26
+ **Workaround:** Hot→Warm works, Cold is filesystem-based
27
+ **Fix:** Implement Qdrant batch scroll API for full archival
28
+
29
+ ### 2. `src/core/engine.py:192`
30
+ ```python
31
+ # TODO: Phase 3.5 Qdrant search for WARM/COLD
32
+ ```
33
+ **Impact:** Query only searches HOT tier currently
34
+ **Workaround:** Promote memories before querying
35
+ **Fix:** Add async Qdrant similarity search in query()
36
+
37
+ ### 3. `src/llm_integration.py:55-57, 128-129`
38
+ ```python
39
+ # TODO: Call Gemini 3 Pro via OpenClaw API
40
+ reconstruction = "TODO: Call Gemini 3 Pro"
41
+ ```
42
+ **Impact:** LLM reconstruction not functional
43
+ **Workaround:** Raw vector similarity works
44
+ **Fix:** Implement LLM client or make it pluggable
45
+
46
+ ### 4. `src/nightlab/engine.py:339`
47
+ ```python
48
+ # TODO: Notion API integration
49
+ ```
50
+ **Impact:** Session documentation not auto-pushed
51
+ **Workaround:** Written to local markdown files
52
+ **Fix:** Add optional Notion connector
53
+
54
+ ---
55
+
56
+ ## 📋 Pre-Release Actions
57
+
58
+ ### Before git push:
59
+
60
+ ```bash
61
+ # 1. Clean build artifacts
62
+ rm -rf .pytest_cache __pycache__ */__pycache__ *.pyc
63
+
64
+ # 2. Verify tests pass
65
+ source .venv/bin/activate && python -m pytest tests/ -v
66
+
67
+ # 3. Verify import works
68
+ python -c "from mnemocore.core.engine import HAIMEngine; print('OK')"
69
+
70
+ # 4. Check for secrets (should return nothing)
71
+ grep -r "sk-" src/ --include="*.py"
72
+ grep -r "api_key.*=" src/ --include="*.py" | grep -v "api_key=\"\""
73
+
74
+ # 5. Initialize fresh data files
75
+ touch data/memory.jsonl data/codebook.json data/concepts.json data/synapses.json
76
+ ```
77
+
78
+ ### Update README.md:
79
+
80
+ - [ ] Add: "Beta Release - See RELEASE_CHECKLIST.md for known limitations"
81
+ - [ ] Add: "Installation" section with `pip install -r requirements.txt`
82
+ - [ ] Add: "Quick Start" example
83
+ - [ ] Add: "Roadmap" section linking TODOs above
84
+
85
+ ---
86
+
87
+ ## 🚀 Release Command Sequence
88
+
89
+ ```bash
90
+ cd /home/dev-robin/Desktop/mnemocore
91
+
92
+ # Verify clean state
93
+ git status
94
+
95
+ # Stage public files (exclude .venv)
96
+ git add LICENSE .gitignore RELEASE_CHECKLIST.md
97
+ git add src/ tests/ config.yaml requirements.txt pytest.ini
98
+ git add README.md studycase.md docker-compose.yml
99
+ git add data/.gitkeep # If exists, or create empty dirs
100
+
101
+ # Commit
102
+ git commit -m "Initial public beta release (MIT)
103
+
104
+ Known limitations documented in RELEASE_CHECKLIST.md"
105
+
106
+ # Tag
107
+ git tag -a v0.1.0-beta -m "Public Beta Release"
108
+
109
+ # Push (when ready)
110
+ git push origin main --tags
111
+ ```
112
+
113
+ ---
114
+
115
+ ## Post-Release
116
+
117
+ - [ ] Create GitHub repository
118
+ - [ ] Add repository topics: `vsa`, `holographic-memory`, `active-inference`, `vector-symbolic-architecture`
119
+ - [ ] Enable GitHub Issues for community feedback
120
+ - [ ] Publish whitepaper/blog post
121
+
122
+ ---
123
+
124
+ *Generated: 2026-02-15*
125
+
SECURITY.md ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Security Policy
2
+
3
+ ## Supported Versions
4
+
5
+ This repository is currently in beta.
6
+ Security fixes are handled on the latest `main` branch.
7
+
8
+ ## Reporting a Vulnerability
9
+
10
+ Please report vulnerabilities privately to:
11
+ - robin@veristatesystems.com
12
+
13
+ Include:
14
+ - Affected component/file
15
+ - Reproduction steps
16
+ - Impact assessment
17
+ - Suggested remediation (if available)
18
+
19
+ ## Disclosure Policy
20
+
21
+ - Please do not open public issues for unpatched vulnerabilities.
22
+ - We aim to acknowledge reports quickly and coordinate responsible disclosure.
23
+
24
+ ## Security Best Practices for Users
25
+
26
+ - Do not commit secrets, credentials, or private data.
27
+ - Use environment variables for sensitive configuration.
28
+ - Rotate any credential immediately if accidental exposure is suspected.
29
+ - Keep dependencies and runtime images updated.
30
+
benchmarks/bench_100k_memories.py ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Benchmark for MnemoCore with up to 100k memories.
3
+
4
+ Measures:
5
+ - actual HAIMEngine.store() latency (P50, P95, P99)
6
+ - actual HAIMEngine.query() latency (P50, P95, P99)
7
+ - HDV primitive latency (P99)
8
+ """
9
+
10
+ import argparse
11
+ import asyncio
12
+ import os
13
+ import sys
14
+ import time
15
+ from pathlib import Path
16
+ from statistics import mean
17
+ from typing import Dict, List
18
+
19
+ import numpy as np
20
+
21
+ # Add src to path
22
+ sys.path.insert(0, str(Path(__file__).parent.parent))
23
+
24
+ from mnemocore.core.binary_hdv import BinaryHDV
25
+ from mnemocore.core.engine import HAIMEngine
26
+ from mnemocore.core.config import reset_config
27
+
28
+
29
+ def _percentile(values: List[float], pct: float) -> float:
30
+ if not values:
31
+ return 0.0
32
+ sorted_values = sorted(values)
33
+ idx = min(int(len(sorted_values) * pct), len(sorted_values) - 1)
34
+ return sorted_values[idx]
35
+
36
+
37
def _ms_stats(samples: List[float]) -> Dict[str, float]:
    """Summarize latency *samples* (ms) as count, mean, and P50/P95/P99."""
    stats: Dict[str, float] = {
        "count": float(len(samples)),
        "mean_ms": mean(samples) if samples else 0.0,
    }
    for label, pct in (("p50_ms", 0.50), ("p95_ms", 0.95), ("p99_ms", 0.99)):
        stats[label] = _percentile(samples, pct)
    return stats
45
+
46
+
47
def generate_contents(count: int) -> List[str]:
    """Build *count* synthetic memory payloads, each tagged with a cyclic signal."""
    print(f"Generating {count:,} memory payloads...")
    template = "benchmark memory #{:06d} with signal {}"
    return [template.format(i, i % 97) for i in range(count)]
50
+
51
+
52
async def measure_store_latency(engine: HAIMEngine, contents: List[str]) -> Dict[str, float]:
    """Time a real engine.store() call per payload and return millisecond stats."""
    print(f"Measuring store() latency on {len(contents):,} real calls...")
    samples: List[float] = []
    for idx, payload in enumerate(contents):
        t0 = time.perf_counter()
        await engine.store(payload, metadata={"benchmark": True, "index": idx})
        elapsed = time.perf_counter() - t0
        samples.append(elapsed * 1000.0)
    return _ms_stats(samples)
60
+
61
+
62
async def measure_query_latency(
    engine: HAIMEngine, queries: List[str], top_k: int = 5
) -> Dict[str, float]:
    """Time a real engine.query() call per query string and return millisecond stats."""
    print(f"Measuring query() latency on {len(queries):,} real calls...")
    samples: List[float] = []
    for text in queries:
        t0 = time.perf_counter()
        await engine.query(text, top_k=top_k)
        samples.append((time.perf_counter() - t0) * 1000.0)
    return _ms_stats(samples)
72
+
73
+
74
def measure_hdv_operations(dimension: int, n_samples: int = 10000) -> Dict[str, Dict[str, float]]:
    """Micro-benchmark the three HDV primitives (bind / permute / distance).

    Each primitive is timed individually per iteration; results are reported
    in microseconds (mean and P99).
    """
    print(f"Measuring HDV operations ({n_samples:,} samples)...")
    v1 = BinaryHDV.random(dimension)
    v2 = BinaryHDV.random(dimension)

    timings: Dict[str, List[float]] = {"bind": [], "permute": [], "distance": []}

    for _ in range(n_samples):
        t0 = time.perf_counter()
        v1.xor_bind(v2)
        timings["bind"].append((time.perf_counter() - t0) * 1_000_000)

        t0 = time.perf_counter()
        v1.permute(1)
        timings["permute"].append((time.perf_counter() - t0) * 1_000_000)

        t0 = time.perf_counter()
        v1.hamming_distance(v2)
        timings["distance"].append((time.perf_counter() - t0) * 1_000_000)

    return {
        op: {"p99_us": _percentile(samples, 0.99), "mean_us": mean(samples)}
        for op, samples in timings.items()
    }
101
+
102
+
103
async def run_benchmark(args: argparse.Namespace) -> None:
    """Run the end-to-end store/query benchmark against a real HAIMEngine.

    Overrides the configured HDV dimensionality via the environment, starts a
    fresh engine, measures store()/query() latency plus HDV primitive timings,
    prints a latency report, and finally checks both P99s against a 50ms SLO.
    """
    # Force the requested dimensionality before the config object is (re)built.
    os.environ["HAIM_DIMENSIONALITY"] = str(args.dimension)
    reset_config()

    engine = HAIMEngine()
    await engine.initialize()
    try:
        contents = generate_contents(args.n_memories)

        print()
        print("=" * 72)
        print("HAIMEngine store/query benchmark")
        print("=" * 72)

        # Only a prefix of the generated payloads is actually stored.
        store_sample = contents[: args.store_samples]
        store_stats = await measure_store_latency(engine, store_sample)

        # Query inputs reuse the same cyclic "signal N" markers (mod 97) as the
        # stored payloads, so queries hit plausible matches.
        query_count = min(args.query_samples, len(store_sample))
        query_inputs = [f"signal {(i * 7) % 97}" for i in range(query_count)]
        query_stats = await measure_query_latency(engine, query_inputs, top_k=args.top_k)

        hdv_stats = measure_hdv_operations(args.dimension, args.hdv_samples)

        # --- latency table (milliseconds) ---
        print()
        print(f"{'Metric':<32} {'Mean':<14} {'P50':<14} {'P95':<14} {'P99':<14}")
        print("-" * 90)
        print(
            f"{'store() latency (ms)':<32} "
            f"{store_stats['mean_ms']:<14.3f} {store_stats['p50_ms']:<14.3f} "
            f"{store_stats['p95_ms']:<14.3f} {store_stats['p99_ms']:<14.3f}"
        )
        print(
            f"{'query() latency (ms)':<32} "
            f"{query_stats['mean_ms']:<14.3f} {query_stats['p50_ms']:<14.3f} "
            f"{query_stats['p95_ms']:<14.3f} {query_stats['p99_ms']:<14.3f}"
        )

        # --- HDV primitive table (microseconds) ---
        print()
        print(f"{'HDV op':<20} {'Mean (us)':<16} {'P99 (us)':<16}")
        print("-" * 54)
        for op, stats in hdv_stats.items():
            print(f"{op:<20} {stats['mean_us']:<16.2f} {stats['p99_us']:<16.2f}")

        # --- SLO verdict: P99 under 50ms for both store() and query() ---
        print()
        print("=" * 72)
        print("SLO Check")
        print("=" * 72)
        print(
            f"store() P99 < 50ms: {'PASS' if store_stats['p99_ms'] < 50 else 'FAIL'} "
            f"({store_stats['p99_ms']:.3f}ms)"
        )
        print(
            f"query() P99 < 50ms: {'PASS' if query_stats['p99_ms'] < 50 else 'FAIL'} "
            f"({query_stats['p99_ms']:.3f}ms)"
        )
    finally:
        # Always release engine resources and restore the global config, even
        # if a measurement step raised.
        await engine.close()
        reset_config()
161
+
162
+
163
def parse_args(argv: "list[str] | None" = None) -> argparse.Namespace:
    """Parse benchmark CLI flags.

    Args:
        argv: Optional argument list (for tests/programmatic use). When None,
            argparse falls back to ``sys.argv[1:]`` — the previous behavior.

    Returns:
        Parsed namespace with dimension, sample counts, and top_k settings.
    """
    parser = argparse.ArgumentParser(description="Benchmark HAIMEngine store/query performance")
    parser.add_argument("--dimension", type=int, default=1024, help="HDV dimensionality")
    parser.add_argument("--n-memories", type=int, default=100000, help="Dataset size label")
    parser.add_argument(
        "--store-samples", type=int, default=5000, help="Number of real store() calls"
    )
    parser.add_argument(
        "--query-samples", type=int, default=1000, help="Number of real query() calls"
    )
    parser.add_argument("--hdv-samples", type=int, default=10000, help="HDV primitive sample count")
    parser.add_argument("--top-k", type=int, default=5, help="top_k for query() benchmark")
    return parser.parse_args(argv)
176
+
177
+
178
if __name__ == "__main__":
    # CLI entry point: parse flags and drive the async benchmark to completion.
    asyncio.run(run_benchmark(parse_args()))
benchmarks/bench_permute.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Benchmark BinaryHDV.permute() using the production implementation.
3
+ """
4
+
5
+ import sys
6
+ import timeit
7
+ from pathlib import Path
8
+ from typing import Dict, List
9
+
10
+ import numpy as np
11
+
12
+ # Add src to path
13
+ sys.path.insert(0, str(Path(__file__).parent.parent))
14
+
15
+ from mnemocore.core.binary_hdv import BinaryHDV
16
+
17
+
18
+ def permute_reference(data: np.ndarray, shift: int) -> np.ndarray:
19
+ bits = np.unpackbits(data)
20
+ bits = np.roll(bits, shift)
21
+ return np.packbits(bits)
22
+
23
+
24
def benchmark_dimension(dimension: int, shift: int = 13) -> Dict[str, float]:
    """Validate BinaryHDV.permute() against the golden reference, then time it.

    Args:
        dimension: HDV dimensionality (bits) to benchmark.
        shift: Rotation amount used for both the correctness check and timing.

    Returns:
        Dict with the dimension and best-of-5 mean permute latency in microseconds.

    Raises:
        AssertionError: If permute() disagrees with the bit-level reference.
    """
    hdv = BinaryHDV.random(dimension)

    # Correctness check against golden reference. Explicit raise (not a bare
    # `assert`) so the guard still fires when Python runs with -O.
    expected = permute_reference(hdv.data, shift)
    actual = hdv.permute(shift).data
    if not np.array_equal(actual, expected):
        raise AssertionError("permute() mismatch vs reference")

    # Best of 5 timing batches of 500 calls each; min() filters scheduler noise.
    t = min(
        timeit.repeat(
            stmt="hdv.permute(shift)",
            globals={"hdv": hdv, "shift": shift},
            repeat=5,
            number=500,
        )
    )
    us = (t / 500) * 1_000_000
    return {"dimension": float(dimension), "permute_us": us}
42
+
43
+
44
def main() -> None:
    """Print a permute() latency table across a sweep of HDV dimensionalities."""
    sweep: List[int] = [512, 4096, 16384, 32768, 65536, 131072]
    print("BinaryHDV.permute() benchmark (production path)")
    print(f"{'Dimension':>10} | {'permute(us)':>12}")
    print("-" * 27)
    for dimension in sweep:
        row = benchmark_dimension(dimension)
        print(f"{int(row['dimension']):>10} | {row['permute_us']:>12.2f}")
52
+
53
+
54
if __name__ == "__main__":
    # Script entry point.
    main()
config.yaml ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # HAIM Configuration — Phase 4.5
2
+ # All hardcoded constants are centralized here.
3
+
4
+ haim:
5
+ version: "4.5"
6
+ dimensionality: 16384 # 2^14, must be multiple of 64
7
+
8
+ # Vector encoding
9
+ encoding:
10
+ mode: "binary" # "binary" (Phase 3.0+) or "float" (legacy)
11
+ token_method: "bundle" # "bundle" (XOR+permute) or "hash"
12
+
13
+ # Memory tier thresholds
14
+ tiers:
15
+ hot:
16
+ max_memories: 2000
17
+ ltp_threshold_min: 0.7
18
+ eviction_policy: "lru"
19
+
20
+ warm:
21
+ max_memories: 100000
22
+ ltp_threshold_min: 0.3
23
+ consolidation_interval_hours: 1
24
+ storage_backend: "mmap" # "mmap" (Phase 3.0) or "qdrant" (Phase 3.5)
25
+
26
+ cold:
27
+ storage_backend: "filesystem" # "filesystem" or "s3"
28
+ compression: "gzip"
29
+ archive_threshold_days: 30
30
+
31
+ # LTP (Long-Term Potentiation) decay parameters
32
+ ltp:
33
+ initial_importance: 0.5
34
+ decay_lambda: 0.01 # Exponential decay rate
35
+ permanence_threshold: 0.95 # Above this, memory is immune to decay
36
+ half_life_days: 30.0 # For synaptic connections
37
+
38
+ # Hysteresis (prevent boundary thrashing between tiers)
39
+ hysteresis:
40
+ promote_delta: 0.15 # LTP must exceed threshold by this much to promote
41
+ demote_delta: 0.10 # LTP must fall below threshold by this much to demote
42
+
43
+ # Redis (Phase 3.5)
44
+ redis:
45
+ url: "redis://localhost:6379/0"
46
+ stream_key: "haim:subconscious"
47
+ max_connections: 10
48
+ socket_timeout: 5
49
+
50
+ # Qdrant (Phase 3.5)
51
+ qdrant:
52
+ url: "http://localhost:6333"
53
+ collection_hot: "haim_hot"
54
+ collection_warm: "haim_warm"
55
+ binary_quantization: true
56
+ always_ram: true
57
+ hnsw_m: 16
58
+ hnsw_ef_construct: 100
59
+
60
+ # GPU (Phase 3.5)
61
+ gpu:
62
+ enabled: false
63
+ device: "cuda:0"
64
+ batch_size: 1000
65
+ fallback_to_cpu: true
66
+
67
+ # Observability (Phase 3.5)
68
+ observability:
69
+ metrics_port: 9090
70
+ log_level: "INFO"
71
+ structured_logging: true
72
+
73
+ # Persistence paths
74
+ paths:
75
+ data_dir: "./data"
76
+ memory_file: "./data/memory.jsonl"
77
+ codebook_file: "./data/codebook.json"
78
+ concepts_file: "./data/concepts.json"
79
+ synapses_file: "./data/synapses.json"
80
+ warm_mmap_dir: "./data/warm_tier"
81
+ cold_archive_dir: "./data/cold_archive"
82
+
83
+ # Security (Phase 3.5.1)
84
+ security:
85
+ # api_key: "mnemocore-beta-key" # <--- REMOVED: Must be set via HAIM_API_KEY env var or here explicitly
86
+
87
+ # MCP (Model Context Protocol) bridge
88
+ mcp:
89
+ enabled: false
90
+ transport: "stdio" # "stdio" recommended for local MCP clients
91
+ host: "127.0.0.1"
92
+ port: 8110
93
+ api_base_url: "http://localhost:8100"
94
+ timeout_seconds: 15
95
+ allow_tools:
96
+ - "memory_store"
97
+ - "memory_query"
98
+ - "memory_get"
99
+ - "memory_delete"
100
+ - "memory_stats"
101
+ - "memory_health"
102
+
103
+ # Dream Loop (Subconscious background processing)
104
+ dream_loop:
105
+ enabled: true
106
+ frequency_seconds: 60 # Seconds between dream cycles
107
+ batch_size: 10 # Number of memories to process per cycle
108
+ max_iterations: 0 # Maximum iterations (0 = unlimited)
109
+ subconscious_queue_maxlen: 10000 # Max queued IDs (null/0 = unlimited)
110
+ ollama_url: "http://localhost:11434/api/generate"
111
+ model: "gemma3:1b"
112
+
113
+ # Phase 4.0+: Semantic Consolidation
114
+ consolidation:
115
+ enabled: true
116
+ interval_seconds: 3600 # 1 hour between consolidation cycles
117
+ similarity_threshold: 0.85 # Hamming similarity threshold (0.85 = 15% distance)
118
+ min_cluster_size: 2 # Minimum cluster size for merging
119
+ hot_tier_enabled: true # Consolidate HOT tier
120
+ warm_tier_enabled: true # Consolidate WARM tier
121
+
122
+ # Phase 4.1: XOR-based Project Isolation
123
+ attention_masking:
124
+ enabled: true # Enable/disable project-based memory isolation
125
+
126
+ # =========================================================================
127
+ # Subconscious AI - BETA FEATURE
128
+ # =========================================================================
129
+ # This is a BETA feature that enables autonomous background AI processing
130
+ # for memory management, dream synthesis, and micro-self-improvement.
131
+ #
132
+ # WARNING: This feature is experimental and may change without notice.
133
+ # Must be explicitly enabled by setting 'enabled: true'.
134
+ # All safety defaults are conservative - review before enabling in production.
135
+ # =========================================================================
136
+ subconscious_ai:
137
+ # BETA FEATURE - Must be explicitly enabled
138
+ enabled: false
139
+ beta_mode: true
140
+
141
+ # Model configuration
142
+ model_provider: "ollama" # ollama | lm_studio | openai_api | anthropic_api
143
+ model_name: "phi3.5:3.8b"
144
+ model_url: "http://localhost:11434"
145
+ # api_key: null # For API providers
146
+ # api_base_url: null
147
+
148
+ # Pulse configuration
149
+ pulse_interval_seconds: 120
150
+ pulse_backoff_enabled: true
151
+ pulse_backoff_max_seconds: 600
152
+
153
+ # Resource management
154
+ max_cpu_percent: 30.0
155
+ cycle_timeout_seconds: 30
156
+ rate_limit_per_hour: 50
157
+
158
+ # Operations
159
+ memory_sorting_enabled: true
160
+ enhanced_dreaming_enabled: true
161
+ micro_self_improvement_enabled: false # Initially disabled
162
+
163
+ # Safety
164
+ dry_run: true
165
+ log_all_decisions: true
166
+ audit_trail_path: "./data/subconscious_audit.jsonl"
167
+ max_memories_per_cycle: 10
data/subconscious_audit.jsonl ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ {"timestamp": "2026-02-18T20:22:40.980901+00:00", "operation": "dreaming", "input_count": 0, "output": {"message": "No weak memories to analyze"}, "elapsed_ms": 0.05879999662283808, "model_used": "mock-model", "dry_run": false, "error": null}
2
+ {"timestamp": "2026-02-18T20:23:27.667298+00:00", "operation": "dreaming", "input_count": 0, "output": {"message": "No weak memories to analyze"}, "elapsed_ms": 0.05950000195298344, "model_used": "mock-model", "dry_run": false, "error": null}
data/subconscious_evolution.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "updated_at": "2026-02-18T18:55:55.471022+00:00",
3
+ "cycle_count": 56,
4
+ "insights_generated": 0,
5
+ "current_cycle_interval": 1,
6
+ "schedule": {
7
+ "concept_every": 5,
8
+ "parallel_every": 3,
9
+ "value_every": 10,
10
+ "meta_every": 7,
11
+ "cleanup_every": 20
12
+ },
13
+ "activity_window": [],
14
+ "low_activity_streak": 0,
15
+ "last_cycle_metrics": {
16
+ "concepts": 0,
17
+ "parallels": 0,
18
+ "meta_insights": 0,
19
+ "valuations": 0,
20
+ "memories": 0,
21
+ "synapses": 0,
22
+ "adaptation": "none"
23
+ }
24
+ }
docker-compose.yml ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # NOTE: the top-level 'version' key is obsolete in Compose v2 and is ignored; kept for older tooling.
+ version: '3.8'
2
+
3
+ # MnemoCore Docker Compose
4
+ # ========================
5
+ # Full stack deployment with Redis, Qdrant, and MnemoCore API
6
+
7
+ services:
8
+ # ===========================================
9
+ # MnemoCore API Service
10
+ # ===========================================
11
+ mnemocore:
12
+ build:
13
+ context: .
14
+ dockerfile: Dockerfile
15
+ image: mnemocore:latest
16
+ container_name: mnemocore-api
17
+ ports:
18
+ - "8100:8100"
19
+ - "9090:9090" # Prometheus metrics
20
+ volumes:
21
+ - mnemocore_data:/app/data
22
+ - ./config.yaml:/app/config.yaml:ro
23
+ environment:
24
+ - HAIM_API_KEY=${HAIM_API_KEY}
25
+ - REDIS_URL=redis://redis:6379/0
26
+ - QDRANT_URL=http://qdrant:6333
27
+ - LOG_LEVEL=${LOG_LEVEL:-INFO}
28
+ - HOST=0.0.0.0
29
+ - PORT=8100
30
+ env_file:
31
+ - .env
32
+ healthcheck:
33
+ test: ["CMD", "python", "/app/scripts/ops/healthcheck.py"]
34
+ interval: 30s
35
+ timeout: 10s
36
+ retries: 3
37
+ start_period: 40s
38
+ depends_on:
39
+ redis:
40
+ condition: service_healthy
41
+ qdrant:
42
+ condition: service_healthy
43
+ networks:
44
+ - mnemocore-network
45
+ restart: unless-stopped
46
+ deploy:
47
+ resources:
48
+ limits:
49
+ memory: 2G
50
+ reservations:
51
+ memory: 512M
52
+
53
+ # ===========================================
54
+ # Redis - In-Memory Data Store
55
+ # ===========================================
56
+ redis:
57
+ image: redis:7.2-alpine
58
+ container_name: mnemocore-redis
59
+ ports:
60
+ - "6379:6379"
61
+ volumes:
62
+ - redis_data:/data
63
+ command: >
64
+ redis-server
65
+ --save 60 1
66
+ --loglevel warning
67
+ --maxmemory 512mb
68
+ --maxmemory-policy allkeys-lru
69
+ healthcheck:
70
+ test: ["CMD", "redis-cli", "ping"]
71
+ interval: 10s
72
+ timeout: 5s
73
+ retries: 5
74
+ start_period: 10s
75
+ networks:
76
+ - mnemocore-network
77
+ restart: unless-stopped
78
+ deploy:
79
+ resources:
80
+ limits:
81
+ memory: 512M
82
+
83
+ # ===========================================
84
+ # Qdrant - Vector Database
85
+ # ===========================================
86
+ qdrant:
87
+ image: qdrant/qdrant:latest
88
+ container_name: mnemocore-qdrant
89
+ ports:
90
+ - "6333:6333" # HTTP API
91
+ - "6334:6334" # gRPC API
92
+ volumes:
93
+ - qdrant_storage:/qdrant/storage
94
+ environment:
95
+ - QDRANT__SERVICE__GRPC_PORT=6334
96
+ - QDRANT__LOG_LEVEL=INFO
97
+ healthcheck:
98
+ # The official qdrant image does not ship curl, so a curl-based check always fails
+ # and blocks dependents gated on service_healthy; probe the HTTP port over TCP instead.
+ test: ["CMD-SHELL", "bash -c ':> /dev/tcp/127.0.0.1/6333' || exit 1"]
99
+ interval: 10s
100
+ timeout: 5s
101
+ retries: 5
102
+ start_period: 15s
103
+ networks:
104
+ - mnemocore-network
105
+ restart: unless-stopped
106
+ deploy:
107
+ resources:
108
+ limits:
109
+ memory: 4G
110
+
111
+ # ===========================================
112
+ # Networks
113
+ # ===========================================
114
+ networks:
115
+ mnemocore-network:
116
+ driver: bridge
117
+ name: mnemocore-net
118
+
119
+ # ===========================================
120
+ # Volumes
121
+ # ===========================================
122
+ volumes:
123
+ mnemocore_data:
124
+ name: mnemocore-data
125
+ redis_data:
126
+ name: mnemocore-redis-data
127
+ qdrant_storage:
128
+ name: mnemocore-qdrant-storage
docs/API.md ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MnemoCore API Reference (Beta)
2
+
3
+ ## Beta Notice
4
+
5
+ API contracts may change during beta without backward compatibility guarantees.
6
+ Use pinned commits if you need reproducibility.
7
+
8
+ ## Base URL
9
+
10
+ Default local API URL:
11
+ - `http://localhost:8100`
12
+
13
+ ## Endpoints
14
+
15
+ ### `GET /`
16
+ Basic service status.
17
+
18
+ ### `GET /health`
19
+ Returns health status, Redis connectivity, and engine stats.
20
+
21
+ ### `POST /store`
22
+ Store a memory.
23
+
24
+ Request body:
25
+ ```json
26
+ {
27
+ "content": "string",
28
+ "metadata": {"key": "value"},
29
+ "agent_id": "optional-string",
30
+ "ttl": 3600
31
+ }
32
+ ```
33
+
34
+ ### `POST /query`
35
+ Query semantic memory.
36
+
37
+ Request body:
38
+ ```json
39
+ {
40
+ "query": "string",
41
+ "top_k": 5,
42
+ "agent_id": "optional-string"
43
+ }
44
+ ```
45
+
46
+ ### `GET /memory/{memory_id}`
47
+ Fetch a memory by ID (Redis-first, engine fallback).
48
+
49
+ ### `DELETE /memory/{memory_id}`
50
+ Delete a memory by ID.
51
+
52
+ ### `POST /concept`
53
+ Define a concept for conceptual memory operations.
54
+
55
+ ### `POST /analogy`
56
+ Run analogy inference.
57
+
58
+ ### `GET /stats`
59
+ Return engine statistics.
60
+
61
+ ### `GET /metrics`
62
+ Prometheus metrics endpoint.
63
+
64
+ ## Example Requests
65
+
66
+ Store:
67
+ ```bash
68
+ curl -X POST http://localhost:8100/store \
69
+ -H "Content-Type: application/json" \
70
+ -d '{"content":"Birds can migrate long distances"}'
71
+ ```
72
+
73
+ Query:
74
+ ```bash
75
+ curl -X POST http://localhost:8100/query \
76
+ -H "Content-Type: application/json" \
77
+ -d '{"query":"animal migration","top_k":3}'
78
+ ```
79
+
80
+ ## Error Behavior
81
+
82
+ - `404` for missing memory IDs.
83
+ - In degraded infrastructure modes, API may still return successful core operations while external storage writes fail.
84
+
85
+ ## Compatibility Guidance
86
+
87
+ During beta, treat responses as evolving contracts:
88
+ - Parse defensively.
89
+ - Avoid rigid coupling to optional fields.
90
+ - Revalidate after version upgrades.
91
+
docs/ARCHITECTURE.md ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MnemoCore Architecture (Beta)
2
+
3
+ ## Beta Context
4
+
5
+ This document describes the current implementation direction in beta.
6
+ It is not a guarantee of final architecture, performance, or feature completeness.
7
+
8
+ ## Core Components
9
+
10
+ - `src/core/engine.py`: Main orchestration for memory storage, encoding, query, and synaptic augmentation.
11
+ - `src/core/binary_hdv.py`: Binary hyperdimensional vector operations.
12
+ - `src/core/tier_manager.py`: HOT/WARM/COLD placement and movement logic.
13
+ - `src/core/config.py`: Typed config loading from YAML + env overrides.
14
+ - `src/core/async_storage.py`: Async Redis metadata operations.
15
+ - `src/api/main.py`: FastAPI interface.
16
+
17
+ ## Memory Model
18
+
19
+ MnemoCore represents memory as high-dimensional vectors and metadata-rich nodes:
20
+
21
+ 1. Encode input text into vector representation.
22
+ 2. Store node in HOT tier initially.
23
+ 3. Apply reinforcement/decay dynamics (LTP-related logic).
24
+ 4. Move between tiers based on thresholds and access patterns.
25
+
26
+ ## Tiering Model
27
+
28
+ - **HOT**: In-memory dictionary for fastest access.
29
+ - **WARM**: Qdrant-backed where available; filesystem fallback when unavailable.
30
+ - **COLD**: Filesystem archival path for long-lived storage.
31
+
32
+ ## Query Flow (Current Beta)
33
+
34
+ Current query behavior prioritizes HOT tier recall and synaptic score augmentation.
35
+ Cross-tier retrieval is still evolving and should be treated as beta behavior.
36
+
37
+ ## Async + External Services
38
+
39
+ - Redis is used for async metadata and event stream operations.
40
+ - API startup checks Redis health and can operate in degraded mode.
41
+ - Qdrant usage is enabled through tier manager and can fall back to local files.
42
+
43
+ ## Observability
44
+
45
+ - Prometheus metrics endpoint mounted at `/metrics` in API server.
46
+ - Logging behavior controlled through config.
47
+
48
+ ## Practical Limitations
49
+
50
+ - Some roadmap functionality remains TODO-marked in code.
51
+ - Interface contracts may change across beta releases.
52
+ - Performance can vary significantly by hardware and data profile.
53
+
54
+ For active limitations and next work items, see `docs/ROADMAP.md`.
55
+
docs/BETA_POLICY.md ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MnemoCore Beta Policy
2
+
3
+ ## Status
4
+
5
+ MnemoCore is published as a **beta / development preview**.
6
+
7
+ This means:
8
+ - No production readiness claim.
9
+ - No availability, performance, or compatibility guarantees.
10
+ - No commitment to stable APIs between beta releases.
11
+ - Experimental behavior is expected.
12
+
13
+ ## No Promises / No Warranty
14
+
15
+ MnemoCore is provided "as is" under the MIT license.
16
+
17
+ - We do not guarantee correctness for any specific use case.
18
+ - We do not guarantee fitness for business-critical workloads.
19
+ - We do not guarantee long-term support for current interfaces.
20
+
21
+ Always validate outputs independently before operational use.
22
+
23
+ ## Recommended Usage During Beta
24
+
25
+ - Use in sandboxes, R&D, and controlled evaluation environments.
26
+ - Pin commit hashes for reproducibility.
27
+ - Treat data formats and endpoint contracts as potentially changing.
28
+ - Keep backups of data and configuration before upgrading.
29
+
30
+ ## Production Adoption Guidance
31
+
32
+ Before production usage in your own environment, you should perform:
33
+ - Independent reliability testing.
34
+ - Security and privacy review.
35
+ - Capacity and failure-mode validation.
36
+ - Rollback planning and observability setup.
37
+
38
+ ## Communication Principle
39
+
40
+ All public communication should describe MnemoCore as:
41
+ - Experimental,
42
+ - Beta,
43
+ - Subject to change,
44
+ - Without guarantees or promises.
45
+
46
+ ## Contact
47
+
48
+ - General contact: Robin@veristatesystems.com
49
+ - Security disclosure: Robin@veristatesystems.com
50
+
docs/MCP_IMPLEMENTATION_PLAN.md ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MnemoCore MCP Implementation Plan (Beta)
2
+
3
+ ## Goal
4
+
5
+ Expose MnemoCore capabilities through a Model Context Protocol (MCP) server so external LLM agents can safely store, query, and inspect memory with predictable contracts.
6
+
7
+ ## Scope (Phase 1)
8
+
9
+ ### In Scope
10
+
11
+ - MCP server process for local/dev use.
12
+ - Read/write memory tools mapped to existing engine/API capabilities.
13
+ - Basic auth + request limits aligned with existing API policy.
14
+ - Test coverage for MCP tool contracts and degraded dependencies.
15
+
16
+ ### Out of Scope (Phase 1)
17
+
18
+ - Multi-tenant policy engine.
19
+ - Full distributed consensus workflows.
20
+ - New memory semantics beyond existing endpoints.
21
+
22
+ ## Architecture Decision
23
+
24
+ Prefer **adapter-first** design:
25
+
26
+ - Keep `src/core` and `src/api` as source of truth.
27
+ - Add `src/mcp/server.py` (MCP transport + tool registry).
28
+ - Add `src/mcp/adapters/api_adapter.py` to reuse validated API contracts.
29
+ - Add `src/mcp/schemas.py` for tool input/output validation.
30
+
31
+ Reason: minimizes behavior drift and reuses existing validation/security paths.
32
+
33
+ ## Proposed MCP Tools (Phase 1)
34
+
35
+ 1. `memory_store`
36
+ - Input: `content`, `metadata?`, `agent_id?`, `ttl?`
37
+ - Backend: `POST /store`
38
+ 2. `memory_query`
39
+ - Input: `query`, `top_k?`, `agent_id?`
40
+ - Backend: `POST /query`
41
+ 3. `memory_get`
42
+ - Input: `memory_id`
43
+ - Backend: `GET /memory/{memory_id}`
44
+ 4. `memory_delete`
45
+ - Input: `memory_id`
46
+ - Backend: `DELETE /memory/{memory_id}`
47
+ 5. `memory_stats`
48
+ - Input: none
49
+ - Backend: `GET /stats`
50
+ 6. `memory_health`
51
+ - Input: none
52
+ - Backend: `GET /health`
53
+
54
+ Optional (Phase 1.1):
55
+ - `concept_define` and `analogy_solve` once primary tools are stable.
56
+
57
+ ## Security and Operational Guardrails
58
+
59
+ - Require API key passthrough from MCP server to MnemoCore API.
60
+ - Allowlist MCP tools (disable dangerous or experimental operations by default).
61
+ - Enforce per-tool timeout and payload limits.
62
+ - Structured logs with `trace_id`, `tool_name`, latency, status.
63
+ - Fail closed for auth errors; fail open only where existing API already degrades by design.
64
+
65
+ ## Delivery Milestones
66
+
67
+ ### M0: Foundations (1-2 days)
68
+
69
+ - Add MCP package structure.
70
+ - Add config section for MCP host/port/timeouts/tool allowlist.
71
+ - Add local run command and basic health check tool.
72
+
73
+ Exit criteria:
74
+ - MCP server starts and responds to health tool.
75
+
76
+ ### M1: Core Read/Write Tools (2-4 days)
77
+
78
+ - Implement `memory_store`, `memory_query`, `memory_get`, `memory_delete`.
79
+ - Map errors to stable MCP error format.
80
+ - Add contract tests with mocked API responses.
81
+
82
+ Exit criteria:
83
+ - Core memory flow works end-to-end from MCP client.
84
+
85
+ ### M2: Observability + Hardening (1-2 days)
86
+
87
+ - Add metrics counters/histograms for MCP tools.
88
+ - Add retry/backoff only for transient failures.
89
+ - Add degraded-mode tests (Redis/Qdrant unavailable).
90
+
91
+ Exit criteria:
92
+ - Clear diagnostics for failures and latency.
93
+
94
+ ### M3: Extended Cognitive Tools (optional, 1-2 days)
95
+
96
+ - Add `concept_define` and `analogy_solve`.
97
+ - Add docs examples for agent orchestration flows.
98
+
99
+ Exit criteria:
100
+ - Conceptual tools pass contract tests and are documented.
101
+
102
+ ## Test Strategy
103
+
104
+ - Unit tests: schema validation, adapter mapping, error translation.
105
+ - Functional tests: MCP client -> server -> API in local integration mode.
106
+ - Resilience tests: upstream timeout, 403 auth fail, 404 memory miss, degraded Redis.
107
+ - Regression gate: existing `tests/` suite remains green.
108
+
109
+ ## Rollout Plan
110
+
111
+ 1. Ship behind `mcp.enabled: false` default.
112
+ 2. Enable in beta environments only.
113
+ 3. Observe for one sprint (latency, error rate, tool usage).
114
+ 4. Promote to default-on after stability criteria are met.
115
+
116
+ ## Success Metrics
117
+
118
+ - >= 99% successful MCP tool calls in healthy environment.
119
+ - P95 MCP tool latency <= 300 ms for read operations (local setup target).
120
+ - Zero contract-breaking changes without changelog entry.
121
+
122
+ ## Minimal Backlog Tasks
123
+
124
+ 1. Create `src/mcp/server.py` bootstrap.
125
+ 2. Create adapter + schemas.
126
+ 3. Add MCP config in `config.yaml` + typed config model.
127
+ 4. Add tests in `tests/test_mcp_server.py` and `tests/test_mcp_contracts.py`.
128
+ 5. Add documentation section in README + API docs.
docs/PERFORMANCE.md ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MnemoCore Performance Documentation
2
+
3
+ ## Performance Targets (SLOs)
4
+
5
+ | Metric | Target | Description |
6
+ |--------|--------|-------------|
7
+ | `store()` P99 latency | < 100ms | Store a single memory |
8
+ | `query()` P99 latency | < 50ms | Query for similar memories |
9
+ | Throughput | > 1000 req/s | Sustained request rate |
10
+ | Memory overhead | < 100MB per 100k memories | RAM usage for storage |
11
+
12
+ ## Baseline Measurements
13
+
14
+ ### BinaryHDV Operations (1024 dimensions)
15
+
16
+ | Operation | Time (us) | Notes |
17
+ |-----------|-----------|-------|
18
+ | `xor_bind()` | ~5 | XOR binding of two vectors |
19
+ | `permute()` | ~5 | Cyclic permutation |
20
+ | `hamming_distance()` | ~3 | Distance calculation |
21
+ | `similarity()` | ~4 | Normalized similarity |
22
+
23
+ ### permute() Benchmark Results
24
+
25
+ `BinaryHDV.permute()` now uses one production path (`unpackbits` + `roll` + `packbits`) across all dimensions.
26
+
27
+ | Dimension | permute() (us) | Notes |
28
+ |-----------|----------------|-------|
29
+ | 512 | ~5.2 | Production path |
30
+ | 4096 | ~5.5 | Production path |
31
+ | 16384 | ~6.8 | Production path |
32
+ | 32768 | ~8.2 | Production path |
33
+ | 65536 | ~11.3 | Production path |
34
+ | 131072 | ~17.7 | Production path |
35
+
36
+ Run `python benchmarks/bench_permute.py` for machine-specific current numbers.
37
+
38
+ ## Load Testing
39
+
40
+ ### Using Locust
41
+
42
+ ```bash
43
+ # Install locust
44
+ pip install locust
45
+
46
+ # Run load test
47
+ cd tests/load
48
+ locust -f locustfile.py --host http://localhost:8100
49
+ ```
50
+
51
+ ### Using the Benchmark Script
52
+
53
+ ```bash
54
+ # Run 100k memory benchmark
55
+ python benchmarks/bench_100k_memories.py
56
+ ```
57
+
58
+ ## Performance Optimization Tips
59
+
60
+ 1. Use BinaryHDV instead of float HDV.
61
+ 2. Use batch operations for bulk work.
62
+ 3. Keep Redis connection pools right-sized.
63
+ 4. Enable Qdrant binary quantization for faster search.
64
+
65
+ ## Monitoring
66
+
67
+ Prometheus metrics are exposed at `/metrics` endpoint:
68
+ - `mnemocore_store_duration_seconds` - Store operation latency
69
+ - `mnemocore_query_duration_seconds` - Query operation latency
70
+ - `mnemocore_memory_count_total` - Total memories per tier
71
+ - `mnemocore_queue_length` - Subconscious queue length
docs/ROADMAP.md ADDED
@@ -0,0 +1,320 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MnemoCore Roadmap
2
+
3
+ ## Scope and Intent
4
+
5
+ This roadmap describes current known gaps and likely direction.
6
+ It is not a promise, delivery guarantee, or commitment to specific timelines.
7
+
8
+ ---
9
+
10
+ ## Version History
11
+
12
+ | Version | Phase | Status | Key Features |
13
+ |---------|-------|--------|--------------|
14
+ | 3.x | Core Architecture | ✅ Complete | Binary HDV, 3-Tier Storage, LTP/Decay |
15
+ | 4.0 | Cognitive Enhancements | ✅ Complete | XOR Attention, Bayesian LTP, Gap Detection, Immunology |
16
+ | 4.1 | Observability | ✅ Complete | Prometheus metrics, distributed tracing, project isolation |
17
+ | 4.2 | Stability | ✅ Complete | Async lock fixes, test suite hardening |
18
+ | 4.3 | Temporal Recall | ✅ Complete | Episodic chaining, chrono-weighting, sequential context |
19
+ | **5.x** | **The Perfect Brain** | 🔮 Planned | Multi-Modal, Emotional, Working Memory |
20
+
21
+ ---
22
+
23
+ ## Phase 5.x: The Perfect Brain
24
+
25
+ **Vision:** Transform MnemoCore from a sophisticated memory storage system into a truly cognitive architecture that functions as an artificial brain - but better.
26
+
27
+ ### 5.0 Multi-Modal Memory
28
+
29
+ **Goal:** Enable storage and retrieval of images, audio, code structures, and cross-modal associations.
30
+
31
+ ```
32
+ ┌─────────────────────────────────────────────────────────────────┐
33
+ │ CURRENT: Text-only encoding │
34
+ │ ────────────────────────────────────────────────────────────── │
35
+ │ store("User reported bug") → BinaryHDV │
36
+ │ │
37
+ │ FUTURE: Multi-modal encoding │
38
+ │ ────────────────────────────────────────────────────────────── │
39
+ │ store("Screenshot of error", image=bytes) → CrossModalHDV │
40
+ │ store("Voice note", audio=bytes) → AudioHDV │
41
+ │ bind(text_id, image_id, relation="illustrates") │
42
+ │ │
43
+ │ query("API error", modality="image") → screenshot.png │
44
+ │ query(image=bytes, modality="text") → "Related conversation" │
45
+ └─────────────────────────────────────────────────────────────────┘
46
+ ```
47
+
48
+ **Implementation Plan:**
49
+
50
+ | Component | Description | Dependencies |
51
+ |-----------|-------------|--------------|
52
+ | `MultiModalEncoder` | Abstract encoder protocol | - |
53
+ | `CLIPEncoder` | Vision encoding via CLIP | `transformers`, `torch` |
54
+ | `WhisperEncoder` | Audio encoding via Whisper | `openai-whisper` |
55
+ | `CodeEncoder` | AST-aware code encoding | `tree-sitter` |
56
+ | `CrossModalBinding` | VSA operations across modalities | BinaryHDV |
57
+
58
+ **New API Endpoints:**
59
+ ```
60
+ POST /store/multi - Store with multiple modalities
61
+ POST /query/cross-modal - Cross-modal semantic search
62
+ POST /bind - Bind modalities together
63
+ GET /memory/{id}/related - Get cross-modal related memories
64
+ ```
65
+
66
+ ---
67
+
68
+ ### 5.1 Emotional/Affective Layer
69
+
70
+ **Goal:** Enable emotion-weighted memory storage, retrieval, and decay - mimicking how biological memory prioritizes emotionally significant events.
71
+
72
+ ```
73
+ ┌─────────────────────────────────────────────────────────────────┐
74
+ │ EMOTIONAL DIMENSIONS │
75
+ │ ────────────────────────────────────────────────────────────── │
76
+ │ │
77
+ │ Valence: [-1.0 ──────────────── +1.0] │
78
+ │ (negative/unpleasant) (positive/pleasant) │
79
+ │ │
80
+ │ Arousal: [0.0 ────────────────── 1.0] │
81
+ │ (calm/neutral) (intense/urgent) │
82
+ │ │
83
+ │ EFFECT ON MEMORY: │
84
+ │ ────────────────────────────────────────────────────────────── │
85
+ │ High Arousal + Negative = "Flashbulb memory" (never forget) │
86
+ │ High Arousal + Positive = Strong consolidation │
87
+ │ Low Arousal = Faster decay (forgettable) │
88
+ │ │
89
+ └─────────────────────────────────────────────────────────────────┘
90
+ ```
91
+
92
+ **MemoryNode Extensions:**
93
+ ```python
94
+ @dataclass
95
+ class MemoryNode:
96
+ # ... existing fields ...
97
+
98
+ # Phase 5.1: Emotional tagging
99
+ emotional_valence: float = 0.0 # -1.0 (negative) to +1.0 (positive)
100
+ emotional_arousal: float = 0.0 # 0.0 (calm) to 1.0 (intense)
101
+ emotional_tags: List[str] = field(default_factory=list) # ["frustration", "joy", "urgency"]
102
+
103
+ def emotional_weight(self) -> float:
104
+ """Calculate memory importance based on emotional factors."""
105
+ # Arousal amplifies retention regardless of valence
106
+ # High arousal creates "flashbulb memories"
107
+ return abs(self.emotional_valence) * self.emotional_arousal
108
+ ```
109
+
110
+ **Modified LTP Formula:**
111
+ ```
112
+ S = I × log(1+A) × e^(-λT) × (1 + E)
113
+
114
+ Where E = emotional_weight() ∈ [0, 1]
115
+ ```
116
+
117
+ **Use Cases:**
118
+ - B2B outreach: "Customer was almost in tears when we fixed their issue" → HIGH priority
119
+ - Support tickets: "User furious about data loss" → Never forget, prioritize retrieval
120
+ - Positive feedback: "User loved the new feature" → Moderate retention
121
+
122
+ ---
123
+
124
+ ### 5.2 Working Memory Layer
125
+
126
+ **Goal:** Active cognitive workspace for goal-directed reasoning, not just passive storage.
127
+
128
+ ```
129
+ ┌─────────────────────────────────────────────────────────────────┐
130
+ │ COGNITIVE ARCHITECTURE │
131
+ │ │
132
+ │ ┌─────────────────────────────────────────────────────────┐ │
133
+ │ │ WORKING MEMORY (Active) │ │
134
+ │ │ Capacity: 7 ± 2 items │ │
135
+ │ │ ┌─────────┐ ┌─────────┐ ┌─────────┐ ┌─────────┐ │ │
136
+ │ │ │ Goal │ │ Context │ │ Focus │ │ Hold │ │ │
137
+ │ │ │ │ │ │ │ │ │ │ │ │
138
+ │ │ └─────────┘ └─────────┘ └─────────┘ └─────────┘ │ │
139
+ │ └─────────────────────────────────────────────────────────┘ │
140
+ │ ↕ │
141
+ │ ┌─────────────────────────────────────────────────────────┐ │
142
+ │ │ HOT TIER (Fast Access) │ │
143
+ │ │ ~2,000 memories, <1ms access │ │
144
+ │ └─────────────────────────────────────────────────────────┘ │
145
+ │ ↕ │
146
+ │ ┌─────────────────────────────────────────────────────────┐ │
147
+ │ │ WARM TIER (Qdrant/Redis) │ │
148
+ │ │ ~100,000 memories, <10ms access │ │
149
+ │ └─────────────────────────────────────────────────────────┘ │
150
+ │ ↕ │
151
+ │ ┌─────────────────────────────────────────────────────────┐ │
152
+ │ │ COLD TIER (Archive) │ │
153
+ │ │ Unlimited, <100ms access │ │
154
+ │ └─────────────────────────────────────────────────────────┘ │
155
+ │ │
156
+ └─────────────────────────────────────────────────────────────────┘
157
+ ```
158
+
159
+ **Working Memory API:**
160
+ ```python
161
+ # Create working memory instance
162
+ wm = engine.working_memory(capacity=7)
163
+
164
+ # Set active goal
165
+ wm.set_goal("Troubleshoot authentication error")
166
+
167
+ # Load relevant context
168
+ wm.focus_on(await engine.query("auth error", top_k=5))
169
+
170
+ # Hold important constraints
171
+ wm.hold("User is on deadline - prioritize speed over elegance")
172
+
173
+ # Query with working memory context
174
+ results = wm.query("related issues")
175
+ # Results are RE-RANKED based on current goal + focus + held items
176
+
177
+ # Get context summary for LLM
178
+ context = wm.context_summary()
179
+ # → "Working on: auth troubleshooting
180
+ # Focus: Recent OAuth errors
181
+ # Constraint: Time pressure"
182
+ ```
183
+
184
+ **Implementation Components:**
185
+ | Component | Description |
186
+ |-----------|-------------|
187
+ | `WorkingMemory` | Active workspace class |
188
+ | `GoalContext` | Goal tracking and binding |
189
+ | `FocusBuffer` | Currently attended items |
190
+ | `HoldBuffer` | Constraints and important facts |
191
+ | `ContextualQuery` | Goal-directed retrieval |
192
+
193
+ ---
194
+
195
+ ### 5.3 Multi-Agent / Collaborative Memory
196
+
197
+ **Goal:** Enable memory sharing between agents while maintaining provenance and privacy.
198
+
199
+ ```
200
+ ┌─────────────────────────────────────────────────────────────────┐
201
+ │ COLLABORATIVE MEMORY │
202
+ │ │
203
+ │ Agent A Shared Memory Agent B │
204
+ │ ┌────────┐ ┌──────────────┐ ┌────────┐ │
205
+ │ │ Private│ │ │ │ Private│ │
206
+ │ │ Memory │◄────►│ Consensus │◄──────►│ Memory │ │
207
+ │ └────────┘ │ Layer │ └────────┘ │
208
+ │ │ │ │
209
+ │ Agent C │ Provenance │ Agent D │
210
+ │ ┌────────┐ │ Tracking │ ┌────────┐ │
211
+ │ │ Private│◄────►│ │◄──────►│ Private│ │
212
+ │ │ Memory │ │ Privacy │ │ Memory │ │
213
+ │ └────────┘ │ Filtering │ └────────┘ │
214
+ │ └──────────────┘ │
215
+ │ │
216
+ └─────────────────────────────────────────────────────────────────┘
217
+ ```
218
+
219
+ **Features:**
220
+ - Memory provenance: Track which agent created/modified each memory
221
+ - Privacy levels: Private, shared-with-group, public
222
+ - Conflict resolution: When agents disagree on facts
223
+ - Collective intelligence: Aggregate insights across agents
224
+
225
+ ---
226
+
227
+ ### 5.4 Continual Learning
228
+
229
+ **Goal:** Enable online adaptation without catastrophic forgetting.
230
+
231
+ ```
232
+ ┌─────────────────────────────────────────────────────────────────┐
233
+ │ CONTINUAL LEARNING │
234
+ │ │
235
+ │ Traditional ML: Train → Deploy → (forget) → Retrain │
236
+ │ │
237
+ │ MnemoCore 5.4: Learn → Consolidate → Adapt → Learn → ... │
238
+ │ ↑______________| │
239
+ │ │
240
+ │ KEY MECHANISMS: │
241
+ │ ───────────────────────────────────────────────────────────── │
242
+ │ • Elastic Weight Consolidation (EWC) for encoder │
243
+ │ • Replay-based consolidation during "sleep" cycles │
244
+ │ • Progressive neural networks for new domains │
245
+ │ • Meta-learning for rapid adaptation │
246
+ │ │
247
+ └─────────────────────────────────────────────────────────────────┘
248
+ ```
249
+
250
+ ---
251
+
252
+ ## Integration Priorities
253
+
254
+ ### Agent Frameworks
255
+ | Framework | Priority | Use Case |
256
+ |-----------|----------|----------|
257
+ | Open Claw | ⭐⭐⭐⭐⭐ | Primary use case, deep integration |
258
+ | LangChain | ⭐⭐⭐⭐ | Memory provider plugin |
259
+ | CrewAI | ⭐⭐⭐⭐ | Shared memory between agents |
260
+ | AutoGen | ⭐⭐⭐ | Conversation memory backend |
261
+ | LlamaIndex | ⭐⭐⭐ | Vector store adapter |
262
+
263
+ ### AI Platforms
264
+ | Platform | Priority | Integration Type |
265
+ |----------|----------|------------------|
266
+ | Claude (Anthropic) | ⭐⭐⭐⭐⭐ | MCP server (existing) |
267
+ | OpenAI Codex | ⭐⭐⭐⭐⭐ | API + function calling |
268
+ | Ollama | ⭐⭐⭐⭐ | Native memory backend |
269
+ | LM Studio | ⭐⭐⭐ | Plugin architecture |
270
+ | Gemini | ⭐⭐⭐ | API adapter |
271
+
272
+ ---
273
+
274
+ ## Research Opportunities
275
+
276
+ ### Academic Collaborations
277
+ | Area | Institutions | Relevance |
278
+ |------|-------------|-----------|
279
+ | Hyperdimensional Computing | Stanford, IBM Research, Redwood Center | Core HDC/VSA theory |
280
+ | Computational Neuroscience | MIT, UCL, KTH | Biological validation |
281
+ | Cognitive Architecture | Carnegie Mellon, University of Michigan | SOAR/ACT-R comparison |
282
+ | Neuromorphic Computing | Intel Labs, ETH Zürich | Hardware acceleration |
283
+
284
+ ### Publication Opportunities
285
+ 1. **"Binary HDC for Long-term AI Memory"** - Novel approach to persistent memory
286
+ 2. **"Episodic Chaining in Vector Memory Systems"** - Phase 4.3 temporal features
287
+ 3. **"XOR Attention Masking for Memory Isolation"** - Project isolation innovation
288
+ 4. **"Bayesian LTP in Artificial Memory Systems"** - Biological plausibility
289
+
290
+ ---
291
+
292
+ ## Known Gaps (Current Beta)
293
+
294
+ - Query path is still primarily HOT-tier-centric in current engine behavior.
295
+ - Some consolidation pathways are partial or under active refinement.
296
+ - Certain integrations (LLM/Nightlab) are intentionally marked as TODO.
297
+ - Distributed-scale behavior from long-form blueprints is not fully productized.
298
+
299
+ ---
300
+
301
+ ## Near-Term Priorities (Pre-5.0)
302
+
303
+ 1. Improve cross-tier retrieval consistency.
304
+ 2. Harden consolidation and archival flow.
305
+ 3. Improve deletion semantics and API consistency.
306
+ 4. Expand tests around degraded dependency modes (Redis/Qdrant outages).
307
+ 5. Stabilize API contracts and publish versioned compatibility notes.
308
+ 6. MCP server integration for agent tool access.
309
+
310
+ ---
311
+
312
+ ## Not a Commitment
313
+
314
+ Items above are directional only.
315
+ Order, scope, and implementation details can change during development.
316
+
317
+ ---
318
+
319
+ *Last Updated: 2025-02-18*
320
+ *Current Version: 4.3.0*
docs/SELF_IMPROVEMENT_DEEP_DIVE.md ADDED
@@ -0,0 +1,279 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MnemoCore Self-Improvement Deep Dive
2
+
3
+ Status: Design document (pre-implementation)
4
+ Date: 2025-02-18
5
+ Scope: Latent, always-on memory self-improvement loop that runs safely in production-like beta.
6
+
7
+ ## 1. Purpose
8
+
9
+ This document defines a production-safe design for a latent self-improvement loop in MnemoCore.
10
+ The goal is to continuously improve memory quality over time without corrupting truth, overloading resources, or breaking temporal-memory behavior.
11
+
12
+ Primary outcomes:
13
+ - Better memory quality (clarity, consistency, retrieval utility).
14
+ - Better long-term structure (less duplication, stronger semantic links).
15
+ - Preserved auditability and rollback.
16
+ - Compatibility with temporal timelines (`previous_id`, `unix_timestamp`, time-range search).
17
+
18
+ ## 2. Current System Baseline
19
+
20
+ Relevant existing mechanisms already in code:
21
+ - `HAIMEngine.store/query` orchestration and subconscious queue (`src/core/engine.py`).
22
+ - Background dream strengthening and synaptic binding (`src/core/engine.py`).
23
+ - Gap detection and autonomous gap filling (`src/core/gap_detector.py`, `src/core/gap_filler.py`).
24
+ - Semantic consolidation workers (`src/core/semantic_consolidation.py`, `src/subconscious/consolidation_worker.py`).
25
+ - Subconscious daemon loop with LLM-powered cycles (`src/subconscious/daemon.py`).
26
+ - Temporal memory fields in node model (`src/core/node.py`): `previous_id`, `unix_timestamp`, `iso_date`.
27
+ - Tiered persistence and time-range aware search (`src/core/tier_manager.py`, `src/core/qdrant_store.py`).
28
+
29
+ Implication: Self-improvement should reuse these pathways, not bypass them.
30
+
31
+ ## 3. Problem Definition
32
+
33
+ Without a dedicated self-improvement loop, memory quality drifts:
34
+ - Duplicate or near-duplicate content accumulates.
35
+ - Weakly structured notes remain unnormalized.
36
+ - Conflicting memories are not actively reconciled.
37
+ - Query utility depends too much on initial storage quality.
38
+
39
+ At the same time, naive autonomous rewriting is risky:
40
+ - Hallucinated edits can reduce truth quality.
41
+ - Over-aggressive rewriting can erase provenance.
42
+ - Continuous background jobs can starve main workloads.
43
+
44
+ ## 4. Design Principles
45
+
46
+ 1. Append-only evolution, never destructive overwrite.
47
+ 2. Improvement proposals must pass validation gates before commit.
48
+ 3. Full provenance and rollback path for every derived memory.
49
+ 4. Temporal consistency is mandatory (timeline must remain navigable).
50
+ 5. Resource budgets and kill switches must exist from day 1.
51
+
52
+ ## 5. Target Architecture
53
+
54
+ ### 5.1 New Component
55
+
56
+ Add `SelfImprovementWorker` as a background worker (similar lifecycle style to consolidation/gap-filler workers).
57
+
58
+ Suggested location:
59
+ - `src/subconscious/self_improvement_worker.py`
60
+
61
+ Responsibilities:
62
+ - Select candidates from HOT/WARM.
63
+ - Produce improvement proposals (rule-based first, optional LLM later).
64
+ - Validate proposals.
65
+ - Commit accepted proposals via `engine.store(...)`.
66
+ - Link provenance metadata.
67
+ - Emit metrics and decision logs.
68
+
69
+ ### 5.2 Data Flow
70
+
71
+ 1. Candidate Selection
72
+ 2. Proposal Generation
73
+ 3. Validation & Scoring
74
+ 4. Commit as New Memory
75
+ 5. Link Graph/Timeline
76
+ 6. Monitor + Feedback Loop
77
+
78
+ No in-place mutation of existing memory content.
79
+
80
+ ### 5.3 Integration Points
81
+
82
+ - Read candidates: `TierManager` (`hot`, optional warm sampling).
83
+ - Commit: `HAIMEngine.store(...)` so all normal indexing/persistence paths apply.
84
+ - Timeline compatibility: preserve `previous_id` semantics and set provenance fields.
85
+ - Optional post-effects: trigger low-priority synapse/link updates.
86
+
87
+ ## 6. Memory Model Additions (Metadata, not schema break)
88
+
89
+ Use metadata keys first (backward compatible):
90
+ - `source: "self_improvement"`
91
+ - `improvement_type: "normalize" | "summarize" | "deduplicate" | "reconcile"`
92
+ - `derived_from: "<node_id>"`
93
+ - `derived_from_many: [node_ids...]` (for merge/reconcile)
94
+ - `improvement_score: float`
95
+ - `validator_scores: { ... }`
96
+ - `supersedes: "<node_id>"` (logical supersedence, not deletion)
97
+ - `version_tag: "vN"`
98
+ - `safety_mode: "strict" | "balanced"`
99
+
100
+ Note: Keep temporal fields from `MemoryNode` untouched and naturally generated on store.
101
+
102
+ ## 7. Candidate Selection Strategy
103
+
104
+ Initial heuristics (cheap and deterministic):
105
+ - High access + low confidence retrieval history.
106
+ - Conflicting memories in same topical cluster.
107
+ - Redundant near-duplicates.
108
+ - Old high-value memories needing compaction.
109
+
110
+ Selection constraints:
111
+ - Batch cap per cycle.
112
+ - Max candidates per source cluster.
113
+ - Cooldown per `node_id` to avoid thrashing.
114
+
115
+ ## 8. Proposal Generation Strategy
116
+
117
+ Phase A (no LLM dependency):
118
+ - Normalize formatting.
119
+ - Metadata repair/completion.
120
+ - Deterministic summary extraction.
121
+ - Exact/near duplicate merge suggestion.
122
+
123
+ Phase B (LLM-assisted, guarded):
124
+ - Rewrite for clarity.
125
+ - Multi-memory reconciliation draft.
126
+ - Explicit uncertainty markup if conflict unresolved.
127
+
128
+ All proposals must include rationale + structured diff summary.
129
+
130
+ ## 9. Validation Gates (Critical)
131
+
132
+ A proposal is committed only if all required gates pass:
133
+
134
+ 1. Semantic drift gate
135
+ - Similarity to origin must stay above threshold unless `improvement_type=reconcile`.
136
+
137
+ 2. Fact safety gate
138
+ - No new unsupported claims for strict mode.
139
+ - If unresolved conflict: enforce explicit uncertainty markers.
140
+
141
+ 3. Structure gate
142
+ - Must improve readability/compactness score beyond threshold.
143
+
144
+ 4. Policy gate
145
+ - Block forbidden metadata changes.
146
+ - Block sensitive tags crossing trust boundaries.
147
+
148
+ 5. Resource gate
149
+ - Cycle budget, latency budget, queue/backpressure checks.
150
+
151
+ Rejected proposals are logged but not committed.
152
+
153
+ ## 10. Interaction with Temporal Memory (Hard Requirement)
154
+
155
+ This design must not break timeline behavior introduced around:
156
+ - `previous_id` chaining
157
+ - `unix_timestamp` payload filtering
158
+ - Qdrant time-range retrieval
159
+
160
+ Rules:
161
+ - Every improved memory is a new timeline event (new node id).
162
+ - `derived_from` models lineage; `previous_id` continues temporal sequence.
163
+ - Query paths that use `time_range` must continue functioning identically.
164
+ - Do not bypass `TierManager.add_memory` or Qdrant payload generation.
165
+
166
+ ## 11. Safety Controls & Operations
167
+
168
+ Mandatory controls:
169
+ - Config kill switch: `self_improvement_enabled: false` by default initially.
170
+ - Dry-run mode: generate + validate, but do not store.
171
+ - Strict mode for early rollout.
172
+ - Per-cycle hard caps (count, wall-clock, token budget).
173
+ - Circuit breaker on repeated validation failures.
174
+
175
+ Operational observability:
176
+ - Attempted/accepted/rejected counters.
177
+ - Rejection reasons (as cardinality-safe labels).
178
+ - End-to-end cycle duration.
179
+ - Queue depth and backlog age.
180
+ - Quality delta trend over time.
181
+
182
+ ## 12. Suggested Config Block
183
+
184
+ Add under `haim.dream_loop` or sibling block `haim.self_improvement`:
185
+
186
+ ```yaml
187
+ self_improvement:
188
+ enabled: false
189
+ dry_run: true
190
+ safety_mode: "strict" # strict | balanced
191
+ interval_seconds: 300
192
+ batch_size: 8
193
+ max_cycle_seconds: 20
194
+ max_candidates_per_topic: 2
195
+ cooldown_minutes: 120
196
+ min_improvement_score: 0.15
197
+ min_semantic_similarity: 0.82
198
+ allow_llm_rewrite: false
199
+ ```
200
+
201
+ ## 13. Metrics (Proposed)
202
+
203
+ - `mnemocore_self_improve_attempts_total`
204
+ - `mnemocore_self_improve_commits_total`
205
+ - `mnemocore_self_improve_rejects_total`
206
+ - `mnemocore_self_improve_cycle_duration_seconds`
207
+ - `mnemocore_self_improve_candidates_in_cycle`
208
+ - `mnemocore_self_improve_quality_delta`
209
+ - `mnemocore_self_improve_backpressure_skips_total`
210
+
211
+ ## 14. Phased Implementation Plan
212
+
213
+ Phase 0: Instrumentation + dry-run only
214
+ - Add worker scaffold + metrics + decision logs.
215
+ - No writes.
216
+
217
+ Phase 1: Deterministic improvements only
218
+ - Metadata normalization, duplicate handling suggestions.
219
+ - Strict validation.
220
+ - Commit append-only derived nodes.
221
+
222
+ Phase 2: Controlled LLM improvements
223
+ - Enable `allow_llm_rewrite` behind feature flag.
224
+ - Add stricter validation and capped throughput.
225
+
226
+ Phase 3: Reconciliation and adaptive policies
227
+ - Multi-memory conflict reconciliation.
228
+ - Learning policies from acceptance/rejection outcomes.
229
+
230
+ ## 15. Test Strategy
231
+
232
+ Unit tests:
233
+ - Candidate selection determinism and cooldown behavior.
234
+ - Validation gates (pass/fail matrices).
235
+ - Provenance metadata correctness.
236
+
237
+ Integration tests:
238
+ - Store/query behavior unchanged under disabled mode.
239
+ - Time-range query still correct with improved nodes present.
240
+ - Qdrant payload contains expected temporal + provenance fields.
241
+
242
+ Soak/load tests:
243
+ - Worker under sustained ingest.
244
+ - Backpressure behavior.
245
+ - No unbounded queue growth.
246
+
247
+ Regression guardrails:
248
+ - No overwrite of original content.
249
+ - No bypass path around `engine.store`.
250
+
251
+ ## 16. Risks and Mitigations
252
+
253
+ Risk: hallucinated improvements
254
+ Mitigation: strict mode, no-LLM phase first, fact safety gate.
255
+
256
+ Risk: timeline noise from too many derived nodes
257
+ Mitigation: cooldown, batch caps, minimum score thresholds.
258
+
259
+ Risk: resource contention
260
+ Mitigation: cycle time caps, skip when main queue/backlog high.
261
+
262
+ Risk: provenance complexity
263
+ Mitigation: standardized metadata contract and audit logs.
264
+
265
+ ## 17. Open Decisions
266
+
267
+ 1. Should self-improved nodes be visible by default in top-k query, or weighted down unless requested?
268
+ 2. Should `supersedes` influence retrieval ranking automatically?
269
+ 3. Do we need a dedicated “truth tier” for validated reconciled memories?
270
+
271
+ ## 18. Recommended Next Step
272
+
273
+ Implement Phase 0 only:
274
+ - Worker skeleton
275
+ - Config block
276
+ - Metrics
277
+ - Dry-run reports
278
+
279
+ Then review logs for 1-2 weeks before enabling any writes.
git_status.txt ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ git : warning: could not
2
+ open directory '.tmp_py
3
+ test/pytest-of-Robin/':
4
+ Permission denied
5
+ At line:1 char:1
6
+ + git status 2>&1 | Out-
7
+ File -FilePath git_statu
8
+ s.txt -Encoding utf8; G
9
+ ...
10
+ + ~~~~~~~~~~~~~~~
11
+ + CategoryInfo
12
+ : NotSpecified
13
+ : (warning: could .
14
+ ..rmission denied:S
15
+ tring) [], RemoteEx c
16
+ eption
17
+ + FullyQualifiedErr
18
+ orId : NativeComman
19
+ dError
20
+
21
+ warning: could not open
22
+ directory 'pytest_base_t
23
+ emp/': Permission denied
24
+ On branch main
25
+ Your branch is up to date with 'origin/main'.
26
+
27
+ Changes not staged for commit:
28
+ (use "git add/rm <file>..." to update what will be committed)
29
+ (use "git restore <file>..." to discard changes in working directory)
30
+ modified: .github/workflows/ci.yml
31
+ deleted: MnemoCore_Review_v2.docx
32
+ modified: src/api/main.py
33
+ modified: src/core/engine.py
34
+ modified: src/core/tier_manager.py
35
+ modified: src/llm_integration.py
36
+
37
+ Untracked files:
38
+ (use "git add <file>..." to include in what will be committed)
39
+ .tmp_verify_phase43/
40
+ git_status.txt
41
+ scripts/insert_rlm_endpoint.py
42
+ scripts/insert_rlm_integrator.py
43
+ src/core/recursive_synthesizer.py
44
+ src/core/ripple_context.py
45
+ test_regression_output.txt
46
+ test_rlm_output.txt
47
+ tests/test_e2e_flow.py
48
+ tests/test_phase43_regressions.py
49
+ tests/test_recursive_synthesizer.py
50
+
51
+ no changes added to commit (use "git add" and/or "git commit -a")
grafana-dashboard.json ADDED
@@ -0,0 +1,954 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "annotations": {
3
+ "list": [
4
+ {
5
+ "builtIn": 1,
6
+ "datasource": {
7
+ "type": "grafana",
8
+ "uid": "-- Grafana --"
9
+ },
10
+ "enable": true,
11
+ "hide": true,
12
+ "iconColor": "rgba(0, 211, 255, 1)",
13
+ "name": "Annotations & Alerts",
14
+ "type": "dashboard"
15
+ }
16
+ ]
17
+ },
18
+ "editable": true,
19
+ "fiscalYearStartMonth": 0,
20
+ "graphTooltip": 0,
21
+ "id": null,
22
+ "links": [],
23
+ "liveNow": false,
24
+ "panels": [
25
+ {
26
+ "datasource": {
27
+ "type": "prometheus",
28
+ "uid": "${DS_PROMETHEUS}"
29
+ },
30
+ "fieldConfig": {
31
+ "defaults": {
32
+ "color": {
33
+ "mode": "palette-classic"
34
+ },
35
+ "custom": {
36
+ "axisCenteredZero": false,
37
+ "axisColorMode": "text",
38
+ "axisLabel": "",
39
+ "axisPlacement": "auto",
40
+ "barAlignment": 0,
41
+ "drawStyle": "line",
42
+ "fillOpacity": 10,
43
+ "gradientMode": "none",
44
+ "hideFrom": {
45
+ "legend": false,
46
+ "tooltip": false,
47
+ "viz": false
48
+ },
49
+ "lineInterpolation": "linear",
50
+ "lineWidth": 1,
51
+ "pointSize": 5,
52
+ "scaleDistribution": {
53
+ "type": "linear"
54
+ },
55
+ "showPoints": "auto",
56
+ "spanNulls": false,
57
+ "stacking": {
58
+ "group": "A",
59
+ "mode": "none"
60
+ },
61
+ "thresholdsStyle": {
62
+ "mode": "off"
63
+ }
64
+ },
65
+ "mappings": [],
66
+ "thresholds": {
67
+ "mode": "absolute",
68
+ "steps": [
69
+ {
70
+ "color": "green",
71
+ "value": null
72
+ },
73
+ {
74
+ "color": "red",
75
+ "value": 80
76
+ }
77
+ ]
78
+ }
79
+ },
80
+ "overrides": []
81
+ },
82
+ "gridPos": {
83
+ "h": 8,
84
+ "w": 12,
85
+ "x": 0,
86
+ "y": 0
87
+ },
88
+ "id": 1,
89
+ "options": {
90
+ "legend": {
91
+ "calcs": [],
92
+ "displayMode": "list",
93
+ "placement": "bottom",
94
+ "showLegend": true
95
+ },
96
+ "tooltip": {
97
+ "mode": "single",
98
+ "sort": "none"
99
+ }
100
+ },
101
+ "targets": [
102
+ {
103
+ "datasource": {
104
+ "type": "prometheus",
105
+ "uid": "${DS_PROMETHEUS}"
106
+ },
107
+ "editorMode": "code",
108
+ "expr": "rate(haim_api_request_count_total[5m])",
109
+ "legendFormat": "{{endpoint}} - {{method}}",
110
+ "range": true,
111
+ "refId": "A"
112
+ }
113
+ ],
114
+ "title": "API Request Rate (5m avg)",
115
+ "type": "timeseries"
116
+ },
117
+ {
118
+ "datasource": {
119
+ "type": "prometheus",
120
+ "uid": "${DS_PROMETHEUS}"
121
+ },
122
+ "fieldConfig": {
123
+ "defaults": {
124
+ "color": {
125
+ "mode": "palette-classic"
126
+ },
127
+ "custom": {
128
+ "axisCenteredZero": false,
129
+ "axisColorMode": "text",
130
+ "axisLabel": "",
131
+ "axisPlacement": "auto",
132
+ "barAlignment": 0,
133
+ "drawStyle": "line",
134
+ "fillOpacity": 10,
135
+ "gradientMode": "none",
136
+ "hideFrom": {
137
+ "legend": false,
138
+ "tooltip": false,
139
+ "viz": false
140
+ },
141
+ "lineInterpolation": "linear",
142
+ "lineWidth": 1,
143
+ "pointSize": 5,
144
+ "scaleDistribution": {
145
+ "type": "linear"
146
+ },
147
+ "showPoints": "auto",
148
+ "spanNulls": false,
149
+ "stacking": {
150
+ "group": "A",
151
+ "mode": "none"
152
+ },
153
+ "thresholdsStyle": {
154
+ "mode": "off"
155
+ }
156
+ },
157
+ "mappings": [],
158
+ "thresholds": {
159
+ "mode": "absolute",
160
+ "steps": [
161
+ {
162
+ "color": "green",
163
+ "value": null
164
+ },
165
+ {
166
+ "color": "red",
167
+ "value": 80
168
+ }
169
+ ]
170
+ }
171
+ },
172
+ "overrides": []
173
+ },
174
+ "gridPos": {
175
+ "h": 8,
176
+ "w": 12,
177
+ "x": 12,
178
+ "y": 0
179
+ },
180
+ "id": 2,
181
+ "options": {
182
+ "legend": {
183
+ "calcs": [],
184
+ "displayMode": "list",
185
+ "placement": "bottom",
186
+ "showLegend": true
187
+ },
188
+ "tooltip": {
189
+ "mode": "single",
190
+ "sort": "none"
191
+ }
192
+ },
193
+ "targets": [
194
+ {
195
+ "datasource": {
196
+ "type": "prometheus",
197
+ "uid": "${DS_PROMETHEUS}"
198
+ },
199
+ "editorMode": "code",
200
+ "expr": "rate(haim_api_request_latency_seconds_sum[5m]) / rate(haim_api_request_latency_seconds_count[5m])",
201
+ "legendFormat": "{{endpoint}}",
202
+ "range": true,
203
+ "refId": "A"
204
+ }
205
+ ],
206
+ "title": "API Avg Latency (5m avg)",
207
+ "type": "timeseries"
208
+ },
209
+ {
210
+ "datasource": {
211
+ "type": "prometheus",
212
+ "uid": "${DS_PROMETHEUS}"
213
+ },
214
+ "fieldConfig": {
215
+ "defaults": {
216
+ "color": {
217
+ "mode": "thresholds"
218
+ },
219
+ "mappings": [],
220
+ "thresholds": {
221
+ "mode": "absolute",
222
+ "steps": [
223
+ {
224
+ "color": "green",
225
+ "value": null
226
+ },
227
+ {
228
+ "color": "red",
229
+ "value": 80
230
+ }
231
+ ]
232
+ }
233
+ },
234
+ "overrides": []
235
+ },
236
+ "gridPos": {
237
+ "h": 8,
238
+ "w": 12,
239
+ "x": 0,
240
+ "y": 8
241
+ },
242
+ "id": 3,
243
+ "options": {
244
+ "orientation": "auto",
245
+ "reduceOptions": {
246
+ "calcs": [
247
+ "lastNotNull"
248
+ ],
249
+ "fields": "",
250
+ "values": false
251
+ },
252
+ "showThresholdLabels": false,
253
+ "textMode": "auto"
254
+ },
255
+ "targets": [
256
+ {
257
+ "datasource": {
258
+ "type": "prometheus",
259
+ "uid": "${DS_PROMETHEUS}"
260
+ },
261
+ "editorMode": "code",
262
+ "expr": "haim_engine_memory_total",
263
+ "legendFormat": "{{tier}}",
264
+ "range": true,
265
+ "refId": "A"
266
+ }
267
+ ],
268
+ "title": "Memory Count per Tier",
269
+ "type": "stat"
270
+ },
271
+ {
272
+ "datasource": {
273
+ "type": "prometheus",
274
+ "uid": "${DS_PROMETHEUS}"
275
+ },
276
+ "fieldConfig": {
277
+ "defaults": {
278
+ "color": {
279
+ "mode": "thresholds"
280
+ },
281
+ "mappings": [],
282
+ "thresholds": {
283
+ "mode": "absolute",
284
+ "steps": [
285
+ {
286
+ "color": "green",
287
+ "value": null
288
+ },
289
+ {
290
+ "color": "yellow",
291
+ "value": 50
292
+ },
293
+ {
294
+ "color": "red",
295
+ "value": 100
296
+ }
297
+ ]
298
+ }
299
+ },
300
+ "overrides": []
301
+ },
302
+ "gridPos": {
303
+ "h": 8,
304
+ "w": 12,
305
+ "x": 12,
306
+ "y": 8
307
+ },
308
+ "id": 4,
309
+ "options": {
310
+ "orientation": "auto",
311
+ "reduceOptions": {
312
+ "calcs": [
313
+ "lastNotNull"
314
+ ],
315
+ "fields": "",
316
+ "values": false
317
+ },
318
+ "showThresholdLabels": false,
319
+ "textMode": "auto"
320
+ },
321
+ "targets": [
322
+ {
323
+ "datasource": {
324
+ "type": "prometheus",
325
+ "uid": "${DS_PROMETHEUS}"
326
+ },
327
+ "editorMode": "code",
328
+ "expr": "mnemocore_queue_length",
329
+ "legendFormat": "Queue Length",
330
+ "range": true,
331
+ "refId": "A"
332
+ }
333
+ ],
334
+ "title": "Subconscious Queue Length",
335
+ "type": "stat"
336
+ },
337
+ {
338
+ "datasource": {
339
+ "type": "prometheus",
340
+ "uid": "${DS_PROMETHEUS}"
341
+ },
342
+ "fieldConfig": {
343
+ "defaults": {
344
+ "color": {
345
+ "mode": "palette-classic"
346
+ },
347
+ "custom": {
348
+ "axisCenteredZero": false,
349
+ "axisColorMode": "text",
350
+ "axisLabel": "",
351
+ "axisPlacement": "auto",
352
+ "barAlignment": 0,
353
+ "drawStyle": "line",
354
+ "fillOpacity": 10,
355
+ "gradientMode": "none",
356
+ "hideFrom": {
357
+ "legend": false,
358
+ "tooltip": false,
359
+ "viz": false
360
+ },
361
+ "lineInterpolation": "linear",
362
+ "lineWidth": 1,
363
+ "pointSize": 5,
364
+ "scaleDistribution": {
365
+ "type": "linear"
366
+ },
367
+ "showPoints": "auto",
368
+ "spanNulls": false,
369
+ "stacking": {
370
+ "group": "A",
371
+ "mode": "none"
372
+ },
373
+ "thresholdsStyle": {
374
+ "mode": "off"
375
+ }
376
+ },
377
+ "mappings": [],
378
+ "thresholds": {
379
+ "mode": "absolute",
380
+ "steps": [
381
+ {
382
+ "color": "green",
383
+ "value": null
384
+ },
385
+ {
386
+ "color": "red",
387
+ "value": 80
388
+ }
389
+ ]
390
+ },
391
+ "unit": "s"
392
+ },
393
+ "overrides": []
394
+ },
395
+ "gridPos": {
396
+ "h": 8,
397
+ "w": 12,
398
+ "x": 0,
399
+ "y": 16
400
+ },
401
+ "id": 5,
402
+ "options": {
403
+ "legend": {
404
+ "calcs": ["mean", "max"],
405
+ "displayMode": "table",
406
+ "placement": "bottom",
407
+ "showLegend": true
408
+ },
409
+ "tooltip": {
410
+ "mode": "multi",
411
+ "sort": "desc"
412
+ }
413
+ },
414
+ "targets": [
415
+ {
416
+ "datasource": {
417
+ "type": "prometheus",
418
+ "uid": "${DS_PROMETHEUS}"
419
+ },
420
+ "editorMode": "code",
421
+ "expr": "histogram_quantile(0.50, rate(mnemocore_store_duration_seconds_bucket[5m]))",
422
+ "legendFormat": "p50 store",
423
+ "range": true,
424
+ "refId": "A"
425
+ },
426
+ {
427
+ "datasource": {
428
+ "type": "prometheus",
429
+ "uid": "${DS_PROMETHEUS}"
430
+ },
431
+ "editorMode": "code",
432
+ "expr": "histogram_quantile(0.95, rate(mnemocore_store_duration_seconds_bucket[5m]))",
433
+ "legendFormat": "p95 store",
434
+ "range": true,
435
+ "refId": "B"
436
+ },
437
+ {
438
+ "datasource": {
439
+ "type": "prometheus",
440
+ "uid": "${DS_PROMETHEUS}"
441
+ },
442
+ "editorMode": "code",
443
+ "expr": "histogram_quantile(0.99, rate(mnemocore_store_duration_seconds_bucket[5m]))",
444
+ "legendFormat": "p99 store",
445
+ "range": true,
446
+ "refId": "C"
447
+ }
448
+ ],
449
+ "title": "Store Duration Percentiles",
450
+ "type": "timeseries"
451
+ },
452
+ {
453
+ "datasource": {
454
+ "type": "prometheus",
455
+ "uid": "${DS_PROMETHEUS}"
456
+ },
457
+ "fieldConfig": {
458
+ "defaults": {
459
+ "color": {
460
+ "mode": "palette-classic"
461
+ },
462
+ "custom": {
463
+ "axisCenteredZero": false,
464
+ "axisColorMode": "text",
465
+ "axisLabel": "",
466
+ "axisPlacement": "auto",
467
+ "barAlignment": 0,
468
+ "drawStyle": "line",
469
+ "fillOpacity": 10,
470
+ "gradientMode": "none",
471
+ "hideFrom": {
472
+ "legend": false,
473
+ "tooltip": false,
474
+ "viz": false
475
+ },
476
+ "lineInterpolation": "linear",
477
+ "lineWidth": 1,
478
+ "pointSize": 5,
479
+ "scaleDistribution": {
480
+ "type": "linear"
481
+ },
482
+ "showPoints": "auto",
483
+ "spanNulls": false,
484
+ "stacking": {
485
+ "group": "A",
486
+ "mode": "none"
487
+ },
488
+ "thresholdsStyle": {
489
+ "mode": "off"
490
+ }
491
+ },
492
+ "mappings": [],
493
+ "thresholds": {
494
+ "mode": "absolute",
495
+ "steps": [
496
+ {
497
+ "color": "green",
498
+ "value": null
499
+ },
500
+ {
501
+ "color": "red",
502
+ "value": 80
503
+ }
504
+ ]
505
+ },
506
+ "unit": "s"
507
+ },
508
+ "overrides": []
509
+ },
510
+ "gridPos": {
511
+ "h": 8,
512
+ "w": 12,
513
+ "x": 12,
514
+ "y": 16
515
+ },
516
+ "id": 6,
517
+ "options": {
518
+ "legend": {
519
+ "calcs": ["mean", "max"],
520
+ "displayMode": "table",
521
+ "placement": "bottom",
522
+ "showLegend": true
523
+ },
524
+ "tooltip": {
525
+ "mode": "multi",
526
+ "sort": "desc"
527
+ }
528
+ },
529
+ "targets": [
530
+ {
531
+ "datasource": {
532
+ "type": "prometheus",
533
+ "uid": "${DS_PROMETHEUS}"
534
+ },
535
+ "editorMode": "code",
536
+ "expr": "histogram_quantile(0.50, rate(mnemocore_query_duration_seconds_bucket[5m]))",
537
+ "legendFormat": "p50 query",
538
+ "range": true,
539
+ "refId": "A"
540
+ },
541
+ {
542
+ "datasource": {
543
+ "type": "prometheus",
544
+ "uid": "${DS_PROMETHEUS}"
545
+ },
546
+ "editorMode": "code",
547
+ "expr": "histogram_quantile(0.95, rate(mnemocore_query_duration_seconds_bucket[5m]))",
548
+ "legendFormat": "p95 query",
549
+ "range": true,
550
+ "refId": "B"
551
+ },
552
+ {
553
+ "datasource": {
554
+ "type": "prometheus",
555
+ "uid": "${DS_PROMETHEUS}"
556
+ },
557
+ "editorMode": "code",
558
+ "expr": "histogram_quantile(0.99, rate(mnemocore_query_duration_seconds_bucket[5m]))",
559
+ "legendFormat": "p99 query",
560
+ "range": true,
561
+ "refId": "C"
562
+ }
563
+ ],
564
+ "title": "Query Duration Percentiles",
565
+ "type": "timeseries"
566
+ },
567
+ {
568
+ "datasource": {
569
+ "type": "prometheus",
570
+ "uid": "${DS_PROMETHEUS}"
571
+ },
572
+ "fieldConfig": {
573
+ "defaults": {
574
+ "color": {
575
+ "mode": "palette-classic"
576
+ },
577
+ "custom": {
578
+ "axisCenteredZero": false,
579
+ "axisColorMode": "text",
580
+ "axisLabel": "",
581
+ "axisPlacement": "auto",
582
+ "barAlignment": 0,
583
+ "drawStyle": "bars",
584
+ "fillOpacity": 100,
585
+ "gradientMode": "none",
586
+ "hideFrom": {
587
+ "legend": false,
588
+ "tooltip": false,
589
+ "viz": false
590
+ },
591
+ "lineInterpolation": "linear",
592
+ "lineWidth": 1,
593
+ "pointSize": 5,
594
+ "scaleDistribution": {
595
+ "type": "linear"
596
+ },
597
+ "showPoints": "auto",
598
+ "spanNulls": false,
599
+ "stacking": {
600
+ "group": "A",
601
+ "mode": "normal"
602
+ },
603
+ "thresholdsStyle": {
604
+ "mode": "off"
605
+ }
606
+ },
607
+ "mappings": [],
608
+ "thresholds": {
609
+ "mode": "absolute",
610
+ "steps": [
611
+ {
612
+ "color": "green",
613
+ "value": null
614
+ },
615
+ {
616
+ "color": "red",
617
+ "value": 80
618
+ }
619
+ ]
620
+ }
621
+ },
622
+ "overrides": []
623
+ },
624
+ "gridPos": {
625
+ "h": 8,
626
+ "w": 12,
627
+ "x": 0,
628
+ "y": 24
629
+ },
630
+ "id": 7,
631
+ "options": {
632
+ "legend": {
633
+ "calcs": [],
634
+ "displayMode": "list",
635
+ "placement": "bottom",
636
+ "showLegend": true
637
+ },
638
+ "tooltip": {
639
+ "mode": "single",
640
+ "sort": "none"
641
+ }
642
+ },
643
+ "targets": [
644
+ {
645
+ "datasource": {
646
+ "type": "prometheus",
647
+ "uid": "${DS_PROMETHEUS}"
648
+ },
649
+ "editorMode": "code",
650
+ "expr": "rate(mnemocore_error_total[5m])",
651
+ "legendFormat": "{{error_type}}",
652
+ "range": true,
653
+ "refId": "A"
654
+ }
655
+ ],
656
+ "title": "Error Rate by Type (5m avg)",
657
+ "type": "timeseries"
658
+ },
659
+ {
660
+ "datasource": {
661
+ "type": "prometheus",
662
+ "uid": "${DS_PROMETHEUS}"
663
+ },
664
+ "fieldConfig": {
665
+ "defaults": {
666
+ "color": {
667
+ "mode": "palette-classic"
668
+ },
669
+ "custom": {
670
+ "axisCenteredZero": false,
671
+ "axisColorMode": "text",
672
+ "axisLabel": "",
673
+ "axisPlacement": "auto",
674
+ "barAlignment": 0,
675
+ "drawStyle": "line",
676
+ "fillOpacity": 10,
677
+ "gradientMode": "none",
678
+ "hideFrom": {
679
+ "legend": false,
680
+ "tooltip": false,
681
+ "viz": false
682
+ },
683
+ "lineInterpolation": "linear",
684
+ "lineWidth": 1,
685
+ "pointSize": 5,
686
+ "scaleDistribution": {
687
+ "type": "linear"
688
+ },
689
+ "showPoints": "auto",
690
+ "spanNulls": false,
691
+ "stacking": {
692
+ "group": "A",
693
+ "mode": "none"
694
+ },
695
+ "thresholdsStyle": {
696
+ "mode": "off"
697
+ }
698
+ },
699
+ "mappings": [],
700
+ "thresholds": {
701
+ "mode": "absolute",
702
+ "steps": [
703
+ {
704
+ "color": "green",
705
+ "value": null
706
+ },
707
+ {
708
+ "color": "red",
709
+ "value": 80
710
+ }
711
+ ]
712
+ }
713
+ },
714
+ "overrides": []
715
+ },
716
+ "gridPos": {
717
+ "h": 8,
718
+ "w": 12,
719
+ "x": 12,
720
+ "y": 24
721
+ },
722
+ "id": 8,
723
+ "options": {
724
+ "legend": {
725
+ "calcs": [],
726
+ "displayMode": "list",
727
+ "placement": "bottom",
728
+ "showLegend": true
729
+ },
730
+ "tooltip": {
731
+ "mode": "single",
732
+ "sort": "none"
733
+ }
734
+ },
735
+ "targets": [
736
+ {
737
+ "datasource": {
738
+ "type": "prometheus",
739
+ "uid": "${DS_PROMETHEUS}"
740
+ },
741
+ "editorMode": "code",
742
+ "expr": "mnemocore_memory_count_total",
743
+ "legendFormat": "{{tier}}",
744
+ "range": true,
745
+ "refId": "A"
746
+ }
747
+ ],
748
+ "title": "Memory Count Over Time (by Tier)",
749
+ "type": "timeseries"
750
+ },
751
+ {
752
+ "datasource": {
753
+ "type": "prometheus",
754
+ "uid": "${DS_PROMETHEUS}"
755
+ },
756
+ "fieldConfig": {
757
+ "defaults": {
758
+ "color": {
759
+ "mode": "palette-classic"
760
+ },
761
+ "custom": {
762
+ "axisCenteredZero": false,
763
+ "axisColorMode": "text",
764
+ "axisLabel": "",
765
+ "axisPlacement": "auto",
766
+ "barAlignment": 0,
767
+ "drawStyle": "line",
768
+ "fillOpacity": 10,
769
+ "gradientMode": "none",
770
+ "hideFrom": {
771
+ "legend": false,
772
+ "tooltip": false,
773
+ "viz": false
774
+ },
775
+ "lineInterpolation": "linear",
776
+ "lineWidth": 1,
777
+ "pointSize": 5,
778
+ "scaleDistribution": {
779
+ "type": "linear"
780
+ },
781
+ "showPoints": "auto",
782
+ "spanNulls": false,
783
+ "stacking": {
784
+ "group": "A",
785
+ "mode": "none"
786
+ },
787
+ "thresholdsStyle": {
788
+ "mode": "off"
789
+ }
790
+ },
791
+ "mappings": [],
792
+ "thresholds": {
793
+ "mode": "absolute",
794
+ "steps": [
795
+ {
796
+ "color": "green",
797
+ "value": null
798
+ },
799
+ {
800
+ "color": "red",
801
+ "value": 80
802
+ }
803
+ ]
804
+ }
805
+ },
806
+ "overrides": []
807
+ },
808
+ "gridPos": {
809
+ "h": 8,
810
+ "w": 12,
811
+ "x": 0,
812
+ "y": 32
813
+ },
814
+ "id": 9,
815
+ "options": {
816
+ "legend": {
817
+ "calcs": [],
818
+ "displayMode": "list",
819
+ "placement": "bottom",
820
+ "showLegend": true
821
+ },
822
+ "tooltip": {
823
+ "mode": "single",
824
+ "sort": "none"
825
+ }
826
+ },
827
+ "targets": [
828
+ {
829
+ "datasource": {
830
+ "type": "prometheus",
831
+ "uid": "${DS_PROMETHEUS}"
832
+ },
833
+ "editorMode": "code",
834
+ "expr": "rate(haim_dream_loop_total[5m])",
835
+ "legendFormat": "{{status}}",
836
+ "range": true,
837
+ "refId": "A"
838
+ }
839
+ ],
840
+ "title": "Dream Loop Rate (5m avg)",
841
+ "type": "timeseries"
842
+ },
843
+ {
844
+ "datasource": {
845
+ "type": "prometheus",
846
+ "uid": "${DS_PROMETHEUS}"
847
+ },
848
+ "fieldConfig": {
849
+ "defaults": {
850
+ "color": {
851
+ "mode": "palette-classic"
852
+ },
853
+ "custom": {
854
+ "axisCenteredZero": false,
855
+ "axisColorMode": "text",
856
+ "axisLabel": "",
857
+ "axisPlacement": "auto",
858
+ "barAlignment": 0,
859
+ "drawStyle": "line",
860
+ "fillOpacity": 10,
861
+ "gradientMode": "none",
862
+ "hideFrom": {
863
+ "legend": false,
864
+ "tooltip": false,
865
+ "viz": false
866
+ },
867
+ "lineInterpolation": "linear",
868
+ "lineWidth": 1,
869
+ "pointSize": 5,
870
+ "scaleDistribution": {
871
+ "type": "linear"
872
+ },
873
+ "showPoints": "auto",
874
+ "spanNulls": false,
875
+ "stacking": {
876
+ "group": "A",
877
+ "mode": "none"
878
+ },
879
+ "thresholdsStyle": {
880
+ "mode": "off"
881
+ }
882
+ },
883
+ "mappings": [],
884
+ "thresholds": {
885
+ "mode": "absolute",
886
+ "steps": [
887
+ {
888
+ "color": "green",
889
+ "value": null
890
+ },
891
+ {
892
+ "color": "red",
893
+ "value": 80
894
+ }
895
+ ]
896
+ },
897
+ "unit": "s"
898
+ },
899
+ "overrides": []
900
+ },
901
+ "gridPos": {
902
+ "h": 8,
903
+ "w": 12,
904
+ "x": 12,
905
+ "y": 32
906
+ },
907
+ "id": 10,
908
+ "options": {
909
+ "legend": {
910
+ "calcs": ["mean", "max"],
911
+ "displayMode": "table",
912
+ "placement": "bottom",
913
+ "showLegend": true
914
+ },
915
+ "tooltip": {
916
+ "mode": "multi",
917
+ "sort": "desc"
918
+ }
919
+ },
920
+ "targets": [
921
+ {
922
+ "datasource": {
923
+ "type": "prometheus",
924
+ "uid": "${DS_PROMETHEUS}"
925
+ },
926
+ "editorMode": "code",
927
+ "expr": "rate(haim_dream_iteration_seconds_sum[5m]) / rate(haim_dream_iteration_seconds_count[5m])",
928
+ "legendFormat": "Dream Iteration",
929
+ "range": true,
930
+ "refId": "A"
931
+ }
932
+ ],
933
+ "title": "Dream Iteration Duration (5m avg)",
934
+ "type": "timeseries"
935
+ }
936
+ ],
937
+ "refresh": "30s",
938
+ "schemaVersion": 38,
939
+ "style": "dark",
940
+ "tags": ["mnemocore", "observability", "cognitive-memory"],
941
+ "templating": {
942
+ "list": []
943
+ },
944
+ "time": {
945
+ "from": "now-6h",
946
+ "to": "now"
947
+ },
948
+ "timepicker": {},
949
+ "timezone": "",
950
+ "title": "MnemoCore Observability Dashboard",
951
+ "uid": "mnemocore-monitoring",
952
+ "version": 2,
953
+ "weekStart": ""
954
+ }
helm/mnemocore/.helmignore ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Patterns to ignore when building packages.
2
+ # This supports shell glob matching, relative path matching, and
3
+ # negation (prefixed with !). Only one pattern per line.
4
+
5
+ # Common VCS dirs
6
+ .git/
7
+ .gitignore
8
+ .bzr/
9
+ .bzrignore
10
+ .hg/
11
+ .hgignore
12
+ .svn/
13
+
14
+ # Common backup files
15
+ *.swp
16
+ *.bak
17
+ *.tmp
18
+ *.orig
19
+ *~
20
+
21
+ # Various IDEs
22
+ .project
23
+ .idea/
24
+ *.tmproj
25
+ .vscode/
26
+
27
+ # Test files
28
+ *_test.go
29
+ tests/
30
+ *.test
31
+
32
+ # Documentation
33
+ *.md
34
+ !README.md
35
+
36
+ # CI/CD files
37
+ .github/
38
+ .gitlab-ci.yml
39
+ .travis.yml
40
+ Jenkinsfile
41
+
42
+ # Development files
43
+ .env
44
+ .env.*
45
+ docker-compose*.yml
46
+ Dockerfile*
47
+
48
+ # Python
49
+ __pycache__/
50
+ *.py[cod]
51
+ *$py.class
52
+ *.so
53
+ .Python
54
+ venv/
55
+ ENV/
56
+
57
+ # Node
58
+ node_modules/
59
+ npm-debug.log
60
+
61
+ # Build artifacts
62
+ dist/
63
+ build/
64
+ target/
65
+
66
+ # OS files
67
+ .DS_Store
68
+ Thumbs.db
helm/mnemocore/Chart.yaml ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ apiVersion: v2
2
+ name: mnemocore
3
+ description: MnemoCore - Infrastructure for Persistent Cognitive Memory with HAIM (Hyperdimensional Artificial Intelligence Memory)
4
+
5
+ type: application
6
+
7
+ # Chart version - follows SemVer 2
8
+ version: 1.0.0
9
+
10
+ # Application version
11
+ appVersion: "3.5.0"
12
+
13
+ # Metadata
14
+ home: https://github.com/your-org/mnemocore
15
+ sources:
16
+ - https://github.com/your-org/mnemocore
17
+ keywords:
18
+ - ai
19
+ - memory
20
+ - vector-database
21
+ - cognitive
22
+ - hyperdimensional-computing
23
+ - hdc
24
+ - llm
25
+
26
+ maintainers:
27
+ - name: MnemoCore Team
28
+ email: team@mnemocore.ai
29
+
30
+ # Dependencies
31
+ dependencies:
32
+ - name: redis
33
+ version: "18.x.x"
34
+ repository: "https://charts.bitnami.com/bitnami"
35
+ condition: redis.enabled
36
+ alias: redis
37
+
38
+ - name: qdrant
39
+ version: "0.x.x"
40
+ repository: "https://qdrant.github.io/qdrant-helm"
41
+ condition: qdrant.enabled
42
+ alias: qdrant
43
+
44
+ # Kubernetes version compatibility
45
+ kubeVersion: ">=1.25.0-0"
46
+
47
+ # Chart annotations
48
+ annotations:
49
+ artifacthub.io/category: ai-machine-learning
50
+ artifacthub.io/license: MIT
51
+ artifacthub.io/links: |
52
+ - name: Documentation
53
+ url: https://github.com/your-org/mnemocore/docs
54
+ - name: API Reference
55
+ url: https://github.com/your-org/mnemocore/docs/API.md
helm/mnemocore/templates/_helpers.tpl ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{/*
2
+ Expand the name of the chart.
3
+ */}}
4
+ {{- define "mnemocore.name" -}}
5
+ {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
6
+ {{- end }}
7
+
8
+ {{/*
9
+ Create a default fully qualified app name.
10
+ */}}
11
+ {{- define "mnemocore.fullname" -}}
12
+ {{- if .Values.fullnameOverride }}
13
+ {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
14
+ {{- else }}
15
+ {{- $name := default .Chart.Name .Values.nameOverride }}
16
+ {{- if contains $name .Release.Name }}
17
+ {{- .Release.Name | trunc 63 | trimSuffix "-" }}
18
+ {{- else }}
19
+ {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
20
+ {{- end }}
21
+ {{- end }}
22
+ {{- end }}
23
+
24
+ {{/*
25
+ Create chart name and version as used by the chart label.
26
+ */}}
27
+ {{- define "mnemocore.chart" -}}
28
+ {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
29
+ {{- end }}
30
+
31
+ {{/*
32
+ Common labels
33
+ */}}
34
+ {{- define "mnemocore.labels" -}}
35
+ helm.sh/chart: {{ include "mnemocore.chart" . }}
36
+ {{ include "mnemocore.selectorLabels" . }}
37
+ {{- if .Chart.AppVersion }}
38
+ app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
39
+ {{- end }}
40
+ app.kubernetes.io/managed-by: {{ .Release.Service }}
41
+ {{- end }}
42
+
43
+ {{/*
44
+ Selector labels
45
+ */}}
46
+ {{- define "mnemocore.selectorLabels" -}}
47
+ app.kubernetes.io/name: {{ include "mnemocore.name" . }}
48
+ app.kubernetes.io/instance: {{ .Release.Name }}
49
+ {{- end }}
50
+
51
+ {{/*
52
+ Create the name of the service account to use
53
+ */}}
54
+ {{- define "mnemocore.serviceAccountName" -}}
55
+ {{- if .Values.mnemocore.serviceAccount.create }}
56
+ {{- default (include "mnemocore.fullname" .) .Values.mnemocore.serviceAccount.name }}
57
+ {{- else }}
58
+ {{- default "default" .Values.mnemocore.serviceAccount.name }}
59
+ {{- end }}
60
+ {{- end }}
61
+
62
+ {{/*
63
+ Redis fullname
64
+ */}}
65
+ {{- define "mnemocore.redis.fullname" -}}
66
+ {{- printf "%s-redis" (include "mnemocore.fullname" .) }}
67
+ {{- end }}
68
+
69
+ {{/*
70
+ Qdrant fullname
71
+ */}}
72
+ {{- define "mnemocore.qdrant.fullname" -}}
73
+ {{- printf "%s-qdrant" (include "mnemocore.fullname" .) }}
74
+ {{- end }}
75
+
76
+ {{/*
77
+ ConfigMap fullname
78
+ */}}
79
+ {{- define "mnemocore.configmap.fullname" -}}
80
+ {{- printf "%s-config" (include "mnemocore.fullname" .) }}
81
+ {{- end }}
82
+
83
+ {{/*
84
+ Secret fullname
85
+ */}}
86
+ {{- define "mnemocore.secret.fullname" -}}
87
+ {{- printf "%s-secret" (include "mnemocore.fullname" .) }}
88
+ {{- end }}
89
+
90
+ {{/*
91
+ PVC fullname
92
+ */}}
93
+ {{- define "mnemocore.pvc.fullname" -}}
94
+ {{- printf "%s-data" (include "mnemocore.fullname" .) }}
95
+ {{- end }}
96
+
97
+ {{/*
98
+ HPA fullname
99
+ */}}
100
+ {{- define "mnemocore.hpa.fullname" -}}
101
+ {{- printf "%s-hpa" (include "mnemocore.fullname" .) }}
102
+ {{- end }}
103
+
104
+ {{/*
105
+ Return the proper Storage Class
106
+ */}}
107
+ {{- define "mnemocore.storageClass" -}}
108
+ {{- if .Values.global.storageClass }}
109
+ {{- if (eq "-" .Values.global.storageClass) }}
110
+ {{- else }}
111
+ storageClassName: "{{ .Values.global.storageClass }}"
112
+ {{- end }}
113
+ {{- else if .Values.mnemocore.persistence.storageClass }}
114
+ {{- if (eq "-" .Values.mnemocore.persistence.storageClass) }}
115
+ {{- else }}
116
+ storageClassName: "{{ .Values.mnemocore.persistence.storageClass }}"
117
+ {{- end }}
118
+ {{- end }}
119
+ {{- end }}
helm/mnemocore/templates/configmap.yaml ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{/*
2
+ MnemoCore ConfigMap - HAIM Configuration
3
+ */}}
4
+ apiVersion: v1
5
+ kind: ConfigMap
6
+ metadata:
7
+ name: {{ include "mnemocore.configmap.fullname" . }}
8
+ labels:
9
+ {{- include "mnemocore.labels" . | nindent 4 }}
10
+ app.kubernetes.io/component: api
11
+ data:
12
+ config.yaml: |
13
+ # HAIM Configuration - Generated by Helm Chart
14
+ # MnemoCore Phase {{ .Values.mnemocore.config.version }}
15
+
16
+ haim:
17
+ version: "{{ .Values.mnemocore.config.version }}"
18
+ dimensionality: {{ .Values.mnemocore.config.dimensionality }}
19
+
20
+ # Vector encoding
21
+ encoding:
22
+ mode: "{{ .Values.mnemocore.config.encoding.mode }}"
23
+ token_method: "{{ .Values.mnemocore.config.encoding.token_method }}"
24
+
25
+ # Memory tier thresholds
26
+ tiers:
27
+ hot:
28
+ max_memories: {{ .Values.mnemocore.config.tiers.hot.max_memories }}
29
+ ltp_threshold_min: {{ .Values.mnemocore.config.tiers.hot.ltp_threshold_min }}
30
+ eviction_policy: "{{ .Values.mnemocore.config.tiers.hot.eviction_policy }}"
31
+
32
+ warm:
33
+ max_memories: {{ .Values.mnemocore.config.tiers.warm.max_memories }}
34
+ ltp_threshold_min: {{ .Values.mnemocore.config.tiers.warm.ltp_threshold_min }}
35
+ consolidation_interval_hours: {{ .Values.mnemocore.config.tiers.warm.consolidation_interval_hours }}
36
+ storage_backend: "{{ .Values.mnemocore.config.tiers.warm.storage_backend }}"
37
+
38
+ cold:
39
+ storage_backend: "{{ .Values.mnemocore.config.tiers.cold.storage_backend }}"
40
+ compression: "{{ .Values.mnemocore.config.tiers.cold.compression }}"
41
+ archive_threshold_days: {{ .Values.mnemocore.config.tiers.cold.archive_threshold_days }}
42
+
43
+ # LTP (Long-Term Potentiation) decay parameters
44
+ ltp:
45
+ initial_importance: {{ .Values.mnemocore.config.ltp.initial_importance }}
46
+ decay_lambda: {{ .Values.mnemocore.config.ltp.decay_lambda }}
47
+ permanence_threshold: {{ .Values.mnemocore.config.ltp.permanence_threshold }}
48
+ half_life_days: {{ .Values.mnemocore.config.ltp.half_life_days }}
49
+
50
+ # Hysteresis (prevent boundary thrashing between tiers)
51
+ hysteresis:
52
+ promote_delta: {{ .Values.mnemocore.config.hysteresis.promote_delta }}
53
+ demote_delta: {{ .Values.mnemocore.config.hysteresis.demote_delta }}
54
+
55
+ # Redis Configuration
56
+ redis:
57
+ {{- if .Values.redis.url }}
58
+ url: "{{ .Values.redis.url }}"
59
+ {{- else }}
60
+ url: "redis://{{ include "mnemocore.redis.fullname" . }}:{{ .Values.redis.service.port }}/0"
61
+ {{- end }}
62
+ stream_key: "haim:subconscious"
63
+ max_connections: 10
64
+ socket_timeout: 5
65
+
66
+ # Qdrant Configuration
67
+ qdrant:
68
+ {{- if .Values.qdrant.url }}
69
+ url: "{{ .Values.qdrant.url }}"
70
+ {{- else }}
71
+ url: "http://{{ include "mnemocore.qdrant.fullname" . }}:{{ .Values.qdrant.service.httpPort }}"
72
+ {{- end }}
73
+ collection_hot: "{{ .Values.qdrant.collections.hot.name }}"
74
+ collection_warm: "{{ .Values.qdrant.collections.warm.name }}"
75
+ binary_quantization: {{ .Values.qdrant.collections.hot.binaryQuantization }}
76
+ always_ram: {{ .Values.qdrant.collections.hot.alwaysRam }}
77
+ hnsw_m: {{ .Values.qdrant.collections.hot.hnswM }}
78
+ hnsw_ef_construct: {{ .Values.qdrant.collections.hot.hnswEfConstruct }}
79
+
80
+ # GPU Configuration
81
+ gpu:
82
+ enabled: {{ .Values.mnemocore.config.gpu.enabled }}
83
+ device: "{{ .Values.mnemocore.config.gpu.device }}"
84
+ batch_size: {{ .Values.mnemocore.config.gpu.batch_size }}
85
+ fallback_to_cpu: {{ .Values.mnemocore.config.gpu.fallback_to_cpu }}
86
+
87
+ # Observability
88
+ observability:
89
+ metrics_port: {{ .Values.mnemocore.config.observability.metrics_port }}
90
+ log_level: "{{ .Values.mnemocore.config.observability.log_level }}"
91
+ structured_logging: {{ .Values.mnemocore.config.observability.structured_logging }}
92
+
93
+ # Persistence paths
94
+ paths:
95
+ data_dir: "{{ .Values.mnemocore.config.paths.data_dir }}"
96
+ memory_file: "{{ .Values.mnemocore.config.paths.memory_file }}"
97
+ codebook_file: "{{ .Values.mnemocore.config.paths.codebook_file }}"
98
+ concepts_file: "{{ .Values.mnemocore.config.paths.concepts_file }}"
99
+ synapses_file: "{{ .Values.mnemocore.config.paths.synapses_file }}"
100
+ warm_mmap_dir: "{{ .Values.mnemocore.config.paths.warm_mmap_dir }}"
101
+ cold_archive_dir: "{{ .Values.mnemocore.config.paths.cold_archive_dir }}"
102
+
103
+ # MCP (Model Context Protocol) bridge
104
+ mcp:
105
+ enabled: {{ .Values.mnemocore.config.mcp.enabled }}
106
+ transport: "{{ .Values.mnemocore.config.mcp.transport }}"
107
+ host: "{{ .Values.mnemocore.config.mcp.host }}"
108
+ port: {{ .Values.mnemocore.config.mcp.port }}
109
+ api_base_url: "{{ .Values.mnemocore.config.mcp.api_base_url }}"
110
+ timeout_seconds: {{ .Values.mnemocore.config.mcp.timeout_seconds }}
111
+ allow_tools:
112
+ {{- range .Values.mnemocore.config.mcp.allow_tools }}
113
+ - "{{ . }}"
114
+ {{- end }}
helm/mnemocore/templates/deployment-qdrant.yaml ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
{{/*
Qdrant Deployment - Only created if embedded mode is enabled and not using official Qdrant chart.

FIX: the original file opened the Deployment with
  {{- if and .Values.qdrant.enabled .Values.qdrant.embedded.enabled }}
but never closed it (7 openers vs 6 `end`s), so `helm template` failed to
parse this file. The missing {{- end }} is added before the first `---`.
Also: the `storage` volume was always declared (emptyDir fallback) but only
mounted when persistence was enabled, leaving the emptyDir unused; Qdrant
writes /qdrant/storage either way, so the mount is now unconditional.
*/}}
{{- if and .Values.qdrant.enabled .Values.qdrant.embedded.enabled }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "mnemocore.qdrant.fullname" . }}
  labels:
    {{- include "mnemocore.labels" . | nindent 4 }}
    app.kubernetes.io/component: qdrant
spec:
  replicas: 1
  selector:
    matchLabels:
      {{- include "mnemocore.selectorLabels" . | nindent 6 }}
      app.kubernetes.io/component: qdrant
  template:
    metadata:
      labels:
        {{- include "mnemocore.selectorLabels" . | nindent 8 }}
        app.kubernetes.io/component: qdrant
    spec:
      {{- with .Values.global.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      securityContext:
        runAsNonRoot: true
        runAsUser: 1000
        runAsGroup: 1000
        fsGroup: 1000
      containers:
        - name: qdrant
          image: "{{ .Values.global.imageRegistry }}{{ .Values.qdrant.image.repository }}:{{ .Values.qdrant.image.tag }}"
          imagePullPolicy: {{ .Values.qdrant.image.pullPolicy }}
          ports:
            - name: http
              containerPort: {{ .Values.qdrant.service.httpPort }}
              protocol: TCP
            - name: grpc
              containerPort: {{ .Values.qdrant.service.grpcPort }}
              protocol: TCP
          env:
            - name: QDRANT__SERVICE__GRPC_PORT
              value: {{ .Values.qdrant.service.grpcPort | quote }}
            - name: QDRANT__LOG_LEVEL
              value: {{ .Values.qdrant.config.logLevel | quote }}
          # Mount unconditionally: falls back to an emptyDir volume when
          # persistence is disabled (see volumes below).
          volumeMounts:
            - name: storage
              mountPath: /qdrant/storage
          {{- if .Values.qdrant.probes.liveness.enabled }}
          livenessProbe:
            httpGet:
              # NOTE(review): newer Qdrant releases expose /healthz and
              # /readyz — confirm /health matches the pinned image tag.
              path: /health
              port: http
            initialDelaySeconds: {{ .Values.qdrant.probes.liveness.initialDelaySeconds }}
            periodSeconds: {{ .Values.qdrant.probes.liveness.periodSeconds }}
            timeoutSeconds: {{ .Values.qdrant.probes.liveness.timeoutSeconds }}
            failureThreshold: {{ .Values.qdrant.probes.liveness.failureThreshold }}
          {{- end }}
          {{- if .Values.qdrant.probes.readiness.enabled }}
          readinessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: {{ .Values.qdrant.probes.readiness.initialDelaySeconds }}
            periodSeconds: {{ .Values.qdrant.probes.readiness.periodSeconds }}
            timeoutSeconds: {{ .Values.qdrant.probes.readiness.timeoutSeconds }}
            failureThreshold: {{ .Values.qdrant.probes.readiness.failureThreshold }}
          {{- end }}
          resources:
            {{- toYaml .Values.qdrant.resources | nindent 12 }}
      volumes:
        {{- if .Values.qdrant.persistence.enabled }}
        - name: storage
          persistentVolumeClaim:
            claimName: {{ include "mnemocore.qdrant.fullname" . }}-storage
        {{- else }}
        - name: storage
          emptyDir: {}
        {{- end }}
{{- end }}
---
{{/*
Qdrant PVC
*/}}
{{- if and .Values.qdrant.enabled .Values.qdrant.embedded.enabled .Values.qdrant.persistence.enabled }}
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: {{ include "mnemocore.qdrant.fullname" . }}-storage
  labels:
    {{- include "mnemocore.labels" . | nindent 4 }}
    app.kubernetes.io/component: qdrant
  {{- with .Values.qdrant.persistence.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
spec:
  accessModes:
    {{- range .Values.qdrant.persistence.accessModes }}
    - {{ . | quote }}
    {{- end }}
  {{- if .Values.global.storageClass }}
  storageClassName: {{ .Values.global.storageClass | quote }}
  {{- else if .Values.qdrant.persistence.storageClass }}
  storageClassName: {{ .Values.qdrant.persistence.storageClass | quote }}
  {{- end }}
  resources:
    requests:
      storage: {{ .Values.qdrant.persistence.size | quote }}
{{- end }}
---
{{/*
Qdrant Service
*/}}
{{- if and .Values.qdrant.enabled .Values.qdrant.embedded.enabled }}
apiVersion: v1
kind: Service
metadata:
  name: {{ include "mnemocore.qdrant.fullname" . }}
  labels:
    {{- include "mnemocore.labels" . | nindent 4 }}
    app.kubernetes.io/component: qdrant
spec:
  type: {{ .Values.qdrant.service.type }}
  ports:
    - port: {{ .Values.qdrant.service.httpPort }}
      targetPort: http
      protocol: TCP
      name: http
    - port: {{ .Values.qdrant.service.grpcPort }}
      targetPort: grpc
      protocol: TCP
      name: grpc
  selector:
    {{- include "mnemocore.selectorLabels" . | nindent 4 }}
    app.kubernetes.io/component: qdrant
{{- end }}
helm/mnemocore/templates/deployment-redis.yaml ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
{{/*
Redis Deployment - Only created if embedded mode is enabled and not using Bitnami chart.

FIX: the original file opened the Deployment with
  {{- if and .Values.redis.enabled .Values.redis.embedded.enabled }}
but never closed it (7 openers vs 6 `end`s), so `helm template` failed to
parse this file. The missing {{- end }} is added before the first `---`.
Also: the `data` volume was always declared (emptyDir fallback) but only
mounted when persistence was enabled, leaving the emptyDir unused; Redis
uses /data either way, so the mount is now unconditional.
*/}}
{{- if and .Values.redis.enabled .Values.redis.embedded.enabled }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "mnemocore.redis.fullname" . }}
  labels:
    {{- include "mnemocore.labels" . | nindent 4 }}
    app.kubernetes.io/component: redis
spec:
  replicas: 1
  selector:
    matchLabels:
      {{- include "mnemocore.selectorLabels" . | nindent 6 }}
      app.kubernetes.io/component: redis
  template:
    metadata:
      labels:
        {{- include "mnemocore.selectorLabels" . | nindent 8 }}
        app.kubernetes.io/component: redis
    spec:
      {{- with .Values.global.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      securityContext:
        runAsNonRoot: true
        runAsUser: 999
        runAsGroup: 999
        fsGroup: 999
      containers:
        - name: redis
          image: "{{ .Values.global.imageRegistry }}{{ .Values.redis.image.repository }}:{{ .Values.redis.image.tag }}"
          imagePullPolicy: {{ .Values.redis.image.pullPolicy }}
          ports:
            - name: redis
              containerPort: {{ .Values.redis.service.port }}
              protocol: TCP
          command:
            - redis-server
            - --save
            - {{ .Values.redis.config.save | quote }}
            - --loglevel
            - {{ .Values.redis.config.logLevel | quote }}
            - --maxmemory
            - {{ .Values.redis.config.maxmemory | quote }}
            - --maxmemory-policy
            - {{ .Values.redis.config.maxmemoryPolicy | quote }}
          # Mount unconditionally: falls back to an emptyDir volume when
          # persistence is disabled (see volumes below).
          volumeMounts:
            - name: data
              mountPath: /data
          {{- if .Values.redis.probes.liveness.enabled }}
          livenessProbe:
            exec:
              command:
                - redis-cli
                - ping
            initialDelaySeconds: {{ .Values.redis.probes.liveness.initialDelaySeconds }}
            periodSeconds: {{ .Values.redis.probes.liveness.periodSeconds }}
            timeoutSeconds: {{ .Values.redis.probes.liveness.timeoutSeconds }}
            failureThreshold: {{ .Values.redis.probes.liveness.failureThreshold }}
          {{- end }}
          {{- if .Values.redis.probes.readiness.enabled }}
          readinessProbe:
            exec:
              command:
                - redis-cli
                - ping
            initialDelaySeconds: {{ .Values.redis.probes.readiness.initialDelaySeconds }}
            periodSeconds: {{ .Values.redis.probes.readiness.periodSeconds }}
            timeoutSeconds: {{ .Values.redis.probes.readiness.timeoutSeconds }}
            failureThreshold: {{ .Values.redis.probes.readiness.failureThreshold }}
          {{- end }}
          resources:
            {{- toYaml .Values.redis.resources | nindent 12 }}
      volumes:
        {{- if .Values.redis.persistence.enabled }}
        - name: data
          persistentVolumeClaim:
            claimName: {{ include "mnemocore.redis.fullname" . }}-data
        {{- else }}
        - name: data
          emptyDir: {}
        {{- end }}
{{- end }}
---
{{/*
Redis PVC
*/}}
{{- if and .Values.redis.enabled .Values.redis.embedded.enabled .Values.redis.persistence.enabled }}
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: {{ include "mnemocore.redis.fullname" . }}-data
  labels:
    {{- include "mnemocore.labels" . | nindent 4 }}
    app.kubernetes.io/component: redis
  {{- with .Values.redis.persistence.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
spec:
  accessModes:
    {{- range .Values.redis.persistence.accessModes }}
    - {{ . | quote }}
    {{- end }}
  {{- if .Values.global.storageClass }}
  storageClassName: {{ .Values.global.storageClass | quote }}
  {{- else if .Values.redis.persistence.storageClass }}
  storageClassName: {{ .Values.redis.persistence.storageClass | quote }}
  {{- end }}
  resources:
    requests:
      storage: {{ .Values.redis.persistence.size | quote }}
{{- end }}
---
{{/*
Redis Service
*/}}
{{- if and .Values.redis.enabled .Values.redis.embedded.enabled }}
apiVersion: v1
kind: Service
metadata:
  name: {{ include "mnemocore.redis.fullname" . }}
  labels:
    {{- include "mnemocore.labels" . | nindent 4 }}
    app.kubernetes.io/component: redis
spec:
  type: {{ .Values.redis.service.type }}
  ports:
    - port: {{ .Values.redis.service.port }}
      targetPort: redis
      protocol: TCP
      name: redis
  selector:
    {{- include "mnemocore.selectorLabels" . | nindent 4 }}
    app.kubernetes.io/component: redis
{{- end }}
helm/mnemocore/templates/deployment.yaml ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{/*
2
+ MnemoCore API Deployment
3
+ */}}
4
+ apiVersion: apps/v1
5
+ kind: Deployment
6
+ metadata:
7
+ name: {{ include "mnemocore.fullname" . }}
8
+ labels:
9
+ {{- include "mnemocore.labels" . | nindent 4 }}
10
+ app.kubernetes.io/component: api
11
+ spec:
12
+ {{- if not .Values.mnemocore.autoscaling.enabled }}
13
+ replicas: {{ .Values.mnemocore.replicaCount }}
14
+ {{- end }}
15
+ selector:
16
+ matchLabels:
17
+ {{- include "mnemocore.selectorLabels" . | nindent 6 }}
18
+ app.kubernetes.io/component: api
19
+ template:
20
+ metadata:
21
+ annotations:
22
+ checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }}
23
+ checksum/secret: {{ include (print $.Template.BasePath "/secret.yaml") . | sha256sum }}
24
+ {{- with .Values.mnemocore.podAnnotations }}
25
+ {{- toYaml . | nindent 8 }}
26
+ {{- end }}
27
+ labels:
28
+ {{- include "mnemocore.selectorLabels" . | nindent 8 }}
29
+ app.kubernetes.io/component: api
30
+ {{- with .Values.mnemocore.podLabels }}
31
+ {{- toYaml . | nindent 8 }}
32
+ {{- end }}
33
+ spec:
34
+ {{- with .Values.global.imagePullSecrets }}
35
+ imagePullSecrets:
36
+ {{- toYaml . | nindent 8 }}
37
+ {{- end }}
38
+ serviceAccountName: {{ include "mnemocore.serviceAccountName" . }}
39
+ {{- if .Values.mnemocore.priorityClassName }}
40
+ priorityClassName: {{ .Values.mnemocore.priorityClassName | quote }}
41
+ {{- end }}
42
+ securityContext:
43
+ {{- toYaml .Values.mnemocore.podSecurityContext | nindent 8 }}
44
+ terminationGracePeriodSeconds: {{ .Values.mnemocore.terminationGracePeriodSeconds }}
45
+ {{- with .Values.mnemocore.initContainers }}
46
+ initContainers:
47
+ {{- toYaml . | nindent 8 }}
48
+ {{- end }}
49
+ containers:
50
+ - name: mnemocore
51
+ securityContext:
52
+ {{- toYaml .Values.mnemocore.securityContext | nindent 12 }}
53
+ image: "{{ .Values.global.imageRegistry }}{{ .Values.mnemocore.image.repository }}:{{ .Values.mnemocore.image.tag | default .Chart.AppVersion }}"
54
+ imagePullPolicy: {{ .Values.mnemocore.image.pullPolicy }}
55
+ ports:
56
+ - name: http
57
+ containerPort: {{ .Values.mnemocore.ports.api }}
58
+ protocol: TCP
59
+ - name: metrics
60
+ containerPort: {{ .Values.mnemocore.ports.metrics }}
61
+ protocol: TCP
62
+ env:
63
+ - name: HOST
64
+ value: {{ .Values.mnemocore.env.host | quote }}
65
+ - name: PORT
66
+ value: {{ .Values.mnemocore.ports.api | quote }}
67
+ - name: LOG_LEVEL
68
+ value: {{ .Values.mnemocore.env.logLevel | quote }}
69
+ - name: REDIS_URL
70
+ {{- if .Values.redis.url }}
71
+ value: {{ .Values.redis.url | quote }}
72
+ {{- else }}
73
+ value: "redis://{{ include "mnemocore.redis.fullname" . }}:{{ .Values.redis.service.port }}/0"
74
+ {{- end }}
75
+ - name: QDRANT_URL
76
+ {{- if .Values.qdrant.url }}
77
+ value: {{ .Values.qdrant.url | quote }}
78
+ {{- else }}
79
+ value: "http://{{ include "mnemocore.qdrant.fullname" . }}:{{ .Values.qdrant.service.httpPort }}"
80
+ {{- end }}
81
+ {{- if .Values.mnemocore.apiKey.existingSecret }}
82
+ - name: HAIM_API_KEY
83
+ valueFrom:
84
+ secretKeyRef:
85
+ name: {{ .Values.mnemocore.apiKey.existingSecret }}
86
+ key: {{ .Values.mnemocore.apiKey.key }}
87
+ {{- else if .Values.mnemocore.apiKey.value }}
88
+ - name: HAIM_API_KEY
89
+ valueFrom:
90
+ secretKeyRef:
91
+ name: {{ include "mnemocore.fullname" . }}-api-key
92
+ key: api-key
93
+ {{- end }}
94
+ {{- with .Values.mnemocore.extraVolumeMounts }}
95
+ volumeMounts:
96
+ - name: config
97
+ mountPath: /app/config.yaml
98
+ subPath: config.yaml
99
+ readOnly: true
100
+ - name: data
101
+ mountPath: /app/data
102
+ {{- toYaml . | nindent 12 }}
103
+ {{- else }}
104
+ volumeMounts:
105
+ - name: config
106
+ mountPath: /app/config.yaml
107
+ subPath: config.yaml
108
+ readOnly: true
109
+ - name: data
110
+ mountPath: /app/data
111
+ {{- end }}
112
+ {{- if .Values.mnemocore.probes.liveness.enabled }}
113
+ livenessProbe:
114
+ exec:
115
+ command:
116
+ - python
117
+ - /app/scripts/ops/healthcheck.py
118
+ initialDelaySeconds: {{ .Values.mnemocore.probes.liveness.initialDelaySeconds }}
119
+ periodSeconds: {{ .Values.mnemocore.probes.liveness.periodSeconds }}
120
+ timeoutSeconds: {{ .Values.mnemocore.probes.liveness.timeoutSeconds }}
121
+ failureThreshold: {{ .Values.mnemocore.probes.liveness.failureThreshold }}
122
+ successThreshold: {{ .Values.mnemocore.probes.liveness.successThreshold }}
123
+ {{- end }}
124
+ {{- if .Values.mnemocore.probes.readiness.enabled }}
125
+ readinessProbe:
126
+ exec:
127
+ command:
128
+ - python
129
+ - /app/scripts/ops/healthcheck.py
130
+ initialDelaySeconds: {{ .Values.mnemocore.probes.readiness.initialDelaySeconds }}
131
+ periodSeconds: {{ .Values.mnemocore.probes.readiness.periodSeconds }}
132
+ timeoutSeconds: {{ .Values.mnemocore.probes.readiness.timeoutSeconds }}
133
+ failureThreshold: {{ .Values.mnemocore.probes.readiness.failureThreshold }}
134
+ successThreshold: {{ .Values.mnemocore.probes.readiness.successThreshold }}
135
+ {{- end }}
136
+ {{- if .Values.mnemocore.probes.startup.enabled }}
137
+ startupProbe:
138
+ exec:
139
+ command:
140
+ - python
141
+ - /app/scripts/ops/healthcheck.py
142
+ initialDelaySeconds: {{ .Values.mnemocore.probes.startup.initialDelaySeconds }}
143
+ periodSeconds: {{ .Values.mnemocore.probes.startup.periodSeconds }}
144
+ timeoutSeconds: {{ .Values.mnemocore.probes.startup.timeoutSeconds }}
145
+ failureThreshold: {{ .Values.mnemocore.probes.startup.failureThreshold }}
146
+ successThreshold: {{ .Values.mnemocore.probes.startup.successThreshold }}
147
+ {{- end }}
148
+ resources:
149
+ {{- toYaml .Values.mnemocore.resources | nindent 12 }}
150
+ {{- with .Values.mnemocore.nodeSelector }}
151
+ nodeSelector:
152
+ {{- toYaml . | nindent 8 }}
153
+ {{- end }}
154
+ {{- with .Values.mnemocore.affinity }}
155
+ affinity:
156
+ {{- toYaml . | nindent 8 }}
157
+ {{- end }}
158
+ {{- with .Values.mnemocore.tolerations }}
159
+ tolerations:
160
+ {{- toYaml . | nindent 8 }}
161
+ {{- end }}
162
+ volumes:
163
+ - name: config
164
+ configMap:
165
+ name: {{ include "mnemocore.fullname" . }}-config
166
+ {{- if .Values.mnemocore.persistence.enabled }}
167
+ - name: data
168
+ persistentVolumeClaim:
169
+ claimName: {{ include "mnemocore.fullname" . }}-data
170
+ {{- else }}
171
+ - name: data
172
+ emptyDir: {}
173
+ {{- end }}
174
+ {{- with .Values.mnemocore.extraVolumes }}
175
+ {{- toYaml . | nindent 8 }}
176
+ {{- end }}
helm/mnemocore/templates/hpa.yaml ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{/*
2
+ Horizontal Pod Autoscaler for MnemoCore API
3
+ */}}
4
+ {{- if .Values.mnemocore.autoscaling.enabled }}
5
+ apiVersion: autoscaling/v2
6
+ kind: HorizontalPodAutoscaler
7
+ metadata:
8
+ name: {{ include "mnemocore.hpa.fullname" . }}
9
+ labels:
10
+ {{- include "mnemocore.labels" . | nindent 4 }}
11
+ app.kubernetes.io/component: api
12
+ spec:
13
+ scaleTargetRef:
14
+ apiVersion: apps/v1
15
+ kind: Deployment
16
+ name: {{ include "mnemocore.fullname" . }}
17
+ minReplicas: {{ .Values.mnemocore.autoscaling.minReplicas }}
18
+ maxReplicas: {{ .Values.mnemocore.autoscaling.maxReplicas }}
19
+ metrics:
20
+ {{- if .Values.mnemocore.autoscaling.targetCPUUtilizationPercentage }}
21
+ - type: Resource
22
+ resource:
23
+ name: cpu
24
+ target:
25
+ type: Utilization
26
+ averageUtilization: {{ .Values.mnemocore.autoscaling.targetCPUUtilizationPercentage }}
27
+ {{- end }}
28
+ {{- if .Values.mnemocore.autoscaling.targetMemoryUtilizationPercentage }}
29
+ - type: Resource
30
+ resource:
31
+ name: memory
32
+ target:
33
+ type: Utilization
34
+ averageUtilization: {{ .Values.mnemocore.autoscaling.targetMemoryUtilizationPercentage }}
35
+ {{- end }}
36
+ {{- with .Values.mnemocore.autoscaling.metrics }}
37
+ {{- toYaml . | nindent 4 }}
38
+ {{- end }}
39
+ {{- with .Values.mnemocore.autoscaling.behavior }}
40
+ behavior:
41
+ {{- toYaml . | nindent 4 }}
42
+ {{- end }}
43
+ {{- end }}
helm/mnemocore/templates/ingress.yaml ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{/*
2
+ Ingress for MnemoCore API
3
+ */}}
4
+ {{- if .Values.mnemocore.ingress.enabled }}
5
+ apiVersion: networking.k8s.io/v1
6
+ kind: Ingress
7
+ metadata:
8
+ name: {{ include "mnemocore.fullname" . }}
9
+ labels:
10
+ {{- include "mnemocore.labels" . | nindent 4 }}
11
+ app.kubernetes.io/component: api
12
+ {{- with .Values.mnemocore.ingress.annotations }}
13
+ annotations:
14
+ {{- toYaml . | nindent 4 }}
15
+ {{- end }}
16
+ spec:
17
+ {{- if .Values.mnemocore.ingress.className }}
18
+ ingressClassName: {{ .Values.mnemocore.ingress.className }}
19
+ {{- end }}
20
+ {{- if .Values.mnemocore.ingress.tls }}
21
+ tls:
22
+ {{- range .Values.mnemocore.ingress.tls }}
23
+ - hosts:
24
+ {{- range .hosts }}
25
+ - {{ . | quote }}
26
+ {{- end }}
27
+ secretName: {{ .secretName }}
28
+ {{- end }}
29
+ {{- end }}
30
+ rules:
31
+ {{- range .Values.mnemocore.ingress.hosts }}
32
+ - host: {{ .host | quote }}
33
+ http:
34
+ paths:
35
+ {{- range .paths }}
36
+ - path: {{ .path }}
37
+ pathType: {{ .pathType }}
38
+ backend:
39
+ service:
40
+ name: {{ include "mnemocore.fullname" $ }}
41
+ port:
42
+ number: {{ $.Values.mnemocore.service.port }}
43
+ {{- end }}
44
+ {{- end }}
45
+ {{- end }}
helm/mnemocore/templates/networkpolicy.yaml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{/*
2
+ Network Policy for MnemoCore
3
+ */}}
4
+ {{- if .Values.networkPolicy.enabled }}
5
+ apiVersion: networking.k8s.io/v1
6
+ kind: NetworkPolicy
7
+ metadata:
8
+ name: {{ include "mnemocore.fullname" . }}-netpol
9
+ labels:
10
+ {{- include "mnemocore.labels" . | nindent 4 }}
11
+ app.kubernetes.io/component: api
12
+ spec:
13
+ podSelector:
14
+ matchLabels:
15
+ {{- include "mnemocore.selectorLabels" . | nindent 6 }}
16
+ policyTypes:
17
+ - Ingress
18
+ - Egress
19
+ ingress:
20
+ {{- with .Values.networkPolicy.ingress }}
21
+ {{- toYaml . | nindent 4 }}
22
+ {{- end }}
23
+ egress:
24
+ # Allow DNS
25
+ - to:
26
+ - namespaceSelector: {}
27
+ ports:
28
+ - protocol: UDP
29
+ port: 53
30
+ - protocol: TCP
31
+ port: 53
32
+ # Allow Redis
33
+ - to:
34
+ - podSelector:
35
+ matchLabels:
36
+ app.kubernetes.io/component: redis
37
+ ports:
38
+ - protocol: TCP
39
+ port: {{ .Values.redis.service.port }}
40
+ # Allow Qdrant
41
+ - to:
42
+ - podSelector:
43
+ matchLabels:
44
+ app.kubernetes.io/component: qdrant
45
+ ports:
46
+ - protocol: TCP
47
+ port: {{ .Values.qdrant.service.httpPort }}
48
+ - protocol: TCP
49
+ port: {{ .Values.qdrant.service.grpcPort }}
50
+ {{- end }}
helm/mnemocore/templates/notes.txt ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{/*
2
+ MnemoCore Helm Chart Notes
3
+ */}}
4
+ {{- define "mnemocore.notes" -}}
5
+ MnemoCore has been deployed!
6
+
7
+ ================================================================================
8
+ MNEMOCORE DEPLOYMENT NOTES
9
+ ================================================================================
10
+
11
+ Your MnemoCore cognitive memory infrastructure is now running.
12
+
13
+ NAMESPACE: {{ .Release.Namespace }}
14
+
15
+ SERVICE:
16
+ - API: {{ include "mnemocore.fullname" . }}:{{ .Values.mnemocore.service.port }}
17
+ - Metrics: {{ include "mnemocore.fullname" . }}:{{ .Values.mnemocore.service.metricsPort }}
18
+
19
+ {{- if .Values.mnemocore.ingress.enabled }}
20
+ INGRESS:
21
+ - Host: {{ (index .Values.mnemocore.ingress.hosts 0).host }}
22
+ {{- end }}
23
+
24
+ COMPONENTS:
25
+ - MnemoCore API: {{ .Values.mnemocore.replicaCount }} replica(s)
26
+ {{- if .Values.redis.enabled }}
27
+ - Redis: {{ include "mnemocore.redis.fullname" . }}:{{ .Values.redis.service.port }}
28
+ {{- end }}
29
+ {{- if .Values.qdrant.enabled }}
30
+ - Qdrant: {{ include "mnemocore.qdrant.fullname" . }}:{{ .Values.qdrant.service.httpPort }}
31
+ {{- end }}
32
+
33
+ RESOURCES:
34
+ MnemoCore:
35
+ Limits:
36
+ CPU: {{ .Values.mnemocore.resources.limits.cpu }}
37
+ Memory: {{ .Values.mnemocore.resources.limits.memory }}
38
+ Requests:
39
+ CPU: {{ .Values.mnemocore.resources.requests.cpu }}
40
+ Memory: {{ .Values.mnemocore.resources.requests.memory }}
41
+
42
+ {{- if .Values.mnemocore.autoscaling.enabled }}
43
+ AUTOSCALING:
44
+ - Min Replicas: {{ .Values.mnemocore.autoscaling.minReplicas }}
45
+ - Max Replicas: {{ .Values.mnemocore.autoscaling.maxReplicas }}
46
+ - CPU Target: {{ .Values.mnemocore.autoscaling.targetCPUUtilizationPercentage }}%
47
+ {{- if .Values.mnemocore.autoscaling.targetMemoryUtilizationPercentage }}
48
+ - Memory Target: {{ .Values.mnemocore.autoscaling.targetMemoryUtilizationPercentage }}%
49
+ {{- end }}
50
+ {{- end }}
51
+
52
+ PROBES:
53
+ - Liveness: Initial Delay: {{ .Values.mnemocore.probes.liveness.initialDelaySeconds }}s
54
+ - Readiness: Initial Delay: {{ .Values.mnemocore.probes.readiness.initialDelaySeconds }}s
55
+ - Startup: Initial Delay: {{ .Values.mnemocore.probes.startup.initialDelaySeconds }}s
56
+
57
+ ================================================================================
58
+ GETTING STARTED
59
+ ================================================================================
60
+
61
+ 1. Forward the API port (for local testing):
62
+ kubectl port-forward svc/{{ include "mnemocore.fullname" . }} 8100:8100 -n {{ .Release.Namespace }}
63
+
64
+ 2. Check the health of the service:
65
+ curl http://localhost:8100/health
66
+
67
+ 3. Access Prometheus metrics:
68
+ kubectl port-forward svc/{{ include "mnemocore.fullname" . }} 9090:9090 -n {{ .Release.Namespace }}
69
+ curl http://localhost:9090/metrics
70
+
71
+ 4. View logs:
72
+ kubectl logs -l app.kubernetes.io/name={{ include "mnemocore.name" . }} -n {{ .Release.Namespace }} -f
73
+
74
+ 5. Check pod status:
75
+ kubectl get pods -l app.kubernetes.io/name={{ include "mnemocore.name" . }} -n {{ .Release.Namespace }}
76
+
77
+ ================================================================================
78
+ CONFIGURATION NOTES
79
+ ================================================================================
80
+
81
+ {{- if not .Values.mnemocore.apiKey.existingSecret }}
82
+ WARNING: API key is set via values. For production, use an existing secret:
83
+ --set mnemocore.apiKey.existingSecret=my-secret-name
84
+ {{- end }}
85
+
86
+ {{- if not .Values.mnemocore.persistence.enabled }}
87
+ WARNING: Persistence is disabled. Data will be lost on pod restart.
88
+ {{- end }}
89
+
90
+ HAIM Configuration:
91
+ - Dimensionality: {{ .Values.mnemocore.config.dimensionality }}
92
+ - Encoding Mode: {{ .Values.mnemocore.config.encoding.mode }}
93
+ - Hot Tier Max: {{ .Values.mnemocore.config.tiers.hot.max_memories }} memories
94
+ - Warm Tier Max: {{ .Values.mnemocore.config.tiers.warm.max_memories }} memories
95
+
96
+ For more information, visit:
97
+ https://github.com/your-org/mnemocore
98
+
99
+ ================================================================================
100
+ {{- end }}
helm/mnemocore/templates/pdb.yaml ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{/*
2
+ Pod Disruption Budget for MnemoCore
3
+ */}}
4
+ {{- if .Values.mnemocore.podDisruptionBudget.enabled }}
5
+ apiVersion: policy/v1
6
+ kind: PodDisruptionBudget
7
+ metadata:
8
+ name: {{ include "mnemocore.fullname" . }}-pdb
9
+ labels:
10
+ {{- include "mnemocore.labels" . | nindent 4 }}
11
+ app.kubernetes.io/component: api
12
+ spec:
13
+ {{- if .Values.mnemocore.podDisruptionBudget.minAvailable }}
14
+ minAvailable: {{ .Values.mnemocore.podDisruptionBudget.minAvailable }}
15
+ {{- end }}
16
+ {{- if .Values.mnemocore.podDisruptionBudget.maxUnavailable }}
17
+ maxUnavailable: {{ .Values.mnemocore.podDisruptionBudget.maxUnavailable }}
18
+ {{- end }}
19
+ selector:
20
+ matchLabels:
21
+ {{- include "mnemocore.selectorLabels" . | nindent 6 }}
22
+ app.kubernetes.io/component: api
23
+ {{- end }}
helm/mnemocore/templates/pvc.yaml ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{/*
2
+ Persistent Volume Claim for MnemoCore data
3
+ */}}
4
+ {{- if .Values.mnemocore.persistence.enabled }}
5
+ apiVersion: v1
6
+ kind: PersistentVolumeClaim
7
+ metadata:
8
+ name: {{ include "mnemocore.pvc.fullname" . }}
9
+ labels:
10
+ {{- include "mnemocore.labels" . | nindent 4 }}
11
+ app.kubernetes.io/component: api
12
+ {{- with .Values.mnemocore.persistence.annotations }}
13
+ annotations:
14
+ {{- toYaml . | nindent 4 }}
15
+ {{- end }}
16
+ spec:
17
+ accessModes:
18
+ {{- range .Values.mnemocore.persistence.accessModes }}
19
+ - {{ . | quote }}
20
+ {{- end }}
21
+ {{- if .Values.global.storageClass }}
22
+ storageClassName: {{ .Values.global.storageClass | quote }}
23
+ {{- else if .Values.mnemocore.persistence.storageClass }}
24
+ storageClassName: {{ .Values.mnemocore.persistence.storageClass | quote }}
25
+ {{- end }}
26
+ resources:
27
+ requests:
28
+ storage: {{ .Values.mnemocore.persistence.size | quote }}
29
+ {{- end }}
helm/mnemocore/templates/secret.yaml ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{/*
2
+ MnemoCore Secret - API Key and sensitive configuration
3
+ */}}
4
+ {{- if and (not .Values.mnemocore.apiKey.existingSecret) .Values.mnemocore.apiKey.value }}
5
+ apiVersion: v1
6
+ kind: Secret
7
+ metadata:
8
+ name: {{ include "mnemocore.fullname" . }}-api-key
9
+ labels:
10
+ {{- include "mnemocore.labels" . | nindent 4 }}
11
+ app.kubernetes.io/component: api
12
+ type: Opaque
13
+ data:
14
+ api-key: {{ .Values.mnemocore.apiKey.value | b64enc | quote }}
15
+ {{- end }}
16
+ ---
17
+ {{/*
18
+ MnemoCore Generic Secret for external service credentials
19
+ */}}
20
+ {{- if or .Values.redis.existingSecret .Values.qdrant.existingSecret }}
21
+ apiVersion: v1
22
+ kind: Secret
23
+ metadata:
24
+ name: {{ include "mnemocore.secret.fullname" . }}
25
+ labels:
26
+ {{- include "mnemocore.labels" . | nindent 4 }}
27
+ app.kubernetes.io/component: api
28
+ type: Opaque
29
+ data:
30
+ {{- if .Values.redis.existingSecret }}
31
+ redis-url: {{ .Values.redis.url | b64enc | quote }}
32
+ {{- end }}
33
+ {{- if .Values.qdrant.existingSecret }}
34
+ qdrant-url: {{ .Values.qdrant.url | b64enc | quote }}
35
+ {{- end }}
36
+ {{- end }}
helm/mnemocore/templates/service.yaml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{/*
2
+ MnemoCore API Service
3
+ */}}
4
+ apiVersion: v1
5
+ kind: Service
6
+ metadata:
7
+ name: {{ include "mnemocore.fullname" . }}
8
+ labels:
9
+ {{- include "mnemocore.labels" . | nindent 4 }}
10
+ app.kubernetes.io/component: api
11
+ {{- with .Values.mnemocore.service.annotations }}
12
+ annotations:
13
+ {{- toYaml . | nindent 4 }}
14
+ {{- end }}
15
+ spec:
16
+ type: {{ .Values.mnemocore.service.type }}
17
+ ports:
18
+ - port: {{ .Values.mnemocore.service.port }}
19
+ targetPort: {{ .Values.mnemocore.service.targetPort }}
20
+ protocol: TCP
21
+ name: http
22
+ - port: {{ .Values.mnemocore.service.metricsPort }}
23
+ targetPort: {{ .Values.mnemocore.ports.metrics }}
24
+ protocol: TCP
25
+ name: metrics
26
+ selector:
27
+ {{- include "mnemocore.selectorLabels" . | nindent 4 }}
28
+ app.kubernetes.io/component: api
29
+ ---
30
+ {{/*
31
+ MnemoCore Headless Service (for StatefulSet compatibility)
32
+ */}}
33
+ apiVersion: v1
34
+ kind: Service
35
+ metadata:
36
+ name: {{ include "mnemocore.fullname" . }}-headless
37
+ labels:
38
+ {{- include "mnemocore.labels" . | nindent 4 }}
39
+ app.kubernetes.io/component: api
40
+ spec:
41
+ type: ClusterIP
42
+ clusterIP: None
43
+ ports:
44
+ - port: {{ .Values.mnemocore.service.port }}
45
+ targetPort: {{ .Values.mnemocore.service.targetPort }}
46
+ protocol: TCP
47
+ name: http
48
+ selector:
49
+ {{- include "mnemocore.selectorLabels" . | nindent 4 }}
50
+ app.kubernetes.io/component: api
helm/mnemocore/templates/serviceaccount.yaml ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{/*
2
+ Service Account for MnemoCore
3
+ */}}
4
+ {{- if .Values.mnemocore.serviceAccount.create }}
5
+ apiVersion: v1
6
+ kind: ServiceAccount
7
+ metadata:
8
+ name: {{ include "mnemocore.serviceAccountName" . }}
9
+ labels:
10
+ {{- include "mnemocore.labels" . | nindent 4 }}
11
+ app.kubernetes.io/component: api
12
+ {{- with .Values.mnemocore.serviceAccount.annotations }}
13
+ annotations:
14
+ {{- toYaml . | nindent 4 }}
15
+ {{- end }}
16
+ automountServiceAccountToken: true
17
+ {{- end }}
helm/mnemocore/templates/servicemonitor.yaml ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{/*
2
+ ServiceMonitor for Prometheus Operator
3
+ */}}
4
+ {{- if .Values.serviceMonitor.enabled }}
5
+ apiVersion: monitoring.coreos.com/v1
6
+ kind: ServiceMonitor
7
+ metadata:
8
+ name: {{ include "mnemocore.fullname" . }}
9
+ labels:
10
+ {{- include "mnemocore.labels" . | nindent 4 }}
11
+ app.kubernetes.io/component: api
12
+ {{- with .Values.serviceMonitor.labels }}
13
+ {{- toYaml . | nindent 4 }}
14
+ {{- end }}
15
+ {{- with .Values.serviceMonitor.annotations }}
16
+ annotations:
17
+ {{- toYaml . | nindent 4 }}
18
+ {{- end }}
19
+ spec:
20
+ endpoints:
21
+ - port: metrics
22
+ path: /metrics
23
+ interval: {{ .Values.serviceMonitor.interval }}
24
+ scrapeTimeout: {{ .Values.serviceMonitor.scrapeTimeout }}
25
+ {{- with .Values.serviceMonitor.relabelings }}
26
+ relabelings:
27
+ {{- toYaml . | nindent 8 }}
28
+ {{- end }}
29
+ {{- with .Values.serviceMonitor.metricRelabelings }}
30
+ metricRelabelings:
31
+ {{- toYaml . | nindent 8 }}
32
+ {{- end }}
33
+ namespaceSelector:
34
+ matchNames:
35
+ - {{ .Release.Namespace }}
36
+ selector:
37
+ matchLabels:
38
+ {{- include "mnemocore.selectorLabels" . | nindent 6 }}
39
+ app.kubernetes.io/component: api
40
+ {{- end }}
helm/mnemocore/values.yaml ADDED
@@ -0,0 +1,430 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MnemoCore Helm Chart - Default Values
2
+ # ======================================
3
+ # Override these values in your own values file or via --set flags
4
+
5
+ # Global settings
6
+ global:
7
+ imageRegistry: ""
8
+ imagePullSecrets: []
9
+ storageClass: ""
10
+ namespace: mnemocore
11
+
12
+ # MnemoCore API Configuration
13
+ mnemocore:
14
+ # Number of replicas (ignored if autoscaling.enabled is true)
15
+ replicaCount: 2
16
+
17
+ # Container image
18
+ image:
19
+ repository: mnemocore
20
+ tag: "latest"
21
+ pullPolicy: IfNotPresent
22
+
23
+ # Container ports
24
+ ports:
25
+ api: 8100
26
+ metrics: 9090
27
+
28
+ # Resource limits and requests
29
+ resources:
30
+ limits:
31
+ cpu: "2"
32
+ memory: "2Gi"
33
+ requests:
34
+ cpu: "500m"
35
+ memory: "512Mi"
36
+
37
+ # Probes configuration
38
+ probes:
39
+ liveness:
40
+ enabled: true
41
+ initialDelaySeconds: 40
42
+ periodSeconds: 30
43
+ timeoutSeconds: 10
44
+ failureThreshold: 3
45
+ successThreshold: 1
46
+ readiness:
47
+ enabled: true
48
+ initialDelaySeconds: 20
49
+ periodSeconds: 10
50
+ timeoutSeconds: 5
51
+ failureThreshold: 3
52
+ successThreshold: 1
53
+ startup:
54
+ enabled: true
55
+ initialDelaySeconds: 10
56
+ periodSeconds: 10
57
+ timeoutSeconds: 5
58
+ failureThreshold: 30
59
+ successThreshold: 1
60
+
61
+ # HAIM Configuration (mounted as config.yaml)
62
+ config:
63
+ version: "3.0"
64
+ dimensionality: 16384
65
+ encoding:
66
+ mode: "binary"
67
+ token_method: "bundle"
68
+ tiers:
69
+ hot:
70
+ max_memories: 2000
71
+ ltp_threshold_min: 0.7
72
+ eviction_policy: "lru"
73
+ warm:
74
+ max_memories: 100000
75
+ ltp_threshold_min: 0.3
76
+ consolidation_interval_hours: 1
77
+ storage_backend: "mmap"
78
+ cold:
79
+ storage_backend: "filesystem"
80
+ compression: "gzip"
81
+ archive_threshold_days: 30
82
+ ltp:
83
+ initial_importance: 0.5
84
+ decay_lambda: 0.01
85
+ permanence_threshold: 0.95
86
+ half_life_days: 30.0
87
+ hysteresis:
88
+ promote_delta: 0.15
89
+ demote_delta: 0.10
90
+ gpu:
91
+ enabled: false
92
+ device: "cuda:0"
93
+ batch_size: 1000
94
+ fallback_to_cpu: true
95
+ observability:
96
+ metrics_port: 9090
97
+ log_level: "INFO"
98
+ structured_logging: true
99
+ paths:
100
+ data_dir: "/app/data"
101
+ memory_file: "/app/data/memory.jsonl"
102
+ codebook_file: "/app/data/codebook.json"
103
+ concepts_file: "/app/data/concepts.json"
104
+ synapses_file: "/app/data/synapses.json"
105
+ warm_mmap_dir: "/app/data/warm_tier"
106
+ cold_archive_dir: "/app/data/cold_archive"
107
+ mcp:
108
+ enabled: false
109
+ transport: "stdio"
110
+ host: "127.0.0.1"
111
+ port: 8110
112
+ api_base_url: "http://localhost:8100"
113
+ timeout_seconds: 15
114
+ allow_tools:
115
+ - "memory_store"
116
+ - "memory_query"
117
+ - "memory_get"
118
+ - "memory_delete"
119
+ - "memory_stats"
120
+ - "memory_health"
121
+
122
+ # Environment variables
123
+ env:
124
+ logLevel: "INFO"
125
+ host: "0.0.0.0"
126
+ port: 8100
127
+
128
+ # API Key (set via secret)
129
+ apiKey:
130
+ # Use existing secret
131
+ existingSecret: ""
132
+ # Key in the secret containing the API key
133
+ key: "HAIM_API_KEY"
134
+ # If not using existing secret, set value here (NOT RECOMMENDED for production)
135
+ value: ""
136
+
137
+ # Persistence
138
+ persistence:
139
+ enabled: true
140
+ accessModes:
141
+ - ReadWriteOnce
142
+ size: 10Gi
143
+ # storageClass: ""
144
+ annotations: {}
145
+
146
+ # Service configuration
147
+ service:
148
+ type: ClusterIP
149
+ port: 8100
150
+ targetPort: 8100
151
+ metricsPort: 9090
152
+ annotations: {}
153
+ labels: {}
154
+
155
+ # Ingress configuration
156
+ ingress:
157
+ enabled: false
158
+ className: ""
159
+ annotations: {}
160
+ # kubernetes.io/ingress.class: nginx
161
+ # kubernetes.io/tls-acme: "true"
162
+ hosts:
163
+ - host: mnemocore.local
164
+ paths:
165
+ - path: /
166
+ pathType: Prefix
167
+ tls: []
168
+ # - secretName: mnemocore-tls
169
+ # hosts:
170
+ # - mnemocore.local
171
+
172
+ # Autoscaling configuration
173
+ autoscaling:
174
+ enabled: true
175
+ minReplicas: 2
176
+ maxReplicas: 10
177
+ targetCPUUtilizationPercentage: 70
178
+ targetMemoryUtilizationPercentage: 80
179
+ # Custom metrics
180
+ metrics: []
181
+ behavior: {}
182
+
183
+ # Pod Disruption Budget
184
+ podDisruptionBudget:
185
+ enabled: true
186
+ minAvailable: 1
187
+ # maxUnavailable: 1
188
+
189
+ # Pod security context
190
+ podSecurityContext:
191
+ runAsNonRoot: true
192
+ runAsUser: 1000
193
+ runAsGroup: 1000
194
+ fsGroup: 1000
195
+
196
+ # Container security context
197
+ securityContext:
198
+ allowPrivilegeEscalation: false
199
+ capabilities:
200
+ drop:
201
+ - ALL
202
+ readOnlyRootFilesystem: true
203
+ runAsNonRoot: true
204
+
205
+ # Node selector
206
+ nodeSelector: {}
207
+
208
+ # Tolerations
209
+ tolerations: []
210
+
211
+ # Affinity
212
+ affinity: {}
213
+
214
+ # Pod annotations
215
+ podAnnotations:
216
+ prometheus.io/scrape: "true"
217
+ prometheus.io/port: "9090"
218
+ prometheus.io/path: "/metrics"
219
+
220
+ # Pod labels
221
+ podLabels: {}
222
+
223
+ # Priority class name
224
+ priorityClassName: ""
225
+
226
+ # Termination grace period
227
+ terminationGracePeriodSeconds: 30
228
+
229
+ # Service account
230
+ serviceAccount:
231
+ create: true
232
+ name: ""
233
+ annotations: {}
234
+
235
+ # Init containers
236
+ initContainers: []
237
+
238
+ # Extra volumes
239
+ extraVolumes: []
240
+
241
+ # Extra volume mounts
242
+ extraVolumeMounts: []
243
+
244
+ # Redis Configuration
245
+ redis:
246
+ # Enable Redis as part of this chart
247
+ enabled: true
248
+
249
+ # Use Bitnami Redis chart or embedded config
250
+ embedded:
251
+ enabled: false
252
+
253
+ # When not using Bitnami chart
254
+ image:
255
+ repository: redis
256
+ tag: "7.2-alpine"
257
+ pullPolicy: IfNotPresent
258
+
259
+ # Redis configuration
260
+ config:
261
+ maxmemory: "512mb"
262
+ maxmemoryPolicy: "allkeys-lru"
263
+ save: "60 1"
264
+ logLevel: "warning"
265
+
266
+ # Resource limits
267
+ resources:
268
+ limits:
269
+ cpu: "1"
270
+ memory: "512Mi"
271
+ requests:
272
+ cpu: "100m"
273
+ memory: "128Mi"
274
+
275
+ # Probes
276
+ probes:
277
+ liveness:
278
+ enabled: true
279
+ initialDelaySeconds: 10
280
+ periodSeconds: 10
281
+ timeoutSeconds: 5
282
+ failureThreshold: 5
283
+ readiness:
284
+ enabled: true
285
+ initialDelaySeconds: 5
286
+ periodSeconds: 5
287
+ timeoutSeconds: 3
288
+ failureThreshold: 5
289
+
290
+ # Service
291
+ service:
292
+ type: ClusterIP
293
+ port: 6379
294
+
295
+ # Persistence
296
+ persistence:
297
+ enabled: true
298
+ accessModes:
299
+ - ReadWriteOnce
300
+ size: 5Gi
301
+ # storageClass: ""
302
+
303
+ # URL override (if using external Redis)
304
+ url: ""
305
+ existingSecret: ""
306
+
307
+ # Qdrant Configuration
308
+ qdrant:
309
+ # Enable Qdrant as part of this chart
310
+ enabled: true
311
+
312
+ # Use official Qdrant chart or embedded config
313
+ embedded:
314
+ enabled: false
315
+
316
+ # When not using official chart
317
+ image:
318
+ repository: qdrant/qdrant
319
+ tag: "latest"
320
+ pullPolicy: IfNotPresent
321
+
322
+ # Qdrant configuration
323
+ config:
324
+ grpcPort: 6334
325
+ logLevel: "INFO"
326
+
327
+ # HAIM-specific collection settings
328
+ collections:
329
+ hot:
330
+ name: "haim_hot"
331
+ binaryQuantization: true
332
+ alwaysRam: true
333
+ hnswM: 16
334
+ hnswEfConstruct: 100
335
+ warm:
336
+ name: "haim_warm"
337
+ binaryQuantization: true
338
+ alwaysRam: true
339
+ hnswM: 16
340
+ hnswEfConstruct: 100
341
+
342
+ # Resource limits
343
+ resources:
344
+ limits:
345
+ cpu: "2"
346
+ memory: "4Gi"
347
+ requests:
348
+ cpu: "500m"
349
+ memory: "1Gi"
350
+
351
+ # Probes
352
+ probes:
353
+ liveness:
354
+ enabled: true
355
+ initialDelaySeconds: 15
356
+ periodSeconds: 10
357
+ timeoutSeconds: 5
358
+ failureThreshold: 5
359
+ readiness:
360
+ enabled: true
361
+ initialDelaySeconds: 10
362
+ periodSeconds: 5
363
+ timeoutSeconds: 3
364
+ failureThreshold: 5
365
+
366
+ # Services
367
+ service:
368
+ type: ClusterIP
369
+ httpPort: 6333
370
+ grpcPort: 6334
371
+
372
+ # Persistence
373
+ persistence:
374
+ enabled: true
375
+ accessModes:
376
+ - ReadWriteOnce
377
+ size: 20Gi
378
+ # storageClass: ""
379
+
380
+ # URL override (if using external Qdrant)
381
+ url: ""
382
+ existingSecret: ""
383
+
384
+ # Network Policies
385
+ networkPolicy:
386
+ enabled: false
387
+ ingress:
388
+ - from:
389
+ - namespaceSelector:
390
+ matchLabels:
391
+ name: mnemocore
392
+ ports:
393
+ - protocol: TCP
394
+ port: 8100
395
+ - protocol: TCP
396
+ port: 9090
397
+
398
+ # Service Monitor (Prometheus Operator)
399
+ serviceMonitor:
400
+ enabled: false
401
+ namespace: ""
402
+ interval: 30s
403
+ scrapeTimeout: 10s
404
+ labels: {}
405
+ annotations: {}
406
+ relabelings: []
407
+ metricRelabelings: []
408
+
409
+ # Grafana Dashboard
410
+ grafana:
411
+ dashboard:
412
+ enabled: false
413
+ namespace: ""
414
+ labels:
415
+ grafana_dashboard: "1"
416
+ annotations: {}
417
+
418
+ # Prometheus Rules
419
+ prometheusRule:
420
+ enabled: false
421
+ namespace: ""
422
+ additionalLabels: {}
423
+ rules: []
424
+
425
+ # Test configuration
426
+ test:
427
+ enabled: false
428
+ image:
429
+ repository: busybox
430
+ tag: "latest"
k8s/README.md ADDED
@@ -0,0 +1,324 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MnemoCore Kubernetes Deployment
2
+
3
+ This guide describes how to deploy MnemoCore to a Kubernetes cluster using the Helm chart located in `helm/mnemocore/`.
4
+
5
+ ## Overview
6
+
7
+ MnemoCore is a cognitive memory infrastructure that uses Hyperdimensional Computing (HDC) to provide persistent, scalable memory for AI systems. The Kubernetes deployment includes:
8
+
9
+ - **MnemoCore API** - Main API service with health checks and metrics
10
+ - **Redis** - In-memory data store for hot tier and caching
11
+ - **Qdrant** - Vector database for similarity search
12
+
13
+ ## Prerequisites
14
+
15
+ - Kubernetes 1.25+
16
+ - Helm 3.8+
17
+ - kubectl configured to access your cluster
18
+ - (Optional) Prometheus Operator for metrics scraping
19
+ - (Optional) cert-manager for TLS certificates
20
+
21
+ ## Quick Start
22
+
23
+ ### 1. Install using Helm
24
+
25
+ ```bash
26
+ # Add required Helm repositories
27
+ helm repo add bitnami https://charts.bitnami.com/bitnami
28
+ helm repo add qdrant https://qdrant.github.io/qdrant-helm
29
+ helm repo update
30
+
31
+ # Install MnemoCore with default values
32
+ helm install mnemocore ./helm/mnemocore \
33
+ --namespace mnemocore \
34
+ --create-namespace \
35
+ --set mnemocore.apiKey.value="your-secure-api-key"
36
+ ```
37
+
38
+ ### 2. Install with custom values
39
+
40
+ ```bash
41
+ # Create a values file
42
+ cat > values-prod.yaml << EOF
43
+ mnemocore:
44
+ replicaCount: 3
45
+ apiKey:
46
+ existingSecret: mnemocore-api-key
47
+ resources:
48
+ limits:
49
+ cpu: "4"
50
+ memory: "4Gi"
51
+ requests:
52
+ cpu: "1"
53
+ memory: "1Gi"
54
+ autoscaling:
55
+ enabled: true
56
+ minReplicas: 3
57
+ maxReplicas: 20
58
+ targetCPUUtilizationPercentage: 60
59
+
60
+ redis:
61
+ persistence:
62
+ size: 20Gi
63
+
64
+ qdrant:
65
+ persistence:
66
+ size: 100Gi
67
+
68
+ global:
69
+ storageClass: "fast-ssd"
70
+ EOF
71
+
72
+ helm install mnemocore ./helm/mnemocore \
73
+ --namespace mnemocore \
74
+ --create-namespace \
75
+ -f values-prod.yaml
76
+ ```
77
+
78
+ ### 3. Verify the installation
79
+
80
+ ```bash
81
+ # Check pod status
82
+ kubectl get pods -n mnemocore
83
+
84
+ # Check services
85
+ kubectl get svc -n mnemocore
86
+
87
+ # Check HPA status
88
+ kubectl get hpa -n mnemocore
89
+
90
+ # Port-forward for local testing
91
+ kubectl port-forward svc/mnemocore 8100:8100 -n mnemocore
92
+
93
+ # Test the API
94
+ curl http://localhost:8100/health
95
+ ```
96
+
97
+ ## Configuration
98
+
99
+ ### Key Configuration Parameters
100
+
101
+ | Parameter | Description | Default |
102
+ |-----------|-------------|---------|
103
+ | `mnemocore.replicaCount` | Number of API replicas | `2` |
104
+ | `mnemocore.image.repository` | Container image repository | `mnemocore` |
105
+ | `mnemocore.image.tag` | Container image tag | `latest` |
106
+ | `mnemocore.resources.limits.cpu` | CPU limit | `2` |
107
+ | `mnemocore.resources.limits.memory` | Memory limit | `2Gi` |
108
+ | `mnemocore.autoscaling.enabled` | Enable HPA | `true` |
109
+ | `mnemocore.autoscaling.minReplicas` | Minimum replicas | `2` |
110
+ | `mnemocore.autoscaling.maxReplicas` | Maximum replicas | `10` |
111
+ | `mnemocore.apiKey.existingSecret` | Existing secret for API key | `""` |
112
+ | `redis.enabled` | Deploy Redis | `true` |
113
+ | `qdrant.enabled` | Deploy Qdrant | `true` |
114
+
115
+ ### Resource Limits
116
+
117
+ | Component | CPU Limit | Memory Limit | CPU Request | Memory Request |
118
+ |-----------|-----------|--------------|-------------|----------------|
119
+ | MnemoCore | 2 | 2Gi | 500m | 512Mi |
120
+ | Redis | 1 | 512Mi | 100m | 128Mi |
121
+ | Qdrant | 2 | 4Gi | 500m | 1Gi |
122
+
123
+ ### Probe Configuration
124
+
125
+ | Probe | Initial Delay | Period | Timeout | Failure Threshold |
126
+ |-------|---------------|--------|---------|-------------------|
127
+ | Liveness | 40s | 30s | 10s | 3 |
128
+ | Readiness | 20s | 10s | 5s | 3 |
129
+ | Startup | 10s | 10s | 5s | 30 |
130
+
131
+ ## Production Deployment
132
+
133
+ ### 1. Create Secrets
134
+
135
+ ```bash
136
+ # Create API key secret
137
+ kubectl create secret generic mnemocore-api-key \
138
+ --from-literal=HAIM_API_KEY='your-secure-api-key' \
139
+ -n mnemocore
140
+
141
+ # Or use sealed-secrets/external-secrets for GitOps
142
+ ```
143
+
144
+ ### 2. Configure Storage
145
+
146
+ ```bash
147
+ # Ensure you have a storage class configured
148
+ kubectl get storageclass
149
+
150
+ # For production, use fast SSD storage
151
+ helm install mnemocore ./helm/mnemocore \
152
+ --namespace mnemocore \
153
+ --set global.storageClass=fast-ssd \
154
+ --set mnemocore.persistence.size=50Gi \
155
+ --set redis.persistence.size=20Gi \
156
+ --set qdrant.persistence.size=200Gi
157
+ ```
158
+
159
+ ### 3. Enable Ingress
160
+
161
+ ```bash
162
+ helm install mnemocore ./helm/mnemocore \
163
+ --namespace mnemocore \
164
+ --set mnemocore.ingress.enabled=true \
165
+ --set mnemocore.ingress.className=nginx \
166
+ --set 'mnemocore.ingress.hosts[0].host=mnemocore.yourdomain.com' \
167
+ --set 'mnemocore.ingress.hosts[0].paths[0].path=/' \
168
+ --set 'mnemocore.ingress.hosts[0].paths[0].pathType=Prefix' \
169
+ --set 'mnemocore.ingress.tls[0].secretName=mnemocore-tls' \
170
+ --set 'mnemocore.ingress.tls[0].hosts[0]=mnemocore.yourdomain.com'
171
+ ```
172
+
173
+ ### 4. Enable Network Policies
174
+
175
+ ```bash
176
+ helm install mnemocore ./helm/mnemocore \
177
+ --namespace mnemocore \
178
+ --set networkPolicy.enabled=true
179
+ ```
180
+
181
+ ## Monitoring
182
+
183
+ ### Prometheus Integration
184
+
185
+ ```bash
186
+ # Enable ServiceMonitor for Prometheus Operator
187
+ helm install mnemocore ./helm/mnemocore \
188
+ --namespace mnemocore \
189
+ --set serviceMonitor.enabled=true \
190
+ --set serviceMonitor.labels.release=prometheus
191
+ ```
192
+
193
+ ### Available Metrics
194
+
195
+ MnemoCore exposes the following metrics on port 9090:
196
+
197
+ - `mnemocore_memory_count_total` - Total number of memories stored
198
+ - `mnemocore_memory_tier_hot` - Number of memories in hot tier
199
+ - `mnemocore_memory_tier_warm` - Number of memories in warm tier
200
+ - `mnemocore_memory_tier_cold` - Number of memories in cold tier
201
+ - `mnemocore_query_duration_seconds` - Query latency histogram
202
+ - `mnemocore_ltp_avg` - Average LTP score
203
+ - `mnemocore_api_requests_total` - Total API requests
204
+ - `mnemocore_api_request_duration_seconds` - API request latency
205
+
206
+ ### Grafana Dashboard
207
+
208
+ Import the provided `grafana-dashboard.json` to visualize MnemoCore metrics.
209
+
210
+ ## Scaling
211
+
212
+ ### Manual Scaling
213
+
214
+ ```bash
215
+ # Scale to 5 replicas
216
+ kubectl scale deployment mnemocore --replicas=5 -n mnemocore
217
+ ```
218
+
219
+ ### Autoscaling
220
+
221
+ HPA is enabled by default. Customize scaling behavior:
222
+
223
+ ```bash
224
+ helm upgrade mnemocore ./helm/mnemocore \
225
+ --namespace mnemocore \
226
+ --set mnemocore.autoscaling.minReplicas=3 \
227
+ --set mnemocore.autoscaling.maxReplicas=50 \
228
+ --set mnemocore.autoscaling.targetCPUUtilizationPercentage=50
229
+ ```
230
+
231
+ ## Upgrading
232
+
233
+ ```bash
234
+ # Upgrade to a new version
235
+ helm upgrade mnemocore ./helm/mnemocore \
236
+ --namespace mnemocore \
237
+ --set mnemocore.image.tag=v4.5.0
238
+
239
+ # Rollback if needed
240
+ helm rollback mnemocore -n mnemocore
241
+ ```
242
+
243
+ ## Troubleshooting
244
+
245
+ ### Check Logs
246
+
247
+ ```bash
248
+ # MnemoCore logs
249
+ kubectl logs -l app.kubernetes.io/name=mnemocore -n mnemocore -f
250
+
251
+ # Redis logs
252
+ kubectl logs -l app.kubernetes.io/component=redis -n mnemocore -f
253
+
254
+ # Qdrant logs
255
+ kubectl logs -l app.kubernetes.io/component=qdrant -n mnemocore -f
256
+ ```
257
+
258
+ ### Common Issues
259
+
260
+ 1. **Pod stuck in Pending**
261
+ - Check storage class availability
262
+ - Check resource requests vs node capacity
263
+
264
+ 2. **Health check failing**
265
+ - Check Redis and Qdrant connectivity
266
+ - Verify environment variables
267
+
268
+ 3. **High memory usage**
269
+ - Reduce `mnemocore.config.tiers.hot.max_memories`
270
+ - Enable GPU for faster encoding
271
+
272
+ ### Debug Mode
273
+
274
+ ```bash
275
+ # Run with debug logging
276
+ helm upgrade mnemocore ./helm/mnemocore \
277
+ --namespace mnemocore \
278
+ --set mnemocore.env.logLevel=DEBUG
279
+ ```
280
+
281
+ ## Uninstalling
282
+
283
+ ```bash
284
+ # Remove the Helm release
285
+ helm uninstall mnemocore -n mnemocore
286
+
287
+ # Remove the namespace (optional)
288
+ kubectl delete namespace mnemocore
289
+
290
+ # Remove PVCs (caution: data loss)
291
+ kubectl delete pvc -n mnemocore --all
292
+ ```
293
+
294
+ ## Architecture
295
+
296
+ ```
297
+ ┌─────────────────┐
298
+ │ Ingress │
299
+ │ (Optional) │
300
+ └────────┬────────┘
301
+
302
+ ┌────────▼────────┐
303
+ │ MnemoCore API │
304
+ │ (HPA: 2-10) │
305
+ │ Port: 8100 │
306
+ └────────┬────────┘
307
+
308
+ ┌──────────────┼──────────────┐
309
+ │ │ │
310
+ ┌────────▼────────┐ │ ┌────────▼────────┐
311
+ │ Redis │ │ │ Qdrant │
312
+ │ Port: 6379 │ │ │ Port: 6333/6334 │
313
+ │ Hot Tier Cache │ │ │ Vector Storage │
314
+ └─────────────────┘ │ └─────────────────┘
315
+
316
+ ┌───────▼───────┐
317
+ │ Persistent │
318
+ │ Storage │
319
+ └───────────────┘
320
+ ```
321
+
322
+ ## License
323
+
324
+ MIT License - See LICENSE file for details.
pyproject.toml ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "mnemocore"
7
+ version = "4.5.0"
8
+ description = "MnemoCore – Infrastructure for Persistent Cognitive Memory. A hierarchical AI memory engine with hot/warm/cold tiers, vector search, and subconscious consolidation."
9
+ readme = "README.md"
10
+ license = { file = "LICENSE" }
11
+ requires-python = ">=3.10"
12
+ authors = [
13
+ { name = "Robin", email = "" },
14
+ ]
15
+ keywords = [
16
+ "ai",
17
+ "memory",
18
+ "cognitive",
19
+ "vector-search",
20
+ "qdrant",
21
+ "llm",
22
+ "hyperdimensional-computing",
23
+ ]
24
+ classifiers = [
25
+ "Development Status :: 4 - Beta",
26
+ "Intended Audience :: Developers",
27
+ "Intended Audience :: Science/Research",
28
+ "License :: OSI Approved :: MIT License",
29
+ "Programming Language :: Python :: 3",
30
+ "Programming Language :: Python :: 3.10",
31
+ "Programming Language :: Python :: 3.11",
32
+ "Programming Language :: Python :: 3.12",
33
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
34
+ "Topic :: Software Development :: Libraries :: Python Modules",
35
+ "Typing :: Typed",
36
+ ]
37
+
38
+ # Runtime dependencies (migrated from requirements.txt)
39
+ dependencies = [
40
+ "numpy>=1.24",
41
+ "requests>=2.31.0",
42
+ "fastapi>=0.100.0",
43
+ "uvicorn>=0.23.0",
44
+ "pydantic>=2.0.0",
45
+ "pyyaml>=6.0",
46
+ "redis>=5.0.0",
47
+ "qdrant-client>=1.7.0",
48
+ "prometheus-client>=0.17.0",
49
+ "loguru>=0.7.0",
50
+ "msgpack>=1.0.0",
51
+ "mcp>=0.1.0",
52
+ "faiss-cpu>=1.7.4",
53
+ "pybreaker>=1.0.0",
54
+ ]
55
+
56
+ [project.optional-dependencies]
57
+ dev = [
58
+ "pytest>=7.0.0",
59
+ "pytest-asyncio>=0.21.0",
60
+ "hypothesis>=6.0.0",
61
+ "mypy>=1.0.0",
62
+ "flake8>=6.0.0",
63
+ "isort>=5.0.0",
64
+ "black>=23.0.0",
65
+ "coverage>=7.0.0",
66
+ ]
67
+ viz = [
68
+ "plotly>=5.0.0",
69
+ "pandas>=2.0.0",
70
+ ]
71
+
72
+ [project.urls]
73
+ Homepage = "https://github.com/RobinALG87/MnemoCore-Infrastructure-for-Persistent-Cognitive-Memory"
74
+ Repository = "https://github.com/RobinALG87/MnemoCore-Infrastructure-for-Persistent-Cognitive-Memory"
75
+ "Bug Tracker" = "https://github.com/RobinALG87/MnemoCore-Infrastructure-for-Persistent-Cognitive-Memory/issues"
76
+
77
+ [project.scripts]
78
+ mnemocore = "mnemocore.api.main:app"
79
+
80
+ # ── Hatchling build configuration ─────────────────────────────────────────────
81
+ [tool.hatch.build.targets.wheel]
82
+ packages = ["src/mnemocore"]
83
+
84
+ [tool.hatch.build.targets.sdist]
85
+ include = [
86
+ "src/",
87
+ "README.md",
88
+ "LICENSE",
89
+ "CHANGELOG.md",
90
+ "config.yaml",
91
+ ]
92
+
93
+ # ── Pytest ─────────────────────────────────────────────────────────────────────
94
+ [tool.pytest.ini_options]
95
+ testpaths = ["tests"]
96
+ python_files = ["test_*.py"]
97
+ python_classes = ["Test*"]
98
+ python_functions = ["test_*"]
99
+ addopts = "-v --tb=short"
100
+ asyncio_mode = "auto"
101
+
102
+ # ── Coverage ───────────────────────────────────────────────────────────────────
103
+ [tool.coverage.run]
104
+ source = ["src"]
105
+ omit = ["tests/*", "**/__pycache__/*"]
106
+
107
+ [tool.coverage.report]
108
+ show_missing = true
109
+ skip_covered = false
pytest.ini ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ [pytest]
2
+ testpaths = tests
3
+ python_files = test_*.py
4
+ python_classes = Test*
5
+ python_functions = test_*
6
+ addopts = -v --tb=short
7
+ markers =
8
+ integration: marks tests requiring external services (Redis, Qdrant)
9
+ asyncio_mode = auto
requirements-dev.txt ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Development Dependencies
2
+ # Install with: pip install -r requirements-dev.txt
3
+
4
+ # Code Formatting
5
+ black>=23.0.0
6
+
7
+ # Import Sorting
8
+ isort>=5.12.0
9
+
10
+ # Style Guide Enforcement
11
+ flake8>=6.0.0
12
+
13
+ # Static Type Checking
14
+ mypy>=1.0.0
15
+
16
+ # Testing
17
+ pytest>=7.0.0
18
+ pytest-cov>=4.0.0
19
+ pytest-asyncio>=0.21.0
20
+
21
+ # Security Scanning
22
+ pip-audit>=2.6.0
23
+ bandit>=1.7.0
24
+
25
+ # Documentation
26
+ sphinx>=7.0.0
27
+ sphinx-rtd-theme>=1.3.0
28
+
29
+ # Pre-commit hooks (optional)
30
+ pre-commit>=3.0.0