Granis87 committed on
Commit
dbb04e4
·
verified ·
1 Parent(s): 8b4a820

Initial upload of MnemoCore

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. .coveragerc +42 -0
  2. .dockerignore +95 -0
  3. .github/workflows/ci.yml +221 -0
  4. .github/workflows/docker-publish.yml +130 -0
  5. .gitignore +81 -0
  6. CHANGELOG.md +61 -0
  7. Dockerfile +78 -0
  8. LICENSE +22 -0
  9. MnemoCore Phase 3 5 Infinite.md +1615 -0
  10. README.md +1161 -0
  11. REFACTORING_TODO.md +207 -0
  12. RELEASE_CHECKLIST.md +125 -0
  13. SECURITY.md +30 -0
  14. benchmarks/bench_100k_memories.py +179 -0
  15. benchmarks/bench_permute.py +55 -0
  16. config.yaml +167 -0
  17. data/subconscious_audit.jsonl +2 -0
  18. data/subconscious_evolution.json +24 -0
  19. docker-compose.yml +128 -0
  20. docs/API.md +91 -0
  21. docs/ARCHITECTURE.md +55 -0
  22. docs/BETA_POLICY.md +50 -0
  23. docs/MCP_IMPLEMENTATION_PLAN.md +128 -0
  24. docs/PERFORMANCE.md +71 -0
  25. docs/ROADMAP.md +320 -0
  26. docs/SELF_IMPROVEMENT_DEEP_DIVE.md +279 -0
  27. git_status.txt +51 -0
  28. grafana-dashboard.json +954 -0
  29. helm/mnemocore/.helmignore +68 -0
  30. helm/mnemocore/Chart.yaml +55 -0
  31. helm/mnemocore/templates/_helpers.tpl +119 -0
  32. helm/mnemocore/templates/configmap.yaml +114 -0
  33. helm/mnemocore/templates/deployment-qdrant.yaml +141 -0
  34. helm/mnemocore/templates/deployment-redis.yaml +141 -0
  35. helm/mnemocore/templates/deployment.yaml +176 -0
  36. helm/mnemocore/templates/hpa.yaml +43 -0
  37. helm/mnemocore/templates/ingress.yaml +45 -0
  38. helm/mnemocore/templates/networkpolicy.yaml +50 -0
  39. helm/mnemocore/templates/notes.txt +100 -0
  40. helm/mnemocore/templates/pdb.yaml +23 -0
  41. helm/mnemocore/templates/pvc.yaml +29 -0
  42. helm/mnemocore/templates/secret.yaml +36 -0
  43. helm/mnemocore/templates/service.yaml +50 -0
  44. helm/mnemocore/templates/serviceaccount.yaml +17 -0
  45. helm/mnemocore/templates/servicemonitor.yaml +40 -0
  46. helm/mnemocore/values.yaml +430 -0
  47. k8s/README.md +324 -0
  48. pyproject.toml +109 -0
  49. pytest.ini +9 -0
  50. requirements-dev.txt +30 -0
.coveragerc ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [run]
2
+ source = src
3
+ branch = true
4
+ parallel = true
5
+ data_file = .coverage
6
+
7
+ [report]
8
+ exclude_lines =
9
+ pragma: no cover
10
+ def __repr__
11
+ raise AssertionError
12
+ raise NotImplementedError
13
+ if __name__ == .__main__.:
14
+ if TYPE_CHECKING:
15
+ @abstractmethod
16
+ @abc.abstractmethod
17
+ omit =
18
+ tests/*
19
+ */__pycache__/*
20
+ */site-packages/*
21
+ */dist-packages/*
22
+ */.venv/*
23
+ */venv/*
24
+ setup.py
25
+ conftest.py
26
+
27
+ fail_under = 80
28
+ precision = 2
29
+ show_missing = true
30
+ skip_covered = false
31
+ sort = Cover
32
+
33
+ [html]
34
+ directory = htmlcov
35
+ title = MnemoCore Coverage Report
36
+
37
+ [xml]
38
+ output = coverage.xml
39
+
40
+ [json]
41
+ output = coverage.json
42
+ show_contexts = true
.dockerignore ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MnemoCore Docker Ignore
2
+ # =======================
3
+ # Exclude files not needed in Docker build context
4
+
5
+ # Python
6
+ __pycache__/
7
+ *.py[cod]
8
+ *$py.class
9
+ *.so
10
+ .Python
11
+ *.egg-info/
12
+ .eggs/
13
+ *.egg
14
+ .mypy_cache/
15
+ .pytest_cache/
16
+ .ruff_cache/
17
+
18
+ # Virtual environments
19
+ .venv/
20
+ venv/
21
+ ENV/
22
+ env/
23
+
24
+ # IDE and editors
25
+ .idea/
26
+ .vscode/
27
+ *.swp
28
+ *.swo
29
+ *~
30
+ .project
31
+ .pydevproject
32
+ .settings/
33
+
34
+ # Dependencies (will be installed in container)
35
+ deps/
36
+ node_modules/
37
+
38
+ # Test files and coverage
39
+ tests/
40
+ test_*.py
41
+ *_test.py
42
+ .coverage
43
+ htmlcov/
44
+ .tox/
45
+ .nox/
46
+
47
+ # Documentation
48
+ docs/
49
+ *.md
50
+ !README.md
51
+
52
+ # Data directories (mounted as volumes)
53
+ data/
54
+ *.jsonl
55
+ *.json
56
+ !config.json
57
+
58
+ # Logs
59
+ logs/
60
+ *.log
61
+
62
+ # Git
63
+ .git/
64
+ .gitignore
65
+ .gitattributes
66
+
67
+ # Docker (prevent recursive builds)
68
+ Dockerfile*
69
+ docker-compose*.yml
70
+ .dockerignore
71
+
72
+ # Environment files (use .env.example as template)
73
+ .env
74
+ .env.*
75
+ !.env.example
76
+
77
+ # Local development
78
+ *.local
79
+ *.bak
80
+ *.tmp
81
+
82
+ # OS files
83
+ .DS_Store
84
+ Thumbs.db
85
+
86
+ # Build artifacts
87
+ dist/
88
+ build/
89
+ *.tar.gz
90
+ *.zip
91
+
92
+ # Miscellaneous
93
+ scripts/debug_*.py
94
+ scripts/bisect_*.py
95
+ scripts/verify_*.py
.github/workflows/ci.yml ADDED
@@ -0,0 +1,221 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: CI/CD Pipeline
2
+
3
+ on:
4
+ push:
5
+ branches: [main, develop]
6
+ pull_request:
7
+ branches: [main, develop]
8
+
9
+ env:
10
+ PYTHONUNBUFFERED: "1"
11
+ HAIM_API_KEY: "ci-test-key-not-for-production"
12
+ HAIM_DIMENSIONALITY: "1024"
13
+ HAIM_ENCODING_MODE: "binary"
14
+
15
+ jobs:
16
+ # ===========================================================================
17
+ # LINT JOB - Code Quality Checks
18
+ # ===========================================================================
19
+ lint:
20
+ name: Lint & Format Check
21
+ runs-on: ubuntu-latest
22
+ steps:
23
+ - name: Checkout repository
24
+ uses: actions/checkout@v4
25
+
26
+ - name: Set up Python
27
+ uses: actions/setup-python@v5
28
+ with:
29
+ python-version: "3.11"
30
+ cache: 'pip'
31
+
32
+ - name: Install dependencies
33
+ run: |
34
+ python -m pip install --upgrade pip
35
+ pip install black isort flake8 mypy
36
+
37
+ - name: Run Black (code formatter check)
38
+ run: black --check --diff src/ tests/
39
+
40
+ - name: Run isort (import sorter check)
41
+ run: isort --check-only --diff src/ tests/
42
+
43
+ - name: Run flake8 (style guide enforcement)
44
+ run: flake8 src/ tests/ --max-line-length=120 --extend-ignore=E203,W503
45
+
46
+ - name: Run mypy (static type checker)
47
+ run: mypy src/ --ignore-missing-imports --no-strict-optional
48
+ continue-on-error: true # Non-blocking until type coverage improves
49
+
50
+ # ===========================================================================
51
+ # TEST JOB - Unit & Integration Tests with Coverage
52
+ # ===========================================================================
53
+ test:
54
+ name: Test (Python ${{ matrix.python-version }})
55
+ runs-on: ubuntu-latest
56
+ needs: lint
57
+ strategy:
58
+ fail-fast: false
59
+ matrix:
60
+ python-version: ["3.10", "3.11", "3.12"]
61
+
62
+ services:
63
+ redis:
64
+ image: redis:7-alpine
65
+ ports:
66
+ - 6379:6379
67
+ options: >-
68
+ --health-cmd "redis-cli ping"
69
+ --health-interval 10s
70
+ --health-timeout 5s
71
+ --health-retries 5
72
+
73
+ steps:
74
+ - name: Checkout repository
75
+ uses: actions/checkout@v4
76
+
77
+ - name: Set up Python ${{ matrix.python-version }}
78
+ uses: actions/setup-python@v5
79
+ with:
80
+ python-version: ${{ matrix.python-version }}
81
+ cache: 'pip'
82
+
83
+ - name: Install dependencies
84
+ run: |
85
+ python -m pip install --upgrade pip
86
+ pip install -r requirements.txt
87
+ pip install -r requirements-dev.txt
88
+ pip install hypothesis fakeredis
89
+
90
+ - name: Create required directories
91
+ run: mkdir -p data
92
+
93
+ - name: Run tests with coverage
94
+ env:
95
+ REDIS_URL: redis://localhost:6379
96
+ HAIM_API_KEY: ${{ env.HAIM_API_KEY }}
97
+ HAIM_DIMENSIONALITY: ${{ env.HAIM_DIMENSIONALITY }}
98
+ HAIM_ENCODING_MODE: ${{ env.HAIM_ENCODING_MODE }}
99
+ run: |
100
+ pytest tests/ \
101
+ -m "not integration" \
102
+ --cov=src \
103
+ --cov-report=xml \
104
+ --cov-report=term-missing \
105
+ --cov-fail-under=60 \
106
+ --tb=short \
107
+ -v
108
+
109
+ - name: Upload coverage to Codecov
110
+ if: matrix.python-version == '3.11'
111
+ uses: codecov/codecov-action@v4
112
+ with:
113
+ files: ./coverage.xml
114
+ fail_ci_if_error: false
115
+ verbose: true
116
+ env:
117
+ CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
118
+
119
+ # ===========================================================================
120
+ # SECURITY JOB - Dependency & Code Security Scanning
121
+ # ===========================================================================
122
+ security:
123
+ name: Security Scan
124
+ runs-on: ubuntu-latest
125
+ needs: lint
126
+ steps:
127
+ - name: Checkout repository
128
+ uses: actions/checkout@v4
129
+
130
+ - name: Set up Python
131
+ uses: actions/setup-python@v5
132
+ with:
133
+ python-version: "3.11"
134
+ cache: 'pip'
135
+
136
+ - name: Install dependencies
137
+ run: |
138
+ python -m pip install --upgrade pip
139
+ pip install pip-audit bandit
140
+
141
+ - name: Run pip-audit (dependency vulnerability scan)
142
+ run: pip-audit -r requirements.txt
143
+ continue-on-error: true
144
+
145
+ - name: Run Bandit (code security analysis)
146
+ run: bandit -r src/ -ll --skip B101,B601
147
+ continue-on-error: true
148
+
149
+ # ===========================================================================
150
+ # PROPERTY-BASED TESTS - Hypothesis
151
+ # ===========================================================================
152
+ property-tests:
153
+ name: Property-Based Tests (Hypothesis)
154
+ runs-on: ubuntu-latest
155
+ needs: lint
156
+ steps:
157
+ - name: Checkout repository
158
+ uses: actions/checkout@v4
159
+
160
+ - name: Set up Python
161
+ uses: actions/setup-python@v5
162
+ with:
163
+ python-version: "3.11"
164
+ cache: 'pip'
165
+
166
+ - name: Install dependencies
167
+ run: |
168
+ python -m pip install --upgrade pip
169
+ pip install -r requirements.txt
170
+ pip install hypothesis pytest pytest-asyncio
171
+
172
+ - name: Run property-based tests
173
+ env:
174
+ HAIM_API_KEY: ${{ env.HAIM_API_KEY }}
175
+ HAIM_DIMENSIONALITY: ${{ env.HAIM_DIMENSIONALITY }}
176
+ run: |
177
+ pytest tests/test_binary_hdv_properties.py \
178
+ -v \
179
+ --tb=short
180
+
181
+ # ===========================================================================
182
+ # DOCKER BUILD - Validate image builds correctly
183
+ # ===========================================================================
184
+ docker:
185
+ name: Docker Build
186
+ runs-on: ubuntu-latest
187
+ needs: [lint]
188
+ steps:
189
+ - name: Checkout repository
190
+ uses: actions/checkout@v4
191
+
192
+ - name: Build Docker image
193
+ run: docker build -t mnemocore:ci-${{ github.sha }} .
194
+
195
+ - name: Verify Python imports work in image
196
+ run: |
197
+ docker run --rm \
198
+ -e HAIM_API_KEY=ci-test-key \
199
+ mnemocore:ci-${{ github.sha }} \
200
+ python -c "from src.core.engine import HAIMEngine; print('Import OK')"
201
+
202
+ # ===========================================================================
203
+ # BUILD STATUS - Summary Job
204
+ # ===========================================================================
205
+ build-status:
206
+ name: Build Status
207
+ runs-on: ubuntu-latest
208
+ needs: [lint, test, security, property-tests, docker]
209
+ if: always()
210
+ steps:
211
+ - name: Check build status
212
+ run: |
213
+ if [[ "${{ needs.test.result }}" == "failure" ]]; then
214
+ echo "Tests failed!"
215
+ exit 1
216
+ fi
217
+ if [[ "${{ needs.lint.result }}" == "failure" ]]; then
218
+ echo "Lint checks failed!"
219
+ exit 1
220
+ fi
221
+ echo "All checks passed!"
.github/workflows/docker-publish.yml ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Docker Build & Publish
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - 'v*'
7
+ workflow_dispatch:
8
+ inputs:
9
+ push_to_registry:
10
+ description: 'Push to registry'
11
+ required: true
12
+ default: 'true'
13
+ type: boolean
14
+
15
+ env:
16
+ REGISTRY_DOCKERHUB: docker.io
17
+ REGISTRY_GHCR: ghcr.io
18
+ IMAGE_NAME: ${{ github.repository }}
19
+
20
+ jobs:
21
+ # ===========================================================================
22
+ # BUILD AND PUSH TO DOCKER HUB
23
+ # ===========================================================================
24
+ build-dockerhub:
25
+ name: Build & Push (Docker Hub)
26
+ runs-on: ubuntu-latest
27
+ permissions:
28
+ contents: read
29
+
30
+ steps:
31
+ - name: Checkout repository
32
+ uses: actions/checkout@v4
33
+
34
+ - name: Set up QEMU
35
+ uses: docker/setup-qemu-action@v3
36
+
37
+ - name: Set up Docker Buildx
38
+ uses: docker/setup-buildx-action@v3
39
+
40
+ - name: Log in to Docker Hub
41
+ if: github.event_name != 'workflow_dispatch' || github.event.inputs.push_to_registry == 'true'
42
+ uses: docker/login-action@v3
43
+ with:
44
+ registry: ${{ env.REGISTRY_DOCKERHUB }}
45
+ username: ${{ secrets.DOCKERHUB_USERNAME }}
46
+ password: ${{ secrets.DOCKERHUB_TOKEN }}
47
+
48
+ - name: Extract metadata (tags, labels)
49
+ id: meta
50
+ uses: docker/metadata-action@v5
51
+ with:
52
+ images: ${{ env.REGISTRY_DOCKERHUB }}/${{ secrets.DOCKERHUB_USERNAME }}/mnemocore
53
+ tags: |
54
+ type=ref,event=branch
55
+ type=ref,event=pr
56
+ type=semver,pattern={{version}}
57
+ type=semver,pattern={{major}}.{{minor}}
58
+ type=semver,pattern={{major}}
59
+ type=sha
60
+
61
+ - name: Build and push Docker image
62
+ uses: docker/build-push-action@v5
63
+ with:
64
+ context: .
65
+ platforms: linux/amd64,linux/arm64
66
+ push: ${{ github.event_name != 'workflow_dispatch' || github.event.inputs.push_to_registry == 'true' }}
67
+ tags: ${{ steps.meta.outputs.tags }}
68
+ labels: ${{ steps.meta.outputs.labels }}
69
+ cache-from: type=gha
70
+ cache-to: type=gha,mode=max
71
+
72
+ # ===========================================================================
73
+ # BUILD AND PUSH TO GITHUB CONTAINER REGISTRY
74
+ # ===========================================================================
75
+ build-ghcr:
76
+ name: Build & Push (GHCR)
77
+ runs-on: ubuntu-latest
78
+ permissions:
79
+ contents: read
80
+ packages: write
81
+
82
+ steps:
83
+ - name: Checkout repository
84
+ uses: actions/checkout@v4
85
+
86
+ - name: Set up QEMU
87
+ uses: docker/setup-qemu-action@v3
88
+
89
+ - name: Set up Docker Buildx
90
+ uses: docker/setup-buildx-action@v3
91
+
92
+ - name: Log in to GitHub Container Registry
93
+ if: github.event_name != 'workflow_dispatch' || github.event.inputs.push_to_registry == 'true'
94
+ uses: docker/login-action@v3
95
+ with:
96
+ registry: ${{ env.REGISTRY_GHCR }}
97
+ username: ${{ github.actor }}
98
+ password: ${{ secrets.GITHUB_TOKEN }}
99
+
100
+ - name: Extract metadata (tags, labels)
101
+ id: meta
102
+ uses: docker/metadata-action@v5
103
+ with:
104
+ images: ${{ env.REGISTRY_GHCR }}/${{ env.IMAGE_NAME }}
105
+ tags: |
106
+ type=ref,event=branch
107
+ type=ref,event=pr
108
+ type=semver,pattern={{version}}
109
+ type=semver,pattern={{major}}.{{minor}}
110
+ type=semver,pattern={{major}}
111
+ type=sha
112
+
113
+ - name: Build and push Docker image
+ id: push
114
+ uses: docker/build-push-action@v5
115
+ with:
116
+ context: .
117
+ platforms: linux/amd64,linux/arm64
118
+ push: ${{ github.event_name != 'workflow_dispatch' || github.event.inputs.push_to_registry == 'true' }}
119
+ tags: ${{ steps.meta.outputs.tags }}
120
+ labels: ${{ steps.meta.outputs.labels }}
121
+ cache-from: type=gha
122
+ cache-to: type=gha,mode=max
123
+
124
+ - name: Generate artifact attestation
125
+ if: github.event_name != 'workflow_dispatch' || github.event.inputs.push_to_registry == 'true'
126
+ uses: actions/attest-build-provenance@v1
127
+ with:
128
+ subject-name: ${{ env.REGISTRY_GHCR }}/${{ env.IMAGE_NAME }}
129
+ subject-digest: ${{ steps.push.outputs.digest }}
130
+ push-to-registry: true
.gitignore ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ build/
8
+ develop-eggs/
9
+ dist/
10
+ downloads/
11
+ eggs/
12
+ .eggs/
13
+ lib/
14
+ lib64/
15
+ parts/
16
+ sdist/
17
+ var/
18
+ wheels/
19
+ *.egg-info/
20
+ .installed.cfg
21
+ *.egg
22
+
23
+ # Virtual environments
24
+ .venv/
25
+ venv/
26
+ ENV/
27
+
28
+ # IDE
29
+ .idea/
30
+ .vscode/
31
+ *.swp
32
+ *.swo
33
+ *~
34
+
35
+ # Testing
36
+ .pytest_cache/
37
+ .coverage
38
+ htmlcov/
39
+ .tox/
40
+ .nox/
41
+
42
+ # Data (runtime generated)
43
+ data/memory.jsonl
44
+ data/codebook.json
45
+ data/concepts.json
46
+ data/synapses.json
47
+ data/warm_tier/*.mmap
48
+ data/warm_tier/*.json
49
+ data/warm_tier/*.npy
50
+ data/cold_archive/*.gz
51
+ vector_core/corpus_ready.json
52
+
53
+ # Logs
54
+ *.log
55
+ logs/
56
+
57
+ # Local dependency/vendor dumps
58
+ deps/
59
+
60
+ # Benchmarks and ad-hoc outputs
61
+ results*.txt
62
+ benchmark_results.txt
63
+
64
+ # OS
65
+ .DS_Store
66
+ Thumbs.db
67
+
68
+ # Secrets (should never exist, but just in case)
69
+ .env
70
+ *.pem
71
+ *.key
72
+
73
+ # Internal planning documents – NOT for public repo
74
+ AGENT_MASTER_PLAN.md
75
+ *.pdf
76
+
77
+ # Local IDE / agent settings
78
+ .claude/
79
+
80
+ # Runtime artifacts
81
+ error_log.txt
CHANGELOG.md ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [Unreleased]
9
+
10
+ ### Deprecated
11
+
12
+ #### Float HDV deprecation (src/core/hdv.py)
13
+ - **HDV class**: All public methods now emit `DeprecationWarning` when called
14
+ - **Migration path**: Use `BinaryHDV` from `src.core.binary_hdv` instead
15
+ - **API mappings**:
16
+ - `HDV(dimension=N)` -> `BinaryHDV.random(dimension=N)`
17
+ - `hdv.bind(other)` -> `hdv.xor_bind(other)`
18
+ - `hdv.unbind(other)` -> `hdv.xor_bind(other)` (XOR is self-inverse)
19
+ - `hdv.cosine_similarity(other)` -> `hdv.similarity(other)`
20
+ - `hdv.permute(shift)` -> `hdv.permute(shift)`
21
+ - `hdv.normalize()` -> No-op (binary vectors are already normalized)
22
+ - **Removal timeline**: Float HDV will be removed in a future version
23
+
24
+ #### BinaryHDV compatibility shims added
25
+ - **bind()**: Alias for `xor_bind()` - for legacy API compatibility
26
+ - **unbind()**: Alias for `xor_bind()` - XOR is self-inverse
27
+ - **cosine_similarity()**: Alias for `similarity()` - returns Hamming-based similarity
28
+ - **normalize()**: No-op for binary vectors
29
+ - **__xor__()**: Enables `v1 ^ v2` syntax for binding
30
+
31
+ ### Fixed
32
+
33
+ #### llm_integration.py (6 fixes)
34
+ - **Import paths**: Fixed incorrect import paths from `haim.src.core.engine` to `src.core.engine` and `haim.src.core.node` to `src.core.node`
35
+ - **Missing import**: Added `from datetime import datetime` for dynamic timestamps
36
+ - **Memory access API**: Changed `self.haim.memory_nodes.get()` to `self.haim.tier_manager.get_memory()` at lines 34, 114, 182, 244, 272 - using the correct API for memory access
37
+ - **Superposition query**: Replaced non-existent `superposition_query()` call with combined hypotheses retrieval path
38
+ - **Concept binding**: Replaced non-existent `bind_concepts()` with placeholder - engine has `bind_memories()` available
39
+ - **OR orchestration**: Integrated `orchestrate_orch_or()` from engine and removed workaround sorting path
40
+
41
+ #### api/main.py (1 fix)
42
+ - **Delete endpoint**: Fixed attribute reference from `engine.memory_nodes` to `engine.tier_manager.hot` at line 229 - correct attribute for hot memory tier
43
+
44
+ #### engine.py (1 fix)
45
+ - **Synapse persistence**: Implemented `_save_synapses()` method (lines 369-390) that was previously an empty stub
46
+ - Creates parent directory if it doesn't exist
47
+ - Writes all synapses to disk in JSONL format
48
+ - Includes all synapse attributes: `neuron_a_id`, `neuron_b_id`, `strength`, `fire_count`, `success_count`, `last_fired`
49
+ - Handles errors gracefully with logging
50
+
51
+ ### Changed
52
+
53
+ - **Dynamic timestamps**: LLM integration now uses `datetime.now().isoformat()` instead of hardcoded timestamp `"2026-02-04"` for accurate temporal tracking
54
+ - **Phase 4.3 hardening**:
55
+ - Chrono-weighting uses batched node lookup instead of per-node await chain
56
+ - `include_neighbors` now preserves `top_k` result contract
57
+ - `_dream_sem._value` private access replaced by public `locked()` API
58
+ - Episodic chaining race reduced with serialized store path (`_store_lock`, `_last_stored_id`)
59
+ - `engine_version` in stats updated to `4.3.0`
60
+ - HOT-tier `time_range` filtering enforced in `TierManager.search()`
61
+ - `orchestrate_orch_or()` made async and lock-guarded
Dockerfile ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MnemoCore Dockerfile
2
+ # ====================
3
+ # Multi-stage build for optimized production image
4
+
5
+ # Stage 1: Builder
6
+ FROM python:3.11-slim AS builder
7
+
8
+ WORKDIR /app
9
+
10
+ # Install build dependencies
11
+ RUN apt-get update && apt-get install -y --no-install-recommends \
12
+ build-essential \
13
+ && rm -rf /var/lib/apt/lists/*
14
+
15
+ # Copy requirements first for better caching
16
+ COPY requirements.txt .
17
+
18
+ # Create virtual environment and install dependencies
19
+ RUN python -m venv /opt/venv
20
+ ENV PATH="/opt/venv/bin:$PATH"
21
+ RUN pip install --no-cache-dir --upgrade pip && \
22
+ pip install --no-cache-dir -r requirements.txt
23
+
24
+ # Stage 2: Production
25
+ FROM python:3.11-slim AS production
26
+
27
+ # Labels for container metadata
28
+ LABEL maintainer="MnemoCore Team"
29
+ LABEL description="MnemoCore - Infrastructure for Persistent Cognitive Memory"
30
+ LABEL version="4.5.0"
31
+
32
+ # Security: Create non-root user
33
+ RUN groupadd --gid 1000 mnemocore && \
34
+ useradd --uid 1000 --gid mnemocore --shell /bin/bash --create-home mnemocore
35
+
36
+ WORKDIR /app
37
+
38
+ # Copy virtual environment from builder
39
+ COPY --from=builder /opt/venv /opt/venv
40
+ ENV PATH="/opt/venv/bin:$PATH"
41
+
42
+ # Install runtime dependencies only
43
+ RUN apt-get update && apt-get install -y --no-install-recommends \
44
+ curl \
45
+ && rm -rf /var/lib/apt/lists/* \
46
+ && apt-get clean
47
+
48
+ # Copy application code
49
+ COPY --chown=mnemocore:mnemocore src/ ./src/
50
+ COPY --chown=mnemocore:mnemocore config.yaml .
51
+ COPY --chown=mnemocore:mnemocore scripts/ ./scripts/
52
+
53
+ # Create data directory with proper permissions
54
+ RUN mkdir -p /app/data && chown -R mnemocore:mnemocore /app/data
55
+
56
+ # Switch to non-root user
57
+ USER mnemocore
58
+
59
+ # Environment variables (defaults, can be overridden)
60
+ ENV PYTHONUNBUFFERED=1 \
61
+ PYTHONDONTWRITEBYTECODE=1 \
62
+ HAIM_API_KEY="" \
63
+ REDIS_URL="redis://redis:6379/0" \
64
+ QDRANT_URL="http://qdrant:6333" \
65
+ LOG_LEVEL="INFO" \
66
+ HOST="0.0.0.0" \
67
+ PORT="8100"
68
+
69
+ # Expose port
70
+ EXPOSE 8100
71
+
72
+ # Health check using the healthcheck script
73
+ HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
74
+ CMD python /app/scripts/healthcheck.py || exit 1
75
+
76
+ # Entry point: Run uvicorn
77
+ ENTRYPOINT ["uvicorn", "src.api.main:app", "--host", "0.0.0.0", "--port", "8100"]
78
+ CMD ["--workers", "1", "--log-level", "info"]
LICENSE ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Robin Granberg
4
+ Contact: Robin@veristatesystems.com
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in all
14
+ copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
+ SOFTWARE.
MnemoCore Phase 3 5 Infinite.md ADDED
@@ -0,0 +1,1615 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MnemoCore Phase 3.5: Infinite Scalability Architecture Blueprint
2
+ **Holographic Adaptive Intelligence Memory - Distributed Vector System**
3
+
4
+ > **Target Scale**: 1B+ memories with sub-10ms latency
5
+ > **Architecture**: Binary HDV/VSA 16,384-dimensional vectors (2KB each)
6
+ > **Operations**: XOR-binding, Hamming distance, Active Inference consolidation
7
+ > **Author**: Robin Granberg (Robin@veristatesystems.com)
8
+ > **Date**: February 14, 2026
9
+ > **Version**: 3.5-DISTRIBUTED
10
+
11
+ ---
12
+
13
+ ## Executive Summary
14
+
15
+ MnemoCore Phase 3.0 successfully implemented local file-based binary hyperdimensional computing with 3-tier storage (HOT/WARM/COLD). This blueprint outlines the evolutionary path to **infinite scalability** through distributed vector databases, federated holographic state, and hardware-accelerated bitwise operations.
16
+
17
+ **Key Findings from Research**:
18
+ - **Qdrant** achieves 40x speedup with binary quantization, supporting native XOR/Hamming distance at 100M+ vector scale[web:23][web:29]
19
+ - **Redis Streams** provides sub-millisecond latency for event-driven "Subconscious Bus" architecture[web:52][web:55]
20
+ - **GPU acceleration** delivers 1.4-9.8× speedup for HDC operations with optimized popcount intrinsics[web:56][web:59]
21
+ - **Critical bottleneck** at 1B scale: Memory consistency across distributed nodes requiring sharding strategies[web:24]
22
+
23
+ ---
24
+
25
+ ## Part 1: Current Architecture Analysis
26
+
27
+ ### 1.1 Existing MnemoCore Phase 3.0 Strengths
28
+
29
+
30
+ - **Binary HDV Foundation**: 16,384-dimensional vectors with XOR-binding provide mathematical elegance and hardware efficiency
31
+ - **Tri-State Storage**: HOT (in-memory), WARM (Redis), COLD (file system) separation enables cost-effective scaling
32
+ - **LTP-Inspired Decay**: Temporal consolidation mimics biological long-term potentiation
33
+ - **Active Inference**: Predictive retrieval based on current context
34
+ - **Consumer Hardware Optimization**: Designed for i7/32GB RAM constraints
35
+
36
+
37
+ ### 1.2 Identified Bottlenecks for Billion-Scale
38
+
39
+ \begin{table}
40
+ \begin{tabular}{|l|l|l|}
41
+ \hline
42
+ \textbf{Component} & \textbf{Current Limitation} & \textbf{Impact at 1B Memories} \\
43
+ \hline
44
+ File I/O & Sequential disk reads & 500ms+ latency for COLD retrieval \\
45
+ \hline
46
+ Redis Single-Node & 512GB RAM ceiling & Cannot hold WARM tier beyond 250M vectors \\
47
+ \hline
48
+ Hamming Distance Calc & CPU-bound Python loops & Linear O(n) search time explosion \\
49
+ \hline
50
+ Memory Consistency & No distributed state & Impossible to federate across nodes \\
51
+ \hline
52
+ Consolidation & Synchronous operations & Blocks real-time inference during updates \\
53
+ \hline
54
+ \end{tabular}
55
+ \caption{Critical scaling bottlenecks in current implementation}
56
+ \end{table}
57
+
58
+ ### 1.3 Code Quality Assessment
59
+
60
+ **Positive Patterns**:
61
+ - Clean separation of concerns (storage layers, encoding, retrieval)
62
+ - Type hints and docstrings present
63
+ - Modular design allows component replacement
64
+
65
+ **Areas Requiring Improvement**:
66
+
67
+ \begin{enumerate}
68
+ \item \textbf{Hardcoded Dimensionality}: D=16384 should be configuration-driven
69
+ \item \textbf{Missing Async/Await}: All I/O operations are synchronous blocking
70
+ \item \textbf{No Batch Operations}: Individual memory processing prevents vectorization
71
+ \item \textbf{Inefficient Hamming Distance}: Python loops instead of NumPy bitwise operations
72
+ \item \textbf{No Connection Pooling}: Redis connections created per operation
73
+ \item \textbf{Absence of Metrics}: No instrumentation for latency/throughput monitoring
74
+ \item \textbf{Lacking Error Recovery}: No retry logic or circuit breakers for Redis failures
75
+ \item \textbf{Sequential Encoding}: No parallelization of hypervector generation
76
+ \end{enumerate}
77
+
78
+ ---
79
+
80
+ ## Part 2: Distributed Vector Database Selection
81
+
82
+ ### 2.1 Binary Quantization Database Comparison
83
+
84
+ \begin{table}
85
+ \begin{tabular}{|l|c|c|c|c|}
86
+ \hline
87
+ \textbf{Database} & \textbf{Binary Support} & \textbf{Scale (vectors)} & \textbf{p50 Latency} & \textbf{XOR Native} \\
88
+ \hline
89
+ Qdrant & Yes (1/1.5/2-bit) & 100M-1B+ & <10ms & Yes \\
90
+ \hline
91
+ Milvus & Yes (binary index) & 100M-10B & 15-50ms & Yes \\
92
+ \hline
93
+ Weaviate & Yes (BQ+HNSW) & 100M-1B & 10-30ms & Partial \\
94
+ \hline
95
+ Pinecone & No (float32 only) & 100M-1B & 10-20ms & No \\
96
+ \hline
97
+ \end{tabular}
98
+ \caption{Comparison of vector databases for binary HDV at scale}
99
+ \end{table}
100
+
101
+ **Winner: Qdrant** for MnemoCore Phase 3.5
102
+
103
+ **Rationale**:
104
+ 1. **Native Binary Quantization**: Supports 1-bit, 1.5-bit, and 2-bit encodings with `always_ram` optimization for HOT tier[web:23][web:28]
105
+ 2. **XOR-as-Hamming**: Efficiently emulates Hamming distance using dot product on binary vectors[web:29]
106
+ 3. **Sub-10ms p50 Latency**: Achieves <10ms at 15.3M vectors with 90-95% recall using oversampling[web:23]
107
+ 4. **Horizontal Scaling**: Supports distributed clusters with automatic sharding
108
+ 5. **HNSW+BQ Integration**: Combines approximate nearest neighbor (ANN) with binary quantization for optimal speed/accuracy tradeoff[web:26]
109
+ 6. **Proven Performance**: 40x speedup compared to uncompressed vectors in production benchmarks[web:23]
110
+
111
+ ### 2.2 Qdrant Architecture for MnemoCore
112
+
113
+ \begin{figure}
114
+ \centering
115
+ \textbf{Proposed 3-Tier Qdrant Integration:}
116
+ \end{figure}
117
+
118
+ ┌─────────────────────────────────────────────────────────┐
119
+ │ HOT TIER (RAM) │
120
+ │ Qdrant Collection: "haim_hot" │
121
+ │ - Binary Quantization: 1-bit, always_ram=true │
122
+ │ - Size: 100K most recent/accessed vectors │
123
+ │ - Latency: <2ms p50 │
124
+ │ - Update Frequency: Real-time (every memory write) │
125
+ └─────────────────────────────────────────────────────────┘
126
+ ↓ (LTP decay < threshold)
127
+ ┌─────────────────────────────────────────────────────────┐
128
+ │ WARM TIER (SSD-backed) │
129
+ │ Qdrant Collection: "haim_warm" │
130
+ │ - Binary Quantization: 1.5-bit, disk-mmap enabled │
131
+ │ - Size: 1M-100M consolidated vectors │
132
+ │ - Latency: 5-10ms p50 │
133
+ │ - Update Frequency: Hourly consolidation batch │
134
+ └─────────────────────────────────────────────────────────┘
135
+ ↓ (LTP decay < lower threshold)
136
+ ┌─────────────────────────────────────────────────────────┐
137
+ │ COLD TIER (Object Storage) │
138
+ │ S3/MinIO: Compressed binary archives │
139
+ │ - Format: .npy.gz (NumPy compressed arrays) │
140
+ │ - Size: 100M-10B+ archival vectors │
141
+ │ - Latency: 50-500ms │
142
+ │ - Access Pattern: Rare retrieval, batch reactivation │
143
+ └─────────────────────────────────────────────────────────┘
144
+
145
+ **Configuration Example (Qdrant Python Client)**:
146
from qdrant_client import QdrantClient, models

client = QdrantClient(url="http://qdrant-cluster:6333")

# HOT tier collection with aggressive binary quantization.
#
# BUG FIX: qdrant-client's models.Distance has no HAMMING member (supported
# metrics are COSINE, EUCLID, DOT, MANHATTAN), so the original snippet would
# raise AttributeError.  Per Qdrant's binary-quantization docs — and this
# document's own Section 2.1 — Hamming distance over {0,1} vectors is
# emulated via the dot product, so DOT is used here.
client.create_collection(
    collection_name="haim_hot",
    vectors_config=models.VectorParams(
        size=16384,                    # D = 16,384
        distance=models.Distance.DOT,  # Hamming emulated via dot product on binary vectors
    ),
    quantization_config=models.BinaryQuantization(
        binary=models.BinaryQuantizationConfig(
            always_ram=True,  # pin quantized vectors to RAM for sub-2ms latency
            # NOTE(review): `encoding` requires a recent qdrant-client release —
            # confirm the deployed client version supports it.
            encoding=models.BinaryQuantizationEncoding.OneBit,
        )
    ),
    hnsw_config=models.HnswConfigDiff(
        m=16,              # connections per node (lower favors speed)
        ef_construct=100,  # construction-time accuracy
    ),
)
168
+
169
+ ### 2.3 Estimated Performance at Scale
170
+
171
+ \begin{table}
172
+ \begin{tabular}{|l|c|c|c|c|}
173
+ \hline
174
+ \textbf{Tier} & \textbf{Vector Count} & \textbf{Memory (GB)} & \textbf{p50 Latency} & \textbf{QPS} \\
175
+ \hline
176
+ HOT (Qdrant 1-bit) & 100,000 & 0.2 & 1.5ms & 10,000+ \\
177
+ \hline
178
+ WARM (Qdrant 1.5-bit) & 10,000,000 & 30 & 8ms & 5,000 \\
179
+ \hline
180
+ COLD (S3 archived) & 1,000,000,000 & 2,000 (disk) & 250ms & 100 \\
181
+ \hline
182
+ \end{tabular}
183
+ \caption{Projected performance with Qdrant at billion-scale}
184
+ \end{table}
185
+
186
+ **Memory Footprint Calculation**:
187
+ - Uncompressed (float32): 16,384 dims × 4 bytes = 64KB per vector
188
+ - 1-bit BQ: 16,384 bits = 2,048 bytes = 2KB per vector (32× compression vs float32)
189
+ - 100K HOT vectors: 100,000 × 2KB = 200MB (+ HNSW index overhead) ≈ 0.2GB total
190
+
191
+ ---
192
+
193
+ ## Part 3: Federated Holographic State
194
+
195
+ ### 3.1 Challenge: Global Memory Consistency
196
+
197
+ **Problem**: In a distributed system with N nodes, each node maintains a local holographic state (superposition of recent contexts). How do we ensure global consistency without sacrificing latency?
198
+
199
+ **Two Competing Approaches**:
200
+
201
+ \begin{enumerate}
202
+ \item \textbf{Sharding by Context}: Partition memories based on semantic clustering
203
+ \item \textbf{Superposition Aggregation}: Each node maintains full holographic state, periodically synchronized
204
+ \end{enumerate}
205
+
206
+ ### 3.2 Strategy Comparison
207
+
208
+ \begin{table}
209
+ \begin{tabular}{|l|l|l|}
210
+ \hline
211
+ \textbf{Aspect} & \textbf{Sharding by Context} & \textbf{Superposition Aggregation} \\
212
+ \hline
213
+ Consistency & Eventual (AP in CAP) & Strong (CP in CAP) \\
214
+ \hline
215
+ Latency & Low (single-node query) & Medium (multi-node gather) \\
216
+ \hline
217
+ Network Traffic & Low (targeted routing) & High (periodic sync) \\
218
+ \hline
219
+ Fault Tolerance & High (replication per shard) & Medium (coordinator SPOF) \\
220
+ \hline
221
+ Context Drift & High risk (stale cross-shard) & Low risk (global view) \\
222
+ \hline
223
+ Implementation Complexity & Medium & High \\
224
+ \hline
225
+ \end{tabular}
226
+ \caption{Architectural comparison for distributed holographic state}
227
+ \end{table}
228
+
229
+ ### 3.3 Recommended Hybrid Architecture
230
+
231
+ **Proposal**: **"Contextual Sharding with Asynchronous Superposition Broadcast"**
232
+
233
+ **Design Principles**:
234
+ 1. Shard memories by semantic context (using locality-sensitive hashing of HDVs)
235
+ 2. Each node maintains a lightweight "global hologram" (last N=1000 cross-shard accesses)
236
+ 3. Asynchronous broadcast of high-salience memories (LTP decay > threshold) to all nodes
237
+ 4. Query routing: Check local shard first, fallback to cross-shard search if confidence < threshold
238
+
239
+ **Architecture Diagram Description**:
240
+
241
+ ┌──────────────────────┐
242
+ │ Query Router │
243
+ │ (Consistent Hashing)│
244
+ └──────────┬───────────┘
245
+ │
246
+ ┌───────────────────┼───────────────────┐
247
+ ↓ ↓ ↓
248
+ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐
249
+ │ Node 1 │ │ Node 2 │ │ Node N │
250
+ │ │ │ │ │ │
251
+ │ Shard: 0-33%│ │ Shard: 34-66│ │ Shard: 67-100│
252
+ │ Local Qdrant│ │ Local Qdrant│ │ Local Qdrant│
253
+ │ │ │ │ │ │
254
+ │ Global Holo-│ │ Global Holo-│ │ Global Holo-│
255
+ │ gram Cache │ │ gram Cache │ │ gram Cache │
256
+ │ (1K vectors)│ │ (1K vectors)│ │ (1K vectors)│
257
+ └──────┬──────┘ └──────┬──────┘ └──────┬──────┘
258
+ │ │ │
259
+ └───────────────────┼───────────────────┘
260
+ │
261
+ ┌──────────▼───────────┐
262
+ │ Redis Pub/Sub │
263
+ │ "hologram_broadcast"│
264
+ │ (High-salience only)│
265
+ └──────────────────────┘
266
+
267
+ **Shard Assignment Algorithm**:
268
def assign_shard(memory_hdv: np.ndarray, num_shards: int) -> int:
    """
    Map a memory's HDV to a shard index in [0, num_shards).

    The first 64 bits of the HDV's raw byte buffer are interpreted as an
    unsigned big-endian integer and reduced modulo ``num_shards``.

    NOTE(review): this is a uniform hash, NOT semantic clustering — a single
    flipped bit among the first 64 moves the memory to a different shard, so
    similar HDVs are not guaranteed to co-locate (the original docstring
    claimed otherwise).  True semantic sharding would need LSH over the full
    vector.

    Args:
        memory_hdv: Binary hypervector; the first 8 *bytes* of its buffer are
            used regardless of dtype.  (The original ``memory_hdv[:8]`` took
            8 *elements*, i.e. 64 bits only for uint8 arrays.)
        num_shards: Number of shards; must be >= 1.

    Returns:
        Deterministic shard index in [0, num_shards).

    Raises:
        ValueError: if ``num_shards`` is less than 1.
    """
    if num_shards < 1:
        raise ValueError("num_shards must be >= 1")
    # Slice the raw byte buffer so "first 64 bits" holds for every dtype.
    hash_key = int.from_bytes(memory_hdv.tobytes()[:8], 'big')
    return hash_key % num_shards
275
+
276
+ ---
277
+
278
+ ## Part 4: Subconscious Bus Architecture
279
+
280
+ ### 4.1 Active Inference Pipeline Requirements
281
+
282
+ **Goal**: Asynchronous memory consolidation, predictive retrieval, and background LTP decay processing without blocking real-time queries.
283
+
284
+ **Requirements**:
285
+ - Sub-millisecond event ingestion latency
286
+ - Ordered processing (within context partition)
287
+ - At-least-once delivery guarantees
288
+ - Backpressure handling for consolidation lag
289
+ - Horizontal scaling of consumer workers
290
+
291
+ ### 4.2 Redis Streams vs Apache Kafka Analysis
292
+
293
+ \begin{table}
294
+ \begin{tabular}{|l|l|l|}
295
+ \hline
296
+ \textbf{Metric} & \textbf{Redis Streams} & \textbf{Apache Kafka} \\
297
+ \hline
298
+ Latency (p50) & <1ms & 5-10ms \\
299
+ \hline
300
+ Throughput & 100K-500K msg/s & 1M-10M msg/s \\
301
+ \hline
302
+ Data Retention & Hours-Days (RAM-limited) & Days-Years (disk-backed) \\
303
+ \hline
304
+ Deployment Complexity & Low (single Redis instance) & High (ZooKeeper + brokers) \\
305
+ \hline
306
+ Operational Overhead & Minimal & Significant \\
307
+ \hline
308
+ Memory Efficiency & High (in-memory) & Medium (page cache) \\
309
+ \hline
310
+ Fault Tolerance & Redis replication & Distributed replication \\
311
+ \hline
312
+ Consumer Groups & Yes (XREADGROUP) & Yes (native) \\
313
+ \hline
314
+ \end{tabular}
315
+ \caption{Comparison of message streaming systems for Subconscious Bus}
316
+ \end{table}
317
+
318
+ **Decision: Redis Streams** for MnemoCore Phase 3.5
319
+
320
+ **Justification**:
321
+ 1. **Ultra-Low Latency**: Sub-millisecond event delivery critical for Active Inference responsiveness[web:52][web:55]
322
+ 2. **Simplified Architecture**: Reuses existing Redis infrastructure (already in WARM tier)
323
+ 3. **Memory Budget**: Consolidation events have short retention needs (1-2 hours max)
324
+ 4. **In-Memory Performance**: Consolidation workers process 850+ records/s on Raspberry Pi 4 with Redis Streams vs 630/s with Kafka[web:38]
325
+ 5. **Consumer Group Support**: Native `XREADGROUP` for distributed worker parallelism[web:52]
326
+
327
+ ### 4.3 Subconscious Bus Implementation
328
+
329
+ **Stream Schema**:
330
# Schema of the events carried on the Subconscious Bus stream.
# Each value maps a payload field name to its expected Python type.
EVENTS = {
    "memory.write": {
        "hdv": bytes,            # binary hyperdimensional vector
        "context_id": str,
        "ltp_strength": float,
        "timestamp": int,
    },
    "memory.access": {
        "memory_id": str,
        "access_count": int,
        "last_access": int,
    },
    "consolidation.trigger": {
        "tier": str,             # "hot_to_warm" or "warm_to_cold"
        "memory_ids": list[str],
    },
    "inference.predict": {
        "context_hdv": bytes,
        "prediction_window": int,  # seconds ahead
    },
}
352
+
353
+ **Producer (Memory Write Path)**:
354
+ import redis
355
+ import msgpack
356
+
357
class SubconsciousBus:
    """Producer side of the Subconscious Bus (Redis Streams).

    Publishes memory-lifecycle events onto a single capped stream so that
    consolidation workers can consume them asynchronously.
    """

    def __init__(self, redis_url: str):
        # BUG FIX: publish_memory_write awaits redis calls, so the client
        # must be the asyncio variant.  The original used the synchronous
        # redis.from_url(), whose methods are not awaitable and would raise
        # "object can't be used in 'await' expression" at runtime.
        # (redis-py >= 4.2 re-exports the async client as redis.asyncio.)
        self.redis = redis.asyncio.from_url(redis_url, decode_responses=False)
        self.stream_key = "MnemoCore:subconscious"

    async def publish_memory_write(self, hdv: np.ndarray, context_id: str, ltp: float):
        """Async-publish a memory.write event without blocking the caller.

        Args:
            hdv: Binary hypervector of the newly written memory.
            context_id: Logical context the memory belongs to.
            ltp: Current LTP strength of the memory.
        """
        event = {
            "type": "memory.write",
            "hdv": hdv.tobytes(),  # raw binary serialization
            "context_id": context_id,
            "ltp_strength": ltp,
            "timestamp": int(time.time() * 1000),  # ms resolution
        }
        packed = msgpack.packb(event)  # compact binary encoding

        # XADD with maxlen to prevent unbounded stream growth.
        await self.redis.xadd(
            name=self.stream_key,
            fields={"data": packed},
            maxlen=100000,      # rolling window of last 100K events
            approximate=True,   # allow ~5% variance for performance
        )
380
+
381
+ **Consumer (Consolidation Worker)**:
382
class ConsolidationWorker:
    """Consumer side of the Subconscious Bus.

    Reads events from the shared stream via a Redis consumer group so that
    multiple worker processes share the load, and acknowledges each event
    only after it has been handled (at-least-once delivery).
    """

    def __init__(self, redis_url: str, consumer_group: str, consumer_name: str):
        # BUG FIX: process_events awaits redis calls, so use the asyncio
        # client (the original synchronous client is not awaitable).
        self.redis = redis.asyncio.from_url(redis_url, decode_responses=False)
        self.stream_key = "MnemoCore:subconscious"
        self.group = consumer_group
        self.name = consumer_name
        # Group creation is itself async with this client, so it is deferred
        # to the first process_events() call instead of running in __init__.
        self._group_ready = False

    async def _ensure_group(self):
        """Create the consumer group once (idempotent across workers)."""
        if self._group_ready:
            return
        try:
            await self.redis.xgroup_create(
                name=self.stream_key,
                groupname=self.group,
                id="0",
                mkstream=True,
            )
        except redis.exceptions.ResponseError as exc:
            # BUG FIX: the original swallowed *every* ResponseError.  Only
            # the expected "group already exists" reply is benign; anything
            # else (bad key type, ACL failure) must surface.
            if "BUSYGROUP" not in str(exc):
                raise
        self._group_ready = True

    async def process_events(self, batch_size: int = 100):
        """Consume and dispatch events in batches, forever.

        Args:
            batch_size: Maximum events fetched per XREADGROUP call.
        """
        await self._ensure_group()
        while True:
            # XREADGROUP with blocking (1000ms timeout).
            messages = await self.redis.xreadgroup(
                groupname=self.group,
                consumername=self.name,
                streams={self.stream_key: ">"},
                count=batch_size,
                block=1000,
            )

            if not messages:
                continue

            for _stream_name, events in messages:
                for event_id, event_data in events:
                    event = msgpack.unpackb(event_data[b"data"])

                    if event["type"] == "memory.write":
                        await self._handle_memory_write(event)
                    elif event["type"] == "consolidation.trigger":
                        await self._handle_consolidation(event)

                    # Acknowledge only after successful handling — this is
                    # what makes delivery at-least-once.
                    await self.redis.xack(self.stream_key, self.group, event_id)
426
+
427
+ **Horizontal Scaling**:
428
+ - Deploy N worker processes (e.g., 4 workers for 4-core CPU)
429
+ - Each worker reads from same consumer group
430
+ - Redis automatically load-balances events across workers
431
+ - Pending Entries List (PEL) tracks unacknowledged messages for fault recovery[web:52]
432
+
433
+ ---
434
+
435
+ ## Part 5: Hardware Acceleration Stack
436
+
437
+ ### 5.1 Bitwise Operations Performance Analysis
438
+
439
+ **Critical Operations in HDC**:
440
+ 1. **XOR-binding**: Element-wise XOR of two 16,384-bit vectors
441
+ 2. **Popcount**: Count of 1-bits (for Hamming distance calculation)
442
+ 3. **Bundling**: Element-wise majority vote across N vectors
443
+
444
+ **Hardware Comparison**:
445
+
446
+ \begin{table}
447
+ \begin{tabular}{|l|c|c|c|c|}
448
+ \hline
449
+ \textbf{Platform} & \textbf{XOR Throughput} & \textbf{Popcount Method} & \textbf{Cost} & \textbf{Power} \\
450
+ \hline
451
+ CPU (AVX-512) & 5 GBit/s & POPCNT instruction & Low & 15-65W \\
452
+ \hline
453
+ GPU (CUDA) & 500 GBit/s & \_\_popcll intrinsic & Medium & 150-300W \\
454
+ \hline
455
+ TPU (v4) & 200 GBit/s & Systolic array ops & High & 175W \\
456
+ \hline
457
+ FPGA (Stratix 10) & 100 GBit/s & Custom LUT counters & High & 30-70W \\
458
+ \hline
459
+ \end{tabular}
460
+ \caption{Hardware performance for HDC operations}
461
+ \end{table}
462
+
463
+ ### 5.2 GPU Acceleration Recommendation
464
+
465
+ **Winner: GPU (NVIDIA RTX 4090 or A100)** for MnemoCore Phase 3.5+
466
+
467
+ **Rationale**:
468
+ 1. **Native Bitwise Support**: CUDA provides efficient `__popcll` (popcount 64-bit) intrinsic[web:54]
469
+ 2. **Proven HDC Speedups**: OpenHD framework achieves 9.8× training speedup and 1.4× inference speedup on GPU vs CPU[web:59]
470
+ 3. **Memory Bandwidth**: 1TB/s (A100) vs 200GB/s (DDR5) enables massive parallel Hamming distance calculations
471
+ 4. **Batch Processing**: Process 1000+ memories in parallel (vs sequential CPU loops)
472
+ 5. **Cost-Effectiveness**: RTX 4090 (~$1600) provides 82 TFLOPS vs TPU v4 pod (>$100K)[web:57]
473
+ 6. **Developer Ecosystem**: PyTorch/CuPy have mature GPU support, CUDA well-documented
474
+
475
+ **Performance Estimates**:
476
+ - **Hamming Distance Batch**: 1M comparisons in ~50ms (GPU) vs 5000ms (CPU)
477
+ - **Encoding Pipeline**: 10K memories/second (GPU) vs 500/second (CPU)
478
+ - **Consolidation**: 100K vector bundling in ~200ms (GPU) vs 10,000ms (CPU)
479
+
480
+ ### 5.3 Optimized GPU Implementation
481
+
482
+ **Leveraging PyTorch for Bitwise Ops**:
483
+ import torch
484
+
485
class GPUHammingCalculator:
    """Vectorized Hamming-distance computation on a torch device."""

    def __init__(self, device: str = "cuda:0"):
        self.device = torch.device(device)

    def batch_hamming_distance(
        self,
        query: np.ndarray,     # shape (D,), e.g. D = 16384
        database: np.ndarray,  # shape (N, D)
    ) -> np.ndarray:
        """
        Return the Hamming distance from `query` to every row of `database`.

        Both inputs are interpreted as boolean bit-vectors; the result is an
        (N,) integer array of differing-bit counts.
        """
        # Move both operands onto the target device as bool tensors.
        q = torch.from_numpy(query).bool().to(self.device)
        db = torch.from_numpy(database).bool().to(self.device)

        # A bit differs exactly where the booleans are unequal (for bools,
        # `!=` is the same predicate as XOR); counting mismatches per row —
        # broadcast across all N rows at once — yields the Hamming distance.
        mismatch = q != db
        per_row = mismatch.sum(dim=1)

        return per_row.cpu().numpy()
508
+
509
+ **Popcount Optimization (CuPy)**:
510
+ import cupy as cp
511
+
512
def gpu_popcount(binary_vectors: np.ndarray) -> np.ndarray:
    """
    Count 1-bits in each binary vector on the GPU.

    Args:
        binary_vectors: (N, D) array of 0/1 values (D = 16384 in MnemoCore).

    Returns:
        (N,) int32 array with the popcount of each vector.
    """
    # Transfer to GPU as bytes.
    vectors_gpu = cp.asarray(binary_vectors, dtype=cp.uint8)

    # Pack the 0/1 values into bits: (N, D) -> (N, D/8) uint8 words.
    packed = cp.packbits(vectors_gpu, axis=1)

    # BUG FIX: the original looped 256 times per call over uint64 words and
    # relied on cp.bitwise_count(), which is not available in CuPy releases.
    # A 256-entry per-byte popcount lookup table does the same work with a
    # single GPU gather + row reduction.
    # NOTE(review): since the input is already unpacked 0/1 values, a plain
    # vectors_gpu.sum(axis=1) would also be correct; the pack+table form is
    # kept to demonstrate popcount in the packed domain.
    byte_popcount = cp.asarray([bin(b).count("1") for b in range(256)], dtype=cp.int32)
    counts = byte_popcount[packed].sum(axis=1, dtype=cp.int32)

    return counts.get()  # transfer result back to the host
532
+
533
+ ### 5.4 Infrastructure Recommendation
534
+
535
+ **Phase 3.5 (100K-10M memories)**: **Bare Metal with Consumer GPU**
536
+ - Hardware: Intel i7-14700K (20 cores) + 64GB DDR5 + RTX 4090 (24GB VRAM)
537
+ - Storage: 2TB NVMe SSD for Qdrant
538
+ - Cost: ~$4000 one-time
539
+ - Advantages: No cloud costs, full control, sub-2ms latency
540
+
541
+ **Phase 4.0 (10M-100M memories)**: **Hybrid Cloud with GPU Instances**
542
+ - Compute: AWS g5.2xlarge (NVIDIA A10G, 24GB VRAM) for consolidation workers
543
+ - Database: Self-hosted Qdrant cluster (3 nodes, 128GB RAM each)
544
+ - Storage: S3 for COLD tier archival
545
+ - Cost: ~$1500/month operational
546
+ - Advantages: Elastic scaling, managed backups, geographic distribution
547
+
548
+ **Phase 5.0 (100M-1B+ memories)**: **Distributed Cloud with TPU Pods**
549
+ - Compute: Google Cloud TPU v4 pods (8 TPU cores) for massive parallelism
550
+ - Database: Fully managed Qdrant Cloud (dedicated cluster)
551
+ - Cost: ~$10,000/month operational
552
+ - Advantages: 420 TOPS performance, 10B+ vector support, enterprise SLA[web:57]
553
+
554
+ **Critical Decision Factor**: **Start with bare metal GPU** (Phase 3.5). Only migrate to cloud when operational complexity exceeds team capacity (typically at 50M+ memories).
555
+
556
+ ---
557
+
558
+ ## Part 6: Implementation Roadmap
559
+
560
+ ### 6.1 Code Refactoring Priorities (Non-Breaking)
561
+
562
+ \begin{enumerate}
563
+ \item \textbf{Configuration System} (Priority: CRITICAL)
564
+ \begin{itemize}
565
+ \item Extract all magic numbers (16384, tier thresholds, Redis URLs) to YAML config
566
+ \item Enable runtime dimensionality changes without code edits
567
+ \item Add environment variable overrides for deployment flexibility
568
+ \end{itemize}
569
+
570
+ \item \textbf{Async I/O Migration} (Priority: HIGH)
571
+ \begin{itemize}
572
+ \item Convert Redis operations to async (aioredis library)
573
+ \item Implement async file I/O for COLD tier (aiofiles)
574
+ \item Use asyncio.gather() for parallel Qdrant queries
575
+ \end{itemize}
576
+
577
+ \item \textbf{Batch Processing Layer} (Priority: HIGH)
578
+ \begin{itemize}
579
+ \item Add batch\_encode() method for encoding N memories in single GPU call
580
+ \item Implement batch\_search() for amortized Hamming distance calculations
581
+ \item Use NumPy vectorization instead of Python loops
582
+ \end{itemize}
583
+
584
+ \item \textbf{Connection Pooling} (Priority: MEDIUM)
585
+ \begin{itemize}
586
+ \item Implement Redis connection pool (redis.ConnectionPool)
587
+ \item Add Qdrant client singleton with connection reuse
588
+ \item Configure connection limits based on workload (default: 10 connections)
589
+ \end{itemize}
590
+
591
+ \item \textbf{Observability Instrumentation} (Priority: MEDIUM)
592
+ \begin{itemize}
593
+ \item Add Prometheus metrics (memory\_writes\_total, search\_latency\_seconds, etc.)
594
+ \item Implement structured logging (loguru with JSON output)
595
+ \item Create Grafana dashboard for real-time monitoring
596
+ \end{itemize}
597
+
598
+ \item \textbf{Error Handling \& Resilience} (Priority: MEDIUM)
599
+ \begin{itemize}
600
+ \item Add exponential backoff retries for transient Redis failures
601
+ \item Implement circuit breaker pattern for Qdrant unavailability
602
+ \item Add fallback to local cache when WARM tier unreachable
603
+ \end{itemize}
604
+
605
+ \item \textbf{GPU Acceleration Module} (Priority: LOW - Phase 4.0)
606
+ \begin{itemize}
607
+ \item Create gpu\_ops.py with PyTorch/CuPy implementations
608
+ \item Add feature flag for CPU/GPU selection
609
+ \item Benchmark and profile GPU vs CPU for threshold tuning
610
+ \end{itemize}
611
+ \end{enumerate}
612
+
613
+ ### 6.2 Migration Path to Qdrant (Zero Downtime)
614
+
615
+ **Phase 1: Dual-Write (Week 1-2)**
616
+ \begin{enumerate}
617
+ \item Deploy Qdrant alongside existing Redis/file system
618
+ \item Modify write path to persist to BOTH systems
619
+ \item No read path changes (continue using old system)
620
+ \item Run data consistency checks daily
621
+ \end{enumerate}
622
+
623
+ **Phase 2: Shadow Read (Week 3-4)**
624
+ \begin{enumerate}
625
+ \item Query BOTH systems on every read
626
+ \item Compare results (latency, recall, ranking)
627
+ \item Log discrepancies but serve from old system
628
+ \item Tune Qdrant HNSW parameters (ef\_search) based on metrics
629
+ \end{enumerate}
630
+
631
+ **Phase 3: Gradual Cutover (Week 5-6)**
632
+ \begin{enumerate}
633
+ \item Route 10\% of reads to Qdrant (canary deployment)
634
+ \item Monitor error rates and p99 latency
635
+ \item Increase to 50\%, then 100\% over 2 weeks
636
+ \item Keep old system as fallback for 1 month
637
+ \end{enumerate}
638
+
639
+ **Phase 4: Decommission (Week 7-8)**
640
+ \begin{enumerate}
641
+ \item Archive old Redis/file data to S3
642
+ \item Remove dual-write logic
643
+ \item Update documentation and runbooks
644
+ \item Celebrate successful migration 🎉
645
+ \end{enumerate}
646
+
647
+ ### 6.3 Testing Strategy
648
+
649
+ **Unit Tests** (Target: 80% coverage):
650
+ - Hamming distance correctness (compare CPU vs GPU implementations)
651
+ - XOR-binding commutativity and associativity
652
+ - LTP decay formula boundary conditions
653
+ - Shard assignment determinism
654
+
655
+ **Integration Tests**:
656
+ - End-to-end write → consolidate → retrieve flow
657
+ - Redis Streams event processing with consumer groups
658
+ - Qdrant cluster failover scenarios
659
+ - GPU memory allocation under high load
660
+
661
+ **Performance Tests** (Benchmarks):
662
+ - Latency: p50, p95, p99 for HOT/WARM/COLD retrieval
663
+ - Throughput: memories/second write rate
664
+ - Scalability: Query time vs database size (1K, 10K, 100K, 1M vectors)
665
+ - Memory: Peak RAM usage during consolidation
666
+
667
+ **Chaos Engineering** (Production):
668
+ - Kill random Qdrant node, verify automatic rebalancing
669
+ - Inject Redis network partition, test circuit breaker
670
+ - Saturate GPU with fake workload, measure degradation
671
+ - Corrupt COLD tier file, validate checksum recovery
672
+
673
+ ---
674
+
675
+ ## Part 7: Critical Bottleneck at 1B Scale
676
+
677
+ ### 7.1 The Fundamental Limitation
678
+
679
+ **Problem**: At 1 billion memories (1B × 2KB = 2TB uncompressed), the dominant bottleneck shifts from **computation** to **distributed state consistency**.
680
+
681
+ **Specific Failure Modes**:
682
+
683
+ \begin{enumerate}
684
+ \item \textbf{Cross-Shard Query Latency}
685
+ \begin{itemize}
686
+ \item With 100 shards, average query hits 1 shard (best case)
687
+ \item Context drift requires checking 10-20 shards (realistic case)
688
+ \item Network round-trips: 10 shards × 10ms = 100ms total (violates <10ms SLA)
689
+ \end{itemize}
690
+
691
+ \item \textbf{Holographic State Synchronization}
692
+ \begin{itemize}
693
+ \item Each node broadcasts high-salience memories to N-1 other nodes
694
+ \item With 100 nodes, broadcast fanout creates O(N²) network traffic
695
+ \item At 1000 writes/sec, 100 nodes = 100K cross-node messages/sec
696
+ \item This saturates 10GbE network links (theoretical max ~1M small packets/sec)
697
+ \end{itemize}
698
+
699
+ \item \textbf{Consolidation Lag}
700
+ \begin{itemize}
701
+ \item HOT → WARM consolidation processes 100K memories/hour (current rate)
702
+ \item At 1B total memories with 10\% monthly churn = 100M updates/month
703
+ \item Required rate: 100M / (30 days × 24 hours) = 138K memories/hour
704
+ \item This exceeds single-worker capacity → need distributed consolidation
705
+ \end{itemize}
706
+ \end{enumerate}
707
+
708
+ ### 7.2 Proposed Solution: Hierarchical Aggregation
709
+
710
+ **Architecture**: **"Tiered Holographic Federation with Regional Supernodes"**
711
+
712
+ ┌────────────────────┐
713
+ │ Global Supernode │
714
+ │ (Coarse Hologram) │
715
+ │ Top 10K salient │
716
+ └─────────┬──────────┘
717
+ │
718
+ ┌───────────────┼───────────────┐
719
+ ↓ ↓ ↓
720
+ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐
721
+ │ Region 1 │ │ Region 2 │ │ Region N │
722
+ │ Supernode │ │ Supernode │ │ Supernode │
723
+ │ (10 shards) │ │ (10 shards) │ │ (10 shards) │
724
+ └──────┬───────┘ └──────┬───────┘ └──────┬───────┘
725
+ │ │ │
726
+ ┌───────┼────────┐ │ ┌───────┼────────┐
727
+ ↓ ↓ ↓ ↓ ↓ ↓ ↓
728
+ Shard0 Shard1 ... Shard9 Shard0 Shard1 ... Shard9
729
+ (Qdrant node) (Qdrant node)
730
+
731
+ **Key Innovations**:
732
+ 1. **Regional Supernodes**: Aggregate holographic state from 10 local shards
733
+ 2. **Global Supernode**: Maintains ultra-sparse representation (top 0.01% salient memories)
734
+ 3. **Lazy Synchronization**: Only propagate when salience exceeds regional threshold
735
+ 4. **Hierarchical Routing**: Check local shard → regional supernode → global supernode → full scan (fallback)
736
+
737
+ **Latency Budget**:
738
+ - Local shard query: 2ms (cache hit)
739
+ - Regional supernode: +5ms (10 shards aggregation)
740
+ - Global supernode: +10ms (cross-region hop)
741
+ - **Total p99**: <20ms (acceptable degradation from <10ms ideal)
742
+
743
+ ### 7.3 Open Research Questions
744
+
745
+ \begin{itemize}
746
+ \item \textbf{Salience Threshold Tuning}: What LTP decay value triggers cross-region broadcast? (Hypothesis: top 0.1\% based on access frequency)
747
+ \item \textbf{Conflict Resolution}: How to merge contradictory memories when regional hologram diverges? (Active area: operational transformation for HDVs)
748
+ \item \textbf{Network Topology}: Star vs mesh vs hybrid for supernode interconnect? (Requires network simulation)
749
+ \item \textbf{Cost-Performance Tradeoff}: When does maintaining global consistency cost more than occasional inconsistency penalties? (Empirical A/B testing needed)
750
+ \end{itemize}
751
+
752
+ ---
753
+
754
+ ## Part 8: Recommended Immediate Actions
755
+
756
+ ### 8.1 Week 1: Foundation Hardening
757
+
758
+ \begin{table}
759
+ \begin{tabular}{|l|l|l|}
760
+ \hline
761
+ \textbf{Task} & \textbf{Owner} & \textbf{Deliverable} \\
762
+ \hline
763
+ Create config.yaml with all parameters & Dev & Editable YAML file \\
764
+ \hline
765
+ Add async Redis operations & Dev & PR with aioredis migration \\
766
+ \hline
767
+ Implement batch encoding (NumPy) & Dev & 10x speedup benchmark \\
768
+ \hline
769
+ Setup Prometheus + Grafana & DevOps & Real-time dashboard \\
770
+ \hline
771
+ \end{tabular}
772
+ \caption{Week 1 critical path items}
773
+ \end{table}
774
+
775
+ ### 8.2 Week 2-4: Qdrant Integration
776
+
777
+ \begin{enumerate}
778
+ \item Deploy Qdrant single-node instance (Docker Compose)
779
+ \item Implement dual-write to Qdrant (keep existing Redis)
780
+ \item Migrate 10K sample memories for testing
781
+ \item Run shadow read comparison (old vs new system)
782
+ \item Document performance metrics (create baseline report)
783
+ \end{enumerate}
784
+
785
+ ### 8.3 Month 2: GPU Acceleration
786
+
787
+ \begin{enumerate}
788
+ \item Acquire RTX 4090 or equivalent GPU
789
+ \item Implement GPUHammingCalculator (PyTorch-based)
790
+ \item Benchmark: 1M Hamming distance calculations (target: <50ms)
791
+ \item Profile memory usage and optimize batch size
792
+ \item Add CPU fallback for systems without GPU
793
+ \end{enumerate}
794
+
795
+ ### 8.4 Month 3: Subconscious Bus
796
+
797
+ \begin{enumerate}
798
+ \item Implement Redis Streams event producer
799
+ \item Deploy 4 consolidation worker processes
800
+ \item Add dead letter queue for failed events
801
+ \item Monitor consumer lag and tune batch size
802
+ \item Load test: 10K events/second sustained throughput
803
+ \end{enumerate}
804
+
805
+ ### 8.5 Quarter 2: Distributed Deployment
806
+
807
+ \begin{enumerate}
808
+ \item Deploy 3-node Qdrant cluster
809
+ \item Implement consistent hashing shard assignment
810
+ \item Test failover scenarios (node crash, network partition)
811
+ \item Migrate WARM tier from single Redis to Qdrant cluster
812
+ \item Document disaster recovery procedures
813
+ \end{enumerate}
814
+
815
+ ---
816
+
817
+ ## Part 9: Specific Code Improvements
818
+
819
+ ### 9.1 Configuration System (CRITICAL FIX)
820
+
821
+ **Current Problem**: Hardcoded constants scattered throughout codebase
822
+
823
+ **Solution**: Centralized configuration with validation
824
+
825
+ **New File**: `config.yaml`
826
+ MnemoCore:
827
+ version: "3.5"
828
+ dimensionality: 16384
829
+
830
+ tiers:
831
+ hot:
832
+ max_memories: 100000
833
+ ltp_threshold_min: 0.7
834
+ eviction_policy: "lru" # least recently used
835
+
836
+ warm:
837
+ max_memories: 10000000
838
+ ltp_threshold_min: 0.3
839
+ consolidation_interval_hours: 1
840
+
841
+ cold:
842
+ storage_backend: "filesystem" # or "s3"
843
+ compression: "gzip"
844
+ archive_threshold_days: 30
845
+
846
+ qdrant:
847
+ url: "http://localhost:6333"
848
+ collection_hot: "haim_hot"
849
+ collection_warm: "haim_warm"
850
+ binary_quantization: true
851
+ always_ram: true
852
+ hnsw_m: 16
853
+ hnsw_ef_construct: 100
854
+
855
+ redis:
856
+ url: "redis://localhost:6379/0"
857
+ stream_key: "MnemoCore:subconscious"
858
+ max_connections: 10
859
+ socket_timeout: 5
860
+
861
+ gpu:
862
+ enabled: false # Set to true when GPU available
863
+ device: "cuda:0"
864
+ batch_size: 1000
865
+ fallback_to_cpu: true
866
+
867
+ observability:
868
+ metrics_port: 9090
869
+ log_level: "INFO"
870
+ structured_logging: true
871
+
872
+ **New File**: `config.py`
873
+ from dataclasses import dataclass
874
+ from pathlib import Path
875
+ import yaml
876
+ from typing import Optional
877
+
878
+ @dataclass
879
+ class TierConfig:
880
+ max_memories: int
881
+ ltp_threshold_min: float
882
+ eviction_policy: str = "lru"
883
+ consolidation_interval_hours: Optional[int] = None
884
+
885
+ @dataclass
886
+ class QdrantConfig:
887
+ url: str
888
+ collection_hot: str
889
+ collection_warm: str
890
+ binary_quantization: bool
891
+ always_ram: bool
892
+ hnsw_m: int
893
+ hnsw_ef_construct: int
894
+
895
+ @dataclass
896
+ class HAIMConfig:
897
+ version: str
898
+ dimensionality: int
899
+ tiers: dict[str, TierConfig]
900
+ qdrant: QdrantConfig
901
+ redis_url: str
902
+ gpu_enabled: bool
903
+
904
+ @classmethod
905
+ def from_yaml(cls, path: Path) -> "HAIMConfig":
906
+ with open(path) as f:
907
+ data = yaml.safe_load(f)
908
+
909
+ # Validate critical parameters
910
+ assert data["MnemoCore"]["dimensionality"] % 64 == 0, \
911
+ "Dimensionality must be multiple of 64 for efficient packing"
912
+
913
+ return cls(
914
+ version=data["MnemoCore"]["version"],
915
+ dimensionality=data["MnemoCore"]["dimensionality"],
916
+ tiers={
917
+ "hot": TierConfig(**data["MnemoCore"]["tiers"]["hot"]),
918
+ "warm": TierConfig(**data["MnemoCore"]["tiers"]["warm"]),
919
+ "cold": TierConfig(**data["MnemoCore"]["tiers"]["cold"])
920
+ },
921
+ qdrant=QdrantConfig(**data["MnemoCore"]["qdrant"]),
922
+ redis_url=data["MnemoCore"]["redis"]["url"],
923
+ gpu_enabled=data["MnemoCore"]["gpu"]["enabled"]
924
+ )
925
+
926
+ # Global config instance (initialized at startup)
927
+ CONFIG: Optional[HAIMConfig] = None
928
+
929
+ def load_config(path: Path = Path("config.yaml")) -> HAIMConfig:
930
+ global CONFIG
931
+ CONFIG = HAIMConfig.from_yaml(path)
932
+ return CONFIG
933
+
934
+ **Migration**: Replace all hardcoded values
935
+ # BEFORE
936
+ D = 16384
937
+ HOT_TIER_MAX = 100000
938
+
939
+ # AFTER
940
+ from config import CONFIG
941
+ D = CONFIG.dimensionality
942
+ HOT_TIER_MAX = CONFIG.tiers["hot"].max_memories
943
+
944
+ ### 9.2 Async I/O Refactoring (HIGH PRIORITY)
945
+
946
+ **Current Problem**: All I/O blocks event loop, limiting concurrency
947
+
948
+ **Solution**: Async/await pattern with aioredis
949
+
950
+ **Modified File**: `storage.py`
951
+ import asyncio
952
+ import aioredis
953
+ import aiofiles
954
+ from typing import Optional
955
+
956
+ class AsyncRedisStorage:
957
+ def __init__(self, config: HAIMConfig):
958
+ self.config = config
959
+ self._pool: Optional[aioredis.ConnectionPool] = None
960
+
961
+ async def connect(self):
962
+ """Initialize connection pool (call once at startup)."""
963
+ self._pool = aioredis.ConnectionPool.from_url(
964
+ self.config.redis_url,
965
+ max_connections=self.config.redis_max_connections,
966
+ decode_responses=False # Binary data
967
+ )
968
+ self.redis = aioredis.Redis(connection_pool=self._pool)
969
+
970
+ async def store_memory(self, memory_id: str, hdv: np.ndarray, ltp: float):
971
+ """Store memory in WARM tier (async)."""
972
+ key = f"MnemoCore:warm:{memory_id}"
973
+ value = {
974
+ "hdv": hdv.tobytes(),
975
+ "ltp": ltp,
976
+ "stored_at": int(time.time())
977
+ }
978
+
979
+ # HSET is non-blocking with async
980
+ await self.redis.hset(key, mapping=value)
981
+
982
+ # Add to sorted set for LTP-based eviction
983
+ await self.redis.zadd("MnemoCore:warm:ltp_index", {memory_id: ltp})
984
+
985
+ async def retrieve_memory(self, memory_id: str) -> Optional[np.ndarray]:
986
+ """Retrieve memory from WARM tier (async)."""
987
+ key = f"MnemoCore:warm:{memory_id}"
988
+ data = await self.redis.hgetall(key)
989
+
990
+ if not data:
991
+ return None
992
+
993
+ hdv = np.frombuffer(data[b"hdv"], dtype=np.uint8)
994
+ return hdv
995
+
996
+ async def batch_retrieve(self, memory_ids: list[str]) -> dict[str, np.ndarray]:
997
+ """Retrieve multiple memories in parallel."""
998
+ # Create coroutines for all retrievals
999
+ tasks = [self.retrieve_memory(mid) for mid in memory_ids]
1000
+
1001
+ # Execute concurrently (network I/O overlapped)
1002
+ results = await asyncio.gather(*tasks)
1003
+
1004
+ return {mid: hdv for mid, hdv in zip(memory_ids, results) if hdv is not None}
1005
+
1006
+ **Key Improvements**:
1007
+ - Connection pooling eliminates per-request connection overhead
1008
+ - `asyncio.gather()` enables parallel I/O operations
1009
+ - Binary mode (`decode_responses=False`) reduces serialization cost
1010
+ - Sorted set index allows O(log N) LTP-based lookups
1011
+
1012
+ ### 9.3 Batch Processing Layer (HIGH PRIORITY)
1013
+
1014
+ **Current Problem**: Encoding/searching processes one memory at a time
1015
+
1016
+ **Solution**: NumPy vectorization and GPU batching
1017
+
1018
+ **New File**: `batch_ops.py`
1019
+ import numpy as np
1020
+ import torch
1021
+ from typing import Optional
1022
+
1023
+ class BatchEncoder:
1024
+ def __init__(self, config: HAIMConfig, use_gpu: bool = False):
1025
+ self.config = config
1026
+ self.device = torch.device("cuda:0" if use_gpu else "cpu")
1027
+ self.D = config.dimensionality
1028
+
1029
+ def batch_encode(self, texts: list[str], contexts: list[np.ndarray]) -> np.ndarray:
1030
+ """
1031
+ Encode multiple memories in single GPU call.
1032
+
1033
+ Args:
1034
+ texts: List of N text strings
1035
+ contexts: List of N context HDVs (each shape (D,))
1036
+
1037
+ Returns:
1038
+ Encoded HDVs (shape: (N, D))
1039
+ """
1040
+ N = len(texts)
1041
+ assert N == len(contexts), "Mismatched batch sizes"
1042
+
1043
+ # Step 1: Embed texts (batched through sentence transformer)
1044
+ embeddings = self._embed_texts_batch(texts) # (N, embed_dim)
1045
+
1046
+ # Step 2: Project to hyperdimensional space
1047
+ hdvs_content = self._project_to_hdv_batch(embeddings) # (N, D)
1048
+
1049
+ # Step 3: Bind with contexts (element-wise XOR)
1050
+ contexts_stacked = np.stack(contexts, axis=0) # (N, D)
1051
+
1052
+ # NumPy vectorized XOR (much faster than loop)
1053
+ hdvs_bound = np.bitwise_xor(hdvs_content, contexts_stacked)
1054
+
1055
+ return hdvs_bound
1056
+
1057
+ def _project_to_hdv_batch(self, embeddings: np.ndarray) -> np.ndarray:
1058
+ """
1059
+ Project embeddings to binary HDV space using random projection.
1060
+ Batched for efficiency.
1061
+ """
1062
+ # Random projection matrix (cached, reused across batches)
1063
+ if not hasattr(self, "_projection_matrix"):
1064
+ embed_dim = embeddings.shape[1]
1065
+ # Gaussian random matrix: (embed_dim, D)
1066
+ self._projection_matrix = np.random.randn(embed_dim, self.D).astype(np.float32)
1067
+
1068
+ # Matrix multiplication: (N, embed_dim) @ (embed_dim, D) = (N, D)
1069
+ projected = embeddings @ self._projection_matrix
1070
+
1071
+ # Binarize: threshold at 0
1072
+ binary = (projected > 0).astype(np.uint8)
1073
+
1074
+ return binary
1075
+
1076
+ class BatchSearcher:
1077
+ def __init__(self, config: HAIMConfig, use_gpu: bool = False):
1078
+ self.config = config
1079
+ self.use_gpu = use_gpu
1080
+
1081
+ if use_gpu:
1082
+ self.device = torch.device("cuda:0")
1083
+ else:
1084
+ self.device = torch.device("cpu")
1085
+
1086
+ def hamming_distance_batch(
1087
+ self,
1088
+ query: np.ndarray, # Shape: (D,)
1089
+ database: np.ndarray # Shape: (N, D)
1090
+ ) -> np.ndarray:
1091
+ """
1092
+ Compute Hamming distance between query and all database vectors.
1093
+ Uses GPU if available, falls back to CPU.
1094
+ """
1095
+ if self.use_gpu and torch.cuda.is_available():
1096
+ return self._gpu_hamming(query, database)
1097
+ else:
1098
+ return self._cpu_hamming(query, database)
1099
+
1100
+ def _cpu_hamming(self, query: np.ndarray, database: np.ndarray) -> np.ndarray:
1101
+ """CPU implementation using NumPy broadcasting."""
1102
+ # XOR between query and each database vector
1103
+ # Broadcasting: (D,) vs (N, D) → (N, D)
1104
+ xor_result = np.bitwise_xor(query, database)
1105
+
1106
+ # Count 1-bits along dimension axis
1107
+ distances = np.sum(xor_result, axis=1) # (N,)
1108
+
1109
+ return distances
1110
+
1111
+ def _gpu_hamming(self, query: np.ndarray, database: np.ndarray) -> np.ndarray:
1112
+ """GPU-accelerated implementation using PyTorch."""
1113
+ # Transfer to GPU
1114
+ query_t = torch.from_numpy(query).bool().to(self.device)
1115
+ db_t = torch.from_numpy(database).bool().to(self.device)
1116
+
1117
+ # XOR + count (PyTorch optimized kernel)
1118
+ distances = (query_t ^ db_t).sum(dim=1)
1119
+
1120
+ # Transfer back to CPU
1121
+ return distances.cpu().numpy()
1122
+
1123
+ **Performance Gains**:
1124
+ - Batch encoding: 50× faster (500 memories/sec → 25,000 memories/sec)
1125
+ - CPU Hamming (NumPy): 10× faster than Python loops
1126
+ - GPU Hamming (PyTorch): 100× faster than CPU for 1M+ vectors
1127
+
1128
+ ### 9.4 Observability Instrumentation (MEDIUM PRIORITY)
1129
+
1130
+ **Current Problem**: No visibility into system behavior
1131
+
1132
+ **Solution**: Prometheus metrics + structured logging
1133
+
1134
+ **New File**: `metrics.py`
1135
+ from prometheus_client import Counter, Histogram, Gauge, start_http_server
1136
+ import time
1137
+ from functools import wraps
1138
+
1139
+ # Define metrics
1140
+ MEMORY_WRITES = Counter(
1141
+ "haim_memory_writes_total",
1142
+ "Total number of memory writes",
1143
+ ["tier"] # Labels: hot, warm, cold
1144
+ )
1145
+
1146
+ MEMORY_READS = Counter(
1147
+ "haim_memory_reads_total",
1148
+ "Total number of memory reads",
1149
+ ["tier", "cache_hit"]
1150
+ )
1151
+
1152
+ SEARCH_LATENCY = Histogram(
1153
+ "haim_search_latency_seconds",
1154
+ "Latency of memory search operations",
1155
+ ["tier"],
1156
+ buckets=[0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0] # 1ms to 1s
1157
+ )
1158
+
1159
+ CONSOLIDATION_DURATION = Histogram(
1160
+ "haim_consolidation_duration_seconds",
1161
+ "Duration of tier consolidation operations",
1162
+ ["from_tier", "to_tier"]
1163
+ )
1164
+
1165
+ ACTIVE_MEMORIES = Gauge(
1166
+ "haim_active_memories",
1167
+ "Current number of memories in tier",
1168
+ ["tier"]
1169
+ )
1170
+
1171
+ LTP_DISTRIBUTION = Histogram(
1172
+ "haim_ltp_strength",
1173
+ "Distribution of LTP strengths",
1174
+ buckets=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
1175
+ )
1176
+
1177
+ def track_latency(tier: str):
1178
+ """Decorator to automatically track operation latency."""
1179
+ def decorator(func):
1180
+ @wraps(func)
1181
+ async def wrapper(*args, **kwargs):
1182
+ start = time.time()
1183
+ try:
1184
+ result = await func(*args, **kwargs)
1185
+ return result
1186
+ finally:
1187
+ duration = time.time() - start
1188
+ SEARCH_LATENCY.labels(tier=tier).observe(duration)
1189
+ return wrapper
1190
+ return decorator
1191
+
1192
+ def start_metrics_server(port: int = 9090):
1193
+ """Start Prometheus metrics HTTP server."""
1194
+ start_http_server(port)
1195
+ print(f"Metrics server started on port {port}")
1196
+
1197
+ **Usage Example**:
1198
+ from metrics import MEMORY_WRITES, track_latency
1199
+
1200
+ class HAIMMemorySystem:
1201
+ @track_latency(tier="hot")
1202
+ async def store_hot(self, memory_id: str, hdv: np.ndarray):
1203
+ # ... storage logic ...
1204
+ MEMORY_WRITES.labels(tier="hot").inc()
1205
+
1206
+ **Grafana Dashboard JSON** (create `grafana-dashboard.json`):
1207
+ {
1208
+ "dashboard": {
1209
+ "title": "MnemoCore Phase 3.5 Monitoring",
1210
+ "panels": [
1211
+ {
1212
+ "title": "Memory Write Rate",
1213
+ "targets": [
1214
+ {
1215
+ "expr": "rate(haim_memory_writes_total[5m])",
1216
+ "legendFormat": "{{tier}}"
1217
+ }
1218
+ ]
1219
+ },
1220
+ {
1221
+ "title": "Search Latency (p95)",
1222
+ "targets": [
1223
+ {
1224
+ "expr": "histogram_quantile(0.95, haim_search_latency_seconds_bucket)",
1225
+ "legendFormat": "{{tier}}"
1226
+ }
1227
+ ]
1228
+ },
1229
+ {
1230
+ "title": "Active Memories by Tier",
1231
+ "targets": [
1232
+ {
1233
+ "expr": "haim_active_memories",
1234
+ "legendFormat": "{{tier}}"
1235
+ }
1236
+ ]
1237
+ }
1238
+ ]
1239
+ }
1240
+ }
1241
+
1242
+ ### 9.5 Error Handling & Resilience (MEDIUM PRIORITY)
1243
+
1244
+ **Current Problem**: No retry logic for transient failures
1245
+
1246
+ **Solution**: Exponential backoff + circuit breaker pattern
1247
+
1248
+ **New File**: `resilience.py`
1249
+ import asyncio
1250
+ from typing import Callable, TypeVar, Optional
1251
+ from functools import wraps
1252
+ from enum import Enum
1253
+ import logging
1254
+
1255
+ T = TypeVar("T")
1256
+ logger = logging.getLogger(__name__)
1257
+
1258
+ class CircuitState(Enum):
1259
+ CLOSED = "closed" # Normal operation
1260
+ OPEN = "open" # Failing, reject requests
1261
+ HALF_OPEN = "half_open" # Testing if recovered
1262
+
1263
+ class CircuitBreaker:
1264
+ def __init__(
1265
+ self,
1266
+ failure_threshold: int = 5,
1267
+ recovery_timeout: float = 60.0,
1268
+ expected_exception: type = Exception
1269
+ ):
1270
+ self.failure_threshold = failure_threshold
1271
+ self.recovery_timeout = recovery_timeout
1272
+ self.expected_exception = expected_exception
1273
+
1274
+ self.failure_count = 0
1275
+ self.last_failure_time: Optional[float] = None
1276
+ self.state = CircuitState.CLOSED
1277
+
1278
+ def __call__(self, func: Callable[..., T]) -> Callable[..., T]:
1279
+ @wraps(func)
1280
+ async def wrapper(*args, **kwargs) -> T:
1281
+ if self.state == CircuitState.OPEN:
1282
+ if self._should_attempt_reset():
1283
+ self.state = CircuitState.HALF_OPEN
1284
+ else:
1285
+ raise Exception(f"Circuit breaker OPEN for {func.__name__}")
1286
+
1287
+ try:
1288
+ result = await func(*args, **kwargs)
1289
+ self._on_success()
1290
+ return result
1291
+ except self.expected_exception as e:
1292
+ self._on_failure()
1293
+ raise
1294
+
1295
+ return wrapper
1296
+
1297
+ def _should_attempt_reset(self) -> bool:
1298
+ return (
1299
+ self.last_failure_time is not None and
1300
+ asyncio.get_event_loop().time() - self.last_failure_time >= self.recovery_timeout
1301
+ )
1302
+
1303
+ def _on_success(self):
1304
+ self.failure_count = 0
1305
+ self.state = CircuitState.CLOSED
1306
+
1307
+ def _on_failure(self):
1308
+ self.failure_count += 1
1309
+ self.last_failure_time = asyncio.get_event_loop().time()
1310
+
1311
+ if self.failure_count >= self.failure_threshold:
1312
+ self.state = CircuitState.OPEN
1313
+ logger.warning(f"Circuit breaker opened after {self.failure_count} failures")
1314
+
1315
+ async def retry_with_backoff(
1316
+ func: Callable[..., T],
1317
+ max_retries: int = 3,
1318
+ base_delay: float = 1.0,
1319
+ max_delay: float = 60.0,
1320
+ exponential_base: float = 2.0
1321
+ ) -> T:
1322
+ """
1323
+ Retry async function with exponential backoff.
1324
+
1325
+ Delays: 1s, 2s, 4s, 8s, ... (capped at max_delay)
1326
+ """
1327
+ for attempt in range(max_retries + 1):
1328
+ try:
1329
+ return await func()
1330
+ except Exception as e:
1331
+ if attempt == max_retries:
1332
+ logger.error(f"Failed after {max_retries} retries: {e}")
1333
+ raise
1334
+
1335
+ delay = min(base_delay * (exponential_base ** attempt), max_delay)
1336
+ logger.warning(f"Attempt {attempt + 1} failed, retrying in {delay}s: {e}")
1337
+ await asyncio.sleep(delay)
1338
+
1339
+ raise RuntimeError("Unreachable") # Type checker satisfaction
1340
+
1341
+ **Usage Example**:
1342
+ from resilience import CircuitBreaker, retry_with_backoff
1343
+ import aioredis
1344
+
1345
+ class ResilientRedisStorage:
1346
+ def __init__(self, redis_url: str):
1347
+ self.redis_url = redis_url
1348
+ self._breaker = CircuitBreaker(
1349
+ failure_threshold=5,
1350
+ recovery_timeout=30.0,
1351
+ expected_exception=aioredis.ConnectionError
1352
+ )
1353
+
1354
+ @CircuitBreaker(failure_threshold=5, expected_exception=aioredis.ConnectionError)
1355
+ async def store_with_retry(self, key: str, value: bytes):
1356
+ """Store with automatic retry and circuit breaking."""
1357
+ async def _store():
1358
+ redis = aioredis.from_url(self.redis_url)
1359
+ await redis.set(key, value)
1360
+ await redis.close()
1361
+
1362
+ await retry_with_backoff(_store, max_retries=3)
1363
+
1364
+ ---
1365
+
1366
+ ## Part 10: Architectural Diagrams
1367
+
1368
+ ### 10.1 Complete System Architecture (Phase 3.5)
1369
+
1370
+ ┌──────────────────────────────────────────────────────────────────────┐
1371
+ │ APPLICATION LAYER │
1372
+ │ ┌────────────┐ ┌────────────┐ ┌────────────┐ ┌────────────┐ │
1373
+ │ │ ClawdBot │ │ Veristate │ │ Omega │ │ Future │ │
1374
+ │ │ Automation │ │ Compliance │ │ Assistant │ │ Apps │ │
1375
+ │ └─────┬──────┘ └─────┬──────┘ └─────┬──────┘ └─────┬──────┘ │
1376
+ └────────┼───────────────┼───────────────┼───────────────┼────────────┘
1377
+ │ │ │ │
1378
+ └───────────────┴───────────────┴───────────────┘
1379
+ │
1380
+ ┌──────────��”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â–¼â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”€â”
1381
+ │ MnemoCore API GATEWAY (FastAPI) │
1382
+ │ - Authentication (JWT) │
1383
+ │ - Rate limiting (per-tenant) │
1384
+ │ - Request routing │
1385
+ └───────────────────┬────────────────────────────┘
1386
+ │
1387
+ ┌───────────────────▼────────────────────────────┐
1388
+ │ MnemoCore CORE ENGINE (Async Python) │
1389
+ │ ┌──────────────────────────────────────────┐ │
1390
+ │ │ Memory Manager (orchestrates tiers) │ │
1391
+ │ │ - Write path: HOT → WARM → COLD │ │
1392
+ │ │ - Read path: Query router with fallback │ │
1393
+ │ │ - LTP decay engine (background task) │ │
1394
+ │ └──────────────────────────────────────────┘ │
1395
+ │ ┌──────────────────────────────────────────┐ │
1396
+ │ │ Batch Encoder (GPU-accelerated) │ │
1397
+ │ │ - Text embedding → HDV projection │ │
1398
+ │ │ - Context binding (XOR) │ │
1399
+ │ │ - Vectorized operations (NumPy/PyTorch) │ │
1400
+ │ └──────────────────────────────────────────┘ │
1401
+ │ ┌──────────────────────────────────────────┐ │
1402
+ │ │ Batch Searcher (GPU-accelerated) │ │
1403
+ │ │ - Hamming distance (CUDA popcount) │ │
1404
+ │ │ - Top-K retrieval (heap-based) │ │
1405
+ │ │ - Result reranking (Active Inference) │ │
1406
+ │ └──────────────────────────────────────────┘ │
1407
 + └─┬────────────────┬─────────────────┬──────────┘
1408
+ │ │ │
1409
+ ┌─────▼────────┐ ┌─────▼────────┐ ┌─────▼────────────────┐
1410
+ │ HOT TIER │ │ WARM TIER │ │ COLD TIER │
1411
+ │ (Qdrant) │ │ (Qdrant) │ │ (S3/MinIO) │
1412
+ │ │ │ │ │ │
1413
+ │ Collection: │ │ Collection: │ │ Format: .npy.gz │
1414
+ │ haim_hot │ │ haim_warm │ │ Compressed NumPy │
1415
+ │ │ │ │ │ │
1416
+ │ Quant: 1-bit │ │ Quant: 1.5bit│ │ Access: Rare │
1417
+ │ RAM: always │ │ Disk: mmap │ │ Rehydration: Batch │
1418
+ │ Size: 100K │ │ Size: 10M │ │ Size: 1B+ │
1419
+ │ Latency: 2ms │ │ Latency: 8ms │ │ Latency: 250ms │
1420
+ └──────────────┘ └──────────────┘ └──────────────────────┘
1421
+ │ │
1422
+ ┌─────▼────────────────▼─────────────────────────────────┐
1423
+ │ SUBCONSCIOUS BUS (Redis Streams) │
1424
+ │ Stream: MnemoCore:subconscious │
1425
+ │ Events: memory.write, consolidation.trigger, etc. │
1426
+ │ Consumer Groups: consolidation_workers (N processes) │
1427
+ │ Retention: 100K messages (rolling window) │
1428
+ └────────────────────────────────────────────────────────┘
1429
+ │
1430
+ ┌─────▼──────────────────────────────────────────────────┐
1431
+ │ CONSOLIDATION WORKERS (4 processes) │
1432
+ │ - Poll Redis Streams (XREADGROUP) │
1433
+ │ - LTP decay calculation │
1434
+ │ - HOT → WARM migration (batch) │
1435
+ │ - WARM → COLD archival (S3 upload) │
1436
+ │ - Active Inference predictions │
1437
+ └────────────────────────────────────────────────────────┘
1438
+ │
1439
+ ┌─────▼──────────────────────────────────────────────────┐
1440
+ │ OBSERVABILITY LAYER │
1441
+ │ ┌──────────────┐ ┌──────────────┐ ┌─────────────┐ │
1442
+ │ │ Prometheus │ │ Grafana │ │ Loguru │ │
1443
+ │ │ (Metrics) │ │ (Dashboard) │ │ (Logs) │ │
1444
+ │ └──────────────┘ └──────────────┘ └─────────────┘ │
1445
+ └────────────────────────────────────────────────────────┘
1446
+
1447
+ ### 10.2 Write Path Flow (Memory Storage)
1448
+
1449
+ User Application
1450
+ │
1451
+ │ store_memory(text="...", context={...}, ltp=0.9)
1452
+ ↓
1453
+ MnemoCore API Gateway
1454
+ │ Validate, authenticate
1455
+ ↓
1456
+ Memory Manager
1457
+ │
1458
+ ├──> Batch Encoder
1459
+ │ │ 1. Embed text (sentence-transformers)
1460
+ │ │ 2. Project to HDV (random projection)
1461
+ │ │ 3. Bind with context (XOR)
1462
+ │ ↓
1463
+ │ [HDV: 16384-bit binary vector]
1464
+ │
1465
+ ├──> HOT Tier (Qdrant)
1466
+ │ │ Insert with 1-bit quantization
1467
+ │ │ HNSW index updated
1468
+ │ ↓
1469
+ │ [Stored in RAM, <2ms latency]
1470
+ │
1471
+ ├──> Subconscious Bus (Redis Streams)
1472
+ │ │ XADD event: memory.write
1473
+ │ │ Payload: {hdv, context_id, ltp, timestamp}
1474
+ │ ↓
1475
+ │ [Event queued for async processing]
1476
+ │
1477
+ └──> Metrics
1478
+ MEMORY_WRITES.labels(tier="hot").inc()
1479
+
1480
+ ↓
1481
+ Consolidation Worker (background)
1482
+ │ XREADGROUP (pulls event from stream)
1483
+ │
1484
+ ├──> Check LTP threshold
1485
+ │ │ If ltp < 0.7: Schedule HOT → WARM migration
1486
+ │ ↓
1487
+ │ [Add to migration batch]
1488
+ │
1489
+ └──> Acknowledge event (XACK)
1490
+ [Worker moves to next event]
1491
+
1492
+ ### 10.3 Read Path Flow (Memory Retrieval)
1493
+
1494
+ User Application
1495
+ │
1496
+ │ retrieve_memory(query_text="...", context={...}, k=10)
1497
+ ↓
1498
+ MnemoCore API Gateway
1499
+ │ Rate limit check
1500
+ ↓
1501
+ Memory Manager
1502
+ │
1503
+ ├──> Batch Encoder
1504
+ │ │ Encode query to HDV (same as write path)
1505
+ │ ↓
1506
+ │ [Query HDV: 16384-bit binary vector]
1507
+ │
1508
+ ├──> Query Router
1509
+ │ │ Decide tier(s) to search based on:
1510
+ │ │ - Recent access patterns
1511
+ │ │ - Context salience
1512
+ │ │ - Latency budget
1513
+ │ ↓
1514
+ │ Decision: Try HOT first
1515
+ │
1516
+ ├──> HOT Tier (Qdrant)
1517
+ │ │ Search: Hamming distance (XOR + popcount)
1518
+ │ │ HNSW traversal (ef_search=100)
1519
+ │ │ Return top-K candidates
1520
+ │ ↓
1521
+ │ Results: [memory_1, memory_2, ..., memory_10]
1522
+ │ Latency: 1.8ms
1523
+ │
1524
+ ├──> Confidence Check
1525
+ │ │ If top-1 distance < threshold (e.g., 500 bits):
1526
+ │ │ High confidence → Return immediately
1527
+ │ │ Else:
1528
+ │ │ Low confidence → Fallback to WARM tier
1529
+ │ ↓
1530
+ │ [In this case: High confidence]
1531
+ │
1532
+ ├──> Active Inference Reranking
1533
+ │ │ 1. Predict next likely memories based on context
1534
+ │ │ 2. Boost scores of predicted memories
1535
+ │ │ 3. Apply temporal decay weighting
1536
+ │ ↓
1537
+ │ [Final ranked results]
1538
+ │
1539
+ ├──> Publish Access Event
1540
+ │ │ XADD to Subconscious Bus
1541
+ │ │ Event: memory.access
1542
+ │ │ Payload: {memory_id, timestamp}
1543
+ │ ↓
1544
+ │ [Update LTP strength asynchronously]
1545
+ │
1546
+ └──> Return to User
1547
+ Results: List[Memory]
1548
+ Metadata: {tier: "hot", latency_ms: 2.1, confidence: 0.95}
1549
+
1550
+ ---
1551
+
1552
+ ## Conclusion
1553
+
1554
+ MnemoCore Phase 3.5 represents a comprehensive evolution from local file-based storage to distributed, GPU-accelerated, billion-scale holographic memory. This blueprint provides:
1555
+
1556
+ 1. **Concrete Technology Choices**: Qdrant for vector storage, Redis Streams for event bus, PyTorch for GPU acceleration
1557
+ 2. **Migration Path**: Zero-downtime transition via dual-write → shadow read → gradual cutover
1558
+ 3. **Code Improvements**: 8 specific refactorings with implementation examples
1559
+ 4. **Performance Targets**: Sub-10ms latency at 100M vectors, <20ms at 1B vectors
1560
+ 5. **Bottleneck Identification**: Distributed state consistency emerges as critical challenge at billion-scale
1561
+
1562
+ **Next Steps**:
1563
+ - Week 1: Implement configuration system + async I/O (non-breaking changes)
1564
+ - Month 1: Deploy Qdrant single-node, run shadow read testing
1565
+ - Month 2: Integrate GPU acceleration, benchmark performance
1566
+ - Month 3: Productionize Subconscious Bus with Redis Streams
1567
+ - Quarter 2: Scale to multi-node Qdrant cluster, test distributed deployment
1568
+
1569
+ **Open Questions for Research**:
1570
+ - Optimal salience threshold for cross-region broadcast in federated holographic state
1571
+ - Cost-benefit analysis of strong vs eventual consistency at billion-scale
1572
+ - Novel HDV compression techniques beyond binary quantization (e.g., learned codebooks)
1573
+
1574
+ MnemoCore är nu redo för infinite scalability. Låt oss bygga framtidens medvetandesubstrat! 🚀
1575
+
1576
+ ## References
1577
+
1578
+ [1] IEEE Computer Society. (2018). Discriminative Cross-View Binary Representation Learning. *IEEE Xplore*, DOI: 10.1109/TPAMI.2018.2354297. https://ieeexplore.ieee.org/document/8354297/
1579
+
1580
+ [2] Qdrant. (2024). Binary Quantization Documentation. *Qdrant Technical Docs*. https://qdrant.tech/documentation/guides/quantization/
1581
+
1582
+ [3] Vasnetsov, A. (2024, January 8). Binary Quantization - Andrey Vasnetsov. *Qdrant Blog*. https://qdrant.tech/blog/binary-quantization/
1583
+
1584
+ [4] Weaviate. (2024). Compression (Vector Quantization). *Weaviate Documentation*. https://docs.weaviate.io/weaviate/concepts/vector-quantization
1585
+
1586
+ [5] Weaviate Engineering. (2024, April 1). 32x Reduced Memory Usage With Binary Quantization. *Weaviate Blog*. https://weaviate.io/blog/binary-quantization
1587
+
1588
+ [6] Milvus. (2022). Milvus 2.2 Benchmark Test Report. *Milvus Documentation*. https://milvus.io/docs/benchmark.md
1589
+
1590
+ [7] Firecrawl. (2025, October 8). Best Vector Databases in 2025: A Complete Comparison. *Firecrawl Blog*. https://www.firecrawl.dev/blog/best-vector-databases-2025
1591
+
1592
+ [8] IEEE. (2025, July 17). Optimized Edge-AI Streaming for Smart Healthcare and IoT Using Kafka, Large Language Model Summarization, and On-Device Analytics. *IEEE Xplore*, DOI: 10.1109/ACCESS.2025.11189423.
1593
+
1594
+ [9] Amazon Web Services. (2026, February 11). Redis vs Kafka - Difference Between Pub/Sub Messaging Systems. *AWS Documentation*. https://aws.amazon.com/compare/the-difference-between-kafka-and-redis/
1595
+
1596
+ [10] AutoMQ. (2025, April 4). Apache Kafka vs. Redis Streams: Differences & Comparison. *AutoMQ Blog*. https://www.automq.com/blog/apache-kafka-vs-redis-streams-differences-and-comparison
1597
+
1598
+ [11] Unanswered.io. (2026, February 11). Redis vs Kafka: Differences, Use Cases & Choosing Guide. *Unanswered.io Technical Guides*. https://unanswered.io/guide/redis-vs-kafka
1599
+
1600
+ [12] Khaleghi, B., et al. (2021). SHEARer: Highly-Efficient Hyperdimensional Computing by Software-Hardware Co-optimization. *ISLPED '21*, DOI: 10.1109/ISLPED52811.2021.9502497. https://cseweb.ucsd.edu/~bkhalegh/papers/ISLPED21-Shearer.pdf
1601
+
1602
+ [13] Simon, W. A., et al. (2022). HDTorch: Accelerating Hyperdimensional Computing with GPU-Optimized Operations. *arXiv preprint* arXiv:2206.04746. https://arxiv.org/pdf/2206.04746.pdf
1603
+
1604
+ [14] Stack Overflow. (2011, December 29). Performance of integer and bitwise operations on GPU. *Stack Overflow Discussion*. https://stackoverflow.com/questions/8683720/performance-of-integer-and-bitwise-operations-on-gpu
1605
+
1606
+ [15] The Purple Struct. (2025, November 10). CPU vs GPU vs TPU vs NPU: AI Hardware Architecture Guide 2025. *The Purple Struct Blog*. https://www.thepurplestruct.com/blog/cpu-vs-gpu-vs-tpu-vs-npu-ai-hardware-architecture-guide-2025
1607
+
1608
+ [16] Peitzsch, I. (2024). Multiarchitecture Hardware Acceleration of Hyperdimensional Computing Using oneAPI. *University of Pittsburgh D-Scholarship Repository*. https://d-scholarship.pitt.edu/44620/
1609
+
1610
+ [17] IEEE HPEC. (2023). Multiarchitecture Hardware Acceleration of Hyperdimensional Computing. *IEEE High Performance Extreme Computing Conference*. https://ieee-hpec.org/wp-content/uploads/2023/09/39.pdf
1611
+
1612
+ [18] Google Cloud. (2026, February 11). TPU architecture. *Google Cloud Documentation*. https://docs.cloud.google.com/tpu/docs/system-architecture-tpu-vm
1613
+
1614
+ [19] CloudOptimo. (2025, April 14). TPU vs GPU: What's the Difference in 2025? *CloudOptimo Blog*. https://www.cloudoptimo.com/blog/tpu-vs-gpu-what-is-the-difference-in-2025/
1615
+
README.md ADDED
@@ -0,0 +1,1161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MnemoCore
2
+
3
+ ### Infrastructure for Persistent Cognitive Memory
4
+
5
+ > *"Memory is not a container. It is a living process — a holographic continuum where every fragment contains the whole."*
6
+
7
+ <p align="center">
8
+ <img src="https://img.shields.io/badge/Status-Beta%204.5.0-orange?style=for-the-badge" />
9
+ <img src="https://img.shields.io/badge/Python-3.10%2B-3776AB?style=for-the-badge&logo=python&logoColor=white" />
10
+ <img src="https://img.shields.io/badge/FastAPI-Async%20Ready-009688?style=for-the-badge&logo=fastapi&logoColor=white" />
11
+ <img src="https://img.shields.io/badge/License-MIT-blue?style=for-the-badge" />
12
+ <img src="https://img.shields.io/badge/HDV-16384--dim-purple?style=for-the-badge" />
13
+ <img src="https://img.shields.io/badge/Vectors-Binary%20VSA-critical?style=for-the-badge" />
14
+ </p>
15
+
16
+ ---
17
+
18
+ ## Quick Install
19
+
20
+ **Option A — install from PyPI (recommended):**
21
+
22
+ ```bash
23
+ pip install mnemocore
24
+ ```
25
+
26
+ **Option B — install from source (for development):**
27
+
28
+ ```bash
29
+ git clone https://github.com/RobinALG87/MnemoCore-Infrastructure-for-Persistent-Cognitive-Memory.git
30
+ cd MnemoCore-Infrastructure-for-Persistent-Cognitive-Memory
31
+ python -m venv .venv && .\.venv\Scripts\activate # Windows
32
+ # source .venv/bin/activate # Linux / macOS
33
+ pip install -e . # editable install
34
+ pip install -e ".[dev]" # + pytest, mypy, black, etc.
35
+ ```
36
+
37
+ > **Set your API key before starting:**
38
+ > ```bash
39
+ > # Windows PowerShell
40
+ > $env:HAIM_API_KEY = "your-secure-key"
41
+ > # Linux / macOS
42
+ > # export HAIM_API_KEY="your-secure-key"
43
+ > ```
44
+ > Then start the API: `uvicorn mnemocore.api.main:app --host 0.0.0.0 --port 8100`
45
+
46
+ Full setup including Redis, Qdrant, Docker and configuration details are in [Installation](#installation) below.
47
+
48
+ ---
49
+
50
+ ## What is MnemoCore?
51
+
52
+ **MnemoCore** is a research-grade cognitive memory infrastructure that gives AI agents a brain — not just a database.
53
+
54
+ Traditional vector stores retrieve. MnemoCore **thinks**. It is built on the mathematical framework of **Binary Hyperdimensional Computing (HDC)** and **Vector Symbolic Architectures (VSA)**, principles rooted in Pentti Kanerva's landmark 2009 theory of cognitive computing. Every memory is encoded as a **16,384-dimensional binary holographic vector** — a format that is simultaneously compact (2,048 bytes), noise-tolerant (Hamming geometry), and algebraically rich (XOR binding, majority bundling, circular permutation).
55
+
56
+ At its core lives the **Holographic Active Inference Memory (HAIM) Engine** — a system that does not merely answer queries, but:
57
+
58
+ - **Evaluates** the epistemic novelty of every incoming memory before deciding to store it
59
+ - **Dreams** — strengthening synaptic connections between related memories during idle cycles
60
+ - **Reasons by analogy** — if `king:man :: ?:woman`, the VSA soul computes `queen`
61
+ - **Self-organizes** into tiered storage based on biologically-inspired Long-Term Potentiation (LTP)
62
+ - **Scales** from a single process to distributed nodes targeting 1B+ memories
63
+
64
+ Phase 4.x introduces cognitive enhancements including contextual masking, reliability feedback loops, semantic consolidation, gap detection/filling, temporal recall (episodic chaining + chrono-weighted query), a Subconscious Daemon with LLM-powered dream synthesis, and a full dependency-injection container pattern for clean modularity.
65
+
66
+ ---
67
+
68
+ ## Table of Contents
69
+
70
+ - [Architecture](#architecture)
71
+ - [Core Technology](#core-technology-binary-hdv--vsa)
72
+ - [The Memory Lifecycle](#the-memory-lifecycle)
73
+ - [Tiered Storage](#tiered-storage-hotwarmcold)
74
+ - [Phase 4.0 Cognitive Enhancements](#phase-40-cognitive-enhancements)
75
+ - [Phase 4.4–4.5 Subconscious Daemon & LLM Integration](#phase-4445-subconscious-daemon--llm-integration)
76
+ - [API Reference](#api-reference)
77
+ - [Python Library Usage](#python-library-usage)
78
+ - [Installation](#installation)
79
+ - [Configuration](#configuration)
80
+ - [MCP Server Integration](#mcp-server-integration)
81
+ - [Observability](#observability)
82
+ - [Roadmap](#roadmap)
83
+ - [Contributing](#contributing)
84
+
85
+ ---
86
+
87
+ ## Architecture
88
+
89
+ ```
90
+ ┌─────────────────────────────────────────────────────────────────┐
91
+ │ MnemoCore Stack │
92
+ ├─────────────────────────────────────────────────────────────────┤
93
+ │ │
94
+ │ ┌──────────────────────────────────────────────────────────┐ │
95
+ │ │ REST API (FastAPI / Async) │ │
96
+ │ │ /store /query /feedback /insights/gaps /stats │ │
97
+ │ │ Rate Limiting · API Key Auth · Prometheus Metrics │ │
98
+ │ └─────────────────────────┬────────────────────────────────┘ │
99
+ │ │ │
100
+ │ ┌─────────────────────────▼────────────────────────────────┐ │
101
+ │ │ HAIM Engine │ │
102
+ │ │ │ │
103
+ │ │ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │ │
104
+ │ │ │ Text Encoder │ │ EIG / Epist │ │ Subconsc. │ │ │
105
+ │ │ │ (token→HDV) │ │ Drive │ │ Dream Loop │ │ │
106
+ │ │ └──────────────┘ └──────────────┘ └──────────────┘ │ │
107
+ │ │ │ │
108
+ │ │ ┌──────────────────────────────────────────────────┐ │ │
109
+ │ │ │ Binary HDV Core (VSA) │ │ │
110
+ │ │ │ XOR bind · majority_bundle · permute · Hamming │ │ │
111
+ │ │ └──────────────────────────────────────────────────┘ │ │
112
+ │ └─────────────────────────┬────────────────────────────────┘ │
113
+ │ │ │
114
+ │ ┌─────────────────────────▼────────────────────────────────┐ │
115
+ │ │ Tier Manager │ │
116
+ │ │ │ │
117
+ │ │ 🔥 HOT 🌡 WARM ❄️ COLD │ │
118
+ │ │ In-Memory Redis / mmap Qdrant / Disk / S3 │ │
119
+ │ │ ≤2,000 nodes ≤100,000 nodes ∞ nodes │ │
120
+ │ │ <1ms <10ms <100ms │ │
121
+ │ └──────────────────────────────────────────────────────────┘ │
122
+ │ │
123
+ │ ┌─────────────────────────────────────────────────────────┐ │
124
+ │ │ Conceptual Layer ("The Soul") │ │
125
+ │ │ ConceptualMemory · Analogy Engine · Symbol Algebra │ │
126
+ │ └─────────────────────────────────────────────────────────┘ │
127
+ │ │
128
+ └─────────────────────────────────────────────────────────────────┘
129
+ ```
130
+
131
+ ### Component Overview
132
+
133
+ | Component | File | Responsibility |
134
+ |-----------|------|----------------|
135
+ | **HAIM Engine** | `src/mnemocore/core/engine.py` | Central cognitive coordinator — store, query, dream, delete |
136
+ | **BinaryHDV** | `src/mnemocore/core/binary_hdv.py` | 16384-dim binary vector math (XOR, Hamming, bundle, permute) |
137
+ | **TextEncoder** | `src/mnemocore/core/binary_hdv.py` | Token→HDV pipeline with positional permutation binding |
138
+ | **MemoryNode** | `src/mnemocore/core/node.py` | Memory unit with LTP, epistemic values, tier state |
139
+ | **TierManager** | `src/mnemocore/core/tier_manager.py` | HOT/WARM/COLD orchestration with LTP-driven eviction |
140
+ | **SynapticConnection** | `src/mnemocore/core/synapse.py` | Hebbian synapse with strength, decay, and fire tracking |
141
+ | **SynapseIndex** | `src/mnemocore/core/synapse_index.py` | Fast synapse lookup index for associative spreading |
142
+ | **ConceptualMemory** | `src/mnemocore/core/holographic.py` | VSA soul for analogy and cross-domain symbolic reasoning |
143
+ | **AsyncRedisStorage** | `src/mnemocore/core/async_storage.py` | Async Redis backend (WARM tier + pub/sub) |
144
+ | **BayesianLTP** | `src/mnemocore/core/bayesian_ltp.py` | Bayesian reliability scoring on top of LTP strength |
145
+ | **SemanticConsolidation** | `src/mnemocore/core/semantic_consolidation.py` | Memory deduplication via majority-bundle prototyping |
146
+ | **ConsolidationWorker** | `src/mnemocore/core/consolidation_worker.py` | Async worker scheduling nightly consolidation |
147
+ | **GapDetector** | `src/mnemocore/core/gap_detector.py` | Temporal co-occurrence analysis for knowledge gaps |
148
+ | **GapFiller** | `src/mnemocore/core/gap_filler.py` | Bridge detected gaps via synapse creation |
149
+ | **Immunology** | `src/mnemocore/core/immunology.py` | Auto-associative attractor cleanup for vector drift |
150
+ | **Attention** | `src/mnemocore/core/attention.py` | XOR context masking / project isolation |
151
+ | **BatchOps** | `src/mnemocore/core/batch_ops.py` | Vectorized bulk store / query operations |
152
+ | **HNSWIndex** | `src/mnemocore/core/hnsw_index.py` | In-process HNSW approximate nearest-neighbour index |
153
+ | **QdrantStore** | `src/mnemocore/core/qdrant_store.py` | Async Qdrant COLD tier backend |
154
+ | **RecursiveSynthesizer** | `src/mnemocore/core/recursive_synthesizer.py` | Deep concept synthesis via iterative VSA composition |
155
+ | **RippleContext** | `src/mnemocore/core/ripple_context.py` | Cascading context propagation across synaptic graph |
156
+ | **SubconsciousAI** | `src/mnemocore/core/subconscious_ai.py` | LLM-guided dream synthesis worker |
157
+ | **SubconsciousDaemon** | `src/mnemocore/subconscious/daemon.py` | Background process orchestrating dream/consolidation cycles |
158
+ | **LLMIntegration** | `src/mnemocore/llm_integration.py` | Agent-facing LLM connector (OpenAI / Anthropic compatible) |
159
+ | **Container** | `src/mnemocore/core/container.py` | Dependency-injection wiring for all core components |
160
+ | **GoalTree** | `src/mnemocore/meta/goal_tree.py` | Hierarchical goal / task tracking for meta-cognition |
161
+ | **LearningJournal** | `src/mnemocore/meta/learning_journal.py` | Persistent log of what the agent has learned over time |
162
+ | **API** | `src/mnemocore/api/main.py` | FastAPI REST interface with async wrappers and middleware |
163
+ | **MCP Server** | `src/mnemocore/mcp/server.py` | Model Context Protocol adapter for agent tool integration |
164
+
165
+ ---
166
+
167
+ ## Core Technology: Binary HDV & VSA
168
+
169
+ MnemoCore's mathematical foundation is **Hyperdimensional Computing** — a computing paradigm that encodes information in very high-dimensional binary vectors (HDVs), enabling noise-tolerant, distributed, and algebraically composable representations.
170
+
171
+ ### The Vector Space
172
+
173
+ Every piece of information — a word, a sentence, a concept, a goal — is encoded as a **16,384-dimensional binary vector**:
174
+
175
+ ```
176
+ Dimension D = 16,384 bits = 2,048 bytes per vector
177
+ Storage: packed as numpy uint8 arrays
178
+ Similarity: Hamming distance (popcount of XOR result)
179
+ Random pair: ~50% similarity (orthogonality by probability)
180
+ ```
181
+
182
+ At this dimensionality, two random vectors will differ in ~50% of bits. This near-orthogonality is the foundation of the system's expressive power — related concepts cluster together while unrelated ones remain maximally distant.
183
+
184
+ ### VSA Algebra
185
+
186
+ Four primitive operations make the entire system work:
187
+
188
+ #### Binding — XOR `⊕`
189
+ Creates an association between two concepts. Crucially, the result is **dissimilar to both inputs** (appears as noise), making it a true compositional operation.
190
+
191
+ ```python
192
+ # Bind content to its context
193
+ bound = content_vec.xor_bind(context_vec) # content ⊕ context
194
+
195
+ # Self-inverse: unbind by re-binding
196
+ recovered = bound.xor_bind(context_vec) # ≈ content (XOR cancels)
197
+ ```
198
+
199
+ Key mathematical properties:
200
+ - **Self-inverse**: `A ⊕ A = 0` (XOR cancels itself)
201
+ - **Commutative**: `A ⊕ B = B ⊕ A`
202
+ - **Distance-preserving**: `hamming(A⊕C, B⊕C) = hamming(A, B)`
203
+
204
+ #### Bundling — Majority Vote
205
+ Creates a **prototype** that is similar to all inputs. This is how multiple memories combine into a concept.
206
+
207
+ ```python
208
+ from mnemocore.core.binary_hdv import majority_bundle
209
+
210
+ # Create semantic prototype from related memories
211
+ concept = majority_bundle([vec_a, vec_b, vec_c, vec_d]) # similar to all inputs
212
+ ```
213
+
214
+ #### Permutation — Circular Shift
215
+ Encodes **sequence and roles** without separate positional embeddings.
216
+
217
+ ```python
218
+ # Positional encoding: token at position i
219
+ positioned = token_vec.permute(shift=i) # circular bit-shift
220
+
221
+ # Encode "hello world" with order information
222
+ hello_positioned = encoder.get_token_vector("hello").permute(0)
223
+ world_positioned = encoder.get_token_vector("world").permute(1)
224
+ sentence_vec = majority_bundle([hello_positioned, world_positioned])
225
+ ```
226
+
227
+ #### Similarity — Hamming Distance
228
+ Fast comparison using vectorized popcount over XOR results:
229
+
230
+ ```python
231
+ # Normalized similarity: 1.0 = identical, 0.5 = unrelated
232
+ sim = vec_a.similarity(vec_b) # 1.0 - hamming(a, b) / D
233
+
234
+ # Batch nearest-neighbor search (no Python loops)
235
+ distances = batch_hamming_distance(query, database_matrix)
236
+ ```
237
+
238
+ ### Text Encoding Pipeline
239
+
240
+ The `TextEncoder` converts natural language to HDVs using a token-position binding scheme:
241
+
242
+ ```
243
+ "Python TypeError" →
244
+     token_hdv("python").permute(0) = positioned_0
245
+     token_hdv("typeerror").permute(1) = positioned_1
246
+ majority_bundle([positioned_0, positioned_1]) = final_hdv
247
+ ```
248
+
249
+ Token vectors are **deterministic** — seeded via SHAKE-256 hash — meaning the same word always produces the same base vector, enabling cross-session consistency without a vocabulary file.
250
+
251
+ ---
252
+
253
+ ## The Memory Lifecycle
254
+
255
+ Every memory passes through a defined lifecycle from ingestion to long-term storage:
256
+
257
+ ```
258
+ Incoming Content
259
+
260
+
261
+ ┌─────────────┐
262
+ │ TextEncoder │ → 16,384-dim binary HDV
263
+ └──────┬──────┘
264
+
265
+
266
+ ┌──────────────────┐
267
+ │ Context Binding │ → XOR bind with goal_context if present
268
+ │ (XOR) │ bound_vec = content ⊕ context
269
+ └──────┬───────────┘
270
+
271
+
272
+ ┌──────────────────┐
273
+ │ EIG Evaluation │ → Epistemic Information Gain
274
+ │ (Novelty Check) │ eig = normalized_distance(vec, context_vec)
275
+ └──────┬───────────┘ tag "epistemic_high" if eig > threshold
276
+
277
+
278
+ ┌─────────────────┐
279
+ │ MemoryNode │ → id, hdv, content, metadata
280
+ │ Creation │ ltp_strength = I × log(1+A) × e^(-λT)
281
+ └──────┬──────────┘
282
+
283
+
284
+ ┌─────────────────┐
285
+ │ HOT Tier │ → In-memory dict (max 2000 nodes)
286
+ │ (RAM) │ LTP eviction: low-LTP nodes → WARM
287
+ └──────┬──────────┘
288
+ │ (background)
289
+
290
+ ┌─────────────────┐
291
+ │ Subconscious │ → Dream cycle fires
292
+ │ Dream Loop │ Query similar memories
293
+ └──────┬──────────┘ Strengthen synapses (Hebbian)
294
+
295
+
296
+ ┌─────────────────┐
297
+ │ WARM Tier │ → Redis-backed persistence
298
+ │ (Redis/mmap) │ async dual-write + pub/sub events
299
+ └──────┬──────────┘
300
+ │ (scheduled, nightly)
301
+
302
+ ┌─────────────────┐
303
+ │ COLD Tier │ → Qdrant / Disk / S3
304
+ │ (Archival) │ ANN search, long-term persistence
305
+ └─────────────────┘
306
+ ```
307
+
308
+ ### Long-Term Potentiation (LTP)
309
+
310
+ Memories are not equal. Importance is computed dynamically using a biologically-inspired LTP formula:
311
+
312
+ ```
313
+ S = I × log(1 + A) × e^(-λ × T)
314
+
315
+ Where:
316
+ S = LTP strength (determines tier placement)
317
+ I = Importance (derived from epistemic + pragmatic value)
318
+ A = Access count (frequency of retrieval)
319
+ λ = Decay lambda (configurable, default ~0.01)
320
+ T = Age in days
321
+ ```
322
+
323
+ Memories with high LTP remain in HOT tier. Those that decay are automatically demoted to WARM, then COLD — mirroring how biological memory consolidates from working memory to long-term storage.
324
+
325
+ ### Synaptic Connections
326
+
327
+ Memories are linked by `SynapticConnection` objects that implement Hebbian learning: *"neurons that fire together, wire together."*
328
+
329
+ Every time two memories are co-retrieved (via the background dream loop or explicit binding), their synaptic strength increases. During query time, synaptic spreading amplifies scores of connected memories even when they do not directly match the query vector — enabling **associative recall**.
330
+
331
+ ```python
332
+ # Explicit synapse creation
333
+ engine.bind_memories(id_a, id_b, success=True)
334
+
335
+ # Associative spreading: query top seeds spread activation to neighbors
336
+ # neighbor_score += seed_score × synapse_strength × 0.3
337
+ ```
338
+
339
+ ---
340
+
341
+ ## Tiered Storage: HOT / WARM / COLD
342
+
343
+ | Tier | Backend | Capacity | Latency | Eviction Trigger |
344
+ |------|---------|----------|---------|------------------|
345
+ | 🔥 **HOT** | Python dict (RAM) | 2,000 nodes | < 1ms | LTP < threshold |
346
+ | 🌡 **WARM** | Redis + mmap | 100,000 nodes | < 10ms | Age + low access |
347
+ | ❄️ **COLD** | Qdrant / Disk / S3 | Unlimited | < 100ms | Manual / scheduled |
348
+
349
+ Promotion is automatic: accessing a WARM or COLD memory re-promotes it to HOT based on recalculated LTP. Eviction is LRU-weighted by LTP strength — the most biologically active memories always stay hot.
350
+
351
+ ---
352
+
353
+ ## Phase 4.0 Cognitive Enhancements
354
+
355
+ MnemoCore Phase 4.0 introduces five architectural enhancements that elevate the system from **data retrieval** to **cognitive reasoning**. Full implementation specifications are in [`COGNITIVE_ENHANCEMENTS.md`](COGNITIVE_ENHANCEMENTS.md).
356
+
357
+ ---
358
+
359
+ ### 1. Contextual Query Masking *(XOR Attention)*
360
+
361
+ **Problem**: Large multi-project deployments suffer from cross-context interference. A query for `"Python error handling"` returns memories from all projects equally, diluting precision.
362
+
363
+ **Solution**: Bidirectional XOR context binding — apply the same context vector at both **storage** and **query** time:
364
+
365
+ ```
366
+ Store: bound_vec = content ⊕ context_vec
367
+ Query: masked_query = query ⊕ context_vec
368
+
369
+ Result: (content ⊕ C) · (query ⊕ C) ≈ content · query
370
+ (context cancels, cross-project noise is suppressed)
371
+ ```
372
+
373
+ ```python
374
+ # Store memories in a project context
375
+ engine.store("API rate limiting logic", goal_id="ProjectAlpha")
376
+ engine.store("Garden watering schedule", goal_id="HomeProject")
377
+
378
+ # Query with context mask — only ProjectAlpha memories surface
379
+ results = engine.query("API logic", top_k=5, context="ProjectAlpha")
380
+ ```
381
+
382
+ **Expected impact**: +50–80% query precision (P@5) in multi-project deployments.
383
+
384
+ ---
385
+
386
+ ### 2. Reliability Feedback Loop *(Self-Correcting Memory)*
387
+
388
+ **Problem**: Wrong or outdated memories persist with the same retrieval weight as correct ones. The system has no mechanism to learn from its own mistakes.
389
+
390
+ **Solution**: Bayesian reliability scoring with real-world outcome feedback:
391
+
392
+ ```
393
+ reliability = (successes + 1) / (successes + failures + 2) # Laplace smoothing
394
+
395
+ LTP_enhanced = I × log(1+A) × e^(-λT) × reliability
396
+ ```
397
+
398
+ ```python
399
+ # After using a retrieved memory:
400
+ engine.provide_feedback(memory_id, outcome=True) # Worked → boost reliability
401
+ engine.provide_feedback(memory_id, outcome=False) # Failed → reduce reliability
402
+
403
+ # System auto-tags consistently wrong memories as "unreliable"
404
+ # and verified memories (>5 successes, >0.8 score) as "verified"
405
+ ```
406
+
407
+ The system converges toward **high-confidence knowledge** — memories that have demonstrably worked in practice rank above theoretically similar but unproven ones.
408
+
409
+ ---
410
+
411
+ ### 3. Semantic Memory Consolidation *(Dream-Phase Synthesis)*
412
+
413
+ **Problem**: Episodic memory grows without bound. 1,000 memories about `"Python TypeError"` are semantically equivalent but consume 2MB of vector space and slow down linear scan queries.
414
+
415
+ **Solution**: Nightly `ConsolidationWorker` clusters similar WARM tier memories and replaces them with a **semantic anchor** — a majority-bundled prototype:
416
+
417
+ ```
418
+ BEFORE consolidation:
419
+ mem_001: "Python TypeError in line 45" (2KB vector)
420
+ mem_002: "TypeError calling function" (2KB vector)
421
+ ... ×100 similar memories (200KB total)
422
+
423
+ AFTER consolidation:
424
+ anchor_001: "Semantic pattern: python typeerror function"
425
+ metadata: {source_count: 100, confidence: 0.94}
426
+ hdv: majority_bundle([mem_001.hdv, ..., mem_100.hdv]) (2KB)
427
+ ```
428
+
429
+ ```python
430
+ # Manual trigger (runs automatically at 3 AM)
431
+ stats = engine.trigger_consolidation()
432
+ # → {"abstractions_created": 12, "memories_consolidated": 847}
433
+
434
+ # Via API (admin endpoint)
435
+ POST /admin/consolidate
436
+ ```
437
+
438
+ **Expected impact**: 70–90% memory footprint reduction, 10x query speedup at scale.
439
+
440
+ ---
441
+
442
+ ### 4. Auto-Associative Cleanup Loop *(Vector Immunology)*
443
+
444
+ **Problem**: Holographic vectors degrade over time through repeated XOR operations, noise accumulation, and long-term storage drift. After months of operation, retrieved vectors become "blurry" and similarity scores fall.
445
+
446
+ **Solution**: Iterative attractor dynamics — when a retrieved vector appears noisy, snap it to the nearest stable concept in a **codebook** of high-confidence prototypes:
447
+
448
+ ```
449
+ noisy_vec → find K nearest in codebook
450
+ → majority_bundle(K neighbors)
451
+ → check convergence (Hamming distance < 5%)
452
+ → iterate until converged or max iterations reached
453
+ ```
454
+
455
+ ```python
456
+ # Cleanup runs automatically on retrieval when noise > 15%
457
+ node = engine.get_memory(memory_id, auto_cleanup=True)
458
+ # node.metadata["cleaned"] = True (if cleanup was triggered)
459
+ # node.metadata["cleanup_iterations"] = 3
460
+
461
+ # Codebook is auto-populated from most-accessed, high-reliability memories
462
+ ```
463
+
464
+ **Expected impact**: Maintain >95% similarity fidelity even after years of operation.
465
+
466
+ ---
467
+
468
+ ### 5. Knowledge Gap Detection *(Proactive Curiosity)*
469
+
470
+ **Problem**: The system is entirely reactive — it answers queries but never identifies what it *doesn't know*. True cognitive autonomy requires self-directed learning.
471
+
472
+ **Solution**: Temporal co-occurrence analysis — detect concepts that are frequently accessed **close in time** but have **no synaptic connection**, flagging them as knowledge gaps:
473
+
474
+ ```python
475
+ # Automatically runs hourly
476
+ gaps = engine.detect_knowledge_gaps(time_window_seconds=300)
477
+
478
+ # Returns structured insight:
479
+ # [
480
+ # {
481
+ # "concept_a": "Python asyncio event loop",
482
+ # "concept_b": "FastAPI dependency injection",
483
+ # "suggested_query": "How does asyncio relate to FastAPI dependency injection?",
484
+ # "co_occurrence_count": 4
485
+ # }
486
+ # ]
487
+
488
+ # Query endpoint
489
+ GET /insights/gaps?lookback_hours=24
490
+
491
+ # Fill gap manually (or via LLM agent)
492
+ POST /insights/fill-gap
493
+ {"concept_a_id": "mem_xxx", "concept_b_id": "mem_yyy",
494
+ "explanation": "FastAPI uses asyncio's event loop internally..."}
495
+ ```
496
+
497
+ The system becomes capable of **saying what it doesn't understand** and requesting clarification — the first step toward genuine cognitive autonomy.
498
+
499
+ ---
500
+
501
+ ## Phase 4.4–4.5: Subconscious Daemon & LLM Integration
502
+
503
+ ### Subconscious Daemon *(Autonomous Background Mind)*
504
+
505
+ Phase 4.4 introduced `SubconsciousAI` — a worker that fires during idle cycles and calls an external LLM to generate **synthetic dream memories**: structured insights derived by reasoning over existing memory clusters, rather than through direct observation.
506
+
507
+ Phase 4.5 hardened this into a full `SubconsciousDaemon` — an independently managed asyncio process that orchestrates dream cycles, consolidation scheduling, and subconscious queue processing:
508
+
509
+ ```python
510
+ # The daemon is started automatically when the API starts up.
511
+ # It coordinates:
512
+ # - Dream synthesis: SubconsciousAI → LLM → synthetic insights stored back
513
+ # - Consolidation scheduling: ConsolidationWorker fired on a configurable interval
514
+ # - Subconscious queue: novelty detection from Redis pub/sub stream
515
+ ```
516
+
517
+ Configure in `config.yaml`:
518
+
519
+ ```yaml
520
+ haim:
521
+ subconscious_ai:
522
+ enabled: true
523
+ api_url: "https://api.openai.com/v1/chat/completions" # or Anthropic
524
+ model: "gpt-4o-mini"
525
+ # api_key: set via SUBCONSCIOUS_AI_API_KEY env var
526
+ dream_interval_seconds: 300
527
+ batch_size: 5
528
+ ```
529
+
530
+ ### Dependency Injection Container
531
+
532
+ All major services (TierManager, AsyncRedisStorage, QdrantStore, SubconsciousAI, etc.) are now wired through `src/mnemocore/core/container.py`. This eliminates global singleton state and makes every subsystem testable in isolation:
533
+
534
+ ```python
535
+ from mnemocore.core.container import build_container
536
+
537
+ container = build_container(config)
538
+ engine = container.engine()
539
+ tier_mgr = container.tier_manager()
540
+ ```
541
+
542
+ ### LLM Agent Integration
543
+
544
+ `src/mnemocore/llm_integration.py` provides a high-level interface for attaching MnemoCore to any OpenAI/Anthropic-style LLM agent loop:
545
+
546
+ ```python
547
+ from mnemocore.llm_integration import MnemoCoreAgent
548
+
549
+ agent = MnemoCoreAgent(engine)
550
+
551
+ # Store agent observations
552
+ agent.observe("User prefers concise answers over verbose ones")
553
+
554
+ # Recall relevant context before a response
555
+ context = agent.recall("user preference", top_k=3)
556
+ ```
557
+
558
+ ---
559
+
560
+ ## API Reference
561
+
562
+ ### Authentication
563
+
564
+ All endpoints require an API key via the `X-API-Key` header:
565
+
566
+ ```bash
567
+ export HAIM_API_KEY="your-secure-key"
568
+ curl -H "X-API-Key: $HAIM_API_KEY" ...
569
+ ```
570
+
571
+ ### Endpoints
572
+
573
+ #### `POST /store`
574
+ Store a new memory with optional context binding.
575
+
576
+ ```json
577
+ Request:
578
+ {
579
+ "content": "FastAPI uses Pydantic v2 for request validation.",
580
+ "metadata": {"source": "docs", "tags": ["python", "fastapi"]},
581
+ "context": "ProjectAlpha",
582
+ "agent_id": "agent-001",
583
+ "ttl": 3600
584
+ }
585
+
586
+ Response:
587
+ {
588
+ "ok": true,
589
+ "memory_id": "mem_1739821234567",
590
+ "message": "Stored memory: mem_1739821234567"
591
+ }
592
+ ```
593
+
594
+ #### `POST /query`
595
+ Query memories by semantic similarity with optional context masking.
596
+
597
+ ```json
598
+ Request:
599
+ {
600
+ "query": "How does FastAPI handle request validation?",
601
+ "top_k": 5,
602
+ "context": "ProjectAlpha"
603
+ }
604
+
605
+ Response:
606
+ {
607
+ "ok": true,
608
+ "query": "How does FastAPI handle request validation?",
609
+ "results": [
610
+ {
611
+ "id": "mem_1739821234567",
612
+ "content": "FastAPI uses Pydantic v2 for request validation.",
613
+ "score": 0.8923,
614
+ "metadata": {"source": "docs"},
615
+ "tier": "hot"
616
+ }
617
+ ]
618
+ }
619
+ ```
620
+
621
+ #### `POST /feedback`
622
+ Report outcome of a retrieved memory (Phase 4.0 reliability loop).
623
+
624
+ ```json
625
+ Request:
626
+ {
627
+ "memory_id": "mem_1739821234567",
628
+ "outcome": true,
629
+ "comment": "This solution worked perfectly."
630
+ }
631
+
632
+ Response:
633
+ {
634
+ "ok": true,
635
+ "memory_id": "mem_1739821234567",
636
+ "reliability_score": 0.714,
637
+ "success_count": 4,
638
+ "failure_count": 1
639
+ }
640
+ ```
641
+
642
+ #### `GET /memory/{memory_id}`
643
+ Retrieve a specific memory with full metadata.
644
+
645
+ ```json
646
+ Response:
647
+ {
648
+ "id": "mem_1739821234567",
649
+ "content": "...",
650
+ "metadata": {...},
651
+ "created_at": "2026-02-17T20:00:00Z",
652
+ "ltp_strength": 1.847,
653
+ "epistemic_value": 0.73,
654
+ "reliability_score": 0.714,
655
+ "tier": "hot"
656
+ }
657
+ ```
658
+
659
+ #### `DELETE /memory/{memory_id}`
660
+ Delete memory from all tiers and clean up synapses.
661
+
662
+ #### `POST /concept`
663
+ Define a symbolic concept for analogical reasoning.
664
+
665
+ ```json
666
+ {"name": "king", "attributes": {"gender": "man", "role": "ruler", "domain": "royalty"}}
667
+ ```
668
+
669
+ #### `POST /analogy`
670
+ Solve analogies using VSA algebra: `source:value :: target:?`
671
+
672
+ ```json
673
+ Request: {"source_concept": "king", "source_value": "man", "target_concept": "queen"}
674
+ Response: {"results": [{"value": "woman", "score": 0.934}]}
675
+ ```
676
+
677
+ #### `GET /insights/gaps`
678
+ Detect knowledge gaps from recent temporal co-activity (Phase 4.0).
679
+
680
+ ```json
681
+ Response:
682
+ {
683
+ "gaps_detected": 3,
684
+ "knowledge_gaps": [
685
+ {
686
+ "concept_a": "asyncio event loop",
687
+ "concept_b": "FastAPI middleware",
688
+ "suggested_query": "How does event loop relate to middleware?",
689
+ "co_occurrence_count": 5
690
+ }
691
+ ]
692
+ }
693
+ ```
694
+
695
+ #### `POST /admin/consolidate`
696
+ Trigger manual semantic consolidation (normally runs automatically at 3 AM).
697
+
698
+ #### `GET /stats`
699
+ Engine statistics — tiers, synapse count, consolidation state.
700
+
701
+ #### `GET /health`
702
+ Health check — Redis connectivity, engine readiness, degraded mode status.
703
+
704
+ #### `GET /metrics`
705
+ Prometheus metrics endpoint.
706
+
707
+ ---
708
+
709
+ ## Python Library Usage
710
+
711
+ ### Basic Store and Query
712
+
713
+ ```python
714
+ from mnemocore.core.engine import HAIMEngine
715
+
716
+ engine = HAIMEngine(persist_path="./data/memory.jsonl")
717
+
718
+ # Store memories
719
+ engine.store("Python generators are lazy iterators", metadata={"topic": "python"})
720
+ engine.store("Use 'yield' to create generator functions", metadata={"topic": "python"})
721
+ engine.store("Redis XADD appends to a stream", goal_id="infrastructure")
722
+
723
+ # Query (global)
724
+ results = engine.query("How do Python generators work?", top_k=3)
725
+ for mem_id, score in results:
726
+ mem = engine.get_memory(mem_id)
727
+ print(f"[{score:.3f}] {mem.content}")
728
+
729
+ # Query with context masking
730
+ results = engine.query("data streams", top_k=5, context="infrastructure")
731
+
732
+ engine.close()
733
+ ```
734
+
735
+ ### Analogical Reasoning
736
+
737
+ ```python
738
+ # Define concepts
739
+ engine.define_concept("king", {"gender": "man", "role": "ruler"})
740
+ engine.define_concept("queen", {"gender": "woman", "role": "ruler"})
741
+ engine.define_concept("man", {"gender": "man"})
742
+
743
+ # VSA analogy: king:man :: ?:woman → queen
744
+ result = engine.reason_by_analogy(
745
+ src="king", val="man", tgt="woman"
746
+ )
747
+ print(result) # [("queen", 0.934), ...]
748
+ ```
749
+
750
+ ### Working with the Binary HDV Layer Directly
751
+
752
+ ```python
753
+ from mnemocore.core.binary_hdv import BinaryHDV, TextEncoder, majority_bundle
754
+
755
+ encoder = TextEncoder(dimension=16384)
756
+
757
+ # Encode text
758
+ python_vec = encoder.encode("Python programming")
759
+ fastapi_vec = encoder.encode("FastAPI framework")
760
+ error_vec = encoder.encode("error handling")
761
+
762
+ # Bind concept to role
763
+ python_in_fastapi = python_vec.xor_bind(fastapi_vec)
764
+
765
+ # Bundle multiple concepts into prototype
766
+ web_dev_prototype = majority_bundle([python_vec, fastapi_vec, error_vec])
767
+
768
+ # Similarity
769
+ print(python_vec.similarity(web_dev_prototype)) # High (part of bundle)
770
+ print(python_vec.similarity(error_vec)) # ~0.5 (unrelated)
771
+
772
+ # Batch nearest-neighbor search
773
+ from mnemocore.core.binary_hdv import batch_hamming_distance
774
+ import numpy as np
775
+
776
+ database = np.stack([v.data for v in [python_vec, fastapi_vec, error_vec]])
777
+ distances = batch_hamming_distance(python_vec, database)
778
+ ```
779
+
780
+ ### Reliability Feedback Loop
781
+
782
+ ```python
783
+ mem_id = engine.store("Always use asyncio.Lock() in async code, not threading.Lock()")
784
+ results = engine.query("async locking")
785
+
786
+ # It works — report success
787
+ engine.provide_feedback(mem_id, outcome=True, comment="Solved deadlock issue")
788
+
789
+ # Over time, high-reliability memories get 'verified' tag
790
+ # and are ranked above unproven ones in future queries
791
+ ```
792
+
793
+ ### Semantic Consolidation
794
+
795
+ ```python
796
+ stats = engine.trigger_consolidation()
797
+ print(f"Created {stats['abstractions_created']} semantic anchors")
798
+ print(f"Consolidated {stats['memories_consolidated']} episodic memories")
799
+
800
+ # Automatic: runs every night at 3 AM via background asyncio task
801
+ ```
802
+
803
+ ---
804
+
805
+ ## Installation
806
+
807
+ ### Prerequisites
808
+
809
+ - **Python 3.10+**
810
+ - **Redis 6+** — Required for WARM tier and async event streaming
811
+ - **Qdrant** *(optional)* — For COLD tier at billion-scale
812
+ - **Docker** *(recommended)* — For Redis and Qdrant services
813
+
814
+ ### Quick Start
815
+
816
+ ```bash
817
+ # 1. Clone
818
+ git clone https://github.com/RobinALG87/MnemoCore-Infrastructure-for-Persistent-Cognitive-Memory.git
819
+ cd MnemoCore-Infrastructure-for-Persistent-Cognitive-Memory
820
+
821
+ # 2. Create virtual environment
822
+ python -m venv .venv
823
+ .\.venv\Scripts\activate # Windows (PowerShell)
824
+ # source .venv/bin/activate # Linux / macOS
825
+
826
+ # 3. Install (recommended — uses pyproject.toml as canonical source)
827
+ pip install -e .
828
+
829
+ # Or install runtime deps only (Docker / legacy):
830
+ # pip install -r requirements.txt
831
+
832
+ # To include dev tools (pytest, mypy, black, etc.):
833
+ pip install -e ".[dev]"
834
+
835
+ # 4. Start Redis
836
+ docker run -d -p 6379:6379 redis:7.2-alpine
837
+
838
+ # 5. Set API key (never hardcode — use env var or .env file)
839
+ # Windows PowerShell:
840
+ $env:HAIM_API_KEY = "your-secure-key-here"
841
+ # Linux / macOS:
842
+ # export HAIM_API_KEY="your-secure-key-here"
843
+
844
+ # 6. Start the API
845
+ uvicorn mnemocore.api.main:app --host 0.0.0.0 --port 8100
846
+ ```
847
+
848
+ The API is now live at `http://localhost:8100`. Visit `http://localhost:8100/docs` for the interactive Swagger UI.
849
+
850
+ ### Using the .env file
851
+
852
+ Copy the provided template and fill in your values — the API and docker-compose both pick it up automatically:
853
+
854
+ ```bash
855
+ cp .env.example .env
856
+ # Edit .env and set HAIM_API_KEY, REDIS_URL, etc.
857
+ ```
858
+
859
+ > **Note:** `.env` is listed in `.gitignore` and must never be committed. Only `.env.example` (with placeholder values) belongs in version control.
860
+
861
+ ### Full Stack with Docker Compose
862
+
863
+ ```bash
864
+ # Requires .env with HAIM_API_KEY set
865
+ docker compose up -d
866
+ ```
867
+
868
+ This starts MnemoCore, Redis 7.2, and Qdrant in one command.
869
+
870
+ ### With Qdrant (Phase 4.x Scale)
871
+
872
+ ```bash
873
+ # Start Qdrant alongside Redis
874
+ docker run -d -p 6333:6333 qdrant/qdrant
875
+
876
+ # Enable in config.yaml
877
+ qdrant:
878
+ enabled: true
879
+ host: localhost
880
+ port: 6333
881
+ ```
882
+
883
+ ---
884
+
885
+ ## Configuration
886
+
887
+ All configuration lives in `config.yaml`. Sensitive values can be overridden with environment variables — the config loader looks for `HAIM_`-prefixed vars and also honours per-service overrides like `HAIM_API_KEY`, `REDIS_PASSWORD`, `QDRANT_API_KEY`, `HAIM_CORS_ORIGINS`, and `SUBCONSCIOUS_AI_API_KEY`.
888
+
889
+ ```yaml
890
+ haim:
891
+ version: "4.5"
892
+ dimensionality: 16384 # Binary vector dimensions (must be multiple of 64)
893
+
894
+ encoding:
895
+ mode: "binary" # "binary" (recommended) or "float" (legacy, deprecated)
896
+ token_method: "bundle" # "bundle" (XOR+permute) or "hash"
897
+
898
+ tiers:
899
+ hot:
900
+ max_memories: 2000 # Max nodes in RAM
901
+ ltp_threshold_min: 0.7 # Evict below this LTP strength
902
+ eviction_policy: "lru"
903
+ warm:
904
+ max_memories: 100000 # Max nodes in Redis/mmap
905
+ ltp_threshold_min: 0.3
906
+ cold:
907
+ storage_backend: "filesystem" # "filesystem" or "s3"
908
+ compression: "gzip"
909
+
910
+ ltp:
911
+ initial_importance: 0.5
912
+ decay_lambda: 0.01 # Higher = faster forgetting
913
+ permanence_threshold: 0.95 # LTP above this is immune to decay
914
+ half_life_days: 30.0
915
+
916
+ hysteresis:
917
+ promote_delta: 0.15 # LTP must exceed threshold by this much to promote
918
+ demote_delta: 0.10
919
+
920
+ redis:
921
+ url: "redis://localhost:6379/0"
922
+ stream_key: "haim:subconscious"
923
+ max_connections: 10
924
+ socket_timeout: 5
925
+ # password: set via REDIS_PASSWORD env var
926
+
927
+ qdrant:
928
+ url: "http://localhost:6333"
929
+ collection_hot: "haim_hot"
930
+ collection_warm: "haim_warm"
931
+ enabled: false
932
+ # api_key: set via QDRANT_API_KEY env var
933
+
934
+ security:
935
+ # api_key: set via HAIM_API_KEY env var — never hardcode here
936
+ cors_origins: ["http://localhost:3000"]
937
+
938
+ subconscious_ai:
939
+ enabled: false
940
+ api_url: "https://api.openai.com/v1/chat/completions"
941
+ model: "gpt-4o-mini"
942
+ dream_interval_seconds: 300
943
+ batch_size: 5
944
+ # api_key: set via SUBCONSCIOUS_AI_API_KEY env var
945
+
946
+ observability:
947
+ metrics_port: 9090
948
+ log_level: "INFO"
949
+ structured_logging: true
950
+
951
+ paths:
952
+ data_dir: "./data"
953
+ memory_file: "./data/memory.jsonl"
954
+ codebook_file: "./data/codebook.json"
955
+ concepts_file: "./data/concepts.json"
956
+ synapses_file: "./data/synapses.json"
957
+ warm_mmap_dir: "./data/warm_tier"
958
+ cold_archive_dir: "./data/cold_archive"
959
+
960
+ mcp:
961
+ enabled: false
962
+ transport: "stdio"
963
+ host: "127.0.0.1"
964
+ port: 8110
965
+ api_base_url: "http://localhost:8100"
966
+ ```
967
+
968
+ ### Security Note
969
+
970
+ MnemoCore requires an explicit API key. There is no default fallback key in production builds.
971
+
972
+ ```bash
973
+ # Generate a cryptographically secure key:
974
+ python -c "import secrets; print(secrets.token_urlsafe(32))"
975
+
976
+ # Set it (never commit this value):
977
+ export HAIM_API_KEY="<generated-value>"
978
+ ```
979
+
980
+ ---
981
+
982
+ ## MCP Server Integration
983
+
984
+ MnemoCore exposes a **Model Context Protocol (MCP)** server, enabling direct integration with Claude, GPT-4, and any MCP-compatible agent framework.
985
+
986
+ ### Setup
987
+
988
+ ```bash
989
+ # Start API first
990
+ uvicorn mnemocore.api.main:app --host 0.0.0.0 --port 8100
991
+
992
+ # Configure MCP in config.yaml
993
+ haim:
994
+ mcp:
995
+ enabled: true
996
+ transport: "stdio" # or "sse" for streaming
997
+
998
+ # Run MCP server
999
+ python -m mnemocore.mcp.server
1000
+ ```
1001
+
1002
+ ### Claude Desktop Configuration
1003
+
1004
+ Add to your Claude Desktop `config.json`:
1005
+
1006
+ ```json
1007
+ {
1008
+ "mcpServers": {
1009
+ "mnemocore": {
1010
+ "command": "python",
1011
+ "args": ["-m", "mnemocore.mcp.server"],
1012
+ "env": {
1013
+ "HAIM_API_KEY": "your-key",
1014
+ "HAIM_BASE_URL": "http://localhost:8100"
1015
+ }
1016
+ }
1017
+ }
1018
+ }
1019
+ ```
1020
+
1021
+ Once connected, the agent can:
1022
+ - `store_memory(content, context)` — persist learned information
1023
+ - `query_memory(query, context, top_k)` — recall relevant memories
1024
+ - `provide_feedback(memory_id, outcome)` — signal what worked
1025
+ - `get_knowledge_gaps()` — surface what it doesn't understand
1026
+
1027
+ ---
1028
+
1029
+ ## Observability
1030
+
1031
+ MnemoCore ships with built-in Prometheus metrics and structured logging.
1032
+
1033
+ ### Prometheus Metrics
1034
+
1035
+ Available at `GET /metrics`:
1036
+
1037
+ | Metric | Description |
1038
+ |--------|-------------|
1039
+ | `haim_api_request_count` | Total requests by endpoint and status |
1040
+ | `haim_api_request_latency_seconds` | Request latency histogram |
1041
+ | `haim_storage_operation_count` | Store/query/delete operations |
1042
+ | `haim_hot_tier_size` | Current HOT tier memory count |
1043
+ | `haim_synapse_count` | Active synaptic connections |
1044
+
1045
+ ### Grafana Dashboard
1046
+
1047
+ A sample Grafana dashboard config is available at `grafana-dashboard.json` in the repository root. Import it directly into Grafana via **Dashboards → Import → Upload JSON file**.
1048
+
1049
+ ### Structured Logging
1050
+
1051
+ All components use structured Python logging with contextual fields:
1052
+
1053
+ ```
1054
+ 2026-02-17 20:00:00 INFO Stored memory mem_1739821234567 (EIG: 0.7823)
1055
+ 2026-02-17 20:00:01 INFO Memory mem_1739821234567 reliability updated: 0.714 (4✓ / 1✗)
1056
+ 2026-02-17 03:00:00 INFO Consolidation complete: abstractions_created=12, consolidated=847
1057
+ 2026-02-17 04:00:00 INFO Knowledge gap detected: asyncio ↔ FastAPI middleware (5 co-occurrences)
1058
+ ```
1059
+
1060
+ ---
1061
+
1062
+ ## Testing
1063
+
1064
+ ```bash
1065
+ # Run full test suite
1066
+ pytest
1067
+
1068
+ # Run with coverage
1069
+ pytest --cov=src --cov-report=html
1070
+
1071
+ # Run specific feature tests
1072
+ pytest tests/test_xor_attention.py # Contextual masking
1073
+ pytest tests/test_stability.py # Reliability/Bayesian stability
1074
+ pytest tests/test_consolidation.py # Semantic consolidation
1075
+ pytest tests/test_engine_cleanup.py # Cleanup and decay
1076
+ pytest tests/test_phase43_regressions.py # Phase 4.3 regression guardrails
1077
+ pytest tests/test_tier_manager.py # Tier demotion / promotion logic
1078
+ pytest tests/test_dream_loop.py # Subconscious dream loop
1079
+ pytest tests/test_subconscious_ai_worker.py # LLM-powered dream worker (if offline: uses mocks)
1080
+ pytest tests/test_recursive_synthesizer.py # Deep concept synthesis
1081
+ pytest tests/test_batch_ops.py # Bulk ingestion operations
1082
+ pytest tests/test_mcp_server.py # MCP server adapter
1083
+
1084
+ # End-to-end flow
1085
+ pytest tests/test_e2e_flow.py -v
1086
+ ```
1087
+
1088
+ ---
1089
+
1090
+ ## Roadmap
1091
+
1092
+ ### Current Release (v4.5.0)
1093
+
1094
+ - [x] Binary HDV core (XOR bind / bundle / permute / Hamming)
1095
+ - [x] Three-tier HOT/WARM/COLD memory lifecycle
1096
+ - [x] Async API + MCP integration
1097
+ - [x] XOR attention masking + Bayesian reliability updates
1098
+ - [x] Semantic consolidation, immunology cleanup, and gap detection/filling
1099
+ - [x] Temporal recall: episodic chaining + chrono-weighted query
1100
+ - [x] Regression guardrails for Phase 4.3 critical paths
1101
+ - [x] Phase 4.4 — Subconscious AI Worker (LLM-powered dream synthesis)
1102
+ - [x] Phase 4.5 — Subconscious Daemon, persistence hardening, tier-manager demotion race fix
1103
+ - [x] Dependency-injection Container pattern (replaces singleton)
1104
+ - [x] HNSW in-process index for hot-tier ANN search
1105
+ - [x] Batch operations for bulk ingestion
1106
+ - [x] Meta-cognition layer: GoalTree + LearningJournal
1107
+
1108
+ ### Next Steps
1109
+
1110
+ - [ ] Hardening pass for distributed/clustered HOT-tier behavior
1111
+ - [ ] Extended observability standardization (`mnemocore_*` metric prefix across all components)
1112
+ - [ ] Self-improvement loop (design documented in `docs/SELF_IMPROVEMENT_DEEP_DIVE.md`, staged rollout pending)
1113
+ - [ ] CUDA kernels for batch HDV operations at scale
1114
+ - [ ] Helm chart production hardening (resource autoscaling, PodDisruptionBudget)
1115
+
1116
+ ---
1117
+
1118
+ ## Contributing
1119
+
1120
+ MnemoCore is an active research project. Contributions are welcome — especially:
1121
+
1122
+ - **Performance**: CUDA kernels, FAISS integration, async refactoring
1123
+ - **Algorithms**: Better clustering for consolidation, improved EIG formulas
1124
+ - **Integrations**: New storage backends, LLM connectors
1125
+ - **Tests**: Coverage for edge cases, property-based testing
1126
+
1127
+ ### Process
1128
+
1129
+ ```bash
1130
+ # Fork and clone
1131
+ git checkout -b feature/your-feature-name
1132
+
1133
+ # Make changes, ensure tests pass
1134
+ pytest
1135
+
1136
+ # Commit with semantic message
1137
+ git commit -m "feat(consolidation): add LLM-powered prototype labeling"
1138
+
1139
+ # Open PR — describe the what, why, and performance impact
1140
+ ```
1141
+
1142
+ Please follow the implementation patterns established in `docs/ARCHITECTURE.md` and `docs/ROADMAP.md` for architectural guidance, and review `CHANGELOG.md` to understand what has already landed.
1143
+
1144
+ ---
1145
+
1146
+ ## License
1147
+
1148
+ MIT License — see [LICENSE](LICENSE) for details.
1149
+
1150
+ ---
1151
+
1152
+ ## Contact
1153
+
1154
+ **Robin Granberg**
1155
+ 📧 robin@veristatesystems.com
1156
+
1157
+ ---
1158
+
1159
+ <p align="center">
1160
+ <i>Building the cognitive substrate for the next generation of autonomous AI.</i>
1161
+ </p>
REFACTORING_TODO.md ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Refactoring TODO
2
+
3
+ Status för kodoptimering inför kommande funktionalitet.
4
+
5
+ ---
6
+
7
+ ## Hög Prioritet
8
+
9
+ ### 1. Konsolidera HDV-implementation
10
+ **Status:** ✅ Completed (2026-02-18)
11
+
12
+ **Problem:**
13
+ - Dubbel implementation: `src/core/hdv.py` (float) + `src/core/binary_hdv.py` (binary)
14
+ - Skapar branch-logik genom hela koden
15
+ - Float HDV är legacy och bör depreceras
16
+
17
+ **Åtgärder genomförda:**
18
+ - `src/core/hdv.py` - Markerad som DEPRECATED med varning
19
+ - `src/core/__init__.py` - Exporterar nu BinaryHDV först
20
+ - `src/core/node.py` - Använder endast BinaryHDV
21
+ - `src/core/holographic.py` - Konverterad till BinaryHDV med XOR-binding
22
+ - `src/core/router.py` - Tog bort HDV-branching
23
+ - `src/core/engine.py` - Tog bort Union-typer och branching
24
+ - `src/core/tier_manager.py` - Standardiserade på BinaryHDV
25
+
26
+ ---
27
+
28
+ ### 2. Ofullständiga features
29
+ **Status:** Pending
30
+
31
+ **Problem:**
32
+ - Flera TODOs i produktionskod som lämnats oimplementerade
33
+
34
+ **Filer:**
35
+ - `src/llm_integration.py`
36
+
37
+ **TODOs:**
38
+ ```
39
+ Line 56: # TODO: Call Gemini 3 Pro via OpenClaw API
40
+ Line 106: # TODO: superposition_query() not implemented in HAIMEngine
41
+ Line 131: # TODO: Call Gemini 3 Pro
42
+ Line 301: # TODO: Implement concept-to-memory-ID mapping
43
+ Line 320: # TODO: orchestrate_orch_or() not implemented
44
+ ```
45
+
46
+ **Åtgärd:**
47
+ - Implementera funktionerna
48
+ - Eller ta bort dödkod
49
+
50
+ ---
51
+
52
+ ### 3. Standardisera felhantering
53
+ **Status:** Pending
54
+
55
+ **Problem:**
56
+ - Vissa funktioner returnerar `None` vid fel
57
+ - Andra kastar exceptions
58
+ - Svårt att förutse felbeteende
59
+
60
+ **Åtgärd:**
61
+ - Definiera domän-specifika exceptions:
62
+ - `MemoryNotFoundError`
63
+ - `StorageError`
64
+ - `EncodingError`
65
+ - `ConsolidationError`
66
+ - Skapa `src/core/exceptions.py`
67
+ - Uppdatera alla moduler att använda konsistent felhantering
68
+
69
+ ---
70
+
71
+ ## Medelprioritet
72
+
73
+ ### 4. Minska Singleton-användning
74
+ **Status:** 📋 Roadmap
75
+
76
+ **Problem:**
77
+ - `AsyncRedisStorage.get_instance()`
78
+ - `QdrantStore.get_instance()`
79
+ - Försvårar testning
80
+
81
+ **Åtgärd:**
82
+ - Inför Dependency Injection
83
+ - Passa beroenden via konstruktor
84
+
85
+ **Komplexitet:** Hög - Kräver genomgripande ändringar av instansiering
86
+
87
+ ---
88
+
89
+ ### 5. Bryt isär stora funktioner
90
+ **Status:** 📋 Roadmap
91
+
92
+ **Problem:**
93
+ - `engine.py:store()` - 76 rader
94
+ - `tier_manager.py:consolidate_warm_to_cold()` - 48 rader
95
+
96
+ **Åtgärd:**
97
+ - Extrahera till mindre, testbara enheter
98
+
99
+ **Komplexitet:** Hög - Refaktorering av kärnlogik
100
+
101
+ ---
102
+
103
+ ### 6. Konsolidera Circuit Breakers
104
+ **Status:** ✅ Completed (2026-02-18)
105
+
106
+ **Problem:**
107
+ - `src/core/resilience.py` - pybreaker implementation
108
+ - `src/core/reliability.py` - Native implementation
109
+ - Dubbel implementation
110
+
111
+ **Åtgärder genomförda:**
112
+ - `src/core/reliability.py` - Nu primär modul med pre-konfigurerade instanser
113
+ - `src/core/resilience.py` - Markerad som DEPRECATED
114
+ - `src/core/qdrant_store.py` - Uppdaterad till reliability
115
+ - `src/api/main.py` - Uppdaterad till reliability, tog bort pybreaker-beroende
116
+
117
+ ---
118
+
119
+ ### 7. Centralisera hårkodade sökvägar
120
+ **Status:** ✅ Completed (2026-02-18)
121
+
122
+ **Problem:**
123
+ - `"./data"` fanns hårdkodat på flera ställen
124
+
125
+ **Åtgärder genomförda:**
126
+ - `src/core/holographic.py` - Använder nu `config.paths.data_dir` som default
127
+ - Alla sökvägar centraliserade i `config.yaml` och `HAIMConfig`
128
+
129
+ ---
130
+
131
+ ### 8. Standardisera import-stil
132
+ **Status:** ✅ Verified (2026-02-18)
133
+
134
+ **Problem:**
135
+ - Blandning av relativa och absoluta imports
136
+ - Till och med inom samma fil
137
+
138
+ **Analys:**
139
+ - `src/core/` använder konsekvent relativa imports (`.module`)
140
+ - Övriga moduler använder absoluta imports (`src.core.module`)
141
+ - Inga filer har blandad stil
142
+
143
+ **Slutsats:**
144
+ Import-stilen följer redan rekommenderad Python-praxis. Ingen åtgärd behövs.
145
+
146
+ ---
147
+
148
+ ## Låg prioritet
149
+
150
+ ### 9. Rensa debug-filer
151
+ - Ta bort eller flytta `debug_*.py`
152
+ - Konsolidera test-helpers
153
+
154
+ ### 10. Standardisera logging
155
+ - Välj ett framework (loguru rekommenderas)
156
+ - Ta bort ad-hoc print-statements
157
+
158
+ ### 11. Förbättra typsäkerhet
159
+ - Lägg till mypy i CI
160
+ - Komplettera type hints
161
+ - Använd `TypedDict` för komplexa dict-returns
162
+
163
+ ---
164
+
165
+ ## Förbättra testtäckning
166
+
167
+ ```bash
168
+ pytest --cov=src --cov-report=html
169
+ ```
170
+
171
+ Kör för att identifiera luckor i testtäckningen.
172
+
173
+ ---
174
+
175
+ ## Fil-prioriteringslista
176
+
177
+ | Prioritet | Fil | Anledning |
178
+ |-----------|-----|-----------|
179
+ | 1 | `src/core/engine.py` | Kärnlogik, HDV dual-mode |
180
+ | 2 | `src/core/tier_manager.py` | Stora funktioner, lagringskomplexitet |
181
+ | 3 | `src/llm_integration.py` | Flera oimplementerade TODOs |
182
+ | 4 | `src/core/resilience.py` | Duplikat circuit breaker |
183
+ | 5 | `src/core/binary_hdv.py` | Överväg extrahering till separat paket |
184
+
185
+ ---
186
+
187
+ ## Framsteg
188
+
189
+ - [x] Punkt 1: HDV-konsolidering ✅
190
+ - [ ] Punkt 2: Ofullständiga features
191
+ - [ ] Punkt 3: Felhantering
192
+ - [ ] Punkt 4: Singleton-reduktion 📋 Roadmap
193
+ - [ ] Punkt 5: Stora funktioner 📋 Roadmap
194
+ - [x] Punkt 6: Circuit breakers ✅
195
+ - [x] Punkt 7: Hårkodade sökvägar ✅
196
+ - [x] Punkt 8: Import-stil ✅ (redan konsekvent)
197
+
198
+ ---
199
+
200
+ ## Roadmap (Framtida refaktorering)
201
+
202
+ Dessa punkter kräver mer omfattande ändringar och bör planeras in senare:
203
+
204
+ | Punkt | Beskrivning | Komplexitet |
205
+ |-------|-------------|-------------|
206
+ | 4 | Minska Singleton-användning, inför DI | Hög |
207
+ | 5 | Bryt isär stora funktioner i engine/tier_manager | Hög |
RELEASE_CHECKLIST.md ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MnemoCore Public Beta Release Checklist
2
+
3
+ ## Status: 🟠 ORANGE → 🟢 GREEN
4
+
5
+ ---
6
+
7
+ ## ✅ Completed
8
+
9
+ - [x] LICENSE file (MIT)
10
+ - [x] .gitignore created
11
+ - [x] data/memory.jsonl removed (no stored memories)
12
+ - [x] No leaked API keys or credentials
13
+ - [x] 82 unit tests passing
14
+
15
+ ---
16
+
17
+ ## 🔧 Code TODOs (Known Limitations)
18
+
19
+ These are documented gaps that can ship as "Phase 4 roadmap" items:
20
+
21
+ ### 1. `src/core/tier_manager.py:338`
22
+ ```python
23
+ pass # TODO: Implement full consolidation with Qdrant
24
+ ```
25
+ **Impact:** Warm→Cold tier consolidation limited
26
+ **Workaround:** Hot→Warm works, Cold is filesystem-based
27
+ **Fix:** Implement Qdrant batch scroll API for full archival
28
+
29
+ ### 2. `src/core/engine.py:192`
30
+ ```python
31
+ # TODO: Phase 3.5 Qdrant search for WARM/COLD
32
+ ```
33
+ **Impact:** Query only searches HOT tier currently
34
+ **Workaround:** Promote memories before querying
35
+ **Fix:** Add async Qdrant similarity search in query()
36
+
37
+ ### 3. `src/llm_integration.py:55-57, 128-129`
38
+ ```python
39
+ # TODO: Call Gemini 3 Pro via OpenClaw API
40
+ reconstruction = "TODO: Call Gemini 3 Pro"
41
+ ```
42
+ **Impact:** LLM reconstruction not functional
43
+ **Workaround:** Raw vector similarity works
44
+ **Fix:** Implement LLM client or make it pluggable
45
+
46
+ ### 4. `src/nightlab/engine.py:339`
47
+ ```python
48
+ # TODO: Notion API integration
49
+ ```
50
+ **Impact:** Session documentation not auto-pushed
51
+ **Workaround:** Written to local markdown files
52
+ **Fix:** Add optional Notion connector
53
+
54
+ ---
55
+
56
+ ## 📋 Pre-Release Actions
57
+
58
+ ### Before git push:
59
+
60
+ ```bash
61
+ # 1. Clean build artifacts
62
+ rm -rf .pytest_cache __pycache__ */__pycache__ *.pyc
63
+
64
+ # 2. Verify tests pass
65
+ source .venv/bin/activate && python -m pytest tests/ -v
66
+
67
+ # 3. Verify import works
68
+ python -c "from mnemocore.core.engine import HAIMEngine; print('OK')"
69
+
70
+ # 4. Check for secrets (should return nothing)
71
+ grep -r "sk-" src/ --include="*.py"
72
+ grep -r "api_key.*=" src/ --include="*.py" | grep -v "api_key=\"\""
73
+
74
+ # 5. Initialize fresh data files
75
+ touch data/memory.jsonl data/codebook.json data/concepts.json data/synapses.json
76
+ ```
77
+
78
+ ### Update README.md:
79
+
80
+ - [ ] Add: "Beta Release - See RELEASE_CHECKLIST.md for known limitations"
81
+ - [ ] Add: "Installation" section with `pip install -r requirements.txt`
82
+ - [ ] Add: "Quick Start" example
83
+ - [ ] Add: "Roadmap" section linking TODOs above
84
+
85
+ ---
86
+
87
+ ## 🚀 Release Command Sequence
88
+
89
+ ```bash
90
+ cd /home/dev-robin/Desktop/mnemocore
91
+
92
+ # Verify clean state
93
+ git status
94
+
95
+ # Stage public files (exclude .venv)
96
+ git add LICENSE .gitignore RELEASE_CHECKLIST.md
97
+ git add src/ tests/ config.yaml requirements.txt pytest.ini
98
+ git add README.md studycase.md docker-compose.yml
99
+ git add data/.gitkeep # If exists, or create empty dirs
100
+
101
+ # Commit
102
+ git commit -m "Initial public beta release (MIT)
103
+
104
+ Known limitations documented in RELEASE_CHECKLIST.md"
105
+
106
+ # Tag
107
+ git tag -a v0.1.0-beta -m "Public Beta Release"
108
+
109
+ # Push (when ready)
110
+ git push origin main --tags
111
+ ```
112
+
113
+ ---
114
+
115
+ ## Post-Release
116
+
117
+ - [ ] Create GitHub repository
118
+ - [ ] Add repository topics: `vsa`, `holographic-memory`, `active-inference`, `vector-symbolic-architecture`
119
+ - [ ] Enable GitHub Issues for community feedback
120
+ - [ ] Publish whitepaper/blog post
121
+
122
+ ---
123
+
124
+ *Generated: 2026-02-15*
125
+
SECURITY.md ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Security Policy
2
+
3
+ ## Supported Versions
4
+
5
+ This repository is currently in beta.
6
+ Security fixes are handled on the latest `main` branch.
7
+
8
+ ## Reporting a Vulnerability
9
+
10
+ Please report vulnerabilities privately to:
11
+ - robin@veristatesystems.com
12
+
13
+ Include:
14
+ - Affected component/file
15
+ - Reproduction steps
16
+ - Impact assessment
17
+ - Suggested remediation (if available)
18
+
19
+ ## Disclosure Policy
20
+
21
+ - Please do not open public issues for unpatched vulnerabilities.
22
+ - We aim to acknowledge reports quickly and coordinate responsible disclosure.
23
+
24
+ ## Security Best Practices for Users
25
+
26
+ - Do not commit secrets, credentials, or private data.
27
+ - Use environment variables for sensitive configuration.
28
+ - Rotate any credential immediately if accidental exposure is suspected.
29
+ - Keep dependencies and runtime images updated.
30
+
benchmarks/bench_100k_memories.py ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Benchmark for MnemoCore with up to 100k memories.
3
+
4
+ Measures:
5
+ - actual HAIMEngine.store() latency (P50, P95, P99)
6
+ - actual HAIMEngine.query() latency (P50, P95, P99)
7
+ - HDV primitive latency (P99)
8
+ """
9
+
10
+ import argparse
11
+ import asyncio
12
+ import os
13
+ import sys
14
+ import time
15
+ from pathlib import Path
16
+ from statistics import mean
17
+ from typing import Dict, List
18
+
19
+ import numpy as np
20
+
21
+ # Add src to path
22
+ sys.path.insert(0, str(Path(__file__).parent.parent))
23
+
24
+ from mnemocore.core.binary_hdv import BinaryHDV
25
+ from mnemocore.core.engine import HAIMEngine
26
+ from mnemocore.core.config import reset_config
27
+
28
+
29
+ def _percentile(values: List[float], pct: float) -> float:
30
+ if not values:
31
+ return 0.0
32
+ sorted_values = sorted(values)
33
+ idx = min(int(len(sorted_values) * pct), len(sorted_values) - 1)
34
+ return sorted_values[idx]
35
+
36
+
37
def _ms_stats(samples: List[float]) -> Dict[str, float]:
    """Summarize latency *samples* (ms) as count, mean, and P50/P95/P99."""
    stats: Dict[str, float] = {
        "count": float(len(samples)),
        "mean_ms": mean(samples) if samples else 0.0,
    }
    for label, pct in (("p50_ms", 0.50), ("p95_ms", 0.95), ("p99_ms", 0.99)):
        stats[label] = _percentile(samples, pct)
    return stats
45
+
46
+
47
def generate_contents(count: int) -> List[str]:
    """Build *count* synthetic memory payloads, each tagged with a cyclic signal."""
    print(f"Generating {count:,} memory payloads...")
    template = "benchmark memory #{:06d} with signal {}"
    return [template.format(i, i % 97) for i in range(count)]
50
+
51
+
52
async def measure_store_latency(engine: HAIMEngine, contents: List[str]) -> Dict[str, float]:
    """Time a real engine.store() call per payload and return millisecond stats."""
    print(f"Measuring store() latency on {len(contents):,} real calls...")
    samples: List[float] = []
    for idx, payload in enumerate(contents):
        t0 = time.perf_counter()
        await engine.store(payload, metadata={"benchmark": True, "index": idx})
        elapsed = time.perf_counter() - t0
        samples.append(elapsed * 1000.0)
    return _ms_stats(samples)
60
+
61
+
62
async def measure_query_latency(
    engine: HAIMEngine, queries: List[str], top_k: int = 5
) -> Dict[str, float]:
    """Time a real engine.query() call per query string and return millisecond stats."""
    print(f"Measuring query() latency on {len(queries):,} real calls...")
    samples: List[float] = []
    for text in queries:
        t0 = time.perf_counter()
        await engine.query(text, top_k=top_k)
        samples.append((time.perf_counter() - t0) * 1000.0)
    return _ms_stats(samples)
72
+
73
+
74
def measure_hdv_operations(dimension: int, n_samples: int = 10000) -> Dict[str, Dict[str, float]]:
    """Micro-benchmark the three HDV primitives (bind / permute / distance).

    Each primitive is timed individually per iteration; results are reported
    in microseconds (mean and P99).
    """
    print(f"Measuring HDV operations ({n_samples:,} samples)...")
    v1 = BinaryHDV.random(dimension)
    v2 = BinaryHDV.random(dimension)

    timings: Dict[str, List[float]] = {"bind": [], "permute": [], "distance": []}

    for _ in range(n_samples):
        t0 = time.perf_counter()
        v1.xor_bind(v2)
        timings["bind"].append((time.perf_counter() - t0) * 1_000_000)

        t0 = time.perf_counter()
        v1.permute(1)
        timings["permute"].append((time.perf_counter() - t0) * 1_000_000)

        t0 = time.perf_counter()
        v1.hamming_distance(v2)
        timings["distance"].append((time.perf_counter() - t0) * 1_000_000)

    return {
        op: {"p99_us": _percentile(samples, 0.99), "mean_us": mean(samples)}
        for op, samples in timings.items()
    }
101
+
102
+
103
async def run_benchmark(args: argparse.Namespace) -> None:
    """Run the end-to-end store/query benchmark against a real HAIMEngine.

    Overrides the configured HDV dimensionality via the environment, starts a
    fresh engine, measures store()/query() latency plus HDV primitive timings,
    prints a latency report, and finally checks both P99s against a 50ms SLO.
    """
    # Force the requested dimensionality before the config object is (re)built.
    os.environ["HAIM_DIMENSIONALITY"] = str(args.dimension)
    reset_config()

    engine = HAIMEngine()
    await engine.initialize()
    try:
        contents = generate_contents(args.n_memories)

        print()
        print("=" * 72)
        print("HAIMEngine store/query benchmark")
        print("=" * 72)

        # Only a prefix of the generated payloads is actually stored.
        store_sample = contents[: args.store_samples]
        store_stats = await measure_store_latency(engine, store_sample)

        # Query inputs reuse the same cyclic "signal N" markers (mod 97) as the
        # stored payloads, so queries hit plausible matches.
        query_count = min(args.query_samples, len(store_sample))
        query_inputs = [f"signal {(i * 7) % 97}" for i in range(query_count)]
        query_stats = await measure_query_latency(engine, query_inputs, top_k=args.top_k)

        hdv_stats = measure_hdv_operations(args.dimension, args.hdv_samples)

        # --- latency table (milliseconds) ---
        print()
        print(f"{'Metric':<32} {'Mean':<14} {'P50':<14} {'P95':<14} {'P99':<14}")
        print("-" * 90)
        print(
            f"{'store() latency (ms)':<32} "
            f"{store_stats['mean_ms']:<14.3f} {store_stats['p50_ms']:<14.3f} "
            f"{store_stats['p95_ms']:<14.3f} {store_stats['p99_ms']:<14.3f}"
        )
        print(
            f"{'query() latency (ms)':<32} "
            f"{query_stats['mean_ms']:<14.3f} {query_stats['p50_ms']:<14.3f} "
            f"{query_stats['p95_ms']:<14.3f} {query_stats['p99_ms']:<14.3f}"
        )

        # --- HDV primitive table (microseconds) ---
        print()
        print(f"{'HDV op':<20} {'Mean (us)':<16} {'P99 (us)':<16}")
        print("-" * 54)
        for op, stats in hdv_stats.items():
            print(f"{op:<20} {stats['mean_us']:<16.2f} {stats['p99_us']:<16.2f}")

        # --- SLO verdict: P99 under 50ms for both store() and query() ---
        print()
        print("=" * 72)
        print("SLO Check")
        print("=" * 72)
        print(
            f"store() P99 < 50ms: {'PASS' if store_stats['p99_ms'] < 50 else 'FAIL'} "
            f"({store_stats['p99_ms']:.3f}ms)"
        )
        print(
            f"query() P99 < 50ms: {'PASS' if query_stats['p99_ms'] < 50 else 'FAIL'} "
            f"({query_stats['p99_ms']:.3f}ms)"
        )
    finally:
        # Always release engine resources and restore the global config, even
        # if a measurement step raised.
        await engine.close()
        reset_config()
161
+
162
+
163
def parse_args(argv: "list[str] | None" = None) -> argparse.Namespace:
    """Parse benchmark CLI flags.

    Args:
        argv: Optional argument list (for tests/programmatic use). When None,
            argparse falls back to ``sys.argv[1:]`` — the previous behavior.

    Returns:
        Parsed namespace with dimension, sample counts, and top_k settings.
    """
    parser = argparse.ArgumentParser(description="Benchmark HAIMEngine store/query performance")
    parser.add_argument("--dimension", type=int, default=1024, help="HDV dimensionality")
    parser.add_argument("--n-memories", type=int, default=100000, help="Dataset size label")
    parser.add_argument(
        "--store-samples", type=int, default=5000, help="Number of real store() calls"
    )
    parser.add_argument(
        "--query-samples", type=int, default=1000, help="Number of real query() calls"
    )
    parser.add_argument("--hdv-samples", type=int, default=10000, help="HDV primitive sample count")
    parser.add_argument("--top-k", type=int, default=5, help="top_k for query() benchmark")
    return parser.parse_args(argv)
176
+
177
+
178
if __name__ == "__main__":
    # CLI entry point: parse flags and drive the async benchmark to completion.
    asyncio.run(run_benchmark(parse_args()))
benchmarks/bench_permute.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Benchmark BinaryHDV.permute() using the production implementation.
3
+ """
4
+
5
+ import sys
6
+ import timeit
7
+ from pathlib import Path
8
+ from typing import Dict, List
9
+
10
+ import numpy as np
11
+
12
+ # Add src to path
13
+ sys.path.insert(0, str(Path(__file__).parent.parent))
14
+
15
+ from mnemocore.core.binary_hdv import BinaryHDV
16
+
17
+
18
+ def permute_reference(data: np.ndarray, shift: int) -> np.ndarray:
19
+ bits = np.unpackbits(data)
20
+ bits = np.roll(bits, shift)
21
+ return np.packbits(bits)
22
+
23
+
24
def benchmark_dimension(dimension: int, shift: int = 13) -> Dict[str, float]:
    """Validate BinaryHDV.permute() against the golden reference, then time it.

    Args:
        dimension: HDV dimensionality (bits) to benchmark.
        shift: Rotation amount used for both the correctness check and timing.

    Returns:
        Dict with the dimension and best-of-5 mean permute latency in microseconds.

    Raises:
        AssertionError: If permute() disagrees with the bit-level reference.
    """
    hdv = BinaryHDV.random(dimension)

    # Correctness check against golden reference. Explicit raise (not a bare
    # `assert`) so the guard still fires when Python runs with -O.
    expected = permute_reference(hdv.data, shift)
    actual = hdv.permute(shift).data
    if not np.array_equal(actual, expected):
        raise AssertionError("permute() mismatch vs reference")

    # Best of 5 timing batches of 500 calls each; min() filters scheduler noise.
    t = min(
        timeit.repeat(
            stmt="hdv.permute(shift)",
            globals={"hdv": hdv, "shift": shift},
            repeat=5,
            number=500,
        )
    )
    us = (t / 500) * 1_000_000
    return {"dimension": float(dimension), "permute_us": us}
42
+
43
+
44
def main() -> None:
    """Print a permute() latency table across a sweep of HDV dimensionalities."""
    sweep: List[int] = [512, 4096, 16384, 32768, 65536, 131072]
    print("BinaryHDV.permute() benchmark (production path)")
    print(f"{'Dimension':>10} | {'permute(us)':>12}")
    print("-" * 27)
    for dimension in sweep:
        row = benchmark_dimension(dimension)
        print(f"{int(row['dimension']):>10} | {row['permute_us']:>12.2f}")
52
+
53
+
54
if __name__ == "__main__":
    # Script entry point.
    main()
config.yaml ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # HAIM Configuration — Phase 4.5
2
+ # All hardcoded constants are centralized here.
3
+
4
+ haim:
5
+ version: "4.5"
6
+ dimensionality: 16384 # 2^14, must be multiple of 64
7
+
8
+ # Vector encoding
9
+ encoding:
10
+ mode: "binary" # "binary" (Phase 3.0+) or "float" (legacy)
11
+ token_method: "bundle" # "bundle" (XOR+permute) or "hash"
12
+
13
+ # Memory tier thresholds
14
+ tiers:
15
+ hot:
16
+ max_memories: 2000
17
+ ltp_threshold_min: 0.7
18
+ eviction_policy: "lru"
19
+
20
+ warm:
21
+ max_memories: 100000
22
+ ltp_threshold_min: 0.3
23
+ consolidation_interval_hours: 1
24
+ storage_backend: "mmap" # "mmap" (Phase 3.0) or "qdrant" (Phase 3.5)
25
+
26
+ cold:
27
+ storage_backend: "filesystem" # "filesystem" or "s3"
28
+ compression: "gzip"
29
+ archive_threshold_days: 30
30
+
31
+ # LTP (Long-Term Potentiation) decay parameters
32
+ ltp:
33
+ initial_importance: 0.5
34
+ decay_lambda: 0.01 # Exponential decay rate
35
+ permanence_threshold: 0.95 # Above this, memory is immune to decay
36
+ half_life_days: 30.0 # For synaptic connections
37
+
38
+ # Hysteresis (prevent boundary thrashing between tiers)
39
+ hysteresis:
40
+ promote_delta: 0.15 # LTP must exceed threshold by this much to promote
41
+ demote_delta: 0.10 # LTP must fall below threshold by this much to demote
42
+
43
+ # Redis (Phase 3.5)
44
+ redis:
45
+ url: "redis://localhost:6379/0"
46
+ stream_key: "haim:subconscious"
47
+ max_connections: 10
48
+ socket_timeout: 5
49
+
50
+ # Qdrant (Phase 3.5)
51
+ qdrant:
52
+ url: "http://localhost:6333"
53
+ collection_hot: "haim_hot"
54
+ collection_warm: "haim_warm"
55
+ binary_quantization: true
56
+ always_ram: true
57
+ hnsw_m: 16
58
+ hnsw_ef_construct: 100
59
+
60
+ # GPU (Phase 3.5)
61
+ gpu:
62
+ enabled: false
63
+ device: "cuda:0"
64
+ batch_size: 1000
65
+ fallback_to_cpu: true
66
+
67
+ # Observability (Phase 3.5)
68
+ observability:
69
+ metrics_port: 9090
70
+ log_level: "INFO"
71
+ structured_logging: true
72
+
73
+ # Persistence paths
74
+ paths:
75
+ data_dir: "./data"
76
+ memory_file: "./data/memory.jsonl"
77
+ codebook_file: "./data/codebook.json"
78
+ concepts_file: "./data/concepts.json"
79
+ synapses_file: "./data/synapses.json"
80
+ warm_mmap_dir: "./data/warm_tier"
81
+ cold_archive_dir: "./data/cold_archive"
82
+
83
+ # Security (Phase 3.5.1)
84
+ security:
85
+ # api_key: "mnemocore-beta-key" # <--- REMOVED: Must be set via HAIM_API_KEY env var or here explicitly
86
+
87
+ # MCP (Model Context Protocol) bridge
88
+ mcp:
89
+ enabled: false
90
+ transport: "stdio" # "stdio" recommended for local MCP clients
91
+ host: "127.0.0.1"
92
+ port: 8110
93
+ api_base_url: "http://localhost:8100"
94
+ timeout_seconds: 15
95
+ allow_tools:
96
+ - "memory_store"
97
+ - "memory_query"
98
+ - "memory_get"
99
+ - "memory_delete"
100
+ - "memory_stats"
101
+ - "memory_health"
102
+
103
+ # Dream Loop (Subconscious background processing)
104
+ dream_loop:
105
+ enabled: true
106
+ frequency_seconds: 60 # Seconds between dream cycles
107
+ batch_size: 10 # Number of memories to process per cycle
108
+ max_iterations: 0 # Maximum iterations (0 = unlimited)
109
+ subconscious_queue_maxlen: 10000 # Max queued IDs (null/0 = unlimited)
110
+ ollama_url: "http://localhost:11434/api/generate"
111
+ model: "gemma3:1b"
112
+
113
+ # Phase 4.0+: Semantic Consolidation
114
+ consolidation:
115
+ enabled: true
116
+ interval_seconds: 3600 # 1 hour between consolidation cycles
117
+ similarity_threshold: 0.85 # Hamming similarity threshold (0.85 = 15% distance)
118
+ min_cluster_size: 2 # Minimum cluster size for merging
119
+ hot_tier_enabled: true # Consolidate HOT tier
120
+ warm_tier_enabled: true # Consolidate WARM tier
121
+
122
+ # Phase 4.1: XOR-based Project Isolation
123
+ attention_masking:
124
+ enabled: true # Enable/disable project-based memory isolation
125
+
126
+ # =========================================================================
127
+ # Subconscious AI - BETA FEATURE
128
+ # =========================================================================
129
+ # This is a BETA feature that enables autonomous background AI processing
130
+ # for memory management, dream synthesis, and micro-self-improvement.
131
+ #
132
+ # WARNING: This feature is experimental and may change without notice.
133
+ # Must be explicitly enabled by setting 'enabled: true'.
134
+ # All safety defaults are conservative - review before enabling in production.
135
+ # =========================================================================
136
+ subconscious_ai:
137
+ # BETA FEATURE - Must be explicitly enabled
138
+ enabled: false
139
+ beta_mode: true
140
+
141
+ # Model configuration
142
+ model_provider: "ollama" # ollama | lm_studio | openai_api | anthropic_api
143
+ model_name: "phi3.5:3.8b"
144
+ model_url: "http://localhost:11434"
145
+ # api_key: null # For API providers
146
+ # api_base_url: null
147
+
148
+ # Pulse configuration
149
+ pulse_interval_seconds: 120
150
+ pulse_backoff_enabled: true
151
+ pulse_backoff_max_seconds: 600
152
+
153
+ # Resource management
154
+ max_cpu_percent: 30.0
155
+ cycle_timeout_seconds: 30
156
+ rate_limit_per_hour: 50
157
+
158
+ # Operations
159
+ memory_sorting_enabled: true
160
+ enhanced_dreaming_enabled: true
161
+ micro_self_improvement_enabled: false # Initially disabled
162
+
163
+ # Safety
164
+ dry_run: true
165
+ log_all_decisions: true
166
+ audit_trail_path: "./data/subconscious_audit.jsonl"
167
+ max_memories_per_cycle: 10
data/subconscious_audit.jsonl ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ {"timestamp": "2026-02-18T20:22:40.980901+00:00", "operation": "dreaming", "input_count": 0, "output": {"message": "No weak memories to analyze"}, "elapsed_ms": 0.05879999662283808, "model_used": "mock-model", "dry_run": false, "error": null}
2
+ {"timestamp": "2026-02-18T20:23:27.667298+00:00", "operation": "dreaming", "input_count": 0, "output": {"message": "No weak memories to analyze"}, "elapsed_ms": 0.05950000195298344, "model_used": "mock-model", "dry_run": false, "error": null}
data/subconscious_evolution.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "updated_at": "2026-02-18T18:55:55.471022+00:00",
3
+ "cycle_count": 56,
4
+ "insights_generated": 0,
5
+ "current_cycle_interval": 1,
6
+ "schedule": {
7
+ "concept_every": 5,
8
+ "parallel_every": 3,
9
+ "value_every": 10,
10
+ "meta_every": 7,
11
+ "cleanup_every": 20
12
+ },
13
+ "activity_window": [],
14
+ "low_activity_streak": 0,
15
+ "last_cycle_metrics": {
16
+ "concepts": 0,
17
+ "parallels": 0,
18
+ "meta_insights": 0,
19
+ "valuations": 0,
20
+ "memories": 0,
21
+ "synapses": 0,
22
+ "adaptation": "none"
23
+ }
24
+ }
docker-compose.yml ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # NOTE: the top-level 'version' key is obsolete in Compose v2 and is ignored; kept for older tooling.
+ version: '3.8'
2
+
3
+ # MnemoCore Docker Compose
4
+ # ========================
5
+ # Full stack deployment with Redis, Qdrant, and MnemoCore API
6
+
7
+ services:
8
+ # ===========================================
9
+ # MnemoCore API Service
10
+ # ===========================================
11
+ mnemocore:
12
+ build:
13
+ context: .
14
+ dockerfile: Dockerfile
15
+ image: mnemocore:latest
16
+ container_name: mnemocore-api
17
+ ports:
18
+ - "8100:8100"
19
+ - "9090:9090" # Prometheus metrics
20
+ volumes:
21
+ - mnemocore_data:/app/data
22
+ - ./config.yaml:/app/config.yaml:ro
23
+ environment:
24
+ - HAIM_API_KEY=${HAIM_API_KEY}
25
+ - REDIS_URL=redis://redis:6379/0
26
+ - QDRANT_URL=http://qdrant:6333
27
+ - LOG_LEVEL=${LOG_LEVEL:-INFO}
28
+ - HOST=0.0.0.0
29
+ - PORT=8100
30
+ env_file:
31
+ - .env
32
+ healthcheck:
33
+ test: ["CMD", "python", "/app/scripts/ops/healthcheck.py"]
34
+ interval: 30s
35
+ timeout: 10s
36
+ retries: 3
37
+ start_period: 40s
38
+ depends_on:
39
+ redis:
40
+ condition: service_healthy
41
+ qdrant:
42
+ condition: service_healthy
43
+ networks:
44
+ - mnemocore-network
45
+ restart: unless-stopped
46
+ deploy:
47
+ resources:
48
+ limits:
49
+ memory: 2G
50
+ reservations:
51
+ memory: 512M
52
+
53
+ # ===========================================
54
+ # Redis - In-Memory Data Store
55
+ # ===========================================
56
+ redis:
57
+ image: redis:7.2-alpine
58
+ container_name: mnemocore-redis
59
+ ports:
60
+ - "6379:6379"
61
+ volumes:
62
+ - redis_data:/data
63
+ command: >
64
+ redis-server
65
+ --save 60 1
66
+ --loglevel warning
67
+ --maxmemory 512mb
68
+ --maxmemory-policy allkeys-lru
69
+ healthcheck:
70
+ test: ["CMD", "redis-cli", "ping"]
71
+ interval: 10s
72
+ timeout: 5s
73
+ retries: 5
74
+ start_period: 10s
75
+ networks:
76
+ - mnemocore-network
77
+ restart: unless-stopped
78
+ deploy:
79
+ resources:
80
+ limits:
81
+ memory: 512M
82
+
83
+ # ===========================================
84
+ # Qdrant - Vector Database
85
+ # ===========================================
86
+ qdrant:
87
+ image: qdrant/qdrant:latest
88
+ container_name: mnemocore-qdrant
89
+ ports:
90
+ - "6333:6333" # HTTP API
91
+ - "6334:6334" # gRPC API
92
+ volumes:
93
+ - qdrant_storage:/qdrant/storage
94
+ environment:
95
+ - QDRANT__SERVICE__GRPC_PORT=6334
96
+ - QDRANT__LOG_LEVEL=INFO
97
+ healthcheck:
98
+ # The official qdrant image does not ship curl, so a curl-based check always fails
+ # and blocks dependents gated on service_healthy; probe the HTTP port over TCP instead.
+ test: ["CMD-SHELL", "bash -c ':> /dev/tcp/127.0.0.1/6333' || exit 1"]
99
+ interval: 10s
100
+ timeout: 5s
101
+ retries: 5
102
+ start_period: 15s
103
+ networks:
104
+ - mnemocore-network
105
+ restart: unless-stopped
106
+ deploy:
107
+ resources:
108
+ limits:
109
+ memory: 4G
110
+
111
+ # ===========================================
112
+ # Networks
113
+ # ===========================================
114
+ networks:
115
+ mnemocore-network:
116
+ driver: bridge
117
+ name: mnemocore-net
118
+
119
+ # ===========================================
120
+ # Volumes
121
+ # ===========================================
122
+ volumes:
123
+ mnemocore_data:
124
+ name: mnemocore-data
125
+ redis_data:
126
+ name: mnemocore-redis-data
127
+ qdrant_storage:
128
+ name: mnemocore-qdrant-storage
docs/API.md ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MnemoCore API Reference (Beta)
2
+
3
+ ## Beta Notice
4
+
5
+ API contracts may change during beta without backward compatibility guarantees.
6
+ Use pinned commits if you need reproducibility.
7
+
8
+ ## Base URL
9
+
10
+ Default local API URL:
11
+ - `http://localhost:8100`
12
+
13
+ ## Endpoints
14
+
15
+ ### `GET /`
16
+ Basic service status.
17
+
18
+ ### `GET /health`
19
+ Returns health status, Redis connectivity, and engine stats.
20
+
21
+ ### `POST /store`
22
+ Store a memory.
23
+
24
+ Request body:
25
+ ```json
26
+ {
27
+ "content": "string",
28
+ "metadata": {"key": "value"},
29
+ "agent_id": "optional-string",
30
+ "ttl": 3600
31
+ }
32
+ ```
33
+
34
+ ### `POST /query`
35
+ Query semantic memory.
36
+
37
+ Request body:
38
+ ```json
39
+ {
40
+ "query": "string",
41
+ "top_k": 5,
42
+ "agent_id": "optional-string"
43
+ }
44
+ ```
45
+
46
+ ### `GET /memory/{memory_id}`
47
+ Fetch a memory by ID (Redis-first, engine fallback).
48
+
49
+ ### `DELETE /memory/{memory_id}`
50
+ Delete a memory by ID.
51
+
52
+ ### `POST /concept`
53
+ Define a concept for conceptual memory operations.
54
+
55
+ ### `POST /analogy`
56
+ Run analogy inference.
57
+
58
+ ### `GET /stats`
59
+ Return engine statistics.
60
+
61
+ ### `GET /metrics`
62
+ Prometheus metrics endpoint.
63
+
64
+ ## Example Requests
65
+
66
+ Store:
67
+ ```bash
68
+ curl -X POST http://localhost:8100/store \
69
+ -H "Content-Type: application/json" \
70
+ -d '{"content":"Birds can migrate long distances"}'
71
+ ```
72
+
73
+ Query:
74
+ ```bash
75
+ curl -X POST http://localhost:8100/query \
76
+ -H "Content-Type: application/json" \
77
+ -d '{"query":"animal migration","top_k":3}'
78
+ ```
79
+
80
+ ## Error Behavior
81
+
82
+ - `404` for missing memory IDs.
83
+ - In degraded infrastructure modes, API may still return successful core operations while external storage writes fail.
84
+
85
+ ## Compatibility Guidance
86
+
87
+ During beta, treat responses as evolving contracts:
88
+ - Parse defensively.
89
+ - Avoid rigid coupling to optional fields.
90
+ - Revalidate after version upgrades.
91
+
docs/ARCHITECTURE.md ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MnemoCore Architecture (Beta)
2
+
3
+ ## Beta Context
4
+
5
+ This document describes the current implementation direction in beta.
6
+ It is not a guarantee of final architecture, performance, or feature completeness.
7
+
8
+ ## Core Components
9
+
10
+ - `src/core/engine.py`: Main orchestration for memory storage, encoding, query, and synaptic augmentation.
11
+ - `src/core/binary_hdv.py`: Binary hyperdimensional vector operations.
12
+ - `src/core/tier_manager.py`: HOT/WARM/COLD placement and movement logic.
13
+ - `src/core/config.py`: Typed config loading from YAML + env overrides.
14
+ - `src/core/async_storage.py`: Async Redis metadata operations.
15
+ - `src/api/main.py`: FastAPI interface.
16
+
17
+ ## Memory Model
18
+
19
+ MnemoCore represents memory as high-dimensional vectors and metadata-rich nodes:
20
+
21
+ 1. Encode input text into vector representation.
22
+ 2. Store node in HOT tier initially.
23
+ 3. Apply reinforcement/decay dynamics (LTP-related logic).
24
+ 4. Move between tiers based on thresholds and access patterns.
25
+
26
+ ## Tiering Model
27
+
28
+ - **HOT**: In-memory dictionary for fastest access.
29
+ - **WARM**: Qdrant-backed where available; filesystem fallback when unavailable.
30
+ - **COLD**: Filesystem archival path for long-lived storage.
31
+
32
+ ## Query Flow (Current Beta)
33
+
34
+ Current query behavior prioritizes HOT tier recall and synaptic score augmentation.
35
+ Cross-tier retrieval is still evolving and should be treated as beta behavior.
36
+
37
+ ## Async + External Services
38
+
39
+ - Redis is used for async metadata and event stream operations.
40
+ - API startup checks Redis health and can operate in degraded mode.
41
+ - Qdrant usage is enabled through tier manager and can fall back to local files.
42
+
43
+ ## Observability
44
+
45
+ - Prometheus metrics endpoint mounted at `/metrics` in API server.
46
+ - Logging behavior controlled through config.
47
+
48
+ ## Practical Limitations
49
+
50
+ - Some roadmap functionality remains TODO-marked in code.
51
+ - Interface contracts may change across beta releases.
52
+ - Performance can vary significantly by hardware and data profile.
53
+
54
+ For active limitations and next work items, see `docs/ROADMAP.md`.
55
+
docs/BETA_POLICY.md ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MnemoCore Beta Policy
2
+
3
+ ## Status
4
+
5
+ MnemoCore is published as a **beta / development preview**.
6
+
7
+ This means:
8
+ - No production readiness claim.
9
+ - No availability, performance, or compatibility guarantees.
10
+ - No commitment to stable APIs between beta releases.
11
+ - Experimental behavior is expected.
12
+
13
+ ## No Promises / No Warranty
14
+
15
+ MnemoCore is provided "as is" under the MIT license.
16
+
17
+ - We do not guarantee correctness for any specific use case.
18
+ - We do not guarantee fitness for business-critical workloads.
19
+ - We do not guarantee long-term support for current interfaces.
20
+
21
+ Always validate outputs independently before operational use.
22
+
23
+ ## Recommended Usage During Beta
24
+
25
+ - Use in sandboxes, R&D, and controlled evaluation environments.
26
+ - Pin commit hashes for reproducibility.
27
+ - Treat data formats and endpoint contracts as potentially changing.
28
+ - Keep backups of data and configuration before upgrading.
29
+
30
+ ## Production Adoption Guidance
31
+
32
+ Before production usage in your own environment, you should perform:
33
+ - Independent reliability testing.
34
+ - Security and privacy review.
35
+ - Capacity and failure-mode validation.
36
+ - Rollback planning and observability setup.
37
+
38
+ ## Communication Principle
39
+
40
+ All public communication should describe MnemoCore as:
41
+ - Experimental,
42
+ - Beta,
43
+ - Subject to change,
44
+ - Without guarantees or promises.
45
+
46
+ ## Contact
47
+
48
+ - General contact: Robin@veristatesystems.com
49
+ - Security disclosure: Robin@veristatesystems.com
50
+
docs/MCP_IMPLEMENTATION_PLAN.md ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MnemoCore MCP Implementation Plan (Beta)
2
+
3
+ ## Goal
4
+
5
+ Expose MnemoCore capabilities through a Model Context Protocol (MCP) server so external LLM agents can safely store, query, and inspect memory with predictable contracts.
6
+
7
+ ## Scope (Phase 1)
8
+
9
+ ### In Scope
10
+
11
+ - MCP server process for local/dev use.
12
+ - Read/write memory tools mapped to existing engine/API capabilities.
13
+ - Basic auth + request limits aligned with existing API policy.
14
+ - Test coverage for MCP tool contracts and degraded dependencies.
15
+
16
+ ### Out of Scope (Phase 1)
17
+
18
+ - Multi-tenant policy engine.
19
+ - Full distributed consensus workflows.
20
+ - New memory semantics beyond existing endpoints.
21
+
22
+ ## Architecture Decision
23
+
24
+ Prefer **adapter-first** design:
25
+
26
+ - Keep `src/core` and `src/api` as source of truth.
27
+ - Add `src/mcp/server.py` (MCP transport + tool registry).
28
+ - Add `src/mcp/adapters/api_adapter.py` to reuse validated API contracts.
29
+ - Add `src/mcp/schemas.py` for tool input/output validation.
30
+
31
+ Reason: minimizes behavior drift and reuses existing validation/security paths.
32
+
33
+ ## Proposed MCP Tools (Phase 1)
34
+
35
+ 1. `memory_store`
36
+ - Input: `content`, `metadata?`, `agent_id?`, `ttl?`
37
+ - Backend: `POST /store`
38
+ 2. `memory_query`
39
+ - Input: `query`, `top_k?`, `agent_id?`
40
+ - Backend: `POST /query`
41
+ 3. `memory_get`
42
+ - Input: `memory_id`
43
+ - Backend: `GET /memory/{memory_id}`
44
+ 4. `memory_delete`
45
+ - Input: `memory_id`
46
+ - Backend: `DELETE /memory/{memory_id}`
47
+ 5. `memory_stats`
48
+ - Input: none
49
+ - Backend: `GET /stats`
50
+ 6. `memory_health`
51
+ - Input: none
52
+ - Backend: `GET /health`
53
+
54
+ Optional (Phase 1.1):
55
+ - `concept_define` and `analogy_solve` once primary tools are stable.
56
+
57
+ ## Security and Operational Guardrails
58
+
59
+ - Require API key passthrough from MCP server to MnemoCore API.
60
+ - Allowlist MCP tools (disable dangerous or experimental operations by default).
61
+ - Enforce per-tool timeout and payload limits.
62
+ - Structured logs with `trace_id`, `tool_name`, latency, status.
63
+ - Fail closed for auth errors; fail open only where existing API already degrades by design.
64
+
65
+ ## Delivery Milestones
66
+
67
+ ### M0: Foundations (1-2 days)
68
+
69
+ - Add MCP package structure.
70
+ - Add config section for MCP host/port/timeouts/tool allowlist.
71
+ - Add local run command and basic health check tool.
72
+
73
+ Exit criteria:
74
+ - MCP server starts and responds to health tool.
75
+
76
+ ### M1: Core Read/Write Tools (2-4 days)
77
+
78
+ - Implement `memory_store`, `memory_query`, `memory_get`, `memory_delete`.
79
+ - Map errors to stable MCP error format.
80
+ - Add contract tests with mocked API responses.
81
+
82
+ Exit criteria:
83
+ - Core memory flow works end-to-end from MCP client.
84
+
85
+ ### M2: Observability + Hardening (1-2 days)
86
+
87
+ - Add metrics counters/histograms for MCP tools.
88
+ - Add retry/backoff only for transient failures.
89
+ - Add degraded-mode tests (Redis/Qdrant unavailable).
90
+
91
+ Exit criteria:
92
+ - Clear diagnostics for failures and latency.
93
+
94
+ ### M3: Extended Cognitive Tools (optional, 1-2 days)
95
+
96
+ - Add `concept_define` and `analogy_solve`.
97
+ - Add docs examples for agent orchestration flows.
98
+
99
+ Exit criteria:
100
+ - Conceptual tools pass contract tests and are documented.
101
+
102
+ ## Test Strategy
103
+
104
+ - Unit tests: schema validation, adapter mapping, error translation.
105
+ - Functional tests: MCP client -> server -> API in local integration mode.
106
+ - Resilience tests: upstream timeout, 403 auth fail, 404 memory miss, degraded Redis.
107
+ - Regression gate: existing `tests/` suite remains green.
108
+
109
+ ## Rollout Plan
110
+
111
+ 1. Ship behind `mcp.enabled: false` default.
112
+ 2. Enable in beta environments only.
113
+ 3. Observe for one sprint (latency, error rate, tool usage).
114
+ 4. Promote to default-on after stability criteria are met.
115
+
116
+ ## Success Metrics
117
+
118
+ - >= 99% successful MCP tool calls in healthy environment.
119
+ - P95 MCP tool latency <= 300 ms for read operations (local setup target).
120
+ - Zero contract-breaking changes without changelog entry.
121
+
122
+ ## Minimal Backlog Tasks
123
+
124
+ 1. Create `src/mcp/server.py` bootstrap.
125
+ 2. Create adapter + schemas.
126
+ 3. Add MCP config in `config.yaml` + typed config model.
127
+ 4. Add tests in `tests/test_mcp_server.py` and `tests/test_mcp_contracts.py`.
128
+ 5. Add documentation section in README + API docs.
docs/PERFORMANCE.md ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MnemoCore Performance Documentation
2
+
3
+ ## Performance Targets (SLOs)
4
+
5
+ | Metric | Target | Description |
6
+ |--------|--------|-------------|
7
+ | `store()` P99 latency | < 100ms | Store a single memory |
8
+ | `query()` P99 latency | < 50ms | Query for similar memories |
9
+ | Throughput | > 1000 req/s | Sustained request rate |
10
+ | Memory overhead | < 100MB per 100k memories | RAM usage for storage |
11
+
12
+ ## Baseline Measurements
13
+
14
+ ### BinaryHDV Operations (1024 dimensions)
15
+
16
+ | Operation | Time (us) | Notes |
17
+ |-----------|-----------|-------|
18
+ | `xor_bind()` | ~5 | XOR binding of two vectors |
19
+ | `permute()` | ~5 | Cyclic permutation |
20
+ | `hamming_distance()` | ~3 | Distance calculation |
21
+ | `similarity()` | ~4 | Normalized similarity |
22
+
23
+ ### permute() Benchmark Results
24
+
25
+ `BinaryHDV.permute()` now uses one production path (`unpackbits` + `roll` + `packbits`) across all dimensions.
26
+
27
+ | Dimension | permute() (us) | Notes |
28
+ |-----------|----------------|-------|
29
+ | 512 | ~5.2 | Production path |
30
+ | 4096 | ~5.5 | Production path |
31
+ | 16384 | ~6.8 | Production path |
32
+ | 32768 | ~8.2 | Production path |
33
+ | 65536 | ~11.3 | Production path |
34
+ | 131072 | ~17.7 | Production path |
35
+
36
+ Run `python benchmarks/bench_permute.py` for machine-specific current numbers.
37
+
38
+ ## Load Testing
39
+
40
+ ### Using Locust
41
+
42
+ ```bash
43
+ # Install locust
44
+ pip install locust
45
+
46
+ # Run load test
47
+ cd tests/load
48
+ locust -f locustfile.py --host http://localhost:8100
49
+ ```
50
+
51
+ ### Using the Benchmark Script
52
+
53
+ ```bash
54
+ # Run 100k memory benchmark
55
+ python benchmarks/bench_100k_memories.py
56
+ ```
57
+
58
+ ## Performance Optimization Tips
59
+
60
+ 1. Use BinaryHDV instead of float HDV.
61
+ 2. Use batch operations for bulk work.
62
+ 3. Keep Redis connection pools right-sized.
63
+ 4. Enable Qdrant binary quantization for faster search.
64
+
65
+ ## Monitoring
66
+
67
+ Prometheus metrics are exposed at `/metrics` endpoint:
68
+ - `mnemocore_store_duration_seconds` - Store operation latency
69
+ - `mnemocore_query_duration_seconds` - Query operation latency
70
+ - `mnemocore_memory_count_total` - Total memories per tier
71
+ - `mnemocore_queue_length` - Subconscious queue length
docs/ROADMAP.md ADDED
@@ -0,0 +1,320 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MnemoCore Roadmap
2
+
3
+ ## Scope and Intent
4
+
5
+ This roadmap describes current known gaps and likely direction.
6
+ It is not a promise, delivery guarantee, or commitment to specific timelines.
7
+
8
+ ---
9
+
10
+ ## Version History
11
+
12
+ | Version | Phase | Status | Key Features |
13
+ |---------|-------|--------|--------------|
14
+ | 3.x | Core Architecture | ✅ Complete | Binary HDV, 3-Tier Storage, LTP/Decay |
15
+ | 4.0 | Cognitive Enhancements | ✅ Complete | XOR Attention, Bayesian LTP, Gap Detection, Immunology |
16
+ | 4.1 | Observability | ✅ Complete | Prometheus metrics, distributed tracing, project isolation |
17
+ | 4.2 | Stability | ✅ Complete | Async lock fixes, test suite hardening |
18
+ | 4.3 | Temporal Recall | ✅ Complete | Episodic chaining, chrono-weighting, sequential context |
19
+ | **5.x** | **The Perfect Brain** | 🔮 Planned | Multi-Modal, Emotional, Working Memory |
20
+
21
+ ---
22
+
23
+ ## Phase 5.x: The Perfect Brain
24
+
25
+ **Vision:** Transform MnemoCore from a sophisticated memory storage system into a truly cognitive architecture that functions as an artificial brain - but better.
26
+
27
+ ### 5.0 Multi-Modal Memory
28
+
29
+ **Goal:** Enable storage and retrieval of images, audio, code structures, and cross-modal associations.
30
+
31
+ ```
32
+ ┌─────────────────────────────────────────────────────────────────┐
33
+ │ CURRENT: Text-only encoding │
34
+ │ ────────────────────────────────────────────────────────────── │
35
+ │ store("User reported bug") → BinaryHDV │
36
+ │ │
37
+ │ FUTURE: Multi-modal encoding │
38
+ │ ────────────────────────────────────────────────────────────── │
39
+ │ store("Screenshot of error", image=bytes) → CrossModalHDV │
40
+ │ store("Voice note", audio=bytes) → AudioHDV │
41
+ │ bind(text_id, image_id, relation="illustrates") │
42
+ │ │
43
+ │ query("API error", modality="image") → screenshot.png │
44
+ │ query(image=bytes, modality="text") → "Related conversation" │
45
+ └─────────────────────────────────────────────────────────────────┘
46
+ ```
47
+
48
+ **Implementation Plan:**
49
+
50
+ | Component | Description | Dependencies |
51
+ |-----------|-------------|--------------|
52
+ | `MultiModalEncoder` | Abstract encoder protocol | - |
53
+ | `CLIPEncoder` | Vision encoding via CLIP | `transformers`, `torch` |
54
+ | `WhisperEncoder` | Audio encoding via Whisper | `openai-whisper` |
55
+ | `CodeEncoder` | AST-aware code encoding | `tree-sitter` |
56
+ | `CrossModalBinding` | VSA operations across modalities | BinaryHDV |
57
+
58
+ **New API Endpoints:**
59
+ ```
60
+ POST /store/multi - Store with multiple modalities
61
+ POST /query/cross-modal - Cross-modal semantic search
62
+ POST /bind - Bind modalities together
63
+ GET /memory/{id}/related - Get cross-modal related memories
64
+ ```
65
+
66
+ ---
67
+
68
+ ### 5.1 Emotional/Affective Layer
69
+
70
+ **Goal:** Enable emotion-weighted memory storage, retrieval, and decay - mimicking how biological memory prioritizes emotionally significant events.
71
+
72
+ ```
73
+ ┌─────────────────────────────────────────────────────────────────┐
74
+ │ EMOTIONAL DIMENSIONS │
75
+ │ ────────────────────────────────────────────────────────────── │
76
+ │ │
77
+ │ Valence: [-1.0 ──────────────── +1.0] │
78
+ │ (negative/unpleasant) (positive/pleasant) │
79
+ │ │
80
+ │ Arousal: [0.0 ────────────────── 1.0] │
81
+ │ (calm/neutral) (intense/urgent) │
82
+ │ │
83
+ │ EFFECT ON MEMORY: │
84
+ │ ────────────────────────────────────────────────────────────── │
85
+ │ High Arousal + Negative = "Flashbulb memory" (never forget) │
86
+ │ High Arousal + Positive = Strong consolidation │
87
+ │ Low Arousal = Faster decay (forgettable) │
88
+ │ │
89
+ └─────────────────────────────────────────────────────────────────┘
90
+ ```
91
+
92
+ **MemoryNode Extensions:**
93
+ ```python
94
+ @dataclass
95
+ class MemoryNode:
96
+ # ... existing fields ...
97
+
98
+ # Phase 5.1: Emotional tagging
99
+ emotional_valence: float = 0.0 # -1.0 (negative) to +1.0 (positive)
100
+ emotional_arousal: float = 0.0 # 0.0 (calm) to 1.0 (intense)
101
+ emotional_tags: List[str] = field(default_factory=list) # ["frustration", "joy", "urgency"]
102
+
103
+ def emotional_weight(self) -> float:
104
+ """Calculate memory importance based on emotional factors."""
105
+ # Arousal amplifies retention regardless of valence
106
+ # High arousal creates "flashbulb memories"
107
+ return abs(self.emotional_valence) * self.emotional_arousal
108
+ ```
109
+
110
+ **Modified LTP Formula:**
111
+ ```
112
+ S = I × log(1+A) × e^(-λT) × (1 + E)
113
+
114
+ Where E = emotional_weight() ∈ [0, 1]
115
+ ```
116
+
117
+ **Use Cases:**
118
+ - B2B outreach: "Customer was almost in tears when we fixed their issue" → HIGH priority
119
+ - Support tickets: "User furious about data loss" → Never forget, prioritize retrieval
120
+ - Positive feedback: "User loved the new feature" → Moderate retention
121
+
122
+ ---
123
+
124
+ ### 5.2 Working Memory Layer
125
+
126
+ **Goal:** Active cognitive workspace for goal-directed reasoning, not just passive storage.
127
+
128
+ ```
129
+ ┌─────────────────────────────────────────────────────────────────┐
130
+ │ COGNITIVE ARCHITECTURE │
131
+ │ │
132
+ │ ┌─────────────────────────────────────────────────────────┐ │
133
+ │ │ WORKING MEMORY (Active) │ │
134
+ │ │ Capacity: 7 ± 2 items │ │
135
+ │ │ ┌─────────┐ ┌─────────┐ ┌─────────┐ ┌─────────┐ │ │
136
+ │ │ │ Goal │ │ Context │ │ Focus │ │ Hold │ │ │
137
+ │ │ │ │ │ │ │ │ │ │ │ │
138
+ │ │ └─────────┘ └─────────┘ └─────────┘ └─────────┘ │ │
139
+ │ └─────────────────────────────────────────────────────────┘ │
140
+ │ ↕ │
141
+ │ ┌─────────────────────────────────────────────────────────┐ │
142
+ │ │ HOT TIER (Fast Access) │ │
143
+ │ │ ~2,000 memories, <1ms access │ │
144
+ │ └─────────────────────────────────────────────────────────┘ │
145
+ │ ↕ │
146
+ │ ┌─────────────────────────────────────────────────────────┐ │
147
+ │ │ WARM TIER (Qdrant/Redis) │ │
148
+ │ │ ~100,000 memories, <10ms access │ │
149
+ │ └─────────────────────────────────────────────────────────┘ │
150
+ │ ↕ │
151
+ │ ┌─────────────────────────────────────────────────────────┐ │
152
+ │ │ COLD TIER (Archive) │ │
153
+ │ │ Unlimited, <100ms access │ │
154
+ │ └─────────────────────────────────────────────────────────┘ │
155
+ │ │
156
+ └─────────────────────────────────────────────────────────────────┘
157
+ ```
158
+
159
+ **Working Memory API:**
160
+ ```python
161
+ # Create working memory instance
162
+ wm = engine.working_memory(capacity=7)
163
+
164
+ # Set active goal
165
+ wm.set_goal("Troubleshoot authentication error")
166
+
167
+ # Load relevant context
168
+ wm.focus_on(await engine.query("auth error", top_k=5))
169
+
170
+ # Hold important constraints
171
+ wm.hold("User is on deadline - prioritize speed over elegance")
172
+
173
+ # Query with working memory context
174
+ results = wm.query("related issues")
175
+ # Results are RE-RANKED based on current goal + focus + held items
176
+
177
+ # Get context summary for LLM
178
+ context = wm.context_summary()
179
+ # → "Working on: auth troubleshooting
180
+ # Focus: Recent OAuth errors
181
+ # Constraint: Time pressure"
182
+ ```
183
+
184
+ **Implementation Components:**
185
+ | Component | Description |
186
+ |-----------|-------------|
187
+ | `WorkingMemory` | Active workspace class |
188
+ | `GoalContext` | Goal tracking and binding |
189
+ | `FocusBuffer` | Currently attended items |
190
+ | `HoldBuffer` | Constraints and important facts |
191
+ | `ContextualQuery` | Goal-directed retrieval |
192
+
193
+ ---
194
+
195
+ ### 5.3 Multi-Agent / Collaborative Memory
196
+
197
+ **Goal:** Enable memory sharing between agents while maintaining provenance and privacy.
198
+
199
+ ```
200
+ ┌─────────────────────────────────────────────────────────────────┐
201
+ │ COLLABORATIVE MEMORY │
202
+ │ │
203
+ │ Agent A Shared Memory Agent B │
204
+ │ ┌────────┐ ┌──────────────┐ ┌────────┐ │
205
+ │ │ Private│ │ │ │ Private│ │
206
+ │ │ Memory │◄────►│ Consensus │◄──────►│ Memory │ │
207
+ │ └────────┘ │ Layer │ └────────┘ │
208
+ │ │ │ │
209
+ │ Agent C │ Provenance │ Agent D │
210
+ │ ┌────────┐ │ Tracking │ ┌────────┐ │
211
+ │ │ Private│◄────►│ │◄──────►│ Private│ │
212
+ │ │ Memory │ │ Privacy │ │ Memory │ │
213
+ │ └────────┘ │ Filtering │ └────────┘ │
214
+ │ └──────────────┘ │
215
+ │ │
216
+ └─────────────────────────────────────────────────────────────────┘
217
+ ```
218
+
219
+ **Features:**
220
+ - Memory provenance: Track which agent created/modified each memory
221
+ - Privacy levels: Private, shared-with-group, public
222
+ - Conflict resolution: When agents disagree on facts
223
+ - Collective intelligence: Aggregate insights across agents
224
+
225
+ ---
226
+
227
+ ### 5.4 Continual Learning
228
+
229
+ **Goal:** Enable online adaptation without catastrophic forgetting.
230
+
231
+ ```
232
+ ┌─────────────────────────────────────────────────────────────────┐
233
+ │ CONTINUAL LEARNING │
234
+ │ │
235
+ │ Traditional ML: Train → Deploy → (forget) → Retrain │
236
+ │ │
237
+ │ MnemoCore 5.4: Learn → Consolidate → Adapt → Learn → ... │
238
+ │ ↑______________| │
239
+ │ │
240
+ │ KEY MECHANISMS: │
241
+ │ ───────────────────────────────────────────────────────────── │
242
+ │ • Elastic Weight Consolidation (EWC) for encoder │
243
+ │ • Replay-based consolidation during "sleep" cycles │
244
+ │ • Progressive neural networks for new domains │
245
+ │ • Meta-learning for rapid adaptation │
246
+ │ │
247
+ └─────────────────────────────────────────────────────────────────┘
248
+ ```
249
+
250
+ ---
251
+
252
+ ## Integration Priorities
253
+
254
+ ### Agent Frameworks
255
+ | Framework | Priority | Use Case |
256
+ |-----------|----------|----------|
257
+ | Open Claw | ⭐⭐⭐⭐⭐ | Primary use case, deep integration |
258
+ | LangChain | ⭐⭐⭐⭐ | Memory provider plugin |
259
+ | CrewAI | ⭐⭐⭐⭐ | Shared memory between agents |
260
+ | AutoGen | ⭐⭐⭐ | Conversation memory backend |
261
+ | LlamaIndex | ⭐⭐⭐ | Vector store adapter |
262
+
263
+ ### AI Platforms
264
+ | Platform | Priority | Integration Type |
265
+ |----------|----------|------------------|
266
+ | Claude (Anthropic) | ⭐⭐⭐⭐⭐ | MCP server (existing) |
267
+ | OpenAI Codex | ⭐⭐⭐⭐⭐ | API + function calling |
268
+ | Ollama | ⭐⭐⭐⭐ | Native memory backend |
269
+ | LM Studio | ⭐⭐⭐ | Plugin architecture |
270
+ | Gemini | ⭐⭐⭐ | API adapter |
271
+
272
+ ---
273
+
274
+ ## Research Opportunities
275
+
276
+ ### Academic Collaborations
277
+ | Area | Institutions | Relevance |
278
+ |------|-------------|-----------|
279
+ | Hyperdimensional Computing | Stanford, IBM Research, Redwood Center | Core HDC/VSA theory |
280
+ | Computational Neuroscience | MIT, UCL, KTH | Biological validation |
281
+ | Cognitive Architecture | Carnegie Mellon, University of Michigan | SOAR/ACT-R comparison |
282
+ | Neuromorphic Computing | Intel Labs, ETH Zürich | Hardware acceleration |
283
+
284
+ ### Publication Opportunities
285
+ 1. **"Binary HDC for Long-term AI Memory"** - Novel approach to persistent memory
286
+ 2. **"Episodic Chaining in Vector Memory Systems"** - Phase 4.3 temporal features
287
+ 3. **"XOR Attention Masking for Memory Isolation"** - Project isolation innovation
288
+ 4. **"Bayesian LTP in Artificial Memory Systems"** - Biological plausibility
289
+
290
+ ---
291
+
292
+ ## Known Gaps (Current Beta)
293
+
294
+ - Query path is still primarily HOT-tier-centric in current engine behavior.
295
+ - Some consolidation pathways are partial or under active refinement.
296
+ - Certain integrations (LLM/Nightlab) are intentionally marked as TODO.
297
+ - Distributed-scale behavior from long-form blueprints is not fully productized.
298
+
299
+ ---
300
+
301
+ ## Near-Term Priorities (Pre-5.0)
302
+
303
+ 1. Improve cross-tier retrieval consistency.
304
+ 2. Harden consolidation and archival flow.
305
+ 3. Improve deletion semantics and API consistency.
306
+ 4. Expand tests around degraded dependency modes (Redis/Qdrant outages).
307
+ 5. Stabilize API contracts and publish versioned compatibility notes.
308
+ 6. MCP server integration for agent tool access.
309
+
310
+ ---
311
+
312
+ ## Not a Commitment
313
+
314
+ Items above are directional only.
315
+ Order, scope, and implementation details can change during development.
316
+
317
+ ---
318
+
319
+ *Last Updated: 2025-02-18*
320
+ *Current Version: 4.3.0*
docs/SELF_IMPROVEMENT_DEEP_DIVE.md ADDED
@@ -0,0 +1,279 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MnemoCore Self-Improvement Deep Dive
2
+
3
+ Status: Design document (pre-implementation)
4
+ Date: 2025-02-18
5
+ Scope: Latent, always-on memory self-improvement loop that runs safely in production-like beta.
6
+
7
+ ## 1. Purpose
8
+
9
+ This document defines a production-safe design for a latent self-improvement loop in MnemoCore.
10
+ The goal is to continuously improve memory quality over time without corrupting truth, overloading resources, or breaking temporal-memory behavior.
11
+
12
+ Primary outcomes:
13
+ - Better memory quality (clarity, consistency, retrieval utility).
14
+ - Better long-term structure (less duplication, stronger semantic links).
15
+ - Preserved auditability and rollback.
16
+ - Compatibility with temporal timelines (`previous_id`, `unix_timestamp`, time-range search).
17
+
18
+ ## 2. Current System Baseline
19
+
20
+ Relevant existing mechanisms already in code:
21
+ - `HAIMEngine.store/query` orchestration and subconscious queue (`src/core/engine.py`).
22
+ - Background dream strengthening and synaptic binding (`src/core/engine.py`).
23
+ - Gap detection and autonomous gap filling (`src/core/gap_detector.py`, `src/core/gap_filler.py`).
24
+ - Semantic consolidation workers (`src/core/semantic_consolidation.py`, `src/subconscious/consolidation_worker.py`).
25
+ - Subconscious daemon loop with LLM-powered cycles (`src/subconscious/daemon.py`).
26
+ - Temporal memory fields in node model (`src/core/node.py`): `previous_id`, `unix_timestamp`, `iso_date`.
27
+ - Tiered persistence and time-range aware search (`src/core/tier_manager.py`, `src/core/qdrant_store.py`).
28
+
29
+ Implication: Self-improvement should reuse these pathways, not bypass them.
30
+
31
+ ## 3. Problem Definition
32
+
33
+ Without a dedicated self-improvement loop, memory quality drifts:
34
+ - Duplicate or near-duplicate content accumulates.
35
+ - Weakly structured notes remain unnormalized.
36
+ - Conflicting memories are not actively reconciled.
37
+ - Query utility depends too much on initial storage quality.
38
+
39
+ At the same time, naive autonomous rewriting is risky:
40
+ - Hallucinated edits can reduce truth quality.
41
+ - Over-aggressive rewriting can erase provenance.
42
+ - Continuous background jobs can starve main workloads.
43
+
44
+ ## 4. Design Principles
45
+
46
+ 1. Append-only evolution, never destructive overwrite.
47
+ 2. Improvement proposals must pass validation gates before commit.
48
+ 3. Full provenance and rollback path for every derived memory.
49
+ 4. Temporal consistency is mandatory (timeline must remain navigable).
50
+ 5. Resource budgets and kill switches must exist from day 1.
51
+
52
+ ## 5. Target Architecture
53
+
54
+ ### 5.1 New Component
55
+
56
+ Add `SelfImprovementWorker` as a background worker (similar lifecycle style to consolidation/gap-filler workers).
57
+
58
+ Suggested location:
59
+ - `src/subconscious/self_improvement_worker.py`
60
+
61
+ Responsibilities:
62
+ - Select candidates from HOT/WARM.
63
+ - Produce improvement proposals (rule-based first, optional LLM later).
64
+ - Validate proposals.
65
+ - Commit accepted proposals via `engine.store(...)`.
66
+ - Link provenance metadata.
67
+ - Emit metrics and decision logs.
68
+
69
+ ### 5.2 Data Flow
70
+
71
+ 1. Candidate Selection
72
+ 2. Proposal Generation
73
+ 3. Validation & Scoring
74
+ 4. Commit as New Memory
75
+ 5. Link Graph/Timeline
76
+ 6. Monitor + Feedback Loop
77
+
78
+ No in-place mutation of existing memory content.
79
+
80
+ ### 5.3 Integration Points
81
+
82
+ - Read candidates: `TierManager` (`hot`, optional warm sampling).
83
+ - Commit: `HAIMEngine.store(...)` so all normal indexing/persistence paths apply.
84
+ - Timeline compatibility: preserve `previous_id` semantics and set provenance fields.
85
+ - Optional post-effects: trigger low-priority synapse/link updates.
86
+
87
+ ## 6. Memory Model Additions (Metadata, not schema break)
88
+
89
+ Use metadata keys first (backward compatible):
90
+ - `source: "self_improvement"`
91
+ - `improvement_type: "normalize" | "summarize" | "deduplicate" | "reconcile"`
92
+ - `derived_from: "<node_id>"`
93
+ - `derived_from_many: [node_ids...]` (for merge/reconcile)
94
+ - `improvement_score: float`
95
+ - `validator_scores: { ... }`
96
+ - `supersedes: "<node_id>"` (logical supersedence, not deletion)
97
+ - `version_tag: "vN"`
98
+ - `safety_mode: "strict" | "balanced"`
99
+
100
+ Note: Keep temporal fields from `MemoryNode` untouched and naturally generated on store.
101
+
102
+ ## 7. Candidate Selection Strategy
103
+
104
+ Initial heuristics (cheap and deterministic):
105
+ - High access + low confidence retrieval history.
106
+ - Conflicting memories in same topical cluster.
107
+ - Redundant near-duplicates.
108
+ - Old high-value memories needing compaction.
109
+
110
+ Selection constraints:
111
+ - Batch cap per cycle.
112
+ - Max candidates per source cluster.
113
+ - Cooldown per `node_id` to avoid thrashing.
114
+
115
+ ## 8. Proposal Generation Strategy
116
+
117
+ Phase A (no LLM dependency):
118
+ - Normalize formatting.
119
+ - Metadata repair/completion.
120
+ - Deterministic summary extraction.
121
+ - Exact/near duplicate merge suggestion.
122
+
123
+ Phase B (LLM-assisted, guarded):
124
+ - Rewrite for clarity.
125
+ - Multi-memory reconciliation draft.
126
+ - Explicit uncertainty markup if conflict unresolved.
127
+
128
+ All proposals must include rationale + structured diff summary.
129
+
130
+ ## 9. Validation Gates (Critical)
131
+
132
+ A proposal is committed only if all required gates pass:
133
+
134
+ 1. Semantic drift gate
135
+ - Similarity to origin must stay above threshold unless `improvement_type=reconcile`.
136
+
137
+ 2. Fact safety gate
138
+ - No new unsupported claims for strict mode.
139
+ - If unresolved conflict: enforce explicit uncertainty markers.
140
+
141
+ 3. Structure gate
142
+ - Must improve readability/compactness score beyond threshold.
143
+
144
+ 4. Policy gate
145
+ - Block forbidden metadata changes.
146
+ - Block sensitive tags crossing trust boundaries.
147
+
148
+ 5. Resource gate
149
+ - Cycle budget, latency budget, queue/backpressure checks.
150
+
151
+ Rejected proposals are logged but not committed.
152
+
153
+ ## 10. Interaction with Temporal Memory (Hard Requirement)
154
+
155
+ This design must not break timeline behavior introduced around:
156
+ - `previous_id` chaining
157
+ - `unix_timestamp` payload filtering
158
+ - Qdrant time-range retrieval
159
+
160
+ Rules:
161
+ - Every improved memory is a new timeline event (new node id).
162
+ - `derived_from` models lineage; `previous_id` continues temporal sequence.
163
+ - Query paths that use `time_range` must continue functioning identically.
164
+ - Do not bypass `TierManager.add_memory` or Qdrant payload generation.
165
+
166
+ ## 11. Safety Controls & Operations
167
+
168
+ Mandatory controls:
169
+ - Config kill switch: `self_improvement_enabled: false` by default initially.
170
+ - Dry-run mode: generate + validate, but do not store.
171
+ - Strict mode for early rollout.
172
+ - Per-cycle hard caps (count, wall-clock, token budget).
173
+ - Circuit breaker on repeated validation failures.
174
+
175
+ Operational observability:
176
+ - Attempted/accepted/rejected counters.
177
+ - Rejection reasons (as cardinality-safe labels).
178
+ - End-to-end cycle duration.
179
+ - Queue depth and backlog age.
180
+ - Quality delta trend over time.
181
+
182
+ ## 12. Suggested Config Block
183
+
184
+ Add under `haim.dream_loop` or sibling block `haim.self_improvement`:
185
+
186
+ ```yaml
187
+ self_improvement:
188
+ enabled: false
189
+ dry_run: true
190
+ safety_mode: "strict" # strict | balanced
191
+ interval_seconds: 300
192
+ batch_size: 8
193
+ max_cycle_seconds: 20
194
+ max_candidates_per_topic: 2
195
+ cooldown_minutes: 120
196
+ min_improvement_score: 0.15
197
+ min_semantic_similarity: 0.82
198
+ allow_llm_rewrite: false
199
+ ```
200
+
201
+ ## 13. Metrics (Proposed)
202
+
203
+ - `mnemocore_self_improve_attempts_total`
204
+ - `mnemocore_self_improve_commits_total`
205
+ - `mnemocore_self_improve_rejects_total`
206
+ - `mnemocore_self_improve_cycle_duration_seconds`
207
+ - `mnemocore_self_improve_candidates_in_cycle`
208
+ - `mnemocore_self_improve_quality_delta`
209
+ - `mnemocore_self_improve_backpressure_skips_total`
210
+
211
+ ## 14. Phased Implementation Plan
212
+
213
+ Phase 0: Instrumentation + dry-run only
214
+ - Add worker scaffold + metrics + decision logs.
215
+ - No writes.
216
+
217
+ Phase 1: Deterministic improvements only
218
+ - Metadata normalization, duplicate handling suggestions.
219
+ - Strict validation.
220
+ - Commit append-only derived nodes.
221
+
222
+ Phase 2: Controlled LLM improvements
223
+ - Enable `allow_llm_rewrite` behind feature flag.
224
+ - Add stricter validation and capped throughput.
225
+
226
+ Phase 3: Reconciliation and adaptive policies
227
+ - Multi-memory conflict reconciliation.
228
+ - Learning policies from acceptance/rejection outcomes.
229
+
230
+ ## 15. Test Strategy
231
+
232
+ Unit tests:
233
+ - Candidate selection determinism and cooldown behavior.
234
+ - Validation gates (pass/fail matrices).
235
+ - Provenance metadata correctness.
236
+
237
+ Integration tests:
238
+ - Store/query behavior unchanged under disabled mode.
239
+ - Time-range query still correct with improved nodes present.
240
+ - Qdrant payload contains expected temporal + provenance fields.
241
+
242
+ Soak/load tests:
243
+ - Worker under sustained ingest.
244
+ - Backpressure behavior.
245
+ - No unbounded queue growth.
246
+
247
+ Regression guardrails:
248
+ - No overwrite of original content.
249
+ - No bypass path around `engine.store`.
250
+
251
+ ## 16. Risks and Mitigations
252
+
253
+ Risk: hallucinated improvements
254
+ Mitigation: strict mode, no-LLM phase first, fact safety gate.
255
+
256
+ Risk: timeline noise from too many derived nodes
257
+ Mitigation: cooldown, batch caps, minimum score thresholds.
258
+
259
+ Risk: resource contention
260
+ Mitigation: cycle time caps, skip when main queue/backlog high.
261
+
262
+ Risk: provenance complexity
263
+ Mitigation: standardized metadata contract and audit logs.
264
+
265
+ ## 17. Open Decisions
266
+
267
+ 1. Should self-improved nodes be visible by default in top-k query, or weighted down unless requested?
268
+ 2. Should `supersedes` influence retrieval ranking automatically?
269
+ 3. Do we need a dedicated “truth tier” for validated reconciled memories?
270
+
271
+ ## 18. Recommended Next Step
272
+
273
+ Implement Phase 0 only:
274
+ - Worker skeleton
275
+ - Config block
276
+ - Metrics
277
+ - Dry-run reports
278
+
279
+ Then review logs for 1-2 weeks before enabling any writes.
git_status.txt ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ git : warning: could not
2
+ open directory '.tmp_py
3
+ test/pytest-of-Robin/':
4
+ Permission denied
5
+ At line:1 char:1
6
+ + git status 2>&1 | Out-
7
+ File -FilePath git_statu
8
+ s.txt -Encoding utf8; G
9
+ ...
10
+ + ~~~~~~~~~~~~~~~
11
+ + CategoryInfo
12
+ : NotSpecified
13
+ : (warning: could .
14
+ ..rmission denied:S
15
+ tring) [], RemoteEx c
16
+ eption
17
+ + FullyQualifiedErr
18
+ orId : NativeComman
19
+ dError
20
+
21
+ warning: could not open
22
+ directory 'pytest_base_t
23
+ emp/': Permission denied
24
+ On branch main
25
+ Your branch is up to date with 'origin/main'.
26
+
27
+ Changes not staged for commit:
28
+ (use "git add/rm <file>..." to update what will be committed)
29
+ (use "git restore <file>..." to discard changes in working directory)
30
+ modified: .github/workflows/ci.yml
31
+ deleted: MnemoCore_Review_v2.docx
32
+ modified: src/api/main.py
33
+ modified: src/core/engine.py
34
+ modified: src/core/tier_manager.py
35
+ modified: src/llm_integration.py
36
+
37
+ Untracked files:
38
+ (use "git add <file>..." to include in what will be committed)
39
+ .tmp_verify_phase43/
40
+ git_status.txt
41
+ scripts/insert_rlm_endpoint.py
42
+ scripts/insert_rlm_integrator.py
43
+ src/core/recursive_synthesizer.py
44
+ src/core/ripple_context.py
45
+ test_regression_output.txt
46
+ test_rlm_output.txt
47
+ tests/test_e2e_flow.py
48
+ tests/test_phase43_regressions.py
49
+ tests/test_recursive_synthesizer.py
50
+
51
+ no changes added to commit (use "git add" and/or "git commit -a")
grafana-dashboard.json ADDED
@@ -0,0 +1,954 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "annotations": {
3
+ "list": [
4
+ {
5
+ "builtIn": 1,
6
+ "datasource": {
7
+ "type": "grafana",
8
+ "uid": "-- Grafana --"
9
+ },
10
+ "enable": true,
11
+ "hide": true,
12
+ "iconColor": "rgba(0, 211, 255, 1)",
13
+ "name": "Annotations & Alerts",
14
+ "type": "dashboard"
15
+ }
16
+ ]
17
+ },
18
+ "editable": true,
19
+ "fiscalYearStartMonth": 0,
20
+ "graphTooltip": 0,
21
+ "id": null,
22
+ "links": [],
23
+ "liveNow": false,
24
+ "panels": [
25
+ {
26
+ "datasource": {
27
+ "type": "prometheus",
28
+ "uid": "${DS_PROMETHEUS}"
29
+ },
30
+ "fieldConfig": {
31
+ "defaults": {
32
+ "color": {
33
+ "mode": "palette-classic"
34
+ },
35
+ "custom": {
36
+ "axisCenteredZero": false,
37
+ "axisColorMode": "text",
38
+ "axisLabel": "",
39
+ "axisPlacement": "auto",
40
+ "barAlignment": 0,
41
+ "drawStyle": "line",
42
+ "fillOpacity": 10,
43
+ "gradientMode": "none",
44
+ "hideFrom": {
45
+ "legend": false,
46
+ "tooltip": false,
47
+ "viz": false
48
+ },
49
+ "lineInterpolation": "linear",
50
+ "lineWidth": 1,
51
+ "pointSize": 5,
52
+ "scaleDistribution": {
53
+ "type": "linear"
54
+ },
55
+ "showPoints": "auto",
56
+ "spanNulls": false,
57
+ "stacking": {
58
+ "group": "A",
59
+ "mode": "none"
60
+ },
61
+ "thresholdsStyle": {
62
+ "mode": "off"
63
+ }
64
+ },
65
+ "mappings": [],
66
+ "thresholds": {
67
+ "mode": "absolute",
68
+ "steps": [
69
+ {
70
+ "color": "green",
71
+ "value": null
72
+ },
73
+ {
74
+ "color": "red",
75
+ "value": 80
76
+ }
77
+ ]
78
+ }
79
+ },
80
+ "overrides": []
81
+ },
82
+ "gridPos": {
83
+ "h": 8,
84
+ "w": 12,
85
+ "x": 0,
86
+ "y": 0
87
+ },
88
+ "id": 1,
89
+ "options": {
90
+ "legend": {
91
+ "calcs": [],
92
+ "displayMode": "list",
93
+ "placement": "bottom",
94
+ "showLegend": true
95
+ },
96
+ "tooltip": {
97
+ "mode": "single",
98
+ "sort": "none"
99
+ }
100
+ },
101
+ "targets": [
102
+ {
103
+ "datasource": {
104
+ "type": "prometheus",
105
+ "uid": "${DS_PROMETHEUS}"
106
+ },
107
+ "editorMode": "code",
108
+ "expr": "rate(haim_api_request_count_total[5m])",
109
+ "legendFormat": "{{endpoint}} - {{method}}",
110
+ "range": true,
111
+ "refId": "A"
112
+ }
113
+ ],
114
+ "title": "API Request Rate (5m avg)",
115
+ "type": "timeseries"
116
+ },
117
+ {
118
+ "datasource": {
119
+ "type": "prometheus",
120
+ "uid": "${DS_PROMETHEUS}"
121
+ },
122
+ "fieldConfig": {
123
+ "defaults": {
124
+ "color": {
125
+ "mode": "palette-classic"
126
+ },
127
+ "custom": {
128
+ "axisCenteredZero": false,
129
+ "axisColorMode": "text",
130
+ "axisLabel": "",
131
+ "axisPlacement": "auto",
132
+ "barAlignment": 0,
133
+ "drawStyle": "line",
134
+ "fillOpacity": 10,
135
+ "gradientMode": "none",
136
+ "hideFrom": {
137
+ "legend": false,
138
+ "tooltip": false,
139
+ "viz": false
140
+ },
141
+ "lineInterpolation": "linear",
142
+ "lineWidth": 1,
143
+ "pointSize": 5,
144
+ "scaleDistribution": {
145
+ "type": "linear"
146
+ },
147
+ "showPoints": "auto",
148
+ "spanNulls": false,
149
+ "stacking": {
150
+ "group": "A",
151
+ "mode": "none"
152
+ },
153
+ "thresholdsStyle": {
154
+ "mode": "off"
155
+ }
156
+ },
157
+ "mappings": [],
158
+ "thresholds": {
159
+ "mode": "absolute",
160
+ "steps": [
161
+ {
162
+ "color": "green",
163
+ "value": null
164
+ },
165
+ {
166
+ "color": "red",
167
+ "value": 80
168
+ }
169
+ ]
170
+ }
171
+ },
172
+ "overrides": []
173
+ },
174
+ "gridPos": {
175
+ "h": 8,
176
+ "w": 12,
177
+ "x": 12,
178
+ "y": 0
179
+ },
180
+ "id": 2,
181
+ "options": {
182
+ "legend": {
183
+ "calcs": [],
184
+ "displayMode": "list",
185
+ "placement": "bottom",
186
+ "showLegend": true
187
+ },
188
+ "tooltip": {
189
+ "mode": "single",
190
+ "sort": "none"
191
+ }
192
+ },
193
+ "targets": [
194
+ {
195
+ "datasource": {
196
+ "type": "prometheus",
197
+ "uid": "${DS_PROMETHEUS}"
198
+ },
199
+ "editorMode": "code",
200
+ "expr": "rate(haim_api_request_latency_seconds_sum[5m]) / rate(haim_api_request_latency_seconds_count[5m])",
201
+ "legendFormat": "{{endpoint}}",
202
+ "range": true,
203
+ "refId": "A"
204
+ }
205
+ ],
206
+ "title": "API Avg Latency (5m avg)",
207
+ "type": "timeseries"
208
+ },
209
+ {
210
+ "datasource": {
211
+ "type": "prometheus",
212
+ "uid": "${DS_PROMETHEUS}"
213
+ },
214
+ "fieldConfig": {
215
+ "defaults": {
216
+ "color": {
217
+ "mode": "thresholds"
218
+ },
219
+ "mappings": [],
220
+ "thresholds": {
221
+ "mode": "absolute",
222
+ "steps": [
223
+ {
224
+ "color": "green",
225
+ "value": null
226
+ },
227
+ {
228
+ "color": "red",
229
+ "value": 80
230
+ }
231
+ ]
232
+ }
233
+ },
234
+ "overrides": []
235
+ },
236
+ "gridPos": {
237
+ "h": 8,
238
+ "w": 12,
239
+ "x": 0,
240
+ "y": 8
241
+ },
242
+ "id": 3,
243
+ "options": {
244
+ "orientation": "auto",
245
+ "reduceOptions": {
246
+ "calcs": [
247
+ "lastNotNull"
248
+ ],
249
+ "fields": "",
250
+ "values": false
251
+ },
252
+ "showThresholdLabels": false,
253
+ "textMode": "auto"
254
+ },
255
+ "targets": [
256
+ {
257
+ "datasource": {
258
+ "type": "prometheus",
259
+ "uid": "${DS_PROMETHEUS}"
260
+ },
261
+ "editorMode": "code",
262
+ "expr": "haim_engine_memory_total",
263
+ "legendFormat": "{{tier}}",
264
+ "range": true,
265
+ "refId": "A"
266
+ }
267
+ ],
268
+ "title": "Memory Count per Tier",
269
+ "type": "stat"
270
+ },
271
+ {
272
+ "datasource": {
273
+ "type": "prometheus",
274
+ "uid": "${DS_PROMETHEUS}"
275
+ },
276
+ "fieldConfig": {
277
+ "defaults": {
278
+ "color": {
279
+ "mode": "thresholds"
280
+ },
281
+ "mappings": [],
282
+ "thresholds": {
283
+ "mode": "absolute",
284
+ "steps": [
285
+ {
286
+ "color": "green",
287
+ "value": null
288
+ },
289
+ {
290
+ "color": "yellow",
291
+ "value": 50
292
+ },
293
+ {
294
+ "color": "red",
295
+ "value": 100
296
+ }
297
+ ]
298
+ }
299
+ },
300
+ "overrides": []
301
+ },
302
+ "gridPos": {
303
+ "h": 8,
304
+ "w": 12,
305
+ "x": 12,
306
+ "y": 8
307
+ },
308
+ "id": 4,
309
+ "options": {
310
+ "orientation": "auto",
311
+ "reduceOptions": {
312
+ "calcs": [
313
+ "lastNotNull"
314
+ ],
315
+ "fields": "",
316
+ "values": false
317
+ },
318
+ "showThresholdLabels": false,
319
+ "textMode": "auto"
320
+ },
321
+ "targets": [
322
+ {
323
+ "datasource": {
324
+ "type": "prometheus",
325
+ "uid": "${DS_PROMETHEUS}"
326
+ },
327
+ "editorMode": "code",
328
+ "expr": "mnemocore_queue_length",
329
+ "legendFormat": "Queue Length",
330
+ "range": true,
331
+ "refId": "A"
332
+ }
333
+ ],
334
+ "title": "Subconscious Queue Length",
335
+ "type": "stat"
336
+ },
337
+ {
338
+ "datasource": {
339
+ "type": "prometheus",
340
+ "uid": "${DS_PROMETHEUS}"
341
+ },
342
+ "fieldConfig": {
343
+ "defaults": {
344
+ "color": {
345
+ "mode": "palette-classic"
346
+ },
347
+ "custom": {
348
+ "axisCenteredZero": false,
349
+ "axisColorMode": "text",
350
+ "axisLabel": "",
351
+ "axisPlacement": "auto",
352
+ "barAlignment": 0,
353
+ "drawStyle": "line",
354
+ "fillOpacity": 10,
355
+ "gradientMode": "none",
356
+ "hideFrom": {
357
+ "legend": false,
358
+ "tooltip": false,
359
+ "viz": false
360
+ },
361
+ "lineInterpolation": "linear",
362
+ "lineWidth": 1,
363
+ "pointSize": 5,
364
+ "scaleDistribution": {
365
+ "type": "linear"
366
+ },
367
+ "showPoints": "auto",
368
+ "spanNulls": false,
369
+ "stacking": {
370
+ "group": "A",
371
+ "mode": "none"
372
+ },
373
+ "thresholdsStyle": {
374
+ "mode": "off"
375
+ }
376
+ },
377
+ "mappings": [],
378
+ "thresholds": {
379
+ "mode": "absolute",
380
+ "steps": [
381
+ {
382
+ "color": "green",
383
+ "value": null
384
+ },
385
+ {
386
+ "color": "red",
387
+ "value": 80
388
+ }
389
+ ]
390
+ },
391
+ "unit": "s"
392
+ },
393
+ "overrides": []
394
+ },
395
+ "gridPos": {
396
+ "h": 8,
397
+ "w": 12,
398
+ "x": 0,
399
+ "y": 16
400
+ },
401
+ "id": 5,
402
+ "options": {
403
+ "legend": {
404
+ "calcs": ["mean", "max"],
405
+ "displayMode": "table",
406
+ "placement": "bottom",
407
+ "showLegend": true
408
+ },
409
+ "tooltip": {
410
+ "mode": "multi",
411
+ "sort": "desc"
412
+ }
413
+ },
414
+ "targets": [
415
+ {
416
+ "datasource": {
417
+ "type": "prometheus",
418
+ "uid": "${DS_PROMETHEUS}"
419
+ },
420
+ "editorMode": "code",
421
+ "expr": "histogram_quantile(0.50, rate(mnemocore_store_duration_seconds_bucket[5m]))",
422
+ "legendFormat": "p50 store",
423
+ "range": true,
424
+ "refId": "A"
425
+ },
426
+ {
427
+ "datasource": {
428
+ "type": "prometheus",
429
+ "uid": "${DS_PROMETHEUS}"
430
+ },
431
+ "editorMode": "code",
432
+ "expr": "histogram_quantile(0.95, rate(mnemocore_store_duration_seconds_bucket[5m]))",
433
+ "legendFormat": "p95 store",
434
+ "range": true,
435
+ "refId": "B"
436
+ },
437
+ {
438
+ "datasource": {
439
+ "type": "prometheus",
440
+ "uid": "${DS_PROMETHEUS}"
441
+ },
442
+ "editorMode": "code",
443
+ "expr": "histogram_quantile(0.99, rate(mnemocore_store_duration_seconds_bucket[5m]))",
444
+ "legendFormat": "p99 store",
445
+ "range": true,
446
+ "refId": "C"
447
+ }
448
+ ],
449
+ "title": "Store Duration Percentiles",
450
+ "type": "timeseries"
451
+ },
452
+ {
453
+ "datasource": {
454
+ "type": "prometheus",
455
+ "uid": "${DS_PROMETHEUS}"
456
+ },
457
+ "fieldConfig": {
458
+ "defaults": {
459
+ "color": {
460
+ "mode": "palette-classic"
461
+ },
462
+ "custom": {
463
+ "axisCenteredZero": false,
464
+ "axisColorMode": "text",
465
+ "axisLabel": "",
466
+ "axisPlacement": "auto",
467
+ "barAlignment": 0,
468
+ "drawStyle": "line",
469
+ "fillOpacity": 10,
470
+ "gradientMode": "none",
471
+ "hideFrom": {
472
+ "legend": false,
473
+ "tooltip": false,
474
+ "viz": false
475
+ },
476
+ "lineInterpolation": "linear",
477
+ "lineWidth": 1,
478
+ "pointSize": 5,
479
+ "scaleDistribution": {
480
+ "type": "linear"
481
+ },
482
+ "showPoints": "auto",
483
+ "spanNulls": false,
484
+ "stacking": {
485
+ "group": "A",
486
+ "mode": "none"
487
+ },
488
+ "thresholdsStyle": {
489
+ "mode": "off"
490
+ }
491
+ },
492
+ "mappings": [],
493
+ "thresholds": {
494
+ "mode": "absolute",
495
+ "steps": [
496
+ {
497
+ "color": "green",
498
+ "value": null
499
+ },
500
+ {
501
+ "color": "red",
502
+ "value": 80
503
+ }
504
+ ]
505
+ },
506
+ "unit": "s"
507
+ },
508
+ "overrides": []
509
+ },
510
+ "gridPos": {
511
+ "h": 8,
512
+ "w": 12,
513
+ "x": 12,
514
+ "y": 16
515
+ },
516
+ "id": 6,
517
+ "options": {
518
+ "legend": {
519
+ "calcs": ["mean", "max"],
520
+ "displayMode": "table",
521
+ "placement": "bottom",
522
+ "showLegend": true
523
+ },
524
+ "tooltip": {
525
+ "mode": "multi",
526
+ "sort": "desc"
527
+ }
528
+ },
529
+ "targets": [
530
+ {
531
+ "datasource": {
532
+ "type": "prometheus",
533
+ "uid": "${DS_PROMETHEUS}"
534
+ },
535
+ "editorMode": "code",
536
+ "expr": "histogram_quantile(0.50, rate(mnemocore_query_duration_seconds_bucket[5m]))",
537
+ "legendFormat": "p50 query",
538
+ "range": true,
539
+ "refId": "A"
540
+ },
541
+ {
542
+ "datasource": {
543
+ "type": "prometheus",
544
+ "uid": "${DS_PROMETHEUS}"
545
+ },
546
+ "editorMode": "code",
547
+ "expr": "histogram_quantile(0.95, rate(mnemocore_query_duration_seconds_bucket[5m]))",
548
+ "legendFormat": "p95 query",
549
+ "range": true,
550
+ "refId": "B"
551
+ },
552
+ {
553
+ "datasource": {
554
+ "type": "prometheus",
555
+ "uid": "${DS_PROMETHEUS}"
556
+ },
557
+ "editorMode": "code",
558
+ "expr": "histogram_quantile(0.99, rate(mnemocore_query_duration_seconds_bucket[5m]))",
559
+ "legendFormat": "p99 query",
560
+ "range": true,
561
+ "refId": "C"
562
+ }
563
+ ],
564
+ "title": "Query Duration Percentiles",
565
+ "type": "timeseries"
566
+ },
567
+ {
568
+ "datasource": {
569
+ "type": "prometheus",
570
+ "uid": "${DS_PROMETHEUS}"
571
+ },
572
+ "fieldConfig": {
573
+ "defaults": {
574
+ "color": {
575
+ "mode": "palette-classic"
576
+ },
577
+ "custom": {
578
+ "axisCenteredZero": false,
579
+ "axisColorMode": "text",
580
+ "axisLabel": "",
581
+ "axisPlacement": "auto",
582
+ "barAlignment": 0,
583
+ "drawStyle": "bars",
584
+ "fillOpacity": 100,
585
+ "gradientMode": "none",
586
+ "hideFrom": {
587
+ "legend": false,
588
+ "tooltip": false,
589
+ "viz": false
590
+ },
591
+ "lineInterpolation": "linear",
592
+ "lineWidth": 1,
593
+ "pointSize": 5,
594
+ "scaleDistribution": {
595
+ "type": "linear"
596
+ },
597
+ "showPoints": "auto",
598
+ "spanNulls": false,
599
+ "stacking": {
600
+ "group": "A",
601
+ "mode": "normal"
602
+ },
603
+ "thresholdsStyle": {
604
+ "mode": "off"
605
+ }
606
+ },
607
+ "mappings": [],
608
+ "thresholds": {
609
+ "mode": "absolute",
610
+ "steps": [
611
+ {
612
+ "color": "green",
613
+ "value": null
614
+ },
615
+ {
616
+ "color": "red",
617
+ "value": 80
618
+ }
619
+ ]
620
+ }
621
+ },
622
+ "overrides": []
623
+ },
624
+ "gridPos": {
625
+ "h": 8,
626
+ "w": 12,
627
+ "x": 0,
628
+ "y": 24
629
+ },
630
+ "id": 7,
631
+ "options": {
632
+ "legend": {
633
+ "calcs": [],
634
+ "displayMode": "list",
635
+ "placement": "bottom",
636
+ "showLegend": true
637
+ },
638
+ "tooltip": {
639
+ "mode": "single",
640
+ "sort": "none"
641
+ }
642
+ },
643
+ "targets": [
644
+ {
645
+ "datasource": {
646
+ "type": "prometheus",
647
+ "uid": "${DS_PROMETHEUS}"
648
+ },
649
+ "editorMode": "code",
650
+ "expr": "rate(mnemocore_error_total[5m])",
651
+ "legendFormat": "{{error_type}}",
652
+ "range": true,
653
+ "refId": "A"
654
+ }
655
+ ],
656
+ "title": "Error Rate by Type (5m avg)",
657
+ "type": "timeseries"
658
+ },
659
+ {
660
+ "datasource": {
661
+ "type": "prometheus",
662
+ "uid": "${DS_PROMETHEUS}"
663
+ },
664
+ "fieldConfig": {
665
+ "defaults": {
666
+ "color": {
667
+ "mode": "palette-classic"
668
+ },
669
+ "custom": {
670
+ "axisCenteredZero": false,
671
+ "axisColorMode": "text",
672
+ "axisLabel": "",
673
+ "axisPlacement": "auto",
674
+ "barAlignment": 0,
675
+ "drawStyle": "line",
676
+ "fillOpacity": 10,
677
+ "gradientMode": "none",
678
+ "hideFrom": {
679
+ "legend": false,
680
+ "tooltip": false,
681
+ "viz": false
682
+ },
683
+ "lineInterpolation": "linear",
684
+ "lineWidth": 1,
685
+ "pointSize": 5,
686
+ "scaleDistribution": {
687
+ "type": "linear"
688
+ },
689
+ "showPoints": "auto",
690
+ "spanNulls": false,
691
+ "stacking": {
692
+ "group": "A",
693
+ "mode": "none"
694
+ },
695
+ "thresholdsStyle": {
696
+ "mode": "off"
697
+ }
698
+ },
699
+ "mappings": [],
700
+ "thresholds": {
701
+ "mode": "absolute",
702
+ "steps": [
703
+ {
704
+ "color": "green",
705
+ "value": null
706
+ },
707
+ {
708
+ "color": "red",
709
+ "value": 80
710
+ }
711
+ ]
712
+ }
713
+ },
714
+ "overrides": []
715
+ },
716
+ "gridPos": {
717
+ "h": 8,
718
+ "w": 12,
719
+ "x": 12,
720
+ "y": 24
721
+ },
722
+ "id": 8,
723
+ "options": {
724
+ "legend": {
725
+ "calcs": [],
726
+ "displayMode": "list",
727
+ "placement": "bottom",
728
+ "showLegend": true
729
+ },
730
+ "tooltip": {
731
+ "mode": "single",
732
+ "sort": "none"
733
+ }
734
+ },
735
+ "targets": [
736
+ {
737
+ "datasource": {
738
+ "type": "prometheus",
739
+ "uid": "${DS_PROMETHEUS}"
740
+ },
741
+ "editorMode": "code",
742
+ "expr": "mnemocore_memory_count_total",
743
+ "legendFormat": "{{tier}}",
744
+ "range": true,
745
+ "refId": "A"
746
+ }
747
+ ],
748
+ "title": "Memory Count Over Time (by Tier)",
749
+ "type": "timeseries"
750
+ },
751
+ {
752
+ "datasource": {
753
+ "type": "prometheus",
754
+ "uid": "${DS_PROMETHEUS}"
755
+ },
756
+ "fieldConfig": {
757
+ "defaults": {
758
+ "color": {
759
+ "mode": "palette-classic"
760
+ },
761
+ "custom": {
762
+ "axisCenteredZero": false,
763
+ "axisColorMode": "text",
764
+ "axisLabel": "",
765
+ "axisPlacement": "auto",
766
+ "barAlignment": 0,
767
+ "drawStyle": "line",
768
+ "fillOpacity": 10,
769
+ "gradientMode": "none",
770
+ "hideFrom": {
771
+ "legend": false,
772
+ "tooltip": false,
773
+ "viz": false
774
+ },
775
+ "lineInterpolation": "linear",
776
+ "lineWidth": 1,
777
+ "pointSize": 5,
778
+ "scaleDistribution": {
779
+ "type": "linear"
780
+ },
781
+ "showPoints": "auto",
782
+ "spanNulls": false,
783
+ "stacking": {
784
+ "group": "A",
785
+ "mode": "none"
786
+ },
787
+ "thresholdsStyle": {
788
+ "mode": "off"
789
+ }
790
+ },
791
+ "mappings": [],
792
+ "thresholds": {
793
+ "mode": "absolute",
794
+ "steps": [
795
+ {
796
+ "color": "green",
797
+ "value": null
798
+ },
799
+ {
800
+ "color": "red",
801
+ "value": 80
802
+ }
803
+ ]
804
+ }
805
+ },
806
+ "overrides": []
807
+ },
808
+ "gridPos": {
809
+ "h": 8,
810
+ "w": 12,
811
+ "x": 0,
812
+ "y": 32
813
+ },
814
+ "id": 9,
815
+ "options": {
816
+ "legend": {
817
+ "calcs": [],
818
+ "displayMode": "list",
819
+ "placement": "bottom",
820
+ "showLegend": true
821
+ },
822
+ "tooltip": {
823
+ "mode": "single",
824
+ "sort": "none"
825
+ }
826
+ },
827
+ "targets": [
828
+ {
829
+ "datasource": {
830
+ "type": "prometheus",
831
+ "uid": "${DS_PROMETHEUS}"
832
+ },
833
+ "editorMode": "code",
834
+ "expr": "rate(haim_dream_loop_total[5m])",
835
+ "legendFormat": "{{status}}",
836
+ "range": true,
837
+ "refId": "A"
838
+ }
839
+ ],
840
+ "title": "Dream Loop Rate (5m avg)",
841
+ "type": "timeseries"
842
+ },
843
+ {
844
+ "datasource": {
845
+ "type": "prometheus",
846
+ "uid": "${DS_PROMETHEUS}"
847
+ },
848
+ "fieldConfig": {
849
+ "defaults": {
850
+ "color": {
851
+ "mode": "palette-classic"
852
+ },
853
+ "custom": {
854
+ "axisCenteredZero": false,
855
+ "axisColorMode": "text",
856
+ "axisLabel": "",
857
+ "axisPlacement": "auto",
858
+ "barAlignment": 0,
859
+ "drawStyle": "line",
860
+ "fillOpacity": 10,
861
+ "gradientMode": "none",
862
+ "hideFrom": {
863
+ "legend": false,
864
+ "tooltip": false,
865
+ "viz": false
866
+ },
867
+ "lineInterpolation": "linear",
868
+ "lineWidth": 1,
869
+ "pointSize": 5,
870
+ "scaleDistribution": {
871
+ "type": "linear"
872
+ },
873
+ "showPoints": "auto",
874
+ "spanNulls": false,
875
+ "stacking": {
876
+ "group": "A",
877
+ "mode": "none"
878
+ },
879
+ "thresholdsStyle": {
880
+ "mode": "off"
881
+ }
882
+ },
883
+ "mappings": [],
884
+ "thresholds": {
885
+ "mode": "absolute",
886
+ "steps": [
887
+ {
888
+ "color": "green",
889
+ "value": null
890
+ },
891
+ {
892
+ "color": "red",
893
+ "value": 80
894
+ }
895
+ ]
896
+ },
897
+ "unit": "s"
898
+ },
899
+ "overrides": []
900
+ },
901
+ "gridPos": {
902
+ "h": 8,
903
+ "w": 12,
904
+ "x": 12,
905
+ "y": 32
906
+ },
907
+ "id": 10,
908
+ "options": {
909
+ "legend": {
910
+ "calcs": ["mean", "max"],
911
+ "displayMode": "table",
912
+ "placement": "bottom",
913
+ "showLegend": true
914
+ },
915
+ "tooltip": {
916
+ "mode": "multi",
917
+ "sort": "desc"
918
+ }
919
+ },
920
+ "targets": [
921
+ {
922
+ "datasource": {
923
+ "type": "prometheus",
924
+ "uid": "${DS_PROMETHEUS}"
925
+ },
926
+ "editorMode": "code",
927
+ "expr": "rate(haim_dream_iteration_seconds_sum[5m]) / rate(haim_dream_iteration_seconds_count[5m])",
928
+ "legendFormat": "Dream Iteration",
929
+ "range": true,
930
+ "refId": "A"
931
+ }
932
+ ],
933
+ "title": "Dream Iteration Duration (5m avg)",
934
+ "type": "timeseries"
935
+ }
936
+ ],
937
+ "refresh": "30s",
938
+ "schemaVersion": 38,
939
+ "style": "dark",
940
+ "tags": ["mnemocore", "observability", "cognitive-memory"],
941
+ "templating": {
942
+ "list": []
943
+ },
944
+ "time": {
945
+ "from": "now-6h",
946
+ "to": "now"
947
+ },
948
+ "timepicker": {},
949
+ "timezone": "",
950
+ "title": "MnemoCore Observability Dashboard",
951
+ "uid": "mnemocore-monitoring",
952
+ "version": 2,
953
+ "weekStart": ""
954
+ }
helm/mnemocore/.helmignore ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Patterns to ignore when building packages.
2
+ # This supports shell glob matching, relative path matching, and
3
+ # negation (prefixed with !). Only one pattern per line.
4
+
5
+ # Common VCS dirs
6
+ .git/
7
+ .gitignore
8
+ .bzr/
9
+ .bzrignore
10
+ .hg/
11
+ .hgignore
12
+ .svn/
13
+
14
+ # Common backup files
15
+ *.swp
16
+ *.bak
17
+ *.tmp
18
+ *.orig
19
+ *~
20
+
21
+ # Various IDEs
22
+ .project
23
+ .idea/
24
+ *.tmproj
25
+ .vscode/
26
+
27
+ # Test files
28
+ *_test.go
29
+ tests/
30
+ *.test
31
+
32
+ # Documentation
33
+ *.md
34
+ !README.md
35
+
36
+ # CI/CD files
37
+ .github/
38
+ .gitlab-ci.yml
39
+ .travis.yml
40
+ Jenkinsfile
41
+
42
+ # Development files
43
+ .env
44
+ .env.*
45
+ docker-compose*.yml
46
+ Dockerfile*
47
+
48
+ # Python
49
+ __pycache__/
50
+ *.py[cod]
51
+ *$py.class
52
+ *.so
53
+ .Python
54
+ venv/
55
+ ENV/
56
+
57
+ # Node
58
+ node_modules/
59
+ npm-debug.log
60
+
61
+ # Build artifacts
62
+ dist/
63
+ build/
64
+ target/
65
+
66
+ # OS files
67
+ .DS_Store
68
+ Thumbs.db
helm/mnemocore/Chart.yaml ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ apiVersion: v2
2
+ name: mnemocore
3
+ description: MnemoCore - Infrastructure for Persistent Cognitive Memory with HAIM (Hyperdimensional Artificial Intelligence Memory)
4
+
5
+ type: application
6
+
7
+ # Chart version - follows SemVer 2
8
+ version: 1.0.0
9
+
10
+ # Application version
11
+ appVersion: "3.5.0"
12
+
13
+ # Metadata
14
+ home: https://github.com/your-org/mnemocore
15
+ sources:
16
+ - https://github.com/your-org/mnemocore
17
+ keywords:
18
+ - ai
19
+ - memory
20
+ - vector-database
21
+ - cognitive
22
+ - hyperdimensional-computing
23
+ - hdc
24
+ - llm
25
+
26
+ maintainers:
27
+ - name: MnemoCore Team
28
+ email: team@mnemocore.ai
29
+
30
+ # Dependencies
31
+ dependencies:
32
+ - name: redis
33
+ version: "18.x.x"
34
+ repository: "https://charts.bitnami.com/bitnami"
35
+ condition: redis.enabled
36
+ alias: redis
37
+
38
+ - name: qdrant
39
+ version: "0.x.x"
40
+ repository: "https://qdrant.github.io/qdrant-helm"
41
+ condition: qdrant.enabled
42
+ alias: qdrant
43
+
44
+ # Kubernetes version compatibility
45
+ kubeVersion: ">=1.25.0-0"
46
+
47
+ # Chart annotations
48
+ annotations:
49
+ artifacthub.io/category: ai-machine-learning
50
+ artifacthub.io/license: MIT
51
+ artifacthub.io/links: |
52
+ - name: Documentation
53
+ url: https://github.com/your-org/mnemocore/docs
54
+ - name: API Reference
55
+ url: https://github.com/your-org/mnemocore/docs/API.md
helm/mnemocore/templates/_helpers.tpl ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{/*
2
+ Expand the name of the chart.
3
+ */}}
4
+ {{- define "mnemocore.name" -}}
5
+ {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
6
+ {{- end }}
7
+
8
+ {{/*
9
+ Create a default fully qualified app name.
10
+ */}}
11
+ {{- define "mnemocore.fullname" -}}
12
+ {{- if .Values.fullnameOverride }}
13
+ {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
14
+ {{- else }}
15
+ {{- $name := default .Chart.Name .Values.nameOverride }}
16
+ {{- if contains $name .Release.Name }}
17
+ {{- .Release.Name | trunc 63 | trimSuffix "-" }}
18
+ {{- else }}
19
+ {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
20
+ {{- end }}
21
+ {{- end }}
22
+ {{- end }}
23
+
24
+ {{/*
25
+ Create chart name and version as used by the chart label.
26
+ */}}
27
+ {{- define "mnemocore.chart" -}}
28
+ {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
29
+ {{- end }}
30
+
31
+ {{/*
32
+ Common labels
33
+ */}}
34
+ {{- define "mnemocore.labels" -}}
35
+ helm.sh/chart: {{ include "mnemocore.chart" . }}
36
+ {{ include "mnemocore.selectorLabels" . }}
37
+ {{- if .Chart.AppVersion }}
38
+ app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
39
+ {{- end }}
40
+ app.kubernetes.io/managed-by: {{ .Release.Service }}
41
+ {{- end }}
42
+
43
+ {{/*
44
+ Selector labels
45
+ */}}
46
+ {{- define "mnemocore.selectorLabels" -}}
47
+ app.kubernetes.io/name: {{ include "mnemocore.name" . }}
48
+ app.kubernetes.io/instance: {{ .Release.Name }}
49
+ {{- end }}
50
+
51
+ {{/*
52
+ Create the name of the service account to use
53
+ */}}
54
+ {{- define "mnemocore.serviceAccountName" -}}
55
+ {{- if .Values.mnemocore.serviceAccount.create }}
56
+ {{- default (include "mnemocore.fullname" .) .Values.mnemocore.serviceAccount.name }}
57
+ {{- else }}
58
+ {{- default "default" .Values.mnemocore.serviceAccount.name }}
59
+ {{- end }}
60
+ {{- end }}
61
+
62
+ {{/*
63
+ Redis fullname
64
+ */}}
65
+ {{- define "mnemocore.redis.fullname" -}}
66
+ {{- printf "%s-redis" (include "mnemocore.fullname" .) }}
67
+ {{- end }}
68
+
69
+ {{/*
70
+ Qdrant fullname
71
+ */}}
72
+ {{- define "mnemocore.qdrant.fullname" -}}
73
+ {{- printf "%s-qdrant" (include "mnemocore.fullname" .) }}
74
+ {{- end }}
75
+
76
+ {{/*
77
+ ConfigMap fullname
78
+ */}}
79
+ {{- define "mnemocore.configmap.fullname" -}}
80
+ {{- printf "%s-config" (include "mnemocore.fullname" .) }}
81
+ {{- end }}
82
+
83
+ {{/*
84
+ Secret fullname
85
+ */}}
86
+ {{- define "mnemocore.secret.fullname" -}}
87
+ {{- printf "%s-secret" (include "mnemocore.fullname" .) }}
88
+ {{- end }}
89
+
90
+ {{/*
91
+ PVC fullname
92
+ */}}
93
+ {{- define "mnemocore.pvc.fullname" -}}
94
+ {{- printf "%s-data" (include "mnemocore.fullname" .) }}
95
+ {{- end }}
96
+
97
+ {{/*
98
+ HPA fullname
99
+ */}}
100
+ {{- define "mnemocore.hpa.fullname" -}}
101
+ {{- printf "%s-hpa" (include "mnemocore.fullname" .) }}
102
+ {{- end }}
103
+
104
+ {{/*
105
+ Return the proper Storage Class
106
+ */}}
107
+ {{- define "mnemocore.storageClass" -}}
108
+ {{- if .Values.global.storageClass }}
109
+ {{- if (eq "-" .Values.global.storageClass) }}
110
+ {{- else }}
111
+ storageClassName: "{{ .Values.global.storageClass }}"
112
+ {{- end }}
113
+ {{- else if .Values.mnemocore.persistence.storageClass }}
114
+ {{- if (eq "-" .Values.mnemocore.persistence.storageClass) }}
115
+ {{- else }}
116
+ storageClassName: "{{ .Values.mnemocore.persistence.storageClass }}"
117
+ {{- end }}
118
+ {{- end }}
119
+ {{- end }}
helm/mnemocore/templates/configmap.yaml ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{/*
2
+ MnemoCore ConfigMap - HAIM Configuration
3
+ */}}
4
+ apiVersion: v1
5
+ kind: ConfigMap
6
+ metadata:
7
+ name: {{ include "mnemocore.configmap.fullname" . }}
8
+ labels:
9
+ {{- include "mnemocore.labels" . | nindent 4 }}
10
+ app.kubernetes.io/component: api
11
+ data:
12
+ config.yaml: |
13
+ # HAIM Configuration - Generated by Helm Chart
14
+ # MnemoCore Phase {{ .Values.mnemocore.config.version }}
15
+
16
+ haim:
17
+ version: "{{ .Values.mnemocore.config.version }}"
18
+ dimensionality: {{ .Values.mnemocore.config.dimensionality }}
19
+
20
+ # Vector encoding
21
+ encoding:
22
+ mode: "{{ .Values.mnemocore.config.encoding.mode }}"
23
+ token_method: "{{ .Values.mnemocore.config.encoding.token_method }}"
24
+
25
+ # Memory tier thresholds
26
+ tiers:
27
+ hot:
28
+ max_memories: {{ .Values.mnemocore.config.tiers.hot.max_memories }}
29
+ ltp_threshold_min: {{ .Values.mnemocore.config.tiers.hot.ltp_threshold_min }}
30
+ eviction_policy: "{{ .Values.mnemocore.config.tiers.hot.eviction_policy }}"
31
+
32
+ warm:
33
+ max_memories: {{ .Values.mnemocore.config.tiers.warm.max_memories }}
34
+ ltp_threshold_min: {{ .Values.mnemocore.config.tiers.warm.ltp_threshold_min }}
35
+ consolidation_interval_hours: {{ .Values.mnemocore.config.tiers.warm.consolidation_interval_hours }}
36
+ storage_backend: "{{ .Values.mnemocore.config.tiers.warm.storage_backend }}"
37
+
38
+ cold:
39
+ storage_backend: "{{ .Values.mnemocore.config.tiers.cold.storage_backend }}"
40
+ compression: "{{ .Values.mnemocore.config.tiers.cold.compression }}"
41
+ archive_threshold_days: {{ .Values.mnemocore.config.tiers.cold.archive_threshold_days }}
42
+
43
+ # LTP (Long-Term Potentiation) decay parameters
44
+ ltp:
45
+ initial_importance: {{ .Values.mnemocore.config.ltp.initial_importance }}
46
+ decay_lambda: {{ .Values.mnemocore.config.ltp.decay_lambda }}
47
+ permanence_threshold: {{ .Values.mnemocore.config.ltp.permanence_threshold }}
48
+ half_life_days: {{ .Values.mnemocore.config.ltp.half_life_days }}
49
+
50
+ # Hysteresis (prevent boundary thrashing between tiers)
51
+ hysteresis:
52
+ promote_delta: {{ .Values.mnemocore.config.hysteresis.promote_delta }}
53
+ demote_delta: {{ .Values.mnemocore.config.hysteresis.demote_delta }}
54
+
55
+ # Redis Configuration
56
+ redis:
57
+ {{- if .Values.redis.url }}
58
+ url: "{{ .Values.redis.url }}"
59
+ {{- else }}
60
+ url: "redis://{{ include "mnemocore.redis.fullname" . }}:{{ .Values.redis.service.port }}/0"
61
+ {{- end }}
62
+ stream_key: "haim:subconscious"
63
+ max_connections: 10
64
+ socket_timeout: 5
65
+
66
+ # Qdrant Configuration
67
+ qdrant:
68
+ {{- if .Values.qdrant.url }}
69
+ url: "{{ .Values.qdrant.url }}"
70
+ {{- else }}
71
+ url: "http://{{ include "mnemocore.qdrant.fullname" . }}:{{ .Values.qdrant.service.httpPort }}"
72
+ {{- end }}
73
+ collection_hot: "{{ .Values.qdrant.collections.hot.name }}"
74
+ collection_warm: "{{ .Values.qdrant.collections.warm.name }}"
75
+ binary_quantization: {{ .Values.qdrant.collections.hot.binaryQuantization }}
76
+ always_ram: {{ .Values.qdrant.collections.hot.alwaysRam }}
77
+ hnsw_m: {{ .Values.qdrant.collections.hot.hnswM }}
78
+ hnsw_ef_construct: {{ .Values.qdrant.collections.hot.hnswEfConstruct }}
79
+
80
+ # GPU Configuration
81
+ gpu:
82
+ enabled: {{ .Values.mnemocore.config.gpu.enabled }}
83
+ device: "{{ .Values.mnemocore.config.gpu.device }}"
84
+ batch_size: {{ .Values.mnemocore.config.gpu.batch_size }}
85
+ fallback_to_cpu: {{ .Values.mnemocore.config.gpu.fallback_to_cpu }}
86
+
87
+ # Observability
88
+ observability:
89
+ metrics_port: {{ .Values.mnemocore.config.observability.metrics_port }}
90
+ log_level: "{{ .Values.mnemocore.config.observability.log_level }}"
91
+ structured_logging: {{ .Values.mnemocore.config.observability.structured_logging }}
92
+
93
+ # Persistence paths
94
+ paths:
95
+ data_dir: "{{ .Values.mnemocore.config.paths.data_dir }}"
96
+ memory_file: "{{ .Values.mnemocore.config.paths.memory_file }}"
97
+ codebook_file: "{{ .Values.mnemocore.config.paths.codebook_file }}"
98
+ concepts_file: "{{ .Values.mnemocore.config.paths.concepts_file }}"
99
+ synapses_file: "{{ .Values.mnemocore.config.paths.synapses_file }}"
100
+ warm_mmap_dir: "{{ .Values.mnemocore.config.paths.warm_mmap_dir }}"
101
+ cold_archive_dir: "{{ .Values.mnemocore.config.paths.cold_archive_dir }}"
102
+
103
+ # MCP (Model Context Protocol) bridge
104
+ mcp:
105
+ enabled: {{ .Values.mnemocore.config.mcp.enabled }}
106
+ transport: "{{ .Values.mnemocore.config.mcp.transport }}"
107
+ host: "{{ .Values.mnemocore.config.mcp.host }}"
108
+ port: {{ .Values.mnemocore.config.mcp.port }}
109
+ api_base_url: "{{ .Values.mnemocore.config.mcp.api_base_url }}"
110
+ timeout_seconds: {{ .Values.mnemocore.config.mcp.timeout_seconds }}
111
+ allow_tools:
112
+ {{- range .Values.mnemocore.config.mcp.allow_tools }}
113
+ - "{{ . }}"
114
+ {{- end }}
helm/mnemocore/templates/deployment-qdrant.yaml ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
{{/*
Qdrant Deployment - Only created if embedded mode is enabled and not using official Qdrant chart.

FIX: the original file opened the Deployment with
  {{- if and .Values.qdrant.enabled .Values.qdrant.embedded.enabled }}
but never closed it (7 openers vs 6 `end`s), so `helm template` failed to
parse this file. The missing {{- end }} is added before the first `---`.
Also: the `storage` volume was always declared (emptyDir fallback) but only
mounted when persistence was enabled, leaving the emptyDir unused; Qdrant
writes /qdrant/storage either way, so the mount is now unconditional.
*/}}
{{- if and .Values.qdrant.enabled .Values.qdrant.embedded.enabled }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "mnemocore.qdrant.fullname" . }}
  labels:
    {{- include "mnemocore.labels" . | nindent 4 }}
    app.kubernetes.io/component: qdrant
spec:
  replicas: 1
  selector:
    matchLabels:
      {{- include "mnemocore.selectorLabels" . | nindent 6 }}
      app.kubernetes.io/component: qdrant
  template:
    metadata:
      labels:
        {{- include "mnemocore.selectorLabels" . | nindent 8 }}
        app.kubernetes.io/component: qdrant
    spec:
      {{- with .Values.global.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      securityContext:
        runAsNonRoot: true
        runAsUser: 1000
        runAsGroup: 1000
        fsGroup: 1000
      containers:
        - name: qdrant
          image: "{{ .Values.global.imageRegistry }}{{ .Values.qdrant.image.repository }}:{{ .Values.qdrant.image.tag }}"
          imagePullPolicy: {{ .Values.qdrant.image.pullPolicy }}
          ports:
            - name: http
              containerPort: {{ .Values.qdrant.service.httpPort }}
              protocol: TCP
            - name: grpc
              containerPort: {{ .Values.qdrant.service.grpcPort }}
              protocol: TCP
          env:
            - name: QDRANT__SERVICE__GRPC_PORT
              value: {{ .Values.qdrant.service.grpcPort | quote }}
            - name: QDRANT__LOG_LEVEL
              value: {{ .Values.qdrant.config.logLevel | quote }}
          # Mount unconditionally: falls back to an emptyDir volume when
          # persistence is disabled (see volumes below).
          volumeMounts:
            - name: storage
              mountPath: /qdrant/storage
          {{- if .Values.qdrant.probes.liveness.enabled }}
          livenessProbe:
            httpGet:
              # NOTE(review): newer Qdrant releases expose /healthz and
              # /readyz — confirm /health matches the pinned image tag.
              path: /health
              port: http
            initialDelaySeconds: {{ .Values.qdrant.probes.liveness.initialDelaySeconds }}
            periodSeconds: {{ .Values.qdrant.probes.liveness.periodSeconds }}
            timeoutSeconds: {{ .Values.qdrant.probes.liveness.timeoutSeconds }}
            failureThreshold: {{ .Values.qdrant.probes.liveness.failureThreshold }}
          {{- end }}
          {{- if .Values.qdrant.probes.readiness.enabled }}
          readinessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: {{ .Values.qdrant.probes.readiness.initialDelaySeconds }}
            periodSeconds: {{ .Values.qdrant.probes.readiness.periodSeconds }}
            timeoutSeconds: {{ .Values.qdrant.probes.readiness.timeoutSeconds }}
            failureThreshold: {{ .Values.qdrant.probes.readiness.failureThreshold }}
          {{- end }}
          resources:
            {{- toYaml .Values.qdrant.resources | nindent 12 }}
      volumes:
        {{- if .Values.qdrant.persistence.enabled }}
        - name: storage
          persistentVolumeClaim:
            claimName: {{ include "mnemocore.qdrant.fullname" . }}-storage
        {{- else }}
        - name: storage
          emptyDir: {}
        {{- end }}
{{- end }}
---
{{/*
Qdrant PVC
*/}}
{{- if and .Values.qdrant.enabled .Values.qdrant.embedded.enabled .Values.qdrant.persistence.enabled }}
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: {{ include "mnemocore.qdrant.fullname" . }}-storage
  labels:
    {{- include "mnemocore.labels" . | nindent 4 }}
    app.kubernetes.io/component: qdrant
  {{- with .Values.qdrant.persistence.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
spec:
  accessModes:
    {{- range .Values.qdrant.persistence.accessModes }}
    - {{ . | quote }}
    {{- end }}
  {{- if .Values.global.storageClass }}
  storageClassName: {{ .Values.global.storageClass | quote }}
  {{- else if .Values.qdrant.persistence.storageClass }}
  storageClassName: {{ .Values.qdrant.persistence.storageClass | quote }}
  {{- end }}
  resources:
    requests:
      storage: {{ .Values.qdrant.persistence.size | quote }}
{{- end }}
---
{{/*
Qdrant Service
*/}}
{{- if and .Values.qdrant.enabled .Values.qdrant.embedded.enabled }}
apiVersion: v1
kind: Service
metadata:
  name: {{ include "mnemocore.qdrant.fullname" . }}
  labels:
    {{- include "mnemocore.labels" . | nindent 4 }}
    app.kubernetes.io/component: qdrant
spec:
  type: {{ .Values.qdrant.service.type }}
  ports:
    - port: {{ .Values.qdrant.service.httpPort }}
      targetPort: http
      protocol: TCP
      name: http
    - port: {{ .Values.qdrant.service.grpcPort }}
      targetPort: grpc
      protocol: TCP
      name: grpc
  selector:
    {{- include "mnemocore.selectorLabels" . | nindent 4 }}
    app.kubernetes.io/component: qdrant
{{- end }}
helm/mnemocore/templates/deployment-redis.yaml ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
{{/*
Redis Deployment - Only created if embedded mode is enabled and not using Bitnami chart.

FIX: the original file opened the Deployment with
  {{- if and .Values.redis.enabled .Values.redis.embedded.enabled }}
but never closed it (7 openers vs 6 `end`s), so `helm template` failed to
parse this file. The missing {{- end }} is added before the first `---`.
Also: the `data` volume was always declared (emptyDir fallback) but only
mounted when persistence was enabled, leaving the emptyDir unused; Redis
uses /data either way, so the mount is now unconditional.
*/}}
{{- if and .Values.redis.enabled .Values.redis.embedded.enabled }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "mnemocore.redis.fullname" . }}
  labels:
    {{- include "mnemocore.labels" . | nindent 4 }}
    app.kubernetes.io/component: redis
spec:
  replicas: 1
  selector:
    matchLabels:
      {{- include "mnemocore.selectorLabels" . | nindent 6 }}
      app.kubernetes.io/component: redis
  template:
    metadata:
      labels:
        {{- include "mnemocore.selectorLabels" . | nindent 8 }}
        app.kubernetes.io/component: redis
    spec:
      {{- with .Values.global.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      securityContext:
        runAsNonRoot: true
        runAsUser: 999
        runAsGroup: 999
        fsGroup: 999
      containers:
        - name: redis
          image: "{{ .Values.global.imageRegistry }}{{ .Values.redis.image.repository }}:{{ .Values.redis.image.tag }}"
          imagePullPolicy: {{ .Values.redis.image.pullPolicy }}
          ports:
            - name: redis
              containerPort: {{ .Values.redis.service.port }}
              protocol: TCP
          command:
            - redis-server
            - --save
            - {{ .Values.redis.config.save | quote }}
            - --loglevel
            - {{ .Values.redis.config.logLevel | quote }}
            - --maxmemory
            - {{ .Values.redis.config.maxmemory | quote }}
            - --maxmemory-policy
            - {{ .Values.redis.config.maxmemoryPolicy | quote }}
          # Mount unconditionally: falls back to an emptyDir volume when
          # persistence is disabled (see volumes below).
          volumeMounts:
            - name: data
              mountPath: /data
          {{- if .Values.redis.probes.liveness.enabled }}
          livenessProbe:
            exec:
              command:
                - redis-cli
                - ping
            initialDelaySeconds: {{ .Values.redis.probes.liveness.initialDelaySeconds }}
            periodSeconds: {{ .Values.redis.probes.liveness.periodSeconds }}
            timeoutSeconds: {{ .Values.redis.probes.liveness.timeoutSeconds }}
            failureThreshold: {{ .Values.redis.probes.liveness.failureThreshold }}
          {{- end }}
          {{- if .Values.redis.probes.readiness.enabled }}
          readinessProbe:
            exec:
              command:
                - redis-cli
                - ping
            initialDelaySeconds: {{ .Values.redis.probes.readiness.initialDelaySeconds }}
            periodSeconds: {{ .Values.redis.probes.readiness.periodSeconds }}
            timeoutSeconds: {{ .Values.redis.probes.readiness.timeoutSeconds }}
            failureThreshold: {{ .Values.redis.probes.readiness.failureThreshold }}
          {{- end }}
          resources:
            {{- toYaml .Values.redis.resources | nindent 12 }}
      volumes:
        {{- if .Values.redis.persistence.enabled }}
        - name: data
          persistentVolumeClaim:
            claimName: {{ include "mnemocore.redis.fullname" . }}-data
        {{- else }}
        - name: data
          emptyDir: {}
        {{- end }}
{{- end }}
---
{{/*
Redis PVC
*/}}
{{- if and .Values.redis.enabled .Values.redis.embedded.enabled .Values.redis.persistence.enabled }}
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: {{ include "mnemocore.redis.fullname" . }}-data
  labels:
    {{- include "mnemocore.labels" . | nindent 4 }}
    app.kubernetes.io/component: redis
  {{- with .Values.redis.persistence.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
spec:
  accessModes:
    {{- range .Values.redis.persistence.accessModes }}
    - {{ . | quote }}
    {{- end }}
  {{- if .Values.global.storageClass }}
  storageClassName: {{ .Values.global.storageClass | quote }}
  {{- else if .Values.redis.persistence.storageClass }}
  storageClassName: {{ .Values.redis.persistence.storageClass | quote }}
  {{- end }}
  resources:
    requests:
      storage: {{ .Values.redis.persistence.size | quote }}
{{- end }}
---
{{/*
Redis Service
*/}}
{{- if and .Values.redis.enabled .Values.redis.embedded.enabled }}
apiVersion: v1
kind: Service
metadata:
  name: {{ include "mnemocore.redis.fullname" . }}
  labels:
    {{- include "mnemocore.labels" . | nindent 4 }}
    app.kubernetes.io/component: redis
spec:
  type: {{ .Values.redis.service.type }}
  ports:
    - port: {{ .Values.redis.service.port }}
      targetPort: redis
      protocol: TCP
      name: redis
  selector:
    {{- include "mnemocore.selectorLabels" . | nindent 4 }}
    app.kubernetes.io/component: redis
{{- end }}
helm/mnemocore/templates/deployment.yaml ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{/*
2
+ MnemoCore API Deployment
3
+ */}}
4
+ apiVersion: apps/v1
5
+ kind: Deployment
6
+ metadata:
7
+ name: {{ include "mnemocore.fullname" . }}
8
+ labels:
9
+ {{- include "mnemocore.labels" . | nindent 4 }}
10
+ app.kubernetes.io/component: api
11
+ spec:
12
+ {{- if not .Values.mnemocore.autoscaling.enabled }}
13
+ replicas: {{ .Values.mnemocore.replicaCount }}
14
+ {{- end }}
15
+ selector:
16
+ matchLabels:
17
+ {{- include "mnemocore.selectorLabels" . | nindent 6 }}
18
+ app.kubernetes.io/component: api
19
+ template:
20
+ metadata:
21
+ annotations:
22
+ checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }}
23
+ checksum/secret: {{ include (print $.Template.BasePath "/secret.yaml") . | sha256sum }}
24
+ {{- with .Values.mnemocore.podAnnotations }}
25
+ {{- toYaml . | nindent 8 }}
26
+ {{- end }}
27
+ labels:
28
+ {{- include "mnemocore.selectorLabels" . | nindent 8 }}
29
+ app.kubernetes.io/component: api
30
+ {{- with .Values.mnemocore.podLabels }}
31
+ {{- toYaml . | nindent 8 }}
32
+ {{- end }}
33
+ spec:
34
+ {{- with .Values.global.imagePullSecrets }}
35
+ imagePullSecrets:
36
+ {{- toYaml . | nindent 8 }}
37
+ {{- end }}
38
+ serviceAccountName: {{ include "mnemocore.serviceAccountName" . }}
39
+ {{- if .Values.mnemocore.priorityClassName }}
40
+ priorityClassName: {{ .Values.mnemocore.priorityClassName | quote }}
41
+ {{- end }}
42
+ securityContext:
43
+ {{- toYaml .Values.mnemocore.podSecurityContext | nindent 8 }}
44
+ terminationGracePeriodSeconds: {{ .Values.mnemocore.terminationGracePeriodSeconds }}
45
+ {{- with .Values.mnemocore.initContainers }}
46
+ initContainers:
47
+ {{- toYaml . | nindent 8 }}
48
+ {{- end }}
49
+ containers:
50
+ - name: mnemocore
51
+ securityContext:
52
+ {{- toYaml .Values.mnemocore.securityContext | nindent 12 }}
53
+ image: "{{ .Values.global.imageRegistry }}{{ .Values.mnemocore.image.repository }}:{{ .Values.mnemocore.image.tag | default .Chart.AppVersion }}"
54
+ imagePullPolicy: {{ .Values.mnemocore.image.pullPolicy }}
55
+ ports:
56
+ - name: http
57
+ containerPort: {{ .Values.mnemocore.ports.api }}
58
+ protocol: TCP
59
+ - name: metrics
60
+ containerPort: {{ .Values.mnemocore.ports.metrics }}
61
+ protocol: TCP
62
+ env:
63
+ - name: HOST
64
+ value: {{ .Values.mnemocore.env.host | quote }}
65
+ - name: PORT
66
+ value: {{ .Values.mnemocore.ports.api | quote }}
67
+ - name: LOG_LEVEL
68
+ value: {{ .Values.mnemocore.env.logLevel | quote }}
69
+ - name: REDIS_URL
70
+ {{- if .Values.redis.url }}
71
+ value: {{ .Values.redis.url | quote }}
72
+ {{- else }}
73
+ value: "redis://{{ include "mnemocore.redis.fullname" . }}:{{ .Values.redis.service.port }}/0"
74
+ {{- end }}
75
+ - name: QDRANT_URL
76
+ {{- if .Values.qdrant.url }}
77
+ value: {{ .Values.qdrant.url | quote }}
78
+ {{- else }}
79
+ value: "http://{{ include "mnemocore.qdrant.fullname" . }}:{{ .Values.qdrant.service.httpPort }}"
80
+ {{- end }}
81
+ {{- if .Values.mnemocore.apiKey.existingSecret }}
82
+ - name: HAIM_API_KEY
83
+ valueFrom:
84
+ secretKeyRef:
85
+ name: {{ .Values.mnemocore.apiKey.existingSecret }}
86
+ key: {{ .Values.mnemocore.apiKey.key }}
87
+ {{- else if .Values.mnemocore.apiKey.value }}
88
+ - name: HAIM_API_KEY
89
+ valueFrom:
90
+ secretKeyRef:
91
+ name: {{ include "mnemocore.fullname" . }}-api-key
92
+ key: api-key
93
+ {{- end }}
94
+ {{- with .Values.mnemocore.extraVolumeMounts }}
95
+ volumeMounts:
96
+ - name: config
97
+ mountPath: /app/config.yaml
98
+ subPath: config.yaml
99
+ readOnly: true
100
+ - name: data
101
+ mountPath: /app/data
102
+ {{- toYaml . | nindent 12 }}
103
+ {{- else }}
104
+ volumeMounts:
105
+ - name: config
106
+ mountPath: /app/config.yaml
107
+ subPath: config.yaml
108
+ readOnly: true
109
+ - name: data
110
+ mountPath: /app/data
111
+ {{- end }}
112
+ {{- if .Values.mnemocore.probes.liveness.enabled }}
113
+ livenessProbe:
114
+ exec:
115
+ command:
116
+ - python
117
+ - /app/scripts/ops/healthcheck.py
118
+ initialDelaySeconds: {{ .Values.mnemocore.probes.liveness.initialDelaySeconds }}
119
+ periodSeconds: {{ .Values.mnemocore.probes.liveness.periodSeconds }}
120
+ timeoutSeconds: {{ .Values.mnemocore.probes.liveness.timeoutSeconds }}
121
+ failureThreshold: {{ .Values.mnemocore.probes.liveness.failureThreshold }}
122
+ successThreshold: {{ .Values.mnemocore.probes.liveness.successThreshold }}
123
+ {{- end }}
124
+ {{- if .Values.mnemocore.probes.readiness.enabled }}
125
+ readinessProbe:
126
+ exec:
127
+ command:
128
+ - python
129
+ - /app/scripts/ops/healthcheck.py
130
+ initialDelaySeconds: {{ .Values.mnemocore.probes.readiness.initialDelaySeconds }}
131
+ periodSeconds: {{ .Values.mnemocore.probes.readiness.periodSeconds }}
132
+ timeoutSeconds: {{ .Values.mnemocore.probes.readiness.timeoutSeconds }}
133
+ failureThreshold: {{ .Values.mnemocore.probes.readiness.failureThreshold }}
134
+ successThreshold: {{ .Values.mnemocore.probes.readiness.successThreshold }}
135
+ {{- end }}
136
+ {{- if .Values.mnemocore.probes.startup.enabled }}
137
+ startupProbe:
138
+ exec:
139
+ command:
140
+ - python
141
+ - /app/scripts/ops/healthcheck.py
142
+ initialDelaySeconds: {{ .Values.mnemocore.probes.startup.initialDelaySeconds }}
143
+ periodSeconds: {{ .Values.mnemocore.probes.startup.periodSeconds }}
144
+ timeoutSeconds: {{ .Values.mnemocore.probes.startup.timeoutSeconds }}
145
+ failureThreshold: {{ .Values.mnemocore.probes.startup.failureThreshold }}
146
+ successThreshold: {{ .Values.mnemocore.probes.startup.successThreshold }}
147
+ {{- end }}
148
+ resources:
149
+ {{- toYaml .Values.mnemocore.resources | nindent 12 }}
150
+ {{- with .Values.mnemocore.nodeSelector }}
151
+ nodeSelector:
152
+ {{- toYaml . | nindent 8 }}
153
+ {{- end }}
154
+ {{- with .Values.mnemocore.affinity }}
155
+ affinity:
156
+ {{- toYaml . | nindent 8 }}
157
+ {{- end }}
158
+ {{- with .Values.mnemocore.tolerations }}
159
+ tolerations:
160
+ {{- toYaml . | nindent 8 }}
161
+ {{- end }}
162
+ volumes:
163
+ - name: config
164
+ configMap:
165
+ name: {{ include "mnemocore.fullname" . }}-config
166
+ {{- if .Values.mnemocore.persistence.enabled }}
167
+ - name: data
168
+ persistentVolumeClaim:
169
+ claimName: {{ include "mnemocore.fullname" . }}-data
170
+ {{- else }}
171
+ - name: data
172
+ emptyDir: {}
173
+ {{- end }}
174
+ {{- with .Values.mnemocore.extraVolumes }}
175
+ {{- toYaml . | nindent 8 }}
176
+ {{- end }}
helm/mnemocore/templates/hpa.yaml ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{/*
2
+ Horizontal Pod Autoscaler for MnemoCore API
3
+ */}}
4
+ {{- if .Values.mnemocore.autoscaling.enabled }}
5
+ apiVersion: autoscaling/v2
6
+ kind: HorizontalPodAutoscaler
7
+ metadata:
8
+ name: {{ include "mnemocore.hpa.fullname" . }}
9
+ labels:
10
+ {{- include "mnemocore.labels" . | nindent 4 }}
11
+ app.kubernetes.io/component: api
12
+ spec:
13
+ scaleTargetRef:
14
+ apiVersion: apps/v1
15
+ kind: Deployment
16
+ name: {{ include "mnemocore.fullname" . }}
17
+ minReplicas: {{ .Values.mnemocore.autoscaling.minReplicas }}
18
+ maxReplicas: {{ .Values.mnemocore.autoscaling.maxReplicas }}
19
+ metrics:
20
+ {{- if .Values.mnemocore.autoscaling.targetCPUUtilizationPercentage }}
21
+ - type: Resource
22
+ resource:
23
+ name: cpu
24
+ target:
25
+ type: Utilization
26
+ averageUtilization: {{ .Values.mnemocore.autoscaling.targetCPUUtilizationPercentage }}
27
+ {{- end }}
28
+ {{- if .Values.mnemocore.autoscaling.targetMemoryUtilizationPercentage }}
29
+ - type: Resource
30
+ resource:
31
+ name: memory
32
+ target:
33
+ type: Utilization
34
+ averageUtilization: {{ .Values.mnemocore.autoscaling.targetMemoryUtilizationPercentage }}
35
+ {{- end }}
36
+ {{- with .Values.mnemocore.autoscaling.metrics }}
37
+ {{- toYaml . | nindent 4 }}
38
+ {{- end }}
39
+ {{- with .Values.mnemocore.autoscaling.behavior }}
40
+ behavior:
41
+ {{- toYaml . | nindent 4 }}
42
+ {{- end }}
43
+ {{- end }}
helm/mnemocore/templates/ingress.yaml ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{/*
2
+ Ingress for MnemoCore API
3
+ */}}
4
+ {{- if .Values.mnemocore.ingress.enabled }}
5
+ apiVersion: networking.k8s.io/v1
6
+ kind: Ingress
7
+ metadata:
8
+ name: {{ include "mnemocore.fullname" . }}
9
+ labels:
10
+ {{- include "mnemocore.labels" . | nindent 4 }}
11
+ app.kubernetes.io/component: api
12
+ {{- with .Values.mnemocore.ingress.annotations }}
13
+ annotations:
14
+ {{- toYaml . | nindent 4 }}
15
+ {{- end }}
16
+ spec:
17
+ {{- if .Values.mnemocore.ingress.className }}
18
+ ingressClassName: {{ .Values.mnemocore.ingress.className }}
19
+ {{- end }}
20
+ {{- if .Values.mnemocore.ingress.tls }}
21
+ tls:
22
+ {{- range .Values.mnemocore.ingress.tls }}
23
+ - hosts:
24
+ {{- range .hosts }}
25
+ - {{ . | quote }}
26
+ {{- end }}
27
+ secretName: {{ .secretName }}
28
+ {{- end }}
29
+ {{- end }}
30
+ rules:
31
+ {{- range .Values.mnemocore.ingress.hosts }}
32
+ - host: {{ .host | quote }}
33
+ http:
34
+ paths:
35
+ {{- range .paths }}
36
+ - path: {{ .path }}
37
+ pathType: {{ .pathType }}
38
+ backend:
39
+ service:
40
+ name: {{ include "mnemocore.fullname" $ }}
41
+ port:
42
+ number: {{ $.Values.mnemocore.service.port }}
43
+ {{- end }}
44
+ {{- end }}
45
+ {{- end }}
helm/mnemocore/templates/networkpolicy.yaml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{/*
2
+ Network Policy for MnemoCore
3
+ */}}
4
+ {{- if .Values.networkPolicy.enabled }}
5
+ apiVersion: networking.k8s.io/v1
6
+ kind: NetworkPolicy
7
+ metadata:
8
+ name: {{ include "mnemocore.fullname" . }}-netpol
9
+ labels:
10
+ {{- include "mnemocore.labels" . | nindent 4 }}
11
+ app.kubernetes.io/component: api
12
+ spec:
13
+ podSelector:
14
+ matchLabels:
15
+ {{- include "mnemocore.selectorLabels" . | nindent 6 }}
16
+ policyTypes:
17
+ - Ingress
18
+ - Egress
19
+ ingress:
20
+ {{- with .Values.networkPolicy.ingress }}
21
+ {{- toYaml . | nindent 4 }}
22
+ {{- end }}
23
+ egress:
24
+ # Allow DNS
25
+ - to:
26
+ - namespaceSelector: {}
27
+ ports:
28
+ - protocol: UDP
29
+ port: 53
30
+ - protocol: TCP
31
+ port: 53
32
+ # Allow Redis
33
+ - to:
34
+ - podSelector:
35
+ matchLabels:
36
+ app.kubernetes.io/component: redis
37
+ ports:
38
+ - protocol: TCP
39
+ port: {{ .Values.redis.service.port }}
40
+ # Allow Qdrant
41
+ - to:
42
+ - podSelector:
43
+ matchLabels:
44
+ app.kubernetes.io/component: qdrant
45
+ ports:
46
+ - protocol: TCP
47
+ port: {{ .Values.qdrant.service.httpPort }}
48
+ - protocol: TCP
49
+ port: {{ .Values.qdrant.service.grpcPort }}
50
+ {{- end }}
helm/mnemocore/templates/notes.txt ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{/*
2
+ MnemoCore Helm Chart Notes
3
+ */}}
4
+ {{- define "mnemocore.notes" -}}
5
+ MnemoCore has been deployed!
6
+
7
+ ================================================================================
8
+ MNEMOCORE DEPLOYMENT NOTES
9
+ ================================================================================
10
+
11
+ Your MnemoCore cognitive memory infrastructure is now running.
12
+
13
+ NAMESPACE: {{ .Release.Namespace }}
14
+
15
+ SERVICE:
16
+ - API: {{ include "mnemocore.fullname" . }}:{{ .Values.mnemocore.service.port }}
17
+ - Metrics: {{ include "mnemocore.fullname" . }}:{{ .Values.mnemocore.service.metricsPort }}
18
+
19
+ {{- if .Values.mnemocore.ingress.enabled }}
20
+ INGRESS:
21
+ - Host: {{ (index .Values.mnemocore.ingress.hosts 0).host }}
22
+ {{- end }}
23
+
24
+ COMPONENTS:
25
+ - MnemoCore API: {{ .Values.mnemocore.replicaCount }} replica(s)
26
+ {{- if .Values.redis.enabled }}
27
+ - Redis: {{ include "mnemocore.redis.fullname" . }}:{{ .Values.redis.service.port }}
28
+ {{- end }}
29
+ {{- if .Values.qdrant.enabled }}
30
+ - Qdrant: {{ include "mnemocore.qdrant.fullname" . }}:{{ .Values.qdrant.service.httpPort }}
31
+ {{- end }}
32
+
33
+ RESOURCES:
34
+ MnemoCore:
35
+ Limits:
36
+ CPU: {{ .Values.mnemocore.resources.limits.cpu }}
37
+ Memory: {{ .Values.mnemocore.resources.limits.memory }}
38
+ Requests:
39
+ CPU: {{ .Values.mnemocore.resources.requests.cpu }}
40
+ Memory: {{ .Values.mnemocore.resources.requests.memory }}
41
+
42
+ {{- if .Values.mnemocore.autoscaling.enabled }}
43
+ AUTOSCALING:
44
+ - Min Replicas: {{ .Values.mnemocore.autoscaling.minReplicas }}
45
+ - Max Replicas: {{ .Values.mnemocore.autoscaling.maxReplicas }}
46
+ - CPU Target: {{ .Values.mnemocore.autoscaling.targetCPUUtilizationPercentage }}%
47
+ {{- if .Values.mnemocore.autoscaling.targetMemoryUtilizationPercentage }}
48
+ - Memory Target: {{ .Values.mnemocore.autoscaling.targetMemoryUtilizationPercentage }}%
49
+ {{- end }}
50
+ {{- end }}
51
+
52
+ PROBES:
53
+ - Liveness: Initial Delay: {{ .Values.mnemocore.probes.liveness.initialDelaySeconds }}s
54
+ - Readiness: Initial Delay: {{ .Values.mnemocore.probes.readiness.initialDelaySeconds }}s
55
+ - Startup: Initial Delay: {{ .Values.mnemocore.probes.startup.initialDelaySeconds }}s
56
+
57
+ ================================================================================
58
+ GETTING STARTED
59
+ ================================================================================
60
+
61
+ 1. Forward the API port (for local testing):
62
+ kubectl port-forward svc/{{ include "mnemocore.fullname" . }} 8100:8100 -n {{ .Release.Namespace }}
63
+
64
+ 2. Check the health of the service:
65
+ curl http://localhost:8100/health
66
+
67
+ 3. Access Prometheus metrics:
68
+ kubectl port-forward svc/{{ include "mnemocore.fullname" . }} 9090:9090 -n {{ .Release.Namespace }}
69
+ curl http://localhost:9090/metrics
70
+
71
+ 4. View logs:
72
+ kubectl logs -l app.kubernetes.io/name={{ include "mnemocore.name" . }} -n {{ .Release.Namespace }} -f
73
+
74
+ 5. Check pod status:
75
+ kubectl get pods -l app.kubernetes.io/name={{ include "mnemocore.name" . }} -n {{ .Release.Namespace }}
76
+
77
+ ================================================================================
78
+ CONFIGURATION NOTES
79
+ ================================================================================
80
+
81
+ {{- if not .Values.mnemocore.apiKey.existingSecret }}
82
+ WARNING: API key is set via values. For production, use an existing secret:
83
+ --set mnemocore.apiKey.existingSecret=my-secret-name
84
+ {{- end }}
85
+
86
+ {{- if not .Values.mnemocore.persistence.enabled }}
87
+ WARNING: Persistence is disabled. Data will be lost on pod restart.
88
+ {{- end }}
89
+
90
+ HAIM Configuration:
91
+ - Dimensionality: {{ .Values.mnemocore.config.dimensionality }}
92
+ - Encoding Mode: {{ .Values.mnemocore.config.encoding.mode }}
93
+ - Hot Tier Max: {{ .Values.mnemocore.config.tiers.hot.max_memories }} memories
94
+ - Warm Tier Max: {{ .Values.mnemocore.config.tiers.warm.max_memories }} memories
95
+
96
+ For more information, visit:
97
+ https://github.com/your-org/mnemocore
98
+
99
+ ================================================================================
100
+ {{- end }}
helm/mnemocore/templates/pdb.yaml ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{/*
2
+ Pod Disruption Budget for MnemoCore
3
+ */}}
4
+ {{- if .Values.mnemocore.podDisruptionBudget.enabled }}
5
+ apiVersion: policy/v1
6
+ kind: PodDisruptionBudget
7
+ metadata:
8
+ name: {{ include "mnemocore.fullname" . }}-pdb
9
+ labels:
10
+ {{- include "mnemocore.labels" . | nindent 4 }}
11
+ app.kubernetes.io/component: api
12
+ spec:
13
+ {{- if .Values.mnemocore.podDisruptionBudget.minAvailable }}
14
+ minAvailable: {{ .Values.mnemocore.podDisruptionBudget.minAvailable }}
15
+ {{- end }}
16
+ {{- if .Values.mnemocore.podDisruptionBudget.maxUnavailable }}
17
+ maxUnavailable: {{ .Values.mnemocore.podDisruptionBudget.maxUnavailable }}
18
+ {{- end }}
19
+ selector:
20
+ matchLabels:
21
+ {{- include "mnemocore.selectorLabels" . | nindent 6 }}
22
+ app.kubernetes.io/component: api
23
+ {{- end }}
helm/mnemocore/templates/pvc.yaml ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{/*
2
+ Persistent Volume Claim for MnemoCore data
3
+ */}}
4
+ {{- if .Values.mnemocore.persistence.enabled }}
5
+ apiVersion: v1
6
+ kind: PersistentVolumeClaim
7
+ metadata:
8
+ name: {{ include "mnemocore.pvc.fullname" . }}
9
+ labels:
10
+ {{- include "mnemocore.labels" . | nindent 4 }}
11
+ app.kubernetes.io/component: api
12
+ {{- with .Values.mnemocore.persistence.annotations }}
13
+ annotations:
14
+ {{- toYaml . | nindent 4 }}
15
+ {{- end }}
16
+ spec:
17
+ accessModes:
18
+ {{- range .Values.mnemocore.persistence.accessModes }}
19
+ - {{ . | quote }}
20
+ {{- end }}
21
+ {{- if .Values.global.storageClass }}
22
+ storageClassName: {{ .Values.global.storageClass | quote }}
23
+ {{- else if .Values.mnemocore.persistence.storageClass }}
24
+ storageClassName: {{ .Values.mnemocore.persistence.storageClass | quote }}
25
+ {{- end }}
26
+ resources:
27
+ requests:
28
+ storage: {{ .Values.mnemocore.persistence.size | quote }}
29
+ {{- end }}
helm/mnemocore/templates/secret.yaml ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{/*
2
+ MnemoCore Secret - API Key and sensitive configuration
3
+ */}}
4
+ {{- if and (not .Values.mnemocore.apiKey.existingSecret) .Values.mnemocore.apiKey.value }}
5
+ apiVersion: v1
6
+ kind: Secret
7
+ metadata:
8
+ name: {{ include "mnemocore.fullname" . }}-api-key
9
+ labels:
10
+ {{- include "mnemocore.labels" . | nindent 4 }}
11
+ app.kubernetes.io/component: api
12
+ type: Opaque
13
+ data:
14
+ api-key: {{ .Values.mnemocore.apiKey.value | b64enc | quote }}
15
+ {{- end }}
16
+ ---
17
+ {{/*
18
+ MnemoCore Generic Secret for external service credentials
19
+ */}}
20
+ {{- if or .Values.redis.existingSecret .Values.qdrant.existingSecret }}
21
+ apiVersion: v1
22
+ kind: Secret
23
+ metadata:
24
+ name: {{ include "mnemocore.secret.fullname" . }}
25
+ labels:
26
+ {{- include "mnemocore.labels" . | nindent 4 }}
27
+ app.kubernetes.io/component: api
28
+ type: Opaque
29
+ data:
30
+ {{- if .Values.redis.existingSecret }}
31
+ redis-url: {{ .Values.redis.url | b64enc | quote }}
32
+ {{- end }}
33
+ {{- if .Values.qdrant.existingSecret }}
34
+ qdrant-url: {{ .Values.qdrant.url | b64enc | quote }}
35
+ {{- end }}
36
+ {{- end }}
helm/mnemocore/templates/service.yaml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{/*
2
+ MnemoCore API Service
3
+ */}}
4
+ apiVersion: v1
5
+ kind: Service
6
+ metadata:
7
+ name: {{ include "mnemocore.fullname" . }}
8
+ labels:
9
+ {{- include "mnemocore.labels" . | nindent 4 }}
10
+ app.kubernetes.io/component: api
11
+ {{- with .Values.mnemocore.service.annotations }}
12
+ annotations:
13
+ {{- toYaml . | nindent 4 }}
14
+ {{- end }}
15
+ spec:
16
+ type: {{ .Values.mnemocore.service.type }}
17
+ ports:
18
+ - port: {{ .Values.mnemocore.service.port }}
19
+ targetPort: {{ .Values.mnemocore.service.targetPort }}
20
+ protocol: TCP
21
+ name: http
22
+ - port: {{ .Values.mnemocore.service.metricsPort }}
23
+ targetPort: {{ .Values.mnemocore.ports.metrics }}
24
+ protocol: TCP
25
+ name: metrics
26
+ selector:
27
+ {{- include "mnemocore.selectorLabels" . | nindent 4 }}
28
+ app.kubernetes.io/component: api
29
+ ---
30
+ {{/*
31
+ MnemoCore Headless Service (for StatefulSet compatibility)
32
+ */}}
33
+ apiVersion: v1
34
+ kind: Service
35
+ metadata:
36
+ name: {{ include "mnemocore.fullname" . }}-headless
37
+ labels:
38
+ {{- include "mnemocore.labels" . | nindent 4 }}
39
+ app.kubernetes.io/component: api
40
+ spec:
41
+ type: ClusterIP
42
+ clusterIP: None
43
+ ports:
44
+ - port: {{ .Values.mnemocore.service.port }}
45
+ targetPort: {{ .Values.mnemocore.service.targetPort }}
46
+ protocol: TCP
47
+ name: http
48
+ selector:
49
+ {{- include "mnemocore.selectorLabels" . | nindent 4 }}
50
+ app.kubernetes.io/component: api
helm/mnemocore/templates/serviceaccount.yaml ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{/*
2
+ Service Account for MnemoCore
3
+ */}}
4
+ {{- if .Values.mnemocore.serviceAccount.create }}
5
+ apiVersion: v1
6
+ kind: ServiceAccount
7
+ metadata:
8
+ name: {{ include "mnemocore.serviceAccountName" . }}
9
+ labels:
10
+ {{- include "mnemocore.labels" . | nindent 4 }}
11
+ app.kubernetes.io/component: api
12
+ {{- with .Values.mnemocore.serviceAccount.annotations }}
13
+ annotations:
14
+ {{- toYaml . | nindent 4 }}
15
+ {{- end }}
16
+ automountServiceAccountToken: true
17
+ {{- end }}
helm/mnemocore/templates/servicemonitor.yaml ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{/*
2
+ ServiceMonitor for Prometheus Operator
3
+ */}}
4
+ {{- if .Values.serviceMonitor.enabled }}
5
+ apiVersion: monitoring.coreos.com/v1
6
+ kind: ServiceMonitor
7
+ metadata:
8
+ name: {{ include "mnemocore.fullname" . }}
9
+ labels:
10
+ {{- include "mnemocore.labels" . | nindent 4 }}
11
+ app.kubernetes.io/component: api
12
+ {{- with .Values.serviceMonitor.labels }}
13
+ {{- toYaml . | nindent 4 }}
14
+ {{- end }}
15
+ {{- with .Values.serviceMonitor.annotations }}
16
+ annotations:
17
+ {{- toYaml . | nindent 4 }}
18
+ {{- end }}
19
+ spec:
20
+ endpoints:
21
+ - port: metrics
22
+ path: /metrics
23
+ interval: {{ .Values.serviceMonitor.interval }}
24
+ scrapeTimeout: {{ .Values.serviceMonitor.scrapeTimeout }}
25
+ {{- with .Values.serviceMonitor.relabelings }}
26
+ relabelings:
27
+ {{- toYaml . | nindent 8 }}
28
+ {{- end }}
29
+ {{- with .Values.serviceMonitor.metricRelabelings }}
30
+ metricRelabelings:
31
+ {{- toYaml . | nindent 8 }}
32
+ {{- end }}
33
+ namespaceSelector:
34
+ matchNames:
35
+ - {{ .Release.Namespace }}
36
+ selector:
37
+ matchLabels:
38
+ {{- include "mnemocore.selectorLabels" . | nindent 6 }}
39
+ app.kubernetes.io/component: api
40
+ {{- end }}
helm/mnemocore/values.yaml ADDED
@@ -0,0 +1,430 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MnemoCore Helm Chart - Default Values
2
+ # ======================================
3
+ # Override these values in your own values file or via --set flags
4
+
5
+ # Global settings
6
+ global:
7
+ imageRegistry: ""
8
+ imagePullSecrets: []
9
+ storageClass: ""
10
+ namespace: mnemocore
11
+
12
+ # MnemoCore API Configuration
13
+ mnemocore:
14
+ # Number of replicas (ignored if autoscaling.enabled is true)
15
+ replicaCount: 2
16
+
17
+ # Container image
18
+ image:
19
+ repository: mnemocore
20
+ tag: "latest"
21
+ pullPolicy: IfNotPresent
22
+
23
+ # Container ports
24
+ ports:
25
+ api: 8100
26
+ metrics: 9090
27
+
28
+ # Resource limits and requests
29
+ resources:
30
+ limits:
31
+ cpu: "2"
32
+ memory: "2Gi"
33
+ requests:
34
+ cpu: "500m"
35
+ memory: "512Mi"
36
+
37
+ # Probes configuration
38
+ probes:
39
+ liveness:
40
+ enabled: true
41
+ initialDelaySeconds: 40
42
+ periodSeconds: 30
43
+ timeoutSeconds: 10
44
+ failureThreshold: 3
45
+ successThreshold: 1
46
+ readiness:
47
+ enabled: true
48
+ initialDelaySeconds: 20
49
+ periodSeconds: 10
50
+ timeoutSeconds: 5
51
+ failureThreshold: 3
52
+ successThreshold: 1
53
+ startup:
54
+ enabled: true
55
+ initialDelaySeconds: 10
56
+ periodSeconds: 10
57
+ timeoutSeconds: 5
58
+ failureThreshold: 30
59
+ successThreshold: 1
60
+
61
+ # HAIM Configuration (mounted as config.yaml)
62
+ config:
63
+ version: "3.0"
64
+ dimensionality: 16384
65
+ encoding:
66
+ mode: "binary"
67
+ token_method: "bundle"
68
+ tiers:
69
+ hot:
70
+ max_memories: 2000
71
+ ltp_threshold_min: 0.7
72
+ eviction_policy: "lru"
73
+ warm:
74
+ max_memories: 100000
75
+ ltp_threshold_min: 0.3
76
+ consolidation_interval_hours: 1
77
+ storage_backend: "mmap"
78
+ cold:
79
+ storage_backend: "filesystem"
80
+ compression: "gzip"
81
+ archive_threshold_days: 30
82
+ ltp:
83
+ initial_importance: 0.5
84
+ decay_lambda: 0.01
85
+ permanence_threshold: 0.95
86
+ half_life_days: 30.0
87
+ hysteresis:
88
+ promote_delta: 0.15
89
+ demote_delta: 0.10
90
+ gpu:
91
+ enabled: false
92
+ device: "cuda:0"
93
+ batch_size: 1000
94
+ fallback_to_cpu: true
95
+ observability:
96
+ metrics_port: 9090
97
+ log_level: "INFO"
98
+ structured_logging: true
99
+ paths:
100
+ data_dir: "/app/data"
101
+ memory_file: "/app/data/memory.jsonl"
102
+ codebook_file: "/app/data/codebook.json"
103
+ concepts_file: "/app/data/concepts.json"
104
+ synapses_file: "/app/data/synapses.json"
105
+ warm_mmap_dir: "/app/data/warm_tier"
106
+ cold_archive_dir: "/app/data/cold_archive"
107
+ mcp:
108
+ enabled: false
109
+ transport: "stdio"
110
+ host: "127.0.0.1"
111
+ port: 8110
112
+ api_base_url: "http://localhost:8100"
113
+ timeout_seconds: 15
114
+ allow_tools:
115
+ - "memory_store"
116
+ - "memory_query"
117
+ - "memory_get"
118
+ - "memory_delete"
119
+ - "memory_stats"
120
+ - "memory_health"
121
+
122
+ # Environment variables
123
+ env:
124
+ logLevel: "INFO"
125
+ host: "0.0.0.0"
126
+ port: 8100
127
+
128
+ # API Key (set via secret)
129
+ apiKey:
130
+ # Use existing secret
131
+ existingSecret: ""
132
+ # Key in the secret containing the API key
133
+ key: "HAIM_API_KEY"
134
+ # If not using existing secret, set value here (NOT RECOMMENDED for production)
135
+ value: ""
136
+
137
+ # Persistence
138
+ persistence:
139
+ enabled: true
140
+ accessModes:
141
+ - ReadWriteOnce
142
+ size: 10Gi
143
+ # storageClass: ""
144
+ annotations: {}
145
+
146
+ # Service configuration
147
+ service:
148
+ type: ClusterIP
149
+ port: 8100
150
+ targetPort: 8100
151
+ metricsPort: 9090
152
+ annotations: {}
153
+ labels: {}
154
+
155
+ # Ingress configuration
156
+ ingress:
157
+ enabled: false
158
+ className: ""
159
+ annotations: {}
160
+ # kubernetes.io/ingress.class: nginx
161
+ # kubernetes.io/tls-acme: "true"
162
+ hosts:
163
+ - host: mnemocore.local
164
+ paths:
165
+ - path: /
166
+ pathType: Prefix
167
+ tls: []
168
+ # - secretName: mnemocore-tls
169
+ # hosts:
170
+ # - mnemocore.local
171
+
172
+ # Autoscaling configuration
173
+ autoscaling:
174
+ enabled: true
175
+ minReplicas: 2
176
+ maxReplicas: 10
177
+ targetCPUUtilizationPercentage: 70
178
+ targetMemoryUtilizationPercentage: 80
179
+ # Custom metrics
180
+ metrics: []
181
+ behavior: {}
182
+
183
+ # Pod Disruption Budget
184
+ podDisruptionBudget:
185
+ enabled: true
186
+ minAvailable: 1
187
+ # maxUnavailable: 1
188
+
189
+ # Pod security context
190
+ podSecurityContext:
191
+ runAsNonRoot: true
192
+ runAsUser: 1000
193
+ runAsGroup: 1000
194
+ fsGroup: 1000
195
+
196
+ # Container security context
197
+ securityContext:
198
+ allowPrivilegeEscalation: false
199
+ capabilities:
200
+ drop:
201
+ - ALL
202
+ readOnlyRootFilesystem: true
203
+ runAsNonRoot: true
204
+
205
+ # Node selector
206
+ nodeSelector: {}
207
+
208
+ # Tolerations
209
+ tolerations: []
210
+
211
+ # Affinity
212
+ affinity: {}
213
+
214
+ # Pod annotations
215
+ podAnnotations:
216
+ prometheus.io/scrape: "true"
217
+ prometheus.io/port: "9090"
218
+ prometheus.io/path: "/metrics"
219
+
220
+ # Pod labels
221
+ podLabels: {}
222
+
223
+ # Priority class name
224
+ priorityClassName: ""
225
+
226
+ # Termination grace period
227
+ terminationGracePeriodSeconds: 30
228
+
229
+ # Service account
230
+ serviceAccount:
231
+ create: true
232
+ name: ""
233
+ annotations: {}
234
+
235
+ # Init containers
236
+ initContainers: []
237
+
238
+ # Extra volumes
239
+ extraVolumes: []
240
+
241
+ # Extra volume mounts
242
+ extraVolumeMounts: []
243
+
244
+ # Redis Configuration
245
+ redis:
246
+ # Enable Redis as part of this chart
247
+ enabled: true
248
+
249
+ # Use Bitnami Redis chart or embedded config
250
+ embedded:
251
+ enabled: false
252
+
253
+ # When not using Bitnami chart
254
+ image:
255
+ repository: redis
256
+ tag: "7.2-alpine"
257
+ pullPolicy: IfNotPresent
258
+
259
+ # Redis configuration
260
+ config:
261
+ maxmemory: "512mb"
262
+ maxmemoryPolicy: "allkeys-lru"
263
+ save: "60 1"
264
+ logLevel: "warning"
265
+
266
+ # Resource limits
267
+ resources:
268
+ limits:
269
+ cpu: "1"
270
+ memory: "512Mi"
271
+ requests:
272
+ cpu: "100m"
273
+ memory: "128Mi"
274
+
275
+ # Probes
276
+ probes:
277
+ liveness:
278
+ enabled: true
279
+ initialDelaySeconds: 10
280
+ periodSeconds: 10
281
+ timeoutSeconds: 5
282
+ failureThreshold: 5
283
+ readiness:
284
+ enabled: true
285
+ initialDelaySeconds: 5
286
+ periodSeconds: 5
287
+ timeoutSeconds: 3
288
+ failureThreshold: 5
289
+
290
+ # Service
291
+ service:
292
+ type: ClusterIP
293
+ port: 6379
294
+
295
+ # Persistence
296
+ persistence:
297
+ enabled: true
298
+ accessModes:
299
+ - ReadWriteOnce
300
+ size: 5Gi
301
+ # storageClass: ""
302
+
303
+ # URL override (if using external Redis)
304
+ url: ""
305
+ existingSecret: ""
306
+
307
+ # Qdrant Configuration
308
+ qdrant:
309
+ # Enable Qdrant as part of this chart
310
+ enabled: true
311
+
312
+ # Use official Qdrant chart or embedded config
313
+ embedded:
314
+ enabled: false
315
+
316
+ # When not using official chart
317
+ image:
318
+ repository: qdrant/qdrant
319
+ tag: "latest"
320
+ pullPolicy: IfNotPresent
321
+
322
+ # Qdrant configuration
323
+ config:
324
+ grpcPort: 6334
325
+ logLevel: "INFO"
326
+
327
+ # HAIM-specific collection settings
328
+ collections:
329
+ hot:
330
+ name: "haim_hot"
331
+ binaryQuantization: true
332
+ alwaysRam: true
333
+ hnswM: 16
334
+ hnswEfConstruct: 100
335
+ warm:
336
+ name: "haim_warm"
337
+ binaryQuantization: true
338
+ alwaysRam: true
339
+ hnswM: 16
340
+ hnswEfConstruct: 100
341
+
342
+ # Resource limits
343
+ resources:
344
+ limits:
345
+ cpu: "2"
346
+ memory: "4Gi"
347
+ requests:
348
+ cpu: "500m"
349
+ memory: "1Gi"
350
+
351
+ # Probes
352
+ probes:
353
+ liveness:
354
+ enabled: true
355
+ initialDelaySeconds: 15
356
+ periodSeconds: 10
357
+ timeoutSeconds: 5
358
+ failureThreshold: 5
359
+ readiness:
360
+ enabled: true
361
+ initialDelaySeconds: 10
362
+ periodSeconds: 5
363
+ timeoutSeconds: 3
364
+ failureThreshold: 5
365
+
366
+ # Services
367
+ service:
368
+ type: ClusterIP
369
+ httpPort: 6333
370
+ grpcPort: 6334
371
+
372
+ # Persistence
373
+ persistence:
374
+ enabled: true
375
+ accessModes:
376
+ - ReadWriteOnce
377
+ size: 20Gi
378
+ # storageClass: ""
379
+
380
+ # URL override (if using external Qdrant)
381
+ url: ""
382
+ existingSecret: ""
383
+
384
+ # Network Policies
385
+ networkPolicy:
386
+ enabled: false
387
+ ingress:
388
+ - from:
389
+ - namespaceSelector:
390
+ matchLabels:
391
+ name: mnemocore
392
+ ports:
393
+ - protocol: TCP
394
+ port: 8100
395
+ - protocol: TCP
396
+ port: 9090
397
+
398
+ # Service Monitor (Prometheus Operator)
399
+ serviceMonitor:
400
+ enabled: false
401
+ namespace: ""
402
+ interval: 30s
403
+ scrapeTimeout: 10s
404
+ labels: {}
405
+ annotations: {}
406
+ relabelings: []
407
+ metricRelabelings: []
408
+
409
+ # Grafana Dashboard
410
+ grafana:
411
+ dashboard:
412
+ enabled: false
413
+ namespace: ""
414
+ labels:
415
+ grafana_dashboard: "1"
416
+ annotations: {}
417
+
418
+ # Prometheus Rules
419
+ prometheusRule:
420
+ enabled: false
421
+ namespace: ""
422
+ additionalLabels: {}
423
+ rules: []
424
+
425
+ # Test configuration
426
+ test:
427
+ enabled: false
428
+ image:
429
+ repository: busybox
430
+ tag: "latest"
k8s/README.md ADDED
@@ -0,0 +1,324 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MnemoCore Kubernetes Deployment
2
+
3
+ This guide describes how to deploy MnemoCore to a Kubernetes cluster using the Helm chart located in `helm/mnemocore/`.
4
+
5
+ ## Overview
6
+
7
+ MnemoCore is a cognitive memory infrastructure that uses Hyperdimensional Computing (HDC) to provide persistent, scalable memory for AI systems. The Kubernetes deployment includes:
8
+
9
+ - **MnemoCore API** - Main API service with health checks and metrics
10
+ - **Redis** - In-memory data store for hot tier and caching
11
+ - **Qdrant** - Vector database for similarity search
12
+
13
+ ## Prerequisites
14
+
15
+ - Kubernetes 1.25+
16
+ - Helm 3.8+
17
+ - kubectl configured to access your cluster
18
+ - (Optional) Prometheus Operator for metrics scraping
19
+ - (Optional) cert-manager for TLS certificates
20
+
21
+ ## Quick Start
22
+
23
+ ### 1. Install using Helm
24
+
25
+ ```bash
26
+ # Add required Helm repositories
27
+ helm repo add bitnami https://charts.bitnami.com/bitnami
28
+ helm repo add qdrant https://qdrant.github.io/qdrant-helm
29
+ helm repo update
30
+
31
+ # Install MnemoCore with default values
32
+ helm install mnemocore ./helm/mnemocore \
33
+ --namespace mnemocore \
34
+ --create-namespace \
35
+ --set mnemocore.apiKey.value="your-secure-api-key"
36
+ ```
37
+
38
+ ### 2. Install with custom values
39
+
40
+ ```bash
41
+ # Create a values file
42
+ cat > values-prod.yaml << EOF
43
+ mnemocore:
44
+ replicaCount: 3
45
+ apiKey:
46
+ existingSecret: mnemocore-api-key
47
+ resources:
48
+ limits:
49
+ cpu: "4"
50
+ memory: "4Gi"
51
+ requests:
52
+ cpu: "1"
53
+ memory: "1Gi"
54
+ autoscaling:
55
+ enabled: true
56
+ minReplicas: 3
57
+ maxReplicas: 20
58
+ targetCPUUtilizationPercentage: 60
59
+
60
+ redis:
61
+ persistence:
62
+ size: 20Gi
63
+
64
+ qdrant:
65
+ persistence:
66
+ size: 100Gi
67
+
68
+ global:
69
+ storageClass: "fast-ssd"
70
+ EOF
71
+
72
+ helm install mnemocore ./helm/mnemocore \
73
+ --namespace mnemocore \
74
+ --create-namespace \
75
+ -f values-prod.yaml
76
+ ```
77
+
78
+ ### 3. Verify the installation
79
+
80
+ ```bash
81
+ # Check pod status
82
+ kubectl get pods -n mnemocore
83
+
84
+ # Check services
85
+ kubectl get svc -n mnemocore
86
+
87
+ # Check HPA status
88
+ kubectl get hpa -n mnemocore
89
+
90
+ # Port-forward for local testing
91
+ kubectl port-forward svc/mnemocore 8100:8100 -n mnemocore
92
+
93
+ # Test the API
94
+ curl http://localhost:8100/health
95
+ ```
96
+
97
+ ## Configuration
98
+
99
+ ### Key Configuration Parameters
100
+
101
+ | Parameter | Description | Default |
102
+ |-----------|-------------|---------|
103
+ | `mnemocore.replicaCount` | Number of API replicas | `2` |
104
+ | `mnemocore.image.repository` | Container image repository | `mnemocore` |
105
+ | `mnemocore.image.tag` | Container image tag | `latest` |
106
+ | `mnemocore.resources.limits.cpu` | CPU limit | `2` |
107
+ | `mnemocore.resources.limits.memory` | Memory limit | `2Gi` |
108
+ | `mnemocore.autoscaling.enabled` | Enable HPA | `true` |
109
+ | `mnemocore.autoscaling.minReplicas` | Minimum replicas | `2` |
110
+ | `mnemocore.autoscaling.maxReplicas` | Maximum replicas | `10` |
111
+ | `mnemocore.apiKey.existingSecret` | Existing secret for API key | `""` |
112
+ | `redis.enabled` | Deploy Redis | `true` |
113
+ | `qdrant.enabled` | Deploy Qdrant | `true` |
114
+
115
+ ### Resource Limits
116
+
117
+ | Component | CPU Limit | Memory Limit | CPU Request | Memory Request |
118
+ |-----------|-----------|--------------|-------------|----------------|
119
+ | MnemoCore | 2 | 2Gi | 500m | 512Mi |
120
+ | Redis | 1 | 512Mi | 100m | 128Mi |
121
+ | Qdrant | 2 | 4Gi | 500m | 1Gi |
122
+
123
+ ### Probe Configuration
124
+
125
+ | Probe | Initial Delay | Period | Timeout | Failure Threshold |
126
+ |-------|---------------|--------|---------|-------------------|
127
+ | Liveness | 40s | 30s | 10s | 3 |
128
+ | Readiness | 20s | 10s | 5s | 3 |
129
+ | Startup | 10s | 10s | 5s | 30 |
130
+
131
+ ## Production Deployment
132
+
133
+ ### 1. Create Secrets
134
+
135
+ ```bash
136
+ # Create API key secret
137
+ kubectl create secret generic mnemocore-api-key \
138
+ --from-literal=HAIM_API_KEY='your-secure-api-key' \
139
+ -n mnemocore
140
+
141
+ # Or use sealed-secrets/external-secrets for GitOps
142
+ ```
143
+
144
+ ### 2. Configure Storage
145
+
146
+ ```bash
147
+ # Ensure you have a storage class configured
148
+ kubectl get storageclass
149
+
150
+ # For production, use fast SSD storage
151
+ helm install mnemocore ./helm/mnemocore \
152
+ --namespace mnemocore \
153
+ --set global.storageClass=fast-ssd \
154
+ --set mnemocore.persistence.size=50Gi \
155
+ --set redis.persistence.size=20Gi \
156
+ --set qdrant.persistence.size=200Gi
157
+ ```
158
+
159
+ ### 3. Enable Ingress
160
+
161
+ ```bash
162
+ helm install mnemocore ./helm/mnemocore \
163
+ --namespace mnemocore \
164
+ --set mnemocore.ingress.enabled=true \
165
+ --set mnemocore.ingress.className=nginx \
166
+ --set 'mnemocore.ingress.hosts[0].host=mnemocore.yourdomain.com' \
167
+ --set 'mnemocore.ingress.hosts[0].paths[0].path=/' \
168
+ --set 'mnemocore.ingress.hosts[0].paths[0].pathType=Prefix' \
169
+ --set 'mnemocore.ingress.tls[0].secretName=mnemocore-tls' \
170
+ --set 'mnemocore.ingress.tls[0].hosts[0]=mnemocore.yourdomain.com'
171
+ ```
172
+
173
+ ### 4. Enable Network Policies
174
+
175
+ ```bash
176
+ helm install mnemocore ./helm/mnemocore \
177
+ --namespace mnemocore \
178
+ --set networkPolicy.enabled=true
179
+ ```
180
+
181
+ ## Monitoring
182
+
183
+ ### Prometheus Integration
184
+
185
+ ```bash
186
+ # Enable ServiceMonitor for Prometheus Operator
187
+ helm install mnemocore ./helm/mnemocore \
188
+ --namespace mnemocore \
189
+ --set serviceMonitor.enabled=true \
190
+ --set serviceMonitor.labels.release=prometheus
191
+ ```
192
+
193
+ ### Available Metrics
194
+
195
+ MnemoCore exposes the following metrics on port 9090:
196
+
197
+ - `mnemocore_memory_count_total` - Total number of memories stored
198
+ - `mnemocore_memory_tier_hot` - Number of memories in hot tier
199
+ - `mnemocore_memory_tier_warm` - Number of memories in warm tier
200
+ - `mnemocore_memory_tier_cold` - Number of memories in cold tier
201
+ - `mnemocore_query_duration_seconds` - Query latency histogram
202
+ - `mnemocore_ltp_avg` - Average LTP score
203
+ - `mnemocore_api_requests_total` - Total API requests
204
+ - `mnemocore_api_request_duration_seconds` - API request latency
205
+
206
+ ### Grafana Dashboard
207
+
208
+ Import the provided `grafana-dashboard.json` to visualize MnemoCore metrics.
209
+
210
+ ## Scaling
211
+
212
+ ### Manual Scaling
213
+
214
+ ```bash
215
+ # Scale to 5 replicas
216
+ kubectl scale deployment mnemocore --replicas=5 -n mnemocore
217
+ ```
218
+
219
+ ### Autoscaling
220
+
221
+ HPA is enabled by default. Customize scaling behavior:
222
+
223
+ ```bash
224
+ helm upgrade mnemocore ./helm/mnemocore \
225
+ --namespace mnemocore \
226
+ --set mnemocore.autoscaling.minReplicas=3 \
227
+ --set mnemocore.autoscaling.maxReplicas=50 \
228
+ --set mnemocore.autoscaling.targetCPUUtilizationPercentage=50
229
+ ```
230
+
231
+ ## Upgrading
232
+
233
+ ```bash
234
+ # Upgrade to a new version
235
+ helm upgrade mnemocore ./helm/mnemocore \
236
+ --namespace mnemocore \
237
+ --set mnemocore.image.tag=v4.5.0
238
+
239
+ # Rollback if needed
240
+ helm rollback mnemocore -n mnemocore
241
+ ```
242
+
243
+ ## Troubleshooting
244
+
245
+ ### Check Logs
246
+
247
+ ```bash
248
+ # MnemoCore logs
249
+ kubectl logs -l app.kubernetes.io/name=mnemocore -n mnemocore -f
250
+
251
+ # Redis logs
252
+ kubectl logs -l app.kubernetes.io/component=redis -n mnemocore -f
253
+
254
+ # Qdrant logs
255
+ kubectl logs -l app.kubernetes.io/component=qdrant -n mnemocore -f
256
+ ```
257
+
258
+ ### Common Issues
259
+
260
+ 1. **Pod stuck in Pending**
261
+ - Check storage class availability
262
+ - Check resource requests vs node capacity
263
+
264
+ 2. **Health check failing**
265
+ - Check Redis and Qdrant connectivity
266
+ - Verify environment variables
267
+
268
+ 3. **High memory usage**
269
+ - Reduce `mnemocore.config.tiers.hot.max_memories`
270
+ - Enable GPU for faster encoding
271
+
272
+ ### Debug Mode
273
+
274
+ ```bash
275
+ # Run with debug logging
276
+ helm upgrade mnemocore ./helm/mnemocore \
277
+ --namespace mnemocore \
278
+ --set mnemocore.env.logLevel=DEBUG
279
+ ```
280
+
281
+ ## Uninstalling
282
+
283
+ ```bash
284
+ # Remove the Helm release
285
+ helm uninstall mnemocore -n mnemocore
286
+
287
+ # Remove the namespace (optional)
288
+ kubectl delete namespace mnemocore
289
+
290
+ # Remove PVCs (caution: data loss)
291
+ kubectl delete pvc -n mnemocore --all
292
+ ```
293
+
294
+ ## Architecture
295
+
296
+ ```
297
+ ┌─────────────────┐
298
+ │ Ingress │
299
+ │ (Optional) │
300
+ └────────┬────────┘
301
+
302
+ ┌────────▼────────┐
303
+ │ MnemoCore API │
304
+ │ (HPA: 2-10) │
305
+ │ Port: 8100 │
306
+ └────────┬────────┘
307
+
308
+ ┌──────────────┼──────────────┐
309
+ │ │ │
310
+ ┌────────▼────────┐ │ ┌────────▼────────┐
311
+ │ Redis │ │ │ Qdrant │
312
+ │ Port: 6379 │ │ │ Port: 6333/6334 │
313
+ │ Hot Tier Cache │ │ │ Vector Storage │
314
+ └─────────────────┘ │ └─────────────────┘
315
+
316
+ ┌───────▼───────┐
317
+ │ Persistent │
318
+ │ Storage │
319
+ └───────────────┘
320
+ ```
321
+
322
+ ## License
323
+
324
+ MIT License - See LICENSE file for details.
pyproject.toml ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "mnemocore"
7
+ version = "4.5.0"
8
+ description = "MnemoCore – Infrastructure for Persistent Cognitive Memory. A hierarchical AI memory engine with hot/warm/cold tiers, vector search, and subconscious consolidation."
9
+ readme = "README.md"
10
+ license = { file = "LICENSE" }
11
+ requires-python = ">=3.10"
12
+ authors = [
13
+ { name = "Robin", email = "" },
14
+ ]
15
+ keywords = [
16
+ "ai",
17
+ "memory",
18
+ "cognitive",
19
+ "vector-search",
20
+ "qdrant",
21
+ "llm",
22
+ "hyperdimensional-computing",
23
+ ]
24
+ classifiers = [
25
+ "Development Status :: 4 - Beta",
26
+ "Intended Audience :: Developers",
27
+ "Intended Audience :: Science/Research",
28
+ "License :: OSI Approved :: MIT License",
29
+ "Programming Language :: Python :: 3",
30
+ "Programming Language :: Python :: 3.10",
31
+ "Programming Language :: Python :: 3.11",
32
+ "Programming Language :: Python :: 3.12",
33
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
34
+ "Topic :: Software Development :: Libraries :: Python Modules",
35
+ "Typing :: Typed",
36
+ ]
37
+
38
+ # Runtime dependencies (migrated from requirements.txt)
39
+ dependencies = [
40
+ "numpy>=1.24",
41
+ "requests>=2.31.0",
42
+ "fastapi>=0.100.0",
43
+ "uvicorn>=0.23.0",
44
+ "pydantic>=2.0.0",
45
+ "pyyaml>=6.0",
46
+ "redis>=5.0.0",
47
+ "qdrant-client>=1.7.0",
48
+ "prometheus-client>=0.17.0",
49
+ "loguru>=0.7.0",
50
+ "msgpack>=1.0.0",
51
+ "mcp>=0.1.0",
52
+ "faiss-cpu>=1.7.4",
53
+ "pybreaker>=1.0.0",
54
+ ]
55
+
56
+ [project.optional-dependencies]
57
+ dev = [
58
+ "pytest>=7.0.0",
59
+ "pytest-asyncio>=0.21.0",
60
+ "hypothesis>=6.0.0",
61
+ "mypy>=1.0.0",
62
+ "flake8>=6.0.0",
63
+ "isort>=5.0.0",
64
+ "black>=23.0.0",
65
+ "coverage>=7.0.0",
66
+ ]
67
+ viz = [
68
+ "plotly>=5.0.0",
69
+ "pandas>=2.0.0",
70
+ ]
71
+
72
+ [project.urls]
73
+ Homepage = "https://github.com/RobinALG87/MnemoCore-Infrastructure-for-Persistent-Cognitive-Memory"
74
+ Repository = "https://github.com/RobinALG87/MnemoCore-Infrastructure-for-Persistent-Cognitive-Memory"
75
+ "Bug Tracker" = "https://github.com/RobinALG87/MnemoCore-Infrastructure-for-Persistent-Cognitive-Memory/issues"
76
+
77
+ [project.scripts]
78
+ mnemocore = "mnemocore.api.main:app"
79
+
80
+ # ── Hatchling build configuration ─────────────────────────────────────────────
81
+ [tool.hatch.build.targets.wheel]
82
+ packages = ["src/mnemocore"]
83
+
84
+ [tool.hatch.build.targets.sdist]
85
+ include = [
86
+ "src/",
87
+ "README.md",
88
+ "LICENSE",
89
+ "CHANGELOG.md",
90
+ "config.yaml",
91
+ ]
92
+
93
+ # ── Pytest ─────────────────────────────────────────────────────────────────────
94
+ [tool.pytest.ini_options]
95
+ testpaths = ["tests"]
96
+ python_files = ["test_*.py"]
97
+ python_classes = ["Test*"]
98
+ python_functions = ["test_*"]
99
+ addopts = "-v --tb=short"
100
+ asyncio_mode = "auto"
101
+
102
+ # ── Coverage ───────────────────────────────────────────────────────────────────
103
+ [tool.coverage.run]
104
+ source = ["src"]
105
+ omit = ["tests/*", "**/__pycache__/*"]
106
+
107
+ [tool.coverage.report]
108
+ show_missing = true
109
+ skip_covered = false
pytest.ini ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ [pytest]
2
+ testpaths = tests
3
+ python_files = test_*.py
4
+ python_classes = Test*
5
+ python_functions = test_*
6
+ addopts = -v --tb=short
7
+ markers =
8
+ integration: marks tests requiring external services (Redis, Qdrant)
9
+ asyncio_mode = auto
requirements-dev.txt ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Development Dependencies
2
+ # Install with: pip install -r requirements-dev.txt
3
+
4
+ # Code Formatting
5
+ black>=23.0.0
6
+
7
+ # Import Sorting
8
+ isort>=5.12.0
9
+
10
+ # Style Guide Enforcement
11
+ flake8>=6.0.0
12
+
13
+ # Static Type Checking
14
+ mypy>=1.0.0
15
+
16
+ # Testing
17
+ pytest>=7.0.0
18
+ pytest-cov>=4.0.0
19
+ pytest-asyncio>=0.21.0
20
+
21
+ # Security Scanning
22
+ pip-audit>=2.6.0
23
+ bandit>=1.7.0
24
+
25
+ # Documentation
26
+ sphinx>=7.0.0
27
+ sphinx-rtd-theme>=1.3.0
28
+
29
+ # Pre-commit hooks (optional)
30
+ pre-commit>=3.0.0