Andrew Young
committed on
Upload folder using huggingface_hub
Browse files- .gitattributes +7 -0
- .gitignore +7 -0
- Cargo.toml +39 -0
- LICENSE +21 -0
- README.md +171 -0
- paper/ARMS_Spatial_Memory_Young_2026.tex +520 -0
- paper/figures/fig01_architecture.jpg +3 -0
- paper/figures/fig02_hexagonal.jpg +3 -0
- paper/figures/fig03_primitives.jpg +3 -0
- paper/figures/fig04_position_relationship.jpg +3 -0
- paper/figures/fig05_hippocampus.jpg +3 -0
- paper/figures/fig06_traditional_vs_arms.jpg +3 -0
- paper/figures/fig07_ecosystem.jpg +3 -0
- paper/refs.bib +78 -0
- src/adapters/index/flat.rs +278 -0
- src/adapters/index/mod.rs +15 -0
- src/adapters/mod.rs +16 -0
- src/adapters/storage/memory.rs +253 -0
- src/adapters/storage/mod.rs +15 -0
- src/core/blob.rs +152 -0
- src/core/config.rs +177 -0
- src/core/id.rs +169 -0
- src/core/merge.rs +335 -0
- src/core/mod.rs +64 -0
- src/core/point.rs +186 -0
- src/core/proximity.rs +261 -0
- src/engine/arms.rs +335 -0
- src/engine/mod.rs +12 -0
- src/lib.rs +107 -0
- src/ports/latency.rs +126 -0
- src/ports/mod.rs +28 -0
- src/ports/near.rs +95 -0
- src/ports/place.rs +91 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,10 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
paper/figures/fig01_architecture.jpg filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
paper/figures/fig02_hexagonal.jpg filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
paper/figures/fig03_primitives.jpg filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
paper/figures/fig04_position_relationship.jpg filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
paper/figures/fig05_hippocampus.jpg filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
paper/figures/fig06_traditional_vs_arms.jpg filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
paper/figures/fig07_ecosystem.jpg filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/target/
|
| 2 |
+
Cargo.lock
|
| 3 |
+
.env
|
| 4 |
+
.venv/
|
| 5 |
+
.claude/
|
| 6 |
+
*.pyc
|
| 7 |
+
__pycache__/
|
Cargo.toml
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[package]
|
| 2 |
+
name = "arms-core"
|
| 3 |
+
version = "0.1.0"
|
| 4 |
+
edition = "2021"
|
| 5 |
+
authors = ["Automate Capture LLC <research@automate-capture.com>"]
|
| 6 |
+
description = "ARMS: Attention Reasoning Memory Store - A spatial memory fabric for AI. Position IS relationship."
|
| 7 |
+
license = "MIT"
|
| 8 |
+
repository = "https://github.com/automate-capture/arms"
|
| 9 |
+
homepage = "https://research.automate-capture.com/arms"
|
| 10 |
+
documentation = "https://docs.rs/arms-core"
|
| 11 |
+
readme = "README.md"
|
| 12 |
+
keywords = ["memory", "spatial-database", "ai", "embeddings", "vector-search"]
|
| 13 |
+
categories = ["database", "science", "algorithms"]
|
| 14 |
+
exclude = [
|
| 15 |
+
"target/",
|
| 16 |
+
".venv/",
|
| 17 |
+
".git/",
|
| 18 |
+
".claude/",
|
| 19 |
+
"paper/",
|
| 20 |
+
"images/",
|
| 21 |
+
".env",
|
| 22 |
+
]
|
| 23 |
+
|
| 24 |
+
[lib]
|
| 25 |
+
name = "arms"
|
| 26 |
+
path = "src/lib.rs"
|
| 27 |
+
|
| 28 |
+
[dependencies]
|
| 29 |
+
thiserror = "1.0"
|
| 30 |
+
|
| 31 |
+
[dev-dependencies]
|
| 32 |
+
criterion = "0.5"
|
| 33 |
+
|
| 34 |
+
[features]
|
| 35 |
+
default = []
|
| 36 |
+
|
| 37 |
+
[profile.release]
|
| 38 |
+
lto = true
|
| 39 |
+
codegen-units = 1
|
LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MIT License
|
| 2 |
+
|
| 3 |
+
Copyright (c) 2026 Andrew Young / Automate Capture LLC
|
| 4 |
+
|
| 5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
in the Software without restriction, including without limitation the rights
|
| 8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
SOFTWARE.
|
README.md
ADDED
|
@@ -0,0 +1,171 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: mit
|
| 3 |
+
tags:
|
| 4 |
+
- spatial-database
|
| 5 |
+
- memory
|
| 6 |
+
- embeddings
|
| 7 |
+
- ai
|
| 8 |
+
- vector-search
|
| 9 |
+
- rust
|
| 10 |
+
library_name: arms-core
|
| 11 |
+
pipeline_tag: feature-extraction
|
| 12 |
+
---
|
| 13 |
+
|
| 14 |
+
# ARMS - Attention Reasoning Memory Store
|
| 15 |
+
|
| 16 |
+
> **Position IS Relationship** - A Spatial Memory Fabric for AI Systems
|
| 17 |
+
|
| 18 |
+
ARMS is a spatial memory fabric that enables AI systems to store and retrieve computed states by their native dimensional coordinates. Unlike traditional databases that require explicit relationships through foreign keys or learned topology through approximate nearest neighbor algorithms, ARMS operates on a fundamental principle: **proximity defines connection**.
|
| 19 |
+
|
| 20 |
+

|
| 21 |
+
|
| 22 |
+
## The Problem
|
| 23 |
+
|
| 24 |
+
Current AI memory approaches all lose information:
|
| 25 |
+
|
| 26 |
+
- **Extended context**: Expensive, doesn't scale beyond training length
|
| 27 |
+
- **RAG retrieval**: Retrieves text, requires recomputation of attention
|
| 28 |
+
- **Vector databases**: Treat all data as unstructured point clouds
|
| 29 |
+
- **External memory**: Key-value stores with explicit indexing
|
| 30 |
+
|
| 31 |
+

|
| 32 |
+
|
| 33 |
+
## The ARMS Insight
|
| 34 |
+
|
| 35 |
+
```
|
| 36 |
+
Traditional: State → Project → Index → Retrieve → Reconstruct
|
| 37 |
+
(lossy at each step)
|
| 38 |
+
|
| 39 |
+
ARMS: State → Store AT coordinates → Retrieve → Inject directly
|
| 40 |
+
(native representation preserved)
|
| 41 |
+
```
|
| 42 |
+
|
| 43 |
+
## The Five Primitives
|
| 44 |
+
|
| 45 |
+
Everything in ARMS reduces to five primitives:
|
| 46 |
+
|
| 47 |
+

|
| 48 |
+
|
| 49 |
+
| Primitive | Type | Purpose |
|
| 50 |
+
|-----------|------|---------|
|
| 51 |
+
| **Point** | `Vec<f32>` | Any dimensionality |
|
| 52 |
+
| **Proximity** | `fn(a, b) -> f32` | How related? |
|
| 53 |
+
| **Merge** | `fn(points) -> point` | Compose together |
|
| 54 |
+
| **Place** | `fn(point, data) -> id` | Exist in space |
|
| 55 |
+
| **Near** | `fn(point, k) -> ids` | What's related? |
|
| 56 |
+
|
| 57 |
+
## Quick Start
|
| 58 |
+
|
| 59 |
+
```rust
|
| 60 |
+
use arms::{Arms, ArmsConfig, Point};
|
| 61 |
+
|
| 62 |
+
// Create ARMS with default config
|
| 63 |
+
let mut arms = Arms::new(ArmsConfig::new(768));
|
| 64 |
+
|
| 65 |
+
// Place a point in the space
|
| 66 |
+
let point = Point::new(vec![0.1; 768]);
|
| 67 |
+
let id = arms.place(point, b"my data".to_vec()).unwrap();
|
| 68 |
+
|
| 69 |
+
// Find nearby points
|
| 70 |
+
let query = Point::new(vec![0.1; 768]);
|
| 71 |
+
let neighbors = arms.near(&query, 5).unwrap();
|
| 72 |
+
```
|
| 73 |
+
|
| 74 |
+
## Hexagonal Architecture
|
| 75 |
+
|
| 76 |
+
ARMS follows a hexagonal (ports-and-adapters) architecture. The core domain contains pure math with no I/O. Ports define trait contracts. Adapters provide swappable implementations.
|
| 77 |
+
|
| 78 |
+

|
| 79 |
+
|
| 80 |
+
```
|
| 81 |
+
┌─────────────────────────────────────────────────────────────┐
|
| 82 |
+
│ ARMS │
|
| 83 |
+
├─────────────────────────────────────────────────────────────┤
|
| 84 |
+
│ CORE (pure math, no I/O) │
|
| 85 |
+
│ Point, Id, Blob, Proximity, Merge │
|
| 86 |
+
│ │
|
| 87 |
+
│ PORTS (trait contracts) │
|
| 88 |
+
│ Place, Near, Latency │
|
| 89 |
+
│ │
|
| 90 |
+
│ ADAPTERS (swappable implementations) │
|
| 91 |
+
│ Storage: Memory, NVMe (planned) │
|
| 92 |
+
│ Index: Flat, HAT (see arms-hat crate) │
|
| 93 |
+
│ │
|
| 94 |
+
│ ENGINE (orchestration) │
|
| 95 |
+
│ Arms - the main entry point │
|
| 96 |
+
└─────────────────────────────────────────────────────────────┘
|
| 97 |
+
```
|
| 98 |
+
|
| 99 |
+
## The Hippocampus Analogy
|
| 100 |
+
|
| 101 |
+
ARMS functions as an artificial hippocampus for AI systems:
|
| 102 |
+
|
| 103 |
+

|
| 104 |
+
|
| 105 |
+
| Hippocampus | ARMS |
|
| 106 |
+
|-------------|------|
|
| 107 |
+
| Encodes episodic memories | Stores attention states |
|
| 108 |
+
| Spatial navigation | High-dimensional proximity |
|
| 109 |
+
| Pattern completion | Near queries |
|
| 110 |
+
| Memory consolidation | Merge operations |
|
| 111 |
+
| Place cells | Points at coordinates |
|
| 112 |
+
|
| 113 |
+
## Ecosystem
|
| 114 |
+
|
| 115 |
+

|
| 116 |
+
|
| 117 |
+
### Related Crates
|
| 118 |
+
|
| 119 |
+
- [`arms-hat`](https://crates.io/crates/arms-hat) - Hierarchical Attention Tree index adapter (100% recall, 70x faster than HNSW)
|
| 120 |
+
|
| 121 |
+
### Planned Adapters
|
| 122 |
+
|
| 123 |
+
- `arms-nvme` - Persistent storage via memory-mapped files
|
| 124 |
+
- `arms-distributed` - Sharded storage across machines
|
| 125 |
+
- `arms-gpu` - CUDA-accelerated similarity search
|
| 126 |
+
- `arms-py` - Python bindings
|
| 127 |
+
|
| 128 |
+
## Proximity Functions
|
| 129 |
+
|
| 130 |
+
Built-in proximity measures:
|
| 131 |
+
|
| 132 |
+
- **Cosine** - Angle between vectors (semantic similarity)
|
| 133 |
+
- **Euclidean** - Straight-line distance
|
| 134 |
+
- **DotProduct** - Raw dot product
|
| 135 |
+
- **Manhattan** - L1 distance
|
| 136 |
+
|
| 137 |
+
## Installation
|
| 138 |
+
|
| 139 |
+
```toml
|
| 140 |
+
[dependencies]
|
| 141 |
+
arms-core = "0.1"
|
| 142 |
+
```
|
| 143 |
+
|
| 144 |
+
## Paper
|
| 145 |
+
|
| 146 |
+
The research paper is available in the [`paper/`](paper/) directory.
|
| 147 |
+
|
| 148 |
+
**ARMS: A Spatial Memory Fabric for AI Systems**
|
| 149 |
+
Andrew Young, 2026
|
| 150 |
+
|
| 151 |
+
## License
|
| 152 |
+
|
| 153 |
+
MIT License - see [LICENSE](LICENSE)
|
| 154 |
+
|
| 155 |
+
## Citation
|
| 156 |
+
|
| 157 |
+
If you use ARMS in research, please cite:
|
| 158 |
+
|
| 159 |
+
```bibtex
|
| 160 |
+
@article{young2026arms,
|
| 161 |
+
author = {Young, Andrew},
|
| 162 |
+
title = {ARMS: A Spatial Memory Fabric for AI Systems},
|
| 163 |
+
journal = {arXiv preprint},
|
| 164 |
+
year = {2026},
|
| 165 |
+
url = {https://github.com/automate-capture/arms}
|
| 166 |
+
}
|
| 167 |
+
```
|
| 168 |
+
|
| 169 |
+
## Author
|
| 170 |
+
|
| 171 |
+
Andrew Young - [andrew@automate-capture.com](mailto:andrew@automate-capture.com)
|
paper/ARMS_Spatial_Memory_Young_2026.tex
ADDED
|
@@ -0,0 +1,520 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
% ARMS: A Spatial Memory Fabric for AI Systems
|
| 2 |
+
% arXiv submission - January 2026
|
| 3 |
+
|
| 4 |
+
\documentclass[11pt,a4paper]{article}
|
| 5 |
+
|
| 6 |
+
% ============================================================================
|
| 7 |
+
% PACKAGES
|
| 8 |
+
% ============================================================================
|
| 9 |
+
|
| 10 |
+
\usepackage[utf8]{inputenc}
|
| 11 |
+
\usepackage[T1]{fontenc}
|
| 12 |
+
\usepackage{lmodern}
|
| 13 |
+
|
| 14 |
+
% Math
|
| 15 |
+
\usepackage{amsmath,amssymb,amsthm}
|
| 16 |
+
\usepackage{mathtools}
|
| 17 |
+
|
| 18 |
+
% Graphics
|
| 19 |
+
\usepackage{graphicx}
|
| 20 |
+
\usepackage{float}
|
| 21 |
+
\usepackage{subcaption}
|
| 22 |
+
\usepackage[dvipsnames]{xcolor}
|
| 23 |
+
\graphicspath{{figures/}{Diagrams/}}
|
| 24 |
+
|
| 25 |
+
% Tables
|
| 26 |
+
\usepackage{booktabs}
|
| 27 |
+
\usepackage{multirow}
|
| 28 |
+
\usepackage{array}
|
| 29 |
+
|
| 30 |
+
% Algorithms
|
| 31 |
+
\usepackage{algorithm}
|
| 32 |
+
\usepackage{algorithmic}
|
| 33 |
+
|
| 34 |
+
% Code
|
| 35 |
+
\usepackage{listings}
|
| 36 |
+
\lstset{
|
| 37 |
+
basicstyle=\ttfamily\small,
|
| 38 |
+
breaklines=true,
|
| 39 |
+
frame=single,
|
| 40 |
+
numbers=left,
|
| 41 |
+
numberstyle=\tiny,
|
| 42 |
+
language=Python,
|
| 43 |
+
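}
% The listings package ships no Rust definition; declare a minimal one so that
% the language=Rust listings in this paper compile.
\lstdefinelanguage{Rust}{
morekeywords={as,const,enum,fn,for,if,impl,let,match,mod,mut,pub,ref,return,self,Self,struct,trait,type,use,where,while},
sensitive=true,
morecomment=[l]{//},
morecomment=[s]{/*}{*/},
morestring=[b]"
}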
|
| 44 |
+
|
| 45 |
+
% Links
|
| 46 |
+
\usepackage[colorlinks=true,linkcolor=blue,citecolor=blue,urlcolor=blue]{hyperref}
|
| 47 |
+
\usepackage{cleveref}
|
| 48 |
+
|
| 49 |
+
% Layout
|
| 50 |
+
\usepackage[margin=1in]{geometry}
|
| 51 |
+
|
| 52 |
+
% Bibliography
|
| 53 |
+
\usepackage[numbers,sort&compress]{natbib}
|
| 54 |
+
|
| 55 |
+
% ============================================================================
|
| 56 |
+
% CUSTOM COMMANDS
|
| 57 |
+
% ============================================================================
|
| 58 |
+
|
| 59 |
+
\DeclareMathOperator*{\argmax}{arg\,max}
|
| 60 |
+
\DeclareMathOperator*{\argmin}{arg\,min}
|
| 61 |
+
\newcommand{\R}{\mathbb{R}}
|
| 62 |
+
|
| 63 |
+
% ============================================================================
|
| 64 |
+
% TITLE
|
| 65 |
+
% ============================================================================
|
| 66 |
+
|
| 67 |
+
\title{%
|
| 68 |
+
\textbf{ARMS: A Spatial Memory Fabric for AI Systems}\\[0.5em]
|
| 69 |
+
\large Position IS Relationship
|
| 70 |
+
}
|
| 71 |
+
|
| 72 |
+
\author{%
|
| 73 |
+
Andrew Young\\
|
| 74 |
+
\texttt{andrew@automate-capture.com}
|
| 75 |
+
}
|
| 76 |
+
|
| 77 |
+
\date{January 2026}
|
| 78 |
+
|
| 79 |
+
% ============================================================================
|
| 80 |
+
% DOCUMENT
|
| 81 |
+
% ============================================================================
|
| 82 |
+
|
| 83 |
+
\begin{document}
|
| 84 |
+
|
| 85 |
+
\maketitle
|
| 86 |
+
|
| 87 |
+
% ----------------------------------------------------------------------------
|
| 88 |
+
\begin{abstract}
|
| 89 |
+
\noindent
|
| 90 |
+
This paper introduces ARMS (Attention Reasoning Memory Store), a spatial memory fabric that enables AI systems to store and retrieve computed states by their native dimensional coordinates. Unlike traditional databases that require explicit relationships through foreign keys or learned topology through approximate nearest neighbor algorithms, ARMS operates on a fundamental principle: \textbf{position IS relationship}. Proximity in high-dimensional space defines semantic connection without explicit declaration.
|
| 91 |
+
|
| 92 |
+
ARMS reduces memory operations to five primitives: \textbf{Point} (any-dimensional vectors), \textbf{Proximity} (relationship measurement), \textbf{Merge} (composition), \textbf{Place} (existence in space), and \textbf{Near} (retrieval by similarity). This minimal abstraction enables a hexagonal architecture where storage backends, index algorithms, and APIs can be swapped without changing core logic.
|
| 93 |
+
|
| 94 |
+
The framework provides the foundation for specialized index adapters like HAT (Hierarchical Attention Tree), demonstrating that domain-specific structure can be exploited for superior performance. ARMS functions as an artificial hippocampus---enabling AI systems to form, consolidate, and retrieve episodic memories through spatial organization rather than explicit indexing.
|
| 95 |
+
|
| 96 |
+
\vspace{1em}
|
| 97 |
+
\noindent\textbf{Keywords:} spatial memory, AI memory systems, vector databases, hexagonal architecture, episodic memory
|
| 98 |
+
\end{abstract}
|
| 99 |
+
|
| 100 |
+
% ----------------------------------------------------------------------------
|
| 101 |
+
\section{Introduction}
|
| 102 |
+
\label{sec:intro}
|
| 103 |
+
|
| 104 |
+
\subsection{The Memory Problem in AI}
|
| 105 |
+
|
| 106 |
+
Large language models and AI agents face a fundamental limitation: they lack persistent, retrievable memory beyond their context window. Current approaches include:
|
| 107 |
+
|
| 108 |
+
\begin{itemize}
|
| 109 |
+
\item \textbf{Extended context}: Expensive, doesn't scale beyond training length
|
| 110 |
+
\item \textbf{RAG retrieval}: Retrieves text, requires recomputation of attention
|
| 111 |
+
\item \textbf{Vector databases}: Treat all data as unstructured point clouds
|
| 112 |
+
\item \textbf{External memory}: Key-value stores with explicit indexing
|
| 113 |
+
\end{itemize}
|
| 114 |
+
|
| 115 |
+
None of these approaches preserve the \emph{native representation} of computed states. When an LLM processes text, it produces attention states in high-dimensional space. Current systems project, compress, or discard these states rather than storing them directly.
|
| 116 |
+
|
| 117 |
+
\begin{figure}[H]
|
| 118 |
+
\centering
|
| 119 |
+
\includegraphics[width=0.9\textwidth]{fig06_traditional_vs_arms.jpg}
|
| 120 |
+
\caption{Traditional approaches vs ARMS: Traditional systems project, compress, and approximate states at each step, introducing cumulative error. ARMS stores states at their native coordinates and retrieves by proximity, preserving the original representation.}
|
| 121 |
+
\label{fig:traditional}
|
| 122 |
+
\end{figure}
|
| 123 |
+
|
| 124 |
+
\subsection{The ARMS Insight}
|
| 125 |
+
|
| 126 |
+
ARMS takes a different approach:
|
| 127 |
+
|
| 128 |
+
\begin{quote}
|
| 129 |
+
\textbf{Store states at their native coordinates. Retrieve by proximity. Position IS relationship.}
|
| 130 |
+
\end{quote}
|
| 131 |
+
|
| 132 |
+
This insight has three implications:
|
| 133 |
+
|
| 134 |
+
\begin{enumerate}
|
| 135 |
+
\item \textbf{No projection loss}: States are stored in their original dimensionality
|
| 136 |
+
\item \textbf{No explicit relationships}: Semantic similarity is spatial proximity
|
| 137 |
+
\item \textbf{No learned topology}: Structure can be known or exploited, not discovered
|
| 138 |
+
\end{enumerate}
|
| 139 |
+
|
| 140 |
+
\begin{figure}[H]
|
| 141 |
+
\centering
|
| 142 |
+
\includegraphics[width=0.9\textwidth]{fig01_architecture.jpg}
|
| 143 |
+
\caption{ARMS architecture overview. The five primitives (Point, Proximity, Merge, Place, Near) form the core, with swappable storage and index adapters.}
|
| 144 |
+
\label{fig:architecture}
|
| 145 |
+
\end{figure}
|
| 146 |
+
|
| 147 |
+
\subsection{Contributions}
|
| 148 |
+
|
| 149 |
+
This paper makes the following contributions:
|
| 150 |
+
|
| 151 |
+
\begin{enumerate}
|
| 152 |
+
\item A \textbf{five-primitive abstraction} for spatial memory (Point, Proximity, Merge, Place, Near)
|
| 153 |
+
\item A \textbf{hexagonal architecture} enabling swappable storage, index, and API adapters
|
| 154 |
+
\item The \textbf{``position is relationship''} principle for AI memory systems
|
| 155 |
+
\item A \textbf{foundation framework} demonstrated through the HAT index adapter
|
| 156 |
+
\end{enumerate}
|
| 157 |
+
|
| 158 |
+
% ----------------------------------------------------------------------------
|
| 159 |
+
\section{The Five Primitives}
|
| 160 |
+
\label{sec:primitives}
|
| 161 |
+
|
| 162 |
+
ARMS reduces all memory operations to five primitives. This minimal surface area enables maximum flexibility while maintaining semantic clarity.
|
| 163 |
+
|
| 164 |
+
\begin{figure}[H]
|
| 165 |
+
\centering
|
| 166 |
+
\includegraphics[width=0.85\textwidth]{fig03_primitives.jpg}
|
| 167 |
+
\caption{The five primitives of ARMS: Point (representation), Proximity (relationship), Merge (composition), Place (storage), and Near (retrieval). These operations form the complete interface for spatial memory.}
|
| 168 |
+
\label{fig:primitives}
|
| 169 |
+
\end{figure}
|
| 170 |
+
|
| 171 |
+
\begin{table}[H]
|
| 172 |
+
\centering
|
| 173 |
+
\caption{The five primitives of ARMS.}
|
| 174 |
+
\label{tab:primitives}
|
| 175 |
+
\begin{tabular}{llp{7cm}}
|
| 176 |
+
\toprule
|
| 177 |
+
\textbf{Primitive} & \textbf{Signature} & \textbf{Purpose} \\
|
| 178 |
+
\midrule
|
| 179 |
+
Point & \texttt{Vec<f32>} & Any-dimensional vector representation \\
|
| 180 |
+
Proximity & \texttt{fn(a, b) -> f32} & Measure how related two points are \\
|
| 181 |
+
Merge & \texttt{fn(points) -> point} & Compose multiple points into one \\
|
| 182 |
+
Place & \texttt{fn(point, data) -> id} & Store a point in the space \\
|
| 183 |
+
Near & \texttt{fn(point, k) -> ids} & Find k most related points \\
|
| 184 |
+
\bottomrule
|
| 185 |
+
\end{tabular}
|
| 186 |
+
\end{table}
|
| 187 |
+
|
| 188 |
+
\subsection{Point: The Universal Representation}
|
| 189 |
+
|
| 190 |
+
A Point is simply a vector of floating-point numbers:
|
| 191 |
+
|
| 192 |
+
\begin{lstlisting}[language=Rust,caption={Point definition.}]
|
| 193 |
+
pub struct Point {
|
| 194 |
+
dims: Vec<f32>,
|
| 195 |
+
}
|
| 196 |
+
|
| 197 |
+
impl Point {
|
| 198 |
+
pub fn new(dims: Vec<f32>) -> Self;
|
| 199 |
+
pub fn dimensionality(&self) -> usize;
|
| 200 |
+
pub fn magnitude(&self) -> f32;
|
| 201 |
+
pub fn normalize(&self) -> Point;
|
| 202 |
+
}
|
| 203 |
+
\end{lstlisting}
|
| 204 |
+
|
| 205 |
+
Points are dimensionality-agnostic. The same ARMS instance can store 768-dimensional BERT embeddings or 1536-dimensional OpenAI embeddings---the primitives don't change.
|
| 206 |
+
|
| 207 |
+
\subsection{Proximity: Relationship Without Declaration}
|
| 208 |
+
|
| 209 |
+
Proximity functions measure how related two points are:
|
| 210 |
+
|
| 211 |
+
\begin{table}[H]
|
| 212 |
+
\centering
|
| 213 |
+
\caption{Built-in proximity functions.}
|
| 214 |
+
\label{tab:proximity}
|
| 215 |
+
\begin{tabular}{llp{6cm}}
|
| 216 |
+
\toprule
|
| 217 |
+
\textbf{Function} & \textbf{Range} & \textbf{Use Case} \\
|
| 218 |
+
\midrule
|
| 219 |
+
Cosine & $[-1, 1]$ & Semantic similarity (direction matters) \\
|
| 220 |
+
Euclidean & $[0, \infty)$ & Spatial distance (magnitude matters) \\
|
| 221 |
+
DotProduct & $(-\infty, \infty)$ & Raw correlation \\
|
| 222 |
+
Manhattan & $[0, \infty)$ & L1 distance \\
|
| 223 |
+
\bottomrule
|
| 224 |
+
\end{tabular}
|
| 225 |
+
\end{table}
|
| 226 |
+
|
| 227 |
+
The key insight: \textbf{proximity replaces foreign keys}. In a relational database, you declare relationships explicitly. In ARMS, relationships emerge from spatial position.
|
| 228 |
+
|
| 229 |
+
\subsection{Merge: Composition Without Loss}
|
| 230 |
+
|
| 231 |
+
Merge combines multiple points into a single representative:
|
| 232 |
+
|
| 233 |
+
\begin{itemize}
|
| 234 |
+
\item \textbf{Mean}: Arithmetic average (default)
|
| 235 |
+
\item \textbf{WeightedMean}: Importance-weighted average
|
| 236 |
+
\item \textbf{MaxPool}: Element-wise maximum
|
| 237 |
+
\end{itemize}
|
| 238 |
+
|
| 239 |
+
Merge enables hierarchical summarization. A conversation can be represented by the merge of its messages; a session by the merge of its conversations.
|
| 240 |
+
|
| 241 |
+
\subsection{Place and Near: The Memory Interface}
|
| 242 |
+
|
| 243 |
+
Place stores a point with associated data, and Near retrieves the $k$ points most related to a query:
|
| 244 |
+
|
| 245 |
+
\begin{lstlisting}[language=Rust,caption={Place and Near operations.}]
|
| 246 |
+
// Store
|
| 247 |
+
let id = arms.place(embedding, blob)?;
|
| 248 |
+
|
| 249 |
+
// Retrieve
|
| 250 |
+
let neighbors = arms.near(&query, k)?;
|
| 251 |
+
\end{lstlisting}
|
| 252 |
+
|
| 253 |
+
This is the complete memory interface. Everything else---storage backends, index algorithms, APIs---is implementation detail.
|
| 254 |
+
|
| 255 |
+
% ----------------------------------------------------------------------------
|
| 256 |
+
\section{Hexagonal Architecture}
|
| 257 |
+
\label{sec:architecture}
|
| 258 |
+
|
| 259 |
+
ARMS follows a hexagonal (ports-and-adapters) architecture. The core domain contains pure math with no I/O. Ports define trait contracts. Adapters provide swappable implementations.
|
| 260 |
+
|
| 261 |
+
\begin{figure}[H]
|
| 262 |
+
\centering
|
| 263 |
+
\includegraphics[width=0.9\textwidth]{fig02_hexagonal.jpg}
|
| 264 |
+
\caption{Hexagonal architecture of ARMS. The core domain contains pure math with no I/O. Ports define trait contracts. Adapters provide swappable implementations for storage, indexing, and APIs.}
|
| 265 |
+
\label{fig:hexagonal}
|
| 266 |
+
\end{figure}
|
| 267 |
+
|
| 268 |
+
\subsection{Core Domain}
|
| 269 |
+
|
| 270 |
+
The core contains:
|
| 271 |
+
|
| 272 |
+
\begin{itemize}
|
| 273 |
+
\item \textbf{Point}: Vector representation
|
| 274 |
+
\item \textbf{Id}: Unique identifiers
|
| 275 |
+
\item \textbf{Blob}: Associated data
|
| 276 |
+
\item \textbf{Proximity}: Relationship measurement
|
| 277 |
+
\item \textbf{Merge}: Point composition
|
| 278 |
+
\end{itemize}
|
| 279 |
+
|
| 280 |
+
No I/O, no side effects, pure functions. This enables testing without mocks and reasoning without context.
|
| 281 |
+
|
| 282 |
+
\subsection{Ports}
|
| 283 |
+
|
| 284 |
+
Ports define what the system needs without specifying how:
|
| 285 |
+
|
| 286 |
+
\begin{lstlisting}[language=Rust,caption={Port definitions.}]
|
| 287 |
+
pub trait Place {
|
| 288 |
+
fn place(&mut self, point: Point, blob: Blob) -> Result<Id>;
|
| 289 |
+
fn get(&self, id: Id) -> Option<&PlacedPoint>;
|
| 290 |
+
fn remove(&mut self, id: Id) -> Option<PlacedPoint>;
|
| 291 |
+
}
|
| 292 |
+
|
| 293 |
+
pub trait Near {
|
| 294 |
+
fn near(&self, query: &Point, k: usize) -> Result<Vec<SearchResult>>;
|
| 295 |
+
fn add(&mut self, id: Id, point: &Point) -> Result<()>;
|
| 296 |
+
}
|
| 297 |
+
\end{lstlisting}
|
| 298 |
+
|
| 299 |
+
\subsection{Adapters}
|
| 300 |
+
|
| 301 |
+
Adapters implement ports for specific technologies:
|
| 302 |
+
|
| 303 |
+
\begin{table}[H]
|
| 304 |
+
\centering
|
| 305 |
+
\caption{Available adapters.}
|
| 306 |
+
\label{tab:adapters}
|
| 307 |
+
\begin{tabular}{lll}
|
| 308 |
+
\toprule
|
| 309 |
+
\textbf{Port} & \textbf{Adapter} & \textbf{Description} \\
|
| 310 |
+
\midrule
|
| 311 |
+
Place & MemoryStorage & In-memory hash map \\
|
| 312 |
+
Place & NVMeStorage & Memory-mapped files (planned) \\
|
| 313 |
+
Near & FlatIndex & Brute-force exact search \\
|
| 314 |
+
Near & HatIndex & Hierarchical Attention Tree \\
|
| 315 |
+
\bottomrule
|
| 316 |
+
\end{tabular}
|
| 317 |
+
\end{table}
|
| 318 |
+
|
| 319 |
+
The HAT index adapter (published separately as \texttt{arms-hat}) demonstrates how domain-specific knowledge can be exploited for superior performance on hierarchical data.
|
| 320 |
+
|
| 321 |
+
\begin{figure}[H]
|
| 322 |
+
\centering
|
| 323 |
+
\includegraphics[width=0.85\textwidth]{fig07_ecosystem.jpg}
|
| 324 |
+
\caption{The ARMS ecosystem: \texttt{arms-core} provides the foundational primitives, while specialized adapters like \texttt{arms-hat} exploit domain-specific structure. Future adapters will add persistence, distribution, and GPU acceleration.}
|
| 325 |
+
\label{fig:ecosystem}
|
| 326 |
+
\end{figure}
|
| 327 |
+
|
| 328 |
+
% ----------------------------------------------------------------------------
|
| 329 |
+
\section{Position IS Relationship}
|
| 330 |
+
\label{sec:philosophy}
|
| 331 |
+
|
| 332 |
+
The core philosophical innovation of ARMS is treating position as the fundamental relationship primitive.
|
| 333 |
+
|
| 334 |
+
\begin{figure}[H]
|
| 335 |
+
\centering
|
| 336 |
+
\includegraphics[width=0.9\textwidth]{fig04_position_relationship.jpg}
|
| 337 |
+
\caption{Position IS relationship: Comparison of relationship representation across database paradigms. ARMS uses spatial position as the fundamental relationship primitive, eliminating the need for explicit declarations.}
|
| 338 |
+
\label{fig:position}
|
| 339 |
+
\end{figure}
|
| 340 |
+
|
| 341 |
+
\subsection{Traditional Approaches}
|
| 342 |
+
|
| 343 |
+
\begin{table}[H]
|
| 344 |
+
\centering
|
| 345 |
+
\caption{Relationship representation in different paradigms.}
|
| 346 |
+
\label{tab:paradigms}
|
| 347 |
+
\begin{tabular}{lll}
|
| 348 |
+
\toprule
|
| 349 |
+
\textbf{Paradigm} & \textbf{Relationship} & \textbf{Limitation} \\
|
| 350 |
+
\midrule
|
| 351 |
+
Relational DB & Foreign keys & Must be declared explicitly \\
|
| 352 |
+
Document DB & Nesting & Limited to containment \\
|
| 353 |
+
Graph DB & Edges & Must be declared explicitly \\
|
| 354 |
+
Vector DB & Learned topology & Requires training/building \\
|
| 355 |
+
\textbf{ARMS} & \textbf{Spatial position} & \textbf{Inherent in representation} \\
|
| 356 |
+
\bottomrule
|
| 357 |
+
\end{tabular}
|
| 358 |
+
\end{table}
|
| 359 |
+
|
| 360 |
+
\subsection{Implications}
|
| 361 |
+
|
| 362 |
+
When position is relationship:
|
| 363 |
+
|
| 364 |
+
\begin{enumerate}
|
| 365 |
+
\item \textbf{Schema-free}: No need to declare relationship types
|
| 366 |
+
\item \textbf{Continuous}: Relationships have degrees, not just existence
|
| 367 |
+
\item \textbf{Emergent}: New relationships discovered through proximity
|
| 368 |
+
\item \textbf{Composable}: Merged points represent group relationships
|
| 369 |
+
\end{enumerate}
|
| 370 |
+
|
| 371 |
+
\subsection{The Hippocampus Analogy}
|
| 372 |
+
|
| 373 |
+
ARMS mirrors the function of the biological hippocampus:
|
| 374 |
+
|
| 375 |
+
\begin{figure}[H]
|
| 376 |
+
\centering
|
| 377 |
+
\includegraphics[width=0.85\textwidth]{fig05_hippocampus.jpg}
|
| 378 |
+
\caption{The hippocampus analogy: ARMS functions as an artificial hippocampus, enabling AI systems to form, consolidate, and retrieve episodic memories through spatial organization.}
|
| 379 |
+
\label{fig:hippocampus}
|
| 380 |
+
\end{figure}
|
| 381 |
+
|
| 382 |
+
\begin{table}[H]
|
| 383 |
+
\centering
|
| 384 |
+
\caption{Hippocampus vs ARMS.}
|
| 385 |
+
\label{tab:hippocampus}
|
| 386 |
+
\begin{tabular}{ll}
|
| 387 |
+
\toprule
|
| 388 |
+
\textbf{Hippocampus} & \textbf{ARMS} \\
|
| 389 |
+
\midrule
|
| 390 |
+
Encodes episodic memories & Stores attention states \\
|
| 391 |
+
Spatial navigation & High-dimensional proximity \\
|
| 392 |
+
Pattern completion & Near queries \\
|
| 393 |
+
Memory consolidation & Merge operations \\
|
| 394 |
+
Place cells & Points at coordinates \\
|
| 395 |
+
\bottomrule
|
| 396 |
+
\end{tabular}
|
| 397 |
+
\end{table}
|
| 398 |
+
|
| 399 |
+
% ----------------------------------------------------------------------------
|
| 400 |
+
\section{Implementation}
|
| 401 |
+
\label{sec:implementation}
|
| 402 |
+
|
| 403 |
+
ARMS is implemented in Rust for performance and safety, with Python bindings planned.
|
| 404 |
+
|
| 405 |
+
\subsection{Usage Example}
|
| 406 |
+
|
| 407 |
+
\begin{lstlisting}[language=Rust,caption={Complete ARMS usage example.}]
|
| 408 |
+
use arms_core::{Arms, ArmsConfig, Point, Blob};
|
| 409 |
+
|
| 410 |
+
// Create ARMS with 768 dimensions
|
| 411 |
+
let mut arms = Arms::new(ArmsConfig::new(768));
|
| 412 |
+
|
| 413 |
+
// Store embeddings
|
| 414 |
+
let embedding = Point::new(vec![0.1; 768]);
|
| 415 |
+
let id = arms.place(embedding, Blob::from_str("hello")).unwrap();
|
| 416 |
+
|
| 417 |
+
// Query by proximity
|
| 418 |
+
let query = Point::new(vec![0.1; 768]);
|
| 419 |
+
let neighbors = arms.near(&query, 10).unwrap();
|
| 420 |
+
|
| 421 |
+
// Get with data
|
| 422 |
+
let results = arms.near_with_data(&query, 5).unwrap();
|
| 423 |
+
for (point, score) in results {
|
| 424 |
+
println!("{}: {}", point.blob.as_str().unwrap(), score);
|
| 425 |
+
}
|
| 426 |
+
\end{lstlisting}
|
| 427 |
+
|
| 428 |
+
\subsection{Performance}
|
| 429 |
+
|
| 430 |
+
With the flat index (exact search):
|
| 431 |
+
|
| 432 |
+
\begin{table}[H]
|
| 433 |
+
\centering
|
| 434 |
+
\caption{Flat index performance.}
|
| 435 |
+
\label{tab:performance}
|
| 436 |
+
\begin{tabular}{rrr}
|
| 437 |
+
\toprule
|
| 438 |
+
\textbf{Points} & \textbf{Dimensions} & \textbf{Query Time} \\
|
| 439 |
+
\midrule
|
| 440 |
+
1,000 & 768 & 0.3ms \\
|
| 441 |
+
10,000 & 768 & 3ms \\
|
| 442 |
+
100,000 & 768 & 30ms \\
|
| 443 |
+
\bottomrule
|
| 444 |
+
\end{tabular}
|
| 445 |
+
\end{table}
|
| 446 |
+
|
| 447 |
+
For large-scale deployments, the HAT index adapter provides $O(\log n)$ queries with 100\% recall on hierarchical data.
|
| 448 |
+
|
| 449 |
+
% ----------------------------------------------------------------------------
|
| 450 |
+
\section{Related Work}
|
| 451 |
+
\label{sec:related}
|
| 452 |
+
|
| 453 |
+
\textbf{Vector Databases}: Pinecone, Weaviate, Milvus, and Qdrant provide vector storage and retrieval. ARMS differs by providing a minimal primitive set and hexagonal architecture rather than a monolithic solution.
|
| 454 |
+
|
| 455 |
+
\textbf{Memory-Augmented Networks}: Neural Turing Machines and Differentiable Neural Computers use learned memory access. ARMS provides explicit, interpretable memory operations.
|
| 456 |
+
|
| 457 |
+
\textbf{RAG Systems}: Retrieval-Augmented Generation retrieves text for reprocessing. ARMS can store pre-computed attention states, avoiding recomputation.
|
| 458 |
+
|
| 459 |
+
\textbf{Embedding Stores}: LangChain and LlamaIndex provide embedding storage. ARMS provides lower-level primitives for building such systems.
|
| 460 |
+
|
| 461 |
+
% ----------------------------------------------------------------------------
|
| 462 |
+
\section{Future Work}
|
| 463 |
+
\label{sec:future}
|
| 464 |
+
|
| 465 |
+
\subsection{Planned Adapters}
|
| 466 |
+
|
| 467 |
+
\begin{itemize}
|
| 468 |
+
\item \textbf{NVMe Storage}: Memory-mapped files for persistence
|
| 469 |
+
\item \textbf{Distributed Storage}: Sharded across machines
|
| 470 |
+
\item \textbf{GPU Index}: CUDA-accelerated similarity search
|
| 471 |
+
\end{itemize}
|
| 472 |
+
|
| 473 |
+
\subsection{Applications}
|
| 474 |
+
|
| 475 |
+
\begin{itemize}
|
| 476 |
+
\item \textbf{LLM Memory}: Long-term episodic memory for chatbots
|
| 477 |
+
\item \textbf{Agent State}: Persistent state for AI agents
|
| 478 |
+
\item \textbf{Attention Caching}: Store and retrieve KV cache states
|
| 479 |
+
\item \textbf{Multimodal Memory}: Unified space for text, image, audio embeddings
|
| 480 |
+
\end{itemize}
|
| 481 |
+
|
| 482 |
+
% ----------------------------------------------------------------------------
|
| 483 |
+
\section{Conclusion}
|
| 484 |
+
\label{sec:conclusion}
|
| 485 |
+
|
| 486 |
+
ARMS provides a minimal, principled foundation for AI memory systems. By reducing memory operations to five primitives and adopting a hexagonal architecture, ARMS enables:
|
| 487 |
+
|
| 488 |
+
\begin{enumerate}
|
| 489 |
+
\item \textbf{Simplicity}: Five operations cover all memory needs
|
| 490 |
+
\item \textbf{Flexibility}: Swap storage, index, and API independently
|
| 491 |
+
\item \textbf{Performance}: Domain-specific adapters like HAT
|
| 492 |
+
\item \textbf{Philosophy}: Position IS relationship
|
| 493 |
+
\end{enumerate}
|
| 494 |
+
|
| 495 |
+
ARMS functions as an artificial hippocampus for AI systems, enabling them to form, consolidate, and retrieve memories through spatial organization rather than explicit indexing.
|
| 496 |
+
|
| 497 |
+
% ----------------------------------------------------------------------------
|
| 498 |
+
\section*{Acknowledgments}
|
| 499 |
+
|
| 500 |
+
I thank the open-source Rust community for excellent tooling and the researchers whose work on memory-augmented networks inspired this architecture.
|
| 501 |
+
|
| 502 |
+
% ----------------------------------------------------------------------------
|
| 503 |
+
\bibliographystyle{plainnat}
|
| 504 |
+
\bibliography{refs}
|
| 505 |
+
|
| 506 |
+
% ----------------------------------------------------------------------------
|
| 507 |
+
\appendix
|
| 508 |
+
|
| 509 |
+
\section{Code Availability}
|
| 510 |
+
\label{app:code}
|
| 511 |
+
|
| 512 |
+
ARMS is available as open-source software:
|
| 513 |
+
|
| 514 |
+
\begin{itemize}
|
| 515 |
+
\item \textbf{Rust crate}: \texttt{arms-core} on crates.io
|
| 516 |
+
\item \textbf{HAT adapter}: \texttt{arms-hat} on crates.io
|
| 517 |
+
\item \textbf{Repository}: \url{https://github.com/automate-capture/arms}
|
| 518 |
+
\end{itemize}
|
| 519 |
+
|
| 520 |
+
\end{document}
|
paper/figures/fig01_architecture.jpg
ADDED
|
Git LFS Details
|
paper/figures/fig02_hexagonal.jpg
ADDED
|
Git LFS Details
|
paper/figures/fig03_primitives.jpg
ADDED
|
Git LFS Details
|
paper/figures/fig04_position_relationship.jpg
ADDED
|
Git LFS Details
|
paper/figures/fig05_hippocampus.jpg
ADDED
|
Git LFS Details
|
paper/figures/fig06_traditional_vs_arms.jpg
ADDED
|
Git LFS Details
|
paper/figures/fig07_ecosystem.jpg
ADDED
|
Git LFS Details
|
paper/refs.bib
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
% ARMS Paper Bibliography
|
| 2 |
+
|
| 3 |
+
@article{graves2014neural,
|
| 4 |
+
author = {Graves, Alex and Wayne, Greg and Danihelka, Ivo},
|
| 5 |
+
title = {Neural Turing Machines},
|
| 6 |
+
journal = {arXiv preprint arXiv:1410.5401},
|
| 7 |
+
year = {2014},
|
| 8 |
+
}
|
| 9 |
+
|
| 10 |
+
@article{graves2016dnc,
|
| 11 |
+
author = {Graves, Alex and Wayne, Greg and Reynolds, Malcolm and Harley, Tim and Danihelka, Ivo and others},
|
| 12 |
+
title = {Hybrid Computing Using a Neural Network with Dynamic External Memory},
|
| 13 |
+
journal = {Nature},
|
| 14 |
+
volume = {538},
|
| 15 |
+
number = {7626},
|
| 16 |
+
pages = {471--476},
|
| 17 |
+
year = {2016},
|
| 18 |
+
}
|
| 19 |
+
|
| 20 |
+
@inproceedings{weston2015memory,
|
| 21 |
+
author = {Weston, Jason and Chopra, Sumit and Bordes, Antoine},
|
| 22 |
+
title = {Memory Networks},
|
| 23 |
+
booktitle = {International Conference on Learning Representations},
|
| 24 |
+
year = {2015},
|
| 25 |
+
}
|
| 26 |
+
|
| 27 |
+
@inproceedings{lewis2020rag,
|
| 28 |
+
author = {Lewis, Patrick and others},
|
| 29 |
+
title = {Retrieval-Augmented Generation for Knowledge-Intensive {NLP} Tasks},
|
| 30 |
+
booktitle = {Advances in Neural Information Processing Systems},
|
| 31 |
+
volume = {33},
|
| 32 |
+
pages = {9459--9474},
|
| 33 |
+
year = {2020},
|
| 34 |
+
}
|
| 35 |
+
|
| 36 |
+
@article{malkov2018hnsw,
|
| 37 |
+
author = {Malkov, Yu A. and Yashunin, D. A.},
|
| 38 |
+
title = {Efficient and Robust Approximate Nearest Neighbor Search Using Hierarchical Navigable Small World Graphs},
|
| 39 |
+
journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
|
| 40 |
+
volume = {42},
|
| 41 |
+
number = {4},
|
| 42 |
+
pages = {824--836},
|
| 43 |
+
year = {2018},
|
| 44 |
+
}
|
| 45 |
+
|
| 46 |
+
@article{johnson2019faiss,
|
| 47 |
+
author = {Johnson, Jeff and Douze, Matthijs and J{\'e}gou, Herv{\'e}},
|
| 48 |
+
title = {Billion-scale Similarity Search with {GPU}s},
|
| 49 |
+
journal = {IEEE Transactions on Big Data},
|
| 50 |
+
volume = {7},
|
| 51 |
+
number = {3},
|
| 52 |
+
pages = {535--547},
|
| 53 |
+
year = {2019},
|
| 54 |
+
}
|
| 55 |
+
|
| 56 |
+
@article{vaswani2017attention,
|
| 57 |
+
author = {Vaswani, Ashish and others},
|
| 58 |
+
title = {Attention is All You Need},
|
| 59 |
+
journal = {Advances in Neural Information Processing Systems},
|
| 60 |
+
volume = {30},
|
| 61 |
+
year = {2017},
|
| 62 |
+
}
|
| 63 |
+
|
| 64 |
+
@misc{cockburn2005hexagonal,
|
| 65 |
+
author = {Cockburn, Alistair},
|
| 66 |
+
title = {Hexagonal Architecture},
|
| 67 |
+
year = {2005},
|
| 68 |
+
note = {Also known as Ports and Adapters pattern},
|
| 69 |
+
}
|
| 70 |
+
|
| 71 |
+
@article{moser2008place,
|
| 72 |
+
author = {Moser, Edvard I. and Kropff, Emilio and Moser, May-Britt},
|
| 73 |
+
title = {Place Cells, Grid Cells, and the Brain's Spatial Representation System},
|
| 74 |
+
journal = {Annual Review of Neuroscience},
|
| 75 |
+
volume = {31},
|
| 76 |
+
pages = {69--89},
|
| 77 |
+
year = {2008},
|
| 78 |
+
}
|
src/adapters/index/flat.rs
ADDED
|
@@ -0,0 +1,278 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
//! # Flat Index Adapter
|
| 2 |
+
//!
|
| 3 |
+
//! Brute force nearest neighbor search.
|
| 4 |
+
//! Compares query against ALL points - O(n) per query.
|
| 5 |
+
//!
|
| 6 |
+
//! Good for:
|
| 7 |
+
//! - Testing
|
| 8 |
+
//! - Small datasets (< 10,000 points)
|
| 9 |
+
//! - When exact results are required
|
| 10 |
+
//!
|
| 11 |
+
//! Not good for:
|
| 12 |
+
//! - Large datasets (use an approximate index instead, e.g. the HAT adapter in the `arms-hat` crate)
|
| 13 |
+
|
| 14 |
+
use std::collections::HashMap;
|
| 15 |
+
use std::sync::Arc;
|
| 16 |
+
|
| 17 |
+
use crate::core::{Id, Point};
|
| 18 |
+
use crate::core::proximity::Proximity;
|
| 19 |
+
use crate::ports::{Near, NearError, NearResult, SearchResult};
|
| 20 |
+
|
| 21 |
+
/// Brute force index - searches all points
|
| 22 |
+
pub struct FlatIndex {
|
| 23 |
+
/// Stored points (ID -> Point)
|
| 24 |
+
points: HashMap<Id, Point>,
|
| 25 |
+
|
| 26 |
+
/// Expected dimensionality
|
| 27 |
+
dimensionality: usize,
|
| 28 |
+
|
| 29 |
+
/// Proximity function to use
|
| 30 |
+
proximity: Arc<dyn Proximity>,
|
| 31 |
+
|
| 32 |
+
/// Whether higher proximity = more similar
|
| 33 |
+
/// true for cosine/dot product, false for euclidean
|
| 34 |
+
higher_is_better: bool,
|
| 35 |
+
}
|
| 36 |
+
|
| 37 |
+
impl FlatIndex {
|
| 38 |
+
/// Create a new flat index
|
| 39 |
+
///
|
| 40 |
+
/// `higher_is_better` indicates whether higher proximity scores mean more similar.
|
| 41 |
+
/// - `true` for Cosine, DotProduct
|
| 42 |
+
/// - `false` for Euclidean, Manhattan
|
| 43 |
+
pub fn new(
|
| 44 |
+
dimensionality: usize,
|
| 45 |
+
proximity: Arc<dyn Proximity>,
|
| 46 |
+
higher_is_better: bool,
|
| 47 |
+
) -> Self {
|
| 48 |
+
Self {
|
| 49 |
+
points: HashMap::new(),
|
| 50 |
+
dimensionality,
|
| 51 |
+
proximity,
|
| 52 |
+
higher_is_better,
|
| 53 |
+
}
|
| 54 |
+
}
|
| 55 |
+
|
| 56 |
+
/// Create with cosine similarity (higher = better)
|
| 57 |
+
pub fn cosine(dimensionality: usize) -> Self {
|
| 58 |
+
use crate::core::proximity::Cosine;
|
| 59 |
+
Self::new(dimensionality, Arc::new(Cosine), true)
|
| 60 |
+
}
|
| 61 |
+
|
| 62 |
+
/// Create with euclidean distance (lower = better)
|
| 63 |
+
pub fn euclidean(dimensionality: usize) -> Self {
|
| 64 |
+
use crate::core::proximity::Euclidean;
|
| 65 |
+
Self::new(dimensionality, Arc::new(Euclidean), false)
|
| 66 |
+
}
|
| 67 |
+
|
| 68 |
+
/// Sort results by relevance
|
| 69 |
+
fn sort_results(&self, results: &mut Vec<SearchResult>) {
|
| 70 |
+
if self.higher_is_better {
|
| 71 |
+
// Higher score = more relevant, sort descending
|
| 72 |
+
results.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap());
|
| 73 |
+
} else {
|
| 74 |
+
// Lower score = more relevant, sort ascending
|
| 75 |
+
results.sort_by(|a, b| a.score.partial_cmp(&b.score).unwrap());
|
| 76 |
+
}
|
| 77 |
+
}
|
| 78 |
+
}
|
| 79 |
+
|
| 80 |
+
impl Near for FlatIndex {
|
| 81 |
+
fn near(&self, query: &Point, k: usize) -> NearResult<Vec<SearchResult>> {
|
| 82 |
+
// Check dimensionality
|
| 83 |
+
if query.dimensionality() != self.dimensionality {
|
| 84 |
+
return Err(NearError::DimensionalityMismatch {
|
| 85 |
+
expected: self.dimensionality,
|
| 86 |
+
got: query.dimensionality(),
|
| 87 |
+
});
|
| 88 |
+
}
|
| 89 |
+
|
| 90 |
+
// Compute proximity to all points
|
| 91 |
+
let mut results: Vec<SearchResult> = self
|
| 92 |
+
.points
|
| 93 |
+
.iter()
|
| 94 |
+
.map(|(id, point)| {
|
| 95 |
+
let score = self.proximity.proximity(query, point);
|
| 96 |
+
SearchResult::new(*id, score)
|
| 97 |
+
})
|
| 98 |
+
.collect();
|
| 99 |
+
|
| 100 |
+
// Sort by relevance
|
| 101 |
+
self.sort_results(&mut results);
|
| 102 |
+
|
| 103 |
+
// Take top k
|
| 104 |
+
results.truncate(k);
|
| 105 |
+
|
| 106 |
+
Ok(results)
|
| 107 |
+
}
|
| 108 |
+
|
| 109 |
+
fn within(&self, query: &Point, threshold: f32) -> NearResult<Vec<SearchResult>> {
|
| 110 |
+
// Check dimensionality
|
| 111 |
+
if query.dimensionality() != self.dimensionality {
|
| 112 |
+
return Err(NearError::DimensionalityMismatch {
|
| 113 |
+
expected: self.dimensionality,
|
| 114 |
+
got: query.dimensionality(),
|
| 115 |
+
});
|
| 116 |
+
}
|
| 117 |
+
|
| 118 |
+
// Find all points within threshold
|
| 119 |
+
let mut results: Vec<SearchResult> = self
|
| 120 |
+
.points
|
| 121 |
+
.iter()
|
| 122 |
+
.filter_map(|(id, point)| {
|
| 123 |
+
let score = self.proximity.proximity(query, point);
|
| 124 |
+
let within = if self.higher_is_better {
|
| 125 |
+
score >= threshold
|
| 126 |
+
} else {
|
| 127 |
+
score <= threshold
|
| 128 |
+
};
|
| 129 |
+
if within {
|
| 130 |
+
Some(SearchResult::new(*id, score))
|
| 131 |
+
} else {
|
| 132 |
+
None
|
| 133 |
+
}
|
| 134 |
+
})
|
| 135 |
+
.collect();
|
| 136 |
+
|
| 137 |
+
// Sort by relevance
|
| 138 |
+
self.sort_results(&mut results);
|
| 139 |
+
|
| 140 |
+
Ok(results)
|
| 141 |
+
}
|
| 142 |
+
|
| 143 |
+
fn add(&mut self, id: Id, point: &Point) -> NearResult<()> {
|
| 144 |
+
if point.dimensionality() != self.dimensionality {
|
| 145 |
+
return Err(NearError::DimensionalityMismatch {
|
| 146 |
+
expected: self.dimensionality,
|
| 147 |
+
got: point.dimensionality(),
|
| 148 |
+
});
|
| 149 |
+
}
|
| 150 |
+
|
| 151 |
+
self.points.insert(id, point.clone());
|
| 152 |
+
Ok(())
|
| 153 |
+
}
|
| 154 |
+
|
| 155 |
+
fn remove(&mut self, id: Id) -> NearResult<()> {
|
| 156 |
+
self.points.remove(&id);
|
| 157 |
+
Ok(())
|
| 158 |
+
}
|
| 159 |
+
|
| 160 |
+
fn rebuild(&mut self) -> NearResult<()> {
|
| 161 |
+
// Flat index doesn't need rebuilding
|
| 162 |
+
Ok(())
|
| 163 |
+
}
|
| 164 |
+
|
| 165 |
+
fn is_ready(&self) -> bool {
|
| 166 |
+
true // Always ready
|
| 167 |
+
}
|
| 168 |
+
|
| 169 |
+
fn len(&self) -> usize {
|
| 170 |
+
self.points.len()
|
| 171 |
+
}
|
| 172 |
+
}
|
| 173 |
+
|
| 174 |
+
#[cfg(test)]
|
| 175 |
+
mod tests {
|
| 176 |
+
use super::*;
|
| 177 |
+
|
| 178 |
+
fn setup_index() -> FlatIndex {
|
| 179 |
+
let mut index = FlatIndex::cosine(3);
|
| 180 |
+
|
| 181 |
+
// Add some test points
|
| 182 |
+
let points = vec![
|
| 183 |
+
(Id::from_bytes([1; 16]), Point::new(vec![1.0, 0.0, 0.0])),
|
| 184 |
+
(Id::from_bytes([2; 16]), Point::new(vec![0.0, 1.0, 0.0])),
|
| 185 |
+
(Id::from_bytes([3; 16]), Point::new(vec![0.0, 0.0, 1.0])),
|
| 186 |
+
(Id::from_bytes([4; 16]), Point::new(vec![0.7, 0.7, 0.0]).normalize()),
|
| 187 |
+
];
|
| 188 |
+
|
| 189 |
+
for (id, point) in points {
|
| 190 |
+
index.add(id, &point).unwrap();
|
| 191 |
+
}
|
| 192 |
+
|
| 193 |
+
index
|
| 194 |
+
}
|
| 195 |
+
|
| 196 |
+
#[test]
|
| 197 |
+
fn test_flat_index_near() {
|
| 198 |
+
let index = setup_index();
|
| 199 |
+
|
| 200 |
+
// Query for points near [1, 0, 0]
|
| 201 |
+
let query = Point::new(vec![1.0, 0.0, 0.0]);
|
| 202 |
+
let results = index.near(&query, 2).unwrap();
|
| 203 |
+
|
| 204 |
+
assert_eq!(results.len(), 2);
|
| 205 |
+
|
| 206 |
+
// First result should be [1, 0, 0] with cosine = 1.0
|
| 207 |
+
assert_eq!(results[0].id, Id::from_bytes([1; 16]));
|
| 208 |
+
assert!((results[0].score - 1.0).abs() < 0.0001);
|
| 209 |
+
}
|
| 210 |
+
|
| 211 |
+
#[test]
|
| 212 |
+
fn test_flat_index_within_cosine() {
|
| 213 |
+
let index = setup_index();
|
| 214 |
+
|
| 215 |
+
// Find all points with cosine >= 0.5 to [1, 0, 0] (the threshold is inclusive)
|
| 216 |
+
let query = Point::new(vec![1.0, 0.0, 0.0]);
|
| 217 |
+
let results = index.within(&query, 0.5).unwrap();
|
| 218 |
+
|
| 219 |
+
// Should find [1,0,0] (cosine=1.0) and [0.7,0.7,0] (cosine≈0.707)
|
| 220 |
+
assert_eq!(results.len(), 2);
|
| 221 |
+
}
|
| 222 |
+
|
| 223 |
+
#[test]
|
| 224 |
+
fn test_flat_index_euclidean() {
|
| 225 |
+
let mut index = FlatIndex::euclidean(2);
|
| 226 |
+
|
| 227 |
+
index.add(Id::from_bytes([1; 16]), &Point::new(vec![0.0, 0.0])).unwrap();
|
| 228 |
+
index.add(Id::from_bytes([2; 16]), &Point::new(vec![1.0, 0.0])).unwrap();
|
| 229 |
+
index.add(Id::from_bytes([3; 16]), &Point::new(vec![5.0, 0.0])).unwrap();
|
| 230 |
+
|
| 231 |
+
let query = Point::new(vec![0.0, 0.0]);
|
| 232 |
+
let results = index.near(&query, 2).unwrap();
|
| 233 |
+
|
| 234 |
+
// Nearest should be [0,0] with distance 0
|
| 235 |
+
assert_eq!(results[0].id, Id::from_bytes([1; 16]));
|
| 236 |
+
assert!((results[0].score - 0.0).abs() < 0.0001);
|
| 237 |
+
|
| 238 |
+
// Second nearest should be [1,0] with distance 1
|
| 239 |
+
assert_eq!(results[1].id, Id::from_bytes([2; 16]));
|
| 240 |
+
assert!((results[1].score - 1.0).abs() < 0.0001);
|
| 241 |
+
}
|
| 242 |
+
|
| 243 |
+
#[test]
|
| 244 |
+
fn test_flat_index_add_remove() {
|
| 245 |
+
let mut index = FlatIndex::cosine(3);
|
| 246 |
+
|
| 247 |
+
let id = Id::from_bytes([1; 16]);
|
| 248 |
+
let point = Point::new(vec![1.0, 0.0, 0.0]);
|
| 249 |
+
|
| 250 |
+
index.add(id, &point).unwrap();
|
| 251 |
+
assert_eq!(index.len(), 1);
|
| 252 |
+
|
| 253 |
+
index.remove(id).unwrap();
|
| 254 |
+
assert_eq!(index.len(), 0);
|
| 255 |
+
}
|
| 256 |
+
|
| 257 |
+
#[test]
|
| 258 |
+
fn test_flat_index_dimensionality_check() {
|
| 259 |
+
let mut index = FlatIndex::cosine(3);
|
| 260 |
+
|
| 261 |
+
let wrong_dims = Point::new(vec![1.0, 0.0]); // 2 dims
|
| 262 |
+
let result = index.add(Id::now(), &wrong_dims);
|
| 263 |
+
|
| 264 |
+
match result {
|
| 265 |
+
Err(NearError::DimensionalityMismatch { expected, got }) => {
|
| 266 |
+
assert_eq!(expected, 3);
|
| 267 |
+
assert_eq!(got, 2);
|
| 268 |
+
}
|
| 269 |
+
_ => panic!("Expected DimensionalityMismatch error"),
|
| 270 |
+
}
|
| 271 |
+
}
|
| 272 |
+
|
| 273 |
+
#[test]
|
| 274 |
+
fn test_flat_index_ready() {
|
| 275 |
+
let index = FlatIndex::cosine(3);
|
| 276 |
+
assert!(index.is_ready());
|
| 277 |
+
}
|
| 278 |
+
}
|
src/adapters/index/mod.rs
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
//! # Index Adapters
|
| 2 |
+
//!
|
| 3 |
+
//! Implementations of the Near port for different index backends.
|
| 4 |
+
//!
|
| 5 |
+
//! Available adapters:
|
| 6 |
+
//! - `FlatIndex` - Brute force search (exact, slow for large N)
|
| 7 |
+
//! - `HnswIndex` - Hierarchical Navigable Small World (approximate, fast) [TODO]
|
| 8 |
+
|
| 9 |
+
mod flat;
|
| 10 |
+
|
| 11 |
+
pub use flat::FlatIndex;
|
| 12 |
+
|
| 13 |
+
// TODO: Add HNSW adapter
|
| 14 |
+
// mod hnsw;
|
| 15 |
+
// pub use hnsw::HnswIndex;
|
src/adapters/mod.rs
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
//! # Adapters
|
| 2 |
+
//!
|
| 3 |
+
//! Swappable implementations of port traits.
|
| 4 |
+
//!
|
| 5 |
+
//! This is where the hexagonal architecture meets reality:
|
| 6 |
+
//! - Storage adapters: Memory (NVMe planned)
|
| 7 |
+
//! - Index adapters: Flat (brute force)
|
| 8 |
+
//!
|
| 9 |
+
//! Each adapter implements one or more port traits.
|
| 10 |
+
//! Adapters can be swapped without changing core logic.
|
| 11 |
+
//!
|
| 12 |
+
//! For advanced index adapters like HAT (Hierarchical Attention Tree),
|
| 13 |
+
//! see the `arms-hat` crate.
|
| 14 |
+
|
| 15 |
+
pub mod storage;
|
| 16 |
+
pub mod index;
|
src/adapters/storage/memory.rs
ADDED
|
@@ -0,0 +1,253 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
//! # Memory Storage Adapter
|
| 2 |
+
//!
|
| 3 |
+
//! In-memory storage using HashMap.
|
| 4 |
+
//! Fast, but volatile (data lost on shutdown).
|
| 5 |
+
//!
|
| 6 |
+
//! Good for:
|
| 7 |
+
//! - Testing
|
| 8 |
+
//! - Hot tier storage
|
| 9 |
+
//! - Small datasets
|
| 10 |
+
|
| 11 |
+
use std::collections::HashMap;
|
| 12 |
+
|
| 13 |
+
use crate::core::{Blob, Id, PlacedPoint, Point};
|
| 14 |
+
use crate::ports::{Place, PlaceError, PlaceResult};
|
| 15 |
+
|
| 16 |
+
/// In-memory storage adapter
|
| 17 |
+
pub struct MemoryStorage {
|
| 18 |
+
/// The stored points
|
| 19 |
+
points: HashMap<Id, PlacedPoint>,
|
| 20 |
+
|
| 21 |
+
/// Expected dimensionality
|
| 22 |
+
dimensionality: usize,
|
| 23 |
+
|
| 24 |
+
/// Maximum capacity in bytes (0 = unlimited)
|
| 25 |
+
capacity: usize,
|
| 26 |
+
|
| 27 |
+
/// Current size in bytes
|
| 28 |
+
current_size: usize,
|
| 29 |
+
}
|
| 30 |
+
|
| 31 |
+
impl MemoryStorage {
|
| 32 |
+
/// Create a new memory storage with specified dimensionality
|
| 33 |
+
pub fn new(dimensionality: usize) -> Self {
|
| 34 |
+
Self {
|
| 35 |
+
points: HashMap::new(),
|
| 36 |
+
dimensionality,
|
| 37 |
+
capacity: 0,
|
| 38 |
+
current_size: 0,
|
| 39 |
+
}
|
| 40 |
+
}
|
| 41 |
+
|
| 42 |
+
/// Create with a capacity limit
|
| 43 |
+
pub fn with_capacity(dimensionality: usize, capacity: usize) -> Self {
|
| 44 |
+
Self {
|
| 45 |
+
points: HashMap::new(),
|
| 46 |
+
dimensionality,
|
| 47 |
+
capacity,
|
| 48 |
+
current_size: 0,
|
| 49 |
+
}
|
| 50 |
+
}
|
| 51 |
+
|
| 52 |
+
/// Calculate size of a placed point in bytes
|
| 53 |
+
fn point_size(point: &PlacedPoint) -> usize {
|
| 54 |
+
// Id: 16 bytes
|
| 55 |
+
// Point: dims.len() * 4 bytes (f32)
|
| 56 |
+
// Blob: data.len() bytes
|
| 57 |
+
// Overhead: ~48 bytes for struct padding and HashMap entry
|
| 58 |
+
16 + (point.point.dimensionality() * 4) + point.blob.size() + 48
|
| 59 |
+
}
|
| 60 |
+
}
|
| 61 |
+
|
| 62 |
+
impl Place for MemoryStorage {
|
| 63 |
+
fn place(&mut self, point: Point, blob: Blob) -> PlaceResult<Id> {
|
| 64 |
+
// Check dimensionality
|
| 65 |
+
if point.dimensionality() != self.dimensionality {
|
| 66 |
+
return Err(PlaceError::DimensionalityMismatch {
|
| 67 |
+
expected: self.dimensionality,
|
| 68 |
+
got: point.dimensionality(),
|
| 69 |
+
});
|
| 70 |
+
}
|
| 71 |
+
|
| 72 |
+
let id = Id::now();
|
| 73 |
+
let placed = PlacedPoint::new(id, point, blob);
|
| 74 |
+
|
| 75 |
+
// Check capacity
|
| 76 |
+
let size = Self::point_size(&placed);
|
| 77 |
+
if self.capacity > 0 && self.current_size + size > self.capacity {
|
| 78 |
+
return Err(PlaceError::CapacityExceeded);
|
| 79 |
+
}
|
| 80 |
+
|
| 81 |
+
self.current_size += size;
|
| 82 |
+
self.points.insert(id, placed);
|
| 83 |
+
|
| 84 |
+
Ok(id)
|
| 85 |
+
}
|
| 86 |
+
|
| 87 |
+
fn place_with_id(&mut self, id: Id, point: Point, blob: Blob) -> PlaceResult<()> {
|
| 88 |
+
// Check dimensionality
|
| 89 |
+
if point.dimensionality() != self.dimensionality {
|
| 90 |
+
return Err(PlaceError::DimensionalityMismatch {
|
| 91 |
+
expected: self.dimensionality,
|
| 92 |
+
got: point.dimensionality(),
|
| 93 |
+
});
|
| 94 |
+
}
|
| 95 |
+
|
| 96 |
+
// Check for duplicates
|
| 97 |
+
if self.points.contains_key(&id) {
|
| 98 |
+
return Err(PlaceError::DuplicateId(id));
|
| 99 |
+
}
|
| 100 |
+
|
| 101 |
+
let placed = PlacedPoint::new(id, point, blob);
|
| 102 |
+
|
| 103 |
+
// Check capacity
|
| 104 |
+
let size = Self::point_size(&placed);
|
| 105 |
+
if self.capacity > 0 && self.current_size + size > self.capacity {
|
| 106 |
+
return Err(PlaceError::CapacityExceeded);
|
| 107 |
+
}
|
| 108 |
+
|
| 109 |
+
self.current_size += size;
|
| 110 |
+
self.points.insert(id, placed);
|
| 111 |
+
|
| 112 |
+
Ok(())
|
| 113 |
+
}
|
| 114 |
+
|
| 115 |
+
fn remove(&mut self, id: Id) -> Option<PlacedPoint> {
|
| 116 |
+
if let Some(placed) = self.points.remove(&id) {
|
| 117 |
+
self.current_size -= Self::point_size(&placed);
|
| 118 |
+
Some(placed)
|
| 119 |
+
} else {
|
| 120 |
+
None
|
| 121 |
+
}
|
| 122 |
+
}
|
| 123 |
+
|
| 124 |
+
fn get(&self, id: Id) -> Option<&PlacedPoint> {
|
| 125 |
+
self.points.get(&id)
|
| 126 |
+
}
|
| 127 |
+
|
| 128 |
+
fn len(&self) -> usize {
|
| 129 |
+
self.points.len()
|
| 130 |
+
}
|
| 131 |
+
|
| 132 |
+
fn iter(&self) -> Box<dyn Iterator<Item = &PlacedPoint> + '_> {
|
| 133 |
+
Box::new(self.points.values())
|
| 134 |
+
}
|
| 135 |
+
|
| 136 |
+
fn size_bytes(&self) -> usize {
|
| 137 |
+
self.current_size
|
| 138 |
+
}
|
| 139 |
+
|
| 140 |
+
fn clear(&mut self) {
|
| 141 |
+
self.points.clear();
|
| 142 |
+
self.current_size = 0;
|
| 143 |
+
}
|
| 144 |
+
}
|
| 145 |
+
|
| 146 |
+
#[cfg(test)]
mod tests {
    use super::*;

    /// The three-dimensional point shared by most tests.
    fn xyz() -> Point {
        Point::new(vec![1.0, 2.0, 3.0])
    }

    #[test]
    fn test_memory_storage_place() {
        let mut storage = MemoryStorage::new(3);

        let id = storage.place(xyz(), Blob::from_str("test")).unwrap();

        assert_eq!(storage.len(), 1);
        assert!(storage.contains(id));
    }

    #[test]
    fn test_memory_storage_get() {
        let mut storage = MemoryStorage::new(3);
        let id = storage.place(xyz(), Blob::from_str("hello")).unwrap();

        let text = storage.get(id).and_then(|placed| placed.blob.as_str());

        assert_eq!(text, Some("hello"));
    }

    #[test]
    fn test_memory_storage_remove() {
        let mut storage = MemoryStorage::new(3);
        let id = storage.place(xyz(), Blob::empty()).unwrap();
        assert_eq!(storage.len(), 1);

        assert!(storage.remove(id).is_some());

        assert_eq!(storage.len(), 0);
        assert!(!storage.contains(id));
    }

    #[test]
    fn test_memory_storage_dimensionality_check() {
        let mut storage = MemoryStorage::new(3);

        // Two coordinates offered to a three-dimensional store.
        let outcome = storage.place(Point::new(vec![1.0, 2.0]), Blob::empty());

        if let Err(PlaceError::DimensionalityMismatch { expected, got }) = outcome {
            assert_eq!(expected, 3);
            assert_eq!(got, 2);
        } else {
            panic!("Expected DimensionalityMismatch error");
        }
    }

    #[test]
    fn test_memory_storage_capacity() {
        // One entry is accounted as
        // 16 (id) + 12 (3 f32s) + 10 (blob) + 48 (overhead) = 86 bytes,
        // so a 150-byte budget holds one entry but not two.
        let mut storage = MemoryStorage::with_capacity(3, 150);
        let small_blob = || Blob::new(vec![0u8; 10]);

        // The first entry fits.
        storage.place(xyz(), small_blob()).unwrap();

        // The second one would exceed the budget.
        let second = storage.place(xyz(), small_blob());
        assert!(matches!(second, Err(PlaceError::CapacityExceeded)));
    }

    #[test]
    fn test_memory_storage_clear() {
        let mut storage = MemoryStorage::new(3);
        (0..10).for_each(|i| {
            let point = Point::new(vec![i as f32, 0.0, 0.0]);
            storage.place(point, Blob::empty()).unwrap();
        });
        assert_eq!(storage.len(), 10);
        assert!(storage.size_bytes() > 0);

        storage.clear();

        assert_eq!(storage.len(), 0);
        assert_eq!(storage.size_bytes(), 0);
    }

    #[test]
    fn test_memory_storage_iter() {
        let mut storage = MemoryStorage::new(2);
        for coords in [vec![1.0, 0.0], vec![0.0, 1.0]] {
            storage.place(Point::new(coords), Blob::empty()).unwrap();
        }

        let points: Vec<_> = storage.iter().collect();

        assert_eq!(points.len(), 2);
    }
}
|
src/adapters/storage/mod.rs
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
//! # Storage Adapters
|
| 2 |
+
//!
|
| 3 |
+
//! Implementations of the Place port for different storage backends.
|
| 4 |
+
//!
|
| 5 |
+
//! Available adapters:
|
| 6 |
+
//! - `MemoryStorage` - In-memory HashMap (fast, volatile)
|
| 7 |
+
//! - `NvmeStorage` - Memory-mapped NVMe (persistent, large) [TODO]
|
| 8 |
+
|
| 9 |
+
mod memory;
|
| 10 |
+
|
| 11 |
+
pub use memory::MemoryStorage;
|
| 12 |
+
|
| 13 |
+
// TODO: Add NVMe adapter
|
| 14 |
+
// mod nvme;
|
| 15 |
+
// pub use nvme::NvmeStorage;
|
src/core/blob.rs
ADDED
|
@@ -0,0 +1,152 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
//! # Blob
|
| 2 |
+
//!
|
| 3 |
+
//! Raw payload data attached to a point.
|
| 4 |
+
//!
|
| 5 |
+
//! ARMS doesn't interpret this data - it's yours.
|
| 6 |
+
//! Could be: tensor bytes, text, compressed state, anything.
|
| 7 |
+
//!
|
| 8 |
+
//! Separation of concerns:
|
| 9 |
+
//! - Point = WHERE (position in space)
|
| 10 |
+
//! - Blob = WHAT (the actual data)
|
| 11 |
+
|
| 12 |
+
/// Opaque payload stored alongside a point
///
/// ARMS never inspects these bytes; their meaning is up to the caller.
#[derive(Clone, Debug, PartialEq)]
pub struct Blob {
    data: Vec<u8>,
}

impl Blob {
    /// Wrap an owned byte vector in a blob
    ///
    /// # Example
    /// ```
    /// use arms::Blob;
    /// let blob = Blob::new(vec![1, 2, 3, 4]);
    /// assert_eq!(blob.size(), 4);
    /// ```
    pub fn new(data: Vec<u8>) -> Self {
        Blob { data }
    }

    /// Build a blob that holds no bytes
    ///
    /// Handy when only the position matters and there is no payload.
    pub fn empty() -> Self {
        Self::new(Vec::new())
    }

    /// Build a blob holding the UTF-8 bytes of `s`
    ///
    /// # Example
    /// ```
    /// use arms::Blob;
    /// let blob = Blob::from_str("hello");
    /// assert_eq!(blob.as_str(), Some("hello"));
    /// ```
    pub fn from_str(s: &str) -> Self {
        Self::new(s.as_bytes().to_owned())
    }

    /// Borrow the stored bytes
    pub fn data(&self) -> &[u8] {
        self.data.as_slice()
    }

    /// Number of stored bytes
    pub fn size(&self) -> usize {
        self.data().len()
    }

    /// Whether the blob holds no bytes
    pub fn is_empty(&self) -> bool {
        self.data().is_empty()
    }

    /// View the bytes as text, or `None` when they are not valid UTF-8
    pub fn as_str(&self) -> Option<&str> {
        match std::str::from_utf8(self.data()) {
            Ok(text) => Some(text),
            Err(_) => None,
        }
    }

    /// Consume the blob and hand back its byte vector
    pub fn into_inner(self) -> Vec<u8> {
        let Blob { data } = self;
        data
    }
}

impl From<Vec<u8>> for Blob {
    fn from(data: Vec<u8>) -> Self {
        Blob { data }
    }
}

impl From<&[u8]> for Blob {
    fn from(data: &[u8]) -> Self {
        Blob {
            data: Vec::from(data),
        }
    }
}

impl From<&str> for Blob {
    fn from(s: &str) -> Self {
        Blob::from_str(s)
    }
}

impl From<String> for Blob {
    fn from(s: String) -> Self {
        Blob {
            data: s.into_bytes(),
        }
    }
}
|
| 103 |
+
|
| 104 |
+
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_blob_new() {
        let bytes = vec![1, 2, 3];
        let blob = Blob::new(bytes.clone());

        assert_eq!(blob.size(), bytes.len());
        assert_eq!(blob.data(), bytes.as_slice());
    }

    #[test]
    fn test_blob_empty() {
        let blob = Blob::empty();

        assert_eq!(blob.size(), 0);
        assert!(blob.is_empty());
    }

    #[test]
    fn test_blob_from_str() {
        let text = "hello world";

        assert_eq!(Blob::from_str(text).as_str(), Some(text));
    }

    #[test]
    fn test_blob_as_str_invalid_utf8() {
        // 0xff 0xfe is not a valid UTF-8 sequence.
        let blob = Blob::new(vec![0xff, 0xfe]);

        assert!(blob.as_str().is_none());
    }

    #[test]
    fn test_blob_from_conversions() {
        let from_vec = Blob::from(vec![1, 2, 3]);
        assert_eq!(from_vec.size(), 3);

        let from_slice = Blob::from("test");
        assert_eq!(from_slice.as_str(), Some("test"));

        let from_string = Blob::from(String::from("test"));
        assert_eq!(from_string.as_str(), Some("test"));
    }

    #[test]
    fn test_blob_into_inner() {
        let data = Blob::new(vec![1, 2, 3]).into_inner();

        assert_eq!(data, vec![1, 2, 3]);
    }
}
|
src/core/config.rs
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
//! # Configuration
|
| 2 |
+
//!
|
| 3 |
+
//! ARMS configuration - define your space.
|
| 4 |
+
//!
|
| 5 |
+
//! Everything is configurable, not hardcoded:
|
| 6 |
+
//! - Dimensionality
|
| 7 |
+
//! - Proximity function
|
| 8 |
+
//! - Merge function
|
| 9 |
+
//! - Tier settings
|
| 10 |
+
//!
|
| 11 |
+
//! "If we say it's a rock now, in 2 years it can never be carved into a wheel."
|
| 12 |
+
|
| 13 |
+
use super::proximity::{Cosine, Proximity};
|
| 14 |
+
use super::merge::{Mean, Merge};
|
| 15 |
+
use std::sync::Arc;
|
| 16 |
+
|
| 17 |
+
/// Main ARMS configuration
|
| 18 |
+
///
|
| 19 |
+
/// Defines the dimensional space and default operations.
|
| 20 |
+
#[derive(Clone)]
|
| 21 |
+
pub struct ArmsConfig {
|
| 22 |
+
/// Dimensionality of the space
|
| 23 |
+
///
|
| 24 |
+
/// Set this to match your model's hidden size.
|
| 25 |
+
/// Examples: 768 (BERT), 1024 (GPT-2 medium), 4096 (large models)
|
| 26 |
+
pub dimensionality: usize,
|
| 27 |
+
|
| 28 |
+
/// Proximity function for similarity calculations
|
| 29 |
+
pub proximity: Arc<dyn Proximity>,
|
| 30 |
+
|
| 31 |
+
/// Merge function for hierarchical composition
|
| 32 |
+
pub merge: Arc<dyn Merge>,
|
| 33 |
+
|
| 34 |
+
/// Whether to normalize points on insertion
|
| 35 |
+
pub normalize_on_insert: bool,
|
| 36 |
+
|
| 37 |
+
/// Tier configuration
|
| 38 |
+
pub tiers: TierConfig,
|
| 39 |
+
}
|
| 40 |
+
|
| 41 |
+
impl ArmsConfig {
|
| 42 |
+
/// Create a new configuration with specified dimensionality
|
| 43 |
+
///
|
| 44 |
+
/// Uses default proximity (Cosine) and merge (Mean) functions.
|
| 45 |
+
pub fn new(dimensionality: usize) -> Self {
|
| 46 |
+
Self {
|
| 47 |
+
dimensionality,
|
| 48 |
+
proximity: Arc::new(Cosine),
|
| 49 |
+
merge: Arc::new(Mean),
|
| 50 |
+
normalize_on_insert: true,
|
| 51 |
+
tiers: TierConfig::default(),
|
| 52 |
+
}
|
| 53 |
+
}
|
| 54 |
+
|
| 55 |
+
/// Set a custom proximity function
|
| 56 |
+
pub fn with_proximity<P: Proximity + 'static>(mut self, proximity: P) -> Self {
|
| 57 |
+
self.proximity = Arc::new(proximity);
|
| 58 |
+
self
|
| 59 |
+
}
|
| 60 |
+
|
| 61 |
+
/// Set a custom merge function
|
| 62 |
+
pub fn with_merge<M: Merge + 'static>(mut self, merge: M) -> Self {
|
| 63 |
+
self.merge = Arc::new(merge);
|
| 64 |
+
self
|
| 65 |
+
}
|
| 66 |
+
|
| 67 |
+
/// Set normalization behavior
|
| 68 |
+
pub fn with_normalize(mut self, normalize: bool) -> Self {
|
| 69 |
+
self.normalize_on_insert = normalize;
|
| 70 |
+
self
|
| 71 |
+
}
|
| 72 |
+
|
| 73 |
+
/// Set tier configuration
|
| 74 |
+
pub fn with_tiers(mut self, tiers: TierConfig) -> Self {
|
| 75 |
+
self.tiers = tiers;
|
| 76 |
+
self
|
| 77 |
+
}
|
| 78 |
+
}
|
| 79 |
+
|
| 80 |
+
impl Default for ArmsConfig {
|
| 81 |
+
/// Default configuration: 768 dimensions, cosine proximity, mean merge
|
| 82 |
+
fn default() -> Self {
|
| 83 |
+
Self::new(768)
|
| 84 |
+
}
|
| 85 |
+
}
|
| 86 |
+
|
| 87 |
+
/// Tier configuration for storage management
///
/// Holds the byte budgets of the hot and warm tiers together with the
/// thresholds that move entries between tiers.
#[derive(Clone, Debug)]
pub struct TierConfig {
    /// Hot tier (RAM) capacity in bytes
    pub hot_capacity: usize,

    /// Warm tier (NVMe) capacity in bytes
    pub warm_capacity: usize,

    /// Number of accesses before promoting to hotter tier
    pub promote_after_accesses: u32,

    /// Milliseconds since last access before evicting to colder tier
    pub evict_after_ms: u64,
}

impl TierConfig {
    /// One mebibyte, in bytes.
    const MIB: usize = 1024 * 1024;

    /// One gibibyte, in bytes.
    const GIB: usize = 1024 * Self::MIB;

    /// Create a tier configuration with the given capacities (in bytes)
    ///
    /// The promotion and eviction thresholds are taken from
    /// [`TierConfig::default`] (3 accesses, 1 hour).
    pub fn new(hot_capacity: usize, warm_capacity: usize) -> Self {
        Self {
            hot_capacity,
            warm_capacity,
            ..Self::default()
        }
    }

    /// Tiny config for testing: 1 MB hot, 10 MB warm, promote after
    /// 2 accesses, evict after 1 minute
    pub fn tiny() -> Self {
        Self {
            hot_capacity: Self::MIB,
            warm_capacity: 10 * Self::MIB,
            promote_after_accesses: 2,
            evict_after_ms: 60 * 1000,
        }
    }
}

impl Default for TierConfig {
    /// 1 GB hot tier, 100 GB warm tier, promote after 3 accesses, evict
    /// after 1 hour
    fn default() -> Self {
        Self {
            hot_capacity: Self::GIB,
            // 100 GiB does not fit in a 32-bit `usize`; a plain
            // multiplication is rejected by the compiler on such targets, so
            // saturate at `usize::MAX` there instead.
            warm_capacity: Self::GIB.saturating_mul(100),
            promote_after_accesses: 3,
            evict_after_ms: 3600 * 1000,
        }
    }
}
|
| 135 |
+
|
| 136 |
+
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::merge::MaxPool;
    use crate::core::proximity::Euclidean;

    #[test]
    fn test_default_config() {
        let ArmsConfig {
            dimensionality,
            proximity,
            merge,
            normalize_on_insert,
            ..
        } = ArmsConfig::default();

        assert_eq!(dimensionality, 768);
        assert!(normalize_on_insert);
        assert_eq!(proximity.name(), "cosine");
        assert_eq!(merge.name(), "mean");
    }

    #[test]
    fn test_custom_config() {
        let base = ArmsConfig::new(4096);
        let config = base
            .with_normalize(false)
            .with_merge(MaxPool)
            .with_proximity(Euclidean);

        assert_eq!(config.dimensionality, 4096);
        assert!(!config.normalize_on_insert);
        assert_eq!(config.proximity.name(), "euclidean");
        assert_eq!(config.merge.name(), "max_pool");
    }

    #[test]
    fn test_tier_config() {
        let TierConfig {
            hot_capacity,
            warm_capacity,
            ..
        } = TierConfig::new(1024, 2048);

        assert_eq!(hot_capacity, 1024);
        assert_eq!(warm_capacity, 2048);
    }

    #[test]
    fn test_tier_tiny() {
        let tiny = TierConfig::tiny();

        // 1 MB hot tier, one-minute eviction.
        assert_eq!(tiny.hot_capacity, 1024 * 1024);
        assert_eq!(tiny.evict_after_ms, 60 * 1000);
    }
}
|
src/core/id.rs
ADDED
|
@@ -0,0 +1,169 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
//! # Id
|
| 2 |
+
//!
|
| 3 |
+
//! Unique identifier for placed points.
|
| 4 |
+
//!
|
| 5 |
+
//! Format: 128 bits = [timestamp_ms:48][counter:16][random:64]
|
| 6 |
+
//! - Timestamp provides natural temporal ordering
|
| 7 |
+
//! - Counter prevents collisions within same millisecond
|
| 8 |
+
//! - Pseudo-random portion (derived from the timestamp and counter) adds uniqueness
|
| 9 |
+
//! - Sortable by time when compared
|
| 10 |
+
//! - No external dependencies (not UUID, just bytes)
|
| 11 |
+
|
| 12 |
+
use std::sync::atomic::{AtomicU64, Ordering};
|
| 13 |
+
use std::time::{SystemTime, UNIX_EPOCH};
|
| 14 |
+
|
| 15 |
+
/// Global counter for uniqueness within same millisecond
///
/// Incremented once per generated [`Id`]; shared by the whole process.
static COUNTER: AtomicU64 = AtomicU64::new(0);

/// Unique identifier for a placed point
///
/// 128 bits stored big-endian as `[timestamp_ms:48][counter:16][mix:64]`.
/// Because the timestamp comes first, the derived ordering sorts ids by
/// creation time at millisecond granularity.
#[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
pub struct Id([u8; 16]);

impl Id {
    /// Generate a new Id for the current moment
    ///
    /// The id is built from the current wall-clock time in milliseconds, the
    /// low 16 bits of a process-wide counter, and a 64-bit value mixed
    /// deterministically from the timestamp and the full counter. No random
    /// source is involved.
    ///
    /// If the system clock reports a time before the Unix epoch, the
    /// timestamp falls back to `0` instead of panicking; the counter still
    /// distinguishes ids generated by this process. In that fallback the
    /// very first id of a process equals [`Id::nil`].
    pub fn now() -> Self {
        let timestamp = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .map_or(0, |elapsed| elapsed.as_millis() as u64);

        // Atomically increment counter for uniqueness
        let counter = COUNTER.fetch_add(1, Ordering::Relaxed);

        // Deterministic mix of timestamp and counter (an LCG-style multiply).
        let mix = timestamp
            .wrapping_mul(6364136223846793005)
            .wrapping_add(counter);

        let mut bytes = [0u8; 16];
        // Bytes 0..6: low 48 bits of the timestamp, most significant first.
        bytes[..6].copy_from_slice(&timestamp.to_be_bytes()[2..]);
        // Bytes 6..8: low 16 bits of the counter (truncation intended).
        bytes[6..8].copy_from_slice(&(counter as u16).to_be_bytes());
        // Bytes 8..16: the mixed value.
        bytes[8..].copy_from_slice(&mix.to_be_bytes());

        Self(bytes)
    }

    /// Create an Id from raw bytes
    pub fn from_bytes(bytes: [u8; 16]) -> Self {
        Self(bytes)
    }

    /// Get the raw bytes
    pub fn as_bytes(&self) -> &[u8; 16] {
        &self.0
    }

    /// Extract the timestamp component (milliseconds since epoch)
    ///
    /// Reads the first six bytes as a big-endian 48-bit integer.
    pub fn timestamp_ms(&self) -> u64 {
        let mut widened = [0u8; 8];
        widened[2..].copy_from_slice(&self.0[..6]);
        u64::from_be_bytes(widened)
    }

    /// Create a nil/zero Id (useful for testing)
    pub fn nil() -> Self {
        Self([0u8; 16])
    }

    /// Check if this is a nil Id
    pub fn is_nil(&self) -> bool {
        self.0 == [0u8; 16]
    }
}

impl std::fmt::Display for Id {
    /// Formats the id as 32 lowercase hexadecimal digits.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        self.0.iter().try_for_each(|byte| write!(f, "{:02x}", byte))
    }
}
|
| 107 |
+
|
| 108 |
+
#[cfg(test)]
mod tests {
    use super::*;
    use std::thread;
    use std::time::Duration;

    /// Current wall-clock time in milliseconds since the Unix epoch.
    fn wall_clock_ms() -> u64 {
        let since_epoch = SystemTime::now().duration_since(UNIX_EPOCH).unwrap();
        since_epoch.as_millis() as u64
    }

    #[test]
    fn test_id_creation() {
        assert!(!Id::now().is_nil());
    }

    #[test]
    fn test_id_timestamp() {
        let before = wall_clock_ms();
        let id = Id::now();
        let after = wall_clock_ms();

        // The embedded timestamp lies between the two clock readings.
        let ts = id.timestamp_ms();
        assert!(ts >= before);
        assert!(ts <= after);
    }

    #[test]
    fn test_id_ordering() {
        let earlier = Id::now();
        thread::sleep(Duration::from_millis(2));
        let later = Id::now();

        // The later id carries a larger timestamp prefix, so it sorts after.
        assert!(later > earlier);
    }

    #[test]
    fn test_id_from_bytes() {
        let bytes = [1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];

        assert_eq!(Id::from_bytes(bytes).as_bytes(), &bytes);
    }

    #[test]
    fn test_id_nil() {
        let nil = Id::nil();

        assert_eq!(nil.timestamp_ms(), 0);
        assert!(nil.is_nil());
    }

    #[test]
    fn test_id_display() {
        let id = Id::from_bytes([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);

        assert_eq!(format!("{}", id), "000102030405060708090a0b0c0d0e0f");
    }
}
|
src/core/merge.rs
ADDED
|
@@ -0,0 +1,335 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
//! # Merge
|
| 2 |
+
//!
|
| 3 |
+
//! Trait and implementations for composing multiple points into one.
|
| 4 |
+
//!
|
| 5 |
+
//! This is one of the five primitives of ARMS:
|
| 6 |
+
//! `Merge: fn(points) -> point` - Compose together
|
| 7 |
+
//!
|
| 8 |
+
//! Merge is used for hierarchical composition:
|
| 9 |
+
//! - Chunks → Document
|
| 10 |
+
//! - Documents → Session
|
| 11 |
+
//! - Sessions → Domain
|
| 12 |
+
//!
|
| 13 |
+
//! Merge functions are pluggable - use whichever fits your use case.
|
| 14 |
+
|
| 15 |
+
use super::Point;
|
| 16 |
+
|
| 17 |
+
/// Trait for merging multiple points into one
|
| 18 |
+
///
|
| 19 |
+
/// Used for hierarchical composition and aggregation.
|
| 20 |
+
pub trait Merge: Send + Sync {
|
| 21 |
+
/// Merge multiple points into a single point
|
| 22 |
+
///
|
| 23 |
+
/// All points must have the same dimensionality.
|
| 24 |
+
/// The slice must not be empty.
|
| 25 |
+
fn merge(&self, points: &[Point]) -> Point;
|
| 26 |
+
|
| 27 |
+
/// Name of this merge function (for debugging/config)
|
| 28 |
+
fn name(&self) -> &'static str;
|
| 29 |
+
}
|
| 30 |
+
|
| 31 |
+
// ============================================================================
|
| 32 |
+
// IMPLEMENTATIONS
|
| 33 |
+
// ============================================================================
|
| 34 |
+
|
| 35 |
+
/// Mean (average) of all points
|
| 36 |
+
///
|
| 37 |
+
/// The centroid of the input points.
|
| 38 |
+
/// Good default for most hierarchical composition.
|
| 39 |
+
#[derive(Clone, Copy, Debug, Default)]
|
| 40 |
+
pub struct Mean;
|
| 41 |
+
|
| 42 |
+
impl Merge for Mean {
|
| 43 |
+
fn merge(&self, points: &[Point]) -> Point {
|
| 44 |
+
assert!(!points.is_empty(), "Cannot merge empty slice");
|
| 45 |
+
|
| 46 |
+
let dims = points[0].dimensionality();
|
| 47 |
+
let n = points.len() as f32;
|
| 48 |
+
|
| 49 |
+
let mut result = vec![0.0; dims];
|
| 50 |
+
for p in points {
|
| 51 |
+
assert_eq!(
|
| 52 |
+
p.dimensionality(),
|
| 53 |
+
dims,
|
| 54 |
+
"All points must have same dimensionality"
|
| 55 |
+
);
|
| 56 |
+
for (r, d) in result.iter_mut().zip(p.dims()) {
|
| 57 |
+
*r += d / n;
|
| 58 |
+
}
|
| 59 |
+
}
|
| 60 |
+
|
| 61 |
+
Point::new(result)
|
| 62 |
+
}
|
| 63 |
+
|
| 64 |
+
fn name(&self) -> &'static str {
|
| 65 |
+
"mean"
|
| 66 |
+
}
|
| 67 |
+
}
|
| 68 |
+
|
| 69 |
+
/// Mean in which every point carries its own weight
///
/// A point's contribution is proportional to its weight.
/// Handy for recency weighting, importance weighting, and similar schemes.
#[derive(Clone, Debug)]
pub struct WeightedMean {
    weights: Vec<f32>,
}

impl WeightedMean {
    /// Build a weighted mean from explicit weights
    ///
    /// The weights are stored as given; they are normalized (divided by
    /// their sum) when a merge is performed.
    pub fn new(weights: Vec<f32>) -> Self {
        WeightedMean { weights }
    }

    /// Build a weighted mean with `n` equal weights (equivalent to Mean)
    pub fn uniform(n: usize) -> Self {
        Self::new(vec![1.0; n])
    }

    /// Build a weighted mean that favours recent points
    ///
    /// `decay` should be in (0, 1). Smaller = faster decay.
    /// The first point is the oldest and the last the most recent: the
    /// last weight is `decay^0 = 1`, the one before it `decay^1`, and so on.
    pub fn recency(n: usize, decay: f32) -> Self {
        // Ages run from n-1 (oldest) down to 0 (newest).
        let weights = (0..n).rev().map(|age| decay.powi(age as i32)).collect();
        Self::new(weights)
    }
}
|
| 102 |
+
|
| 103 |
+
impl Merge for WeightedMean {
|
| 104 |
+
fn merge(&self, points: &[Point]) -> Point {
|
| 105 |
+
assert!(!points.is_empty(), "Cannot merge empty slice");
|
| 106 |
+
assert_eq!(
|
| 107 |
+
points.len(),
|
| 108 |
+
self.weights.len(),
|
| 109 |
+
"Number of points must match number of weights"
|
| 110 |
+
);
|
| 111 |
+
|
| 112 |
+
let dims = points[0].dimensionality();
|
| 113 |
+
let total_weight: f32 = self.weights.iter().sum();
|
| 114 |
+
|
| 115 |
+
let mut result = vec![0.0; dims];
|
| 116 |
+
for (p, &w) in points.iter().zip(&self.weights) {
|
| 117 |
+
assert_eq!(
|
| 118 |
+
p.dimensionality(),
|
| 119 |
+
dims,
|
| 120 |
+
"All points must have same dimensionality"
|
| 121 |
+
);
|
| 122 |
+
let normalized_w = w / total_weight;
|
| 123 |
+
for (r, d) in result.iter_mut().zip(p.dims()) {
|
| 124 |
+
*r += d * normalized_w;
|
| 125 |
+
}
|
| 126 |
+
}
|
| 127 |
+
|
| 128 |
+
Point::new(result)
|
| 129 |
+
}
|
| 130 |
+
|
| 131 |
+
fn name(&self) -> &'static str {
|
| 132 |
+
"weighted_mean"
|
| 133 |
+
}
|
| 134 |
+
}
|
| 135 |
+
|
| 136 |
+
/// Max pooling across points
|
| 137 |
+
///
|
| 138 |
+
/// Takes the maximum value of each dimension across all points.
|
| 139 |
+
/// Preserves the strongest activations.
|
| 140 |
+
#[derive(Clone, Copy, Debug, Default)]
|
| 141 |
+
pub struct MaxPool;
|
| 142 |
+
|
| 143 |
+
impl Merge for MaxPool {
|
| 144 |
+
fn merge(&self, points: &[Point]) -> Point {
|
| 145 |
+
assert!(!points.is_empty(), "Cannot merge empty slice");
|
| 146 |
+
|
| 147 |
+
let dims = points[0].dimensionality();
|
| 148 |
+
let mut result = points[0].dims().to_vec();
|
| 149 |
+
|
| 150 |
+
for p in &points[1..] {
|
| 151 |
+
assert_eq!(
|
| 152 |
+
p.dimensionality(),
|
| 153 |
+
dims,
|
| 154 |
+
"All points must have same dimensionality"
|
| 155 |
+
);
|
| 156 |
+
for (r, d) in result.iter_mut().zip(p.dims()) {
|
| 157 |
+
*r = r.max(*d);
|
| 158 |
+
}
|
| 159 |
+
}
|
| 160 |
+
|
| 161 |
+
Point::new(result)
|
| 162 |
+
}
|
| 163 |
+
|
| 164 |
+
fn name(&self) -> &'static str {
|
| 165 |
+
"max_pool"
|
| 166 |
+
}
|
| 167 |
+
}
|
| 168 |
+
|
| 169 |
+
/// Min pooling across points
|
| 170 |
+
///
|
| 171 |
+
/// Takes the minimum value of each dimension across all points.
|
| 172 |
+
#[derive(Clone, Copy, Debug, Default)]
|
| 173 |
+
pub struct MinPool;
|
| 174 |
+
|
| 175 |
+
impl Merge for MinPool {
|
| 176 |
+
fn merge(&self, points: &[Point]) -> Point {
|
| 177 |
+
assert!(!points.is_empty(), "Cannot merge empty slice");
|
| 178 |
+
|
| 179 |
+
let dims = points[0].dimensionality();
|
| 180 |
+
let mut result = points[0].dims().to_vec();
|
| 181 |
+
|
| 182 |
+
for p in &points[1..] {
|
| 183 |
+
assert_eq!(
|
| 184 |
+
p.dimensionality(),
|
| 185 |
+
dims,
|
| 186 |
+
"All points must have same dimensionality"
|
| 187 |
+
);
|
| 188 |
+
for (r, d) in result.iter_mut().zip(p.dims()) {
|
| 189 |
+
*r = r.min(*d);
|
| 190 |
+
}
|
| 191 |
+
}
|
| 192 |
+
|
| 193 |
+
Point::new(result)
|
| 194 |
+
}
|
| 195 |
+
|
| 196 |
+
fn name(&self) -> &'static str {
|
| 197 |
+
"min_pool"
|
| 198 |
+
}
|
| 199 |
+
}
|
| 200 |
+
|
| 201 |
+
/// Sum of all points (no averaging)
|
| 202 |
+
///
|
| 203 |
+
/// Simple additive composition.
|
| 204 |
+
#[derive(Clone, Copy, Debug, Default)]
|
| 205 |
+
pub struct Sum;
|
| 206 |
+
|
| 207 |
+
impl Merge for Sum {
|
| 208 |
+
fn merge(&self, points: &[Point]) -> Point {
|
| 209 |
+
assert!(!points.is_empty(), "Cannot merge empty slice");
|
| 210 |
+
|
| 211 |
+
let dims = points[0].dimensionality();
|
| 212 |
+
let mut result = vec![0.0; dims];
|
| 213 |
+
|
| 214 |
+
for p in points {
|
| 215 |
+
assert_eq!(
|
| 216 |
+
p.dimensionality(),
|
| 217 |
+
dims,
|
| 218 |
+
"All points must have same dimensionality"
|
| 219 |
+
);
|
| 220 |
+
for (r, d) in result.iter_mut().zip(p.dims()) {
|
| 221 |
+
*r += d;
|
| 222 |
+
}
|
| 223 |
+
}
|
| 224 |
+
|
| 225 |
+
Point::new(result)
|
| 226 |
+
}
|
| 227 |
+
|
| 228 |
+
fn name(&self) -> &'static str {
|
| 229 |
+
"sum"
|
| 230 |
+
}
|
| 231 |
+
}
|
| 232 |
+
|
| 233 |
+
#[cfg(test)]
mod tests {
    use super::*;

    /// Absolute tolerance used for approximate float comparisons.
    const TOLERANCE: f32 = 0.0001;

    /// Build a point from a slice literal.
    fn pt(coords: &[f32]) -> Point {
        Point::new(coords.to_vec())
    }

    #[test]
    fn test_mean_single() {
        let input = [pt(&[1.0, 2.0, 3.0])];
        let merged = Mean.merge(&input);
        assert_eq!(merged.dims(), &[1.0, 2.0, 3.0]);
    }

    #[test]
    fn test_mean_multiple() {
        let input = [pt(&[1.0, 2.0]), pt(&[3.0, 4.0])];
        let merged = Mean.merge(&input);
        assert_eq!(merged.dims(), &[2.0, 3.0]);
    }

    #[test]
    fn test_weighted_mean() {
        let input = [pt(&[0.0, 0.0]), pt(&[10.0, 10.0])];
        // The second point carries three times the weight of the first.
        let merged = WeightedMean::new(vec![1.0, 3.0]).merge(&input);
        // (0*0.25 + 10*0.75, 0*0.25 + 10*0.75) = (7.5, 7.5)
        for &coord in &merged.dims()[..2] {
            assert!((coord - 7.5).abs() < TOLERANCE);
        }
    }

    #[test]
    fn test_weighted_mean_recency() {
        // decay = 0.5, n = 3
        // weights: [0.5^2, 0.5^1, 0.5^0] = [0.25, 0.5, 1.0]
        let merger = WeightedMean::recency(3, 0.5);
        assert_eq!(merger.weights.len(), 3);
        assert!((merger.weights[0] - 0.25).abs() < TOLERANCE);
        assert!((merger.weights[1] - 0.5).abs() < TOLERANCE);
        assert!((merger.weights[2] - 1.0).abs() < TOLERANCE);
    }

    #[test]
    fn test_max_pool() {
        let input = [
            pt(&[1.0, 5.0, 2.0]),
            pt(&[3.0, 2.0, 4.0]),
            pt(&[2.0, 3.0, 1.0]),
        ];
        let merged = MaxPool.merge(&input);
        assert_eq!(merged.dims(), &[3.0, 5.0, 4.0]);
    }

    #[test]
    fn test_min_pool() {
        let input = [
            pt(&[1.0, 5.0, 2.0]),
            pt(&[3.0, 2.0, 4.0]),
            pt(&[2.0, 3.0, 1.0]),
        ];
        let merged = MinPool.merge(&input);
        assert_eq!(merged.dims(), &[1.0, 2.0, 1.0]);
    }

    #[test]
    fn test_sum() {
        let input = [pt(&[1.0, 2.0]), pt(&[3.0, 4.0])];
        let merged = Sum.merge(&input);
        assert_eq!(merged.dims(), &[4.0, 6.0]);
    }

    #[test]
    fn test_merge_names() {
        assert_eq!(Mean.name(), "mean");
        assert_eq!(MaxPool.name(), "max_pool");
        assert_eq!(MinPool.name(), "min_pool");
        assert_eq!(Sum.name(), "sum");
    }

    #[test]
    #[should_panic(expected = "Cannot merge empty")]
    fn test_merge_empty_panics() {
        let nothing: Vec<Point> = Vec::new();
        Mean.merge(&nothing);
    }

    #[test]
    #[should_panic(expected = "same dimensionality")]
    fn test_merge_dimension_mismatch_panics() {
        let mismatched = [pt(&[1.0, 2.0]), pt(&[1.0, 2.0, 3.0])];
        Mean.merge(&mismatched);
    }
}
|
src/core/mod.rs
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
//! # Core Domain
|
| 2 |
+
//!
|
| 3 |
+
//! Pure math, no I/O. The foundation of ARMS.
|
| 4 |
+
//!
|
| 5 |
+
//! This module contains the fundamental types and operations:
|
| 6 |
+
//! - `Point` - A position in dimensional space
|
| 7 |
+
//! - `Id` - Unique identifier for placed points
|
| 8 |
+
//! - `Blob` - Raw payload data
|
| 9 |
+
//! - `Proximity` - Trait for measuring relatedness
|
| 10 |
+
//! - `Merge` - Trait for composing points
|
| 11 |
+
//!
|
| 12 |
+
//! ## Design Principles
|
| 13 |
+
//!
|
| 14 |
+
//! - All functions are pure (deterministic, no side effects)
|
| 15 |
+
//! - No I/O operations
|
| 16 |
+
//! - No external dependencies beyond std
|
| 17 |
+
//! - Fully testable in isolation
|
| 18 |
+
|
| 19 |
+
mod point;
|
| 20 |
+
mod id;
|
| 21 |
+
mod blob;
|
| 22 |
+
pub mod proximity;
|
| 23 |
+
pub mod merge;
|
| 24 |
+
pub mod config;
|
| 25 |
+
|
| 26 |
+
// Re-exports
|
| 27 |
+
pub use point::Point;
|
| 28 |
+
pub use id::Id;
|
| 29 |
+
pub use blob::Blob;
|
| 30 |
+
|
| 31 |
+
/// A point that has been placed in the space
|
| 32 |
+
#[derive(Clone)]
|
| 33 |
+
pub struct PlacedPoint {
|
| 34 |
+
/// Unique identifier
|
| 35 |
+
pub id: Id,
|
| 36 |
+
/// Position in dimensional space
|
| 37 |
+
pub point: Point,
|
| 38 |
+
/// Attached payload
|
| 39 |
+
pub blob: Blob,
|
| 40 |
+
}
|
| 41 |
+
|
| 42 |
+
impl PlacedPoint {
|
| 43 |
+
/// Create a new placed point
|
| 44 |
+
pub fn new(id: Id, point: Point, blob: Blob) -> Self {
|
| 45 |
+
Self { id, point, blob }
|
| 46 |
+
}
|
| 47 |
+
}
|
| 48 |
+
|
| 49 |
+
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_placed_point_creation() {
        // Build all three parts inline; the constructor only stores them.
        let placed = PlacedPoint::new(
            Id::now(),
            Point::new(vec![1.0, 2.0, 3.0]),
            Blob::new(vec![1, 2, 3]),
        );

        assert_eq!(placed.point.dimensionality(), 3);
        assert_eq!(placed.blob.size(), 3);
    }
}
|
src/core/point.rs
ADDED
|
@@ -0,0 +1,186 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
//! # Point
|
| 2 |
+
//!
|
| 3 |
+
//! A position in dimensional space. The fundamental primitive.
|
| 4 |
+
//!
|
| 5 |
+
//! Dimensionality is NOT fixed - configure it for your model.
|
| 6 |
+
//! 768-dim, 1024-dim, 4096-dim, or any size you need.
|
| 7 |
+
//!
|
| 8 |
+
//! The point IS the thought's position.
|
| 9 |
+
//! The position IS its relationship to all other thoughts.
|
| 10 |
+
|
| 11 |
+
/// A point in dimensional space
///
/// A thin wrapper around a `Vec<f32>` of coordinates; the length of the
/// vector is the point's dimensionality.
#[derive(Clone, Debug, PartialEq)]
pub struct Point {
    dims: Vec<f32>,
}

impl Point {
    /// Create a new point from a vector of dimensions
    ///
    /// # Example
    /// ```
    /// use arms::Point;
    /// let p = Point::new(vec![1.0, 2.0, 3.0]);
    /// assert_eq!(p.dimensionality(), 3);
    /// ```
    pub fn new(dims: Vec<f32>) -> Self {
        Self { dims }
    }

    /// Create an origin point (all zeros) of given dimensionality
    ///
    /// # Example
    /// ```
    /// use arms::Point;
    /// let origin = Point::origin(768);
    /// assert_eq!(origin.dimensionality(), 768);
    /// assert!(origin.dims().iter().all(|&x| x == 0.0));
    /// ```
    pub fn origin(dims: usize) -> Self {
        Self {
            dims: vec![0.0; dims],
        }
    }

    /// Get the dimensionality of this point
    pub fn dimensionality(&self) -> usize {
        self.dims.len()
    }

    /// Access the dimensions as a slice
    pub fn dims(&self) -> &[f32] {
        &self.dims
    }

    /// Mutable access to dimensions
    ///
    /// Returns a slice, so coordinates can be changed but the
    /// dimensionality cannot.
    pub fn dims_mut(&mut self) -> &mut [f32] {
        &mut self.dims
    }

    /// Calculate the magnitude (L2 norm) of this point
    ///
    /// The squares are accumulated in `f64`, so the result neither
    /// overflows to infinity for large coordinates nor underflows to zero
    /// for tiny ones, as long as the true magnitude fits in an `f32`.
    ///
    /// # Example
    /// ```
    /// use arms::Point;
    /// let p = Point::new(vec![3.0, 4.0]);
    /// assert!((p.magnitude() - 5.0).abs() < 0.0001);
    /// ```
    pub fn magnitude(&self) -> f32 {
        let sum_of_squares: f64 = self
            .dims
            .iter()
            .map(|&x| {
                let x = f64::from(x);
                x * x
            })
            .sum();
        // Narrowing is intentional: the cast rounds to the nearest f32 and
        // yields infinity only when the magnitude itself exceeds f32::MAX.
        sum_of_squares.sqrt() as f32
    }

    /// Check if this point is normalized (magnitude ≈ 1.0)
    ///
    /// The magnitude must be within 0.001 of 1.0.
    pub fn is_normalized(&self) -> bool {
        let mag = self.magnitude();
        (mag - 1.0).abs() < 0.001
    }

    /// Return a normalized copy of this point
    ///
    /// If magnitude is zero, returns a clone of self.
    ///
    /// # Example
    /// ```
    /// use arms::Point;
    /// let p = Point::new(vec![3.0, 4.0]);
    /// let normalized = p.normalize();
    /// assert!(normalized.is_normalized());
    /// ```
    pub fn normalize(&self) -> Self {
        let mag = self.magnitude();
        if mag == 0.0 {
            // Nothing to scale by; avoid dividing by zero.
            return self.clone();
        }
        Self {
            dims: self.dims.iter().map(|x| x / mag).collect(),
        }
    }

    /// Add another point to this one (element-wise)
    ///
    /// # Panics
    ///
    /// Panics if the two points differ in dimensionality.
    pub fn add(&self, other: &Point) -> Self {
        assert_eq!(
            self.dimensionality(),
            other.dimensionality(),
            "Points must have same dimensionality"
        );
        Self {
            dims: self
                .dims
                .iter()
                .zip(other.dims.iter())
                .map(|(a, b)| a + b)
                .collect(),
        }
    }

    /// Scale this point by a scalar
    pub fn scale(&self, scalar: f32) -> Self {
        Self {
            dims: self.dims.iter().map(|x| x * scalar).collect(),
        }
    }
}
|
| 123 |
+
|
| 124 |
+
#[cfg(test)]
mod tests {
    use super::*;

    /// Absolute tolerance used for approximate float comparisons.
    const TOLERANCE: f32 = 0.0001;

    #[test]
    fn test_new_point() {
        let point = Point::new(vec![1.0, 2.0, 3.0]);
        assert_eq!(point.dimensionality(), 3);
        assert_eq!(point.dims(), &[1.0, 2.0, 3.0]);
    }

    #[test]
    fn test_origin() {
        let zero = Point::origin(768);
        assert_eq!(zero.dimensionality(), 768);
        assert!(zero.dims().iter().all(|&x| x == 0.0));
    }

    #[test]
    fn test_magnitude() {
        // Classic 3-4-5 right triangle.
        let length = Point::new(vec![3.0, 4.0]).magnitude();
        assert!((length - 5.0).abs() < TOLERANCE);
    }

    #[test]
    fn test_normalize() {
        let unit = Point::new(vec![3.0, 4.0]).normalize();
        assert!(unit.is_normalized());
        assert!((unit.dims()[0] - 0.6).abs() < TOLERANCE);
        assert!((unit.dims()[1] - 0.8).abs() < TOLERANCE);
    }

    #[test]
    fn test_normalize_zero() {
        // A zero vector has no direction and must come back unchanged.
        let unit = Point::origin(3).normalize();
        assert_eq!(unit.dims(), &[0.0, 0.0, 0.0]);
    }

    #[test]
    fn test_add() {
        let lhs = Point::new(vec![1.0, 2.0]);
        let rhs = Point::new(vec![3.0, 4.0]);
        assert_eq!(lhs.add(&rhs).dims(), &[4.0, 6.0]);
    }

    #[test]
    fn test_scale() {
        let doubled = Point::new(vec![1.0, 2.0]).scale(2.0);
        assert_eq!(doubled.dims(), &[2.0, 4.0]);
    }

    #[test]
    #[should_panic(expected = "same dimensionality")]
    fn test_add_different_dims_panics() {
        let two_d = Point::new(vec![1.0, 2.0]);
        let three_d = Point::new(vec![1.0, 2.0, 3.0]);
        let _ = two_d.add(&three_d);
    }
}
|
src/core/proximity.rs
ADDED
|
@@ -0,0 +1,261 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
//! # Proximity
|
| 2 |
+
//!
|
| 3 |
+
//! Trait and implementations for measuring how related two points are.
|
| 4 |
+
//!
|
| 5 |
+
//! This is one of the five primitives of ARMS:
|
| 6 |
+
//! `Proximity: fn(a, b) -> f32` - How related?
|
| 7 |
+
//!
|
| 8 |
+
//! Proximity functions are pluggable - use whichever fits your use case.
|
| 9 |
+
|
| 10 |
+
use super::Point;
|
| 11 |
+
|
| 12 |
+
/// Trait for measuring proximity between points
|
| 13 |
+
///
|
| 14 |
+
/// Higher values typically mean more similar/related.
|
| 15 |
+
/// The exact semantics depend on the implementation.
|
| 16 |
+
pub trait Proximity: Send + Sync {
|
| 17 |
+
/// Compute proximity between two points
|
| 18 |
+
///
|
| 19 |
+
/// Both points must have the same dimensionality.
|
| 20 |
+
fn proximity(&self, a: &Point, b: &Point) -> f32;
|
| 21 |
+
|
| 22 |
+
/// Name of this proximity function (for debugging/config)
|
| 23 |
+
fn name(&self) -> &'static str;
|
| 24 |
+
}
|
| 25 |
+
|
| 26 |
+
// ============================================================================
|
| 27 |
+
// IMPLEMENTATIONS
|
| 28 |
+
// ============================================================================
|
| 29 |
+
|
| 30 |
+
/// Cosine similarity
|
| 31 |
+
///
|
| 32 |
+
/// Measures the cosine of the angle between two vectors.
|
| 33 |
+
/// Returns a value in [-1, 1] where 1 means identical direction.
|
| 34 |
+
///
|
| 35 |
+
/// Best for: Normalized vectors, semantic similarity.
|
| 36 |
+
#[derive(Clone, Copy, Debug, Default)]
|
| 37 |
+
pub struct Cosine;
|
| 38 |
+
|
| 39 |
+
impl Proximity for Cosine {
|
| 40 |
+
fn proximity(&self, a: &Point, b: &Point) -> f32 {
|
| 41 |
+
assert_eq!(
|
| 42 |
+
a.dimensionality(),
|
| 43 |
+
b.dimensionality(),
|
| 44 |
+
"Points must have same dimensionality"
|
| 45 |
+
);
|
| 46 |
+
|
| 47 |
+
let dot: f32 = a
|
| 48 |
+
.dims()
|
| 49 |
+
.iter()
|
| 50 |
+
.zip(b.dims().iter())
|
| 51 |
+
.map(|(x, y)| x * y)
|
| 52 |
+
.sum();
|
| 53 |
+
|
| 54 |
+
let mag_a = a.magnitude();
|
| 55 |
+
let mag_b = b.magnitude();
|
| 56 |
+
|
| 57 |
+
if mag_a == 0.0 || mag_b == 0.0 {
|
| 58 |
+
return 0.0;
|
| 59 |
+
}
|
| 60 |
+
|
| 61 |
+
dot / (mag_a * mag_b)
|
| 62 |
+
}
|
| 63 |
+
|
| 64 |
+
fn name(&self) -> &'static str {
|
| 65 |
+
"cosine"
|
| 66 |
+
}
|
| 67 |
+
}
|
| 68 |
+
|
| 69 |
+
/// Euclidean distance
|
| 70 |
+
///
|
| 71 |
+
/// The straight-line distance between two points.
|
| 72 |
+
/// Returns a value in [0, ∞) where 0 means identical.
|
| 73 |
+
///
|
| 74 |
+
/// Note: This returns DISTANCE, not similarity.
|
| 75 |
+
/// Lower values = more similar.
|
| 76 |
+
#[derive(Clone, Copy, Debug, Default)]
|
| 77 |
+
pub struct Euclidean;
|
| 78 |
+
|
| 79 |
+
impl Proximity for Euclidean {
|
| 80 |
+
fn proximity(&self, a: &Point, b: &Point) -> f32 {
|
| 81 |
+
assert_eq!(
|
| 82 |
+
a.dimensionality(),
|
| 83 |
+
b.dimensionality(),
|
| 84 |
+
"Points must have same dimensionality"
|
| 85 |
+
);
|
| 86 |
+
|
| 87 |
+
let dist_sq: f32 = a
|
| 88 |
+
.dims()
|
| 89 |
+
.iter()
|
| 90 |
+
.zip(b.dims().iter())
|
| 91 |
+
.map(|(x, y)| (x - y).powi(2))
|
| 92 |
+
.sum();
|
| 93 |
+
|
| 94 |
+
dist_sq.sqrt()
|
| 95 |
+
}
|
| 96 |
+
|
| 97 |
+
fn name(&self) -> &'static str {
|
| 98 |
+
"euclidean"
|
| 99 |
+
}
|
| 100 |
+
}
|
| 101 |
+
|
| 102 |
+
/// Squared Euclidean distance
|
| 103 |
+
///
|
| 104 |
+
/// Same ordering as Euclidean but faster (no sqrt).
|
| 105 |
+
/// Use when you only need to compare distances, not absolute values.
|
| 106 |
+
#[derive(Clone, Copy, Debug, Default)]
|
| 107 |
+
pub struct EuclideanSquared;
|
| 108 |
+
|
| 109 |
+
impl Proximity for EuclideanSquared {
|
| 110 |
+
fn proximity(&self, a: &Point, b: &Point) -> f32 {
|
| 111 |
+
assert_eq!(
|
| 112 |
+
a.dimensionality(),
|
| 113 |
+
b.dimensionality(),
|
| 114 |
+
"Points must have same dimensionality"
|
| 115 |
+
);
|
| 116 |
+
|
| 117 |
+
a.dims()
|
| 118 |
+
.iter()
|
| 119 |
+
.zip(b.dims().iter())
|
| 120 |
+
.map(|(x, y)| (x - y).powi(2))
|
| 121 |
+
.sum()
|
| 122 |
+
}
|
| 123 |
+
|
| 124 |
+
fn name(&self) -> &'static str {
|
| 125 |
+
"euclidean_squared"
|
| 126 |
+
}
|
| 127 |
+
}
|
| 128 |
+
|
| 129 |
+
/// Dot product
|
| 130 |
+
///
|
| 131 |
+
/// The raw dot product without normalization.
|
| 132 |
+
/// Returns a value that depends on magnitudes.
|
| 133 |
+
///
|
| 134 |
+
/// Best for: When magnitude matters, not just direction.
|
| 135 |
+
#[derive(Clone, Copy, Debug, Default)]
|
| 136 |
+
pub struct DotProduct;
|
| 137 |
+
|
| 138 |
+
impl Proximity for DotProduct {
|
| 139 |
+
fn proximity(&self, a: &Point, b: &Point) -> f32 {
|
| 140 |
+
assert_eq!(
|
| 141 |
+
a.dimensionality(),
|
| 142 |
+
b.dimensionality(),
|
| 143 |
+
"Points must have same dimensionality"
|
| 144 |
+
);
|
| 145 |
+
|
| 146 |
+
a.dims()
|
| 147 |
+
.iter()
|
| 148 |
+
.zip(b.dims().iter())
|
| 149 |
+
.map(|(x, y)| x * y)
|
| 150 |
+
.sum()
|
| 151 |
+
}
|
| 152 |
+
|
| 153 |
+
fn name(&self) -> &'static str {
|
| 154 |
+
"dot_product"
|
| 155 |
+
}
|
| 156 |
+
}
|
| 157 |
+
|
| 158 |
+
/// Manhattan (L1) distance
|
| 159 |
+
///
|
| 160 |
+
/// Sum of absolute differences along each dimension.
|
| 161 |
+
/// Returns a value in [0, ∞) where 0 means identical.
|
| 162 |
+
#[derive(Clone, Copy, Debug, Default)]
|
| 163 |
+
pub struct Manhattan;
|
| 164 |
+
|
| 165 |
+
impl Proximity for Manhattan {
|
| 166 |
+
fn proximity(&self, a: &Point, b: &Point) -> f32 {
|
| 167 |
+
assert_eq!(
|
| 168 |
+
a.dimensionality(),
|
| 169 |
+
b.dimensionality(),
|
| 170 |
+
"Points must have same dimensionality"
|
| 171 |
+
);
|
| 172 |
+
|
| 173 |
+
a.dims()
|
| 174 |
+
.iter()
|
| 175 |
+
.zip(b.dims().iter())
|
| 176 |
+
.map(|(x, y)| (x - y).abs())
|
| 177 |
+
.sum()
|
| 178 |
+
}
|
| 179 |
+
|
| 180 |
+
fn name(&self) -> &'static str {
|
| 181 |
+
"manhattan"
|
| 182 |
+
}
|
| 183 |
+
}
|
| 184 |
+
|
| 185 |
+
#[cfg(test)]
mod tests {
    use super::*;

    /// Absolute tolerance used for approximate float comparisons.
    const TOLERANCE: f32 = 0.0001;

    /// Build a point from a slice literal.
    fn pt(coords: &[f32]) -> Point {
        Point::new(coords.to_vec())
    }

    #[test]
    fn test_cosine_identical() {
        let score = Cosine.proximity(&pt(&[1.0, 0.0, 0.0]), &pt(&[1.0, 0.0, 0.0]));
        assert!((score - 1.0).abs() < TOLERANCE);
    }

    #[test]
    fn test_cosine_opposite() {
        let score = Cosine.proximity(&pt(&[1.0, 0.0, 0.0]), &pt(&[-1.0, 0.0, 0.0]));
        assert!((score - (-1.0)).abs() < TOLERANCE);
    }

    #[test]
    fn test_cosine_orthogonal() {
        let score = Cosine.proximity(&pt(&[1.0, 0.0, 0.0]), &pt(&[0.0, 1.0, 0.0]));
        assert!(score.abs() < TOLERANCE);
    }

    #[test]
    fn test_euclidean() {
        let distance = Euclidean.proximity(&pt(&[0.0, 0.0]), &pt(&[3.0, 4.0]));
        assert!((distance - 5.0).abs() < TOLERANCE);
    }

    #[test]
    fn test_euclidean_squared() {
        let distance_sq = EuclideanSquared.proximity(&pt(&[0.0, 0.0]), &pt(&[3.0, 4.0]));
        assert!((distance_sq - 25.0).abs() < TOLERANCE);
    }

    #[test]
    fn test_dot_product() {
        let product = DotProduct.proximity(&pt(&[1.0, 2.0, 3.0]), &pt(&[4.0, 5.0, 6.0]));
        // 1*4 + 2*5 + 3*6 = 4 + 10 + 18 = 32
        assert!((product - 32.0).abs() < TOLERANCE);
    }

    #[test]
    fn test_manhattan() {
        let distance = Manhattan.proximity(&pt(&[0.0, 0.0]), &pt(&[3.0, 4.0]));
        assert!((distance - 7.0).abs() < TOLERANCE);
    }

    #[test]
    fn test_proximity_names() {
        assert_eq!(Cosine.name(), "cosine");
        assert_eq!(Euclidean.name(), "euclidean");
        assert_eq!(DotProduct.name(), "dot_product");
        assert_eq!(Manhattan.name(), "manhattan");
    }

    #[test]
    #[should_panic(expected = "same dimensionality")]
    fn test_dimension_mismatch_panics() {
        let two_d = pt(&[1.0, 2.0]);
        let three_d = pt(&[1.0, 2.0, 3.0]);
        Cosine.proximity(&two_d, &three_d);
    }
}
|
src/engine/arms.rs
ADDED
|
@@ -0,0 +1,335 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
//! # Arms Engine
|
| 2 |
+
//!
|
| 3 |
+
//! The main ARMS orchestrator.
|
| 4 |
+
//!
|
| 5 |
+
//! This struct wires together:
|
| 6 |
+
//! - Storage (Place port)
|
| 7 |
+
//! - Index (Near port)
|
| 8 |
+
//! - Configuration
|
| 9 |
+
//!
|
| 10 |
+
//! And exposes a unified API for storing and retrieving points.
|
| 11 |
+
|
| 12 |
+
use crate::core::{Blob, Id, PlacedPoint, Point};
|
| 13 |
+
use crate::core::config::ArmsConfig;
|
| 14 |
+
use crate::ports::{Near, NearResult, Place, PlaceResult, SearchResult};
|
| 15 |
+
use crate::adapters::storage::MemoryStorage;
|
| 16 |
+
use crate::adapters::index::FlatIndex;
|
| 17 |
+
|
| 18 |
+
/// The main ARMS engine
|
| 19 |
+
///
|
| 20 |
+
/// Orchestrates storage and indexing with a unified API.
|
| 21 |
+
pub struct Arms {
|
| 22 |
+
/// Configuration
|
| 23 |
+
config: ArmsConfig,
|
| 24 |
+
|
| 25 |
+
/// Storage backend (Place port)
|
| 26 |
+
storage: Box<dyn Place>,
|
| 27 |
+
|
| 28 |
+
/// Index backend (Near port)
|
| 29 |
+
index: Box<dyn Near>,
|
| 30 |
+
}
|
| 31 |
+
|
| 32 |
+
impl Arms {
|
| 33 |
+
/// Create a new ARMS instance with default adapters
|
| 34 |
+
///
|
| 35 |
+
/// Uses MemoryStorage and FlatIndex.
|
| 36 |
+
/// For production, use `Arms::with_adapters` with appropriate backends.
|
| 37 |
+
pub fn new(config: ArmsConfig) -> Self {
|
| 38 |
+
let storage = Box::new(MemoryStorage::new(config.dimensionality));
|
| 39 |
+
let index = Box::new(FlatIndex::new(
|
| 40 |
+
config.dimensionality,
|
| 41 |
+
config.proximity.clone(),
|
| 42 |
+
true, // Assuming cosine-like similarity by default
|
| 43 |
+
));
|
| 44 |
+
|
| 45 |
+
Self {
|
| 46 |
+
config,
|
| 47 |
+
storage,
|
| 48 |
+
index,
|
| 49 |
+
}
|
| 50 |
+
}
|
| 51 |
+
|
| 52 |
+
/// Create with custom adapters
|
| 53 |
+
pub fn with_adapters(
|
| 54 |
+
config: ArmsConfig,
|
| 55 |
+
storage: Box<dyn Place>,
|
| 56 |
+
index: Box<dyn Near>,
|
| 57 |
+
) -> Self {
|
| 58 |
+
Self {
|
| 59 |
+
config,
|
| 60 |
+
storage,
|
| 61 |
+
index,
|
| 62 |
+
}
|
| 63 |
+
}
|
| 64 |
+
|
| 65 |
+
/// Get the configuration
|
| 66 |
+
pub fn config(&self) -> &ArmsConfig {
|
| 67 |
+
&self.config
|
| 68 |
+
}
|
| 69 |
+
|
| 70 |
+
/// Get the dimensionality of this space
|
| 71 |
+
pub fn dimensionality(&self) -> usize {
|
| 72 |
+
self.config.dimensionality
|
| 73 |
+
}
|
| 74 |
+
|
| 75 |
+
// ========================================================================
|
| 76 |
+
// PLACE OPERATIONS
|
| 77 |
+
// ========================================================================
|
| 78 |
+
|
| 79 |
+
/// Place a point in the space
|
| 80 |
+
///
|
| 81 |
+
/// The point will be normalized if configured to do so.
|
| 82 |
+
/// Returns the assigned ID.
|
| 83 |
+
pub fn place(&mut self, point: Point, blob: Blob) -> PlaceResult<Id> {
|
| 84 |
+
// Normalize if configured
|
| 85 |
+
let point = if self.config.normalize_on_insert {
|
| 86 |
+
point.normalize()
|
| 87 |
+
} else {
|
| 88 |
+
point
|
| 89 |
+
};
|
| 90 |
+
|
| 91 |
+
// Store in storage
|
| 92 |
+
let id = self.storage.place(point.clone(), blob)?;
|
| 93 |
+
|
| 94 |
+
// Add to index
|
| 95 |
+
if let Err(e) = self.index.add(id, &point) {
|
| 96 |
+
// Rollback storage if index fails
|
| 97 |
+
self.storage.remove(id);
|
| 98 |
+
return Err(crate::ports::PlaceError::StorageError(format!(
|
| 99 |
+
"Index error: {:?}",
|
| 100 |
+
e
|
| 101 |
+
)));
|
| 102 |
+
}
|
| 103 |
+
|
| 104 |
+
Ok(id)
|
| 105 |
+
}
|
| 106 |
+
|
| 107 |
+
/// Place multiple points at once
|
| 108 |
+
pub fn place_batch(&mut self, items: Vec<(Point, Blob)>) -> Vec<PlaceResult<Id>> {
|
| 109 |
+
items
|
| 110 |
+
.into_iter()
|
| 111 |
+
.map(|(point, blob)| self.place(point, blob))
|
| 112 |
+
.collect()
|
| 113 |
+
}
|
| 114 |
+
|
| 115 |
+
/// Remove a point from the space
|
| 116 |
+
pub fn remove(&mut self, id: Id) -> Option<PlacedPoint> {
|
| 117 |
+
// Remove from index first
|
| 118 |
+
let _ = self.index.remove(id);
|
| 119 |
+
|
| 120 |
+
// Then from storage
|
| 121 |
+
self.storage.remove(id)
|
| 122 |
+
}
|
| 123 |
+
|
| 124 |
+
/// Get a point by ID
|
| 125 |
+
pub fn get(&self, id: Id) -> Option<&PlacedPoint> {
|
| 126 |
+
self.storage.get(id)
|
| 127 |
+
}
|
| 128 |
+
|
| 129 |
+
/// Check if a point exists
|
| 130 |
+
pub fn contains(&self, id: Id) -> bool {
|
| 131 |
+
self.storage.contains(id)
|
| 132 |
+
}
|
| 133 |
+
|
| 134 |
+
/// Get the number of stored points
|
| 135 |
+
pub fn len(&self) -> usize {
|
| 136 |
+
self.storage.len()
|
| 137 |
+
}
|
| 138 |
+
|
| 139 |
+
/// Check if the space is empty
|
| 140 |
+
pub fn is_empty(&self) -> bool {
|
| 141 |
+
self.storage.is_empty()
|
| 142 |
+
}
|
| 143 |
+
|
| 144 |
+
/// Clear all points
|
| 145 |
+
pub fn clear(&mut self) {
|
| 146 |
+
self.storage.clear();
|
| 147 |
+
let _ = self.index.rebuild(); // Reset index
|
| 148 |
+
}
|
| 149 |
+
|
| 150 |
+
// ========================================================================
|
| 151 |
+
// NEAR OPERATIONS
|
| 152 |
+
// ========================================================================
|
| 153 |
+
|
| 154 |
+
/// Find k nearest points to query
|
| 155 |
+
pub fn near(&self, query: &Point, k: usize) -> NearResult<Vec<SearchResult>> {
|
| 156 |
+
// Normalize query if configured
|
| 157 |
+
let query = if self.config.normalize_on_insert {
|
| 158 |
+
query.normalize()
|
| 159 |
+
} else {
|
| 160 |
+
query.clone()
|
| 161 |
+
};
|
| 162 |
+
|
| 163 |
+
self.index.near(&query, k)
|
| 164 |
+
}
|
| 165 |
+
|
| 166 |
+
/// Find all points within threshold
|
| 167 |
+
pub fn within(&self, query: &Point, threshold: f32) -> NearResult<Vec<SearchResult>> {
|
| 168 |
+
let query = if self.config.normalize_on_insert {
|
| 169 |
+
query.normalize()
|
| 170 |
+
} else {
|
| 171 |
+
query.clone()
|
| 172 |
+
};
|
| 173 |
+
|
| 174 |
+
self.index.within(&query, threshold)
|
| 175 |
+
}
|
| 176 |
+
|
| 177 |
+
/// Find and retrieve k nearest points (with full data)
|
| 178 |
+
pub fn near_with_data(&self, query: &Point, k: usize) -> NearResult<Vec<(&PlacedPoint, f32)>> {
|
| 179 |
+
let results = self.near(query, k)?;
|
| 180 |
+
|
| 181 |
+
Ok(results
|
| 182 |
+
.into_iter()
|
| 183 |
+
.filter_map(|r| self.storage.get(r.id).map(|p| (p, r.score)))
|
| 184 |
+
.collect())
|
| 185 |
+
}
|
| 186 |
+
|
| 187 |
+
// ========================================================================
|
| 188 |
+
// MERGE OPERATIONS
|
| 189 |
+
// ========================================================================
|
| 190 |
+
|
| 191 |
+
/// Merge multiple points into one using the configured merge function
|
| 192 |
+
pub fn merge(&self, points: &[Point]) -> Point {
|
| 193 |
+
self.config.merge.merge(points)
|
| 194 |
+
}
|
| 195 |
+
|
| 196 |
+
/// Compute proximity between two points
|
| 197 |
+
pub fn proximity(&self, a: &Point, b: &Point) -> f32 {
|
| 198 |
+
self.config.proximity.proximity(a, b)
|
| 199 |
+
}
|
| 200 |
+
|
| 201 |
+
// ========================================================================
|
| 202 |
+
// STATS
|
| 203 |
+
// ========================================================================
|
| 204 |
+
|
| 205 |
+
/// Get storage size in bytes
|
| 206 |
+
pub fn size_bytes(&self) -> usize {
|
| 207 |
+
self.storage.size_bytes()
|
| 208 |
+
}
|
| 209 |
+
|
| 210 |
+
/// Get index stats
|
| 211 |
+
pub fn index_len(&self) -> usize {
|
| 212 |
+
self.index.len()
|
| 213 |
+
}
|
| 214 |
+
|
| 215 |
+
/// Check if index is ready
|
| 216 |
+
pub fn is_ready(&self) -> bool {
|
| 217 |
+
self.index.is_ready()
|
| 218 |
+
}
|
| 219 |
+
}
|
| 220 |
+
|
| 221 |
+
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a 3-dimensional engine backed by the default adapters.
    fn create_test_arms() -> Arms {
        let config = ArmsConfig::new(3);
        Arms::new(config)
    }

    /// Shorthand for a 3-dimensional point.
    fn pt(x: f32, y: f32, z: f32) -> Point {
        Point::new(vec![x, y, z])
    }

    #[test]
    fn test_arms_place_and_get() {
        let mut arms = create_test_arms();

        let id = arms
            .place(pt(1.0, 0.0, 0.0), Blob::from_str("test data"))
            .unwrap();

        let stored = arms.get(id).unwrap();
        assert_eq!(stored.blob.as_str(), Some("test data"));
    }

    #[test]
    fn test_arms_near() {
        let mut arms = create_test_arms();

        // One point on each axis
        arms.place(pt(1.0, 0.0, 0.0), Blob::from_str("x")).unwrap();
        arms.place(pt(0.0, 1.0, 0.0), Blob::from_str("y")).unwrap();
        arms.place(pt(0.0, 0.0, 1.0), Blob::from_str("z")).unwrap();

        // Query along the x axis
        let hits = arms.near(&pt(1.0, 0.0, 0.0), 2).unwrap();

        assert_eq!(hits.len(), 2);
        // The best match must come first
        let (best, runner_up) = (&hits[0], &hits[1]);
        assert!(best.score > runner_up.score);
    }

    #[test]
    fn test_arms_near_with_data() {
        let mut arms = create_test_arms();

        arms.place(pt(1.0, 0.0, 0.0), Blob::from_str("x")).unwrap();
        arms.place(pt(0.0, 1.0, 0.0), Blob::from_str("y")).unwrap();

        let hits = arms.near_with_data(&pt(1.0, 0.0, 0.0), 1).unwrap();

        assert_eq!(hits.len(), 1);
        let (placed, _score) = &hits[0];
        assert_eq!(placed.blob.as_str(), Some("x"));
    }

    #[test]
    fn test_arms_remove() {
        let mut arms = create_test_arms();

        let id = arms.place(pt(1.0, 0.0, 0.0), Blob::empty()).unwrap();

        // Present before removal
        assert!(arms.contains(id));
        assert_eq!(arms.len(), 1);

        arms.remove(id);

        // Gone afterwards
        assert!(!arms.contains(id));
        assert_eq!(arms.len(), 0);
    }

    #[test]
    fn test_arms_merge() {
        let arms = create_test_arms();

        let merged = arms.merge(&[pt(1.0, 0.0, 0.0), pt(0.0, 1.0, 0.0)]);

        // Mean of [1,0,0] and [0,1,0] = [0.5, 0.5, 0]
        let expected = [0.5_f32, 0.5, 0.0];
        for (axis, want) in expected.iter().enumerate() {
            assert!((merged.dims()[axis] - *want).abs() < 0.0001);
        }
    }

    #[test]
    fn test_arms_clear() {
        let mut arms = create_test_arms();

        (0..10).for_each(|i| {
            arms.place(pt(i as f32, 0.0, 0.0), Blob::empty()).unwrap();
        });

        assert_eq!(arms.len(), 10);

        arms.clear();

        assert_eq!(arms.len(), 0);
        assert!(arms.is_empty());
    }

    #[test]
    fn test_arms_normalizes_on_insert() {
        let mut arms = create_test_arms();

        // [3, 4, 0] has magnitude 5, so it is not unit length on input
        let id = arms.place(pt(3.0, 4.0, 0.0), Blob::empty()).unwrap();

        // The stored copy should be normalized
        let stored = arms.get(id).unwrap();
        assert!(stored.point.is_normalized());
    }
}
|
src/engine/mod.rs
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
//! # Engine
|
| 2 |
+
//!
|
| 3 |
+
//! The orchestration layer that wires everything together.
|
| 4 |
+
//!
|
| 5 |
+
//! This is where:
|
| 6 |
+
//! - Configuration is applied
|
| 7 |
+
//! - Adapters are connected to ports
|
| 8 |
+
//! - The unified ARMS interface is exposed
|
| 9 |
+
|
| 10 |
+
mod arms;
|
| 11 |
+
|
| 12 |
+
pub use arms::Arms;
|
src/lib.rs
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
//! # ARMS - Attention Reasoning Memory Store
|
| 2 |
+
//!
|
| 3 |
+
//! > "The hippocampus of artificial minds"
|
| 4 |
+
//!
|
| 5 |
+
//! ARMS is a spatial memory fabric for AI models. It stores computed attention
|
| 6 |
+
//! states at their native dimensional coordinates, enabling instant retrieval
|
| 7 |
+
//! by proximity rather than traditional indexing.
|
| 8 |
+
//!
|
| 9 |
+
//! ## Philosophy
|
| 10 |
+
//!
|
| 11 |
+
//! - **Position IS relationship** - No foreign keys, proximity defines connection
|
| 12 |
+
//! - **Configurable, not hardcoded** - Dimensionality, proximity functions, all flexible
|
| 13 |
+
//! - **Generators over assets** - Algorithms, not rigid structures
|
| 14 |
+
//! - **Pure core, swappable adapters** - Hexagonal architecture
|
| 15 |
+
//!
|
| 16 |
+
//! ## Architecture
|
| 17 |
+
//!
|
| 18 |
+
//! ```text
|
| 19 |
+
//! ┌─────────────────────────────────────────────────────────────┐
|
| 20 |
+
//! │ ARMS │
|
| 21 |
+
//! ├─────────────────────────────────────────────────────────────┤
|
| 22 |
+
//! │ │
|
| 23 |
+
//! │ CORE (pure math, no I/O) │
|
| 24 |
+
//! │ Point, Id, Blob, Proximity, Merge │
|
| 25 |
+
//! │ │
|
| 26 |
+
//! │ PORTS (trait contracts) │
|
| 27 |
+
//! │ Place, Near, Latency │
|
| 28 |
+
//! │ │
|
| 29 |
+
//! │ ADAPTERS (swappable implementations) │
|
| 30 |
+
//! │ Storage: Memory, NVMe │
|
| 31 |
+
//! │ Index: Flat, HNSW │
|
| 32 |
+
//! │ API: Python bindings │
|
| 33 |
+
//! │ │
|
| 34 |
+
//! │ ENGINE (orchestration) │
|
| 35 |
+
//! │ Arms - the main entry point │
|
| 36 |
+
//! │ │
|
| 37 |
+
//! └─────────────────────────────────────────────────────────────┘
|
| 38 |
+
//! ```
|
| 39 |
+
//!
|
| 40 |
+
//! ## Quick Start
|
| 41 |
+
//!
|
| 42 |
+
//! ```rust,ignore
|
| 43 |
+
//! use arms::{Arms, ArmsConfig, Point};
|
| 44 |
+
//!
|
| 45 |
+
//! // Create ARMS with default config (768 dimensions)
|
| 46 |
+
//! let mut arms = Arms::new(ArmsConfig::default());
|
| 47 |
+
//!
|
| 48 |
+
//! // Place a point in the space
|
| 49 |
+
//! let point = Point::new(vec![0.1; 768]);
|
| 50 |
+
//! let id = arms.place(point, arms::Blob::from_str("my data")).unwrap();
|
| 51 |
+
//!
|
| 52 |
+
//! // Find nearby points
|
| 53 |
+
//! let query = Point::new(vec![0.1; 768]);
|
| 54 |
+
//! let neighbors = arms.near(&query, 5);
|
| 55 |
+
//! ```
|
| 56 |
+
|
| 57 |
+
// ============================================================================
|
| 58 |
+
// MODULES
|
| 59 |
+
// ============================================================================
|
| 60 |
+
|
| 61 |
+
/// Core domain - pure math, no I/O
|
| 62 |
+
/// Contains: Point, Id, Blob, Proximity trait, Merge trait
|
| 63 |
+
pub mod core;
|
| 64 |
+
|
| 65 |
+
/// Port definitions - trait contracts for adapters
|
| 66 |
+
/// Contains: Place trait, Near trait, Latency trait
|
| 67 |
+
pub mod ports;
|
| 68 |
+
|
| 69 |
+
/// Adapter implementations - swappable components
|
| 70 |
+
/// Contains: storage, index, python submodules
|
| 71 |
+
pub mod adapters;
|
| 72 |
+
|
| 73 |
+
/// Engine - orchestration layer
|
| 74 |
+
/// Contains: Arms main struct
|
| 75 |
+
pub mod engine;
|
| 76 |
+
|
| 77 |
+
// ============================================================================
|
| 78 |
+
// RE-EXPORTS (public API)
|
| 79 |
+
// ============================================================================
|
| 80 |
+
|
| 81 |
+
// Core types
|
| 82 |
+
pub use crate::core::{Point, Id, Blob, PlacedPoint};
|
| 83 |
+
pub use crate::core::proximity::{Proximity, Cosine, Euclidean, DotProduct};
|
| 84 |
+
pub use crate::core::merge::{Merge, Mean, WeightedMean, MaxPool};
|
| 85 |
+
pub use crate::core::config::ArmsConfig;
|
| 86 |
+
|
| 87 |
+
// Port traits
|
| 88 |
+
pub use crate::ports::{Place, Near, Latency};
|
| 89 |
+
|
| 90 |
+
// Engine
|
| 91 |
+
pub use crate::engine::Arms;
|
| 92 |
+
|
| 93 |
+
// ============================================================================
|
| 94 |
+
// CRATE-LEVEL DOCUMENTATION
|
| 95 |
+
// ============================================================================
|
| 96 |
+
|
| 97 |
+
/// The five primitives of ARMS:
|
| 98 |
+
///
|
| 99 |
+
/// 1. **Point**: `Vec<f32>` - Any dimensionality
|
| 100 |
+
/// 2. **Proximity**: `fn(a, b) -> f32` - How related?
|
| 101 |
+
/// 3. **Merge**: `fn(points) -> point` - Compose together
|
| 102 |
+
/// 4. **Place**: `fn(point, data) -> id` - Exist in space
|
| 103 |
+
/// 5. **Near**: `fn(point, k) -> ids` - What's related?
|
| 104 |
+
///
|
| 105 |
+
/// Everything else is configuration or adapters.
|
| 106 |
+
#[doc(hidden)]
|
| 107 |
+
pub const _PRIMITIVES: () = ();
|
src/ports/latency.rs
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
//! # Latency Port
|
| 2 |
+
//!
|
| 3 |
+
//! Trait for runtime latency measurement and adaptation.
|
| 4 |
+
//!
|
| 5 |
+
//! This enables the model to know its actual retrieval constraints:
|
| 6 |
+
//! - How fast is the hot tier right now?
|
| 7 |
+
//! - How much budget do I have for retrieval?
|
| 8 |
+
//! - Should I use fewer, faster retrievals or more, slower ones?
|
| 9 |
+
|
| 10 |
+
use std::time::Duration;
|
| 11 |
+
|
| 12 |
+
/// Storage tier levels, ordered from fastest to slowest.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Tier {
    /// RAM storage - fastest
    Hot,
    /// NVMe storage - fast
    Warm,
    /// Archive storage - slow
    Cold,
}

impl Tier {
    /// Get expected latency range for this tier
    ///
    /// Returns a `(lower, upper)` pair; the upper bound of one tier is the
    /// lower bound of the next slower one.
    pub fn expected_latency(&self) -> (Duration, Duration) {
        let one_ms = Duration::from_millis(1);
        let ten_ms = Duration::from_millis(10);

        match *self {
            Tier::Hot => (Duration::from_micros(1), one_ms),
            Tier::Warm => (one_ms, ten_ms),
            Tier::Cold => (ten_ms, Duration::from_millis(100)),
        }
    }
}
|
| 33 |
+
|
| 34 |
+
/// Latency measurement result
|
| 35 |
+
#[derive(Debug, Clone)]
|
| 36 |
+
pub struct LatencyMeasurement {
|
| 37 |
+
/// The tier that was measured
|
| 38 |
+
pub tier: Tier,
|
| 39 |
+
|
| 40 |
+
/// Measured latency for a single operation
|
| 41 |
+
pub latency: Duration,
|
| 42 |
+
|
| 43 |
+
/// Throughput (operations per second) if measured
|
| 44 |
+
pub throughput_ops: Option<f64>,
|
| 45 |
+
|
| 46 |
+
/// Timestamp of measurement
|
| 47 |
+
pub measured_at: std::time::Instant,
|
| 48 |
+
}
|
| 49 |
+
|
| 50 |
+
/// Time and operation limits granted to a batch of retrievals.
#[derive(Debug, Clone)]
pub struct LatencyBudget {
    /// Overall time allowed for the whole retrieval batch
    pub total: Duration,

    /// Upper bound on the time of any single retrieval
    pub per_operation: Duration,

    /// Upper bound on how many operations the budget covers
    pub max_operations: usize,
}

impl Default for LatencyBudget {
    /// Default budget: 50 ms in total, 5 ms per operation, 10 operations.
    fn default() -> Self {
        let total = Duration::from_millis(50);
        let per_operation = Duration::from_millis(5);

        LatencyBudget {
            total,
            per_operation,
            max_operations: 10,
        }
    }
}
|
| 72 |
+
|
| 73 |
+
/// Tier statistics
|
| 74 |
+
#[derive(Debug, Clone)]
|
| 75 |
+
pub struct TierStats {
|
| 76 |
+
/// The tier
|
| 77 |
+
pub tier: Tier,
|
| 78 |
+
|
| 79 |
+
/// Number of points in this tier
|
| 80 |
+
pub count: usize,
|
| 81 |
+
|
| 82 |
+
/// Total size in bytes
|
| 83 |
+
pub size_bytes: usize,
|
| 84 |
+
|
| 85 |
+
/// Capacity in bytes
|
| 86 |
+
pub capacity_bytes: usize,
|
| 87 |
+
|
| 88 |
+
/// Usage ratio (0.0 to 1.0)
|
| 89 |
+
pub usage_ratio: f32,
|
| 90 |
+
}
|
| 91 |
+
|
| 92 |
+
/// Trait for latency measurement and adaptation
|
| 93 |
+
///
|
| 94 |
+
/// System adapters implement this trait.
|
| 95 |
+
pub trait Latency: Send + Sync {
|
| 96 |
+
/// Probe a tier to measure current latency
|
| 97 |
+
///
|
| 98 |
+
/// Performs a small test operation to measure actual latency.
|
| 99 |
+
fn probe(&mut self, tier: Tier) -> LatencyMeasurement;
|
| 100 |
+
|
| 101 |
+
/// Get the current latency budget
|
| 102 |
+
fn budget(&self) -> LatencyBudget;
|
| 103 |
+
|
| 104 |
+
/// Set a new latency budget
|
| 105 |
+
fn set_budget(&mut self, budget: LatencyBudget);
|
| 106 |
+
|
| 107 |
+
/// Get available capacity in a tier
|
| 108 |
+
fn available_capacity(&self, tier: Tier) -> usize;
|
| 109 |
+
|
| 110 |
+
/// Recommend which tier to use for an access pattern
|
| 111 |
+
///
|
| 112 |
+
/// `expected_accesses` is the expected number of accesses for this data.
|
| 113 |
+
fn recommend_tier(&self, expected_accesses: u32) -> Tier;
|
| 114 |
+
|
| 115 |
+
/// Get statistics for a tier
|
| 116 |
+
fn tier_stats(&self, tier: Tier) -> TierStats;
|
| 117 |
+
|
| 118 |
+
/// Get statistics for all tiers
|
| 119 |
+
fn all_stats(&self) -> Vec<TierStats> {
|
| 120 |
+
vec![
|
| 121 |
+
self.tier_stats(Tier::Hot),
|
| 122 |
+
self.tier_stats(Tier::Warm),
|
| 123 |
+
self.tier_stats(Tier::Cold),
|
| 124 |
+
]
|
| 125 |
+
}
|
| 126 |
+
}
|
src/ports/mod.rs
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
//! # Ports
|
| 2 |
+
//!
|
| 3 |
+
//! Trait definitions for adapters. Contracts only, no implementations.
|
| 4 |
+
//!
|
| 5 |
+
//! This is the hexagonal architecture boundary:
|
| 6 |
+
//! - Ports define WHAT operations are needed
|
| 7 |
+
//! - Adapters define HOW they're implemented
|
| 8 |
+
//!
|
| 9 |
+
//! The CORE doesn't know about adapters.
|
| 10 |
+
//! Adapters implement these port traits.
|
| 11 |
+
|
| 12 |
+
mod place;
|
| 13 |
+
mod near;
|
| 14 |
+
mod latency;
|
| 15 |
+
|
| 16 |
+
// Re-export traits
|
| 17 |
+
pub use place::Place;
|
| 18 |
+
pub use near::Near;
|
| 19 |
+
pub use latency::Latency;
|
| 20 |
+
|
| 21 |
+
// Re-export types from place
|
| 22 |
+
pub use place::{PlaceError, PlaceResult};
|
| 23 |
+
|
| 24 |
+
// Re-export types from near
|
| 25 |
+
pub use near::{NearError, NearResult, SearchResult};
|
| 26 |
+
|
| 27 |
+
// Re-export types from latency
|
| 28 |
+
pub use latency::{Tier, LatencyBudget, LatencyMeasurement, TierStats};
|
src/ports/near.rs
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
//! # Near Port
|
| 2 |
+
//!
|
| 3 |
+
//! Trait for finding related points.
|
| 4 |
+
//!
|
| 5 |
+
//! This is one of the five primitives of ARMS:
|
| 6 |
+
//! `Near: fn(point, k) -> ids` - What's related?
|
| 7 |
+
//!
|
| 8 |
+
//! Implemented by index adapters (Flat, HNSW, etc.)
|
| 9 |
+
|
| 10 |
+
use crate::core::{Id, Point};
|
| 11 |
+
|
| 12 |
+
/// Result type for near operations
|
| 13 |
+
pub type NearResult<T> = Result<T, NearError>;
|
| 14 |
+
|
| 15 |
+
/// A search result with ID and distance/similarity score
|
| 16 |
+
#[derive(Debug, Clone, PartialEq)]
|
| 17 |
+
pub struct SearchResult {
|
| 18 |
+
/// The ID of the found point
|
| 19 |
+
pub id: Id,
|
| 20 |
+
|
| 21 |
+
/// Distance or similarity score
|
| 22 |
+
/// Interpretation depends on the proximity function used.
|
| 23 |
+
pub score: f32,
|
| 24 |
+
}
|
| 25 |
+
|
| 26 |
+
impl SearchResult {
|
| 27 |
+
pub fn new(id: Id, score: f32) -> Self {
|
| 28 |
+
Self { id, score }
|
| 29 |
+
}
|
| 30 |
+
}
|
| 31 |
+
|
| 32 |
+
/// Failures a near (search/index) operation can report.
#[derive(Debug, Clone, PartialEq)]
pub enum NearError {
    /// The query point does not have the expected number of dimensions
    DimensionalityMismatch { expected: usize, got: usize },

    /// The index has not been built or is not ready yet
    IndexNotReady,

    /// The index backend failed; the string carries its message
    IndexError(String),
}

impl std::fmt::Display for NearError {
    /// Writes a short human-readable description of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::IndexNotReady => f.write_str("Index not ready"),
            Self::IndexError(msg) => write!(f, "Index error: {}", msg),
            Self::DimensionalityMismatch { expected, got } => write!(
                f,
                "Dimensionality mismatch: expected {}, got {}",
                expected, got
            ),
        }
    }
}

impl std::error::Error for NearError {}
|
| 58 |
+
|
| 59 |
+
/// Trait for finding related points
|
| 60 |
+
///
|
| 61 |
+
/// Index adapters implement this trait.
|
| 62 |
+
pub trait Near: Send + Sync {
|
| 63 |
+
/// Find k nearest points to query
|
| 64 |
+
///
|
| 65 |
+
/// Returns results sorted by relevance (most relevant first).
|
| 66 |
+
fn near(&self, query: &Point, k: usize) -> NearResult<Vec<SearchResult>>;
|
| 67 |
+
|
| 68 |
+
/// Find all points within a distance/similarity threshold
|
| 69 |
+
///
|
| 70 |
+
/// For distance metrics (Euclidean), finds points with distance < threshold.
|
| 71 |
+
/// For similarity metrics (Cosine), finds points with similarity > threshold.
|
| 72 |
+
fn within(&self, query: &Point, threshold: f32) -> NearResult<Vec<SearchResult>>;
|
| 73 |
+
|
| 74 |
+
/// Add a point to the index
|
| 75 |
+
///
|
| 76 |
+
/// Call this after placing a point in storage.
|
| 77 |
+
fn add(&mut self, id: Id, point: &Point) -> NearResult<()>;
|
| 78 |
+
|
| 79 |
+
/// Remove a point from the index
|
| 80 |
+
fn remove(&mut self, id: Id) -> NearResult<()>;
|
| 81 |
+
|
| 82 |
+
/// Rebuild the index (if needed for performance)
|
| 83 |
+
fn rebuild(&mut self) -> NearResult<()>;
|
| 84 |
+
|
| 85 |
+
/// Check if the index is ready for queries
|
| 86 |
+
fn is_ready(&self) -> bool;
|
| 87 |
+
|
| 88 |
+
/// Get the number of indexed points
|
| 89 |
+
fn len(&self) -> usize;
|
| 90 |
+
|
| 91 |
+
/// Check if the index is empty
|
| 92 |
+
fn is_empty(&self) -> bool {
|
| 93 |
+
self.len() == 0
|
| 94 |
+
}
|
| 95 |
+
}
|
src/ports/place.rs
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
//! # Place Port
|
| 2 |
+
//!
|
| 3 |
+
//! Trait for placing points in the space.
|
| 4 |
+
//!
|
| 5 |
+
//! This is one of the five primitives of ARMS:
|
| 6 |
+
//! `Place: fn(point, data) -> id` - Exist in space
|
| 7 |
+
//!
|
| 8 |
+
//! Implemented by storage adapters (Memory, NVMe, etc.)
|
| 9 |
+
|
| 10 |
+
use crate::core::{Blob, Id, PlacedPoint, Point};
|
| 11 |
+
|
| 12 |
+
/// Result type for place operations
|
| 13 |
+
pub type PlaceResult<T> = Result<T, PlaceError>;
|
| 14 |
+
|
| 15 |
+
/// Errors that can occur during place operations
|
| 16 |
+
#[derive(Debug, Clone, PartialEq)]
|
| 17 |
+
pub enum PlaceError {
|
| 18 |
+
/// The point has wrong dimensionality for this space
|
| 19 |
+
DimensionalityMismatch { expected: usize, got: usize },
|
| 20 |
+
|
| 21 |
+
/// Storage capacity exceeded
|
| 22 |
+
CapacityExceeded,
|
| 23 |
+
|
| 24 |
+
/// Point with this ID already exists
|
| 25 |
+
DuplicateId(Id),
|
| 26 |
+
|
| 27 |
+
/// Storage backend error
|
| 28 |
+
StorageError(String),
|
| 29 |
+
}
|
| 30 |
+
|
| 31 |
+
impl std::fmt::Display for PlaceError {
|
| 32 |
+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
| 33 |
+
match self {
|
| 34 |
+
PlaceError::DimensionalityMismatch { expected, got } => {
|
| 35 |
+
write!(f, "Dimensionality mismatch: expected {}, got {}", expected, got)
|
| 36 |
+
}
|
| 37 |
+
PlaceError::CapacityExceeded => write!(f, "Storage capacity exceeded"),
|
| 38 |
+
PlaceError::DuplicateId(id) => write!(f, "Duplicate ID: {}", id),
|
| 39 |
+
PlaceError::StorageError(msg) => write!(f, "Storage error: {}", msg),
|
| 40 |
+
}
|
| 41 |
+
}
|
| 42 |
+
}
|
| 43 |
+
|
| 44 |
+
impl std::error::Error for PlaceError {}
|
| 45 |
+
|
| 46 |
+
/// Trait for placing points in the space
|
| 47 |
+
///
|
| 48 |
+
/// Storage adapters implement this trait.
|
| 49 |
+
pub trait Place: Send + Sync {
|
| 50 |
+
/// Place a point with its payload in the space
|
| 51 |
+
///
|
| 52 |
+
/// Returns the ID assigned to the placed point.
|
| 53 |
+
fn place(&mut self, point: Point, blob: Blob) -> PlaceResult<Id>;
|
| 54 |
+
|
| 55 |
+
/// Place a point with a specific ID
|
| 56 |
+
///
|
| 57 |
+
/// Use when you need deterministic IDs (e.g., replication, testing).
|
| 58 |
+
fn place_with_id(&mut self, id: Id, point: Point, blob: Blob) -> PlaceResult<()>;
|
| 59 |
+
|
| 60 |
+
/// Remove a point from the space
|
| 61 |
+
///
|
| 62 |
+
/// Returns the removed point if it existed.
|
| 63 |
+
fn remove(&mut self, id: Id) -> Option<PlacedPoint>;
|
| 64 |
+
|
| 65 |
+
/// Get a placed point by ID
|
| 66 |
+
///
|
| 67 |
+
/// Returns None if not found.
|
| 68 |
+
fn get(&self, id: Id) -> Option<&PlacedPoint>;
|
| 69 |
+
|
| 70 |
+
/// Check if a point exists
|
| 71 |
+
fn contains(&self, id: Id) -> bool {
|
| 72 |
+
self.get(id).is_some()
|
| 73 |
+
}
|
| 74 |
+
|
| 75 |
+
/// Get the number of placed points
|
| 76 |
+
fn len(&self) -> usize;
|
| 77 |
+
|
| 78 |
+
/// Check if the space is empty
|
| 79 |
+
fn is_empty(&self) -> bool {
|
| 80 |
+
self.len() == 0
|
| 81 |
+
}
|
| 82 |
+
|
| 83 |
+
/// Iterate over all placed points
|
| 84 |
+
fn iter(&self) -> Box<dyn Iterator<Item = &PlacedPoint> + '_>;
|
| 85 |
+
|
| 86 |
+
/// Get current storage size in bytes
|
| 87 |
+
fn size_bytes(&self) -> usize;
|
| 88 |
+
|
| 89 |
+
/// Clear all points
|
| 90 |
+
fn clear(&mut self);
|
| 91 |
+
}
|