| """Tests for kvcos.engram.embedder — unified fingerprint embedding.""" |
|
|
| import pytest |
| import torch |
| import torch.nn.functional as F |
|
|
| from kvcos.engram.embedder import ( |
| HashEmbedder, |
| get_embedder, |
| get_fingerprint, |
| reset_embedder, |
| ) |
|
|
|
|
class TestHashEmbedder:
    """Checks on the vectors and metadata produced by HashEmbedder."""

    def test_deterministic(self):
        """Embedding the same text twice gives matching vectors."""
        embedder = HashEmbedder(dim=128)
        first = embedder.embed("hello")
        second = embedder.embed("hello")
        assert torch.allclose(first, second)

    def test_different_text(self):
        """Two different texts must not embed to matching vectors."""
        embedder = HashEmbedder(dim=128)
        hello_vec = embedder.embed("hello")
        world_vec = embedder.embed("world")
        vectors_match = torch.allclose(hello_vec, world_vec)
        assert not vectors_match

    def test_normalized(self):
        """The embedding's norm is within 0.01 of 1.0."""
        embedder = HashEmbedder(dim=128)
        vector = embedder.embed("test")
        deviation = torch.norm(vector).item() - 1.0
        assert -0.01 < deviation < 0.01

    def test_dimension(self):
        """The requested dim shows up in both the output shape and .dim."""
        embedder = HashEmbedder(dim=256)
        vector = embedder.embed("test")
        assert vector.shape == (256,)
        assert embedder.dim == 256

    def test_source_tag(self):
        """A default-constructed HashEmbedder reports the fallback tag."""
        assert HashEmbedder().source == "hash-fallback"
|
|
|
|
class TestGetFingerprint:
    """Checks on the (fingerprint, source) pair returned by get_fingerprint."""

    def test_returns_tensor_and_source(self):
        """The pair is a torch tensor plus one of the accepted source tags."""
        accepted_sources = ("llama_cpp", "sbert", "hash-fallback")
        vector, origin = get_fingerprint("test text")
        assert isinstance(vector, torch.Tensor)
        assert isinstance(origin, str)
        assert origin in accepted_sources

    def test_deterministic(self):
        """Fingerprinting the same text twice gives matching tensors."""
        first, _ = get_fingerprint("same text")
        second, _ = get_fingerprint("same text")
        assert torch.allclose(first, second)
|
|
|
|
class TestSBertEmbedder:
    """Exercise SBertEmbedder; each test skips itself on ImportError."""

    def test_sbert_available(self):
        """Verify sentence-transformers is usable."""
        try:
            from kvcos.engram.embedder import SBertEmbedder

            embedder = SBertEmbedder()
            # The checks stay inside the try so an ImportError raised at any
            # point here results in a skip rather than a failure.
            assert embedder.source == "sbert"
            assert embedder.dim == 384
        except ImportError:
            pytest.skip("sentence-transformers not installed")

    def test_semantic_discrimination(self):
        """Related texts should be more similar than unrelated."""

        def cosine(left, right):
            # cosine_similarity compares along dim 1, hence the added axis.
            return F.cosine_similarity(
                left.unsqueeze(0), right.unsqueeze(0)
            ).item()

        try:
            from kvcos.engram.embedder import SBertEmbedder

            embedder = SBertEmbedder()
        except ImportError:
            pytest.skip("sentence-transformers not installed")
        else:
            anchor = embedder.embed("machine learning neural network training")
            related = embedder.embed("deep learning model optimization")
            unrelated = embedder.embed("chocolate cake baking recipe")

            sim_ab = cosine(anchor, related)
            sim_ac = cosine(anchor, unrelated)

            assert sim_ab > sim_ac, (
                f"Related topics ({sim_ab:.4f}) should be more similar "
                f"than unrelated ({sim_ac:.4f})"
            )
|
|
|
|
class TestGetEmbedder:
    """Checks on the object handed out by get_embedder()."""

    def test_singleton(self):
        """Two consecutive get_embedder() calls return the same object."""
        reset_embedder()
        first = get_embedder()
        assert first is get_embedder()

    def test_reset(self):
        """get_embedder() still returns an object after reset_embedder()."""
        reset_embedder()
        before = get_embedder()  # noqa: F841 - held only to span the reset
        reset_embedder()
        after = get_embedder()

        # NOTE(review): `before` is never compared with `after`, so this only
        # confirms a non-None result after the reset; confirm whether a
        # distinct instance is expected and tighten the assertion if so.
        assert after is not None
|
|