engram / tests /test_embedder.py
eigengram's picture
test: upload 220 tests
2ece486 verified
"""Tests for kvcos.engram.embedder — unified fingerprint embedding."""
import pytest
import torch
import torch.nn.functional as F
from kvcos.engram.embedder import (
HashEmbedder,
get_embedder,
get_fingerprint,
reset_embedder,
)
class TestHashEmbedder:
def test_deterministic(self):
emb = HashEmbedder(dim=128)
fp1 = emb.embed("hello")
fp2 = emb.embed("hello")
assert torch.allclose(fp1, fp2)
def test_different_text(self):
emb = HashEmbedder(dim=128)
fp1 = emb.embed("hello")
fp2 = emb.embed("world")
assert not torch.allclose(fp1, fp2)
def test_normalized(self):
emb = HashEmbedder(dim=128)
fp = emb.embed("test")
norm = torch.norm(fp).item()
assert abs(norm - 1.0) < 0.01
def test_dimension(self):
emb = HashEmbedder(dim=256)
fp = emb.embed("test")
assert fp.shape == (256,)
assert emb.dim == 256
def test_source_tag(self):
emb = HashEmbedder()
assert emb.source == "hash-fallback"
class TestGetFingerprint:
def test_returns_tensor_and_source(self):
fp, source = get_fingerprint("test text")
assert isinstance(fp, torch.Tensor)
assert isinstance(source, str)
assert source in ("llama_cpp", "sbert", "hash-fallback")
def test_deterministic(self):
fp1, _ = get_fingerprint("same text")
fp2, _ = get_fingerprint("same text")
assert torch.allclose(fp1, fp2)
class TestSBertEmbedder:
"""Test sbert if available (installed in this venv)."""
def test_sbert_available(self):
"""Verify sentence-transformers is usable."""
try:
from kvcos.engram.embedder import SBertEmbedder
emb = SBertEmbedder()
assert emb.source == "sbert"
assert emb.dim == 384
except ImportError:
pytest.skip("sentence-transformers not installed")
def test_semantic_discrimination(self):
"""Related texts should be more similar than unrelated."""
try:
from kvcos.engram.embedder import SBertEmbedder
emb = SBertEmbedder()
except ImportError:
pytest.skip("sentence-transformers not installed")
fp_a = emb.embed("machine learning neural network training")
fp_b = emb.embed("deep learning model optimization")
fp_c = emb.embed("chocolate cake baking recipe")
sim_ab = F.cosine_similarity(fp_a.unsqueeze(0), fp_b.unsqueeze(0)).item()
sim_ac = F.cosine_similarity(fp_a.unsqueeze(0), fp_c.unsqueeze(0)).item()
assert sim_ab > sim_ac, (
f"Related topics ({sim_ab:.4f}) should be more similar "
f"than unrelated ({sim_ac:.4f})"
)
class TestGetEmbedder:
def test_singleton(self):
reset_embedder()
e1 = get_embedder()
e2 = get_embedder()
assert e1 is e2
def test_reset(self):
reset_embedder()
e1 = get_embedder()
reset_embedder()
e2 = get_embedder()
# After reset, a new instance is created
# (may or may not be same object depending on strategy)
assert e2 is not None