Spaces:

barunsaha
/

slide-deck-ai

Running

App Files Files Community

barunsaha commited on 18 days ago

Commit

8f5ac4a

1 Parent(s): 622c44e

Add tests for icon embeddings

Browse files

Files changed (1) hide show

tests/unit/test_icons_embeddings.py +219 -0

tests/unit/test_icons_embeddings.py ADDED Viewed

	@@ -0,0 +1,219 @@

+"""
+Unit tests for the icons embeddings module.
+"""
+import importlib
+import sys
+from pathlib import Path
+from types import SimpleNamespace
+from typing import Any, List
+import numpy as np
+def _reload_module_with_dummies(monkeypatch: Any, emb_dim: int = 4):
+    """
+    Reload the icons_embeddings module after monkeypatching the
+    Transformers constructors to return lightweight dummy objects.
+    This prevents network/download or heavy model initialization during
+    tests and allows deterministic embeddings.
+    Args:
+        monkeypatch: The pytest monkeypatch fixture.
+        emb_dim: The embedding dimensionality that the dummy model
+            should produce.
+    Returns:
+        The reloaded module object.
+    """
+    class DummyTokenizer:
+        def __call__(self, texts, return_tensors=None, padding=None,
+                     max_length=None, truncation=None):
+            if isinstance(texts, str):
+                texts_list = [texts]
+            else:
+                texts_list = list(texts)
+            return {'texts': texts_list}
+    class DummyTensor:
+        def __init__(self, arr: np.ndarray) -> None:
+            self.arr = arr
+        def mean(self, dim: int) -> 'DummyTensor':
+            # Take numpy mean along the requested axis to emulate PyTorch.
+            return DummyTensor(self.arr.mean(axis=dim))
+        def detach(self) -> 'DummyTensor':
+            return self
+        def numpy(self) -> np.ndarray:
+            return self.arr
+    class DummyModel:
+        def __call__(self, **inputs: Any) -> SimpleNamespace:
+            texts = inputs.get('texts', [])
+            n = len(texts)
+            seq_len = 3
+            arr = np.arange(n * seq_len * emb_dim, dtype=float)
+            arr = arr.reshape((n, seq_len, emb_dim))
+            return SimpleNamespace(last_hidden_state=DummyTensor(arr))
+    monkeypatch.setattr(
+        'transformers.BertTokenizer.from_pretrained',
+        lambda name: DummyTokenizer(),
+    )
+    monkeypatch.setattr(
+        'transformers.BertModel.from_pretrained',
+        lambda name: DummyModel(),
+    )
+    if 'slidedeckai.helpers.icons_embeddings' in sys.modules:
+        mod = importlib.reload(sys.modules['slidedeckai.helpers.icons_embeddings'])
+    else:
+        mod = importlib.import_module('slidedeckai.helpers.icons_embeddings')
+    return mod
+def test_get_icons_list(tmp_path: Path, monkeypatch: Any) -> None:
+    """
+    get_icons_list should return the stems of PNG files in the
+    configured icons directory.
+    """
+    mod = _reload_module_with_dummies(monkeypatch)
+    # Prepare a temporary icons directory with some files.
+    icons_dir = tmp_path / 'icons'
+    icons_dir.mkdir()
+    (icons_dir / 'apple.png').write_text('x')
+    (icons_dir / 'banana.png').write_text('y')
+    (icons_dir / 'not_an_icon.txt').write_text('z')
+    monkeypatch.setattr(mod.GlobalConfig, 'ICONS_DIR', icons_dir)
+    icons = mod.get_icons_list()
+    assert set(icons) == {'apple', 'banana'}
+def test_get_embeddings_single_and_list(monkeypatch: Any) -> None:
+    """
+    get_embeddings must return numpy arrays with the expected shapes for
+    single string and list inputs.
+    """
+    emb_dim = 5
+    mod = _reload_module_with_dummies(monkeypatch, emb_dim=emb_dim)
+    # Single string -> shape (1, emb_dim)
+    arr1 = mod.get_embeddings('hello')
+    assert isinstance(arr1, np.ndarray)
+    assert arr1.shape == (1, emb_dim)
+    # List of strings -> shape (3, emb_dim)
+    arr2 = mod.get_embeddings(['a', 'b', 'c'])
+    assert arr2.shape == (3, emb_dim)
+    # Verify determinism from our dummy model for the first row.
+    # The dummy model fills values with a range; mean over axis=1 reduces
+    # the seq_len dimension.
+    expected_first_row = np.arange(3 * emb_dim).reshape((3, emb_dim)).mean(axis=0)
+    assert np.allclose(arr2[0], expected_first_row)
+def test_save_and_load_embeddings(tmp_path: Path, monkeypatch: Any) -> None:
+    """
+    save_icons_embeddings should write embeddings and file names to the
+    configured paths and load_saved_embeddings should read them back.
+    """
+    emb_dim = 6
+    mod = _reload_module_with_dummies(monkeypatch, emb_dim=emb_dim)
+    # Create icons dir with files.
+    icons_dir = tmp_path / 'icons2'
+    icons_dir.mkdir()
+    (icons_dir / 'one.png').write_text('1')
+    (icons_dir / 'two.png').write_text('2')
+    monkeypatch.setattr(mod.GlobalConfig, 'ICONS_DIR', icons_dir)
+    emb_file = tmp_path / 'emb.npy'
+    names_file = tmp_path / 'names.npy'
+    monkeypatch.setattr(mod.GlobalConfig, 'EMBEDDINGS_FILE_NAME', str(emb_file))
+    monkeypatch.setattr(mod.GlobalConfig, 'ICONS_FILE_NAME', str(names_file))
+    # Run save which uses the dummy tokenizer/model to create embeddings.
+    mod.save_icons_embeddings()
+    assert emb_file.exists()
+    assert names_file.exists()
+    loaded_emb, loaded_names = mod.load_saved_embeddings()
+    assert isinstance(loaded_emb, np.ndarray)
+    assert isinstance(loaded_names, np.ndarray)
+    assert loaded_emb.shape[0] == len(loaded_names)
+def test_find_icons(monkeypatch: Any, tmp_path: Path) -> None:
+    """
+    find_icons should map keywords to the most similar icon filenames
+    based on cosine similarity against pre-saved embeddings.
+    """
+    # Reload module with dummy model but we will monkeypatch get_embeddings
+    # to control keyword embeddings precisely.
+    mod = _reload_module_with_dummies(monkeypatch, emb_dim=3)
+    # Prepare saved embeddings with two icons.
+    emb = np.array([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]])
+    names = np.array(['a_icon', 'b_icon'])
+    emb_file = tmp_path / 'emb_s.npy'
+    names_file = tmp_path / 'names_s.npy'
+    np.save(str(emb_file), emb)
+    np.save(str(names_file), names)
+    monkeypatch.setattr(mod.GlobalConfig, 'EMBEDDINGS_FILE_NAME', str(emb_file))
+    monkeypatch.setattr(mod.GlobalConfig, 'ICONS_FILE_NAME', str(names_file))
+    # Make keyword embeddings match each saved one.
+    def fake_get_embeddings(keywords: List[str]) -> np.ndarray:
+        out = []
+        for kw in keywords:
+            if kw == 'match_a':
+                out.append([1.0, 0.0, 0.0])
+            else:
+                out.append([0.0, 1.0, 0.0])
+        return np.array(out)
+    monkeypatch.setattr(mod, 'get_embeddings', fake_get_embeddings)
+    res = mod.find_icons(['match_a', 'other'])
+    assert list(res) == ['a_icon', 'b_icon']
+def test_main_calls_and_prints(monkeypatch: Any, capsys: Any) -> None:
+    """
+    main should call save_icons_embeddings and find_icons and print the
+    zipped results. We monkeypatch the heavy functions to keep it fast.
+    """
+    mod = _reload_module_with_dummies(monkeypatch)
+    called = {}
+    def fake_save():
+        called['saved'] = True
+    def fake_find(keywords: List[str]) -> List[str]:
+        called['found'] = True
+        return ['x' for _ in keywords]
+    monkeypatch.setattr(mod, 'save_icons_embeddings', fake_save)
+    monkeypatch.setattr(mod, 'find_icons', fake_find)
+    mod.main()
+    captured = capsys.readouterr()
+    assert 'The relevant icon files are' in captured.out
+    assert called.get('saved') is True
+    assert called.get('found') is True