# final_test/tests/test_data_preparation.py
# Abdelrahman Almatrooshi
# Deploy snapshot from main b7a59b11809483dfc959f196f1930240f2662c49
# 22a6915
import glob
import os
import sys
import numpy as np
import pytest
# Resolve the directory two levels above this file and put it at the front of
# sys.path so the `data_preparation` package imported below can be found
# regardless of the directory pytest is launched from.
_this_file = os.path.abspath(__file__)
_containing_dir = os.path.dirname(_this_file)
PROJECT_ROOT = os.path.dirname(_containing_dir)
if PROJECT_ROOT not in sys.path:
    sys.path.insert(0, PROJECT_ROOT)
from data_preparation.prepare_dataset import (
SELECTED_FEATURES,
_generate_synthetic_data,
get_default_split_config,
get_numpy_splits,
)
def test_get_default_split_config():
    """The default split config is (three ratios summing to 1, non-negative seed)."""
    default_ratios, default_seed = get_default_split_config()

    # Exactly three ratio entries are expected.
    ratio_count = len(default_ratios)
    assert ratio_count == 3

    # The ratios must add up to 1 within a small floating-point tolerance.
    deviation_from_one = abs(sum(default_ratios) - 1.0)
    assert deviation_from_one < 1e-6

    assert default_seed >= 0
def test_generate_synthetic_data_shape():
    """Synthetic data has 500 rows and one column per selected feature."""
    feature_set = "face_orientation"
    features, labels = _generate_synthetic_data(feature_set)

    # Both returned arrays must contain 500 samples.
    sample_counts = (features.shape[0], labels.shape[0])
    assert sample_counts == (500, 500)

    # Column count must match the feature list registered under the same key.
    expected_width = len(SELECTED_FEATURES[feature_set])
    assert features.shape[1] == expected_width
def test_get_numpy_splits_consistency():
    """Check that ``get_numpy_splits`` returns non-empty, repeatable splits.

    The test is skipped when no ``data/collected_*/*.npz`` file exists under
    ``PROJECT_ROOT``. Otherwise it verifies that:

    * the train, validation and test label arrays are all non-empty;
    * ``X_train`` has one row per training label and ``num_features`` columns;
    * at least two classes are reported;
    * a second call with the same ratios and seed reproduces the same arrays.
    """
    pattern = os.path.join(PROJECT_ROOT, "data", "collected_*", "*.npz")
    if not glob.glob(pattern):
        pytest.skip("No data/collected_*/*.npz — run collect_features or add dataset files.")

    split_ratios, seed = get_default_split_config()
    # The fourth return value is not inspected by this test, so it is discarded.
    splits, num_features, num_classes, _ = get_numpy_splits(
        "face_orientation", split_ratios=split_ratios, seed=seed
    )

    n_train = len(splits["y_train"])
    n_val = len(splits["y_val"])
    n_test = len(splits["y_test"])
    assert n_train > 0
    assert n_val > 0
    assert n_test > 0

    # Features and labels of the training partition must describe the same samples.
    assert splits["X_train"].shape[0] == n_train
    assert splits["X_train"].shape[1] == num_features
    assert num_classes >= 2

    # Same seed and ratios produce same split (deterministic).
    splits2, _, _, _ = get_numpy_splits(
        "face_orientation", split_ratios=split_ratios, seed=seed
    )
    # Compare every partition referenced above, not just the test labels:
    # checking only y_test would let a non-repeatable train/validation split
    # go unnoticed.
    for key in ("X_train", "y_train", "y_val", "y_test"):
        np.testing.assert_array_equal(
            splits[key],
            splits2[key],
            err_msg=key + " differs between two calls with identical seed and ratios",
        )