Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
| import json | |
| from pathlib import Path | |
| import joblib | |
| import numpy as np | |
| import pandas as pd | |
| from api.service import InferenceService, load_inference_service, resolve_threshold | |
class DummyPreprocessor:
    """Stand-in preprocessor for these tests; ``transform`` is a pass-through."""

    # Column names exposed to callers: "Time", then "V1".."V28", then "Amount".
    feature_names_in_ = np.array(
        ["Time"] + [f"V{idx}" for idx in range(1, 29)] + ["Amount"]
    )

    def transform(self, frame: pd.DataFrame) -> pd.DataFrame:
        """Return ``frame`` itself, unmodified (no copy is made)."""
        return frame
class DummyModel:
    """Deterministic stand-in classifier that scores rows by ``Amount`` only.

    Amounts fall into three tiers (``< 100``, ``>= 100``, ``>= 300``) and each
    tier maps to a fixed two-column probability row.
    """

    # One probability row per tier, ordered from the lowest tier to the highest.
    _TIER_PROBS = np.array([[0.95, 0.05], [0.55, 0.45], [0.1, 0.9]])

    def predict_proba(self, frame: pd.DataFrame) -> np.ndarray:
        """Return an ``(n_rows, 2)`` array of fixed probabilities.

        Args:
            frame: Data containing a numeric ``"Amount"`` column.

        Returns:
            One row per input row. An empty frame yields shape ``(0, 2)``
            rather than a 1-D empty array, so column indexing stays valid.
        """
        amounts = frame["Amount"].to_numpy(dtype=float)
        # Each threshold crossed bumps the tier index by one: 0, 1 or 2.
        # NaN compares False against both thresholds and lands in tier 0.
        tiers = (amounts >= 100).astype(int) + (amounts >= 300).astype(int)
        return self._TIER_PROBS[tiers]
| def _record(amount: float) -> dict[str, float]: | |
| payload = {"Time": 0.0, "Amount": amount} | |
| for i in range(1, 29): | |
| payload[f"V{i}"] = 0.0 | |
| return payload | |
def test_inference_service_predict_records_risk_levels() -> None:
    """Amounts of 20, 120 and 320 should be labelled low, medium and high.

    The service is built directly around the dummy model and preprocessor
    with a 0.5 threshold; the highest amount must also be flagged as fraud.
    """
    columns = ["Time"] + [f"V{i}" for i in range(1, 29)] + ["Amount"]
    service = InferenceService(
        model=DummyModel(),
        preprocessor=DummyPreprocessor(),
        threshold=0.5,
        model_path=Path("models/model.pkl"),
        preprocessor_path=Path("models/preprocessor.pkl"),
        feature_columns=columns,
    )

    records = [_record(amount) for amount in (20, 120, 320)]
    outputs = service.predict_records(records)

    for position, level in enumerate(("low", "medium", "high")):
        assert outputs[position]["risk_level"] == level
    assert outputs[2]["is_fraud"] is True
def test_resolve_threshold_precedence(tmp_path) -> None:
    """The training report's threshold wins when all three sources exist.

    The config file (0.51), model report (0.63) and training report (0.74)
    each carry a different value; ``resolve_threshold`` must return 0.74.
    """
    contents = {
        "train.yaml": "threshold:\n decision_threshold: 0.51\n",
        "model_report.json": json.dumps(
            {"threshold_selection": {"selected_threshold": 0.63}}
        ),
        "model_training_report.json": json.dumps(
            {"best_model": {"selected_threshold": 0.74}}
        ),
    }
    for filename, text in contents.items():
        (tmp_path / filename).write_text(text, encoding="utf-8")

    resolved = resolve_threshold(
        training_report_path=tmp_path / "model_training_report.json",
        model_report_path=tmp_path / "model_report.json",
        config_path=tmp_path / "train.yaml",
    )

    assert resolved == 0.74
def test_load_inference_service_reads_artifacts_and_threshold(tmp_path) -> None:
    """Load a service from dumped artifacts and a training report.

    Only the training report exists on disk; the model report and config
    paths point at files this test never creates. The loaded service must
    carry the report's 0.66 threshold and flag a 300.0 amount as fraud.
    """
    # Reset the loader's cache before loading from this test's tmp_path.
    load_inference_service.cache_clear()

    artifacts = {
        "model.pkl": DummyModel(),
        "preprocessor.pkl": DummyPreprocessor(),
    }
    for filename, artifact in artifacts.items():
        joblib.dump(artifact, tmp_path / filename)

    report_file = tmp_path / "model_training_report.json"
    report_body = json.dumps({"best_model": {"selected_threshold": 0.66}})
    report_file.write_text(report_body, encoding="utf-8")

    service = load_inference_service(
        model_path=str(tmp_path / "model.pkl"),
        preprocessor_path=str(tmp_path / "preprocessor.pkl"),
        training_report_path=str(report_file),
        model_report_path=str(tmp_path / "missing_model_report.json"),
        config_path=str(tmp_path / "missing_config.yaml"),
    )
    assert service.threshold == 0.66

    first_result = service.predict_records([_record(300.0)])[0]
    assert first_result["is_fraud"] is True