Spaces:

thasvithu
/

fraud-detection-mlops-api

Sleeping

fraud-detection-mlops-api / tests /test_service.py

github-actions[bot]

deploy: sync snapshot from github

4937cba 2 months ago

3.31 kB

	from __future__ import annotations

	import json
	from pathlib import Path

	import joblib
	import numpy as np
	import pandas as pd

	from api.service import InferenceService, load_inference_service, resolve_threshold


	class DummyPreprocessor:
	    """Pass-through preprocessor double used by the service tests."""

	    # Feature names carried by the double: Time, V1..V28, Amount (30 names).
	    feature_names_in_ = np.array(["Time", *[f"V{i}" for i in range(1, 29)], "Amount"])

	    def transform(self, frame: pd.DataFrame) -> pd.DataFrame:
	        """Return ``frame`` itself, untouched (no copy, no scaling)."""
	        return frame


	class DummyModel:
	    """Deterministic classifier double whose scores depend only on ``Amount``."""

	    def predict_proba(self, frame: pd.DataFrame) -> np.ndarray:
	        """Return one two-column score row per record in ``frame``.

	        Rows are chosen by the ``Amount`` column:

	        * ``amount >= 300`` -> ``[0.1, 0.9]``
	        * ``100 <= amount < 300`` -> ``[0.55, 0.45]``
	        * anything else -> ``[0.95, 0.05]``

	        The result always has shape ``(len(frame), 2)``, including for an
	        empty frame.
	        """
	        probs = []
	        for amount in frame["Amount"].tolist():
	            if amount >= 300:
	                probs.append([0.1, 0.9])
	            elif amount >= 100:
	                probs.append([0.55, 0.45])
	            else:
	                probs.append([0.95, 0.05])
	        # np.array([]) alone would be 1-D with shape (0,); the reshape keeps
	        # the two-column layout so column indexing still works on empty input.
	        return np.array(probs, dtype=float).reshape(-1, 2)


	def _record(amount: float) -> dict[str, float]:
	payload = {"Time": 0.0, "Amount": amount}
	for i in range(1, 29):
	payload[f"V{i}"] = 0.0
	return payload


	def test_inference_service_predict_records_risk_levels() -> None:
	    """Check the risk level reported for a low, a middle and a high amount.

	    With the dummy model, amounts 20 / 120 / 320 score 0.05 / 0.45 / 0.9 in
	    the second probability column; the service runs with threshold 0.5.
	    """
	    service = InferenceService(
	        model=DummyModel(),
	        preprocessor=DummyPreprocessor(),
	        threshold=0.5,
	        model_path=Path("models/model.pkl"),
	        preprocessor_path=Path("models/preprocessor.pkl"),
	        feature_columns=["Time", *[f"V{i}" for i in range(1, 29)], "Amount"],
	    )

	    outputs = service.predict_records([_record(20), _record(120), _record(320)])

	    assert outputs[0]["risk_level"] == "low"
	    assert outputs[1]["risk_level"] == "medium"
	    assert outputs[2]["risk_level"] == "high"
	    assert outputs[2]["is_fraud"] is True


	def test_resolve_threshold_precedence(tmp_path) -> None:
	    """Check that the training-report threshold wins when all sources exist.

	    Three sources are written with distinct values: the config file (0.51),
	    the model report (0.63) and the training report (0.74). The resolved
	    threshold must be the training report's 0.74.
	    """
	    training_report = tmp_path / "model_training_report.json"
	    model_report = tmp_path / "model_report.json"
	    config_path = tmp_path / "train.yaml"

	    config_path.write_text("threshold:\n decision_threshold: 0.51\n", encoding="utf-8")
	    model_report.write_text(
	        json.dumps({"threshold_selection": {"selected_threshold": 0.63}}), encoding="utf-8"
	    )
	    training_report.write_text(
	        json.dumps({"best_model": {"selected_threshold": 0.74}}), encoding="utf-8"
	    )

	    threshold = resolve_threshold(
	        training_report_path=training_report,
	        model_report_path=model_report,
	        config_path=config_path,
	    )

	    assert threshold == 0.74


	def test_load_inference_service_reads_artifacts_and_threshold(tmp_path) -> None:
	    """Check that the loader reads dumped artifacts and the report threshold.

	    The dummy model and preprocessor are dumped with joblib into ``tmp_path``
	    next to a training report carrying threshold 0.66. The model-report and
	    config paths point at files that are never created.
	    """
	    # Reset the loader's cache before this test builds its own service.
	    load_inference_service.cache_clear()

	    model_path = tmp_path / "model.pkl"
	    preprocessor_path = tmp_path / "preprocessor.pkl"
	    training_report = tmp_path / "model_training_report.json"

	    joblib.dump(DummyModel(), model_path)
	    joblib.dump(DummyPreprocessor(), preprocessor_path)
	    training_report.write_text(
	        json.dumps({"best_model": {"selected_threshold": 0.66}}), encoding="utf-8"
	    )

	    service = load_inference_service(
	        model_path=str(model_path),
	        preprocessor_path=str(preprocessor_path),
	        training_report_path=str(training_report),
	        model_report_path=str(tmp_path / "missing_model_report.json"),
	        config_path=str(tmp_path / "missing_config.yaml"),
	    )

	    assert service.threshold == 0.66
	    # Amount 300.0 scores 0.9 with the dummy model, above the 0.66 threshold.
	    outputs = service.predict_records([_record(300.0)])
	    assert outputs[0]["is_fraud"] is True