# Web-UI paste residue removed (was: "NeerajCodz's picture / fix:v3 / f6712ff").
# Retained here as a comment so the module parses; original commit: f6712ff.
"""
api.routers.simulate
====================
Bulk battery lifecycle simulation endpoint - vectorized ML-driven.
Performance design (O(1) Python overhead per battery regardless of step count):
1. SEI impedance growth - numpy cumsum (no Python loop)
2. Feature matrix build - numpy column_stack -> (N_steps, n_features)
3. ML prediction - single model.predict() call via predict_array()
4. RUL / EOL - numpy diff / cumsum / searchsorted
5. Classify / colorize - numpy searchsorted on pre-built label arrays
Scaler dispatch mirrors training exactly:
Tree models (RF / ET / XGB / LGB / GB) -> raw numpy (no scaler)
Linear / SVR / KNN -> standard_scaler.joblib.transform(X)
best_ensemble -> per-component dispatch (same rules)
Deep sequence models (PyTorch / Keras) -> not batchable, falls back to physics
"""
from __future__ import annotations
import logging
from pathlib import Path
from typing import List, Optional
import joblib
import numpy as np
from fastapi import APIRouter
from pydantic import BaseModel, Field
from api.model_registry import (
FEATURE_COLS_SCALAR, V3_FEATURE_COLS, classify_degradation, soh_to_color,
registry_v3 as registry_v2,
)
log = logging.getLogger(__name__)
router = APIRouter(prefix="/api/v3", tags=["simulation"])
_Q_NOM = 2.0  # NASA PCoE nominal capacity (Ah)
# Seconds of wall time represented by one simulation step for each unit.
# "cycle" maps to None, meaning one step == one cycle (no time conversion).
_TIME_UNIT_SECONDS: dict[str, float | None] = {
    "cycle": None, "second": 1.0, "minute": 60.0,
    "hour": 3_600.0, "day": 86_400.0, "week": 604_800.0,
    "month": 2_592_000.0, "year": 31_536_000.0,
}
# Display labels for the response; keys mirror _TIME_UNIT_SECONDS exactly.
_TIME_UNIT_LABELS: dict[str, str] = {
    "cycle": "Cycles", "second": "Seconds", "minute": "Minutes",
    "hour": "Hours", "day": "Days", "week": "Weeks",
    "month": "Months", "year": "Years",
}
# Column index map - must stay in sync with FEATURE_COLS_SCALAR (12 features)
_F = {col: idx for idx, col in enumerate(FEATURE_COLS_SCALAR)}
# Column index map for V3_FEATURE_COLS (18 features)
_F3 = {col: idx for idx, col in enumerate(V3_FEATURE_COLS)}
# Pre-built label/color arrays for O(1) numpy-vectorized classification.
# searchsorted(_SOH_BINS, soh, side="left") yields an index into these arrays:
#   0 => SOH <= 70, 1 => (70, 80], 2 => (80, 90], 3 => SOH > 90.
_SOH_BINS = np.array([70.0, 80.0, 90.0])  # searchsorted thresholds
_DEG_LABELS = np.array(["End-of-Life", "Degraded", "Moderate", "Healthy"], dtype=object)
_COLOR_HEX = np.array(["#ef4444", "#f97316", "#eab308", "#22c55e"], dtype=object)
# Optional learned Re/Rct progression model (trained in NB03).
_RE_RCT_MODEL_PATH = Path("artifacts") / "v3" / "models" / "classical" / "re_rct_progression.joblib"
try:
    _re_rct_model = joblib.load(_RE_RCT_MODEL_PATH)
except Exception:
    # Artifact missing/unreadable: _ml_re_rct falls back to a linear drift.
    _re_rct_model = None
def _vec_classify(soh: np.ndarray) -> list[str]:
    """Map each SOH value to its degradation label with one searchsorted call."""
    bucket = np.searchsorted(_SOH_BINS, soh, side="left")
    return list(_DEG_LABELS[bucket])
def _vec_color(soh: np.ndarray) -> list[str]:
    """Map each SOH value to its hex color with one searchsorted call."""
    bucket = np.searchsorted(_SOH_BINS, soh, side="left")
    return list(_COLOR_HEX[bucket])
# -- Schemas ------------------------------------------------------------------
class BatterySimConfig(BaseModel):
    """Initial state and operating conditions for one simulated battery."""

    battery_id: str                 # caller-chosen identifier echoed in the result
    label: Optional[str] = None     # display label; result falls back to battery_id
    initial_soh: float = Field(default=100.0, ge=0.0, le=100.0)  # state of health, %
    start_cycle: int = Field(default=1, ge=1)         # first absolute cycle number
    ambient_temperature: float = Field(default=24.0)  # feature value (degC presumed)
    peak_voltage: float = Field(default=4.19)         # end-of-charge voltage feature
    min_voltage: float = Field(default=2.61)          # end-of-discharge voltage feature
    avg_current: float = Field(default=1.82)          # average current feature
    avg_temp: float = Field(default=32.6)             # average temperature feature
    temp_rise: float = Field(default=14.7)            # temperature rise feature
    cycle_duration: float = Field(default=3690.0)     # seconds per cycle (used for unit conversion)
    Re: float = Field(default=0.045)                  # initial electrolyte resistance (SEI)
    Rct: float = Field(default=0.069)                 # initial charge-transfer resistance (SEI)
    delta_capacity: float = Field(default=-0.005)     # Ah change per cycle (negative = fade)
class SimulateRequest(BaseModel):
    """Bulk simulation request: shared settings applied to every battery."""

    batteries: List[BatterySimConfig]                  # one config per battery
    steps: int = Field(default=200, ge=1, le=10_000)   # simulation horizon length
    time_unit: str = Field(default="day")              # key of _TIME_UNIT_SECONDS; unknown => "day"
    eol_threshold: float = Field(default=70.0, ge=0.0, le=100.0)  # SOH % defining end-of-life
    model_name: Optional[str] = Field(default=None)    # None => registry default model
    use_ml: bool = Field(default=True)                 # False => physics (linear) fallback only
class BatterySimResult(BaseModel):
    """Per-battery output; all *_history lists share one entry per step."""

    battery_id: str
    label: Optional[str]
    soh_history: List[float]         # SOH % per step (rounded to 3 dp)
    rul_history: List[float]         # remaining useful life in cycles
    rul_time_history: List[float]    # RUL converted to the requested time unit
    re_history: List[float]          # Re trajectory per step
    rct_history: List[float]         # Rct trajectory per step
    cycle_history: List[int]         # absolute cycle number per step
    time_history: List[float]        # step index (time units) or cycle number ("cycle" unit)
    degradation_history: List[str]   # classification label per step
    color_history: List[str]         # hex color per step (UI hint)
    eol_cycle: Optional[int]         # first cycle where SOH <= threshold, if reached
    eol_time: Optional[float]        # EOL expressed in the requested time unit
    final_soh: float                 # SOH at the last simulated step
    final_rul: float                 # RUL (cycles) at the last simulated step
    deg_rate_avg: float              # mean SOH loss per step
    model_used: str = "physics"      # effective model name (e.g. "linear_fallback")
class SimulateResponse(BaseModel):
    """Envelope for a bulk simulation: one result per requested battery."""

    results: List[BatterySimResult]
    time_unit: str                # normalized unit actually used (may differ from request)
    time_unit_label: str          # display label, e.g. "Days"
    steps: int                    # steps simulated per battery
    model_used: str = "physics"   # model reported for the last battery processed
# -- Helpers ------------------------------------------------------------------
def _build_feature_matrix(
    b: BatterySimConfig,
    cycle_arr: np.ndarray,
    re_arr: np.ndarray,
    rct_arr: np.ndarray,
    soh_rolling_override: np.ndarray | None = None,
) -> np.ndarray:
    """Build a (steps, n_features) matrix in ``registry_v2.feature_cols`` order.

    Parameters
    ----------
    b : per-battery configuration (operating conditions + initial state).
    cycle_arr : absolute cycle numbers, one per simulation step.
    re_arr / rct_arr : SEI impedance trajectories (same length as cycle_arr).
    soh_rolling_override : optional pre-computed ``soh_rolling_mean`` column
        (the endpoint's two-pass correction supplies this); when omitted, a
        physics-estimated rolling mean is used as a proxy.

    For v3 (18 features) the 6 engineered columns are estimated from physics:
      - capacity_retention: current capacity / initial capacity
      - cumulative_energy:  cumsum of per-cycle capacity (Ah)
      - dRe_dn / dRct_dn:   per-cycle derivative of SEI impedances
      - soh_rolling_mean:   rolling mean (window=10, min_periods=1) of SOH
      - voltage_slope:      assumed constant (0) in simulation
    Column ordering uses registry_v2.feature_cols so predictions are correct
    regardless of whether the registry is v1/v2 (12 cols) or v3 (18 cols);
    unknown columns default to zeros.
    """
    cycles = np.asarray(cycle_arr, dtype=np.float64)
    N = len(cycles)
    steps_elapsed = np.maximum(cycles - cycles[0], 0.0)
    # ---- 12 base features (always present) ---------------------------------
    feat_dict: dict[str, np.ndarray] = {
        "cycle_number": cycles,
        "ambient_temperature": np.full(N, b.ambient_temperature),
        "peak_voltage": np.full(N, b.peak_voltage),
        "min_voltage": np.full(N, b.min_voltage),
        "voltage_range": np.full(N, b.peak_voltage - b.min_voltage),
        "avg_current": np.full(N, b.avg_current),
        "avg_temp": np.full(N, b.avg_temp),
        "temp_rise": np.full(N, b.temp_rise),
        "cycle_duration": np.full(N, b.cycle_duration),
        "Re": re_arr,
        "Rct": rct_arr,
        "delta_capacity": np.full(N, b.delta_capacity),
    }
    # ---- 6 extra v3 features (estimated from physics) ----------------------
    initial_cap = max(b.initial_soh / 100.0 * _Q_NOM, 1e-6)  # Ah; floor avoids /0
    cap_per_step = np.maximum(initial_cap + b.delta_capacity * steps_elapsed, 0.0)
    # capacity_retention = current_capacity / initial_capacity (ratio ~0-1)
    cap_retention = np.clip(cap_per_step / initial_cap, 0.0, None)
    # cumulative energy delivered (Ah)
    cum_energy = np.cumsum(cap_per_step)
    # per-cycle SEI impedance derivatives (prepend => first delta relative to b.Re/Rct)
    dRe_dn = np.diff(re_arr, prepend=b.Re)
    dRct_dn = np.diff(rct_arr, prepend=b.Rct)
    if soh_rolling_override is not None:
        # Caller supplied the column (two-pass correction): skip the physics
        # estimate entirely instead of computing and discarding it.
        soh_rolling = np.asarray(soh_rolling_override, dtype=np.float64)
    else:
        # Physics-estimated SOH trajectory; its rolling mean (window=10,
        # min_periods=1) proxies the v3 training feature. Reuses the shared
        # _rolling_mean helper rather than duplicating the cumsum trick.
        deg_pct_per_cycle = abs(b.delta_capacity) / _Q_NOM * 100.0
        soh_est = np.maximum(b.initial_soh - deg_pct_per_cycle * steps_elapsed, 0.0)
        soh_rolling = _rolling_mean(soh_est, window=10)
    feat_dict.update({
        "capacity_retention": cap_retention,
        "cumulative_energy": cum_energy,
        "dRe_dn": dRe_dn,
        "dRct_dn": dRct_dn,
        "soh_rolling_mean": soh_rolling,
        "voltage_slope": np.zeros(N),
        "coulombic_efficiency": np.zeros(N),  # always 0 in training data
    })
    # Build matrix in registry's feature_cols order; unknown cols default to 0
    feat_cols = registry_v2.feature_cols  # 12 for v1/v2, 18 for v3
    return np.column_stack([feat_dict.get(col, np.zeros(N)) for col in feat_cols])
def _rolling_mean(arr: np.ndarray, window: int = 5) -> np.ndarray:
"""Fast rolling mean with min_periods=1 semantics."""
x = np.asarray(arr, dtype=np.float64)
n = len(x)
csum = np.cumsum(np.concatenate([[0.0], x]))
idx = np.arange(1, n + 1)
start = np.maximum(idx - window, 0)
count = np.minimum(idx, window)
return (csum[idx] - csum[start]) / count
def _ml_re_rct(
    re0: float,
    rct0: float,
    cycle_arr: np.ndarray,
    elapsed_cycles: np.ndarray,
) -> tuple[np.ndarray, np.ndarray]:
    """Project Re/Rct over the horizon, anchored to the user's initial values.

    Uses the optional NB03 progression model when it loaded at import time;
    otherwise applies a small fixed linear drift per elapsed cycle.
    """
    if _re_rct_model is None:
        # Minimal linear fallback if the helper model is unavailable.
        drift = np.asarray(elapsed_cycles, dtype=np.float64)
        return (
            np.minimum(re0 + 0.00012 * drift, 2.0),
            np.minimum(rct0 + 0.00018 * drift, 3.0),
        )
    X = np.asarray(cycle_arr, dtype=np.float64).reshape(-1, 1)
    re_curve = _re_rct_model["re_model"].predict(X)
    rct_curve = _re_rct_model["rct_model"].predict(X)
    # Shift each learned curve so its first point matches the supplied state,
    # then clamp to the same physical bounds as the fallback.
    re_anchored = np.clip(re_curve + (re0 - float(re_curve[0])), 0.0, 2.0)
    rct_anchored = np.clip(rct_curve + (rct0 - float(rct_curve[0])), 0.0, 3.0)
    return re_anchored, rct_anchored
def _compute_rul_and_eol(
soh_arr: np.ndarray,
initial_soh: float,
eol_thr: float,
cycle_start: int,
cycle_arr: np.ndarray,
elapsed_cycles: np.ndarray,
cycle_dur: float,
tu_sec: float | None,
) -> tuple[np.ndarray, np.ndarray, Optional[int], Optional[float]]:
"""Vectorized RUL and EOL from SOH trajectory.
Returns (rul_cycles, rul_time, eol_cycle, eol_time).
Uses rolling-average degradation rate for smooth RUL estimate.
"""
cycles = np.asarray(cycle_arr, dtype=np.int64)
elapsed = np.asarray(elapsed_cycles, dtype=np.float64)
# Rolling average degradation rate (smoothed, avoids division-by-zero)
soh_prev = np.concatenate([[initial_soh], soh_arr[:-1]])
step_deg = np.maximum(0.0, soh_prev - soh_arr)
cum_deg = np.cumsum(step_deg)
avg_rate = np.maximum(cum_deg / np.maximum(elapsed, 1.0), 1e-6)
rul_cycles = np.where(soh_arr > eol_thr, (soh_arr - eol_thr) / avg_rate, 0.0)
rul_time = (rul_cycles * cycle_dur / tu_sec) if tu_sec is not None else rul_cycles.copy()
# EOL: first step where SOH <= threshold
below = soh_arr <= eol_thr
eol_cycle: Optional[int] = None
eol_time: Optional[float] = None
if below.any():
idx = int(np.argmax(below))
eol_cycle = int(cycles[idx])
elapsed_s = max(0.0, float(eol_cycle - cycle_start) * cycle_dur)
eol_time = round((elapsed_s / tu_sec) if tu_sec else float(eol_cycle), 3)
return rul_cycles, rul_time, eol_cycle, eol_time
# -- Endpoint -----------------------------------------------------------------
@router.post(
    "/simulate",
    response_model=SimulateResponse,
    summary="Bulk battery lifecycle simulation (vectorized, ML-driven)",
)
async def simulate_batteries(req: SimulateRequest):
    """
    Vectorized simulation: builds all N feature rows at once per battery,
    dispatches to the ML model as a single batch predict() call, then
    post-processes entirely with numpy (no Python for-loops).
    Scaler usage mirrors NB03 training exactly:
      - Tree models (RF/ET/XGB/LGB/GB): raw numpy X, no scaler
      - Linear/SVR/KNN: standard_scaler.joblib.transform(X)
      - best_ensemble: per-component family dispatch
    """
    time_unit = req.time_unit.lower()
    if time_unit not in _TIME_UNIT_SECONDS:
        time_unit = "day"  # unknown units coerce to the default silently
    tu_sec = _TIME_UNIT_SECONDS[time_unit]
    tu_label = _TIME_UNIT_LABELS[time_unit]
    eol_thr = req.eol_threshold
    N = req.steps
    model_name = req.model_name or registry_v2.default_model or "best_ensemble"
    # Deep sequence models need per-sample tensors and are not used in this endpoint.
    # Classical + ensemble models use batch predict_array().
    family = registry_v2.model_meta.get(model_name, {}).get("family", "classical")
    is_deep = family in ("deep_pytorch", "deep_keras")
    ml_batchable = (
        req.use_ml
        and not is_deep
        and (model_name == "best_ensemble" or model_name in registry_v2.models)
    )
    # Determine scaler note for logging (mirrors training decision exactly)
    if registry_v2.model_meta.get(model_name, {}).get("requires_scaling", False):
        scaler_note = "standard_scaler"
    elif model_name == "best_ensemble":
        scaler_note = "per-component (tree=none / linear=standard_scaler)"
    else:
        scaler_note = "none (tree)"
    effective_model = "linear_fallback"  # reported value when no battery uses ML
    log.info(
        "simulate: %d batteries x %d steps | model=%s | batchable=%s | scaler=%s | unit=%s",
        len(req.batteries), N, model_name, ml_batchable, scaler_note, time_unit,
    )
    results: list[BatterySimResult] = []
    for b in req.batteries:
        # Step -> cycle mapping: one step == tu_sec seconds, converted through
        # this battery's cycle duration ("cycle" unit maps one-to-one).
        cycles_per_step = 1.0 if tu_sec is None else (tu_sec / max(b.cycle_duration, 1e-6))
        elapsed_cycles = np.arange(N, dtype=np.float64) * cycles_per_step
        cycle_arr_float = b.start_cycle + elapsed_cycles
        cycle_arr = np.maximum(b.start_cycle, np.floor(cycle_arr_float).astype(np.int64))

        def _linear_soh() -> np.ndarray:
            # Physics fallback: constant fade slope derived from delta_capacity.
            # Shared by the "ML disabled" and "predict failed" branches.
            rate = abs(b.delta_capacity) / _Q_NOM * 100.0
            return np.clip(b.initial_soh - rate * (elapsed_cycles + 1.0), 0.0, 100.0)

        # 1. Re/Rct progression - prefer learned model, fallback to simple linear slopes.
        re_arr, rct_arr = _ml_re_rct(b.Re, b.Rct, cycle_arr, elapsed_cycles)
        # 2. SOH prediction with two-pass soh_rolling_mean correction.
        if ml_batchable:
            X_pass1 = _build_feature_matrix(b, cycle_arr, re_arr, rct_arr)
            try:
                soh_pass1, effective_model = registry_v2.predict_array(X_pass1, model_name)
                # Pass 2: feed the rolling mean of pass-1 predictions back in as
                # the soh_rolling_mean feature for a self-consistent prediction.
                soh_roll = _rolling_mean(soh_pass1, window=5)
                X_pass2 = _build_feature_matrix(
                    b,
                    cycle_arr,
                    re_arr,
                    rct_arr,
                    soh_rolling_override=soh_roll,
                )
                soh_arr, effective_model = registry_v2.predict_array(X_pass2, model_name)
            except Exception as exc:
                log.warning(
                    "predict_array failed for %s (%s) - falling back to linear",
                    b.battery_id, exc,
                )
                soh_arr = _linear_soh()
                effective_model = "linear_fallback"
        else:
            soh_arr = _linear_soh()
            effective_model = "linear_fallback"
        soh_arr = np.clip(soh_arr, 0.0, 100.0)
        # 3. RUL + EOL - vectorized
        rul_cycles, rul_time, eol_cycle, eol_time = _compute_rul_and_eol(
            soh_arr,
            b.initial_soh,
            eol_thr,
            b.start_cycle,
            cycle_arr,
            elapsed_cycles,
            b.cycle_duration,
            tu_sec,
        )
        # 4. Time axis - step index for wall-time units, cycle number for "cycle".
        time_arr = np.arange(N, dtype=np.float64) if tu_sec is not None else cycle_arr.astype(np.float64)
        # 5. Labels + colors - fully vectorized via numpy searchsorted
        deg_h = _vec_classify(soh_arr)
        color_h = _vec_color(soh_arr)
        avg_dr = float(np.mean(np.maximum(0.0, -np.diff(soh_arr, prepend=b.initial_soh))))
        # 6. Build result - numpy round + .tolist() (no per-element Python conversion)
        results.append(BatterySimResult(
            battery_id=b.battery_id,
            label=b.label or b.battery_id,
            soh_history=np.round(soh_arr, 3).tolist(),
            rul_history=np.round(rul_cycles, 1).tolist(),
            rul_time_history=np.round(rul_time, 2).tolist(),
            re_history=np.round(re_arr, 6).tolist(),
            rct_history=np.round(rct_arr, 6).tolist(),
            cycle_history=cycle_arr.tolist(),
            time_history=np.round(time_arr, 3).tolist(),
            degradation_history=deg_h,
            color_history=color_h,
            eol_cycle=eol_cycle,
            eol_time=eol_time,
            final_soh=round(float(soh_arr[-1]), 3),
            final_rul=round(float(rul_cycles[-1]), 1),
            deg_rate_avg=round(avg_dr, 6),
            model_used=effective_model,
        ))
    # NOTE(review): response-level model_used reflects the last battery processed
    # (or "linear_fallback" when the list is empty) - same as the original behavior.
    return SimulateResponse(
        results=results,
        time_unit=time_unit,
        time_unit_label=tu_label,
        steps=N,
        model_used=effective_model,
    )