| """ |
| Feature engineering v3.0 — Leak-free extraction from DailySnapshotDTO. |
| |
| Predicts day T headache using: |
| - Day T weather forecast (WeatherKit) |
| - Day T-1 HealthKit + diary (lag) |
| - Day T-2 headache history |
| - Temporal + user context + interactions |
| |
| Total: 38 features. |
| """ |
|
|
| from __future__ import annotations |
| import math |
| import numpy as np |
| from datetime import datetime |
| from typing import List, Optional |
|
|
| from models import ( |
| DailySnapshotDTO, UserContextDTO, |
| HeadacheLogSnapshotDTO, HealthKitMetricsDTO, WeatherDataDTO, |
| SleepAnalysisDTO, HRVSummaryDTO, |
| ) |
|
|
| MOOD_MAP = {"great": 5, "good": 4, "okay": 3, "bad": 2, "terrible": 1} |
|
|
| FEATURE_NAMES = [ |
| "pressure_mb", "pressure_change_24h", "pressure_volatility", |
| "humidity_pct", "temperature_c", "is_pressure_drop", |
| "sleep_total_hours", "deep_sleep_min", "rem_sleep_min", |
| "resting_hr", "hrv_avg_ms", "workout_min", "menstrual_flow_flag", |
| "had_headache_1d", "severity_1d", "duration_1d", |
| "mood_1d", "symptom_count_1d", "trigger_count_1d", |
| "had_headache_2d", "severity_2d", "duration_2d", |
| "dow_sin", "dow_cos", "month_sin", "month_cos", |
| "doy_sin", "doy_cos", "is_weekend", |
| "age_midpoint", "is_europe", "is_tropical", |
| "sleep_x_pressure", "low_hrv_flag", "sleep_deficit", |
| "high_humidity_flag", "headache_streak_2d", "consecutive_headache_days", |
| ] |
| NUM_FEATURES = len(FEATURE_NAMES) |
|
|
| |
| RISK_LABELS = { |
| "had_headache_1d": "recent_headache", |
| "pressure_change_24h": "barometric_pressure_drop", |
| "consecutive_headache_days": "headache_streak", |
| "hrv_avg_ms": "low_hrv_stress", |
| "headache_streak_2d": "multi_day_pattern", |
| "humidity_pct": "high_humidity", |
| "menstrual_flow_flag": "menstrual_phase", |
| "temperature_c": "temperature_extreme", |
| "sleep_total_hours": "poor_sleep", |
| "is_weekend": "weekend_pattern", |
| "sleep_deficit": "sleep_deficit", |
| "low_hrv_flag": "stress_indicator", |
| "is_pressure_drop": "pressure_front", |
| } |
|
|
|
|
| def _safe(val, default=0.0) -> float: |
| return float(val) if val is not None else default |
|
|
| def _cyclic(value: float, period: float): |
| a = 2 * math.pi * value / period |
| return math.sin(a), math.cos(a) |
|
|
| def _parse_age_range(age_range: Optional[str]) -> float: |
| if not age_range: |
| return 35.0 |
| try: |
| parts = age_range.replace(" ", "").split("-") |
| return (float(parts[0]) + float(parts[1])) / 2.0 |
| except Exception: |
| return 35.0 |
|
|
|
|
| def extract_features_for_day( |
| target_weather: WeatherDataDTO, |
| target_date: str, |
| yesterday_snapshot: Optional[DailySnapshotDTO], |
| two_days_ago_snapshot: Optional[DailySnapshotDTO], |
| user_ctx: Optional[UserContextDTO] = None, |
| consecutive_headache_days: int = 0, |
| ) -> np.ndarray: |
| """Build 38-feature vector for predicting headache on target_date.""" |
| f: List[float] = [] |
|
|
| w = target_weather or WeatherDataDTO() |
| yest = yesterday_snapshot or DailySnapshotDTO() |
| twod = two_days_ago_snapshot or DailySnapshotDTO() |
| ctx = user_ctx or UserContextDTO() |
|
|
| yest_hk = yest.health_kit_metrics or HealthKitMetricsDTO() |
| yest_sl = yest_hk.sleep_analysis or SleepAnalysisDTO() |
| yest_hrv = yest_hk.hrv_summary or HRVSummaryDTO() |
| yest_log = yest.headache_log or HeadacheLogSnapshotDTO() |
| twod_log = twod.headache_log or HeadacheLogSnapshotDTO() |
|
|
| |
| pc = _safe(w.pressure_change_24h_mb, 0.0) |
| hum = _safe(w.humidity_percent, 50.0) |
| f.append(_safe(w.barometric_pressure_mb, 1013.25)) |
| f.append(pc) |
| f.append(abs(pc)) |
| f.append(hum) |
| f.append(_safe(w.temperature_celsius, 15.0)) |
| f.append(1.0 if pc < -5 else 0.0) |
|
|
| |
| slp = _safe(yest_sl.total_duration_hours, 7.0) |
| hrv = _safe(yest_hrv.average_ms, 40.0) |
| f.append(slp) |
| f.append(_safe(yest_sl.deep_sleep_minutes, 80.0)) |
| f.append(_safe(yest_sl.rem_sleep_minutes, 90.0)) |
| f.append(_safe(yest_hk.resting_heart_rate, 65.0)) |
| f.append(hrv) |
| f.append(_safe(yest_hk.workout_minutes, 0)) |
| f.append(1.0 if yest_hk.had_menstrual_flow else 0.0) |
|
|
| |
| yh = 1.0 if yest_log.severity > 0 else 0.0 |
| f.append(yh) |
| f.append(float(yest_log.severity)) |
| f.append(float(yest_log.duration_hours)) |
| f.append(float(MOOD_MAP.get(str(yest_log.mood).lower(), 3))) |
| f.append(float(len(yest_log.symptoms.symptoms))) |
| f.append(float(len(yest_log.triggers.triggers))) |
|
|
| |
| th = 1.0 if twod_log.severity > 0 else 0.0 |
| f.append(th) |
| f.append(float(twod_log.severity)) |
| f.append(float(twod_log.duration_hours)) |
|
|
| |
| try: |
| dt = datetime.strptime(target_date, "%Y-%m-%d") |
| except (ValueError, TypeError): |
| dt = datetime.now() |
| dw_s, dw_c = _cyclic(dt.weekday(), 7) |
| mn_s, mn_c = _cyclic(dt.month - 1, 12) |
| dy_s, dy_c = _cyclic(dt.timetuple().tm_yday, 365) |
| f.extend([dw_s, dw_c, mn_s, mn_c, dy_s, dy_c]) |
| f.append(1.0 if dt.weekday() >= 5 else 0.0) |
|
|
| |
| f.append(_parse_age_range(ctx.age_range)) |
| reg = str(ctx.location_region or "").lower() |
| f.append(1.0 if "europe" in reg else 0.0) |
| f.append(1.0 if "tropic" in reg else 0.0) |
|
|
| |
| f.append(slp * abs(pc)) |
| f.append(1.0 if hrv < 25 else 0.0) |
| f.append(max(0.0, 6.0 - slp)) |
| f.append(1.0 if hum > 80 else 0.0) |
| f.append(yh + th) |
| f.append(float(min(consecutive_headache_days, 7))) |
|
|
| return np.array(f, dtype=np.float32) |
|
|
|
|
| def extract_forecast_features( |
| snapshots: List[DailySnapshotDTO], |
| user_ctx: Optional[UserContextDTO] = None, |
| ) -> np.ndarray: |
| """ |
| Build feature matrix for 7-day forecast. |
| snapshots[0] = today (full data), [1..6] = future (weather only). |
| """ |
| rows = [] |
| for i in range(len(snapshots)): |
| snap = snapshots[i] |
| tw = snap.weather_data or WeatherDataDTO() |
| td = "" |
| if snap.headache_log and snap.headache_log.input_date: |
| td = snap.headache_log.input_date |
|
|
| yest = snapshots[i - 1] if i > 0 else None |
| twod = snapshots[i - 2] if i > 1 else None |
|
|
| consec = 0 |
| for j in range(i - 1, -1, -1): |
| lj = snapshots[j].headache_log |
| if lj and lj.severity > 0: |
| consec += 1 |
| else: |
| break |
|
|
| rows.append(extract_features_for_day(tw, td, yest, twod, user_ctx, consec)) |
| return np.vstack(rows) |
|
|
|
|
| def get_risk_factors( |
| features: np.ndarray, |
| feature_importances: dict, |
| top_k: int = 3, |
| ) -> List[str]: |
| """Identify top risk factors from feature values + learned importances.""" |
| risks = [] |
|
|
| |
| checks = [ |
| ("had_headache_1d", lambda v: v > 0), |
| ("pressure_change_24h", lambda v: v < -3), |
| ("consecutive_headache_days", lambda v: v >= 2), |
| ("hrv_avg_ms", lambda v: v < 30), |
| ("headache_streak_2d", lambda v: v >= 1), |
| ("humidity_pct", lambda v: v > 75), |
| ("menstrual_flow_flag", lambda v: v > 0), |
| ("temperature_c", lambda v: v > 30 or v < -5), |
| ("sleep_total_hours", lambda v: v < 6), |
| ("sleep_deficit", lambda v: v > 0), |
| ("low_hrv_flag", lambda v: v > 0), |
| ("is_pressure_drop", lambda v: v > 0), |
| ("is_weekend", lambda v: v > 0), |
| ] |
|
|
| |
| sorted_checks = sorted( |
| checks, |
| key=lambda x: feature_importances.get(x[0], 0), |
| reverse=True, |
| ) |
|
|
| for fname, condition in sorted_checks: |
| if fname in FEATURE_NAMES: |
| idx = FEATURE_NAMES.index(fname) |
| if condition(features[idx]): |
| label = RISK_LABELS.get(fname, fname) |
| if label not in risks: |
| risks.append(label) |
| if len(risks) >= top_k: |
| break |
|
|
| return risks |
|
|