# Repository path: copper-mind / deep_learning / data / regime_features.py
# Page header: ifieryarrows - "Sync from GitHub (tests passed)" - d317049 (verified)
"""Regime and event-conditioning features for weekly TFT forecasts."""
from __future__ import annotations
import numpy as np
import pandas as pd
# Column names emitted by ``build_regime_event_features``. The order below is
# this list's own; the builder assigns the same columns in a different order.
REGIME_FEATURES = [
    # 0/1 regime flags.
    "regime_risk_on_demand",
    "regime_risk_off_macro",
    "regime_usd_pressure",
    "regime_supply_shock",
    "regime_inventory_tightness",
    "regime_high_vol_chop",
    # Event intensity score.
    "event_shock_score",
    # Products of sentiment / event score with a regime flag.
    "sentiment_x_supply_shock",
    "sentiment_x_usd_pressure",
    "sentiment_x_risk_on",
    "event_shock_x_high_vol",
]
# Sentiment/news columns followed by the regime flags and the event score.
# NOTE(review): presumably the dataset builder forces these columns into the
# TFT "unknown" (not known in advance) input group - confirm against the caller.
FORCED_TFT_UNKNOWN_FEATURES = [
    # Sentiment and news-flow inputs.
    "sentiment_index",
    "news_count",
    "material_news_count",
    "after_close_news_count",
    "days_since_last_material_news",
    "stale_sentiment_flag",
    # Regime flags.
    "regime_risk_on_demand",
    "regime_risk_off_macro",
    "regime_usd_pressure",
    "regime_supply_shock",
    "regime_inventory_tightness",
    "regime_high_vol_chop",
    # Event intensity score.
    "event_shock_score",
]
def _zero(index: pd.Index) -> pd.Series:
return pd.Series(0.0, index=index)
def _zscore(s: pd.Series, window: int = 60, min_periods: int = 20) -> pd.Series:
mean = s.rolling(window, min_periods=min_periods).mean()
std = s.rolling(window, min_periods=min_periods).std().replace(0, np.nan)
return ((s - mean) / std).replace([np.inf, -np.inf], np.nan).fillna(0.0)
def build_regime_event_features(master_like: pd.DataFrame) -> pd.DataFrame:
    """
    Build market regime and event conditioning features.

    Missing optional source columns are treated as neutral zero so the function
    remains stable across training, local tests, and production inference.

    Args:
        master_like: Frame whose index defines the output index. Columns read
            when present: ``sentiment_index``, ``news_count``,
            ``DX-Y_NYB_ret1`` (or ``DX_Y_NYB_ret1``), ``FXI_ret1``,
            ``lme_stock_change_5d``, ``lme_cancelled_ratio``,
            ``evt_supply_disruption_count``, ``evt_inventory_draw_count``
            and ``target``.

    Returns:
        A new ``float32`` frame on ``master_like.index`` holding six 0/1
        regime flags, ``event_shock_score`` and four interaction columns.
        The interaction columns are NaN-free even when ``sentiment_index``
        contains NaN (such rows contribute 0).
    """
    idx = master_like.index
    out = pd.DataFrame(index=idx)

    def _optional(*names: str) -> pd.Series:
        """Return the first existing column among ``names`` (NaN -> 0), else zeros."""
        for name in names:
            if name in master_like.columns:
                return master_like[name].fillna(0.0).astype(float)
        return _zero(idx)

    # Sentiment and news counts keep their NaNs here so the rolling z-score
    # below sees the same observations as before; NaNs are neutralised later.
    sentiment = master_like.get("sentiment_index", _zero(idx)).astype(float)
    news_count = master_like.get("news_count", _zero(idx)).astype(float)

    # The dollar-index return may appear under either spelling of its name.
    dxy_ret = _optional("DX-Y_NYB_ret1", "DX_Y_NYB_ret1")
    fxi_ret = _optional("FXI_ret1")
    lme_draw = _optional("lme_stock_change_5d")
    cancelled_ratio = _optional("lme_cancelled_ratio")
    supply_count = _optional("evt_supply_disruption_count")
    inventory_draw_count = _optional("evt_inventory_draw_count")

    # NOTE(review): realized volatility is derived from "target"; if that column
    # is a forward-looking label this would leak future information - confirm.
    if "target" in master_like.columns:
        realized_vol = master_like["target"].rolling(20, min_periods=10).std().fillna(0.0)
    else:
        realized_vol = _zero(idx)

    vol_z = _zscore(realized_vol, 60, 20)
    sent_z = _zscore(sentiment, 60, 20)
    # Negated so that falling lme_stock_change_5d values give positive z-scores.
    lme_draw_z = _zscore(-lme_draw, 60, 20)

    # Five-row cumulative returns; min_periods=1 keeps the first rows defined.
    dxy_5d = dxy_ret.rolling(5, min_periods=1).sum()
    fxi_5d = fxi_ret.rolling(5, min_periods=1).sum()

    # Comparisons against NaN are False, so NaN sentiment never sets a flag.
    out["regime_usd_pressure"] = ((dxy_5d > 0.01) & (sentiment < 0)).astype(float)
    out["regime_risk_on_demand"] = ((fxi_5d > 0.01) & (dxy_5d < 0)).astype(float)
    out["regime_risk_off_macro"] = ((fxi_5d < -0.01) & (dxy_5d > 0)).astype(float)
    out["regime_supply_shock"] = ((supply_count > 0) | (inventory_draw_count > 0)).astype(float)
    out["regime_inventory_tightness"] = (
        (lme_draw_z > 1.0)
        | (cancelled_ratio > cancelled_ratio.rolling(60, min_periods=20).mean())
    ).astype(float)
    out["regime_high_vol_chop"] = (vol_z > 1.0).astype(float)

    # Weighted event load; the news-count contribution is capped at 5.
    event_importance = (
        1.50 * supply_count
        + 1.35 * inventory_draw_count
        + 1.00 * news_count.clip(upper=5)
    )
    # |sentiment z| scaled by log news volume and event load; non-finite -> 0.
    out["event_shock_score"] = (
        sent_z.abs()
        * np.log1p(news_count.clip(lower=0))
        * (1.0 + event_importance)
    ).replace([np.inf, -np.inf], np.nan).fillna(0.0)

    # Zero-fill sentiment only for the interaction products: a NaN sentiment
    # would otherwise propagate NaN into the output even where the flag is 0.
    sentiment_filled = sentiment.fillna(0.0)
    out["sentiment_x_supply_shock"] = sentiment_filled * out["regime_supply_shock"]
    out["sentiment_x_usd_pressure"] = sentiment_filled * out["regime_usd_pressure"]
    out["sentiment_x_risk_on"] = sentiment_filled * out["regime_risk_on_demand"]
    out["event_shock_x_high_vol"] = out["event_shock_score"] * out["regime_high_vol_chop"]

    return out.astype("float32")