Added ARIMA etc.
Browse files- models/timeseries_forecasting.py +626 -0
models/timeseries_forecasting.py
ADDED
|
@@ -0,0 +1,626 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
import sys, subprocess
|
| 5 |
+
def _ensure(pkg):
    """Import the package named by a pip requirement, installing it on failure.

    Args:
        pkg: A pip requirement string such as ``"pandas"`` or
            ``"numpy>=1.24"``.  Any version specifier / extras / marker is
            stripped to obtain the distribution name.

    The distribution name is mapped to the module name that is actually
    importable, because the two can differ (``scikit-learn`` installs the
    ``sklearn`` module).  If the import fails, ``pip install <pkg>`` is run
    with the current interpreter.

    Raises:
        subprocess.CalledProcessError: If the pip invocation fails.
    """
    import re

    # Distribution names whose importable module is named differently.
    # Without this mapping "scikit-learn" can never be imported, so pip would
    # be re-run every single time this module is imported.
    import_names = {"scikit-learn": "sklearn"}

    # Cut at the first character that can start a version spec, extra or marker.
    dist_name = re.split(r"[<>=!~;\[\s]", pkg.strip(), maxsplit=1)[0]
    module_name = import_names.get(dist_name.lower(), dist_name.replace("-", "_"))

    try:
        __import__(module_name)
    except Exception:
        # Deliberately broad: any import-time failure triggers a best-effort install.
        subprocess.check_call([sys.executable, "-m", "pip", "install", pkg])
|
| 10 |
+
# Bootstrap: make every runtime dependency importable before the real imports
# below execute.  NOTE: this may invoke pip at import time.
for _pkg in (
    "pandas",
    "numpy",
    "matplotlib",
    "scikit-learn",
    "statsmodels",
    "pmdarima",
    "prophet",
):
    _ensure(_pkg)
|
| 15 |
+
|
| 16 |
+
import warnings
|
| 17 |
+
warnings.filterwarnings("ignore")
|
| 18 |
+
|
| 19 |
+
from typing import List, Optional, Tuple
|
| 20 |
+
import numpy as np
|
| 21 |
+
import pandas as pd
|
| 22 |
+
import matplotlib.pyplot as plt
|
| 23 |
+
|
| 24 |
+
# Soft deps
|
| 25 |
+
try:
|
| 26 |
+
import pmdarima as pm
|
| 27 |
+
_PMDARIMA_OK = True
|
| 28 |
+
except Exception:
|
| 29 |
+
_PMDARIMA_OK = False
|
| 30 |
+
|
| 31 |
+
try:
|
| 32 |
+
from prophet import Prophet
|
| 33 |
+
_PROPHET_OK = True
|
| 34 |
+
except Exception:
|
| 35 |
+
_PROPHET_OK = False
|
| 36 |
+
|
| 37 |
+
from statsmodels.tsa.exponential_smoothing.ets import ETSModel
|
| 38 |
+
from statsmodels.tsa.statespace.sarimax import SARIMAX
|
| 39 |
+
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
|
| 40 |
+
|
| 41 |
+
# ----------------------------
|
| 42 |
+
# Helpers
|
| 43 |
+
# ----------------------------
|
| 44 |
+
def _parse_date(val: Optional[str]) -> Optional[pd.Timestamp]:
    """Convert a user-supplied date value into a timestamp, or ``None``.

    ``None``, blank / whitespace-only strings, and anything that
    ``pd.to_datetime`` rejects all yield ``None``.
    """
    is_blank = val is None or (isinstance(val, str) and not val.strip())
    if is_blank:
        return None
    try:
        parsed = pd.to_datetime(val)
    except Exception:
        parsed = None
    return parsed
|
| 53 |
+
|
| 54 |
+
def _rmse(y_true: np.ndarray, y_pred: np.ndarray) -> float:
    """Return the root-mean-squared error between ``y_true`` and ``y_pred``."""
    mse = mean_squared_error(y_true, y_pred)
    return float(np.sqrt(mse))
|
| 56 |
+
|
| 57 |
+
def _metrics_text(y_true: Optional[pd.Series], y_pred: Optional[pd.Series]) -> str:
    """Format MAE / RMSE / MAPE for the overlap of actuals and predictions.

    Returns an empty string when either input is ``None`` or when, after
    dropping NaNs and aligning predictions to the actuals' index, no common
    index labels remain.  If the MAPE computation raises, it is reported as NaN.
    """
    if y_true is None or y_pred is None:
        return ""

    actual = pd.Series(y_true).dropna()
    # Align predictions to the actuals' labels; labels without a prediction drop out.
    predicted = pd.Series(y_pred).reindex(actual.index).dropna()
    common = actual.index.intersection(predicted.index)
    if common.empty:
        return ""

    a_vals = actual.loc[common]
    p_vals = predicted.loc[common]

    mae = mean_absolute_error(a_vals, p_vals)
    rmse_v = _rmse(a_vals, p_vals)
    try:
        mape_v = mean_absolute_percentage_error(a_vals, p_vals)
    except Exception:
        mape_v = np.nan
    return f"MAE: {mae:.4f}\nRMSE: {rmse_v:.4f}\nMAPE: {mape_v:.4f}"
|
| 72 |
+
|
| 73 |
+
def _render_diagnostics(series: pd.Series, resid: pd.Series) -> plt.Figure:
    """Build a 2x2 residual-diagnostics figure.

    Panels: residuals over time, ACF, PACF, and a text panel carrying the
    Ljung-Box p-value.

    Args:
        series: The modelled series.  Not used by any panel; kept so the
            signature stays compatible with existing callers.
        resid: In-sample residuals.  NaNs are dropped before the correlation
            plots and the Ljung-Box test.

    Returns:
        The matplotlib figure.  When no usable residuals exist (the callers'
        fallback is an empty / all-NaN series) or a correlation plot cannot be
        computed, the affected panel shows a short message instead of the
        whole function raising.
    """
    from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
    from statsmodels.stats.diagnostic import acorr_ljungbox

    clean = resid.dropna()

    def _corr_panel(plot_fn, ax, title):
        # Draw one correlation plot; degrade to a message if it cannot be drawn.
        if clean.empty:
            ax.set_title(title)
            ax.text(0.5, 0.5, "No residuals available",
                    ha="center", va="center", transform=ax.transAxes)
            return
        try:
            plot_fn(clean, ax=ax)
        except Exception as exc:
            ax.set_title(title)
            ax.text(0.5, 0.5, f"Unavailable: {exc}",
                    ha="center", va="center", transform=ax.transAxes, wrap=True)

    fig = plt.figure(figsize=(10, 8))

    ax1 = fig.add_subplot(2, 2, 1)
    ax1.plot(resid)
    ax1.set_title("Residuals")
    # Tilt x-axis labels so date ticks do not overlap.
    for lbl in ax1.get_xticklabels():
        lbl.set_rotation(45)
        lbl.set_ha("right")

    _corr_panel(plot_acf, fig.add_subplot(2, 2, 2), "Autocorrelation")
    _corr_panel(plot_pacf, fig.add_subplot(2, 2, 3), "Partial Autocorrelation")

    ax4 = fig.add_subplot(2, 2, 4)
    # Lag count follows the cleaned sample, since that is what the test runs on.
    lags = min(10, max(1, len(clean) // 10))
    try:
        lb = acorr_ljungbox(clean, lags=[lags], return_df=True)
        pval = float(lb["lb_pvalue"].iloc[-1])
    except Exception:
        pval = np.nan
    ax4.axis("off")
    ax4.text(0, 0.8, f"Ljung-Box p-value (~lag {lags}): {pval:.4f}")

    fig.tight_layout()
    return fig
|
| 103 |
+
|
| 104 |
+
def perform_stationarity_tests(df_indexed: pd.DataFrame, target_col: str) -> str:
    """Run ADF and KPSS tests on ``target_col`` and return a text report.

    The column is cast to float and NaNs are dropped first.  Each test runs
    independently: if one raises, its section is replaced by an
    ``"<TEST> error: ..."`` line and the other test is still reported.
    """
    from statsmodels.tsa.stattools import adfuller, kpss  # local import

    values = df_indexed[target_col].astype(float).dropna()
    report = [f"Observations: {len(values)}"]

    # Augmented Dickey-Fuller (lag length chosen by AIC).
    try:
        adf_stat, adf_p, _, _, adf_crit, _ = adfuller(values, autolag="AIC")
        report.append("ADF Test")
        report.append(f" Statistic: {adf_stat:.4f}, p-value: {adf_p:.4f}")
        report.extend(f" Critical {level}: {cv:.4f}" for level, cv in adf_crit.items())
    except Exception as exc:
        report.append(f"ADF error: {exc}")

    # KPSS with regression="c" and automatic lag selection.
    try:
        kpss_stat, kpss_p, _, kpss_crit = kpss(values, regression="c", nlags="auto")
        report.append("KPSS Test")
        report.append(f" Statistic: {kpss_stat:.4f}, p-value: {kpss_p:.4f}")
        report.extend(f" Critical {level}: {cv}" for level, cv in kpss_crit.items())
    except Exception as exc:
        report.append(f"KPSS error: {exc}")

    return "\n".join(report)
|
| 125 |
+
|
| 126 |
+
|
| 127 |
+
def detect_outliers(df_indexed: pd.DataFrame, target_col: str, z_thresh: float = 3.0) -> str:
    """Count observations whose absolute z-score exceeds ``z_thresh``.

    The sample standard deviation (``ddof=1``) is used; when it is NaN or
    zero it is replaced by 1.0 so the z-score stays defined.  Returns a
    one-line summary string.
    """
    values = df_indexed[target_col].astype(float)

    spread = values.std(ddof=1)
    if pd.isna(spread) or spread == 0:
        spread = 1.0
    else:
        spread = float(spread)

    z_scores = (values - values.mean()) / spread
    flagged = values.index[np.abs(z_scores) > z_thresh]
    return f"Potential outliers beyond |z|>{z_thresh}: {len(flagged)}"
|
| 134 |
+
|
| 135 |
+
# ----------------------------
|
| 136 |
+
# WINDOWING
|
| 137 |
+
# ----------------------------
|
| 138 |
+
def _build_train_and_forecast_index(
    df_indexed: pd.DataFrame,
    target_col: str,
    train_start: Optional[str],
    train_end: Optional[str],
    horizon: int,
    freq: str,
) -> Tuple[pd.DataFrame, pd.DatetimeIndex, str, str, Optional[pd.Series]]:
    """Select the training window and build the forecast index that follows it.

    Args:
        df_indexed: Frame indexed by timestamp that contains ``target_col``.
        target_col: Name of the column to forecast.
        train_start: Optional lower bound of the training window; defaults to
            the first row with a non-null target.
        train_end: Optional upper bound of the training window; defaults to
            the last row with a non-null target.
        horizon: Number of periods to forecast (must be >= 1).
        freq: A pandas frequency string, or ``"infer"`` to infer it from the
            training index.

    Returns:
        ``(train_df, fc_index, train_range, forecast_range, y_true)`` where
        ``y_true`` holds the target values at the forecast timestamps when
        all of them exist in ``df_indexed`` and at least one is non-null,
        otherwise ``None``.

    Raises:
        ValueError: If ``horizon < 1``, the target has no non-null rows, the
            training window is empty, or the frequency cannot be inferred.
    """
    horizon = int(horizon)
    if horizon < 1:
        # Previously surfaced as a bare AssertionError further down.
        raise ValueError("Forecast horizon must be at least 1.")

    hist = df_indexed[df_indexed[target_col].notna()].copy()
    if hist.empty:
        raise ValueError(f"Target column '{target_col}' has no non-null observations.")

    t_start = (_parse_date(train_start) if train_start is not None else None) or hist.index.min()
    t_end = (_parse_date(train_end) if train_end is not None else None) or hist.index.max()

    in_window = (hist.index >= t_start) & (hist.index <= t_end)
    train_df = hist.loc[in_window].copy()
    if train_df.empty:
        raise ValueError("Training window has 0 rows; check train_start/train_end.")

    if freq == "infer":
        use_freq = pd.infer_freq(train_df.index)
        if use_freq is None:
            raise ValueError("Could not infer frequency from training index; please select a frequency in the UI.")
    else:
        use_freq = freq

    last_train = train_df.index.max()
    # date_range rolls an off-anchor start forward to the next anchor, so the
    # first generated point is either last_train itself or already in the
    # future.  Keeping only points strictly after last_train (instead of blindly
    # dropping element 0) avoids skipping the first forecast period in the
    # off-anchor case, and guarantees the forecast starts after the training data.
    candidates = pd.date_range(start=last_train, periods=horizon + 1, freq=use_freq)
    fc_index = candidates[candidates > last_train][:horizon]

    y_true = None
    if fc_index.isin(df_indexed.index).all():
        observed = df_indexed.loc[fc_index, target_col]
        if observed.notna().any():
            y_true = observed

    train_range = f"Train range: {train_df.index.min()} → {train_df.index.max()}"
    forecast_range = f"Forecast range: {fc_index.min()} → {fc_index.max()}"

    return train_df, fc_index, train_range, forecast_range, y_true
|
| 181 |
+
|
| 182 |
+
# ----------------------------
|
| 183 |
+
# Exogenous handling
|
| 184 |
+
# ----------------------------
|
| 185 |
+
def _forecast_exog(
    train_df: pd.DataFrame,
    forecast_index: pd.DatetimeIndex,
    exog_cols: List[str],
    method: str = "naive",
    exog_m: int = 0,
) -> pd.DataFrame:
    """Project each exogenous column over ``forecast_index``.

    Args:
        train_df: Training window containing the exogenous columns.
        forecast_index: Timestamps to produce values for.
        exog_cols: Exogenous column names; an empty list yields a column-less
            frame indexed by ``forecast_index``.
        method: ``"naive"`` (repeat the last value), ``"seasonal_naive"``
            (tile the last ``exog_m`` values) or ``"auto_arima"`` (fit a
            pmdarima model per column).
        exog_m: Seasonal period used by ``seasonal_naive`` and, when > 1, by
            ``auto_arima``.

    Returns:
        A frame indexed by ``forecast_index`` with one column per exogenous
        variable.

    Raises:
        ValueError: On NaNs or no rows in a training column, an invalid or
            too-long seasonal period, missing pmdarima, or an unknown method.
    """
    if not exog_cols:
        return pd.DataFrame(index=forecast_index)

    steps = len(forecast_index)
    period = int(exog_m or 0)  # tolerate None, also in the auto_arima branch
    projected = {}

    for col in exog_cols:
        history = train_df[col].astype(float)
        if history.empty:
            raise ValueError(f"Exogenous column '{col}' has no rows in the training window.")
        if history.isna().any():
            raise ValueError(f"Exogenous column '{col}' has NaNs in the training window.")

        if method == "naive":
            values = np.repeat(history.iloc[-1], steps)
        elif method == "seasonal_naive":
            if period < 1:
                raise ValueError("seasonal_naive requires exog_m >= 1.")
            if len(history) < period:
                raise ValueError(f"Training window too short for exog_m={exog_m} on exogenous '{col}'.")
            last_block = history.iloc[-period:].to_numpy()
            repeats = int(np.ceil(steps / period))
            values = np.tile(last_block, repeats)[:steps]
        elif method == "auto_arima":
            if not _PMDARIMA_OK:
                raise ValueError("pmdarima not installed; cannot auto_arima exogenous.")
            seasonal_flag = period > 1
            model = pm.auto_arima(
                history,
                seasonal=seasonal_flag,
                m=period if seasonal_flag else 1,
                error_action="ignore",
                suppress_warnings=True,
                stepwise=True,
            )
            # predict() may return a pandas Series carrying its own index; strip
            # it so the DataFrame constructor below does not re-align the values
            # against forecast_index and silently turn them into NaN.
            values = np.asarray(model.predict(n_periods=steps), dtype=float)
        else:
            raise ValueError(f"Unknown exogenous forecast method: {method}")

        projected[col] = values

    return pd.DataFrame(projected, index=forecast_index)
|
| 228 |
+
|
| 229 |
+
def _prepare_exog_with_policy(
    train_df: pd.DataFrame,
    forecast_index: pd.DatetimeIndex,
    exog_cols: Optional[List[str]],
    df_indexed: pd.DataFrame,
    future_exog_df: Optional[pd.DataFrame],
    exog_policy: str = "auto_forecast",
    exog_method: str = "naive",
    exog_m: int = 0,
) -> Tuple[Optional[pd.DataFrame], Optional[pd.DataFrame], str]:
    """Resolve training and future exogenous matrices.

    Future values are looked up in this order: (1) the main frame, if every
    forecast timestamp is present with no NaNs; (2) ``future_exog_df``, if it
    fully covers the forecast index; (3) ``exog_policy``:
    ``"require_future"`` raises, ``"drop_if_missing"`` disables exogenous
    variables, ``"auto_forecast"`` projects them with ``_forecast_exog``.

    Returns:
        ``(X_train, X_future, note)``; both frames are ``None`` when exogenous
        variables are not used.

    Raises:
        ValueError: For a non-numeric exogenous column, missing future values
            under ``"require_future"``, or an unknown policy.
    """
    if not exog_cols:
        return None, None, ""

    train_exog = train_df[exog_cols].copy()
    non_numeric = [c for c in exog_cols if not pd.api.types.is_numeric_dtype(train_exog[c])]
    if non_numeric:
        c = non_numeric[0]
        raise ValueError(f"Exogenous column '{c}' must be numeric.")

    # 1) Future values already present in the main file.
    if set(forecast_index) <= set(df_indexed.index):
        in_file = df_indexed.loc[forecast_index, exog_cols]
        if in_file.notna().all().all():
            return train_exog, in_file.copy(), "Exogenous: using future values from main file."

    # 2) Separately supplied future values (best effort: any failure falls through).
    if future_exog_df is not None:
        supplied = None
        try:
            supplied = future_exog_df.reindex(forecast_index)[exog_cols]
            complete = bool(supplied.notna().all().all())
        except Exception:
            complete = False
        if complete:
            return train_exog, supplied.copy(), "Exogenous: using provided future exog file."

    # 3) Nothing usable was provided; apply the policy.
    if exog_policy == "require_future":
        need = ", ".join(exog_cols)
        missing = ", ".join(str(ts) for ts in forecast_index[:6])
        raise ValueError(f"Future exogenous values required for: {need}; missing at (first few): {missing}")
    if exog_policy == "drop_if_missing":
        return None, None, "Exogenous: dropped (future values missing)."
    if exog_policy == "auto_forecast":
        future_exog = _forecast_exog(
            train_df,
            forecast_index,
            exog_cols,
            method=exog_method,
            exog_m=int(exog_m or 0),
        )
        return train_exog, future_exog, f"Exogenous: auto-forecasted ({exog_method})."
    raise ValueError(f"Unknown exog_policy: {exog_policy}")
|
| 275 |
+
|
| 276 |
+
# ----------------------------
|
| 277 |
+
# Forecast runners
|
| 278 |
+
# ----------------------------
|
| 279 |
+
def run_auto_arima_forecast(
    df_indexed: pd.DataFrame,
    target_col: str,
    horizon: int,
    seasonal: bool,
    m: int,
    freq: str = "infer",
    exog_cols: Optional[List[str]] = None,
    future_exog_df: Optional[pd.DataFrame] = None,
    train_start: Optional[str] = None,
    train_end: Optional[str] = None,
    return_diagnostics: bool = False,
    exog_policy: str = "auto_forecast",
    exog_method: str = "naive",
    exog_m: int = 0,
):
    """Fit a pmdarima Auto-ARIMA model and forecast ``horizon`` periods.

    Args:
        df_indexed: Timestamp-indexed frame containing the target (and any
            exogenous columns).
        target_col: Column to forecast.
        horizon: Number of periods to forecast.
        seasonal: Whether to search seasonal models.
        m: Seasonal period (used only when ``seasonal`` is true).
        freq: pandas frequency string or ``"infer"``.
        exog_cols: Optional exogenous regressors.
        future_exog_df: Optional frame with future exogenous values.
        train_start, train_end: Optional training-window bounds.
        return_diagnostics: Also return a residual-diagnostics figure.
        exog_policy, exog_method, exog_m: Forwarded to
            ``_prepare_exog_with_policy``.

    Returns:
        ``(fig, summary, diag_fig, yhat, conf_df)``.  On a handled failure the
        tuple is ``(None, "<error message>", None, None, None)``.

    Raises:
        ValueError: Propagated from ``_build_train_and_forecast_index`` when
            the training window or frequency is invalid.
    """
    if not _PMDARIMA_OK:
        return None, "Error: pmdarima is not installed; Auto-ARIMA unavailable.", None, None, None

    steps = int(horizon)
    train_df, fc_index, tr_range, fc_range, y_true = _build_train_and_forecast_index(
        df_indexed, target_col, train_start, train_end, steps, freq
    )
    y = train_df[target_col].astype(float)

    X_train = X_future = None
    exog_note = ""
    if exog_cols:
        try:
            X_train, X_future, exog_note = _prepare_exog_with_policy(
                train_df, fc_index, exog_cols, df_indexed, future_exog_df,
                exog_policy, exog_method, exog_m
            )
        except Exception as e:
            return None, f"Auto-ARIMA error: {e}", None, None, None

    try:
        # pmdarima names the regressor matrix `X`; the legacy `exogenous=`
        # keyword is not part of the current signatures.
        model = pm.auto_arima(
            y,
            X=X_train,
            seasonal=bool(seasonal),
            m=int(m) if seasonal else 1,
            error_action="ignore",
            suppress_warnings=True,
            stepwise=True,
        )
        fcst, conf = model.predict(n_periods=steps, X=X_future, return_conf_int=True)
        # predict() may return a Series with its own index; take raw values so
        # building a Series on fc_index does not re-align them into NaN.
        yhat = pd.Series(np.asarray(fcst, dtype=float), index=fc_index)
        conf_df = pd.DataFrame(np.asarray(conf), index=fc_index, columns=["lower", "upper"])
    except Exception as e:
        return None, f"Auto-ARIMA error: {e}", None, None, None

    fig = plt.figure(figsize=(10, 5))
    plt.plot(y.index, y.values, label="history")
    plt.plot(yhat.index, yhat.values, label="forecast")
    plt.fill_between(yhat.index, conf_df["lower"], conf_df["upper"], alpha=0.2)
    plt.legend()
    plt.title("Auto-ARIMA Forecast")
    plt.xticks(rotation=45, ha="right")  # tilt x-axis labels on forecast plot
    fig.tight_layout()  # ensure labels fit

    try:
        fitted_in = pd.Series(np.asarray(model.predict_in_sample(X=X_train)), index=y.index)
        resid = y - fitted_in
    except Exception:
        # Diagnostics are best effort; fall back to an all-NaN residual series.
        resid = pd.Series(index=y.index, dtype=float)

    metrics_block = _metrics_text(y_true, yhat)
    summary = "\n".join([s for s in [
        f"Model: Auto-ARIMA ({'seasonal m='+str(m) if seasonal else 'non-seasonal'})",
        exog_note,
        tr_range, fc_range, metrics_block,
        "Residual Diagnostics" if return_diagnostics else ""
    ] if s]).strip()

    if not return_diagnostics:
        return fig, summary, None, yhat, conf_df
    diag_fig = _render_diagnostics(y, resid)
    return fig, summary, diag_fig, yhat, conf_df
|
| 357 |
+
|
| 358 |
+
def run_sarimax_forecast(
    df_indexed: pd.DataFrame,
    target_col: str,
    horizon: int,
    seasonal: bool,
    m: int,
    freq: str = "infer",
    exog_cols: Optional[List[str]] = None,
    future_exog_df: Optional[pd.DataFrame] = None,
    train_start: Optional[str] = None,
    train_end: Optional[str] = None,
    return_diagnostics: bool = False,
    exog_policy: str = "auto_forecast",
    exog_method: str = "naive",
    exog_m: int = 0,
):
    """Select orders with pmdarima, then fit and forecast with statsmodels SARIMAX.

    Args:
        df_indexed: Timestamp-indexed frame containing the target (and any
            exogenous columns).
        target_col: Column to forecast.
        horizon: Number of periods to forecast.
        seasonal: Whether to include a seasonal component.
        m: Seasonal period (used only when ``seasonal`` is true).
        freq: pandas frequency string or ``"infer"``.
        exog_cols: Optional exogenous regressors.
        future_exog_df: Optional frame with future exogenous values.
        train_start, train_end: Optional training-window bounds.
        return_diagnostics: Also return a residual-diagnostics figure.
        exog_policy, exog_method, exog_m: Forwarded to
            ``_prepare_exog_with_policy``.

    Returns:
        ``(fig, summary, diag_fig, yhat, conf_df)`` with a 95% interval in
        ``conf_df``.  On a handled failure the tuple is
        ``(None, "<error message>", None, None, None)``.

    Raises:
        ValueError: Propagated from ``_build_train_and_forecast_index`` when
            the training window or frequency is invalid.
    """
    if not _PMDARIMA_OK:
        return None, "Error: pmdarima is required to select SARIMAX orders.", None, None, None

    steps = int(horizon)
    train_df, fc_index, tr_range, fc_range, y_true = _build_train_and_forecast_index(
        df_indexed, target_col, train_start, train_end, steps, freq
    )
    y = train_df[target_col].astype(float)

    X_train = X_future = None
    exog_note = ""
    if exog_cols:
        try:
            X_train, X_future, exog_note = _prepare_exog_with_policy(
                train_df, fc_index, exog_cols, df_indexed, future_exog_df,
                exog_policy, exog_method, exog_m
            )
        except Exception as e:
            return None, f"SARIMAX error: {e}", None, None, None

    try:
        # pmdarima names the regressor matrix `X`; the legacy `exogenous=`
        # keyword is not part of the current signature.
        selector = pm.auto_arima(
            y, X=X_train, seasonal=bool(seasonal), m=int(m) if seasonal else 1,
            stepwise=True, error_action="ignore", suppress_warnings=True
        )
        order = selector.order
        sorder = selector.seasonal_order if seasonal else (0, 0, 0, 0)

        model = SARIMAX(
            y, exog=X_train,
            order=order,
            seasonal_order=sorder,
            enforce_stationarity=False, enforce_invertibility=False
        ).fit(disp=False)

        pred = model.get_forecast(steps=steps, exog=X_future)
        # Use raw values, as already done for the interval below: the index
        # statsmodels attaches to the forecast may not equal fc_index, and a
        # Series-on-Series construction would then re-align everything to NaN.
        yhat = pd.Series(np.asarray(pred.predicted_mean, dtype=float), index=fc_index)
        conf_int = pred.conf_int(alpha=0.05)
        conf_df = pd.DataFrame(
            {"lower": conf_int.iloc[:, 0].values, "upper": conf_int.iloc[:, 1].values},
            index=fc_index,
        )
    except Exception as e:
        return None, f"SARIMAX error: {e}", None, None, None

    fig = plt.figure(figsize=(10, 5))
    plt.plot(y.index, y.values, label="history")
    plt.plot(yhat.index, yhat.values, label="forecast")
    plt.fill_between(yhat.index, conf_df["lower"], conf_df["upper"], alpha=0.2)
    plt.legend()
    plt.title(f"SARIMAX order={order} seasonal_order={sorder}")
    plt.xticks(rotation=45, ha="right")  # tilt x-axis labels on forecast plot
    fig.tight_layout()

    try:
        resid = y - model.fittedvalues
    except Exception:
        # Diagnostics are best effort; fall back to an all-NaN residual series.
        resid = pd.Series(index=y.index, dtype=float)

    metrics_block = _metrics_text(y_true, yhat)
    summary = "\n".join([s for s in [
        f"Model: SARIMAX order={order} seasonal_order={sorder}",
        exog_note,
        tr_range, fc_range, metrics_block,
        "Residual Diagnostics" if return_diagnostics else ""
    ] if s]).strip()

    if not return_diagnostics:
        return fig, summary, None, yhat, conf_df
    diag_fig = _render_diagnostics(y, resid)
    return fig, summary, diag_fig, yhat, conf_df
|
| 441 |
+
|
| 442 |
+
def run_ets_forecast(
    df_indexed: pd.DataFrame,
    target_col: str,
    horizon: int,
    error: str,
    trend: Optional[str],
    seasonal: Optional[str],
    m: int,
    damped: bool,
    freq: str = "infer",
    train_start: Optional[str] = None,
    train_end: Optional[str] = None,
    return_diagnostics: bool = False,
):
    """Fit a statsmodels ETS model and forecast ``horizon`` periods.

    Args:
        df_indexed: Timestamp-indexed frame containing the target.
        target_col: Column to forecast.
        horizon: Number of periods to forecast.
        error: ETS error type passed to ``ETSModel``.
        trend: Trend type; ``None`` or ``"none"`` disables the trend.
        seasonal: Seasonal type; ``None`` or ``"none"`` disables seasonality.
        m: Seasonal period (used only when seasonality is enabled).
        damped: Whether to damp the trend (ignored without a trend).
        freq: pandas frequency string or ``"infer"``.
        train_start, train_end: Optional training-window bounds.
        return_diagnostics: Also return a residual-diagnostics figure.

    Returns:
        ``(fig, summary, diag_fig, yhat, conf_df)``.  ``conf_df`` is a
        constant-width band of +/-1.96 times the in-sample residual standard
        deviation around the point forecast.  On a handled failure the tuple
        is ``(None, "<error message>", None, None, None)``.

    Raises:
        ValueError: Propagated from ``_build_train_and_forecast_index`` when
            the training window or frequency is invalid.
    """
    steps = int(horizon)
    train_df, fc_index, tr_range, fc_range, y_true = _build_train_and_forecast_index(
        df_indexed, target_col, train_start, train_end, steps, freq
    )
    y = train_df[target_col].astype(float)

    # Treat None and the UI sentinel "none" identically for both components
    # (previously seasonal=None still forced int(m) to be evaluated).
    trend_arg = None if trend in (None, "none") else trend
    seasonal_arg = None if seasonal in (None, "none") else seasonal

    try:
        model = ETSModel(
            y,
            error=error,
            trend=trend_arg,
            seasonal=seasonal_arg,
            seasonal_periods=int(m) if seasonal_arg else None,
            damped_trend=bool(damped) if trend_arg else False,
            initialization_method="estimated",
        ).fit(disp=False)  # keep optimizer output off stdout, as in the SARIMAX runner
    except Exception as e:
        return None, f"ETS error: {e}", None, None, None

    try:
        fc_vals = model.forecast(steps)
        yhat = pd.Series(fc_vals.values, index=fc_index)

        resid = y - model.fittedvalues
        se = float(np.nanstd(resid, ddof=1)) if len(resid.dropna()) else 0.0
        z = 1.96
        conf_df = pd.DataFrame({"lower": yhat - z * se, "upper": yhat + z * se}, index=fc_index)
    except Exception as e:
        return None, f"ETS forecast error: {e}", None, None, None

    fig = plt.figure(figsize=(10, 5))
    plt.plot(y.index, y.values, label="history")
    plt.plot(yhat.index, yhat.values, label="forecast")
    plt.fill_between(conf_df.index, conf_df["lower"], conf_df["upper"], alpha=0.2)
    plt.legend()
    plt.title("ETS Forecast")
    plt.xticks(rotation=45, ha="right")  # tilt x-axis labels on forecast plot
    fig.tight_layout()

    metrics_block = _metrics_text(y_true, yhat)
    summary = "\n".join([s for s in [
        f"Model: ETS(error={error}, trend={trend}, seasonal={seasonal}, m={m}, damped={damped})",
        tr_range, fc_range, metrics_block,
        "Residual Diagnostics" if return_diagnostics else ""
    ] if s]).strip()

    if not return_diagnostics:
        return fig, summary, None, yhat, conf_df
    # `resid` was already computed above; the earlier duplicate recomputation was redundant.
    diag_fig = _render_diagnostics(y, resid)
    return fig, summary, diag_fig, yhat, conf_df
|
| 510 |
+
|
| 511 |
+
def run_prophet_forecast(
    df_indexed: pd.DataFrame,
    target_col: str,
    horizon: int,
    seasonality_mode: str,
    yearly: bool,
    weekly: bool,
    daily: bool,
    freq: str = "infer",
    exog_cols: Optional[List[str]] = None,
    future_exog_df: Optional[pd.DataFrame] = None,
    train_start: Optional[str] = None,
    train_end: Optional[str] = None,
    return_diagnostics: bool = False,
    exog_policy: str = "auto_forecast",
    exog_method: str = "naive",
    exog_m: int = 0,
):
    """Fit a Prophet model and forecast ``horizon`` periods.

    Args:
        df_indexed: Timestamp-indexed frame containing the target (and any
            exogenous columns).
        target_col: Column to forecast.
        horizon: Number of periods to forecast.
        seasonality_mode: Passed to ``Prophet(seasonality_mode=...)``.
        yearly, weekly, daily: When true, a custom seasonality of that name
            is added to the model.
        freq: pandas frequency string or ``"infer"``.
        exog_cols: Optional extra regressors.
        future_exog_df: Optional frame with future regressor values.
        train_start, train_end: Optional training-window bounds.
        return_diagnostics: Also return a residual-diagnostics figure.
        exog_policy, exog_method, exog_m: Forwarded to
            ``_prepare_exog_with_policy``.

    Returns:
        ``(fig, summary, diag_fig, yhat, conf_df)``, with ``yhat`` and
        ``conf_df`` indexed by the forecast dates.  On a handled failure the
        tuple is ``(None, "<error message>", None, None, None)``.

    Raises:
        ValueError: Propagated from ``_build_train_and_forecast_index`` when
            the training window or frequency is invalid.
    """
    if not _PROPHET_OK:
        return None, "Error: prophet is not installed.", None, None, None

    train_df, fc_index, tr_range, fc_range, y_true = _build_train_and_forecast_index(
        df_indexed, target_col, train_start, train_end, int(horizon), freq
    )

    X_train = X_future = None
    exog_note = ""
    exog_effective: List[str] = []
    if exog_cols:
        try:
            X_train, X_future, exog_note = _prepare_exog_with_policy(
                train_df, fc_index, exog_cols, df_indexed, future_exog_df,
                exog_policy, exog_method, exog_m
            )
            if X_train is not None:
                exog_effective = list(exog_cols)
        except Exception as e:
            return None, f"Prophet error: {e}", None, None, None

    # Build the ds/y frame straight from the index.  Going through
    # reset_index() + rename() breaks for an unnamed index, whose column is
    # called "index" rather than "ds".
    train = pd.DataFrame({
        "ds": train_df.index,
        "y": train_df[target_col].astype(float).to_numpy(),
    })
    if exog_effective:
        train = pd.concat([train, X_train.reset_index(drop=True)], axis=1)

    mobj = Prophet(seasonality_mode=seasonality_mode)
    if yearly:
        mobj.add_seasonality(name="yearly", period=365.25, fourier_order=10)
    if weekly:
        mobj.add_seasonality(name="weekly", period=7, fourier_order=6)
    if daily:
        mobj.add_seasonality(name="daily", period=1, fourier_order=4)
    for c in exog_effective:
        mobj.add_regressor(c)

    try:
        mobj.fit(train)
        future = pd.DataFrame({"ds": fc_index})
        if exog_effective:
            for c in exog_effective:
                if c not in X_future.columns:
                    raise ValueError(f"Prophet future DataFrame missing required regressor column: {c}")
            future = pd.concat([future, X_future.reset_index(drop=True)], axis=1)
        fcst = mobj.predict(future)
    except Exception as e:
        return None, f"Prophet error: {e}", None, None, None

    fig = plt.figure(figsize=(10, 5))
    plt.plot(train["ds"], train["y"], label="history")
    plt.plot(fcst["ds"], fcst["yhat"], label="forecast")
    if "yhat_lower" in fcst and "yhat_upper" in fcst:
        plt.fill_between(fcst["ds"], fcst["yhat_lower"], fcst["yhat_upper"], alpha=0.2)
    plt.legend()
    plt.title("Prophet Forecast")
    plt.xticks(rotation=45, ha="right")  # tilt x-axis labels on forecast plot
    fig.tight_layout()

    try:
        in_fit = mobj.predict(train.drop(columns=["y"]))
        resid = pd.Series(train["y"].values - in_fit["yhat"].values, index=train["ds"])
    except Exception:
        # Diagnostics are best effort; fall back to an empty residual series.
        resid = pd.Series(dtype=float)

    fc_dates = pd.DatetimeIndex(fcst["ds"])
    yhat_series = pd.Series(fcst["yhat"].values, index=fc_dates)
    metrics_block = _metrics_text(y_true, yhat_series)

    def _bound(name):
        # Raw values only: a Series carrying fcst's integer index would be
        # re-aligned against the date index below and come out entirely NaN.
        if name in fcst:
            return fcst[name].to_numpy()
        return np.full(len(fcst), np.nan)

    conf_df = pd.DataFrame(
        {"lower": _bound("yhat_lower"), "upper": _bound("yhat_upper")},
        index=fc_dates,
    )

    # Drop empty parts so the summary has no blank lines (matches the other runners).
    summary = "\n".join([s for s in [
        f"Model: Prophet(seasonality_mode={seasonality_mode}, yearly={yearly}, weekly={weekly}, daily={daily})",
        exog_note,
        tr_range, fc_range, metrics_block,
        "Residual Diagnostics" if return_diagnostics else ""
    ] if s]).strip()

    if not return_diagnostics:
        return fig, summary, None, yhat_series, conf_df
    diag_fig = _render_diagnostics(train.set_index("ds")["y"], resid)
    return fig, summary, diag_fig, yhat_series, conf_df
|
| 615 |
+
|
| 616 |
+
# ----------------------------
|
| 617 |
+
# exports list
|
| 618 |
+
# ----------------------------
|
| 619 |
+
__all__ = [
    # Forecast runners
    "run_auto_arima_forecast",
    "run_ets_forecast",
    "run_prophet_forecast",
    "run_sarimax_forecast",
    # Exploratory checks
    "perform_stationarity_tests",
    "detect_outliers",
]
|