causalscience committed on
Commit
c535180
·
verified ·
1 Parent(s): 3700111

Added ARIMA etc.

Browse files
Files changed (1) hide show
  1. models/timeseries_forecasting.py +626 -0
models/timeseries_forecasting.py ADDED
@@ -0,0 +1,626 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+
4
+ import sys, subprocess
5
+ def _ensure(pkg):
6
+ try:
7
+ __import__(pkg.split("==")[0].split(">=")[0])
8
+ except Exception:
9
+ subprocess.check_call([sys.executable, "-m", "pip", "install", pkg])
10
# Bootstrap: make sure every third-party dependency is importable (installing
# it on demand) before the real imports further down run.
for _pkg in (
    "pandas",
    "numpy",
    "matplotlib",
    "scikit-learn",
    "statsmodels",
    "pmdarima",
    "prophet",
):
    _ensure(_pkg)
15
+
16
+ import warnings
17
+ warnings.filterwarnings("ignore")
18
+
19
+ from typing import List, Optional, Tuple
20
+ import numpy as np
21
+ import pandas as pd
22
+ import matplotlib.pyplot as plt
23
+
24
+ # Soft deps
25
+ try:
26
+ import pmdarima as pm
27
+ _PMDARIMA_OK = True
28
+ except Exception:
29
+ _PMDARIMA_OK = False
30
+
31
+ try:
32
+ from prophet import Prophet
33
+ _PROPHET_OK = True
34
+ except Exception:
35
+ _PROPHET_OK = False
36
+
37
+ from statsmodels.tsa.exponential_smoothing.ets import ETSModel
38
+ from statsmodels.tsa.statespace.sarimax import SARIMAX
39
+ from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
40
+
41
+ # ----------------------------
42
+ # Helpers
43
+ # ----------------------------
44
+ def _parse_date(val: Optional[str]) -> Optional[pd.Timestamp]:
45
+ if val is None:
46
+ return None
47
+ if isinstance(val, str) and val.strip() == "":
48
+ return None
49
+ try:
50
+ return pd.to_datetime(val)
51
+ except Exception:
52
+ return None
53
+
54
def _rmse(y_true: np.ndarray, y_pred: np.ndarray) -> float:
    """Return the root-mean-squared error between ``y_true`` and ``y_pred``."""
    mse = mean_squared_error(y_true, y_pred)
    root = np.sqrt(mse)
    return float(root)
56
+
57
+ def _metrics_text(y_true: Optional[pd.Series], y_pred: Optional[pd.Series]) -> str:
58
+ if y_true is None or y_pred is None:
59
+ return ""
60
+ yt = pd.Series(y_true).dropna()
61
+ yp = pd.Series(y_pred).reindex(yt.index).dropna()
62
+ idx = yt.index.intersection(yp.index)
63
+ if len(idx) == 0:
64
+ return ""
65
+ mae = mean_absolute_error(yt.loc[idx], yp.loc[idx])
66
+ rmse_v = _rmse(yt.loc[idx], yp.loc[idx])
67
+ try:
68
+ mape_v = mean_absolute_percentage_error(yt.loc[idx], yp.loc[idx])
69
+ except Exception:
70
+ mape_v = np.nan
71
+ return f"MAE: {mae:.4f}\nRMSE: {rmse_v:.4f}\nMAPE: {mape_v:.4f}"
72
+
73
def _render_diagnostics(series: pd.Series, resid: pd.Series) -> plt.Figure:
    """Build a 2x2 residual-diagnostics figure.

    Panels: residuals over their index, ACF, PACF, and a text panel showing a
    Ljung-Box p-value.

    Args:
        series: The modelled series. Not used by the body; kept so existing
            callers keep working.
        resid: Model residuals. NaNs are dropped before the correlation plots
            and the Ljung-Box test.

    Returns:
        The matplotlib figure holding the four panels.
    """
    from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
    from statsmodels.stats.diagnostic import acorr_ljungbox

    clean = resid.dropna()

    fig = plt.figure(figsize=(10, 8))

    ax1 = fig.add_subplot(2, 2, 1)
    ax1.plot(resid)
    ax1.set_title("Residuals")
    # Tilt x-axis labels so date ticks stay readable.
    for lbl in ax1.get_xticklabels():
        lbl.set_rotation(45)
        lbl.set_ha("right")

    # The forecast runners fall back to all-NaN / empty residuals when the
    # in-sample fit cannot be computed; the correlation plots can raise on such
    # input, so degrade to a text note instead of losing the whole figure.
    for slot, plotter, label in ((2, plot_acf, "ACF"), (3, plot_pacf, "PACF")):
        ax = fig.add_subplot(2, 2, slot)
        try:
            plotter(clean, ax=ax)
        except Exception as exc:
            ax.clear()
            ax.axis("off")
            ax.text(0, 0.8, f"{label} unavailable: {exc}")

    ax4 = fig.add_subplot(2, 2, 4)
    # Lag count is based on the observations the test actually sees.
    lags = min(10, max(1, len(clean) // 10))
    try:
        lb = acorr_ljungbox(clean, lags=[lags], return_df=True)
        pval = float(lb["lb_pvalue"].iloc[-1])
    except Exception:
        pval = np.nan
    ax4.axis("off")
    ax4.text(0, 0.8, f"Ljung-Box p-value (~lag {lags}): {pval:.4f}")

    fig.tight_layout()
    return fig
103
+
104
def perform_stationarity_tests(df_indexed: pd.DataFrame, target_col: str) -> str:
    """Run ADF and KPSS tests on ``target_col`` and return a text report.

    The column is cast to float and NaNs are dropped first. A failure in
    either test is reported as a line in the output rather than raised.
    """
    from statsmodels.tsa.stattools import adfuller, kpss  # local import

    values = df_indexed[target_col].astype(float).dropna()
    report = [f"Observations: {len(values)}"]

    # Augmented Dickey-Fuller
    try:
        adf_stat, adf_p, _, _, adf_crit, _ = adfuller(values, autolag="AIC")
        report.append("ADF Test")
        report.append(f"  Statistic: {adf_stat:.4f}, p-value: {adf_p:.4f}")
        for level, threshold in adf_crit.items():
            report.append(f"  Critical {level}: {threshold:.4f}")
    except Exception as e:
        report.append(f"ADF error: {e}")

    # KPSS (constant-only regression, automatic lag selection)
    try:
        kpss_stat, kpss_p, _, kpss_crit = kpss(values, regression="c", nlags="auto")
        report.append("KPSS Test")
        report.append(f"  Statistic: {kpss_stat:.4f}, p-value: {kpss_p:.4f}")
        for level, threshold in kpss_crit.items():
            report.append(f"  Critical {level}: {threshold}")
    except Exception as e:
        report.append(f"KPSS error: {e}")

    return "\n".join(report)
125
+
126
+
127
def detect_outliers(df_indexed: pd.DataFrame, target_col: str, z_thresh: float = 3.0) -> str:
    """Count values of ``target_col`` whose absolute z-score exceeds ``z_thresh``.

    The sample standard deviation (ddof=1) is used; when it is zero or
    undefined the scale falls back to 1.0 so the division stays defined.
    """
    values = df_indexed[target_col].astype(float)

    spread = values.std(ddof=1)
    if pd.isna(spread) or spread == 0:
        scale = 1.0
    else:
        scale = float(spread)

    z_scores = (values - values.mean()) / scale
    n_flagged = int((z_scores.abs() > z_thresh).sum())
    return f"Potential outliers beyond |z|>{z_thresh}: {n_flagged}"
134
+
135
+ # ----------------------------
136
+ # WINDOWING
137
+ # ----------------------------
138
def _build_train_and_forecast_index(
    df_indexed: pd.DataFrame,
    target_col: str,
    train_start: Optional[str],
    train_end: Optional[str],
    horizon: int,
    freq: str,
) -> Tuple[pd.DataFrame, pd.DatetimeIndex, str, str, Optional[pd.Series]]:
    """Select the training window and build the forecast index that follows it.

    Args:
        df_indexed: Data indexed by timestamp.
        target_col: Column to forecast; rows where it is null are ignored.
        train_start: Optional inclusive lower bound of the training window.
        train_end: Optional inclusive upper bound of the training window.
        horizon: Number of future periods to forecast (must be >= 1).
        freq: Pandas frequency string, or ``"infer"`` to infer it from the
            training index.

    Returns:
        ``(train_df, fc_index, train_range, forecast_range, y_true)`` where the
        two ranges are human-readable strings and ``y_true`` holds the target
        values at the forecast timestamps when all of those timestamps exist
        in ``df_indexed`` and at least one value is non-null (else ``None``).

    Raises:
        ValueError: For a non-positive horizon, an empty target column, an
            empty training window, an un-inferable frequency, or a forecast
            index that does not lie strictly after the training window.
    """
    if horizon < 1:
        raise ValueError(f"horizon must be a positive integer; got {horizon}.")

    hist = df_indexed[df_indexed[target_col].notna()].copy()
    if hist.empty:
        raise ValueError(f"Target column '{target_col}' has no non-null values to train on.")

    # Explicit None/NaT checks instead of `or`, which depends on Timestamp truthiness.
    t_start = _parse_date(train_start)
    if t_start is None or pd.isna(t_start):
        t_start = hist.index.min()
    t_end = _parse_date(train_end)
    if t_end is None or pd.isna(t_end):
        t_end = hist.index.max()

    train_df = hist.loc[(hist.index >= t_start) & (hist.index <= t_end)].copy()
    if train_df.empty:
        raise ValueError("Training window has 0 rows; check train_start/train_end.")

    if freq == "infer":
        inferred = pd.infer_freq(train_df.index)
        if inferred is None:
            raise ValueError("Could not infer frequency from training index; please select a frequency in the UI.")
        use_freq = inferred
    else:
        use_freq = freq

    last_train = train_df.index.max()
    # Generate horizon + 1 stamps starting at the last training stamp and drop the first.
    fc_index = pd.date_range(start=last_train, periods=horizon + 1, freq=use_freq)[1:]

    # Real validation instead of `assert`, which is stripped under `python -O`.
    if len(fc_index) == 0 or not (last_train < fc_index.min()):
        raise ValueError("Forecast index must start strictly after the end of the training window.")

    y_true = None
    if fc_index.isin(df_indexed.index).all():
        yt = df_indexed.loc[fc_index, target_col]
        if yt.notna().any():
            y_true = yt

    train_range = f"Train range: {train_df.index.min()} → {train_df.index.max()}"
    forecast_range = f"Forecast range: {fc_index.min()} → {fc_index.max()}"

    return train_df, fc_index, train_range, forecast_range, y_true
181
+
182
+ # ----------------------------
183
+ # Exogenous handling
184
+ # ----------------------------
185
+ def _forecast_exog(
186
+ train_df: pd.DataFrame,
187
+ forecast_index: pd.DatetimeIndex,
188
+ exog_cols: List[str],
189
+ method: str = "naive",
190
+ exog_m: int = 0,
191
+ ) -> pd.DataFrame:
192
+ if not exog_cols:
193
+ return pd.DataFrame(index=forecast_index)
194
+ fc_dict = {}
195
+ H = len(forecast_index)
196
+ for c in exog_cols:
197
+ y = train_df[c].astype(float)
198
+ if y.isna().any():
199
+ raise ValueError(f"Exogenous column '{c}' has NaNs in the training window.")
200
+ if method == "naive":
201
+ fc = np.repeat(y.iloc[-1], H)
202
+ elif method == "seasonal_naive":
203
+ if exog_m is None or int(exog_m) < 1:
204
+ raise ValueError("seasonal_naive requires exog_m >= 1.")
205
+ if len(y) < int(exog_m):
206
+ raise ValueError(f"Training window too short for exog_m={exog_m} on exogenous '{c}'.")
207
+ last_block = y.iloc[-int(exog_m):].values
208
+ reps = int(np.ceil(H / int(exog_m)))
209
+ fc = np.tile(last_block, reps)[:H]
210
+ elif method == "auto_arima":
211
+ if not _PMDARIMA_OK:
212
+ raise ValueError("pmdarima not installed; cannot auto_arima exogenous.")
213
+ seasonal_flag = int(exog_m) > 1
214
+ m_used = int(exog_m) if seasonal_flag else 1
215
+ model = pm.auto_arima(
216
+ y,
217
+ seasonal=seasonal_flag,
218
+ m=m_used,
219
+ error_action="ignore",
220
+ suppress_warnings=True,
221
+ stepwise=True,
222
+ )
223
+ fc = model.predict(n_periods=H)
224
+ else:
225
+ raise ValueError(f"Unknown exogenous forecast method: {method}")
226
+ fc_dict[c] = fc
227
+ return pd.DataFrame(fc_dict, index=forecast_index)
228
+
229
+ def _prepare_exog_with_policy(
230
+ train_df: pd.DataFrame,
231
+ forecast_index: pd.DatetimeIndex,
232
+ exog_cols: Optional[List[str]],
233
+ df_indexed: pd.DataFrame,
234
+ future_exog_df: Optional[pd.DataFrame],
235
+ exog_policy: str = "auto_forecast",
236
+ exog_method: str = "naive",
237
+ exog_m: int = 0,
238
+ ) -> Tuple[Optional[pd.DataFrame], Optional[pd.DataFrame], str]:
239
+ if not exog_cols:
240
+ return None, None, ""
241
+
242
+ X_train = train_df[exog_cols].copy()
243
+ for c in exog_cols:
244
+ if not pd.api.types.is_numeric_dtype(X_train[c]):
245
+ raise ValueError(f"Exogenous column '{c}' must be numeric.")
246
+
247
+ if set(forecast_index).issubset(set(df_indexed.index)):
248
+ cand = df_indexed.loc[forecast_index, exog_cols]
249
+ if cand.notna().all().all():
250
+ return X_train, cand.copy(), "Exogenous: using future values from main file."
251
+
252
+ if future_exog_df is not None:
253
+ try:
254
+ cand = future_exog_df.reindex(forecast_index)[exog_cols]
255
+ if cand.notna().all().all():
256
+ return X_train, cand.copy(), "Exogenous: using provided future exog file."
257
+ except Exception:
258
+ pass
259
+
260
+ if exog_policy == "require_future":
261
+ need = ", ".join(exog_cols)
262
+ missing = ", ".join(str(ts) for ts in forecast_index[:6])
263
+ raise ValueError(f"Future exogenous values required for: {need}; missing at (first few): {missing}")
264
+ elif exog_policy == "drop_if_missing":
265
+ return None, None, "Exogenous: dropped (future values missing)."
266
+ elif exog_policy == "auto_forecast":
267
+ X_future = _forecast_exog(
268
+ train_df, forecast_index, exog_cols,
269
+ method=exog_method,
270
+ exog_m=int(exog_m or 0),
271
+ )
272
+ return X_train, X_future, f"Exogenous: auto-forecasted ({exog_method})."
273
+ else:
274
+ raise ValueError(f"Unknown exog_policy: {exog_policy}")
275
+
276
+ # ----------------------------
277
+ # Forecast runners
278
+ # ----------------------------
279
def run_auto_arima_forecast(
    df_indexed: pd.DataFrame,
    target_col: str,
    horizon: int,
    seasonal: bool,
    m: int,
    freq: str = "infer",
    exog_cols: Optional[List[str]] = None,
    future_exog_df: Optional[pd.DataFrame] = None,
    train_start: Optional[str] = None,
    train_end: Optional[str] = None,
    return_diagnostics: bool = False,
    exog_policy: str = "auto_forecast",
    exog_method: str = "naive",
    exog_m: int = 0,
):
    """Fit pmdarima's auto-ARIMA on the training window and forecast ``horizon`` steps.

    Args:
        df_indexed: Data indexed by timestamp.
        target_col: Column to forecast.
        horizon: Number of periods to forecast.
        seasonal: Whether to search seasonal models.
        m: Seasonal period (used only when ``seasonal`` is true).
        freq: Pandas frequency string or ``"infer"``.
        exog_cols: Optional exogenous regressor columns.
        future_exog_df: Optional frame with future exogenous values.
        train_start, train_end: Optional bounds of the training window.
        return_diagnostics: When true, also build a residual-diagnostics figure.
        exog_policy, exog_method, exog_m: Passed to ``_prepare_exog_with_policy``.

    Returns:
        ``(fig, summary, diag_fig, yhat, conf_df)``. On a model or exogenous
        failure the tuple is ``(None, "<error text>", None, None, None)``.
        ``diag_fig`` is ``None`` unless ``return_diagnostics`` is true.
    """
    if not _PMDARIMA_OK:
        return None, "Error: pmdarima is not installed; Auto-ARIMA unavailable.", None, None, None

    train_df, fc_index, tr_range, fc_range, y_true = _build_train_and_forecast_index(
        df_indexed, target_col, train_start, train_end, int(horizon), freq
    )
    y = train_df[target_col].astype(float)

    X_train = X_future = None
    exog_note = ""
    if exog_cols:
        try:
            X_train, X_future, exog_note = _prepare_exog_with_policy(
                train_df, fc_index, exog_cols, df_indexed, future_exog_df,
                exog_policy, exog_method, exog_m
            )
        except Exception as e:
            return None, f"Auto-ARIMA error: {e}", None, None, None

    try:
        # NOTE(review): newer pmdarima releases appear to name this keyword `X`
        # rather than `exogenous` — confirm against the installed version.
        model = pm.auto_arima(
            y,
            seasonal=bool(seasonal),
            m=int(m) if seasonal else 1,
            exogenous=X_train,
            error_action="ignore",
            suppress_warnings=True,
            stepwise=True,
        )
        fcst, conf = model.predict(n_periods=int(horizon), exogenous=X_future, return_conf_int=True)
        # predict() may return a pd.Series with its own index. Passing a Series
        # plus `index=` to pd.Series *reindexes* (NaN for non-matching labels)
        # instead of relabelling, so drop to a plain array first.
        yhat = pd.Series(np.asarray(fcst, dtype=float), index=fc_index)
        conf_df = pd.DataFrame(np.asarray(conf), index=fc_index, columns=["lower", "upper"])
    except Exception as e:
        return None, f"Auto-ARIMA error: {e}", None, None, None

    fig = plt.figure(figsize=(10, 5))
    plt.plot(y.index, y.values, label="history")
    plt.plot(yhat.index, yhat.values, label="forecast")
    plt.fill_between(yhat.index, conf_df["lower"], conf_df["upper"], alpha=0.2)
    plt.legend()
    plt.title("Auto-ARIMA Forecast")
    plt.xticks(rotation=45, ha="right")  # tilt x-axis labels on forecast plot
    fig.tight_layout()  # ensure labels fit

    try:
        in_sample = model.predict_in_sample(exogenous=X_train)
        # Same positional assignment as for the forecast above.
        fitted_in = pd.Series(np.asarray(in_sample, dtype=float), index=y.index)
        resid = y - fitted_in
    except Exception:
        # Best effort: diagnostics get an all-NaN residual series.
        resid = pd.Series(index=y.index, dtype=float)

    metrics_block = _metrics_text(y_true, yhat)
    summary = "\n".join([s for s in [
        f"Model: Auto-ARIMA ({'seasonal m='+str(m) if seasonal else 'non-seasonal'})",
        exog_note,
        tr_range, fc_range, metrics_block,
        "Residual Diagnostics" if return_diagnostics else ""
    ] if s]).strip()

    if not return_diagnostics:
        return fig, summary, None, yhat, conf_df
    diag_fig = _render_diagnostics(y, resid)
    return fig, summary, diag_fig, yhat, conf_df
357
+
358
def run_sarimax_forecast(
    df_indexed: pd.DataFrame,
    target_col: str,
    horizon: int,
    seasonal: bool,
    m: int,
    freq: str = "infer",
    exog_cols: Optional[List[str]] = None,
    future_exog_df: Optional[pd.DataFrame] = None,
    train_start: Optional[str] = None,
    train_end: Optional[str] = None,
    return_diagnostics: bool = False,
    exog_policy: str = "auto_forecast",
    exog_method: str = "naive",
    exog_m: int = 0,
):
    """Select orders with pmdarima's auto-ARIMA, then fit and forecast with statsmodels SARIMAX.

    Args:
        df_indexed: Data indexed by timestamp.
        target_col: Column to forecast.
        horizon: Number of periods to forecast.
        seasonal: Whether to use a seasonal component.
        m: Seasonal period (used only when ``seasonal`` is true).
        freq: Pandas frequency string or ``"infer"``.
        exog_cols: Optional exogenous regressor columns.
        future_exog_df: Optional frame with future exogenous values.
        train_start, train_end: Optional bounds of the training window.
        return_diagnostics: When true, also build a residual-diagnostics figure.
        exog_policy, exog_method, exog_m: Passed to ``_prepare_exog_with_policy``.

    Returns:
        ``(fig, summary, diag_fig, yhat, conf_df)``. On a model or exogenous
        failure the tuple is ``(None, "<error text>", None, None, None)``.
        The confidence band uses ``alpha=0.05``.
    """
    if not _PMDARIMA_OK:
        return None, "Error: pmdarima is required to select SARIMAX orders.", None, None, None

    train_df, fc_index, tr_range, fc_range, y_true = _build_train_and_forecast_index(
        df_indexed, target_col, train_start, train_end, int(horizon), freq
    )
    y = train_df[target_col].astype(float)

    X_train = X_future = None
    exog_note = ""
    if exog_cols:
        try:
            X_train, X_future, exog_note = _prepare_exog_with_policy(
                train_df, fc_index, exog_cols, df_indexed, future_exog_df,
                exog_policy, exog_method, exog_m
            )
        except Exception as e:
            return None, f"SARIMAX error: {e}", None, None, None

    try:
        # NOTE(review): newer pmdarima releases appear to name this keyword `X`
        # rather than `exogenous` — confirm against the installed version.
        selector = pm.auto_arima(
            y, exogenous=X_train, seasonal=bool(seasonal), m=int(m) if seasonal else 1,
            stepwise=True, error_action="ignore", suppress_warnings=True
        )
        order = selector.order
        sorder = selector.seasonal_order if seasonal else (0, 0, 0, 0)

        model = SARIMAX(
            y, exog=X_train,
            order=order,
            seasonal_order=sorder,
            enforce_stationarity=False, enforce_invertibility=False
        ).fit(disp=False)

        pred = model.get_forecast(steps=int(horizon), exog=X_future)
        # predicted_mean is a pd.Series with the model's own forecast index.
        # pd.Series(series, index=...) would reindex it (NaN on mismatching
        # labels), so assign values positionally — as conf_df already does.
        yhat = pd.Series(np.asarray(pred.predicted_mean, dtype=float), index=fc_index)
        conf_int = pred.conf_int(alpha=0.05)
        conf_df = pd.DataFrame(
            {"lower": conf_int.iloc[:, 0].values, "upper": conf_int.iloc[:, 1].values},
            index=fc_index,
        )
    except Exception as e:
        return None, f"SARIMAX error: {e}", None, None, None

    fig = plt.figure(figsize=(10, 5))
    plt.plot(y.index, y.values, label="history")
    plt.plot(yhat.index, yhat.values, label="forecast")
    plt.fill_between(yhat.index, conf_df["lower"], conf_df["upper"], alpha=0.2)
    plt.legend()
    plt.title(f"SARIMAX order={order} seasonal_order={sorder}")
    plt.xticks(rotation=45, ha="right")  # tilt x-axis labels on forecast plot
    fig.tight_layout()

    try:
        resid = y - model.fittedvalues
    except Exception:
        # Best effort: diagnostics get an all-NaN residual series.
        resid = pd.Series(index=y.index, dtype=float)

    metrics_block = _metrics_text(y_true, yhat)
    summary = "\n".join([s for s in [
        f"Model: SARIMAX order={order} seasonal_order={sorder}",
        exog_note,
        tr_range, fc_range, metrics_block,
        "Residual Diagnostics" if return_diagnostics else ""
    ] if s]).strip()

    if not return_diagnostics:
        return fig, summary, None, yhat, conf_df
    diag_fig = _render_diagnostics(y, resid)
    return fig, summary, diag_fig, yhat, conf_df
441
+
442
def run_ets_forecast(
    df_indexed: pd.DataFrame,
    target_col: str,
    horizon: int,
    error: str,
    trend: Optional[str],
    seasonal: Optional[str],
    m: int,
    damped: bool,
    freq: str = "infer",
    train_start: Optional[str] = None,
    train_end: Optional[str] = None,
    return_diagnostics: bool = False,
):
    """Fit a statsmodels ETS model on the training window and forecast ``horizon`` steps.

    Args:
        df_indexed: Data indexed by timestamp.
        target_col: Column to forecast.
        horizon: Number of periods to forecast.
        error: ETS error type, passed through to ``ETSModel``.
        trend: Trend type; ``None``, ``""`` or ``"none"`` disables the trend.
        seasonal: Seasonal type; ``None``, ``""`` or ``"none"`` disables it.
        m: Seasonal period (only used when a seasonal component is active).
        damped: Whether to damp the trend (ignored without a trend).
        freq: Pandas frequency string or ``"infer"``.
        train_start, train_end: Optional bounds of the training window.
        return_diagnostics: When true, also build a residual-diagnostics figure.

    Returns:
        ``(fig, summary, diag_fig, yhat, conf_df)``. On failure the tuple is
        ``(None, "<error text>", None, None, None)``. The band is
        ``yhat ± 1.96 * std(in-sample residuals)``, constant over the horizon.
    """
    train_df, fc_index, tr_range, fc_range, y_true = _build_train_and_forecast_index(
        df_indexed, target_col, train_start, train_end, int(horizon), freq
    )
    y = train_df[target_col].astype(float)

    # Treat None / "" / "none" uniformly as "component disabled" so a None
    # seasonal no longer forces int(m) and trend/seasonal follow one rule.
    disabled = (None, "", "none")
    trend_opt = None if trend in disabled else trend
    seasonal_opt = None if seasonal in disabled else seasonal

    try:
        model = ETSModel(
            y,
            error=error,
            trend=trend_opt,
            seasonal=seasonal_opt,
            seasonal_periods=int(m) if seasonal_opt is not None else None,
            damped_trend=bool(damped) if trend_opt is not None else False,
            initialization_method="estimated",
        ).fit(disp=False)  # silence optimizer output, like the SARIMAX runner
    except Exception as e:
        return None, f"ETS error: {e}", None, None, None

    try:
        fc_vals = model.forecast(int(horizon))
        yhat = pd.Series(fc_vals.values, index=fc_index)

        resid = y - model.fittedvalues
        se = float(np.nanstd(resid, ddof=1)) if len(resid.dropna()) else 0.0
        z = 1.96
        conf_df = pd.DataFrame({"lower": yhat - z * se, "upper": yhat + z * se}, index=fc_index)
    except Exception as e:
        return None, f"ETS forecast error: {e}", None, None, None

    fig = plt.figure(figsize=(10, 5))
    plt.plot(y.index, y.values, label="history")
    plt.plot(yhat.index, yhat.values, label="forecast")
    plt.fill_between(conf_df.index, conf_df["lower"], conf_df["upper"], alpha=0.2)
    plt.legend()
    plt.title("ETS Forecast")
    plt.xticks(rotation=45, ha="right")  # tilt x-axis labels on forecast plot
    fig.tight_layout()

    # `resid` from the forecast step above is reused for the diagnostics.
    metrics_block = _metrics_text(y_true, yhat)
    summary = "\n".join([s for s in [
        f"Model: ETS(error={error}, trend={trend}, seasonal={seasonal}, m={m}, damped={damped})",
        tr_range, fc_range, metrics_block,
        "Residual Diagnostics" if return_diagnostics else ""
    ] if s]).strip()

    if not return_diagnostics:
        return fig, summary, None, yhat, conf_df
    diag_fig = _render_diagnostics(y, resid)
    return fig, summary, diag_fig, yhat, conf_df
510
+
511
def run_prophet_forecast(
    df_indexed: pd.DataFrame,
    target_col: str,
    horizon: int,
    seasonality_mode: str,
    yearly: bool,
    weekly: bool,
    daily: bool,
    freq: str = "infer",
    exog_cols: Optional[List[str]] = None,
    future_exog_df: Optional[pd.DataFrame] = None,
    train_start: Optional[str] = None,
    train_end: Optional[str] = None,
    return_diagnostics: bool = False,
    exog_policy: str = "auto_forecast",
    exog_method: str = "naive",
    exog_m: int = 0,
):
    """Fit a Prophet model on the training window and forecast ``horizon`` steps.

    Args:
        df_indexed: Data indexed by timestamp.
        target_col: Column to forecast.
        horizon: Number of periods to forecast.
        seasonality_mode: Passed to ``Prophet(seasonality_mode=...)``.
        yearly, weekly, daily: Add the corresponding custom seasonality.
        freq: Pandas frequency string or ``"infer"``.
        exog_cols: Optional columns added as Prophet regressors.
        future_exog_df: Optional frame with future regressor values.
        train_start, train_end: Optional bounds of the training window.
        return_diagnostics: When true, also build a residual-diagnostics figure.
        exog_policy, exog_method, exog_m: Passed to ``_prepare_exog_with_policy``.

    Returns:
        ``(fig, summary, diag_fig, yhat, conf_df)``. On failure the tuple is
        ``(None, "<error text>", None, None, None)``.
    """
    if not _PROPHET_OK:
        return None, "Error: prophet is not installed.", None, None, None

    train_df, fc_index, tr_range, fc_range, y_true = _build_train_and_forecast_index(
        df_indexed, target_col, train_start, train_end, int(horizon), freq
    )

    X_train = X_future = None
    exog_note = ""
    exog_effective: List[str] = []
    if exog_cols:
        try:
            X_train, X_future, exog_note = _prepare_exog_with_policy(
                train_df, fc_index, exog_cols, df_indexed, future_exog_df,
                exog_policy, exog_method, exog_m
            )
            if X_train is not None:
                exog_effective = list(exog_cols)
        except Exception as e:
            return None, f"Prophet error: {e}", None, None, None

    # Build the ds/y frame straight from the index. Going through
    # reset_index() + rename breaks for an unnamed index, whose column is
    # called "index" rather than "ds".
    train = pd.DataFrame({
        "ds": train_df.index,
        "y": train_df[target_col].astype(float).to_numpy(),
    })
    if exog_effective:
        train = pd.concat([train, X_train.reset_index(drop=True)], axis=1)

    # NOTE(review): Prophet's built-in seasonalities are left at their
    # defaults here, so a False flag may not actually disable that
    # seasonality — confirm whether the flags are meant to be exclusive.
    mobj = Prophet(seasonality_mode=seasonality_mode)
    if yearly:
        mobj.add_seasonality(name="yearly", period=365.25, fourier_order=10)
    if weekly:
        mobj.add_seasonality(name="weekly", period=7, fourier_order=6)
    if daily:
        mobj.add_seasonality(name="daily", period=1, fourier_order=4)
    for c in exog_effective:
        mobj.add_regressor(c)

    try:
        mobj.fit(train)
        future = pd.DataFrame({"ds": fc_index})
        if exog_effective:
            for c in exog_effective:
                if c not in X_future.columns:
                    raise ValueError(f"Prophet future DataFrame missing required regressor column: {c}")
            future = pd.concat([future, X_future.reset_index(drop=True)], axis=1)
        fcst = mobj.predict(future)
    except Exception as e:
        return None, f"Prophet error: {e}", None, None, None

    fig = plt.figure(figsize=(10, 5))
    plt.plot(train["ds"], train["y"], label="history")
    plt.plot(fcst["ds"], fcst["yhat"], label="forecast")
    if "yhat_lower" in fcst and "yhat_upper" in fcst:
        plt.fill_between(fcst["ds"], fcst["yhat_lower"], fcst["yhat_upper"], alpha=0.2)
    plt.legend()
    plt.title("Prophet Forecast")
    plt.xticks(rotation=45, ha="right")  # tilt x-axis labels on forecast plot
    fig.tight_layout()

    try:
        in_fit = mobj.predict(train.drop(columns=["y"]))
        resid = train["y"].values - in_fit["yhat"].values
        resid = pd.Series(resid, index=train["ds"])
    except Exception:
        # Best effort: diagnostics get an empty residual series.
        resid = pd.Series(dtype=float)

    fc_dates = pd.DatetimeIndex(fcst["ds"])
    yhat_series = pd.Series(fcst["yhat"].values, index=fc_dates)
    metrics_block = _metrics_text(y_true, yhat_series)

    # Use raw values: the fcst columns carry a positional index, and handing
    # them to DataFrame(..., index=fc_dates) as Series would align on labels
    # and leave every bound NaN.
    missing_bound = np.full(len(fcst), np.nan)
    conf_df = pd.DataFrame(
        {
            "lower": fcst["yhat_lower"].to_numpy() if "yhat_lower" in fcst else missing_bound,
            "upper": fcst["yhat_upper"].to_numpy() if "yhat_upper" in fcst else missing_bound,
        },
        index=fc_dates,
    )

    # Skip empty parts, as the other forecast runners do.
    summary = "\n".join([s for s in [
        f"Model: Prophet(seasonality_mode={seasonality_mode}, yearly={yearly}, weekly={weekly}, daily={daily})",
        exog_note,
        tr_range, fc_range, metrics_block,
        "Residual Diagnostics" if return_diagnostics else ""
    ] if s]).strip()

    if not return_diagnostics:
        return fig, summary, None, yhat_series, conf_df
    diag_fig = _render_diagnostics(train.set_index("ds")["y"], resid)
    return fig, summary, diag_fig, yhat_series, conf_df
615
+
616
+ # ----------------------------
617
+ # exports list
618
+ # ----------------------------
619
# Public API of this module: the four forecast runners plus two data checks.
__all__ = [
    "run_auto_arima_forecast", "run_ets_forecast",
    "run_prophet_forecast", "run_sarimax_forecast",
    "perform_stationarity_tests", "detect_outliers",
]