Gil Stetler committed on
Commit
c5cdf21
·
1 Parent(s): c9aa5e1
Files changed (1) hide show
  1. app.py +52 -40
app.py CHANGED
@@ -9,15 +9,12 @@ import matplotlib
9
  matplotlib.use("Agg")
10
  import matplotlib.pyplot as plt
11
 
12
- # ---- Chronos Zero-Shot (Fallback) ----
13
  from chronos import ChronosPipeline
14
 
15
- # ---- AutoGluon (für Finetune + Laden) ----
16
  AGTS_AVAILABLE = False
17
  try:
18
  from autogluon.timeseries import TimeSeriesPredictor, TimeSeriesDataFrame
19
  try:
20
- # optional: AutoGluon global seeding helper (nicht in allen Versionen vorhanden)
21
  from autogluon.common.utils.seed import set_seed as ag_set_seed
22
  except Exception:
23
  ag_set_seed = None
@@ -26,15 +23,13 @@ except Exception:
26
  ag_set_seed = None
27
  pass
28
 
29
- # unsere bestehende Daten-Pipeline
30
  import pipeline_v2 as pipe2
31
 
32
  # --------------------
33
- # Konfiguration
34
  # --------------------
35
- # IMMER auf finetuned gehen -> wir trainieren automatisch, falls noch nicht vorhanden
36
- FINETUNED_DIR = os.path.abspath("./finetuned_predictor") # persistiert im Space-Repo
37
- MODEL_ID_FALLBACK = os.getenv("MODEL_ID", "amazon/chronos-t5-large") # nur falls FT scheitert
38
 
39
  PREDICTION_LENGTH = 30
40
  NUM_SAMPLES = 1
@@ -42,22 +37,18 @@ RV_WINDOW = 20
42
  ANNUALIZE = True
43
  EPS = 1e-8
44
 
45
- # Auto-Finetune-Defaults (einmalig beim ersten Start)
46
  AUTO_TICKERS = os.getenv("AUTO_TICKERS", "AAPL,MSFT,AMZN,NVDA,GOOGL,TSLA,SPY,TLT").split(",")
47
  AUTO_START = os.getenv("AUTO_START", "2010-01-01")
48
  AUTO_INTERVAL = os.getenv("AUTO_INTERVAL", "1d") # "1d","1wk","1mo"
49
  AUTO_MODEL_PATH = os.getenv("AUTO_MODEL_PATH", "autogluon/chronos-bolt-base")
50
- AUTO_STEPS = int(os.getenv("AUTO_STEPS", "900")) # moderat schnell
51
  AUTO_LR = float(os.getenv("AUTO_LR", "1e-4"))
52
  AUTO_SEED = int(os.getenv("AUTO_SEED", "0"))
53
 
54
- # --------------------
55
- # Utils
56
- # --------------------
57
  device = "cuda" if torch.cuda.is_available() else "cpu"
58
  dtype = torch.bfloat16 if device == "cuda" else torch.float32
59
 
60
- # ---- global seeding (kompatibel über Versionen hinweg) ----
61
  def set_global_seed(seed: int):
62
  random.seed(seed)
63
  np.random.seed(seed)
@@ -72,9 +63,9 @@ def set_global_seed(seed: int):
72
  ag_set_seed(seed)
73
  except Exception:
74
  pass
75
-
76
  set_global_seed(AUTO_SEED)
77
 
 
78
  def _extract_close(df: pd.DataFrame) -> pd.Series:
79
  if isinstance(df.columns, pd.MultiIndex):
80
  for name in ["Adj Close", "Adj_Close", "adj close", "adj_close"]:
@@ -129,8 +120,19 @@ def compute_metrics(y_true: np.ndarray, y_pred: np.ndarray) -> dict:
129
  rmse = float(np.sqrt(np.mean(err**2)))
130
  return {"MAPE": mape, "MPE": mpe, "RMSE": rmse}
131
 
 
 
 
 
 
 
 
 
 
 
 
132
  # --------------------
133
- # Auto-Finetune: einmalig beim ersten Start
134
  # --------------------
135
  def _download_close(ticker: str, start: str, interval: str) -> pd.Series:
136
  import yfinance as yf
@@ -153,7 +155,7 @@ def _download_close(ticker: str, start: str, interval: str) -> pd.Series:
153
  raise RuntimeError(f"No numeric close for {ticker}")
154
  return pd.Series(df[num_cols[-1]]).astype(float)
155
 
156
- def _build_tsdf(tickers, start, interval, rv_window, annualize=True):
157
  rows = []
158
  for t in tickers:
159
  s_close = _download_close(t, start, interval)
@@ -164,13 +166,27 @@ def _build_tsdf(tickers, start, interval, rv_window, annualize=True):
164
  rv = rv.dropna()
165
  rows.append(pd.DataFrame({"item_id": t, "timestamp": rv.index, "target": rv.values}))
166
  df_long = pd.concat(rows, ignore_index=True)
167
- return TimeSeriesDataFrame.from_data_frame(df_long, id_column="item_id", timestamp_column="timestamp")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
 
169
  def ensure_finetuned_predictor(log_cb=print):
170
- """
171
- Prüft, ob ein finetunter AutoGluon-Predictor existiert.
172
- Falls nicht, trainiert er ihn direkt im Space und speichert nach FINETUNED_DIR.
173
- """
174
  if not AGTS_AVAILABLE:
175
  log_cb("AutoGluon not available; using Zero-Shot Chronos.")
176
  return None
@@ -183,17 +199,18 @@ def ensure_finetuned_predictor(log_cb=print):
183
  except Exception as e:
184
  log_cb(f"Existing predictor could not be loaded, retraining. Reason: {e}")
185
 
186
- # Train einmalig
187
  os.makedirs(FINETUNED_DIR, exist_ok=True)
188
  log_cb("No finetuned predictor found. Starting on-device fine-tuning (Chronos-Bolt)...")
189
 
190
  tsdf = _build_tsdf([t.strip() for t in AUTO_TICKERS if t.strip()],
191
  AUTO_START, AUTO_INTERVAL, RV_WINDOW, annualize=True)
192
 
 
193
  predictor = TimeSeriesPredictor(
194
  prediction_length=PREDICTION_LENGTH,
195
  target="target",
196
  eval_metric="WQL",
 
197
  )
198
 
199
  hyperparams = {
@@ -202,7 +219,6 @@ def ensure_finetuned_predictor(log_cb=print):
202
  "fine_tune": True,
203
  "fine_tune_steps": AUTO_STEPS,
204
  "fine_tune_lr": AUTO_LR,
205
- # "device": "gpu" # AutoGluon nutzt automatisch CUDA, wenn verfügbar
206
  }
207
  }
208
 
@@ -219,10 +235,8 @@ ag_predictor = None
219
 
220
  def _load_models():
221
  global pipe, ag_predictor
222
- # 1) Versuche finetuned zu laden bzw. zu trainieren
223
  ag_predictor = ensure_finetuned_predictor(log_cb=lambda m: print(f"[AutoFT] {m}"))
224
  if ag_predictor is None:
225
- # 2) Fallback Zero-Shot
226
  print(f"[AutoFT] Falling back to Zero-Shot: {MODEL_ID_FALLBACK}")
227
  pipe = ChronosPipeline.from_pretrained(
228
  MODEL_ID_FALLBACK,
@@ -235,30 +249,32 @@ def _load_models():
235
  _load_models()
236
 
237
  # --------------------
238
- # Forecast-Backends
239
  # --------------------
240
  def _predict_with_chronos(rv_train: np.ndarray, H: int) -> np.ndarray:
241
  random.seed(0); np.random.seed(0); torch.manual_seed(0)
242
  if torch.cuda.is_available():
243
  torch.cuda.manual_seed_all(0)
244
  context = torch.tensor(rv_train, dtype=torch.float32)
245
- fcst = pipe.predict(context, prediction_length=H, num_samples=NUM_SAMPLES) # [1, 1, H]
246
  return fcst[0].cpu().numpy()[0]
247
 
248
  def _predict_with_ag(rv_train_idx: pd.DatetimeIndex, rv_train: np.ndarray, H: int) -> np.ndarray:
249
- ts = pd.DataFrame({
250
- "item_id": "series",
251
- "timestamp": rv_train_idx,
252
- "target": rv_train,
253
- })
254
  ts_df = TimeSeriesDataFrame.from_data_frame(ts, id_column="item_id", timestamp_column="timestamp")
 
 
 
 
 
 
255
  preds = ag_predictor.predict(ts_df, prediction_length=H)
256
  if 0.5 in preds.quantile_levels:
257
  return preds.loc[("series", 0.5)].to_numpy()
258
  return preds.mean(axis=1).loc["series"].to_numpy()
259
 
260
  # --------------------
261
- # App-Logik (unverändert in der Funktionalität)
262
  # --------------------
263
  def run_for_ticker(tickers: str, start: str, interval: str, use_calibration: bool):
264
  tick_list = [t.strip() for t in tickers.replace(";", ",").replace("|", ",").split(",") if t.strip()]
@@ -290,7 +306,6 @@ def run_for_ticker(tickers: str, start: str, interval: str, use_calibration: boo
290
  rv_train = rv[: n - H]
291
  rv_test = rv[n - H :]
292
 
293
- # Forecast mit finetuned Predictor (wenn vorhanden), sonst Zero-Shot Chronos
294
  if ag_predictor is not None and isinstance(dates, np.ndarray) and isinstance(df.index, pd.DatetimeIndex):
295
  rv_index = df.index[-len(rv):][:len(rv_train)]
296
  path_pred = _predict_with_ag(rv_index, rv_train, H)
@@ -299,15 +314,13 @@ def run_for_ticker(tickers: str, start: str, interval: str, use_calibration: boo
299
  path_pred = _predict_with_chronos(rv_train, H)
300
  provider = f"Chronos {MODEL_ID_FALLBACK.split('/')[-1]}"
301
 
302
- alpha = None
303
  if use_calibration:
304
  alpha, path_pred_cal = bias_scale_calibration(rv_test, path_pred)
305
  metrics_raw = compute_metrics(rv_test, path_pred)
306
  metrics_cal = compute_metrics(rv_test, path_pred_cal)
307
  else:
 
308
  metrics_raw = compute_metrics(rv_test, path_pred)
309
- metrics_cal = None
310
- path_pred_cal = None
311
 
312
  fig = plt.figure(figsize=(10, 4))
313
  H0 = len(rv_train)
@@ -331,9 +344,8 @@ def run_for_ticker(tickers: str, start: str, interval: str, use_calibration: boo
331
  plt.xlabel(x_lbl); plt.ylabel("realized volatility")
332
  plt.legend(loc="best"); plt.tight_layout()
333
 
334
- last_dates = x_fcst
335
  df_days = pd.DataFrame({
336
- "date": last_dates,
337
  "actual_vol": rv_test,
338
  "forecast_raw": path_pred,
339
  })
 
9
  matplotlib.use("Agg")
10
  import matplotlib.pyplot as plt
11
 
 
12
  from chronos import ChronosPipeline
13
 
 
14
  AGTS_AVAILABLE = False
15
  try:
16
  from autogluon.timeseries import TimeSeriesPredictor, TimeSeriesDataFrame
17
  try:
 
18
  from autogluon.common.utils.seed import set_seed as ag_set_seed
19
  except Exception:
20
  ag_set_seed = None
 
23
  ag_set_seed = None
24
  pass
25
 
 
26
  import pipeline_v2 as pipe2
27
 
28
  # --------------------
29
+ # Config
30
  # --------------------
31
+ FINETUNED_DIR = os.path.abspath("./finetuned_predictor")
32
+ MODEL_ID_FALLBACK = os.getenv("MODEL_ID", "amazon/chronos-t5-large")
 
33
 
34
  PREDICTION_LENGTH = 30
35
  NUM_SAMPLES = 1
 
37
  ANNUALIZE = True
38
  EPS = 1e-8
39
 
 
40
  AUTO_TICKERS = os.getenv("AUTO_TICKERS", "AAPL,MSFT,AMZN,NVDA,GOOGL,TSLA,SPY,TLT").split(",")
41
  AUTO_START = os.getenv("AUTO_START", "2010-01-01")
42
  AUTO_INTERVAL = os.getenv("AUTO_INTERVAL", "1d") # "1d","1wk","1mo"
43
  AUTO_MODEL_PATH = os.getenv("AUTO_MODEL_PATH", "autogluon/chronos-bolt-base")
44
+ AUTO_STEPS = int(os.getenv("AUTO_STEPS", "900"))
45
  AUTO_LR = float(os.getenv("AUTO_LR", "1e-4"))
46
  AUTO_SEED = int(os.getenv("AUTO_SEED", "0"))
47
 
 
 
 
48
  device = "cuda" if torch.cuda.is_available() else "cpu"
49
  dtype = torch.bfloat16 if device == "cuda" else torch.float32
50
 
51
+ # ---- global seeding ----
52
  def set_global_seed(seed: int):
53
  random.seed(seed)
54
  np.random.seed(seed)
 
63
  ag_set_seed(seed)
64
  except Exception:
65
  pass
 
66
  set_global_seed(AUTO_SEED)
67
 
68
+ # ---- utils ----
69
  def _extract_close(df: pd.DataFrame) -> pd.Series:
70
  if isinstance(df.columns, pd.MultiIndex):
71
  for name in ["Adj Close", "Adj_Close", "adj close", "adj_close"]:
 
120
  rmse = float(np.sqrt(np.mean(err**2)))
121
  return {"MAPE": mape, "MPE": mpe, "RMSE": rmse}
122
 
123
+ # ---- frequency helpers ----
124
def interval_to_freq(interval: str) -> str:
    """Translate an interval code into a pandas frequency alias.

    Matching is case-insensitive and ignores surrounding whitespace. ``None``
    and the empty string are treated like any other unrecognised code.

    Args:
        interval: Interval code such as "1d", "1wk" or "1mo".

    Returns:
        "B" for "1d", "W-FRI" for "1wk", "M" for "1mo", and "B" for every
        other value.
    """
    aliases = {
        "1d": "B",  # business day
        "1wk": "W-FRI",  # weekly, anchored on Friday so weekends are avoided
        "1mo": "M",  # monthly (calendar month end)
    }
    key = (interval or "").lower().strip()
    # Unknown codes fall back to the business-day alias.
    return aliases.get(key, "B")
133
+
134
  # --------------------
135
+ # Auto-Finetune
136
  # --------------------
137
  def _download_close(ticker: str, start: str, interval: str) -> pd.Series:
138
  import yfinance as yf
 
155
  raise RuntimeError(f"No numeric close for {ticker}")
156
  return pd.Series(df[num_cols[-1]]).astype(float)
157
 
158
+ def _build_tsdf(tickers, start, interval, rv_window, annualize=True) -> TimeSeriesDataFrame:
159
  rows = []
160
  for t in tickers:
161
  s_close = _download_close(t, start, interval)
 
166
  rv = rv.dropna()
167
  rows.append(pd.DataFrame({"item_id": t, "timestamp": rv.index, "target": rv.values}))
168
  df_long = pd.concat(rows, ignore_index=True)
169
+ tsdf = TimeSeriesDataFrame.from_data_frame(df_long, id_column="item_id", timestamp_column="timestamp")
170
+
171
+ # Enforce a uniform frequency (avoids "Cannot infer frequency" errors)
172
+ freq = interval_to_freq(interval)
173
+ try:
174
+ tsdf = tsdf.convert_frequency(freq=freq) # reguläre Zeitachsen je item
175
+ except Exception:
176
+ # Fallback: resample each item via groupby (asfreq) and forward-fill the gaps
177
+ def _regularize(g):
178
+ g = g.set_index("timestamp").asfreq(freq)
179
+ g["target"] = g["target"].ffill()
180
+ g["item_id"] = g["item_id"].ffill().bfill()
181
+ return g.reset_index()
182
+ reg = (
183
+ df_long.groupby("item_id", group_keys=False)
184
+ .apply(_regularize)
185
+ )
186
+ tsdf = TimeSeriesDataFrame.from_data_frame(reg, id_column="item_id", timestamp_column="timestamp")
187
+ return tsdf
188
 
189
  def ensure_finetuned_predictor(log_cb=print):
 
 
 
 
190
  if not AGTS_AVAILABLE:
191
  log_cb("AutoGluon not available; using Zero-Shot Chronos.")
192
  return None
 
199
  except Exception as e:
200
  log_cb(f"Existing predictor could not be loaded, retraining. Reason: {e}")
201
 
 
202
  os.makedirs(FINETUNED_DIR, exist_ok=True)
203
  log_cb("No finetuned predictor found. Starting on-device fine-tuning (Chronos-Bolt)...")
204
 
205
  tsdf = _build_tsdf([t.strip() for t in AUTO_TICKERS if t.strip()],
206
  AUTO_START, AUTO_INTERVAL, RV_WINDOW, annualize=True)
207
 
208
+ freq = interval_to_freq(AUTO_INTERVAL)
209
  predictor = TimeSeriesPredictor(
210
  prediction_length=PREDICTION_LENGTH,
211
  target="target",
212
  eval_metric="WQL",
213
+ freq=freq, # <<<<<< WICHTIG
214
  )
215
 
216
  hyperparams = {
 
219
  "fine_tune": True,
220
  "fine_tune_steps": AUTO_STEPS,
221
  "fine_tune_lr": AUTO_LR,
 
222
  }
223
  }
224
 
 
235
 
236
  def _load_models():
237
  global pipe, ag_predictor
 
238
  ag_predictor = ensure_finetuned_predictor(log_cb=lambda m: print(f"[AutoFT] {m}"))
239
  if ag_predictor is None:
 
240
  print(f"[AutoFT] Falling back to Zero-Shot: {MODEL_ID_FALLBACK}")
241
  pipe = ChronosPipeline.from_pretrained(
242
  MODEL_ID_FALLBACK,
 
249
  _load_models()
250
 
251
  # --------------------
252
+ # Forecast backends
253
  # --------------------
254
  def _predict_with_chronos(rv_train: np.ndarray, H: int) -> np.ndarray:
255
  random.seed(0); np.random.seed(0); torch.manual_seed(0)
256
  if torch.cuda.is_available():
257
  torch.cuda.manual_seed_all(0)
258
  context = torch.tensor(rv_train, dtype=torch.float32)
259
+ fcst = pipe.predict(context, prediction_length=H, num_samples=NUM_SAMPLES)
260
  return fcst[0].cpu().numpy()[0]
261
 
262
  def _predict_with_ag(rv_train_idx: pd.DatetimeIndex, rv_train: np.ndarray, H: int) -> np.ndarray:
263
+ ts = pd.DataFrame({"item_id": "series", "timestamp": rv_train_idx, "target": rv_train})
 
 
 
 
264
  ts_df = TimeSeriesDataFrame.from_data_frame(ts, id_column="item_id", timestamp_column="timestamp")
265
+ # Für Inferenz sicherstellen, dass Frequenz konsistent ist:
266
+ freq = interval_to_freq("1d") # rv_train_idx kommt von daily-Daten im UI; falls nicht, kannst du hier dynamisch mappen
267
+ try:
268
+ ts_df = ts_df.convert_frequency(freq=freq)
269
+ except Exception:
270
+ pass
271
  preds = ag_predictor.predict(ts_df, prediction_length=H)
272
  if 0.5 in preds.quantile_levels:
273
  return preds.loc[("series", 0.5)].to_numpy()
274
  return preds.mean(axis=1).loc["series"].to_numpy()
275
 
276
  # --------------------
277
+ # App logic (functionality unchanged)
278
  # --------------------
279
  def run_for_ticker(tickers: str, start: str, interval: str, use_calibration: bool):
280
  tick_list = [t.strip() for t in tickers.replace(";", ",").replace("|", ",").split(",") if t.strip()]
 
306
  rv_train = rv[: n - H]
307
  rv_test = rv[n - H :]
308
 
 
309
  if ag_predictor is not None and isinstance(dates, np.ndarray) and isinstance(df.index, pd.DatetimeIndex):
310
  rv_index = df.index[-len(rv):][:len(rv_train)]
311
  path_pred = _predict_with_ag(rv_index, rv_train, H)
 
314
  path_pred = _predict_with_chronos(rv_train, H)
315
  provider = f"Chronos {MODEL_ID_FALLBACK.split('/')[-1]}"
316
 
 
317
  if use_calibration:
318
  alpha, path_pred_cal = bias_scale_calibration(rv_test, path_pred)
319
  metrics_raw = compute_metrics(rv_test, path_pred)
320
  metrics_cal = compute_metrics(rv_test, path_pred_cal)
321
  else:
322
+ alpha, path_pred_cal, metrics_cal = None, None, None
323
  metrics_raw = compute_metrics(rv_test, path_pred)
 
 
324
 
325
  fig = plt.figure(figsize=(10, 4))
326
  H0 = len(rv_train)
 
344
  plt.xlabel(x_lbl); plt.ylabel("realized volatility")
345
  plt.legend(loc="best"); plt.tight_layout()
346
 
 
347
  df_days = pd.DataFrame({
348
+ "date": x_fcst,
349
  "actual_vol": rv_test,
350
  "forecast_raw": path_pred,
351
  })