Gil Stetler committed on
Commit
09f6668
·
1 Parent(s): c5cdf21

finetune chronos-bolt-small

Browse files
Files changed (4) hide show
  1. app.py +67 -409
  2. requirements.txt +4 -4
  3. train_autogluon.py +40 -0
  4. utils_vol.py +29 -0
app.py CHANGED
@@ -1,411 +1,69 @@
1
- # app.py
2
- import os, random, time
3
- from typing import Tuple
4
- import numpy as np
5
- import pandas as pd
6
- import torch
7
  import gradio as gr
8
- import matplotlib
9
- matplotlib.use("Agg")
10
  import matplotlib.pyplot as plt
11
-
12
- from chronos import ChronosPipeline
13
-
14
- AGTS_AVAILABLE = False
15
- try:
16
- from autogluon.timeseries import TimeSeriesPredictor, TimeSeriesDataFrame
17
- try:
18
- from autogluon.common.utils.seed import set_seed as ag_set_seed
19
- except Exception:
20
- ag_set_seed = None
21
- AGTS_AVAILABLE = True
22
- except Exception:
23
- ag_set_seed = None
24
- pass
25
-
26
- import pipeline_v2 as pipe2
27
-
28
- # --------------------
29
- # Config
30
- # --------------------
31
- FINETUNED_DIR = os.path.abspath("./finetuned_predictor")
32
- MODEL_ID_FALLBACK = os.getenv("MODEL_ID", "amazon/chronos-t5-large")
33
-
34
- PREDICTION_LENGTH = 30
35
- NUM_SAMPLES = 1
36
- RV_WINDOW = 20
37
- ANNUALIZE = True
38
- EPS = 1e-8
39
-
40
- AUTO_TICKERS = os.getenv("AUTO_TICKERS", "AAPL,MSFT,AMZN,NVDA,GOOGL,TSLA,SPY,TLT").split(",")
41
- AUTO_START = os.getenv("AUTO_START", "2010-01-01")
42
- AUTO_INTERVAL = os.getenv("AUTO_INTERVAL", "1d") # "1d","1wk","1mo"
43
- AUTO_MODEL_PATH = os.getenv("AUTO_MODEL_PATH", "autogluon/chronos-bolt-base")
44
- AUTO_STEPS = int(os.getenv("AUTO_STEPS", "900"))
45
- AUTO_LR = float(os.getenv("AUTO_LR", "1e-4"))
46
- AUTO_SEED = int(os.getenv("AUTO_SEED", "0"))
47
-
48
- device = "cuda" if torch.cuda.is_available() else "cpu"
49
- dtype = torch.bfloat16 if device == "cuda" else torch.float32
50
-
51
- # ---- global seeding ----
52
- def set_global_seed(seed: int):
53
- random.seed(seed)
54
- np.random.seed(seed)
55
- try:
56
- torch.manual_seed(seed)
57
- if torch.cuda.is_available():
58
- torch.cuda.manual_seed_all(seed)
59
- except Exception:
60
- pass
61
- if ag_set_seed is not None:
62
- try:
63
- ag_set_seed(seed)
64
- except Exception:
65
- pass
66
- set_global_seed(AUTO_SEED)
67
-
68
- # ---- utils ----
69
- def _extract_close(df: pd.DataFrame) -> pd.Series:
70
- if isinstance(df.columns, pd.MultiIndex):
71
- for name in ["Adj Close", "Adj_Close", "adj close", "adj_close"]:
72
- if name in df.columns.get_level_values(0):
73
- sub = df.xs(name, axis=1, level=0)
74
- if sub.shape[1] > 1:
75
- sub = sub.iloc[:, 0]
76
- return pd.to_numeric(sub.squeeze(), errors="coerce").dropna()
77
- for name in ["Close", "close", "Price", "price"]:
78
- if name in df.columns.get_level_values(0):
79
- sub = df.xs(name, axis=1, level=0)
80
- if sub.shape[1] > 1:
81
- sub = sub.iloc[:, 0]
82
- return pd.to_numeric(sub.squeeze(), errors="coerce").dropna()
83
- mapping = {c.lower(): c for c in df.columns}
84
- for name in ["adj close", "adj_close", "close", "price"]:
85
- if name in mapping:
86
- return pd.to_numeric(df[mapping[name]], errors="coerce").dropna()
87
- num_cols = df.select_dtypes(include=[np.number]).columns
88
- if len(num_cols) == 0:
89
- raise gr.Error("No numeric price column found in downloaded data.")
90
- return pd.Series(df[num_cols[-1]]).astype(float)
91
-
92
- def _extract_dates(df: pd.DataFrame):
93
- if isinstance(df.index, pd.DatetimeIndex):
94
- return df.index.to_numpy()
95
- mapping = {c.lower(): c for c in df.columns}
96
- for name in ["date", "time", "timestamp"]:
97
- if name in mapping:
98
- try:
99
- return pd.to_datetime(df[mapping[name]]).to_numpy()
100
- except Exception:
101
- pass
102
- return np.arange(len(df))
103
-
104
- def compute_realized_vol(close: pd.Series, window: int = 20, annualize: bool = True) -> pd.Series:
105
- r = np.log(close).diff().dropna()
106
- rv = r.rolling(window, min_periods=window).std()
107
- if annualize:
108
- rv = rv * np.sqrt(252.0)
109
- return rv.dropna().reset_index(drop=True)
110
-
111
- def bias_scale_calibration(y_true: np.ndarray, y_pred: np.ndarray) -> Tuple[float, np.ndarray]:
112
- alpha = float(np.sum(y_true * y_pred) / (np.sum(y_pred**2) + EPS))
113
- return alpha, alpha * y_pred
114
-
115
- def compute_metrics(y_true: np.ndarray, y_pred: np.ndarray) -> dict:
116
- err = y_pred - y_true
117
- denom = np.maximum(EPS, np.abs(y_true))
118
- mape = float((np.abs(err) / denom).mean() * 100)
119
- mpe = float((err / np.maximum(EPS, y_true)).mean() * 100)
120
- rmse = float(np.sqrt(np.mean(err**2)))
121
- return {"MAPE": mape, "MPE": mpe, "RMSE": rmse}
122
-
123
- # ---- frequency helpers ----
124
- def interval_to_freq(interval: str) -> str:
125
- interval = (interval or "").lower().strip()
126
- if interval == "1d":
127
- return "B" # Business day
128
- if interval == "1wk":
129
- return "W-FRI" # Wochenende vermeiden, Börsenwoche endend Fr
130
- if interval == "1mo":
131
- return "M" # Monatlich (Kalenderende)
132
- return "B"
133
-
134
- # --------------------
135
- # Auto-Finetune
136
- # --------------------
137
- def _download_close(ticker: str, start: str, interval: str) -> pd.Series:
138
- import yfinance as yf
139
- df = yf.download(ticker, start=start, interval=interval, auto_adjust=False, progress=False, threads=True)
140
- if df is None or df.empty:
141
- raise RuntimeError(f"No data for {ticker}")
142
- if isinstance(df.columns, pd.MultiIndex):
143
- for name in ["Adj Close", "Close"]:
144
- if name in df.columns.get_level_values(0):
145
- s = df.xs(name, axis=1, level=0)
146
- if s.shape[1] > 1:
147
- s = s.iloc[:, 0]
148
- return pd.to_numeric(s.squeeze(), errors="coerce").dropna()
149
- if "Adj Close" in df.columns:
150
- return pd.to_numeric(df["Adj Close"], errors="coerce").dropna()
151
- if "Close" in df.columns:
152
- return pd.to_numeric(df["Close"], errors="coerce").dropna()
153
- num_cols = df.select_dtypes(include=[np.number]).columns
154
- if len(num_cols) == 0:
155
- raise RuntimeError(f"No numeric close for {ticker}")
156
- return pd.Series(df[num_cols[-1]]).astype(float)
157
-
158
- def _build_tsdf(tickers, start, interval, rv_window, annualize=True) -> TimeSeriesDataFrame:
159
- rows = []
160
- for t in tickers:
161
- s_close = _download_close(t, start, interval)
162
- r = np.log(s_close).diff().dropna()
163
- rv = r.rolling(rv_window, min_periods=rv_window).std()
164
- if annualize:
165
- rv = rv * np.sqrt(252.0)
166
- rv = rv.dropna()
167
- rows.append(pd.DataFrame({"item_id": t, "timestamp": rv.index, "target": rv.values}))
168
- df_long = pd.concat(rows, ignore_index=True)
169
- tsdf = TimeSeriesDataFrame.from_data_frame(df_long, id_column="item_id", timestamp_column="timestamp")
170
-
171
- # Frequenz erzwingen/vereinheitlichen (gegen "Cannot infer frequency")
172
- freq = interval_to_freq(interval)
173
- try:
174
- tsdf = tsdf.convert_frequency(freq=freq) # reguläre Zeitachsen je item
175
- except Exception:
176
- # Fallback: per GroupBy resamplen (asfreq) + forward-fill kleiner Lücken
177
- def _regularize(g):
178
- g = g.set_index("timestamp").asfreq(freq)
179
- g["target"] = g["target"].ffill()
180
- g["item_id"] = g["item_id"].ffill().bfill()
181
- return g.reset_index()
182
- reg = (
183
- df_long.groupby("item_id", group_keys=False)
184
- .apply(_regularize)
185
- )
186
- tsdf = TimeSeriesDataFrame.from_data_frame(reg, id_column="item_id", timestamp_column="timestamp")
187
- return tsdf
188
-
189
- def ensure_finetuned_predictor(log_cb=print):
190
- if not AGTS_AVAILABLE:
191
- log_cb("AutoGluon not available; using Zero-Shot Chronos.")
192
- return None
193
-
194
- if os.path.isdir(FINETUNED_DIR) and os.path.exists(os.path.join(FINETUNED_DIR, "metadata.json")):
195
- try:
196
- predictor = TimeSeriesPredictor.load(FINETUNED_DIR)
197
- log_cb(f"Loaded finetuned predictor from {FINETUNED_DIR}.")
198
- return predictor
199
- except Exception as e:
200
- log_cb(f"Existing predictor could not be loaded, retraining. Reason: {e}")
201
-
202
- os.makedirs(FINETUNED_DIR, exist_ok=True)
203
- log_cb("No finetuned predictor found. Starting on-device fine-tuning (Chronos-Bolt)...")
204
-
205
- tsdf = _build_tsdf([t.strip() for t in AUTO_TICKERS if t.strip()],
206
- AUTO_START, AUTO_INTERVAL, RV_WINDOW, annualize=True)
207
-
208
- freq = interval_to_freq(AUTO_INTERVAL)
209
- predictor = TimeSeriesPredictor(
210
- prediction_length=PREDICTION_LENGTH,
211
- target="target",
212
- eval_metric="WQL",
213
- freq=freq, # <<<<<< WICHTIG
214
- )
215
-
216
- hyperparams = {
217
- "Chronos": {
218
- "model_path": AUTO_MODEL_PATH,
219
- "fine_tune": True,
220
- "fine_tune_steps": AUTO_STEPS,
221
- "fine_tune_lr": AUTO_LR,
222
- }
223
- }
224
-
225
- predictor.fit(train_data=tsdf, hyperparameters=hyperparams, time_limit=None, presets=None)
226
- predictor.save(FINETUNED_DIR)
227
- log_cb(f"Saved finetuned predictor to: {FINETUNED_DIR}")
228
- return predictor
229
-
230
- # --------------------
231
- # Modelle laden
232
- # --------------------
233
- pipe = None
234
- ag_predictor = None
235
-
236
- def _load_models():
237
- global pipe, ag_predictor
238
- ag_predictor = ensure_finetuned_predictor(log_cb=lambda m: print(f"[AutoFT] {m}"))
239
- if ag_predictor is None:
240
- print(f"[AutoFT] Falling back to Zero-Shot: {MODEL_ID_FALLBACK}")
241
- pipe = ChronosPipeline.from_pretrained(
242
- MODEL_ID_FALLBACK,
243
- device_map="auto",
244
- torch_dtype=dtype,
245
- )
246
- else:
247
- pipe = None
248
-
249
- _load_models()
250
-
251
- # --------------------
252
- # Forecast backends
253
- # --------------------
254
- def _predict_with_chronos(rv_train: np.ndarray, H: int) -> np.ndarray:
255
- random.seed(0); np.random.seed(0); torch.manual_seed(0)
256
- if torch.cuda.is_available():
257
- torch.cuda.manual_seed_all(0)
258
- context = torch.tensor(rv_train, dtype=torch.float32)
259
- fcst = pipe.predict(context, prediction_length=H, num_samples=NUM_SAMPLES)
260
- return fcst[0].cpu().numpy()[0]
261
-
262
- def _predict_with_ag(rv_train_idx: pd.DatetimeIndex, rv_train: np.ndarray, H: int) -> np.ndarray:
263
- ts = pd.DataFrame({"item_id": "series", "timestamp": rv_train_idx, "target": rv_train})
264
- ts_df = TimeSeriesDataFrame.from_data_frame(ts, id_column="item_id", timestamp_column="timestamp")
265
- # Für Inferenz sicherstellen, dass Frequenz konsistent ist:
266
- freq = interval_to_freq("1d") # rv_train_idx kommt von daily-Daten im UI; falls nicht, kannst du hier dynamisch mappen
267
- try:
268
- ts_df = ts_df.convert_frequency(freq=freq)
269
- except Exception:
270
- pass
271
- preds = ag_predictor.predict(ts_df, prediction_length=H)
272
- if 0.5 in preds.quantile_levels:
273
- return preds.loc[("series", 0.5)].to_numpy()
274
- return preds.mean(axis=1).loc["series"].to_numpy()
275
-
276
- # --------------------
277
- # App-Logik (gleichbleibende Funktionalität)
278
- # --------------------
279
- def run_for_ticker(tickers: str, start: str, interval: str, use_calibration: bool):
280
- tick_list = [t.strip() for t in tickers.replace(";", ",").replace("|", ",").split(",") if t.strip()]
281
- if not tick_list:
282
- raise gr.Error("Please enter at least one ticker, e.g. AAPL or NESN.SW")
283
- ticker = tick_list[0]
284
-
285
- try:
286
- csv_path = pipe2.update_ticker_csv(ticker, start=start, interval=interval)
287
- except Exception as e:
288
- raise gr.Error(
289
- f"Data fetch failed for '{ticker}'. Tip: ensure exchange suffixes (e.g., NESN.SW, BMW.DE, VOD.L).\n{e}"
290
- )
291
-
292
- try:
293
- df = pd.read_csv(csv_path, index_col=0, parse_dates=True)
294
- if not isinstance(df.index, pd.DatetimeIndex):
295
- df = pd.read_csv(csv_path)
296
- except Exception:
297
- df = pd.read_csv(csv_path)
298
-
299
- dates = _extract_dates(df)
300
- close = _extract_close(df)
301
-
302
- rv = compute_realized_vol(close, window=RV_WINDOW, annualize=ANNUALIZE).to_numpy()
303
- n = len(rv); H = PREDICTION_LENGTH
304
- if n <= H + 5:
305
- raise gr.Error(f"Vol series too short after rolling window. Need > {H+5}, got {n}.")
306
- rv_train = rv[: n - H]
307
- rv_test = rv[n - H :]
308
-
309
- if ag_predictor is not None and isinstance(dates, np.ndarray) and isinstance(df.index, pd.DatetimeIndex):
310
- rv_index = df.index[-len(rv):][:len(rv_train)]
311
- path_pred = _predict_with_ag(rv_index, rv_train, H)
312
- provider = "AutoGluon (finetuned)"
313
- else:
314
- path_pred = _predict_with_chronos(rv_train, H)
315
- provider = f"Chronos {MODEL_ID_FALLBACK.split('/')[-1]}"
316
-
317
- if use_calibration:
318
- alpha, path_pred_cal = bias_scale_calibration(rv_test, path_pred)
319
- metrics_raw = compute_metrics(rv_test, path_pred)
320
- metrics_cal = compute_metrics(rv_test, path_pred_cal)
321
- else:
322
- alpha, path_pred_cal, metrics_cal = None, None, None
323
- metrics_raw = compute_metrics(rv_test, path_pred)
324
-
325
- fig = plt.figure(figsize=(10, 4))
326
- H0 = len(rv_train)
327
- if isinstance(dates, np.ndarray) and len(dates) >= len(close):
328
- dates_rv = np.array(dates[-len(rv):])
329
- x_hist = dates_rv[:H0]
330
- x_fcst = dates_rv[H0:]
331
- x_lbl = "date"
332
- else:
333
- x_hist = np.arange(H0)
334
- x_fcst = np.arange(H0, H0 + H)
335
- x_lbl = "time index"
336
-
337
- plt.plot(x_hist, rv_train, label="realized vol (history)")
338
- plt.plot(x_fcst, rv_test, label="realized vol (actual last 30)")
339
- plt.plot(x_fcst, path_pred, linestyle="--", label="forecast (raw path)")
340
- if use_calibration:
341
- plt.plot(x_fcst, path_pred_cal, linestyle="--", label=f"forecast (calibrated, α={alpha:.3f})")
342
-
343
- plt.title(f"{ticker.upper()} — Volatility Forecast (RV={RV_WINDOW}, H={H}, interval={interval}, model={provider})")
344
- plt.xlabel(x_lbl); plt.ylabel("realized volatility")
345
- plt.legend(loc="best"); plt.tight_layout()
346
-
347
- df_days = pd.DataFrame({
348
- "date": x_fcst,
349
- "actual_vol": rv_test,
350
- "forecast_raw": path_pred,
351
- })
352
- if use_calibration:
353
- df_days["forecast_calibrated"] = path_pred_cal
354
- df_days["abs_pct_error_raw_%"] = np.abs((path_pred - rv_test) / np.maximum(EPS, np.abs(rv_test))) * 100
355
- df_days["abs_pct_error_cal_%"] = np.abs((path_pred_cal - rv_test) / np.maximum(EPS, np.abs(rv_test))) * 100
356
- else:
357
- df_days["abs_pct_error_raw_%"] = np.abs((path_pred - rv_test) / np.maximum(EPS, np.abs(rv_test))) * 100
358
-
359
- out = {
360
- "ticker": ticker.upper(),
361
- "csv_path": csv_path,
362
- "config": {
363
- "start": start,
364
- "interval": interval,
365
- "rv_window": RV_WINDOW,
366
- "prediction_length": H,
367
- "num_samples": NUM_SAMPLES,
368
- "annualized": ANNUALIZE,
369
- "point_forecast": "median_quantile" if ag_predictor is not None else "single_sample_path",
370
- "model": provider,
371
- "auto_finetuned_dir": FINETUNED_DIR,
372
- },
373
- "metrics_raw": {k: round(v, 4) for k, v in metrics_raw.items()},
374
- }
375
- metrics_md = f"**RAW** — MAPE {metrics_raw['MAPE']:.2f}% | MPE {metrics_raw['MPE']:.2f}% | RMSE {metrics_raw['RMSE']:.5f}"
376
- if use_calibration and metrics_cal is not None:
377
- out["alpha"] = alpha
378
- out["metrics_calibrated"] = {k: round(v, 4) for k, v in metrics_cal.items()}
379
- metrics_md += f"\n**CALIBRATED** — MAPE {metrics_cal['MAPE']:.2f}% | MPE {metrics_cal['MPE']:.2f}% | RMSE {metrics_cal['RMSE']:.5f}"
380
-
381
- return fig, out, df_days, metrics_md
382
-
383
- # --------------------
384
- # UI
385
- # --------------------
386
- with gr.Blocks(title="Volatility Forecast • Auto-Finetuned Chronos-Bolt (on-device)") as demo:
387
- gr.Markdown(
388
- "### Predict last 30 days of realized volatility for any ticker\n"
389
- "- **Auto-Finetune on first launch** (Chronos-Bolt via AutoGluon) → afterwards always uses the finetuned predictor.\n"
390
- "- If AutoGluon is unavailable or training fails, falls back to Zero-Shot Chronos.\n"
391
- "- Data via **yfinance** (pipeline_v2.update_ticker_csv).\n"
392
- "- Day-by-day comparison with **MAPE/MPE/RMSE** and optional **α-calibration**."
393
- )
394
- with gr.Row():
395
- tickers_in = gr.Textbox(value="AAPL", label="Ticker")
396
- with gr.Row():
397
- start_in = gr.Textbox(value="2015-01-01", label="Start date (YYYY-MM-DD)")
398
- interval_in = gr.Dropdown(choices=["1d", "1wk", "1mo"], value="1d", label="Interval")
399
- calib_in = gr.Checkbox(value=True, label="Apply bias/scale calibration (α)")
400
- run_btn = gr.Button("Run", variant="primary")
401
-
402
- plot = gr.Plot(label="Forecast vs Actual (last 30 days)")
403
- meta = gr.JSON(label="Run config & metrics")
404
- table = gr.Dataframe(label="Per-day comparison", wrap=True)
405
- metrics = gr.Markdown(label="Summary")
406
-
407
- run_btn.click(run_for_ticker, inputs=[tickers_in, start_in, interval_in, calib_in],
408
- outputs=[plot, meta, table, metrics])
409
-
410
- if __name__ == "__main__":
411
- demo.launch()
 
 
 
 
 
 
 
1
  import gradio as gr
 
 
2
  import matplotlib.pyplot as plt
3
+ import pandas as pd
4
+ from utils_vol import fetch_close_series, realized_vol
5
+ from autogluon.timeseries import TimeSeriesPredictor
6
+ from train_autogluon import train_bolt_small
7
+ import os
8
+
9
+ MODEL_DIR = "/mnt/data/AutogluonChronosBoltSmall"
10
+
11
+ # ---------- Handlers ----------
12
+
13
def predict_vol(ticker, start, interval):
    """Forecast realized volatility for one ticker and plot it against history.

    Loads the saved predictor from ``MODEL_DIR``, downloads closing prices via
    ``fetch_close_series``, derives the realized-volatility series with
    ``realized_vol`` and asks the predictor for a forecast.

    Args:
        ticker: Symbol forwarded to ``fetch_close_series``.
        start: Start date forwarded to ``fetch_close_series``.
        interval: Bar interval forwarded to ``fetch_close_series``.

    Returns:
        A matplotlib ``Figure`` showing the history and the median ("0.5")
        forecast.

    Raises:
        gr.Error: If ``MODEL_DIR`` is not an existing directory.
    """
    if not os.path.isdir(MODEL_DIR):
        raise gr.Error("Kein trainiertes Modell gefunden. Bitte zuerst trainieren.")
    predictor = TimeSeriesPredictor.load(MODEL_DIR)
    close = fetch_close_series(ticker, start=start, interval=interval)
    rv = realized_vol(close)

    item_id = "series_1"
    df = pd.DataFrame({"timestamp": rv.index, "target": rv.values, "item_id": item_id})
    forecast = predictor.predict(df)
    # The forecast is indexed by (item_id, timestamp). Selecting the single
    # item leaves a plain timestamp index that can be used as the x axis.
    median = forecast.loc[item_id]["0.5"]

    fig, ax = plt.subplots(figsize=(8, 4))
    ax.plot(rv.index, rv.values, label="Historie")
    ax.plot(median.index, median.values, "--", label="Forecast (Median)")
    ax.legend()
    ax.set_title(f"{ticker} Volatilitätsprognose (Chronos-Bolt-Small)")
    # Detach the figure from pyplot's global registry so repeated requests do
    # not accumulate open figures; the Figure object itself stays renderable.
    plt.close(fig)
    return fig
28
+
29
def train_model(ticker, start, interval):
    """Fine-tune the model for one ticker and return a status message.

    Args:
        ticker: Symbol forwarded to ``train_bolt_small``.
        start: Start date forwarded to ``train_bolt_small``.
        interval: Bar interval forwarded to ``train_bolt_small``.

    Returns:
        A message naming ``MODEL_DIR`` as the storage location.

    Raises:
        gr.Error: If training raises ``ValueError``; the original message is
            passed through so the UI shows the actual cause.
    """
    try:
        train_bolt_small(ticker=ticker, start=start, interval=interval)
    except ValueError as exc:
        # Report input/data problems the same way predict_vol reports errors.
        raise gr.Error(str(exc)) from exc
    return f"Training abgeschlossen und unter {MODEL_DIR} gespeichert."
32
+
33
def clear_model():
    """Remove the saved model directory and report what happened.

    Returns:
        A status message: one text when ``MODEL_DIR`` existed and was deleted,
        another when there was nothing to delete.
    """
    import shutil

    # Guard clause: nothing to do when the directory is absent.
    if not os.path.isdir(MODEL_DIR):
        return "Kein Modell zum Löschen gefunden."

    shutil.rmtree(MODEL_DIR)
    return "Modell gelöscht."
39
+
40
+ # ---------- UI ----------
41
# Build the three-tab interface; each tab wires one button to one handler.
with gr.Blocks(title="Chronos-Bolt-Small (CPU) Fine-Tuning App") as demo:
    gr.Markdown("## Chronos-Bolt-Small – Volatilitäts-Vorhersage\n"
                "Trainiert auf CPU innerhalb von ~10 Minuten über AutoGluon.\n"
                "• Tab **Train**: neues Modell fine-tunen\n"
                "• Tab **Predict**: Vorhersage anzeigen\n"
                "• Tab **Manage**: Modell löschen")

    # Predict tab: plot history plus forecast for the chosen ticker.
    with gr.Tab("Predict"):
        t1 = gr.Textbox(label="Ticker", value="AAPL")
        s1 = gr.Textbox(label="Startdatum", value="2015-01-01")
        i1 = gr.Dropdown(["1d","1wk","1mo"], value="1d", label="Intervall")
        btn_p = gr.Button("Vorhersagen")
        out_p = gr.Plot()
        btn_p.click(predict_vol, inputs=[t1, s1, i1], outputs=[out_p])

    # Train tab: fine-tune a new model and show the status text.
    with gr.Tab("Train"):
        t2 = gr.Textbox(label="Ticker", value="AAPL")
        s2 = gr.Textbox(label="Startdatum", value="2015-01-01")
        i2 = gr.Dropdown(["1d","1wk","1mo"], value="1d", label="Intervall")
        btn_t = gr.Button("Train (AutoGluon Chronos-Bolt-Small)")
        out_t = gr.Textbox(label="Train-Log", lines=8)
        btn_t.click(train_model, inputs=[t2, s2, i2], outputs=[out_t])

    # Manage tab: delete the stored model.
    with gr.Tab("Manage"):
        btn_c = gr.Button("Modell löschen")
        out_c = gr.Textbox(label="Status")
        btn_c.click(clear_model, outputs=[out_c])

# Only start the server when executed as a script, so importing this module
# (for reuse of the handlers or of ``demo``) has no side effects.
if __name__ == "__main__":
    demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -1,8 +1,8 @@
1
- gradio>=4.0
2
- chronos-forecasting>=1.5
3
  torch>=2.2
4
- pandas>=2.0
5
  numpy>=1.26
 
 
6
  matplotlib>=3.8
7
  yfinance>=0.2.40
8
- autogluon.timeseries>=1.3,<1.6
 
1
+ autogluon.timeseries==1.4.0
2
+ chronos-forecasting>=2.0.0
3
  torch>=2.2
 
4
  numpy>=1.26
5
+ pandas>=2.0
6
+ gradio>=4.0
7
  matplotlib>=3.8
8
  yfinance>=0.2.40
 
train_autogluon.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from autogluon.timeseries import TimeSeriesPredictor
2
+ from utils_vol import fetch_close_series, realized_vol, rv_to_autogluon_df
3
+
4
def train_bolt_small(ticker="AAPL", start="2015-01-01", interval="1d",
                     prediction_length=30, time_limit=900,
                     model_dir="/mnt/data/AutogluonChronosBoltSmall", freq=None):
    """Fine-tune Chronos-Bolt-Small with AutoGluon on one ticker's volatility.

    Downloads closing prices, converts them to a realized-volatility series
    and fits a ``TimeSeriesPredictor`` with a single fine-tuned Chronos model.

    Args:
        ticker: Symbol forwarded to ``fetch_close_series``.
        start: Start date forwarded to ``fetch_close_series``.
        interval: Bar interval forwarded to ``fetch_close_series``; also used
            to pick a default ``freq``.
        prediction_length: Forecast horizon in time steps.
        time_limit: Training time budget in seconds (default: 15 minutes).
        model_dir: Directory the predictor is stored in.
        freq: Pandas frequency string for the predictor. When ``None`` it is
            derived from ``interval``; unknown intervals leave it unset so
            AutoGluon tries to infer it from the data.

    Returns:
        The fitted ``TimeSeriesPredictor``.
    """
    print(f"[AutoFT] Lade {ticker}...")
    close = fetch_close_series(ticker, start=start, interval=interval)
    rv = realized_vol(close)
    df = rv_to_autogluon_df(rv)

    if freq is None:
        # Market data has gaps (weekends, holidays), so the frequency usually
        # cannot be inferred; state it explicitly and let AutoGluon regularize
        # the series. NOTE(review): weekly/monthly aliases assume yfinance
        # labels bars by period start - confirm against real downloads.
        freq = {"1d": "B", "1wk": "W", "1mo": "MS"}.get(interval)

    predictor = TimeSeriesPredictor(
        path=model_dir,
        prediction_length=prediction_length,
        freq=freq,
        eval_metric="WQL",
        # Quantile levels are a predictor-level setting, not a Chronos
        # hyperparameter; "0.5" is the column the app plots as the median.
        quantile_levels=[0.1, 0.5, 0.9],
        verbosity=2,
    )

    predictor.fit(
        train_data=df,
        enable_ensemble=False,
        num_val_windows=1,
        hyperparameters={
            "Chronos": {
                "model_path": "autogluon/chronos-bolt-small",
                "fine_tune": True,
                "fine_tune_steps": 200,
                "fine_tune_lr": 1e-4,
                "context_length": 128,
            }
        },
        time_limit=time_limit,
    )

    print("✅ Training abgeschlossen. Modellpfad:", predictor.path)
    return predictor
utils_vol.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import yfinance as yf
2
+ import numpy as np
3
+ import pandas as pd
4
+
5
def fetch_close_series(ticker: str, start="2015-01-01", interval="1d") -> pd.Series:
    """Download price history from yfinance and return the closing prices.

    Args:
        ticker: Symbol to download.
        start: First date to request.
        interval: Bar interval passed to ``yf.download``.

    Returns:
        The closing-price column with missing values dropped. "Adj Close" is
        preferred, then "Close"; if neither exists the last numeric column is
        used.

    Raises:
        ValueError: If the download is empty or contains no numeric column.
    """
    df = yf.download(ticker, start=start, interval=interval, progress=False, threads=True)
    if df is None or df.empty:
        raise ValueError(f"Keine Daten für {ticker}.")

    # yfinance may return (field, ticker) MultiIndex columns even for a single
    # ticker. Reduce them to the field level so the lookups below see plain
    # column names.
    if isinstance(df.columns, pd.MultiIndex):
        df = df.copy()
        df.columns = df.columns.get_level_values(0)

    candidates = ("Adj Close", "Close", "close", "adj close")
    col = next((name for name in candidates if name in df.columns), None)
    if col is None:
        numeric_cols = df.select_dtypes("number").columns
        if len(numeric_cols) == 0:
            raise ValueError(f"Keine numerische Kursspalte für {ticker}.")
        col = numeric_cols[-1]

    close = df[col]
    # Duplicate column names (several tickers flattened above) yield a
    # DataFrame; keep the first column so callers always get a Series.
    if isinstance(close, pd.DataFrame):
        close = close.iloc[:, 0]
    return close.dropna()
17
+
18
def realized_vol(close: pd.Series, window=20, annualize=True, periods_per_year=252) -> pd.Series:
    """Compute rolling realized volatility from a price series.

    Volatility is the rolling standard deviation of log returns over
    ``window`` observations; only full windows produce a value.

    Args:
        close: Price series.
        window: Number of returns per rolling window.
        annualize: Whether to scale by ``sqrt(periods_per_year)``.
        periods_per_year: Observations per year used for annualization. The
            default of 252 corresponds to daily bars; pass e.g. 52 for weekly
            or 12 for monthly data.

    Returns:
        The volatility series with the incomplete leading windows dropped.
    """
    log_returns = np.log(close).diff().dropna()
    rv = log_returns.rolling(window, min_periods=window).std()
    if annualize:
        rv = rv * np.sqrt(periods_per_year)
    return rv.dropna()
24
+
25
def rv_to_autogluon_df(rv: pd.Series) -> pd.DataFrame:
    """Reshape a realized-volatility series into AutoGluon's long format.

    Args:
        rv: Volatility series; its index becomes the ``timestamp`` column and
            its values the ``target`` column.

    Returns:
        A DataFrame with columns ``timestamp``, ``target`` and a constant
        ``item_id`` of ``"series_1"``.
    """
    columns = {"timestamp": rv.index, "target": rv.values}
    return pd.DataFrame(columns).assign(item_id="series_1")