Gil Stetler committed · Commit 09f6668 · Parent(s): c5cdf21

finetune chronos-bolt-small
Browse files:
- app.py +67 -409
- requirements.txt +4 -4
- train_autogluon.py +40 -0
- utils_vol.py +29 -0
app.py
CHANGED
@@ -1,411 +1,69 @@
-# app.py
-import os, random, time
-from typing import Tuple
-import numpy as np
-import pandas as pd
-import torch
 import gradio as gr
-import matplotlib
-matplotlib.use("Agg")
 import matplotlib.pyplot as plt
-
-from
⋮
-        if name in df.columns.get_level_values(0):
-            sub = df.xs(name, axis=1, level=0)
-            if sub.shape[1] > 1:
-                sub = sub.iloc[:, 0]
-            return pd.to_numeric(sub.squeeze(), errors="coerce").dropna()
-    mapping = {c.lower(): c for c in df.columns}
-    for name in ["adj close", "adj_close", "close", "price"]:
-        if name in mapping:
-            return pd.to_numeric(df[mapping[name]], errors="coerce").dropna()
-    num_cols = df.select_dtypes(include=[np.number]).columns
-    if len(num_cols) == 0:
-        raise gr.Error("No numeric price column found in downloaded data.")
-    return pd.Series(df[num_cols[-1]]).astype(float)
-
-def _extract_dates(df: pd.DataFrame):
-    if isinstance(df.index, pd.DatetimeIndex):
-        return df.index.to_numpy()
-    mapping = {c.lower(): c for c in df.columns}
-    for name in ["date", "time", "timestamp"]:
-        if name in mapping:
-            try:
-                return pd.to_datetime(df[mapping[name]]).to_numpy()
-            except Exception:
-                pass
-    return np.arange(len(df))
-
-def compute_realized_vol(close: pd.Series, window: int = 20, annualize: bool = True) -> pd.Series:
-    r = np.log(close).diff().dropna()
-    rv = r.rolling(window, min_periods=window).std()
-    if annualize:
-        rv = rv * np.sqrt(252.0)
-    return rv.dropna().reset_index(drop=True)
-
-def bias_scale_calibration(y_true: np.ndarray, y_pred: np.ndarray) -> Tuple[float, np.ndarray]:
-    alpha = float(np.sum(y_true * y_pred) / (np.sum(y_pred**2) + EPS))
-    return alpha, alpha * y_pred
-
-def compute_metrics(y_true: np.ndarray, y_pred: np.ndarray) -> dict:
-    err = y_pred - y_true
-    denom = np.maximum(EPS, np.abs(y_true))
-    mape = float((np.abs(err) / denom).mean() * 100)
-    mpe = float((err / np.maximum(EPS, y_true)).mean() * 100)
-    rmse = float(np.sqrt(np.mean(err**2)))
-    return {"MAPE": mape, "MPE": mpe, "RMSE": rmse}
-
-# ---- frequency helpers ----
-def interval_to_freq(interval: str) -> str:
-    interval = (interval or "").lower().strip()
-    if interval == "1d":
-        return "B"      # business day
-    if interval == "1wk":
-        return "W-FRI"  # avoid weekends; trading week ends Friday
-    if interval == "1mo":
-        return "M"      # monthly (calendar end)
-    return "B"
-
-# --------------------
-# Auto-finetune
-# --------------------
-def _download_close(ticker: str, start: str, interval: str) -> pd.Series:
-    import yfinance as yf
-    df = yf.download(ticker, start=start, interval=interval, auto_adjust=False, progress=False, threads=True)
-    if df is None or df.empty:
-        raise RuntimeError(f"No data for {ticker}")
-    if isinstance(df.columns, pd.MultiIndex):
-        for name in ["Adj Close", "Close"]:
-            if name in df.columns.get_level_values(0):
-                s = df.xs(name, axis=1, level=0)
-                if s.shape[1] > 1:
-                    s = s.iloc[:, 0]
-                return pd.to_numeric(s.squeeze(), errors="coerce").dropna()
-    if "Adj Close" in df.columns:
-        return pd.to_numeric(df["Adj Close"], errors="coerce").dropna()
-    if "Close" in df.columns:
-        return pd.to_numeric(df["Close"], errors="coerce").dropna()
-    num_cols = df.select_dtypes(include=[np.number]).columns
-    if len(num_cols) == 0:
-        raise RuntimeError(f"No numeric close for {ticker}")
-    return pd.Series(df[num_cols[-1]]).astype(float)
-
-def _build_tsdf(tickers, start, interval, rv_window, annualize=True) -> TimeSeriesDataFrame:
-    rows = []
-    for t in tickers:
-        s_close = _download_close(t, start, interval)
-        r = np.log(s_close).diff().dropna()
-        rv = r.rolling(rv_window, min_periods=rv_window).std()
-        if annualize:
-            rv = rv * np.sqrt(252.0)
-        rv = rv.dropna()
-        rows.append(pd.DataFrame({"item_id": t, "timestamp": rv.index, "target": rv.values}))
-    df_long = pd.concat(rows, ignore_index=True)
-    tsdf = TimeSeriesDataFrame.from_data_frame(df_long, id_column="item_id", timestamp_column="timestamp")
-
-    # Force a uniform frequency (guards against "Cannot infer frequency")
-    freq = interval_to_freq(interval)
-    try:
-        tsdf = tsdf.convert_frequency(freq=freq)  # regular time axis per item
-    except Exception:
-        # Fallback: resample per group (asfreq) + forward-fill small gaps
-        def _regularize(g):
-            g = g.set_index("timestamp").asfreq(freq)
-            g["target"] = g["target"].ffill()
-            g["item_id"] = g["item_id"].ffill().bfill()
-            return g.reset_index()
-        reg = (
-            df_long.groupby("item_id", group_keys=False)
-            .apply(_regularize)
-        )
-        tsdf = TimeSeriesDataFrame.from_data_frame(reg, id_column="item_id", timestamp_column="timestamp")
-    return tsdf
-
-def ensure_finetuned_predictor(log_cb=print):
-    if not AGTS_AVAILABLE:
-        log_cb("AutoGluon not available; using Zero-Shot Chronos.")
-        return None
-
-    if os.path.isdir(FINETUNED_DIR) and os.path.exists(os.path.join(FINETUNED_DIR, "metadata.json")):
-        try:
-            predictor = TimeSeriesPredictor.load(FINETUNED_DIR)
-            log_cb(f"Loaded finetuned predictor from {FINETUNED_DIR}.")
-            return predictor
-        except Exception as e:
-            log_cb(f"Existing predictor could not be loaded, retraining. Reason: {e}")
-
-    os.makedirs(FINETUNED_DIR, exist_ok=True)
-    log_cb("No finetuned predictor found. Starting on-device fine-tuning (Chronos-Bolt)...")
-
-    tsdf = _build_tsdf([t.strip() for t in AUTO_TICKERS if t.strip()],
-                       AUTO_START, AUTO_INTERVAL, RV_WINDOW, annualize=True)
-
-    freq = interval_to_freq(AUTO_INTERVAL)
-    predictor = TimeSeriesPredictor(
-        prediction_length=PREDICTION_LENGTH,
-        target="target",
-        eval_metric="WQL",
-        freq=freq,  # <<<<<< IMPORTANT
-    )
-
-    hyperparams = {
-        "Chronos": {
-            "model_path": AUTO_MODEL_PATH,
-            "fine_tune": True,
-            "fine_tune_steps": AUTO_STEPS,
-            "fine_tune_lr": AUTO_LR,
-        }
-    }
-
-    predictor.fit(train_data=tsdf, hyperparameters=hyperparams, time_limit=None, presets=None)
-    predictor.save(FINETUNED_DIR)
-    log_cb(f"Saved finetuned predictor to: {FINETUNED_DIR}")
-    return predictor
-
-# --------------------
-# Load models
-# --------------------
-pipe = None
-ag_predictor = None
-
-def _load_models():
-    global pipe, ag_predictor
-    ag_predictor = ensure_finetuned_predictor(log_cb=lambda m: print(f"[AutoFT] {m}"))
-    if ag_predictor is None:
-        print(f"[AutoFT] Falling back to Zero-Shot: {MODEL_ID_FALLBACK}")
-        pipe = ChronosPipeline.from_pretrained(
-            MODEL_ID_FALLBACK,
-            device_map="auto",
-            torch_dtype=dtype,
-        )
-    else:
-        pipe = None
-
-_load_models()
-
-# --------------------
-# Forecast backends
-# --------------------
-def _predict_with_chronos(rv_train: np.ndarray, H: int) -> np.ndarray:
-    random.seed(0); np.random.seed(0); torch.manual_seed(0)
-    if torch.cuda.is_available():
-        torch.cuda.manual_seed_all(0)
-    context = torch.tensor(rv_train, dtype=torch.float32)
-    fcst = pipe.predict(context, prediction_length=H, num_samples=NUM_SAMPLES)
-    return fcst[0].cpu().numpy()[0]
-
-def _predict_with_ag(rv_train_idx: pd.DatetimeIndex, rv_train: np.ndarray, H: int) -> np.ndarray:
-    ts = pd.DataFrame({"item_id": "series", "timestamp": rv_train_idx, "target": rv_train})
-    ts_df = TimeSeriesDataFrame.from_data_frame(ts, id_column="item_id", timestamp_column="timestamp")
-    # For inference, make sure the frequency is consistent:
-    freq = interval_to_freq("1d")  # rv_train_idx comes from daily data in the UI; map dynamically otherwise
-    try:
-        ts_df = ts_df.convert_frequency(freq=freq)
-    except Exception:
-        pass
-    preds = ag_predictor.predict(ts_df, prediction_length=H)
-    if 0.5 in preds.quantile_levels:
-        return preds.loc[("series", 0.5)].to_numpy()
-    return preds.mean(axis=1).loc["series"].to_numpy()
-
-# --------------------
-# App logic (functionality unchanged)
-# --------------------
-def run_for_ticker(tickers: str, start: str, interval: str, use_calibration: bool):
-    tick_list = [t.strip() for t in tickers.replace(";", ",").replace("|", ",").split(",") if t.strip()]
-    if not tick_list:
-        raise gr.Error("Please enter at least one ticker, e.g. AAPL or NESN.SW")
-    ticker = tick_list[0]
-
-    try:
-        csv_path = pipe2.update_ticker_csv(ticker, start=start, interval=interval)
-    except Exception as e:
-        raise gr.Error(
-            f"Data fetch failed for '{ticker}'. Tip: ensure exchange suffixes (e.g., NESN.SW, BMW.DE, VOD.L).\n{e}"
-        )
-
-    try:
-        df = pd.read_csv(csv_path, index_col=0, parse_dates=True)
-        if not isinstance(df.index, pd.DatetimeIndex):
-            df = pd.read_csv(csv_path)
-    except Exception:
-        df = pd.read_csv(csv_path)
-
-    dates = _extract_dates(df)
-    close = _extract_close(df)
-
-    rv = compute_realized_vol(close, window=RV_WINDOW, annualize=ANNUALIZE).to_numpy()
-    n = len(rv); H = PREDICTION_LENGTH
-    if n <= H + 5:
-        raise gr.Error(f"Vol series too short after rolling window. Need > {H+5}, got {n}.")
-    rv_train = rv[: n - H]
-    rv_test = rv[n - H:]
-
-    if ag_predictor is not None and isinstance(dates, np.ndarray) and isinstance(df.index, pd.DatetimeIndex):
-        rv_index = df.index[-len(rv):][:len(rv_train)]
-        path_pred = _predict_with_ag(rv_index, rv_train, H)
-        provider = "AutoGluon (finetuned)"
-    else:
-        path_pred = _predict_with_chronos(rv_train, H)
-        provider = f"Chronos {MODEL_ID_FALLBACK.split('/')[-1]}"
-
-    if use_calibration:
-        alpha, path_pred_cal = bias_scale_calibration(rv_test, path_pred)
-        metrics_raw = compute_metrics(rv_test, path_pred)
-        metrics_cal = compute_metrics(rv_test, path_pred_cal)
-    else:
-        alpha, path_pred_cal, metrics_cal = None, None, None
-        metrics_raw = compute_metrics(rv_test, path_pred)
-
-    fig = plt.figure(figsize=(10, 4))
-    H0 = len(rv_train)
-    if isinstance(dates, np.ndarray) and len(dates) >= len(close):
-        dates_rv = np.array(dates[-len(rv):])
-        x_hist = dates_rv[:H0]
-        x_fcst = dates_rv[H0:]
-        x_lbl = "date"
-    else:
-        x_hist = np.arange(H0)
-        x_fcst = np.arange(H0, H0 + H)
-        x_lbl = "time index"
-
-    plt.plot(x_hist, rv_train, label="realized vol (history)")
-    plt.plot(x_fcst, rv_test, label="realized vol (actual last 30)")
-    plt.plot(x_fcst, path_pred, linestyle="--", label="forecast (raw path)")
-    if use_calibration:
-        plt.plot(x_fcst, path_pred_cal, linestyle="--", label=f"forecast (calibrated, α={alpha:.3f})")
-
-    plt.title(f"{ticker.upper()} — Volatility Forecast (RV={RV_WINDOW}, H={H}, interval={interval}, model={provider})")
-    plt.xlabel(x_lbl); plt.ylabel("realized volatility")
-    plt.legend(loc="best"); plt.tight_layout()
-
-    df_days = pd.DataFrame({
-        "date": x_fcst,
-        "actual_vol": rv_test,
-        "forecast_raw": path_pred,
-    })
-    if use_calibration:
-        df_days["forecast_calibrated"] = path_pred_cal
-        df_days["abs_pct_error_raw_%"] = np.abs((path_pred - rv_test) / np.maximum(EPS, np.abs(rv_test))) * 100
-        df_days["abs_pct_error_cal_%"] = np.abs((path_pred_cal - rv_test) / np.maximum(EPS, np.abs(rv_test))) * 100
-    else:
-        df_days["abs_pct_error_raw_%"] = np.abs((path_pred - rv_test) / np.maximum(EPS, np.abs(rv_test))) * 100
-
-    out = {
-        "ticker": ticker.upper(),
-        "csv_path": csv_path,
-        "config": {
-            "start": start,
-            "interval": interval,
-            "rv_window": RV_WINDOW,
-            "prediction_length": H,
-            "num_samples": NUM_SAMPLES,
-            "annualized": ANNUALIZE,
-            "point_forecast": "median_quantile" if ag_predictor is not None else "single_sample_path",
-            "model": provider,
-            "auto_finetuned_dir": FINETUNED_DIR,
-        },
-        "metrics_raw": {k: round(v, 4) for k, v in metrics_raw.items()},
-    }
-    metrics_md = f"**RAW** — MAPE {metrics_raw['MAPE']:.2f}% | MPE {metrics_raw['MPE']:.2f}% | RMSE {metrics_raw['RMSE']:.5f}"
-    if use_calibration and metrics_cal is not None:
-        out["alpha"] = alpha
-        out["metrics_calibrated"] = {k: round(v, 4) for k, v in metrics_cal.items()}
-        metrics_md += f"\n**CALIBRATED** — MAPE {metrics_cal['MAPE']:.2f}% | MPE {metrics_cal['MPE']:.2f}% | RMSE {metrics_cal['RMSE']:.5f}"
-
-    return fig, out, df_days, metrics_md
-
-# --------------------
-# UI
-# --------------------
-with gr.Blocks(title="Volatility Forecast • Auto-Finetuned Chronos-Bolt (on-device)") as demo:
-    gr.Markdown(
-        "### Predict last 30 days of realized volatility for any ticker\n"
-        "- **Auto-Finetune on first launch** (Chronos-Bolt via AutoGluon) → afterwards always uses the finetuned predictor.\n"
-        "- If AutoGluon is unavailable or training fails, falls back to Zero-Shot Chronos.\n"
-        "- Data via **yfinance** (pipeline_v2.update_ticker_csv).\n"
-        "- Day-by-day comparison with **MAPE/MPE/RMSE** and optional **α-calibration**."
-    )
-    with gr.Row():
-        tickers_in = gr.Textbox(value="AAPL", label="Ticker")
-    with gr.Row():
-        start_in = gr.Textbox(value="2015-01-01", label="Start date (YYYY-MM-DD)")
-        interval_in = gr.Dropdown(choices=["1d", "1wk", "1mo"], value="1d", label="Interval")
-        calib_in = gr.Checkbox(value=True, label="Apply bias/scale calibration (α)")
-    run_btn = gr.Button("Run", variant="primary")
-
-    plot = gr.Plot(label="Forecast vs Actual (last 30 days)")
-    meta = gr.JSON(label="Run config & metrics")
-    table = gr.Dataframe(label="Per-day comparison", wrap=True)
-    metrics = gr.Markdown(label="Summary")
-
-    run_btn.click(run_for_ticker, inputs=[tickers_in, start_in, interval_in, calib_in],
-                  outputs=[plot, meta, table, metrics])
-
-if __name__ == "__main__":
-    demo.launch()
+import pandas as pd
+from utils_vol import fetch_close_series, realized_vol
+from autogluon.timeseries import TimeSeriesPredictor
+from train_autogluon import train_bolt_small
+import os
+
+MODEL_DIR = "/mnt/data/AutogluonChronosBoltSmall"
+
+# ---------- Handlers ----------
+
+def predict_vol(ticker, start, interval):
+    if not os.path.isdir(MODEL_DIR):
+        raise gr.Error("No trained model found. Please train one first.")
+    predictor = TimeSeriesPredictor.load(MODEL_DIR)
+    close = fetch_close_series(ticker, start=start, interval=interval)
+    rv = realized_vol(close)
+    df = pd.DataFrame({"timestamp": rv.index, "target": rv.values, "item_id": "series_1"})
+    forecast = predictor.predict(df)
+    # Select the single item; the result is indexed by forecast timestamp,
+    # with quantile columns named "0.1", "0.5", "0.9".
+    f = forecast.loc["series_1"]
+    plt.figure(figsize=(8, 4))
+    plt.plot(rv.index, rv.values, label="history")
+    plt.plot(f.index, f["0.5"], "--", label="forecast (median)")
+    plt.legend()
+    plt.title(f"{ticker} – volatility forecast (Chronos-Bolt-Small)")
+    return plt
+
+def train_model(ticker, start, interval):
+    train_bolt_small(ticker=ticker, start=start, interval=interval)
+    return f"Training finished; model saved under {MODEL_DIR}."
+
+def clear_model():
+    import shutil
+    if os.path.isdir(MODEL_DIR):
+        shutil.rmtree(MODEL_DIR)
+        return "Model deleted."
+    return "No model found to delete."
+
+# ---------- UI ----------
+with gr.Blocks(title="Chronos-Bolt-Small (CPU) Fine-Tuning App") as demo:
+    gr.Markdown("## Chronos-Bolt-Small – volatility forecasting\n"
+                "Trains on CPU in roughly 10 minutes via AutoGluon.\n"
+                "• Tab **Train**: fine-tune a new model\n"
+                "• Tab **Predict**: show a forecast\n"
+                "• Tab **Manage**: delete the model")
+
+    with gr.Tab("Predict"):
+        t1 = gr.Textbox(label="Ticker", value="AAPL")
+        s1 = gr.Textbox(label="Start date", value="2015-01-01")
+        i1 = gr.Dropdown(["1d", "1wk", "1mo"], value="1d", label="Interval")
+        btn_p = gr.Button("Predict")
+        out_p = gr.Plot()
+        btn_p.click(predict_vol, inputs=[t1, s1, i1], outputs=[out_p])
+
+    with gr.Tab("Train"):
+        t2 = gr.Textbox(label="Ticker", value="AAPL")
+        s2 = gr.Textbox(label="Start date", value="2015-01-01")
+        i2 = gr.Dropdown(["1d", "1wk", "1mo"], value="1d", label="Interval")
+        btn_t = gr.Button("Train (AutoGluon Chronos-Bolt-Small)")
+        out_t = gr.Textbox(label="Training log", lines=8)
+        btn_t.click(train_model, inputs=[t2, s2, i2], outputs=[out_t])
+
+    with gr.Tab("Manage"):
+        btn_c = gr.Button("Delete model")
+        out_c = gr.Textbox(label="Status")
+        btn_c.click(clear_model, outputs=[out_c])
+
+demo.launch()
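Note that the new `predict_vol` hands `predictor.predict` a plain long-format frame, which AutoGluon converts internally. If frequency inference ever fails on gappy trading calendars (the problem the removed `_build_tsdf` worked around), the conversion can be made explicit. A minimal sketch, assuming daily bars; the helper name `to_tsdf_business_days` is hypothetical, but `TimeSeriesDataFrame.from_data_frame` and `convert_frequency` are the same calls used in the removed code above:

from autogluon.timeseries import TimeSeriesDataFrame

def to_tsdf_business_days(df_long):
    # df_long has the columns built in predict_vol: timestamp, target, item_id.
    tsdf = TimeSeriesDataFrame.from_data_frame(
        df_long, id_column="item_id", timestamp_column="timestamp"
    )
    # Pin a business-day frequency so nothing has to be inferred;
    # missing trading days become NaN rows.
    return tsdf.convert_frequency(freq="B")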
requirements.txt
CHANGED
@@ -1,8 +1,8 @@
-
-chronos-forecasting>=
+autogluon.timeseries==1.4.0
+chronos-forecasting>=2.0.0
 torch>=2.2
-pandas>=2.0
 numpy>=1.26
+pandas>=2.0
+gradio>=4.0
 matplotlib>=3.8
 yfinance>=0.2.40
-autogluon.timeseries>=1.3,<1.6
train_autogluon.py
ADDED
@@ -0,0 +1,40 @@
+from autogluon.timeseries import TimeSeriesPredictor
+from utils_vol import fetch_close_series, realized_vol, rv_to_autogluon_df
+
+def train_bolt_small(ticker="AAPL", start="2015-01-01", interval="1d",
+                     prediction_length=30, time_limit=900):
+    """
+    Fine-tunes Chronos-Bolt-Small with AutoGluon on CPU.
+    time_limit is in seconds (default: 15 min).
+    """
+    print(f"[AutoFT] Loading {ticker}...")
+    close = fetch_close_series(ticker, start=start, interval=interval)
+    rv = realized_vol(close)
+    df = rv_to_autogluon_df(rv)
+
+    predictor = TimeSeriesPredictor(
+        path="/mnt/data/AutogluonChronosBoltSmall",
+        prediction_length=prediction_length,
+        eval_metric="WQL",
+        verbosity=2,
+    )
+
+    predictor.fit(
+        train_data=df,
+        enable_ensemble=False,
+        num_val_windows=1,
+        hyperparameters={
+            "Chronos": {
+                "model_path": "autogluon/chronos-bolt-small",
+                "fine_tune": True,
+                "fine_tune_steps": 200,
+                "fine_tune_lr": 1e-4,
+                "context_length": 128,
+                "quantile_levels": [0.1, 0.5, 0.9],
+            }
+        },
+        time_limit=time_limit,
+    )
+
+    print("✅ Training finished. Model path:", predictor.path)
+    return predictor
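A minimal usage sketch for the trainer (assuming yfinance can reach the network; the load path mirrors the `path` hard-coded above):

from autogluon.timeseries import TimeSeriesPredictor
from train_autogluon import train_bolt_small

# Fine-tune on AAPL daily bars with the defaults (200 steps, 15-minute cap)...
predictor = train_bolt_small(ticker="AAPL", start="2015-01-01", interval="1d")

# ...then reload it later, e.g. from app.py.
predictor = TimeSeriesPredictor.load("/mnt/data/AutogluonChronosBoltSmall")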
utils_vol.py
ADDED
@@ -0,0 +1,29 @@
+import yfinance as yf
+import numpy as np
+import pandas as pd
+
+def fetch_close_series(ticker: str, start="2015-01-01", interval="1d") -> pd.Series:
+    """Downloads data from yfinance and returns the close prices."""
+    df = yf.download(ticker, start=start, interval=interval, progress=False, threads=True)
+    if df is None or df.empty:
+        raise ValueError(f"No data for {ticker}.")
+    col = None
+    for c in ["Adj Close", "Close", "close", "adj close"]:
+        if c in df.columns:
+            col = c
+            break
+    if col is None:
+        col = df.select_dtypes("number").columns[-1]
+    # .squeeze() guards against yfinance returning a one-column frame
+    # (e.g. MultiIndex columns for a single ticker).
+    return df[col].squeeze().dropna()
+
+def realized_vol(close: pd.Series, window=20, annualize=True) -> pd.Series:
+    # Rolling std of log returns, annualized with sqrt(252) trading days.
+    r = np.log(close).diff().dropna()
+    rv = r.rolling(window, min_periods=window).std()
+    if annualize:
+        rv *= np.sqrt(252)
+    return rv.dropna()
+
+def rv_to_autogluon_df(rv: pd.Series) -> pd.DataFrame:
+    """Formats realized vol as a long-format DataFrame for AutoGluon TimeSeries."""
+    df = pd.DataFrame({"timestamp": rv.index, "target": rv.values})
+    df["item_id"] = "series_1"
+    return df
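A quick sketch of the data flow through these helpers (assuming daily data; yfinance's exact column layout can vary):

from utils_vol import fetch_close_series, realized_vol, rv_to_autogluon_df

close = fetch_close_series("AAPL", start="2015-01-01", interval="1d")  # pd.Series of closes
rv = realized_vol(close, window=20, annualize=True)  # 20-day rolling vol, annualized
df = rv_to_autogluon_df(rv)  # long format: timestamp, target, item_id="series_1"
print(df.head())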
|