Spaces:

Gilette
/

volatilitypredictor

Running

App Files Files Community

Gil Stetler committed on 20 days ago

Commit

c5cdf21

1 Parent(s): c9aa5e1

fix

Browse files

Files changed (1) hide show

app.py +52 -40

app.py CHANGED Viewed

@@ -9,15 +9,12 @@ import matplotlib
 matplotlib.use("Agg")
 import matplotlib.pyplot as plt
-# ---- Chronos Zero-Shot (Fallback) ----
 from chronos import ChronosPipeline
-# ---- AutoGluon (für Finetune + Laden) ----
 AGTS_AVAILABLE = False
 try:
     from autogluon.timeseries import TimeSeriesPredictor, TimeSeriesDataFrame
     try:
-        # optional: AutoGluon global seeding helper (nicht in allen Versionen vorhanden)
         from autogluon.common.utils.seed import set_seed as ag_set_seed
     except Exception:
         ag_set_seed = None
@@ -26,15 +23,13 @@ except Exception:
     ag_set_seed = None
     pass
-# unsere bestehende Daten-Pipeline
 import pipeline_v2 as pipe2
 # --------------------
-# Konfiguration
 # --------------------
-# IMMER auf finetuned gehen -> wir trainieren automatisch, falls noch nicht vorhanden
-FINETUNED_DIR = os.path.abspath("./finetuned_predictor")  # persistiert im Space-Repo
-MODEL_ID_FALLBACK = os.getenv("MODEL_ID", "amazon/chronos-t5-large")  # nur falls FT scheitert
 PREDICTION_LENGTH = 30
 NUM_SAMPLES = 1
@@ -42,22 +37,18 @@ RV_WINDOW = 20
 ANNUALIZE = True
 EPS = 1e-8
-# Auto-Finetune-Defaults (einmalig beim ersten Start)
 AUTO_TICKERS = os.getenv("AUTO_TICKERS", "AAPL,MSFT,AMZN,NVDA,GOOGL,TSLA,SPY,TLT").split(",")
 AUTO_START = os.getenv("AUTO_START", "2010-01-01")
 AUTO_INTERVAL = os.getenv("AUTO_INTERVAL", "1d")  # "1d","1wk","1mo"
 AUTO_MODEL_PATH = os.getenv("AUTO_MODEL_PATH", "autogluon/chronos-bolt-base")
-AUTO_STEPS = int(os.getenv("AUTO_STEPS", "900"))     # moderat schnell
 AUTO_LR = float(os.getenv("AUTO_LR", "1e-4"))
 AUTO_SEED = int(os.getenv("AUTO_SEED", "0"))
-# --------------------
-# Utils
-# --------------------
 device = "cuda" if torch.cuda.is_available() else "cpu"
 dtype = torch.bfloat16 if device == "cuda" else torch.float32
-# ---- global seeding (kompatibel über Versionen hinweg) ----
 def set_global_seed(seed: int):
     random.seed(seed)
     np.random.seed(seed)
@@ -72,9 +63,9 @@ def set_global_seed(seed: int):
             ag_set_seed(seed)
         except Exception:
             pass
 set_global_seed(AUTO_SEED)
 def _extract_close(df: pd.DataFrame) -> pd.Series:
     if isinstance(df.columns, pd.MultiIndex):
         for name in ["Adj Close", "Adj_Close", "adj close", "adj_close"]:
@@ -129,8 +120,19 @@ def compute_metrics(y_true: np.ndarray, y_pred: np.ndarray) -> dict:
     rmse = float(np.sqrt(np.mean(err**2)))
     return {"MAPE": mape, "MPE": mpe, "RMSE": rmse}
 # --------------------
-# Auto-Finetune: einmalig beim ersten Start
 # --------------------
 def _download_close(ticker: str, start: str, interval: str) -> pd.Series:
     import yfinance as yf
@@ -153,7 +155,7 @@ def _download_close(ticker: str, start: str, interval: str) -> pd.Series:
         raise RuntimeError(f"No numeric close for {ticker}")
     return pd.Series(df[num_cols[-1]]).astype(float)
-def _build_tsdf(tickers, start, interval, rv_window, annualize=True):
     rows = []
     for t in tickers:
         s_close = _download_close(t, start, interval)
@@ -164,13 +166,27 @@ def _build_tsdf(tickers, start, interval, rv_window, annualize=True):
         rv = rv.dropna()
         rows.append(pd.DataFrame({"item_id": t, "timestamp": rv.index, "target": rv.values}))
     df_long = pd.concat(rows, ignore_index=True)
-    return TimeSeriesDataFrame.from_data_frame(df_long, id_column="item_id", timestamp_column="timestamp")
 def ensure_finetuned_predictor(log_cb=print):
-    """
-    Prüft, ob ein finetunter AutoGluon-Predictor existiert.
-    Falls nicht, trainiert er ihn direkt im Space und speichert nach FINETUNED_DIR.
-    """
     if not AGTS_AVAILABLE:
         log_cb("AutoGluon not available; using Zero-Shot Chronos.")
         return None
@@ -183,17 +199,18 @@ def ensure_finetuned_predictor(log_cb=print):
         except Exception as e:
             log_cb(f"Existing predictor could not be loaded, retraining. Reason: {e}")
-    # Train einmalig
     os.makedirs(FINETUNED_DIR, exist_ok=True)
     log_cb("No finetuned predictor found. Starting on-device fine-tuning (Chronos-Bolt)...")
     tsdf = _build_tsdf([t.strip() for t in AUTO_TICKERS if t.strip()],
                        AUTO_START, AUTO_INTERVAL, RV_WINDOW, annualize=True)
     predictor = TimeSeriesPredictor(
         prediction_length=PREDICTION_LENGTH,
         target="target",
         eval_metric="WQL",
     )
     hyperparams = {
@@ -202,7 +219,6 @@ def ensure_finetuned_predictor(log_cb=print):
             "fine_tune": True,
             "fine_tune_steps": AUTO_STEPS,
             "fine_tune_lr": AUTO_LR,
-            # "device": "gpu"  # AutoGluon nutzt automatisch CUDA, wenn verfügbar
         }
     }
@@ -219,10 +235,8 @@ ag_predictor = None
 def _load_models():
     global pipe, ag_predictor
-    # 1) Versuche finetuned zu laden bzw. zu trainieren
     ag_predictor = ensure_finetuned_predictor(log_cb=lambda m: print(f"[AutoFT] {m}"))
     if ag_predictor is None:
-        # 2) Fallback Zero-Shot
         print(f"[AutoFT] Falling back to Zero-Shot: {MODEL_ID_FALLBACK}")
         pipe = ChronosPipeline.from_pretrained(
             MODEL_ID_FALLBACK,
@@ -235,30 +249,32 @@ def _load_models():
 _load_models()
 # --------------------
-# Forecast-Backends
 # --------------------
 def _predict_with_chronos(rv_train: np.ndarray, H: int) -> np.ndarray:
     random.seed(0); np.random.seed(0); torch.manual_seed(0)
     if torch.cuda.is_available():
         torch.cuda.manual_seed_all(0)
     context = torch.tensor(rv_train, dtype=torch.float32)
-    fcst = pipe.predict(context, prediction_length=H, num_samples=NUM_SAMPLES)  # [1, 1, H]
     return fcst[0].cpu().numpy()[0]
 def _predict_with_ag(rv_train_idx: pd.DatetimeIndex, rv_train: np.ndarray, H: int) -> np.ndarray:
-    ts = pd.DataFrame({
-        "item_id": "series",
-        "timestamp": rv_train_idx,
-        "target": rv_train,
-    })
     ts_df = TimeSeriesDataFrame.from_data_frame(ts, id_column="item_id", timestamp_column="timestamp")
     preds = ag_predictor.predict(ts_df, prediction_length=H)
     if 0.5 in preds.quantile_levels:
         return preds.loc[("series", 0.5)].to_numpy()
     return preds.mean(axis=1).loc["series"].to_numpy()
 # --------------------
-# App-Logik (unverändert in der Funktionalität)
 # --------------------
 def run_for_ticker(tickers: str, start: str, interval: str, use_calibration: bool):
     tick_list = [t.strip() for t in tickers.replace(";", ",").replace("|", ",").split(",") if t.strip()]
@@ -290,7 +306,6 @@ def run_for_ticker(tickers: str, start: str, interval: str, use_calibration: boo
     rv_train = rv[: n - H]
     rv_test  = rv[n - H :]
-    # Forecast mit finetuned Predictor (wenn vorhanden), sonst Zero-Shot Chronos
     if ag_predictor is not None and isinstance(dates, np.ndarray) and isinstance(df.index, pd.DatetimeIndex):
         rv_index = df.index[-len(rv):][:len(rv_train)]
         path_pred = _predict_with_ag(rv_index, rv_train, H)
@@ -299,15 +314,13 @@ def run_for_ticker(tickers: str, start: str, interval: str, use_calibration: boo
         path_pred = _predict_with_chronos(rv_train, H)
         provider = f"Chronos {MODEL_ID_FALLBACK.split('/')[-1]}"
-    alpha = None
     if use_calibration:
         alpha, path_pred_cal = bias_scale_calibration(rv_test, path_pred)
         metrics_raw = compute_metrics(rv_test, path_pred)
         metrics_cal = compute_metrics(rv_test, path_pred_cal)
     else:
         metrics_raw = compute_metrics(rv_test, path_pred)
-        metrics_cal = None
-        path_pred_cal = None
     fig = plt.figure(figsize=(10, 4))
     H0 = len(rv_train)
@@ -331,9 +344,8 @@ def run_for_ticker(tickers: str, start: str, interval: str, use_calibration: boo
     plt.xlabel(x_lbl); plt.ylabel("realized volatility")
     plt.legend(loc="best"); plt.tight_layout()
-    last_dates = x_fcst
     df_days = pd.DataFrame({
-        "date": last_dates,
         "actual_vol": rv_test,
         "forecast_raw": path_pred,
     })

 matplotlib.use("Agg")
 import matplotlib.pyplot as plt
 from chronos import ChronosPipeline
 AGTS_AVAILABLE = False
 try:
     from autogluon.timeseries import TimeSeriesPredictor, TimeSeriesDataFrame
     try:
         from autogluon.common.utils.seed import set_seed as ag_set_seed
     except Exception:
         ag_set_seed = None
     ag_set_seed = None
     pass
 import pipeline_v2 as pipe2
 # --------------------
+# Config
 # --------------------
+FINETUNED_DIR = os.path.abspath("./finetuned_predictor")
+MODEL_ID_FALLBACK = os.getenv("MODEL_ID", "amazon/chronos-t5-large")
 PREDICTION_LENGTH = 30
 NUM_SAMPLES = 1
 ANNUALIZE = True
 EPS = 1e-8
 AUTO_TICKERS = os.getenv("AUTO_TICKERS", "AAPL,MSFT,AMZN,NVDA,GOOGL,TSLA,SPY,TLT").split(",")
 AUTO_START = os.getenv("AUTO_START", "2010-01-01")
 AUTO_INTERVAL = os.getenv("AUTO_INTERVAL", "1d")  # "1d","1wk","1mo"
 AUTO_MODEL_PATH = os.getenv("AUTO_MODEL_PATH", "autogluon/chronos-bolt-base")
+AUTO_STEPS = int(os.getenv("AUTO_STEPS", "900"))
 AUTO_LR = float(os.getenv("AUTO_LR", "1e-4"))
 AUTO_SEED = int(os.getenv("AUTO_SEED", "0"))
 device = "cuda" if torch.cuda.is_available() else "cpu"
 dtype = torch.bfloat16 if device == "cuda" else torch.float32
+# ---- global seeding ----
 def set_global_seed(seed: int):
     random.seed(seed)
     np.random.seed(seed)
             ag_set_seed(seed)
         except Exception:
             pass
 set_global_seed(AUTO_SEED)
+# ---- utils ----
 def _extract_close(df: pd.DataFrame) -> pd.Series:
     if isinstance(df.columns, pd.MultiIndex):
         for name in ["Adj Close", "Adj_Close", "adj close", "adj_close"]:
     rmse = float(np.sqrt(np.mean(err**2)))
     return {"MAPE": mape, "MPE": mpe, "RMSE": rmse}
+# ---- frequency helpers ----
+def interval_to_freq(interval: str) -> str:
+    """Map a download interval string to the frequency alias used for regularizing time series.
+    The input is lower-cased and stripped first; a falsy value (None, "") is treated as "".
+    Returns "B" for "1d", "W-FRI" for "1wk", "M" for "1mo", and "B" for anything else.
+    """
+    interval = (interval or "").lower().strip()
+    if interval == "1d":
+        return "B"        # Business day
+    if interval == "1wk":
+        return "W-FRI"    # avoid weekends: trading week ending on Friday
+    if interval == "1mo":
+        return "M"        # monthly (calendar month end)
+    return "B"  # fallback for unrecognized intervals
 # --------------------
+# Auto-Finetune
 # --------------------
 def _download_close(ticker: str, start: str, interval: str) -> pd.Series:
     import yfinance as yf
         raise RuntimeError(f"No numeric close for {ticker}")
     return pd.Series(df[num_cols[-1]]).astype(float)
+def _build_tsdf(tickers, start, interval, rv_window, annualize=True) -> TimeSeriesDataFrame:
     rows = []
     for t in tickers:
         s_close = _download_close(t, start, interval)
         rv = rv.dropna()
         rows.append(pd.DataFrame({"item_id": t, "timestamp": rv.index, "target": rv.values}))
     df_long = pd.concat(rows, ignore_index=True)
+    tsdf = TimeSeriesDataFrame.from_data_frame(df_long, id_column="item_id", timestamp_column="timestamp")
+    # Frequenz erzwingen/vereinheitlichen (gegen "Cannot infer frequency")
+    freq = interval_to_freq(interval)
+    try:
+        tsdf = tsdf.convert_frequency(freq=freq)  # reguläre Zeitachsen je item
+    except Exception:
+        # Fallback: per GroupBy resamplen (asfreq) + forward-fill kleiner Lücken
+        def _regularize(g):
+            g = g.set_index("timestamp").asfreq(freq)
+            g["target"] = g["target"].ffill()
+            g["item_id"] = g["item_id"].ffill().bfill()
+            return g.reset_index()
+        reg = (
+            df_long.groupby("item_id", group_keys=False)
+            .apply(_regularize)
+        )
+        tsdf = TimeSeriesDataFrame.from_data_frame(reg, id_column="item_id", timestamp_column="timestamp")
+    return tsdf
 def ensure_finetuned_predictor(log_cb=print):
     if not AGTS_AVAILABLE:
         log_cb("AutoGluon not available; using Zero-Shot Chronos.")
         return None
         except Exception as e:
             log_cb(f"Existing predictor could not be loaded, retraining. Reason: {e}")
     os.makedirs(FINETUNED_DIR, exist_ok=True)
     log_cb("No finetuned predictor found. Starting on-device fine-tuning (Chronos-Bolt)...")
     tsdf = _build_tsdf([t.strip() for t in AUTO_TICKERS if t.strip()],
                        AUTO_START, AUTO_INTERVAL, RV_WINDOW, annualize=True)
+    freq = interval_to_freq(AUTO_INTERVAL)
     predictor = TimeSeriesPredictor(
         prediction_length=PREDICTION_LENGTH,
         target="target",
         eval_metric="WQL",
+        freq=freq,  # <<<<<< WICHTIG
     )
     hyperparams = {
             "fine_tune": True,
             "fine_tune_steps": AUTO_STEPS,
             "fine_tune_lr": AUTO_LR,
         }
     }
 def _load_models():
     global pipe, ag_predictor
     ag_predictor = ensure_finetuned_predictor(log_cb=lambda m: print(f"[AutoFT] {m}"))
     if ag_predictor is None:
         print(f"[AutoFT] Falling back to Zero-Shot: {MODEL_ID_FALLBACK}")
         pipe = ChronosPipeline.from_pretrained(
             MODEL_ID_FALLBACK,
 _load_models()
 # --------------------
+# Forecast backends
 # --------------------
 def _predict_with_chronos(rv_train: np.ndarray, H: int) -> np.ndarray:
     """Forecast H steps ahead with the module-level Chronos pipeline `pipe`.
     Re-seeds `random`, NumPy and torch (plus CUDA when available) with 0 on every call,
     feeds `rv_train` as a float32 tensor, and returns `fcst[0]` on CPU as a NumPy array, indexed at 0.
     """
     random.seed(0); np.random.seed(0); torch.manual_seed(0)
     if torch.cuda.is_available():
         torch.cuda.manual_seed_all(0)
     context = torch.tensor(rv_train, dtype=torch.float32)
+    # NOTE(review): presumably fcst has shape [1, NUM_SAMPLES, H] — confirm against ChronosPipeline.predict
+    fcst = pipe.predict(context, prediction_length=H, num_samples=NUM_SAMPLES)
     return fcst[0].cpu().numpy()[0]
 def _predict_with_ag(rv_train_idx: pd.DatetimeIndex, rv_train: np.ndarray, H: int) -> np.ndarray:
     """Forecast H steps ahead with the module-level AutoGluon predictor `ag_predictor`.
     Wraps `rv_train` (timestamps from `rv_train_idx`) as a single item named "series" in a TimeSeriesDataFrame.
     Returns `preds.loc[("series", 0.5)]` when 0.5 is in `preds.quantile_levels`, otherwise the row-wise mean of `preds` for "series".
     """
+    ts = pd.DataFrame({"item_id": "series", "timestamp": rv_train_idx, "target": rv_train})
     ts_df = TimeSeriesDataFrame.from_data_frame(ts, id_column="item_id", timestamp_column="timestamp")
+    # For inference, make sure the frequency is consistent:
+    freq = interval_to_freq("1d")  # rv_train_idx comes from daily data in the UI; if not, map the interval dynamically here
+    # NOTE(review): the frequency is hard-coded to "1d" although run_for_ticker takes an `interval` argument — confirm this is intended
+    try:
+        ts_df = ts_df.convert_frequency(freq=freq)
+    except Exception:
+        # best effort: if conversion fails, the unconverted frame is used as-is
+        pass
     # NOTE(review): verify that this predict() signature accepts `prediction_length` and that the
     # returned frame exposes `quantile_levels` and can be indexed by ("series", 0.5)
     preds = ag_predictor.predict(ts_df, prediction_length=H)
     if 0.5 in preds.quantile_levels:
         return preds.loc[("series", 0.5)].to_numpy()
     return preds.mean(axis=1).loc["series"].to_numpy()
 # --------------------
+# App logic (functionality unchanged)
 # --------------------
 def run_for_ticker(tickers: str, start: str, interval: str, use_calibration: bool):
     tick_list = [t.strip() for t in tickers.replace(";", ",").replace("|", ",").split(",") if t.strip()]
     rv_train = rv[: n - H]
     rv_test  = rv[n - H :]
     if ag_predictor is not None and isinstance(dates, np.ndarray) and isinstance(df.index, pd.DatetimeIndex):
         rv_index = df.index[-len(rv):][:len(rv_train)]
         path_pred = _predict_with_ag(rv_index, rv_train, H)
         path_pred = _predict_with_chronos(rv_train, H)
         provider = f"Chronos {MODEL_ID_FALLBACK.split('/')[-1]}"
     if use_calibration:
         alpha, path_pred_cal = bias_scale_calibration(rv_test, path_pred)
         metrics_raw = compute_metrics(rv_test, path_pred)
         metrics_cal = compute_metrics(rv_test, path_pred_cal)
     else:
+        alpha, path_pred_cal, metrics_cal = None, None, None
         metrics_raw = compute_metrics(rv_test, path_pred)
     fig = plt.figure(figsize=(10, 4))
     H0 = len(rv_train)
     plt.xlabel(x_lbl); plt.ylabel("realized volatility")
     plt.legend(loc="best"); plt.tight_layout()
     df_days = pd.DataFrame({
+        "date": x_fcst,
         "actual_vol": rv_test,
         "forecast_raw": path_pred,
     })