Gil Stetler committed on
Commit 1d730a5 · 1 Parent(s): 9a3942b
Files changed (1): app.py +44 -37
app.py CHANGED
@@ -116,7 +116,8 @@
 
 
 
-import os
 import numpy as np
 import pandas as pd
 import torch
@@ -130,11 +131,11 @@ from chronos import ChronosPipeline
 # Config
 # --------------------
 MODEL_ID = "amazon/chronos-t5-large"
-PREDICTION_LENGTH = 30  # forecast horizon (last 30 days)
-NUM_SAMPLES = 100       # >1: more stable point value (mean). For deterministic behavior: 1
-RV_WINDOW = 20          # rolling window for RV (trading days)
-ANNUALIZE = True        # annualize with sqrt(252)
-EPS = 1e-8
 
 # --------------------
 # Model load
@@ -188,7 +189,7 @@ def compute_realized_vol(close: pd.Series, window: int = 20, annualize: bool = T
 # Main
 # --------------------
 def run_vol_forecast_and_evaluate():
-    # Data
     raw = _read_ohlcv_csv()
     dates = _extract_dates(raw)
     close = _extract_close(raw)
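The hunk context above references compute_realized_vol, whose body lies outside this diff. For orientation, a minimal sketch of such a helper, assuming close-to-close log returns and a rolling standard deviation, with sqrt(252) annualization as the ANNUALIZE comment suggests (the actual implementation in app.py may differ):

```python
import numpy as np
import pandas as pd

def compute_realized_vol(close: pd.Series, window: int = 20, annualize: bool = True) -> pd.Series:
    # Assumption: RV is built from daily close-to-close log returns.
    log_ret = np.log(close.astype(float)).diff()
    # Rolling standard deviation over `window` trading days.
    rv = log_ret.rolling(window).std()
    if annualize:
        rv = rv * np.sqrt(252)  # scale daily vol to annual terms
    return rv.dropna()
```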
@@ -199,45 +200,44 @@ def run_vol_forecast_and_evaluate():
     if n <= H + 5:
         raise gr.Error(f"RV series too short after rolling. Need > {H+5}, got {n}.")
 
-    # Split: last H days as holdout
     rv_train = rv[: n - H]
     rv_test = rv[n - H :]
 
-    # Forecast (samples) and **point forecast = mean**
     context = torch.tensor(rv_train, dtype=torch.float32)
-    fcst = pipe.predict(context, prediction_length=H, num_samples=NUM_SAMPLES)  # [1, S, H]
-    samples = fcst[0].cpu().numpy()                      # (S, H)
-    mean_pred = samples.mean(axis=0)                     # (H,)  <-- point forecast
-    p10, p90 = np.quantile(samples, [0.1, 0.9], axis=0)  # band only
 
-    # Per-day errors
-    err = mean_pred - rv_test
-    abs_pct_err = np.abs(err) / np.maximum(EPS, np.abs(rv_test)) * 100.0
-    mape_pct = float(abs_pct_err.mean())
-    rmse = float(np.sqrt(np.mean(err**2)))
 
-    # Per-day errors
-    err = mean_pred - rv_test
-    abs_pct_err = np.abs(err) / np.maximum(EPS, np.abs(rv_test)) * 100.0
-    mape_pct = float(abs_pct_err.mean())
     rmse = float(np.sqrt(np.mean(err**2)))
 
-    # Plot
     fig = plt.figure(figsize=(10, 4))
     H0 = len(rv_train)
     if isinstance(dates, np.ndarray) and dates.shape[0] >= len(close):
         dates_rv = np.array(dates[-len(rv):])
         plt.plot(dates_rv[:H0], rv_train, label="realized vol (history)")
         plt.plot(dates_rv[H0:], rv_test, label="realized vol (actual holdout)")
-        plt.plot(dates_rv[H0:], mean_pred, linestyle="--", label="forecast (point/mean)")
-        plt.fill_between(dates_rv[H0:], p10, p90, alpha=0.3, label="80% interval")
         plt.xlabel("date")
     else:
         x_all = np.arange(len(rv)); x_fcst = np.arange(H0, H0 + H)
         plt.plot(x_all[:H0], rv_train, label="realized vol (history)")
         plt.plot(x_fcst, rv_test, label="realized vol (actual holdout)")
-        plt.plot(x_fcst, mean_pred, linestyle="--", label="forecast (point/mean)")
-        plt.fill_between(x_fcst, p10, p90, alpha=0.3, label="80% interval")
         plt.xlabel("time index")
 
     plt.title(f"Volatility Forecast (RV window={RV_WINDOW}, H={H})")
@@ -246,7 +246,6 @@ def run_vol_forecast_and_evaluate():
     plt.tight_layout()
 
     # Table: day-by-day comparison
-    # (if dates are available, use the last H RV data points)
     if isinstance(dates, np.ndarray) and dates.shape[0] >= len(close):
         dates_rv = np.array(dates[-len(rv):])
         last_dates = dates_rv[H0:]
@@ -256,8 +255,8 @@ def run_vol_forecast_and_evaluate():
     df_days = pd.DataFrame({
         "date": last_dates,
         "actual_vol": rv_test,
-        "forecast_vol_point": mean_pred,
-        "abs_error": np.abs(err),
         "abs_pct_error_%": abs_pct_err,
     })
 
@@ -267,28 +266,35 @@ def run_vol_forecast_and_evaluate():
             "prediction_length": H,
             "num_samples": NUM_SAMPLES,
             "annualized": ANNUALIZE,
-            "point_forecast": "mean",
         },
         "metrics": {
             "MAPE_%": mape_pct,
             "RMSE": rmse,
         },
     }
 
-    metrics_md = f"**MAPE (avg. % error): {mape_pct:.2f}%**  **RMSE:** {rmse:.6f}"
     return fig, out_json, df_days, metrics_md
 
 # --------------------
 # UI
 # --------------------
-with gr.Blocks(title="Volatility Forecast • Point Forecast") as demo:
     gr.Markdown(
-        "## Forecast the last 30 days of volatility and compare per day\n"
-        "- Point forecast = **mean** of the distribution (not the median).\n"
-        "- Output: plot, MAPE%, RMSE, and a **daily table** (actual vs. forecast + % error)."
     )
     run_btn = gr.Button("Run", variant="primary")
-    plot = gr.Plot(label="Forecast (point) vs Actual")
     meta = gr.JSON(label="Configuration & overall metrics")
     table = gr.Dataframe(label="Per-day comparison", wrap=True)
     metrics = gr.Markdown(label="Metrics")
@@ -298,3 +304,4 @@ with gr.Blocks(title="Volatility Forecast • Point Forecast") as demo:
 if __name__ == "__main__":
     demo.launch()
 
app.py after the commit (added lines marked with +):

+# app.py
+import os, random
 import numpy as np
 import pandas as pd
 import torch
 
 # Config
 # --------------------
 MODEL_ID = "amazon/chronos-t5-large"
+PREDICTION_LENGTH = 30  # last 30 days
+NUM_SAMPLES = 1         # exactly ONE path -> day-by-day point forecast
+RV_WINDOW = 20          # rolling window for RV (trading days)
+ANNUALIZE = True        # annualized with sqrt(252)
+EPS = 1e-8              # guard against division by zero
 
 # --------------------
 # Model load
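The model-load section itself is not touched by this commit and its body is not shown here. For context, a typical Chronos pipeline load looks roughly like the following sketch (device and dtype choices are assumptions, not taken from app.py):

```python
import torch
from chronos import ChronosPipeline

# Sketch only: app.py's actual load code is outside this diff.
pipe = ChronosPipeline.from_pretrained(
    MODEL_ID,  # "amazon/chronos-t5-large"
    device_map="cuda" if torch.cuda.is_available() else "cpu",
    torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
)
```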
 
 # Main
 # --------------------
 def run_vol_forecast_and_evaluate():
+    # Load data
     raw = _read_ohlcv_csv()
     dates = _extract_dates(raw)
     close = _extract_close(raw)
 
     if n <= H + 5:
         raise gr.Error(f"RV series too short after rolling. Need > {H+5}, got {n}.")
 
+    # Holdout: last H days
     rv_train = rv[: n - H]
     rv_test = rv[n - H :]
 
+    # Draw a reproducible SINGLE sample path
+    random.seed(0); np.random.seed(0); torch.manual_seed(0)
+    if torch.cuda.is_available():
+        torch.cuda.manual_seed_all(0)
+
     context = torch.tensor(rv_train, dtype=torch.float32)
+    fcst = pipe.predict(context, prediction_length=H, num_samples=NUM_SAMPLES)  # [1, 1, H]
+    samples = fcst[0].cpu().numpy()  # (1, H)
+    path_pred = samples[0]           # (H,)  <-- day-by-day forecast
 
+    # Per-day errors & percentage errors
+    err = path_pred - rv_test
+    denom = np.maximum(EPS, np.abs(rv_test))
+    abs_pct_err = np.abs(err) / denom * 100.0
+    pct_err = err / np.maximum(EPS, rv_test) * 100.0
 
+    mape_pct = float(abs_pct_err.mean())  # main metric: mean absolute % deviation
+    mpe_pct = float(pct_err.mean())       # signed (bias)
     rmse = float(np.sqrt(np.mean(err**2)))
 
+    # Plot: history + actual (holdout) + forecast path
     fig = plt.figure(figsize=(10, 4))
     H0 = len(rv_train)
     if isinstance(dates, np.ndarray) and dates.shape[0] >= len(close):
         dates_rv = np.array(dates[-len(rv):])
         plt.plot(dates_rv[:H0], rv_train, label="realized vol (history)")
         plt.plot(dates_rv[H0:], rv_test, label="realized vol (actual holdout)")
+        plt.plot(dates_rv[H0:], path_pred, linestyle="--", label="forecast (sample path)")
         plt.xlabel("date")
     else:
         x_all = np.arange(len(rv)); x_fcst = np.arange(H0, H0 + H)
         plt.plot(x_all[:H0], rv_train, label="realized vol (history)")
         plt.plot(x_fcst, rv_test, label="realized vol (actual holdout)")
+        plt.plot(x_fcst, path_pred, linestyle="--", label="forecast (sample path)")
         plt.xlabel("time index")
 
     plt.title(f"Volatility Forecast (RV window={RV_WINDOW}, H={H})")
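The switch above is the core of the commit: the old code averaged NUM_SAMPLES=100 sample paths into a mean point forecast with a 10–90% band, while the new code seeds the RNGs and keeps a single sample path. A small sketch of how both point-forecast styles come out of the same pipe.predict output (illustration only; it reuses pipe, context, and H from app.py):

```python
import numpy as np

# Chronos returns forecasts shaped [batch, num_samples, prediction_length].
fcst = pipe.predict(context, prediction_length=H, num_samples=100)  # [1, 100, H]
samples = fcst[0].cpu().numpy()                                     # (100, H)

mean_pred = samples.mean(axis=0)     # old behavior: mean over sample paths
single_path = samples[0]             # new behavior: one (seeded) sample path
p10, p90 = np.quantile(samples, [0.1, 0.9], axis=0)  # old 80% plot band
```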
 
     plt.tight_layout()
 
     # Table: day-by-day comparison
     if isinstance(dates, np.ndarray) and dates.shape[0] >= len(close):
         dates_rv = np.array(dates[-len(rv):])
         last_dates = dates_rv[H0:]
 
     df_days = pd.DataFrame({
         "date": last_dates,
         "actual_vol": rv_test,
+        "forecast_vol": path_pred,
+        "pct_error_% (signed)": pct_err,
         "abs_pct_error_%": abs_pct_err,
     })
 
             "prediction_length": H,
             "num_samples": NUM_SAMPLES,
             "annualized": ANNUALIZE,
+            "point_forecast": "single_sample_path",
+            "seed": 0,
         },
         "metrics": {
             "MAPE_%": mape_pct,
+            "MPE_%": mpe_pct,
             "RMSE": rmse,
         },
     }
 
+    metrics_md = (
+        f"**MAPE (mean absolute % deviation): {mape_pct:.2f}%**  "
+        f"**MPE (mean signed %): {mpe_pct:.2f}%**  "
+        f"**RMSE:** {rmse:.6f}"
+    )
     return fig, out_json, df_days, metrics_md
 
 # --------------------
 # UI
 # --------------------
+with gr.Blocks(title="Volatility Forecast • Day-by-Day Point Values") as demo:
     gr.Markdown(
+        "## Forecast of the last 30 days (day-by-day point values)\n"
+        "- A **single sample path** is forecast (no averaging, no median).\n"
+        "- Per-day comparison: forecast vs. actual + percentage error.\n"
+        "- Overall: **MAPE%** (main metric), **MPE%** (bias), and RMSE."
     )
     run_btn = gr.Button("Run", variant="primary")
+    plot = gr.Plot(label="Forecast (single path) vs Actual")
     meta = gr.JSON(label="Configuration & overall metrics")
     table = gr.Dataframe(label="Per-day comparison", wrap=True)
     metrics = gr.Markdown(label="Metrics")
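To make the MAPE% (size of the daily miss) vs. MPE% (signed bias) distinction above concrete, a tiny worked example with made-up numbers (not output of the app):

```python
import numpy as np

# Toy values: actual vs. forecast realized vol on three days.
actual   = np.array([0.20, 0.25, 0.30])
forecast = np.array([0.22, 0.24, 0.33])

err = forecast - actual
abs_pct_err = np.abs(err) / np.abs(actual) * 100.0  # [10.0, 4.0, 10.0]
pct_err = err / actual * 100.0                      # [10.0, -4.0, 10.0]

mape = abs_pct_err.mean()        # 8.0  -> average size of the daily % miss
mpe = pct_err.mean()             # 5.33 -> positive: forecasts run high on average
rmse = np.sqrt(np.mean(err**2))  # ~0.0216, in volatility units
```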
 
 if __name__ == "__main__":
     demo.launch()
 
+