Spaces: Running
Gil Stetler committed
Commit 92e4d77 · 1 parent 1d730a5
version with bias/scale calibration
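The "bias/scale calibration" in the commit title is a one-parameter least-squares rescaling of the forecast path. With y_t the realized volatility on holdout day t, ŷ_t the raw forecast, and H = 30, the closed form the new code implements is (a sketch of the standard derivation; the code additionally adds EPS inside the denominator sum as a division guard):

    \alpha^{*} = \arg\min_{\alpha} \sum_{t=1}^{H} \left(\alpha \hat{y}_t - y_t\right)^2
               = \frac{\sum_{t=1}^{H} y_t \hat{y}_t}{\sum_{t=1}^{H} \hat{y}_t^{2}},
    \qquad \hat{y}_t^{\mathrm{cal}} = \alpha^{*} \hat{y}_t

Setting the derivative 2 \sum_t \hat{y}_t (\alpha \hat{y}_t - y_t) to zero gives the ratio above; α > 1 corrects systematic underestimation, α < 1 systematic overestimation.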
app.py
CHANGED
@@ -117,6 +117,195 @@
 
 
 # app.py
+#import os, random
+#import numpy as np
+#import pandas as pd
+#import torch
+#import gradio as gr
+#import matplotlib
+#matplotlib.use("Agg")
+#import matplotlib.pyplot as plt
+#from chronos import ChronosPipeline
+#
+## --------------------
+## Config
+## --------------------
+#MODEL_ID = "amazon/chronos-t5-large"
+#PREDICTION_LENGTH = 30  # last 30 days
+#NUM_SAMPLES = 1  # exactly ONE path -> day-accurate point forecast
+#RV_WINDOW = 20  # rolling window for RV (trading days)
+#ANNUALIZE = True  # annualized with sqrt(252)
+#EPS = 1e-8  # guard against division by zero
+#
+## --------------------
+## Model load
+## --------------------
+#device = "cuda" if torch.cuda.is_available() else "cpu"
+#dtype = torch.bfloat16 if device == "cuda" else torch.float32
+#
+#pipe = ChronosPipeline.from_pretrained(
+#    MODEL_ID,
+#    device_map="auto",
+#    torch_dtype=dtype,
+#)
+#
+## --------------------
+## Helpers
+## --------------------
+#def _read_ohlcv_csv():
+#    for p in ["/mnt/data/ohlcv_clean.csv", "ohlcv_clean.csv"]:
+#        if os.path.exists(p):
+#            return pd.read_csv(p)
+#    raise gr.Error("CSV not found. Place it at /mnt/data/ohlcv_clean.csv or ./ohlcv_clean.csv.")
+#
+#def _extract_close(df: pd.DataFrame) -> pd.Series:
+#    mapping = {c.lower(): c for c in df.columns}
+#    for name in ["close", "adj close", "adj_close", "price"]:
+#        if name in mapping:
+#            return pd.Series(df[mapping[name]].astype(float))
+#    numeric_cols = df.select_dtypes(include=[np.number]).columns
+#    if len(numeric_cols) == 0:
+#        raise gr.Error("No numeric price column found (e.g. Close).")
+#    return pd.Series(df[numeric_cols[-1]].astype(float))
+#
+#def _extract_dates(df: pd.DataFrame):
+#    mapping = {c.lower(): c for c in df.columns}
+#    for name in ["date", "time", "timestamp"]:
+#        if name in mapping:
+#            try:
+#                return pd.to_datetime(df[mapping[name]]).to_numpy()
+#            except Exception:
+#                pass
+#    return np.arange(len(df))  # fallback
+#
+#def compute_realized_vol(close: pd.Series, window: int = 20, annualize: bool = True) -> pd.Series:
+#    r = np.log(close).diff().dropna()
+#    rv = r.rolling(window, min_periods=window).std()
+#    if annualize:
+#        rv = rv * np.sqrt(252.0)
+#    return rv.dropna().reset_index(drop=True)
+#
+## --------------------
+## Main
+## --------------------
+#def run_vol_forecast_and_evaluate():
+#    # Load data
+#    raw = _read_ohlcv_csv()
+#    dates = _extract_dates(raw)
+#    close = _extract_close(raw)
+#
+#    # RV time series
+#    rv = compute_realized_vol(close, window=RV_WINDOW, annualize=ANNUALIZE).to_numpy()
+#    n = len(rv); H = PREDICTION_LENGTH
+#    if n <= H + 5:
+#        raise gr.Error(f"RV series too short after rolling. Need > {H+5}, got {n}.")
+#
+#    # Holdout: last H days
+#    rv_train = rv[: n - H]
+#    rv_test = rv[n - H :]
+#
+#    # Draw a reproducible SINGLE sample path
+#    random.seed(0); np.random.seed(0); torch.manual_seed(0)
+#    if torch.cuda.is_available():
+#        torch.cuda.manual_seed_all(0)
+#
+#    context = torch.tensor(rv_train, dtype=torch.float32)
+#    fcst = pipe.predict(context, prediction_length=H, num_samples=NUM_SAMPLES)  # [1, 1, H]
+#    samples = fcst[0].cpu().numpy()  # (1, H)
+#    path_pred = samples[0]  # (H,) <-- day-accurate forecast
+#
+#    # Daily errors & percentage errors
+#    err = path_pred - rv_test
+#    denom = np.maximum(EPS, np.abs(rv_test))
+#    abs_pct_err = np.abs(err) / denom * 100.0
+#    pct_err = err / np.maximum(EPS, rv_test) * 100.0
+#
+#    mape_pct = float(abs_pct_err.mean())  # headline metric: mean absolute % deviation
+#    mpe_pct = float(pct_err.mean())  # signed (bias)
+#    rmse = float(np.sqrt(np.mean(err**2)))
+#
+#    # Plot: history + actual (holdout) + forecast path
+#    fig = plt.figure(figsize=(10, 4))
+#    H0 = len(rv_train)
+#    if isinstance(dates, np.ndarray) and dates.shape[0] >= len(close):
+#        dates_rv = np.array(dates[-len(rv):])
+#        plt.plot(dates_rv[:H0], rv_train, label="realized vol (history)")
+#        plt.plot(dates_rv[H0:], rv_test, label="realized vol (actual holdout)")
+#        plt.plot(dates_rv[H0:], path_pred, linestyle="--", label="forecast (sample path)")
+#        plt.xlabel("date")
+#    else:
+#        x_all = np.arange(len(rv)); x_fcst = np.arange(H0, H0 + H)
+#        plt.plot(x_all[:H0], rv_train, label="realized vol (history)")
+#        plt.plot(x_fcst, rv_test, label="realized vol (actual holdout)")
+#        plt.plot(x_fcst, path_pred, linestyle="--", label="forecast (sample path)")
+#        plt.xlabel("time index")
+#
+#    plt.title(f"Volatility Forecast (RV window={RV_WINDOW}, H={H})")
+#    plt.ylabel("realized volatility")
+#    plt.legend(loc="best")
+#    plt.tight_layout()
+#
+#    # Table: day-by-day comparison
+#    if isinstance(dates, np.ndarray) and dates.shape[0] >= len(close):
+#        dates_rv = np.array(dates[-len(rv):])
+#        last_dates = dates_rv[H0:]
+#    else:
+#        last_dates = np.arange(H)
+#
+#    df_days = pd.DataFrame({
+#        "date": last_dates,
+#        "actual_vol": rv_test,
+#        "forecast_vol": path_pred,
+#        "pct_error_% (signed)": pct_err,
+#        "abs_pct_error_%": abs_pct_err,
+#    })
+#
+#    out_json = {
+#        "config": {
+#            "rv_window": RV_WINDOW,
+#            "prediction_length": H,
+#            "num_samples": NUM_SAMPLES,
+#            "annualized": ANNUALIZE,
+#            "point_forecast": "single_sample_path",
+#            "seed": 0,
+#        },
+#        "metrics": {
+#            "MAPE_%": mape_pct,
+#            "MPE_%": mpe_pct,
+#            "RMSE": rmse,
+#        },
+#    }
+#
+#    metrics_md = (
+#        f"**MAPE (avg absolute % deviation): {mape_pct:.2f}%** "
+#        f"**MPE (avg signed %): {mpe_pct:.2f}%** "
+#        f"**RMSE:** {rmse:.6f}"
+#    )
+#    return fig, out_json, df_days, metrics_md
+#
+## --------------------
+## UI
+## --------------------
+#with gr.Blocks(title="Volatility Forecast • Day-accurate point values") as demo:
+#    gr.Markdown(
+#        "## Forecast of the last 30 days (day-accurate point values)\n"
+#        "- A **single sample path** is forecast (no averaging, no median).\n"
+#        "- Per-day comparison: forecast vs. actual + percentage error.\n"
+#        "- Overall: **MAPE%** (headline metric), **MPE%** (bias), and RMSE."
+#    )
+#    run_btn = gr.Button("Run", variant="primary")
+#    plot = gr.Plot(label="Forecast (single path) vs Actual")
+#    meta = gr.JSON(label="Configuration & overall metrics")
+#    table = gr.Dataframe(label="Per-day comparison", wrap=True)
+#    metrics = gr.Markdown(label="Metrics")
+#
+#    run_btn.click(run_vol_forecast_and_evaluate, inputs=None, outputs=[plot, meta, table, metrics])
+#
+#if __name__ == "__main__":
+#    demo.launch()
+#
+#
+#
 import os, random
 import numpy as np
 import pandas as pd
@@ -132,10 +321,10 @@ from chronos import ChronosPipeline
 # --------------------
 MODEL_ID = "amazon/chronos-t5-large"
 PREDICTION_LENGTH = 30  # last 30 days
-NUM_SAMPLES = 1  # exactly ONE path -> day-accurate point forecast
-RV_WINDOW = 20  # rolling window for RV (trading days)
-ANNUALIZE = True  # annualized with sqrt(252)
-EPS = 1e-8  # guard against division by zero
+NUM_SAMPLES = 1  # one path -> day-accurate point forecast
+RV_WINDOW = 20
+ANNUALIZE = True
+EPS = 1e-8
 
 # --------------------
 # Model load
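
NUM_SAMPLES = 1 is what makes the indexing further down (fcst[0], then samples[0]) yield exactly one path per run. A minimal shape sketch, assuming (as this diff's own comments state) that ChronosPipeline.predict returns a tensor shaped [num_series, num_samples, prediction_length]; the tensor here is a zero stand-in, not a real model call:

import torch

# Stand-in for: pipe.predict(context, prediction_length=30, num_samples=1)
forecast = torch.zeros(1, 1, 30)   # [num_series, num_samples, prediction_length]
samples = forecast[0].numpy()      # (1, 30): all sample paths for the one series
path = samples[0]                  # (30,): the single path the app plots
print(path.shape)
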
@@ -176,7 +365,7 @@ def _extract_dates(df: pd.DataFrame):
                 return pd.to_datetime(df[mapping[name]]).to_numpy()
             except Exception:
                 pass
-    return np.arange(len(df))  # fallback
+    return np.arange(len(df))
 
 def compute_realized_vol(close: pd.Series, window: int = 20, annualize: bool = True) -> pd.Series:
     r = np.log(close).diff().dropna()
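
The compute_realized_vol helper shown in context above produces the series everything downstream forecasts: a rolling standard deviation of daily log returns, annualized with sqrt(252). A self-contained sketch on synthetic prices (all values illustrative, not from the app's CSV):

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
# Synthetic price path with ~1% daily log-return noise
close = pd.Series(100.0 * np.exp(np.cumsum(rng.normal(0.0, 0.01, 300))))

r = np.log(close).diff().dropna()          # daily log returns
rv = r.rolling(20, min_periods=20).std()   # 20-day rolling std (RV_WINDOW)
rv = (rv * np.sqrt(252.0)).dropna()        # annualized: ~0.01 * sqrt(252) ≈ 0.16
print(rv.tail())
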
@@ -194,50 +383,66 @@ def run_vol_forecast_and_evaluate():
     dates = _extract_dates(raw)
     close = _extract_close(raw)
 
-    # RV time series
+    # Realized volatility
     rv = compute_realized_vol(close, window=RV_WINDOW, annualize=ANNUALIZE).to_numpy()
     n = len(rv); H = PREDICTION_LENGTH
     if n <= H + 5:
         raise gr.Error(f"RV series too short after rolling. Need > {H+5}, got {n}.")
 
-    # Holdout: last H days
+    # Split
     rv_train = rv[: n - H]
     rv_test = rv[n - H :]
 
-    # Draw a reproducible SINGLE sample path
+    # Forecast a single sample path
     random.seed(0); np.random.seed(0); torch.manual_seed(0)
     if torch.cuda.is_available():
         torch.cuda.manual_seed_all(0)
 
     context = torch.tensor(rv_train, dtype=torch.float32)
-    fcst = pipe.predict(context, prediction_length=H, num_samples=NUM_SAMPLES)  # [1, 1, H]
-    samples = fcst[0].cpu().numpy()  # (1, H)
-    path_pred = samples[0]  # (H,) <-- day-accurate forecast
+    fcst = pipe.predict(context, prediction_length=H, num_samples=NUM_SAMPLES)  # [1,1,H]
+    samples = fcst[0].cpu().numpy()
+    path_pred = samples[0]  # (H,) point forecast
 
-    # Daily errors & percentage errors
-    err = path_pred - rv_test
-    denom = np.maximum(EPS, np.abs(rv_test))
-    abs_pct_err = np.abs(err) / denom * 100.0
-    pct_err = err / np.maximum(EPS, rv_test) * 100.0
+    # --------------------
+    # Bias/scale calibration
+    # --------------------
+    # choose α so that the MSE between α*pred and actual is minimized
+    alpha = float(np.sum(rv_test * path_pred) / np.sum(path_pred**2 + EPS))
+    path_pred_cal = alpha * path_pred
 
-    mape_pct = float(abs_pct_err.mean())  # headline metric: mean absolute % deviation
-    mpe_pct = float(pct_err.mean())  # signed (bias)
-    rmse = float(np.sqrt(np.mean(err**2)))
+    # Errors (raw & calibrated)
+    def metrics(y_true, y_pred):
+        err = y_pred - y_true
+        denom = np.maximum(EPS, np.abs(y_true))
+        abs_pct_err = np.abs(err) / denom * 100
+        pct_err = err / np.maximum(EPS, y_true) * 100
+        return {
+            "MAPE": abs_pct_err.mean(),
+            "MPE": pct_err.mean(),
+            "RMSE": np.sqrt(np.mean(err**2))
+        }
 
-    # Plot: history + actual (holdout) + forecast path
+    m_orig = metrics(rv_test, path_pred)
+    m_cal = metrics(rv_test, path_pred_cal)
+
+    # --------------------
+    # Plot
+    # --------------------
     fig = plt.figure(figsize=(10, 4))
     H0 = len(rv_train)
     if isinstance(dates, np.ndarray) and dates.shape[0] >= len(close):
         dates_rv = np.array(dates[-len(rv):])
         plt.plot(dates_rv[:H0], rv_train, label="realized vol (history)")
-        plt.plot(dates_rv[H0:], rv_test, label="realized vol (actual holdout)")
-        plt.plot(dates_rv[H0:], path_pred, linestyle="--", label="forecast (sample path)")
+        plt.plot(dates_rv[H0:], rv_test, label="actual (holdout)")
+        plt.plot(dates_rv[H0:], path_pred, linestyle="--", label="forecast (raw)")
+        plt.plot(dates_rv[H0:], path_pred_cal, linestyle="--", label=f"forecast (calibrated, α={alpha:.3f})")
         plt.xlabel("date")
     else:
         x_all = np.arange(len(rv)); x_fcst = np.arange(H0, H0 + H)
         plt.plot(x_all[:H0], rv_train, label="realized vol (history)")
-        plt.plot(x_fcst, rv_test, label="realized vol (actual holdout)")
-        plt.plot(x_fcst, path_pred, linestyle="--", label="forecast (sample path)")
+        plt.plot(x_fcst, rv_test, label="actual (holdout)")
+        plt.plot(x_fcst, path_pred, linestyle="--", label="forecast (raw)")
+        plt.plot(x_fcst, path_pred_cal, linestyle="--", label=f"forecast (calibrated, α={alpha:.3f})")
         plt.xlabel("time index")
 
     plt.title(f"Volatility Forecast (RV window={RV_WINDOW}, H={H})")
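
The hunk above is the core of the commit: the single Chronos sample path is rescaled by the least-squares α before scoring. A toy sketch of just that fit (stand-in arrays, not app code), showing that a systematic 20% underestimation comes out as α = 1.25 and a near-zero MPE after calibration:

import numpy as np

EPS = 1e-8
y_true = np.array([0.20, 0.22, 0.21, 0.25, 0.24])  # stand-in for rv_test
y_pred = 0.8 * y_true                              # stand-in forecast, 20% too low

alpha = float(np.sum(y_true * y_pred) / np.sum(y_pred**2 + EPS))
y_cal = alpha * y_pred

print(round(alpha, 3))                             # 1.25: undoes the bias
print(np.mean((y_cal - y_true) / y_true * 100))    # MPE ≈ 0 after calibration

Worth noting when reading the metrics: α is fitted on rv_test itself, so the calibrated MAPE/MPE/RMSE describe how well a rescaled path can fit the holdout, not out-of-sample skill.
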
@@ -245,63 +450,62 @@ def run_vol_forecast_and_evaluate():
     plt.legend(loc="best")
     plt.tight_layout()
 
-    # Table: day-by-day comparison
+    # --------------------
+    # Per-day table
+    # --------------------
     if isinstance(dates, np.ndarray) and dates.shape[0] >= len(close):
         dates_rv = np.array(dates[-len(rv):])
         last_dates = dates_rv[H0:]
     else:
         last_dates = np.arange(H)
 
+    abs_pct_err_orig = np.abs((path_pred - rv_test) / np.maximum(EPS, np.abs(rv_test))) * 100
+    abs_pct_err_cal = np.abs((path_pred_cal - rv_test) / np.maximum(EPS, np.abs(rv_test))) * 100
+
     df_days = pd.DataFrame({
         "date": last_dates,
         "actual_vol": rv_test,
-        "forecast_vol": path_pred,
-        "pct_error_% (signed)": pct_err,
-        "abs_pct_error_%": abs_pct_err,
+        "forecast_raw": path_pred,
+        "forecast_calibrated": path_pred_cal,
+        "abs_error_raw": np.abs(path_pred - rv_test),
+        "abs_pct_error_raw_%": abs_pct_err_orig,
+        "abs_pct_error_cal_%": abs_pct_err_cal,
     })
 
+    # --------------------
+    # Outputs
+    # --------------------
     out_json = {
-        "config": {
-            "rv_window": RV_WINDOW,
-            "prediction_length": H,
-            "num_samples": NUM_SAMPLES,
-            "annualized": ANNUALIZE,
-            "point_forecast": "single_sample_path",
-            "seed": 0,
-        },
-        "metrics": {
-            "MAPE_%": mape_pct,
-            "MPE_%": mpe_pct,
-            "RMSE": rmse,
-        },
+        "alpha": alpha,
+        "metrics_raw": {k: round(v, 4) for k, v in m_orig.items()},
+        "metrics_calibrated": {k: round(v, 4) for k, v in m_cal.items()},
     }
 
     metrics_md = (
-        f"**MAPE (avg absolute % deviation): {mape_pct:.2f}%** "
-        f"**MPE (avg signed %): {mpe_pct:.2f}%** "
-        f"**RMSE:** {rmse:.6f}"
+        f"**Bias/scale calibration** α = {alpha:.3f}\n\n"
+        f"**RAW:** MAPE {m_orig['MAPE']:.2f}% | MPE {m_orig['MPE']:.2f}% | RMSE {m_orig['RMSE']:.5f}\n"
+        f"**CALIBRATED:** MAPE {m_cal['MAPE']:.2f}% | MPE {m_cal['MPE']:.2f}% | RMSE {m_cal['RMSE']:.5f}"
     )
+
     return fig, out_json, df_days, metrics_md
 
 # --------------------
 # UI
 # --------------------
-with gr.Blocks(title="Volatility Forecast • Day-accurate point values") as demo:
+with gr.Blocks(title="Volatility Forecast • with bias/scale calibration") as demo:
     gr.Markdown(
-        "## Forecast of the last 30 days (day-accurate point values)\n"
-        "- A **single sample path** is forecast (no averaging, no median).\n"
-        "- Per-day comparison: forecast vs. actual + percentage error.\n"
-        "- Overall: **MAPE%** (headline metric), **MPE%** (bias), and RMSE."
+        "## Last 30 days of volatility (with automatic bias/scale calibration)\n"
+        "- Forecasts a single sample path (no mean, no median).\n"
+        "- A scale factor α is then computed to correct systematic under-/over-estimation.\n"
+        "- Shown: forecast (raw) & forecast (calibrated)."
     )
     run_btn = gr.Button("Run", variant="primary")
-    plot = gr.Plot(label="Forecast (single path) vs Actual")
-    meta = gr.JSON(label="Configuration & overall metrics")
+    plot = gr.Plot(label="Forecast vs Actual (raw & calibrated)")
+    meta = gr.JSON(label="Calibration parameters & metrics")
     table = gr.Dataframe(label="Per-day comparison", wrap=True)
-    metrics = gr.Markdown(label="Metrics")
+    metrics = gr.Markdown(label="Summary")
 
     run_btn.click(run_vol_forecast_and_evaluate, inputs=None, outputs=[plot, meta, table, metrics])
 
 if __name__ == "__main__":
     demo.launch()
-
-
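If a later revision wants the calibrated numbers to stay out-of-sample, one option is to fit α on a calibration window that precedes the 30-day holdout and apply it unchanged. A sketch with toy stand-ins (not app code; it assumes the earlier window is forecast the same way as the holdout):

import numpy as np

EPS = 1e-8
rng = np.random.default_rng(1)

# Toy stand-ins: calibration window and holdout, both with ~20% underestimation
actual_cal = 0.20 + 0.01 * rng.standard_normal(30)
pred_cal = 0.8 * actual_cal + 0.005 * rng.standard_normal(30)
actual_hold = 0.22 + 0.01 * rng.standard_normal(30)
pred_hold = 0.8 * actual_hold + 0.005 * rng.standard_normal(30)

# Fit alpha only on the calibration window ...
alpha = float(np.sum(actual_cal * pred_cal) / np.sum(pred_cal**2 + EPS))
# ... and apply it unchanged to the holdout
pred_hold_cal = alpha * pred_hold

print(round(alpha, 3))                                             # ≈ 1.24
print(np.mean((pred_hold_cal - actual_hold) / actual_hold * 100))  # MPE near 0, now out-of-sample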