Spaces:

causalscience
/

Impact_Analysis_Tools

Running

App Files Files Community

causalscience committed on Aug 20

Commit

51836ac

verified ·

1 Parent(s): e15b260

UI for timeseries

Browse files

Files changed (1) hide show

ui/timeseries_tab.py +499 -0

ui/timeseries_tab.py ADDED Viewed

	@@ -0,0 +1,499 @@

+from __future__ import annotations
+import sys, subprocess
+def _ensure(pkg):
+    try:
+        __import__(pkg.split("==")[0].split(">=")[0])
+    except Exception:
+        subprocess.check_call([sys.executable, "-m", "pip", "install", pkg])
# Bootstrap the third-party packages this module needs before importing them.
for _pkg in ("gradio", "pandas", "numpy", "matplotlib"):
    _ensure(_pkg)
+import os
+from pathlib import Path
+from datetime import datetime
+import zipfile  # ADDED
+import io       # ADDED
+import gradio as gr
+import pandas as pd
+import numpy as np
+from typing import List, Optional
+def _export_dir() -> Path:
+    candidates = [
+        Path(os.getenv("HF_MNT_DIR", "")).expanduser(),
+        Path("/mnt/data"),
+        Path.cwd() / "exports",
+    ]
+    for p in candidates:
+        try:
+            if p and str(p).strip():
+                p.mkdir(parents=True, exist_ok=True)
+                return p
+        except Exception:
+            continue
+    return Path.cwd()
def _import_models():
    """Import the forecasting back-end on demand and hand back its entry points.

    The import happens inside the function, so ``timeseries_forecasting`` is
    only loaded when this function is called.

    Returns:
        tuple: ``(run_auto_arima_forecast, run_ets_forecast,
        run_prophet_forecast, run_sarimax_forecast,
        perform_stationarity_tests, detect_outliers)`` in exactly that order.
    """
    from timeseries_forecasting import (
        run_auto_arima_forecast as arima,
        run_ets_forecast as ets,
        run_prophet_forecast as prophet,
        run_sarimax_forecast as sarimax,
        perform_stationarity_tests as stationarity,
        detect_outliers as outliers,
    )

    return arima, ets, prophet, sarimax, stationarity, outliers
def timeseries_tab():
    """Build the time-series forecasting UI and register its event handlers.

    The forecasting functions are obtained from ``_import_models()`` when this
    function is called. All components are created inside one ``gr.Column``.
    The callbacks defined here read the uploaded CSV, run the selected model,
    and prepare CSV / ZIP downloads in the directory chosen by
    ``_export_dir()``.

    Returns:
        list: The created components, in a fixed order, so the caller can
        reference them.
    """
    (
        run_auto_arima_forecast,
        run_ets_forecast,
        run_prophet_forecast,
        run_sarimax_forecast,
        perform_stationarity_tests,
        detect_outliers,
    ) = _import_models()

    # ---------- Shared helpers (no UI) ----------
    def _load_csv(fp):
        """Read the CSV at *fp* and strip surrounding whitespace from headers."""
        df = pd.read_csv(fp)
        df.columns = df.columns.str.strip()
        return df

    def _index_by_date(df, dcol):
        """Return a copy of *df* sorted by and indexed on the parsed *dcol*."""
        out = df.copy()
        out[dcol] = pd.to_datetime(out[dcol], errors="coerce")
        # Values that fail to parse become NaT and carry no usable timestamp,
        # so those rows are dropped instead of being kept as NaT index entries.
        out = out.dropna(subset=[dcol])
        return out.sort_values(dcol).set_index(dcol)

    def _figure_png(fig_obj):
        """Render a figure to PNG bytes; return None if it cannot be rendered."""
        if fig_obj is None:
            return None
        try:
            # matplotlib-style figure
            if hasattr(fig_obj, "savefig"):
                buf = io.BytesIO()
                fig_obj.savefig(buf, format="png", bbox_inches="tight", dpi=180)
                return buf.getvalue()
            # plotly-style figure; may raise if the image engine is missing
            if hasattr(fig_obj, "to_image"):
                return fig_obj.to_image(format="png")
        except Exception:
            # Best effort: the other report artifacts are still exported.
            return None
        return None  # unsupported figure type

    with gr.Column():
        gr.Markdown("## Time Series Forecasting")
        file_input = gr.File(label="Upload CSV with date, target, optional regressors", type="filepath")
        # --- Data configuration ---
        with gr.Group():
            gr.Markdown("### Data Configuration")
            date_col = gr.Dropdown(label="Date column", interactive=True)
            target_col = gr.Dropdown(label="Target (numeric)", interactive=True)
            exog_cols = gr.Dropdown(label="Exogenous regressors (optional; numeric only)", interactive=True, multiselect=True)
            data_preview = gr.Dataframe(label="Preview (first 12 rows)", interactive=False, visible=False)
            data_info = gr.Textbox(label="Data summary", lines=4, interactive=False, visible=False)
        # --- Train / Forecast controls ---
        with gr.Group():
            gr.Markdown("### Train / Forecast Controls")
            train_start = gr.Textbox(label="Train start (optional, YYYY-MM-DD)", placeholder="auto")
            train_end = gr.Textbox(label="Train end (optional, YYYY-MM-DD)", placeholder="auto")
            horizon = gr.Number(value=12, label="Forecast horizon H (steps)", precision=0)
            freq = gr.Dropdown(
                label="Frequency",
                value="infer",
                choices=["infer", "D", "W-MON", "MS", "M", "Q", "H"]
            )
        # --- Model selection & params ---
        with gr.Group():
            gr.Markdown("### Model & Parameters")
            model = gr.Dropdown(
                label="Model",
                choices=["Auto-ARIMA", "ETS", "Prophet", "SARIMAX"],
                value="Auto-ARIMA",
            )
            with gr.Accordion("Auto-ARIMA / SARIMAX settings", open=False, visible=True) as aa_group:
                aa_seasonal = gr.Checkbox(value=False, label="Seasonal")
                aa_m = gr.Number(value=12, label="Seasonal period m", precision=0)
            with gr.Accordion("ETS settings", open=False, visible=False) as ets_group:
                ets_error = gr.Dropdown(choices=["add", "mul"], value="add", label="Error")
                ets_trend = gr.Dropdown(choices=["none", "add", "mul"], value="none", label="Trend")
                ets_seasonal = gr.Dropdown(choices=["none", "add", "mul"], value="none", label="Seasonal")
                ets_m = gr.Number(value=1, label="Seasonal periods (m)", precision=0)
                ets_damped = gr.Checkbox(value=False, label="Damped trend")
            with gr.Accordion("Prophet settings", open=False, visible=False) as pr_group:
                pr_mode = gr.Dropdown(choices=["additive", "multiplicative"], value="additive", label="Seasonality mode")
                pr_yearly = gr.Checkbox(value=True, label="Yearly")
                pr_weekly = gr.Checkbox(value=True, label="Weekly")
                pr_daily = gr.Checkbox(value=False, label="Daily")
        # --- Exogenous handling controls ---
        with gr.Row():
            exog_policy = gr.Dropdown(
                label="Exogenous handling",
                value="auto_forecast",
                choices=["auto_forecast", "drop_if_missing", "require_future"],
                info="How to handle future exogenous values if missing in the file."
            )
            exog_method = gr.Dropdown(
                label="Exog forecast method",
                value="naive",
                choices=["naive", "seasonal_naive", "auto_arima"],
            )
            exog_m = gr.Number(
                value=0,
                label="Exog seasonal period (m)",
                precision=0,
                info="Used for seasonal-naive and seasonal ARIMA; set m>1 to enable seasonality."
            )
        run_btn = gr.Button("Run Forecast", variant="primary")
        show_diag = gr.Checkbox(value=True, label="Show residual diagnostics")
        export_toggle = gr.Checkbox(value=False, label="Enable export widgets", visible=False)
        # --- Outputs ---
        fig_out = gr.Plot(label="Forecast")
        summary_out = gr.Textbox(label="Summary", lines=16)
        diag_out = gr.Plot(label="Diagnostics", visible=False)
        metrics_out = gr.Textbox(label="Quick metrics", lines=3, visible=False)
        residual_out = gr.Textbox(label="Residual info", lines=8, visible=False)
        forecast_store = gr.State()  # last forecast DataFrame
        fig_state = gr.State()       # last forecast figure
        diag_state = gr.State()      # last diagnostics figure (may be None)
        summary_state = gr.State()   # last summary string
        with gr.Row() as export_row:
            download_csv_btn = gr.DownloadButton("Download forecast CSV")
            export_report_btn = gr.DownloadButton("Export full report (ZIP)")
        # --- Diagnostics ---
        with gr.Accordion("Advanced diagnostics (optional)", open=False):
            analyze_btn = gr.Button("Run stationarity & outlier scan", variant="secondary")
            stationarity_txt = gr.Textbox(label="Stationarity tests (ADF, KPSS)", lines=8, interactive=False)
            outlier_txt = gr.Textbox(label="Outlier scan", lines=2, interactive=False)

        # ---------- Callbacks ----------
        def _read_csv(fp):
            """Populate the column dropdowns, preview and summary for a new upload."""
            cleared = (
                gr.update(choices=[], value=None),
                gr.update(choices=[], value=None),
                gr.update(choices=[], value=[]),
                gr.update(visible=False),
            )
            if not fp:
                return (*cleared, gr.update(visible=False))
            try:
                df = _load_csv(fp)
            except Exception as e:
                gr.Warning(f"Failed to read CSV: {e}")
                return (*cleared, gr.update(visible=True, value=f"Error: {e}"))

            cols = df.columns.tolist()
            num_cols = [c for c in cols if pd.api.types.is_numeric_dtype(df[c])]
            # Heuristic: first datetime-like column as date, last numeric as target.
            # Numeric columns are skipped for the date guess because
            # pd.to_datetime accepts plain numbers (as epoch offsets), which
            # would make any leading numeric column look like a date.
            date_guess = None
            for c in cols:
                if c in num_cols:
                    continue
                try:
                    pd.to_datetime(df[c])
                except Exception:
                    continue
                date_guess = c
                break
            tgt_guess = num_cols[-1] if num_cols else None

            info = f"Shape: {df.shape[0]} x {df.shape[1]}\n"
            if date_guess:
                dt = pd.to_datetime(df[date_guess], errors="coerce")
                info += f"Date range in file: {dt.min()} → {dt.max()}\n"
            info += f"Numeric columns: {', '.join(num_cols[:6])}{'...' if len(num_cols)>6 else ''}\n"
            info += f"Missing cells: {int(df.isna().sum().sum())}"
            return (
                gr.update(choices=cols, value=date_guess),
                gr.update(choices=cols, value=tgt_guess),
                gr.update(choices=list(num_cols), value=[]),
                gr.update(visible=True, value=df.head(12)),
                gr.update(visible=True, value=info),
            )

        file_input.change(
            _read_csv,
            inputs=[file_input],
            outputs=[date_col, target_col, exog_cols, data_preview, data_info],
        )

        def _analyze(fp, dcol, tcol):
            """Run the stationarity tests and the outlier scan on the target."""
            if not fp or not dcol or not tcol:
                return "Upload a CSV and select columns.", "—"
            try:
                df = _load_csv(fp)
            except Exception as e:
                return f"CSV read error: {e}", "—"
            missing = [name for name in [dcol, tcol] if name not in df.columns]
            if missing:
                return (f"Selected column(s) not found: {', '.join(missing)}.\n"
                        f"Available columns: {', '.join(df.columns.tolist())}", "—")
            if dcol == tcol:
                return "Date and target must be different columns.", "—"
            dfi = _index_by_date(df[[dcol, tcol]], dcol)
            # Each analysis is independent: a failure in one still reports the other.
            try:
                st = perform_stationarity_tests(dfi, tcol)
            except Exception as e:
                st = f"Stationarity test error: {e}"
            try:
                ot = detect_outliers(dfi, tcol)
            except Exception as e:
                ot = f"Outlier detection error: {e}"
            return st, ot

        analyze_btn.click(
            _analyze, inputs=[file_input, date_col, target_col], outputs=[stationarity_txt, outlier_txt]
        )

        def _toggle_param_visibility(model_name: str):
            """Show only the settings accordion that belongs to *model_name*."""
            return (
                gr.update(visible=model_name in ["Auto-ARIMA", "SARIMAX"]),
                gr.update(visible=model_name == "ETS"),
                gr.update(visible=model_name == "Prophet"),
            )

        model.change(
            _toggle_param_visibility,
            inputs=[model],
            outputs=[aa_group, ets_group, pr_group],
        )

        def _fail(message):
            """Build the 9-value output tuple that reports *message* and clears the rest."""
            return (None, message, gr.update(visible=False), gr.update(visible=False),
                    gr.update(visible=False), None, None, None, None)

        # Forecast callback
        def _forecast(
            fp, dcol, tcol,
            model_name,
            H, FREQ, show_d,
            aa_seas, aa_m,
            ets_err, ets_tr, ets_seas, ets_m_per, ets_damp,
            pr_mode, pr_year, pr_week, pr_day,
            exog_selected,
            exog_policy_val, exog_method_val, exog_m_val,
            tr_start, tr_end
        ):
            """Run the selected model and return values for the 9 bound outputs.

            Output order: forecast figure, summary text, diagnostics plot
            update, quick-metrics update, residual-info update, forecast
            DataFrame (state), figure (state), diagnostics figure (state),
            summary (state). Every failure is reported through the summary
            slot; the function does not raise.
            """
            if not fp:
                return _fail("Error: upload a CSV.")
            try:
                df = _load_csv(fp)
            except Exception as e:
                return _fail(f"CSV read error: {e}")
            if dcol not in df.columns or tcol not in df.columns:
                return _fail(f"Error: selected column(s) not found. Have: {', '.join(df.columns.tolist())}")
            if dcol == tcol:
                return _fail("Error: date and target must be different columns.")
            try:
                steps = int(H)
            except (TypeError, ValueError):
                return _fail("Error: forecast horizon must be a whole number.")
            if steps < 1:
                return _fail("Error: forecast horizon must be at least 1.")

            dfi = _index_by_date(df, dcol)
            if dfi.empty:
                return _fail(f"Error: no rows with a parseable date in column '{dcol}'.")
            exog_selected = [c for c in (exog_selected or []) if c in dfi.columns and c != tcol]
            want_diag = bool(show_d)
            try:
                # Keyword arguments accepted by every model runner.
                shared = dict(
                    freq=FREQ,
                    train_start=(tr_start or "").strip() or None,
                    train_end=(tr_end or "").strip() or None,
                    return_diagnostics=want_diag,
                )
                # Extra keyword arguments for the runners that take regressors.
                exog_kwargs = dict(
                    exog_cols=exog_selected or None,
                    future_exog_df=None,
                    exog_policy=exog_policy_val,
                    exog_method=exog_method_val,
                    exog_m=int(exog_m_val or 0),
                )
                if model_name in ("Auto-ARIMA", "SARIMAX"):
                    runner = run_auto_arima_forecast if model_name == "Auto-ARIMA" else run_sarimax_forecast
                    res = runner(
                        dfi, tcol, steps,
                        bool(aa_seas), int(aa_m) if aa_seas else 1,
                        **shared, **exog_kwargs,
                    )
                elif model_name == "ETS":
                    res = run_ets_forecast(
                        dfi, tcol, steps,
                        ets_err, ets_tr, ets_seas, int(ets_m_per), bool(ets_damp),
                        **shared,
                    )
                elif model_name == "Prophet":
                    res = run_prophet_forecast(
                        dfi, tcol, steps,
                        pr_mode, bool(pr_year), bool(pr_week), bool(pr_day),
                        **shared, **exog_kwargs,
                    )
                else:
                    return _fail(f"Unknown model: {model_name}")

                fig, summary, diag_fig, yhat, conf_df = res

                # Build the DataFrame offered for CSV download.
                csv_df = None
                if yhat is not None:
                    csv_df = pd.DataFrame({
                        "timestamp": pd.Index(yhat.index, name="timestamp"),
                        "forecast": yhat.values,
                    })
                    if conf_df is not None and all(k in conf_df.columns for k in ["lower", "upper"]):
                        csv_df["lower"] = np.asarray(conf_df["lower"])
                        csv_df["upper"] = np.asarray(conf_df["upper"])
                    csv_df = csv_df.reset_index(drop=True)

                # Quick metrics are the summary lines that mention an error metric.
                metrics_text = ""
                if isinstance(summary, str):
                    metrics_text = "\n".join(
                        ln for ln in summary.splitlines()
                        if any(k in ln for k in ["MAE:", "RMSE:", "MAPE:"])
                    )

                return (
                    fig,
                    summary,
                    gr.update(visible=want_diag and diag_fig is not None, value=diag_fig),
                    gr.update(visible=bool(metrics_text), value=metrics_text if metrics_text else None),
                    gr.update(visible=False),
                    csv_df,
                    fig,                # fig_state
                    diag_fig,           # diag_state
                    summary,            # summary_state
                )
            except Exception as e:
                # UI boundary: surface any model error in the summary box.
                return _fail(f"Error: {e}")

        run_btn.click(
            _forecast,
            inputs=[
                file_input, date_col, target_col,
                model,
                horizon, freq, show_diag,
                aa_seasonal, aa_m,
                ets_error, ets_trend, ets_seasonal, ets_m, ets_damped,
                pr_mode, pr_yearly, pr_weekly, pr_daily,
                exog_cols,
                exog_policy, exog_method, exog_m,
                train_start, train_end,
            ],
            outputs=[
                fig_out, summary_out, diag_out, metrics_out, residual_out, forecast_store,
                fig_state, diag_state, summary_state
            ],
        )

        # NOTE(review): both download handlers create the file and then set the
        # button's own value, so the file may only be downloadable on the
        # following click — confirm against the Gradio DownloadButton docs.
        def _prepare_csv(forecast_df: Optional[pd.DataFrame]):
            """Write the last forecast to a CSV file and point the button at it."""
            if forecast_df is None or not isinstance(forecast_df, pd.DataFrame) or forecast_df.empty:
                return gr.update(value=None)
            # Microseconds keep names distinct for exports within the same second.
            ts = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
            path = _export_dir() / f"forecast_{ts}.csv"
            forecast_df.to_csv(path, index=False)
            return gr.update(value=str(path))

        download_csv_btn.click(_prepare_csv, inputs=[forecast_store], outputs=[download_csv_btn])

        def _export_report(fig_obj, diag_obj, summary_text, forecast_df: Optional[pd.DataFrame]):
            """Bundle the last forecast's CSV, plots and summary into one ZIP.

            Artifacts are written straight into the archive, so no
            intermediate files or directories are left behind. Anything that
            is missing or cannot be rendered is skipped.
            """
            has_df = isinstance(forecast_df, pd.DataFrame) and not forecast_df.empty
            if fig_obj is None and not has_df and not summary_text:
                return gr.update(value=None)

            artifacts = {}
            if has_df:
                artifacts["forecast.csv"] = forecast_df.to_csv(index=False)
            plot_png = _figure_png(fig_obj)
            if plot_png is not None:
                artifacts["forecast_plot.png"] = plot_png
            diag_png = _figure_png(diag_obj)
            if diag_png is not None:
                artifacts["diagnostics.png"] = diag_png
            if isinstance(summary_text, str) and summary_text.strip():
                artifacts["summary.txt"] = summary_text

            ts = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
            zip_path = _export_dir() / f"full_report_{ts}.zip"
            with zipfile.ZipFile(zip_path, "w", compression=zipfile.ZIP_DEFLATED) as zf:
                for arcname, payload in artifacts.items():
                    # str payloads are stored UTF-8 encoded by writestr.
                    zf.writestr(arcname, payload)
            return gr.update(value=str(zip_path))

        export_report_btn.click(
            _export_report,
            inputs=[fig_state, diag_state, summary_state, forecast_store],
            outputs=[export_report_btn]
        )
    return [
        file_input, date_col, target_col, exog_cols,
        model, horizon, aa_group, ets_group, pr_group,
        exog_policy, exog_method, exog_m,
        run_btn, show_diag, fig_out, summary_out, diag_out, metrics_out, residual_out,
        export_toggle, export_row,  # export_row is always visible; the toggle is hidden
        freq, train_start, train_end,
        forecast_store, download_csv_btn,
    ]
__all__ = ["timeseries_tab"]