Spaces:

mgbam
/

BizIntel_AI

Sleeping

App Files Community

mgbam committed on May 7

Commit

afa778c

verified ·

1 Parent(s): ccdbd61

Update tools/forecaster.py

Browse files

Files changed (1) hide show

tools/forecaster.py +66 -39

tools/forecaster.py CHANGED Viewed

@@ -1,10 +1,19 @@
 # tools/forecaster.py
 import os
 import tempfile
 import pandas as pd
-from statsmodels.tsa.arima.model import ARIMA
 import plotly.graph_objects as go
-from typing import Tuple, Union
 def forecast_metric_tool(
@@ -12,39 +21,51 @@ def forecast_metric_tool(
     date_col: str,
     value_col: str,
     periods: int = 3,
-    output_dir: str = "/tmp"
 ) -> Union[Tuple[pd.DataFrame, str], str]:
     """
-    Load CSV or Excel, parse a time series metric, fit ARIMA(1,1,1),
-    forecast next `periods` steps, return DataFrame and PNG path.
-    Returns:
-      - (forecast_df, plot_path) on success
-      - error string starting with '❌' on failure
     """
-    # Load data
     ext = os.path.splitext(file_path)[1].lower()
     try:
-        df = pd.read_excel(file_path) if ext in ('.xls', '.xlsx') else pd.read_csv(file_path)
     except Exception as exc:
         return f"❌ Failed to load file: {exc}"
-    # Validate columns
     missing = [c for c in (date_col, value_col) if c not in df.columns]
     if missing:
         return f"❌ Missing column(s): {', '.join(missing)}"
-    # Parse and clean
-    try:
-        df[date_col] = pd.to_datetime(df[date_col], errors='coerce')
-    except Exception:
-        return f"❌ Could not parse '{date_col}' as dates."
-    df[value_col] = pd.to_numeric(df[value_col], errors='coerce')
     df = df.dropna(subset=[date_col, value_col])
     if df.empty:
-        return f"❌ No valid data after cleaning '{date_col}'/'{value_col}'"
-    # Aggregate duplicates and sort
     df = (
         df[[date_col, value_col]]
         .groupby(date_col, as_index=True)
@@ -52,50 +73,56 @@ def forecast_metric_tool(
         .sort_index()
     )
-    # Infer frequency
-    freq = pd.infer_freq(df.index) or 'D'
     try:
         df = df.asfreq(freq)
     except Exception:
-        df = df[~df.index.duplicated(keep='first')].asfreq(freq)
-    # Fit ARIMA
     try:
         model = ARIMA(df[value_col], order=(1, 1, 1))
         fit = model.fit()
     except Exception as exc:
         return f"❌ ARIMA fitting failed: {exc}"
-    # Forecast
     try:
         pred = fit.get_forecast(steps=periods)
         forecast = pred.predicted_mean
     except Exception as exc:
         return f"❌ Forecast generation failed: {exc}"
-    forecast_df = forecast.to_frame(name='Forecast')
-    # Plot history + forecast
-    fig = go.Figure(
-        data=[
-            go.Scatter(x=df.index, y=df[value_col], mode='lines', name='History'),
-            go.Scatter(x=forecast.index, y=forecast, mode='lines+markers', name='Forecast')
-        ]
     )
     fig.update_layout(
         title=f"{value_col} Forecast",
         xaxis_title=date_col,
         yaxis_title=value_col,
-        template='plotly_dark'
     )
-    # Save PNG
     os.makedirs(output_dir, exist_ok=True)
-    tmp = tempfile.NamedTemporaryFile(suffix='.png', prefix='forecast_', dir=output_dir, delete=False)
-    plot_path = tmp.name
-    tmp.close()
     try:
-        fig.write_image(plot_path, scale=2)
     except Exception as exc:
         return f"❌ Plot saving failed: {exc}"
-    return forecast_df, plot_path

 # tools/forecaster.py
+# ------------------------------------------------------------
+# Fits an ARIMA(1,1,1) model to any (date, value) series,
+# forecasts the next `periods` steps, plots history + forecast,
+# and saves a hi‑res PNG copy to /tmp (or custom output_dir).
 import os
 import tempfile
+from typing import Tuple, Union
 import pandas as pd
 import plotly.graph_objects as go
+from statsmodels.tsa.arima.model import ARIMA
+# Typing alias
+Plot = go.Figure
 def forecast_metric_tool(
     date_col: str,
     value_col: str,
     periods: int = 3,
+    output_dir: str = "/tmp",
 ) -> Union[Tuple[pd.DataFrame, str], str]:
     """
+    Parameters
+    ----------
+    file_path : str
+        CSV or Excel path.
+    date_col  : str
+        Column to treat as the date index.
+    value_col : str
+        Numeric column to forecast.
+    periods   : int
+        Steps ahead to forecast.
+    output_dir: str
+        Directory to save PNG.
+    Returns
+    -------
+    (forecast_df, png_path)   on success
+    error string (starting '❌') otherwise
     """
+    # ── 1. Load file ──────────────────────────────────────────
     ext = os.path.splitext(file_path)[1].lower()
     try:
+        df = (
+            pd.read_excel(file_path)
+            if ext in (".xls", ".xlsx")
+            else pd.read_csv(file_path)
+        )
     except Exception as exc:
         return f"❌ Failed to load file: {exc}"
+    # ── 2. Column validation ─────────────────────────────────
     missing = [c for c in (date_col, value_col) if c not in df.columns]
     if missing:
         return f"❌ Missing column(s): {', '.join(missing)}"
+    # ── 3. Parse & clean ─────────────────────────────────────
+    df[date_col] = pd.to_datetime(df[date_col], errors="coerce")
+    df[value_col] = pd.to_numeric(df[value_col], errors="coerce")
     df = df.dropna(subset=[date_col, value_col])
     if df.empty:
+        return f"❌ No valid data after cleaning '{date_col}' / '{value_col}'."
+    # Aggregate duplicate timestamps → mean
     df = (
         df[[date_col, value_col]]
         .groupby(date_col, as_index=True)
         .sort_index()
     )
+    # Infer or default frequency
+    freq = pd.infer_freq(df.index) or "D"
     try:
         df = df.asfreq(freq)
     except Exception:
+        # fallback if duplicates still exist
+        df = (
+            df[~df.index.duplicated(keep="first")]
+            .asfreq(freq)
+        )
+    # ── 4. Fit ARIMA(1,1,1) ──────────────────────────────────
     try:
         model = ARIMA(df[value_col], order=(1, 1, 1))
         fit = model.fit()
     except Exception as exc:
         return f"❌ ARIMA fitting failed: {exc}"
+    # ── 5. Forecast ──────────────────────────────────────────
     try:
         pred = fit.get_forecast(steps=periods)
         forecast = pred.predicted_mean
     except Exception as exc:
         return f"❌ Forecast generation failed: {exc}"
+    forecast_df = forecast.to_frame(name="Forecast")
+    # ── 6. Plot history + forecast ───────────────────────────
+    fig: Plot = go.Figure()
+    fig.add_scatter(x=df.index, y=df[value_col], mode="lines", name="History")
+    fig.add_scatter(
+        x=forecast.index, y=forecast, mode="lines+markers", name="Forecast"
     )
     fig.update_layout(
         title=f"{value_col} Forecast",
         xaxis_title=date_col,
         yaxis_title=value_col,
+        template="plotly_dark",
     )
+    # ── 7. Save PNG ──────────────────────────────────────────
     os.makedirs(output_dir, exist_ok=True)
+    tmp_png = tempfile.NamedTemporaryFile(
+        prefix="forecast_", suffix=".png", dir=output_dir, delete=False
+    )
+    png_path = tmp_png.name
+    tmp_png.close()
     try:
+        fig.write_image(png_path, scale=2)
     except Exception as exc:
         return f"❌ Plot saving failed: {exc}"
+    return forecast_df, png_path