Spaces:

mgbam
/

BizIntel_AI

Running

App Files Files Community

mgbam committed 9 days ago

Commit

eec9db3

verified ·

1 Parent(s): e1d8bc9

Update tools/forecaster.py

Browse files

Files changed (1) hide show

tools/forecaster.py +76 -40

tools/forecaster.py CHANGED Viewed

@@ -1,75 +1,111 @@
-# tools/forecaster.py
 import pandas as pd
 from statsmodels.tsa.arima.model import ARIMA
 import plotly.graph_objects as go
def forecast_metric_tool(file_path: str, date_col: str, value_col: str):
    """
    Forecast the next 3 periods for any numeric metric.

    Reads the CSV at `file_path`, parses `date_col` as dates and `value_col`
    as numbers, fits an ARIMA(1,1,1) model and forecasts 3 steps ahead.

    Side effect: writes a history+forecast Plotly image to "forecast_plot.png"
    (the original comment says a monkey-patch redirects this to /tmp; that
    patch is not visible here).

    Returns:
      str: the forecast as a text table, or a message starting with '❌'
      describing why the forecast could not be produced.
    """
    # 0) Read full CSV
    df = pd.read_csv(file_path)

    # 1) Check that both columns actually exist
    if date_col not in df.columns:
        return f"❌ Date column '{date_col}' not found in your data."
    if value_col not in df.columns:
        return f"❌ Metric column '{value_col}' not found in your data."

    # 2) Parse dates
    try:
        df[date_col] = pd.to_datetime(df[date_col])
    except Exception:
        return f"❌ Could not parse '{date_col}' as dates."

    # 3) Coerce metric to numeric & drop invalid rows
    df[value_col] = pd.to_numeric(df[value_col], errors="coerce")
    df = df.dropna(subset=[date_col, value_col])
    if df.empty:
        return f"❌ After coercion, no valid data remains for '{value_col}'."

    # 4) Sort & index by date, collapse duplicate dates to their mean
    df = df.sort_values(date_col).set_index(date_col)
    df = df[[value_col]].groupby(level=0).mean()

    # pd.infer_freq raises ValueError on fewer than 3 dates; report that as a
    # normal tool error instead of letting the exception escape.
    if len(df) < 3:
        return (
            f"❌ Need at least 3 distinct dates in '{date_col}' to forecast; "
            f"found {len(df)}."
        )

    # 5) Infer a frequency (fallback to daily) and re-index onto a regular grid
    freq = pd.infer_freq(df.index)
    if freq is None:
        freq = "D"  # fallback to daily
    df = df.asfreq(freq)

    # 6) Fit ARIMA (1,1,1)
    try:
        model = ARIMA(df[value_col], order=(1, 1, 1))
        model_fit = model.fit()
    except Exception as e:
        return f"❌ ARIMA fitting failed: {e}"

    # 7) Produce a proper date-indexed forecast
    fc_res = model_fit.get_forecast(steps=3)
    forecast = fc_res.predicted_mean

    # 8) Plot history + forecast
    fig = go.Figure()
    fig.add_scatter(
        x=df.index, y=df[value_col],
        mode="lines", name=value_col
    )
    fig.add_scatter(
        x=forecast.index, y=forecast,
        mode="lines+markers", name="Forecast"
    )
    fig.update_layout(
        title=f"{value_col} Forecast",
        xaxis_title=date_col,
        yaxis_title=value_col,
        template="plotly_dark",
    )
    fig.write_image("forecast_plot.png")  # lands in /tmp via our monkey-patch

    # 9) Return the forecast as a text table
    return forecast.to_frame(name="Forecast").to_string()

+import os
+import tempfile
 import pandas as pd
 from statsmodels.tsa.arima.model import ARIMA
 import plotly.graph_objects as go
def forecast_metric_tool(
    file_path: str,
    date_col: str,
    value_col: str,
    periods: int = 3,
    output_dir: str = "/tmp"
):
    """
    Forecast a time-series metric with ARIMA(1,1,1) and plot history + forecast.

    Loads a CSV or Excel file (chosen by extension), parses `date_col` as
    dates and `value_col` as numbers, averages duplicate dates, re-indexes
    onto an inferred (or daily fallback) frequency, fits the model and
    forecasts the next `periods` steps.

    Args:
      file_path: path to a .csv, .xls or .xlsx file; any other extension is
        read as CSV.
      date_col: name of the column holding dates.
      value_col: name of the column holding the metric.
      periods: number of future steps to forecast.
      output_dir: directory for the PNG plot; created if missing.

    Returns:
      (forecast_df, plot_path) on success, where forecast_df is a DataFrame
      with one 'Forecast' column indexed by date and plot_path is the full
      path of the saved PNG.
      On a handled error, a string starting with '❌' describing the problem.

    Raises:
      Whatever `fig.write_image` raises if the plot cannot be written; the
      empty placeholder file is removed first.
    """
    # Reject a non-positive integer horizon before doing any work. Non-int
    # values are passed through untouched to stay compatible with callers.
    if isinstance(periods, int) and periods < 1:
        return f"❌ 'periods' must be a positive integer, got {periods}."

    # 0) Load data (CSV or Excel)
    ext = os.path.splitext(file_path)[1].lower()
    try:
        if ext in ('.xls', '.xlsx'):
            df = pd.read_excel(file_path)
        else:
            df = pd.read_csv(file_path)
    except Exception as e:
        return f"❌ Failed to load file: {e}"

    # 1) Validate columns
    for col in (date_col, value_col):
        if col not in df.columns:
            return f"❌ Column '{col}' not found."

    # 2) Parse dates and numeric; unparseable metric values become NaN
    try:
        df[date_col] = pd.to_datetime(df[date_col])
    except Exception:
        return f"❌ Could not parse '{date_col}' as dates."
    df[value_col] = pd.to_numeric(df[value_col], errors='coerce')
    df = df.dropna(subset=[date_col, value_col])
    if df.empty:
        return f"❌ No valid rows after dropping NaNs in '{date_col}'/'{value_col}'."

    # 3) Aggregate duplicates & index: one row per date, sorted, so the
    #    resulting index is unique and asfreq below cannot hit duplicates.
    df = (
        df[[date_col, value_col]]
        .groupby(date_col, as_index=True)
        .mean()
        .sort_index()
    )

    # pd.infer_freq raises ValueError on fewer than 3 dates; report that as a
    # normal tool error instead of letting the exception escape.
    if len(df) < 3:
        return (
            f"❌ Need at least 3 distinct dates in '{date_col}' to forecast; "
            f"found {len(df)}."
        )

    # 4) Infer frequency (fallback to daily) and re-index onto a regular grid
    freq = pd.infer_freq(df.index) or 'D'
    df = df.asfreq(freq)

    # 5) Fit ARIMA
    try:
        model = ARIMA(df[value_col], order=(1, 1, 1))
        fit = model.fit()
    except Exception as e:
        return f"❌ ARIMA fitting failed: {e}"

    # 6) Forecast future
    fc_res = fit.get_forecast(steps=periods)
    forecast = fc_res.predicted_mean
    forecast_df = forecast.to_frame(name='Forecast')

    # 7) Plot history + forecast
    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=df.index, y=df[value_col],
            mode='lines+markers', name=value_col
        )
    )
    fig.add_trace(
        go.Scatter(
            x=forecast.index, y=forecast,
            mode='lines+markers', name='Forecast'
        )
    )
    fig.update_layout(
        title=f"{value_col} Forecast",
        xaxis_title=date_col,
        yaxis_title=value_col,
        template='plotly_dark',
    )

    # 8) Save to a uniquely named file. The handle is only used to reserve
    #    the name and is closed before plotly writes to the path.
    os.makedirs(output_dir, exist_ok=True)
    with tempfile.NamedTemporaryFile(
        suffix='.png', prefix='forecast_', dir=output_dir, delete=False
    ) as tmp:
        plot_path = tmp.name
    try:
        fig.write_image(plot_path)
    except Exception:
        # Do not leave the empty placeholder behind when export fails.
        if os.path.exists(plot_path):
            os.remove(plot_path)
        raise
    return forecast_df, plot_path