Spaces:

mgbam
/

BizIntel_AI

Sleeping

App Files Files Community

BizIntel_AI / tools /forecaster.py

mgbam

Update tools/forecaster.py

afa778c verified about 2 months ago

raw

history blame contribute delete

4.38 kB

	# tools/forecaster.py
	# ------------------------------------------------------------
	# Fits an ARIMA(1,1,1) model to any (date, value) series,
	# forecasts the next `periods` steps, plots history + forecast,
	# and saves a hi‑res PNG copy to /tmp (or custom output_dir).

	import os
	import tempfile
	from typing import Tuple, Union

	import pandas as pd
	import plotly.graph_objects as go
	from statsmodels.tsa.arima.model import ARIMA

	# Typing alias
	Plot = go.Figure


	def forecast_metric_tool(
	file_path: str,
	date_col: str,
	value_col: str,
	periods: int = 3,
	output_dir: str = "/tmp",
	) -> Union[Tuple[pd.DataFrame, str], str]:
	"""
	Parameters
	----------
	file_path : str
	CSV or Excel path.
	date_col : str
	Column to treat as the date index.
	value_col : str
	Numeric column to forecast.
	periods : int
	Steps ahead to forecast.
	output_dir: str
	Directory to save PNG.

	Returns
	-------
	(forecast_df, png_path) on success
	error string (starting '❌') otherwise
	"""
	# ── 1. Load file ──────────────────────────────────────────
	ext = os.path.splitext(file_path)[1].lower()
	try:
	df = (
	pd.read_excel(file_path)
	if ext in (".xls", ".xlsx")
	else pd.read_csv(file_path)
	)
	except Exception as exc:
	return f"❌ Failed to load file: {exc}"

	# ── 2. Column validation ─────────────────────────────────
	missing = [c for c in (date_col, value_col) if c not in df.columns]
	if missing:
	return f"❌ Missing column(s): {', '.join(missing)}"

	# ── 3. Parse & clean ─────────────────────────────────────
	df[date_col] = pd.to_datetime(df[date_col], errors="coerce")
	df[value_col] = pd.to_numeric(df[value_col], errors="coerce")
	df = df.dropna(subset=[date_col, value_col])
	if df.empty:
	return f"❌ No valid data after cleaning '{date_col}' / '{value_col}'."

	# Aggregate duplicate timestamps → mean
	df = (
	df[[date_col, value_col]]
	.groupby(date_col, as_index=True)
	.mean()
	.sort_index()
	)

	# Infer or default frequency
	freq = pd.infer_freq(df.index) or "D"
	try:
	df = df.asfreq(freq)
	except Exception:
	# fallback if duplicates still exist
	df = (
	df[~df.index.duplicated(keep="first")]
	.asfreq(freq)
	)

	# ── 4. Fit ARIMA(1,1,1) ──────────────────────────────────
	try:
	model = ARIMA(df[value_col], order=(1, 1, 1))
	fit = model.fit()
	except Exception as exc:
	return f"❌ ARIMA fitting failed: {exc}"

	# ── 5. Forecast ──────────────────────────────────────────
	try:
	pred = fit.get_forecast(steps=periods)
	forecast = pred.predicted_mean
	except Exception as exc:
	return f"❌ Forecast generation failed: {exc}"

	forecast_df = forecast.to_frame(name="Forecast")

	# ── 6. Plot history + forecast ───────────────────────────
	fig: Plot = go.Figure()
	fig.add_scatter(x=df.index, y=df[value_col], mode="lines", name="History")
	fig.add_scatter(
	x=forecast.index, y=forecast, mode="lines+markers", name="Forecast"
	)
	fig.update_layout(
	title=f"{value_col} Forecast",
	xaxis_title=date_col,
	yaxis_title=value_col,
	template="plotly_dark",
	)

	# ── 7. Save PNG ──────────────────────────────────────────
	os.makedirs(output_dir, exist_ok=True)
	tmp_png = tempfile.NamedTemporaryFile(
	prefix="forecast_", suffix=".png", dir=output_dir, delete=False
	)
	png_path = tmp_png.name
	tmp_png.close()
	try:
	fig.write_image(png_path, scale=2)
	except Exception as exc:
	return f"❌ Plot saving failed: {exc}"

	return forecast_df, png_path