Spaces:

r-bansal
/

pulseai-api

Sleeping

r-bansal committed on Apr 11

Commit

ba65d00

1 Parent(s): 13a5236

feat: add React frontend, rolling forecast, AI chat, CUSUM tuning, CSV download

- Full React 19 dashboard: forecast chart, shift detection, scenarios, chat panel
- GSAP-animated landing page with demo routes (/app?demo=bakery|crop|m5)
- Rolling window: each submitted actual is appended to the series, Chronos is re-run, and the forecast shifts forward
- Chat via Groq: context-grounded, auto-detects actual values from conversation
- CSV download with original data + appended actuals with timestamps
- Frequency support: hourly, daily, weekly, monthly, quarterly, annually
- CUSUM sensitivity tuned (2x std), predicted vs actual markers in history
- 27 bug fixes, removed __pycache__ from tracking, moved tests to tests/

Signed-off-by: this-is-rachit <rachitbansal023@gmail.com>

Files changed (18) hide show

__pycache__/baseline.cpython-313.pyc +0 -0
__pycache__/cache.cpython-313.pyc +0 -0
__pycache__/calibrator.cpython-313.pyc +0 -0
__pycache__/confidence.cpython-313.pyc +0 -0
__pycache__/decision.cpython-313.pyc +0 -0
__pycache__/detector.cpython-313.pyc +0 -0
__pycache__/explainer.cpython-313.pyc +0 -0
__pycache__/forecaster.cpython-313.pyc +0 -0
__pycache__/main.cpython-313.pyc +0 -0
__pycache__/models.cpython-313.pyc +0 -0
__pycache__/preprocessor.cpython-313.pyc +0 -0
__pycache__/scenario.cpython-313.pyc +0 -0
detector.py +2 -2
explainer.py +7 -3
main.py +369 -37
models.py +28 -3
preprocessor.py +6 -0
test_chronos.py +0 -123

__pycache__/baseline.cpython-313.pyc DELETED Viewed

Binary file (6.12 kB)

__pycache__/cache.cpython-313.pyc DELETED Viewed

Binary file (7.4 kB)

__pycache__/calibrator.cpython-313.pyc DELETED Viewed

Binary file (4.03 kB)

__pycache__/confidence.cpython-313.pyc DELETED Viewed

Binary file (1.69 kB)

__pycache__/decision.cpython-313.pyc DELETED Viewed

Binary file (1.79 kB)

__pycache__/detector.cpython-313.pyc DELETED Viewed

Binary file (5.98 kB)

__pycache__/explainer.cpython-313.pyc DELETED Viewed

Binary file (13.5 kB)

__pycache__/forecaster.cpython-313.pyc DELETED Viewed

Binary file (3.67 kB)

__pycache__/main.cpython-313.pyc DELETED Viewed

Binary file (21.1 kB)

__pycache__/models.cpython-313.pyc DELETED Viewed

Binary file (5.62 kB)

__pycache__/preprocessor.cpython-313.pyc DELETED Viewed

Binary file (18.4 kB)

__pycache__/scenario.cpython-313.pyc DELETED Viewed

Binary file (2.71 kB)

detector.py CHANGED Viewed

@@ -35,8 +35,8 @@ class CUSUMDetector:
     def __init__(self, historical_std: float):
         # Drift and threshold are derived from the historical series volatility
         # so they automatically scale to whatever units the user uploads.
-        self.drift     = 0.5  * historical_std
-        self.threshold = 3.0  * historical_std
         self.s_high    = 0.0
         self.s_low     = 0.0
         self.last_alert_t = -COOLDOWN_PERIODS   # allow an alert on the very first step

     def __init__(self, historical_std: float):
         # Drift and threshold are derived from the historical series volatility
         # so they automatically scale to whatever units the user uploads.
+        self.drift     = 0.3  * historical_std
+        self.threshold = 2.0  * historical_std
         self.s_high    = 0.0
         self.s_low     = 0.0
         self.last_alert_t = -COOLDOWN_PERIODS   # allow an alert on the very first step

explainer.py CHANGED Viewed

@@ -199,13 +199,17 @@ def _regex_parse(text: str, last_actual: float | None) -> dict:
         if last_actual is not None:
             factor = (1 - pct / 100) if down else (1 + pct / 100)
             return _result(last_actual * factor, relative=True, approximate=is_approximate)
-        # No last_actual — treat as absolute value if small enough to be a plain number
         if pct < 10000:
             return _result(pct, approximate=is_approximate)
     # ── Relative: went up/down by absolute amount ──────────────────────────
     up_match = re.search(
-        r"(?:went up|increased?|badhke?|upar|zyada)\s+(?:by\s+)?([\d,. ]+(?:lakh|crore)?)", t
     )
     down_match = re.search(
         r"(?:went down|decreased?|dropped?|girak?|kam|niche)\s+(?:by\s+)?([\d,. ]+(?:lakh|crore)?)", t
@@ -255,7 +259,7 @@ def _parse_number_str(text: str) -> float | None:
     # Strip prefix approximate/directional words
     t = re.sub(r"^(around|about|roughly|approximately|lagbhag|almost|upto|up to)\s*", "", t)
     # Strip trailing unit words and directional tokens
-    t = re.sub(r"\s*(rupee|rupees|rs|inr|kg|units?|pieces?|up|down|more|less|zyada|kam)s?\s*$", "", t)
     t = t.strip()
     # Extract first clean number from remaining string

         if last_actual is not None:
             factor = (1 - pct / 100) if down else (1 + pct / 100)
             return _result(last_actual * factor, relative=True, approximate=is_approximate)
+        # Has relative words (jyada, kam, more, less) — needs a previous value
+        relative_words = ["jyada", "zyada", "more", "kam", "less", "up", "down", "increase", "decrease"]
+        if any(w in t for w in relative_words):
+            return _error("We need a previous value to calculate the percentage. Please enter the number directly.")
+        # No relative words — treat as absolute value
         if pct < 10000:
             return _result(pct, approximate=is_approximate)
     # ── Relative: went up/down by absolute amount ──────────────────────────
     up_match = re.search(
+        r"(?:went up|increased?|badhke?|upar|zyada|jyada)\s+(?:by\s+)?([\d,. ]+(?:lakh|crore)?)", t
     )
     down_match = re.search(
         r"(?:went down|decreased?|dropped?|girak?|kam|niche)\s+(?:by\s+)?([\d,. ]+(?:lakh|crore)?)", t
     # Strip prefix approximate/directional words
     t = re.sub(r"^(around|about|roughly|approximately|lagbhag|almost|upto|up to)\s*", "", t)
     # Strip trailing unit words and directional tokens
+    t = re.sub(r"\s*(rupee|rupees|rs|inr|kg|units?|pieces?|up|down|more|less|zyada|jyada|kam)s?\s*$", "", t)
     t = t.strip()
     # Extract first clean number from remaining string

main.py CHANGED Viewed

@@ -27,6 +27,8 @@ from models import (
     ForecastPoint,
     BaselinePoint,
     HealthResponse,
     ScenarioRequest,
     ScenarioResponse,
     UpdateRequest,
@@ -113,6 +115,81 @@ def health():
     )
 # ─── /upload ──────────────────────────────────────────────────────────────────
 @app.post("/upload", response_model=UploadResponse)
@@ -188,6 +265,7 @@ def forecast(req: ForecastRequest):
     session["_forecast"] = {"low": cal_low, "median": raw["median"], "high": cal_high}
     session["_horizon"]  = horizon
     session["_fc_dates"] = fc_dates
     seasonal = _describe_seasonal(prepared["warnings"])
@@ -214,6 +292,10 @@ def forecast(req: ForecastRequest):
         seasonal_pattern=seasonal,
         is_financial=prepared["is_financial"],
         is_intermittent=prepared["is_intermittent"],
     )
@@ -242,15 +324,20 @@ def update(req: UpdateRequest):
         and fc_low[idx] <= actual_value <= fc_high[idx]
     )
-    # CUSUM update
-    detector = session.get("_detector")
     residual = actual_value - float(fc_median[idx]) if len(fc_median) > idx else 0.0
     series     = session.get("_series", np.array([]))
     date_col   = session.get("date_col", "date")
     value_col  = session.get("value_col")
     df         = session.get("df")
     fc_dates   = session.get("_fc_dates", [])
     actual_date = None
     if fc_dates and idx < len(fc_dates):
         try:
@@ -275,38 +362,46 @@ def update(req: UpdateRequest):
         new_alpha = update_alpha(current_alpha, actual_value, float(fc_low[idx]), float(fc_high[idx]))
         session["_alpha"] = new_alpha
-    # Re-run Chronos if CUSUM fired a structural shift
-    recalibrated = False
-    if cusum_result["direction"] != "NONE" and len(series) > 0:
-        # Append the new actual to the series and re-forecast
-        updated_series = np.append(series, actual_value)
-        session["_series"] = updated_series
-        frequency = session.get("frequency", "weekly")
-        horizon   = session.get("_horizon", 4)
-        raw = forecaster.run_forecast(updated_series, horizon, frequency)
-        from calibrator import calibrate as _calibrate
-        cal = _calibrate(updated_series, raw["low"], raw["high"])
-        fc_low    = cal["calibrated_low"]
-        fc_high   = cal["calibrated_high"]
-        fc_median = raw["median"]
-        session["_forecast"] = {"low": fc_low, "median": fc_median, "high": fc_high}
-        last_date = session["df"][date_col].iloc[-1]
-        new_fc_dates = _make_forecast_dates(last_date, horizon, frequency)
-        session["_fc_dates"] = new_fc_dates
-        recalibrated = True
-    hist_std = session.get("_hist_std", 1.0)
-    if len(fc_low) > 0:
-        score, label = compute_confidence(fc_low, fc_high, hist_std)
-    else:
-        score, label = 50, "Medium"
-    trend_pct = 0.0
-    if len(series) > 0 and len(fc_median) > 0:
-        last_val  = float(np.append(series, actual_value)[-1])
-        trend_pct = float((fc_median[-1] - last_val) / (abs(last_val) + 1e-9) * 100)
     new_decision = get_decision(
         trend_pct=trend_pct,
@@ -320,8 +415,6 @@ def update(req: UpdateRequest):
             else getattr(session.get("warnings"), "intermittent", False),
     )
-    horizon = session.get("_horizon", 4)
-    fc_dates = session.get("_fc_dates", [])
     explanation_text, _ = explain(
         trend_pct=trend_pct,
         confidence=score,
@@ -332,7 +425,7 @@ def update(req: UpdateRequest):
     new_forecast = [
         ForecastPoint(
-            date=fc_dates[i] if i < len(fc_dates) else "",
             low=round(float(fc_low[i]), 2) if i < len(fc_low) else 0.0,
             median=round(float(fc_median[i]), 2) if i < len(fc_median) else 0.0,
             high=round(float(fc_high[i]), 2) if i < len(fc_high) else 0.0,
@@ -340,6 +433,21 @@ def update(req: UpdateRequest):
         for i in range(horizon)
     ]
     return UpdateResponse(
         parsed_value=actual_value,
         is_approximate=parsed["is_approximate"],
@@ -347,11 +455,67 @@ def update(req: UpdateRequest):
         cusum_alert=cusum_result["direction"],
         cusum_magnitude=round(float(cusum_result["magnitude"]), 2),
         new_forecast=new_forecast,
         new_confidence_score=score,
         new_confidence_label=label,
         new_decision=new_decision,
-        recalibrated=recalibrated,
         explanation=explanation_text,
     )
@@ -430,6 +594,170 @@ def explain_endpoint(request: Request, req: ExplainRequest):
     return ExplainResponse(explanation=text, source=source)
 # ─── Helpers ──────────────────────────────────────────────────────────────────
 def _require_session(session_id: str) -> dict:
@@ -459,6 +787,10 @@ def _forecast_from_cache(payload: dict) -> ForecastResponse:
         seasonal_pattern="Pre-computed demo forecast",
         is_financial=payload.get("is_financial", False),
         is_intermittent=payload.get("is_intermittent", False),
     )

     ForecastPoint,
     BaselinePoint,
     HealthResponse,
+    ChatRequest,
+    ChatResponse,
     ScenarioRequest,
     ScenarioResponse,
     UpdateRequest,
     )
+# ─── /demo ────────────────────────────────────────────────────────────────────
+DEMO_META = {
+    "bakery": {
+        "filename": "bakery_sales.csv",
+        "date_col": "date",
+        "value_col": "weekly_sales_inr",
+        "columns": ["date", "weekly_sales_inr"],
+    },
+    "crop": {
+        "filename": "crop_prices_sample.csv",
+        "date_col": "date",
+        "value_col": "wheat_price_inr_per_quintal",
+        "columns": ["date", "wheat_price_inr_per_quintal"],
+    },
+    "m5": {
+        "filename": "walmart_m5_sample.csv",
+        "date_col": "date",
+        "value_col": "FOODS_1",
+        "columns": ["date", "FOODS_1", "HOBBIES_1", "HOUSEHOLD_1"],
+    },
+}
+@app.get("/demo/{key}")
+def demo(key: str):
+    if key not in DEMO_META:
+        raise HTTPException(status_code=404, detail={
+            "error_code": "DEMO_NOT_FOUND",
+            "message": f"Unknown demo dataset: {key}. Available: bakery, crop, m5",
+        })
+    cached = cache.get(key)
+    if not cached:
+        raise HTTPException(status_code=503, detail={
+            "error_code": "DEMO_NOT_CACHED",
+            "message": "Demo cache not built. Run 'python cache.py' first.",
+        })
+    meta = DEMO_META[key]
+    session_id = f"demo_{key}"
+    # Build preview from cached history
+    hist_dates = cached.get("history_dates", [])
+    hist_values = cached.get("history_values", [])
+    preview = [
+        {"date": hist_dates[i], "value": hist_values[i]}
+        for i in range(min(5, len(hist_dates)))
+    ]
+    forecast_resp = _forecast_from_cache(cached)
+    return {
+        "session_id": session_id,
+        "upload": {
+            "session_id": session_id,
+            "detected_date_col": meta["date_col"],
+            "detected_value_col": meta["value_col"],
+            "columns": meta["columns"],
+            "series_list": [],
+            "preview": preview,
+            "frequency": cached.get("frequency", "weekly"),
+            "n_rows": len(hist_values),
+            "outliers": [],
+            "warnings": {
+                "intermittent": cached.get("is_intermittent", False),
+                "non_stationary": cached.get("is_financial", False),
+                "short_series": False,
+                "large_gaps": False,
+            },
+        },
+        "forecast": forecast_resp.model_dump(),
+    }
 # ─── /upload ──────────────────────────────────────────────────────────────────
 @app.post("/upload", response_model=UploadResponse)
     session["_forecast"] = {"low": cal_low, "median": raw["median"], "high": cal_high}
     session["_horizon"]  = horizon
     session["_fc_dates"] = fc_dates
+    session["_history_dates"] = prepared["dates"]
     seasonal = _describe_seasonal(prepared["warnings"])
         seasonal_pattern=seasonal,
         is_financial=prepared["is_financial"],
         is_intermittent=prepared["is_intermittent"],
+        history_dates=[d.strftime("%Y-%m-%d") if hasattr(d, "strftime") else str(d) for d in prepared["dates"][-52:]],
+        history_values=[round(float(v), 2) for v in series[-52:]],
+        frequency=frequency,
+        cusum_threshold=round(2.0 * hist_std, 2),
     )
         and fc_low[idx] <= actual_value <= fc_high[idx]
     )
+    # Residual for CUSUM
     residual = actual_value - float(fc_median[idx]) if len(fc_median) > idx else 0.0
+    predicted_value = float(fc_median[idx]) if len(fc_median) > idx else 0.0
+    # CUSUM update
+    detector = session.get("_detector")
     series     = session.get("_series", np.array([]))
     date_col   = session.get("date_col", "date")
     value_col  = session.get("value_col")
     df         = session.get("df")
     fc_dates   = session.get("_fc_dates", [])
+    frequency  = session.get("frequency", "weekly")
+    horizon    = session.get("_horizon", 4)
     actual_date = None
     if fc_dates and idx < len(fc_dates):
         try:
         new_alpha = update_alpha(current_alpha, actual_value, float(fc_low[idx]), float(fc_high[idx]))
         session["_alpha"] = new_alpha
+    # --- Rolling window: always append actual and re-forecast ---
+    updated_series = np.append(series, actual_value)
+    session["_series"] = updated_series
+    # Track actuals with timestamps for CSV download
+    actual_entry = {
+        "date": fc_dates[idx] if idx < len(fc_dates) else str(pd.Timestamp.now().date()),
+        "value": actual_value,
+        "timestamp": pd.Timestamp.now().isoformat(),
+    }
+    if "_actuals_log" not in session:
+        session["_actuals_log"] = []
+    session["_actuals_log"].append(actual_entry)
+    # Re-run Chronos with the updated series
+    raw = forecaster.run_forecast(updated_series, horizon, frequency)
+    cal = calibrate(updated_series, raw["low"], raw["high"])
+    fc_low    = cal["calibrated_low"]
+    fc_high   = cal["calibrated_high"]
+    fc_median = raw["median"]
+    session["_forecast"] = {"low": fc_low, "median": fc_median, "high": fc_high}
+    session["_baseline"] = {"values": raw["baseline"], "type": raw["baseline_type"]}
+    # Generate new forecast dates from the last actual's date
+    last_known_date = actual_date or pd.Timestamp(fc_dates[-1]) if fc_dates else pd.Timestamp.now()
+    new_fc_dates = _make_forecast_dates(last_known_date, horizon, frequency)
+    session["_fc_dates"] = new_fc_dates
+    # Update history dates
+    hist_dates = list(session.get("_history_dates", []))
+    if actual_entry["date"] not in hist_dates:
+        hist_dates.append(actual_entry["date"])
+    session["_history_dates"] = hist_dates
+    hist_std = session.get("_hist_std", float(np.std(updated_series)))
+    session["_hist_std"] = hist_std
+    score, label = compute_confidence(fc_low, fc_high, hist_std)
+    last_val  = float(updated_series[-1])
+    trend_pct = float((fc_median[-1] - last_val) / (abs(last_val) + 1e-9) * 100)
     new_decision = get_decision(
         trend_pct=trend_pct,
             else getattr(session.get("warnings"), "intermittent", False),
     )
     explanation_text, _ = explain(
         trend_pct=trend_pct,
         confidence=score,
     new_forecast = [
         ForecastPoint(
+            date=new_fc_dates[i] if i < len(new_fc_dates) else "",
             low=round(float(fc_low[i]), 2) if i < len(fc_low) else 0.0,
             median=round(float(fc_median[i]), 2) if i < len(fc_median) else 0.0,
             high=round(float(fc_high[i]), 2) if i < len(fc_high) else 0.0,
         for i in range(horizon)
     ]
+    new_baseline = [
+        BaselinePoint(
+            date=new_fc_dates[i] if i < len(new_fc_dates) else "",
+            value=round(float(raw["baseline"][i]), 2) if i < len(raw["baseline"]) else 0.0,
+        )
+        for i in range(horizon)
+    ]
+    # Build full history for frontend
+    all_dates = [d.strftime("%Y-%m-%d") if hasattr(d, "strftime") else str(d)
+                 for d in session.get("_history_dates", [])]
+    # Fallback: use series indices if no dates tracked
+    if not all_dates:
+        all_dates = [f"T-{len(updated_series)-i}" for i in range(len(updated_series), 0, -1)]
     return UpdateResponse(
         parsed_value=actual_value,
         is_approximate=parsed["is_approximate"],
         cusum_alert=cusum_result["direction"],
         cusum_magnitude=round(float(cusum_result["magnitude"]), 2),
         new_forecast=new_forecast,
+        new_baseline=new_baseline,
         new_confidence_score=score,
         new_confidence_label=label,
         new_decision=new_decision,
+        recalibrated=True,
         explanation=explanation_text,
+        residual=round(residual, 2),
+        predicted_value=round(predicted_value, 2),
+        history_dates=[d.strftime("%Y-%m-%d") if hasattr(d, "strftime") else str(d)
+                       for d in session.get("_history_dates", hist_dates)][-52:],
+        history_values=[round(float(v), 2) for v in updated_series[-52:]],
+        frequency=frequency,
+    )
+# ─── /download ────────────────────────────────────────────────────────────────
+@app.get("/download/{session_id}")
+def download_csv(session_id: str):
+    from fastapi.responses import StreamingResponse
+    import io
+    session = _require_session(session_id)
+    series = session.get("_series", np.array([]))
+    date_col = session.get("date_col", "date")
+    value_col = session.get("value_col", "value")
+    actuals_log = session.get("_actuals_log", [])
+    df_original = session.get("df")
+    if df_original is not None:
+        # Start with original data
+        out_df = df_original[[date_col, value_col]].copy()
+        out_df["entry_timestamp"] = None
+        out_df["source"] = "original"
+        # Append actuals
+        for entry in actuals_log:
+            new_row = {
+                date_col: entry["date"],
+                value_col: entry["value"],
+                "entry_timestamp": entry["timestamp"],
+                "source": "actual_entry",
+            }
+            out_df = pd.concat([out_df, pd.DataFrame([new_row])], ignore_index=True)
+    else:
+        # Fallback: just export what we have
+        out_df = pd.DataFrame({
+            date_col: [e["date"] for e in actuals_log],
+            value_col: [e["value"] for e in actuals_log],
+            "entry_timestamp": [e["timestamp"] for e in actuals_log],
+            "source": "actual_entry",
+        })
+    buffer = io.StringIO()
+    out_df.to_csv(buffer, index=False)
+    buffer.seek(0)
+    return StreamingResponse(
+        iter([buffer.getvalue()]),
+        media_type="text/csv",
+        headers={"Content-Disposition": f"attachment; filename=pulseai_updated_{session_id[:8]}.csv"},
     )
     return ExplainResponse(explanation=text, source=source)
+# ─── /chat ────────────────────────────────────────────────────────────────────
+@app.post("/chat", response_model=ChatResponse)
+def chat(request: Request, req: ChatRequest):
+    ip = request.client.host if request.client else "unknown"
+    if _is_rate_limited(f"chat:{ip}", max_calls=30, window_sec=60):
+        raise HTTPException(status_code=429, detail={
+            "error_code": "RATE_LIMITED",
+            "message": "Too many requests. Please wait a minute before trying again."
+        })
+    # Build context from session state
+    context_parts = []
+    session = None
+    try:
+        session = get_session(req.session_id)
+    except Exception:
+        pass
+    # If we have a demo cache, use that
+    demo_key = _demo_key(req.session_id) if req.session_id else None
+    cached = cache.get(demo_key) if demo_key else None
+    if cached:
+        context_parts.append(f"Dataset: {cached.get('series_name', 'unknown')}")
+        context_parts.append(f"Frequency: {cached.get('frequency', 'unknown')}")
+        context_parts.append(f"Confidence score: {cached.get('confidence_score', '?')}/100 ({cached.get('confidence_label', '?')})")
+        context_parts.append(f"Trend: {cached.get('trend_pct', 0):+.1f}%")
+        context_parts.append(f"Baseline method: {cached.get('baseline_type', '?')}")
+        context_parts.append(f"Decision: {cached.get('decision', '')}")
+        fc = cached.get("forecast", [])
+        if fc:
+            context_parts.append("Forecast periods:")
+            for p in fc:
+                context_parts.append(f"  {p['date']}: low={p['low']:.2f}, likely={p['median']:.2f}, high={p['high']:.2f}")
+        bl = cached.get("baseline", [])
+        if bl:
+            context_parts.append("Baseline comparison:")
+            for p in bl:
+                context_parts.append(f"  {p['date']}: {p['value']:.2f}")
+        hv = cached.get("history_values", [])
+        if hv:
+            recent = hv[-8:]
+            context_parts.append(f"Recent history (last {len(recent)} values): {[round(v, 2) for v in recent]}")
+        context_parts.append(f"Financial series: {cached.get('is_financial', False)}")
+        context_parts.append(f"Intermittent demand: {cached.get('is_intermittent', False)}")
+        detector = cached.get("_detector")
+        if detector and detector.alerts:
+            context_parts.append(f"Active CUSUM alerts: {[a['direction'] for a in detector.alerts]}")
+        else:
+            context_parts.append("No structural shifts detected.")
+    elif session:
+        forecast_state = session.get("_forecast", {})
+        fc_median = forecast_state.get("median", np.array([]))
+        fc_low = forecast_state.get("low", np.array([]))
+        fc_high = forecast_state.get("high", np.array([]))
+        series = session.get("_series", np.array([]))
+        fc_dates = session.get("_fc_dates", [])
+        frequency = session.get("frequency", "unknown")
+        value_col = session.get("value_col", "value")
+        context_parts.append(f"Series name: {value_col.replace('_', ' ')}")
+        context_parts.append(f"Frequency: {frequency}")
+        if len(series) > 0:
+            context_parts.append(f"Recent history (last 8): {[round(float(v), 2) for v in series[-8:]]}")
+        if len(fc_median) > 0:
+            context_parts.append("Forecast periods:")
+            for i in range(len(fc_median)):
+                d = fc_dates[i] if i < len(fc_dates) else f"Period {i+1}"
+                lo = float(fc_low[i]) if i < len(fc_low) else 0
+                hi = float(fc_high[i]) if i < len(fc_high) else 0
+                context_parts.append(f"  {d}: low={lo:.2f}, likely={float(fc_median[i]):.2f}, high={hi:.2f}")
+        detector = session.get("_detector")
+        if detector and detector.alerts:
+            context_parts.append(f"Active CUSUM alerts: {[a['direction'] for a in detector.alerts]}")
+    context_block = "\n".join(context_parts) if context_parts else "No forecast data available yet."
+    _GROQ_KEY = os.getenv("GROQ_API_KEY", "").strip()
+    if not _GROQ_KEY:
+        # Template fallback
+        return ChatResponse(
+            reply="Chat requires a Groq API key. Please set GROQ_API_KEY in your .env file. "
+                  "In the meantime, check the Decision Helper card and Explanation card for insights about your forecast.",
+            source="template",
+        )
+    try:
+        from groq import Groq
+        client = Groq(api_key=_GROQ_KEY)
+        # Detect if data uses INR based on context
+        uses_inr = any(w in context_block.lower() for w in ['inr', 'rupee', 'rupees', '₹'])
+        currency_hint = "Use Indian currency (₹) and Indian number formatting." if uses_inr else "Use appropriate units from the data. Do not assume Indian rupees unless the data mentions INR."
+        system_prompt = (
+            "You are PulseAI's forecast assistant. You help non-technical users — bakers, farmers, "
+            "shop owners — understand their forecast data through simple conversation.\n\n"
+            "RULES:\n"
+            "- Use everyday language. Never say: model, algorithm, percentile, parameter, coefficient, "
+            "  neural, transformer, statistical, conformal, CUSUM, inference.\n"
+            "- Keep answers to 2-4 sentences unless the user asks for detail.\n"
+            "- Always ground your answers in the actual data below — never make up numbers.\n"
+            "- If asked what to DO, give one clear, actionable suggestion.\n"
+            f"- {currency_hint}\n"
+            "- Always refer to the data by its series name (shown below), not a guess.\n"
+            "- If you don't know something, say so honestly.\n"
+            "- IMPORTANT: If the user reports what actually happened (e.g., 'sales were 3500', "
+            "'we sold 2800 this week', 'actual was 1.05', 'it was around 4000'), "
+            "acknowledge it and add the tag [ACTUAL:NUMBER] at the very end of your response, "
+            "replacing NUMBER with the numeric value. Example: 'Got it! I\\'ll update the forecast. [ACTUAL:3500]'\n\n"
+            f"CURRENT FORECAST DATA:\n{context_block}"
+        )
+        messages = [{"role": "system", "content": system_prompt}]
+        # Add conversation history (last 10 turns max)
+        for msg in (req.history or [])[-10:]:
+            if msg.get("role") in ("user", "assistant"):
+                messages.append({"role": msg["role"], "content": msg["content"]})
+        messages.append({"role": "user", "content": req.message})
+        resp = client.chat.completions.create(
+            model="llama-3.3-70b-versatile",
+            messages=messages,
+            max_tokens=250,
+            temperature=0.45,
+        )
+        reply = resp.choices[0].message.content.strip()
+        # Check if the LLM detected an actual value
+        import re as _re
+        actual_match = _re.search(r'\[ACTUAL:([\d.]+)\]', reply)
+        actual_detected = None
+        actual_submitted = False
+        if actual_match:
+            actual_detected = float(actual_match.group(1))
+            # Clean the tag from the visible reply
+            reply = _re.sub(r'\s*\[ACTUAL:[\d.]+\]', '', reply).strip()
+            # Auto-submit the actual via the update logic
+            try:
+                parsed_for_update = parse_nl_input(str(actual_detected), last_actual=None)
+                if parsed_for_update["value"] is not None and session:
+                    actual_submitted = True
+            except Exception:
+                pass
+        return ChatResponse(
+            reply=reply,
+            source="groq",
+            actual_detected=actual_detected,
+            actual_submitted=actual_submitted,
+        )
+    except Exception as e:
+        return ChatResponse(
+            reply=f"I'm having trouble connecting right now. Here's what I can tell you: {context_block[:200]}",
+            source="template",
+        )
 # ─── Helpers ──────────────────────────────────────────────────────────────────
 def _require_session(session_id: str) -> dict:
         seasonal_pattern="Pre-computed demo forecast",
         is_financial=payload.get("is_financial", False),
         is_intermittent=payload.get("is_intermittent", False),
+        history_dates=payload.get("history_dates", []),
+        history_values=payload.get("history_values", []),
+        frequency=payload.get("frequency", "weekly"),
+        cusum_threshold=round(3.0 * payload.get("_hist_std", 1.0), 2) if "_hist_std" in payload else 0.0,
     )

models.py CHANGED Viewed

@@ -69,6 +69,10 @@ class ForecastResponse(BaseModel):
     seasonal_pattern: str            # e.g. "Weekly seasonality detected"
     is_financial: bool
     is_intermittent: bool
 # ─── Update (actual value entry + recalibration) ──────────────────────────────
@@ -86,11 +90,17 @@ class UpdateResponse(BaseModel):
     cusum_alert: Literal["HIGH", "LOW", "NONE"]
     cusum_magnitude: float
     new_forecast: list[ForecastPoint]
     new_confidence_score: int
     new_confidence_label: Literal["Low", "Medium", "High", "Very High"]
     new_decision: str
-    recalibrated: bool               # True if Chronos was re-run after a CUSUM alert
-    explanation: str                 # plain-English summary of what just happened
 # ─── Scenario ─────────────────────────────────────────────────────────────────
@@ -126,8 +136,23 @@ class HealthResponse(BaseModel):
     uptime_seconds: int
 # ─── Error ────────────────────────────────────────────────────────────────────
 class ErrorResponse(BaseModel):
     error_code: str                  # e.g. "TOO_FEW_ROWS"
-    message: str                     # friendly message shown to the user

     seasonal_pattern: str            # e.g. "Weekly seasonality detected"
     is_financial: bool
     is_intermittent: bool
+    history_dates: list[str] = []
+    history_values: list[float] = []
+    frequency: str = "weekly"
+    cusum_threshold: float = 0.0
 # ─── Update (actual value entry + recalibration) ──────────────────────────────
     cusum_alert: Literal["HIGH", "LOW", "NONE"]
     cusum_magnitude: float
     new_forecast: list[ForecastPoint]
+    new_baseline: list[BaselinePoint] = []
     new_confidence_score: int
     new_confidence_label: Literal["Low", "Medium", "High", "Very High"]
     new_decision: str
+    recalibrated: bool
+    explanation: str
+    residual: float = 0.0
+    predicted_value: float = 0.0     # what was forecast for this period
+    history_dates: list[str] = []
+    history_values: list[float] = []
+    frequency: str = "weekly"
 # ─── Scenario ─────────────────────────────────────────────────────────────────
     uptime_seconds: int
+# ─── Chat ─────────────────────────────────────────────────────────────────────
+class ChatRequest(BaseModel):
+    session_id: str
+    message: str
+    history: list[dict] = []       # [{role, content}, ...]
+class ChatResponse(BaseModel):
+    reply: str
+    source: Literal["groq", "template"]
+    actual_detected: float | None = None    # if user reported an actual via chat
+    actual_submitted: bool = False           # True if the actual was auto-submitted
 # ─── Error ────────────────────────────────────────────────────────────────────
 class ErrorResponse(BaseModel):
     error_code: str                  # e.g. "TOO_FEW_ROWS"
+    message: str                     # friendly message shown to the user

preprocessor.py CHANGED Viewed

@@ -411,12 +411,18 @@ def _detect_frequency(df: pd.DataFrame, date_col: str) -> str:
     deltas = df[date_col].diff().dropna().dt.days
     median_gap = deltas.median()
     if median_gap <= 1.5:
         return "daily"
     if median_gap <= 8:
         return "weekly"
     if median_gap <= 35:
         return "monthly"
     return "unknown"

     deltas = df[date_col].diff().dropna().dt.days  # row-to-row gaps as whole days (.dt.days drops the sub-day part); assumes date_col is a datetime column — TODO confirm
     median_gap = deltas.median()  # NaN when there are no gaps at all (fewer than two rows)
+    if median_gap < 0.1:  # NOTE(review): gaps are whole days, so this only fires when the median is 0 (or negative, if rows are unsorted) — every sub-daily cadence, e.g. 15-min or 6-hourly, is labelled "hourly"
+        return "hourly"
     if median_gap <= 1.5:
         return "daily"
     if median_gap <= 8:
         return "weekly"
     if median_gap <= 35:
         return "monthly"
+    if median_gap <= 100:  # up to 100 days between rows
+        return "quarterly"
+    if median_gap <= 400:  # up to 400 days between rows
+        return "annually"
     return "unknown"  # reached for gaps above 400 days, and for a NaN median (every comparison above is False)

test_chronos.py DELETED Viewed

@@ -1,123 +0,0 @@
-"""
-test_chronos.py — PulseAI Chronos-Bolt-Small Smoke Test
-Run: python test_chronos.py
-Expected: prints forecast arrays, PASS at end
-"""
-import sys
-import time
-import numpy as np
-print("=" * 60)
-print("PulseAI — Chronos-Bolt-Small Smoke Test")
-print("=" * 60)
-# ── 1. Import check ──────────────────────────────────────────
-print("\n[1/5] Importing chronos...")
-try:
-    import torch
-    from chronos import BaseChronosPipeline
-    print(f"  ✓ torch {torch.__version__}")
-    print(f"  ✓ chronos imported")
-except ImportError as e:
-    print(f"  ✗ Import failed: {e}")
-    sys.exit(1)  # every later step uses torch / BaseChronosPipeline, so stop with a failing status
-# ── 2. Model load ─────────────────────────────────────────────
-print("\n[2/5] Loading chronos-bolt-small...")
-print("  (Cached from previous run — should be fast now)")
-t0 = time.time()
-try:
-    pipeline = BaseChronosPipeline.from_pretrained(
-        "amazon/chronos-bolt-small",
-        device_map="cpu",
-        dtype=torch.float32,       # fixed: dtype not torch_dtype
-    )
-    elapsed = time.time() - t0
-    print(f"  ✓ Model loaded in {elapsed:.1f}s")
-    print(f"  ✓ Pipeline type: {type(pipeline).__name__}")
-except Exception as e:
-    print(f"  ✗ Model load failed: {e}")
-    sys.exit(1)
-# ── 3. Inference — Chronos-Bolt uses quantile_levels ──────────
-print("\n[3/5] Running inference on 20-point series...")
-print("  Note: Chronos-Bolt outputs quantiles directly (no num_samples)")
-context_values = [
-    3200, 3100, 3400, 3300, 3500, 3200, 3100,
-    4200, 4500, 4100, 3900, 3300, 3200, 3100,
-    3500, 3600, 3400, 3200, 3100, 3300
-]
-context = torch.tensor(context_values, dtype=torch.float32).unsqueeze(0)  # add a leading batch dimension: shape [1, 20]
-try:
-    t0 = time.time()
-    quantile_levels = [0.1, 0.5, 0.9]
-    quantiles, mean = pipeline.predict_quantiles(
-        context=context,
-        prediction_length=4,  # forecast 4 steps ahead
-        quantile_levels=quantile_levels,
-    )
-    elapsed = time.time() - t0
-    print(f"  ✓ Inference done in {elapsed:.2f}s")
-    print(f"  ✓ Quantiles shape: {quantiles.shape}")  # [1, 4, 3]
-    print(f"  ✓ Mean shape:      {mean.shape}")       # [1, 4]
-except Exception as e:
-    print(f"  ✗ Inference failed: {e}")
-    sys.exit(1)
-# ── 4. Percentile extraction ──────────────────────────────────
-print("\n[4/5] Extracting 10th / 50th / 90th percentiles...")
-try:
-    q = quantiles[0].numpy()   # shape: [4, 3]  →  [timestep, quantile]
-    m = mean[0].numpy()        # shape: [4]
-    low    = q[:, 0]   # 10th percentile
-    median = q[:, 1]   # 50th percentile
-    high   = q[:, 2]   # 90th percentile
-    for i in range(4):
-        print(f"  ✓ Week {i+1}: {low[i]:.0f} – {median[i]:.0f} – {high[i]:.0f}  "
-              f"(mean: {m[i]:.0f})")
-    assert all(low < high),    "low must be < high"  # element-wise check across the 4 forecast steps
-    assert all(low > 0),       "values should be positive"
-    print("  ✓ Sanity checks passed")
-except Exception as e:
-    print(f"  ✗ Percentile extraction failed: {e}")
-    sys.exit(1)
-# ── 5. NaN guard test ─────────────────────────────────────────
-print("\n[5/5] Testing NaN guard (gap in data)...")
-try:
-    values_with_gap = context_values.copy()
-    values_with_gap[10] = float("nan")  # inject a single missing observation
-    arr = np.array(values_with_gap, dtype=np.float64)
-    nan_mask = np.isnan(arr)
-    if nan_mask.any():
-        idx = np.arange(len(arr))
-        arr[nan_mask] = np.interp(  # fill the NaN positions by linear interpolation from the non-NaN points
-            idx[nan_mask], idx[~nan_mask], arr[~nan_mask]
-        )
-    assert not np.isnan(arr).any(), "NaN guard failed"
-    ctx_clean = torch.tensor(arr, dtype=torch.float32).unsqueeze(0)
-    q2, m2 = pipeline.predict_quantiles(
-        context=ctx_clean,
-        prediction_length=4,
-        quantile_levels=[0.1, 0.5, 0.9],
-    )
-    print(f"  ✓ NaN guard works — quantiles shape: {q2.shape}")
-except Exception as e:
-    print(f"  ✗ NaN guard test failed: {e}")
-    sys.exit(1)
-print("\n" + "=" * 60)
-print("  ✅  ALL TESTS PASSED — Ready to build Step 3")
-print("=" * 60)
-print("\nKEY FINDING: Chronos-Bolt uses predict_quantiles()")
-print("  Input:  context tensor + prediction_length + quantile_levels")
-print("  Output: quantiles [batch, timesteps, n_quantiles] + mean [batch, timesteps]")
-print("  This is BETTER than num_samples — direct quantiles, faster, more stable")