Spaces:

mgbam
/

BizIntel_AI

Running

App Files Community

mgbam committed 2 days ago

Commit

a7d25a1

verified ·

1 Parent(s): 7453b19

Update app.py

Browse files

Files changed (1) hide show

app.py +170 -125

app.py CHANGED Viewed

@@ -1,26 +1,40 @@
-# app.py — BizIntel AI Ultra v2
 # =============================================================
-# CSV / Excel / DB ingestion • Trend + ARIMA forecast (90 d or 3 steps)
-# Confidence bands • Model explainability • Gemini 1.5 Pro strategy
-# Safe Plotly writes -> /tmp • KPI cards • Optional EDA visuals
 # =============================================================
-import os, tempfile, warnings
-from typing import List
 import numpy as np
 import pandas as pd
-import streamlit as st
 import plotly.graph_objects as go
 from statsmodels.tsa.arima.model import ARIMA
 from statsmodels.graphics.tsaplots import plot_acf
 from statsmodels.tsa.seasonal import seasonal_decompose
 from statsmodels.tools.sm_exceptions import ConvergenceWarning
 import google.generativeai as genai
-import matplotlib.pyplot as plt
 # ──────────────────────────────────────────────────────────────
-# 0)  Plotly safe write → /tmp
 # ──────────────────────────────────────────────────────────────
 TMP = tempfile.gettempdir()
 orig_write = go.Figure.write_image
@@ -29,15 +43,7 @@ go.Figure.write_image = lambda self, p, *a, **k: orig_write(
 )
 # ──────────────────────────────────────────────────────────────
-# 1)  Local helpers & DB connector
-# ──────────────────────────────────────────────────────────────
-from tools.csv_parser      import parse_csv_tool
-from tools.plot_generator  import plot_metric_tool
-from tools.visuals         import histogram_tool, scatter_matrix_tool, corr_heatmap_tool
-from db_connector          import fetch_data_from_db, list_tables, SUPPORTED_ENGINES
-# ──────────────────────────────────────────────────────────────
-# 2)  Gemini 1.5 Pro
 # ──────────────────────────────────────────────────────────────
 genai.configure(api_key=os.getenv("GEMINI_APIKEY"))
 gemini = genai.GenerativeModel(
@@ -46,33 +52,34 @@ gemini = genai.GenerativeModel(
 )
 # ──────────────────────────────────────────────────────────────
-# 3)  Streamlit setup
 # ──────────────────────────────────────────────────────────────
-st.set_page_config(page_title="BizIntel AI Ultra", layout="wide")
-st.title("📊 BizIntel AI Ultra – Advanced Analytics + Gemini 1.5 Pro")
 # ──────────────────────────────────────────────────────────────
-# 4)  Data source
 # ──────────────────────────────────────────────────────────────
-choice = st.radio("Select data source", ["Upload CSV / Excel", "Connect to SQL Database"])
 csv_path: str | None = None
 if choice.startswith("Upload"):
-    up = st.file_uploader("CSV or Excel (≤ 500 MB)", type=["csv","xlsx","xls"])
     if up:
         tmp = os.path.join(TMP, up.name)
-        with open(tmp, "wb") as f: f.write(up.read())
         if up.name.lower().endswith(".csv"):
             csv_path = tmp
         else:
             try:
-                pd.read_excel(tmp, sheet_name=0).to_csv(tmp+".csv", index=False)
-                csv_path = tmp+".csv"
             except Exception as e:
                 st.error(f"Excel parse failed: {e}")
 else:
-    eng  = st.selectbox("DB engine", SUPPORTED_ENGINES)
-    conn = st.text_input("SQLAlchemy connection string")
     if conn:
         try:
             tbl = st.selectbox("Table", list_tables(conn))
@@ -86,43 +93,53 @@ if not csv_path:
     st.stop()
 with open(csv_path, "rb") as f:
-    st.download_button("⬇️ Download working CSV", f, file_name=os.path.basename(csv_path))
 # ──────────────────────────────────────────────────────────────
-# 5)  Column selectors
 # ──────────────────────────────────────────────────────────────
 df_head = pd.read_csv(csv_path, nrows=5)
 st.dataframe(df_head)
-date_col = st.selectbox("Date/time column", df_head.columns)
-numeric_cols = df_head.select_dtypes("number").columns.tolist()
-metric_options = [c for c in numeric_cols if c != date_col]
-if not metric_options:
-    st.error("No numeric columns available apart from the date column.")
     st.stop()
-metric_col = st.selectbox("Numeric metric column", metric_options)
 # ──────────────────────────────────────────────────────────────
-# 6)  Summary & trend chart
 # ──────────────────────────────────────────────────────────────
-summary = parse_csv_tool(csv_path)
-trend_fig = plot_metric_tool(csv_path, date_col, metric_col)
-if isinstance(trend_fig, go.Figure):
-    st.subheader("📈 Trend")
     st.plotly_chart(trend_fig, use_container_width=True)
-else:
-    st.warning(trend_fig)
 # ──────────────────────────────────────────────────────────────
-# 7)  Robust ARIMA + explainability
 # ──────────────────────────────────────────────────────────────
 def build_series(path, dcol, vcol):
     df = pd.read_csv(path, usecols=[dcol, vcol])
     df[dcol] = pd.to_datetime(df[dcol], errors="coerce")
     df[vcol] = pd.to_numeric(df[vcol], errors="coerce")
     df = df.dropna(subset=[dcol, vcol]).sort_values(dcol)
-    if df.empty or df[dcol].nunique() < 2:
-        raise ValueError("Need ≥ 2 valid timestamps.")
     s = df.set_index(dcol)[vcol].groupby(level=0).mean().sort_index()
     freq = pd.infer_freq(s.index) or "D"
     s = s.asfreq(freq).interpolate()
@@ -131,139 +148,167 @@ def build_series(path, dcol, vcol):
 @st.cache_data(show_spinner="Fitting ARIMA…")
 def fit_arima(series):
     warnings.simplefilter("ignore", ConvergenceWarning)
-    model = ARIMA(series, order=(1,1,1))
-    return model.fit()
 try:
     series, freq = build_series(csv_path, date_col, metric_col)
     horizon = 90 if freq == "D" else 3
-    res = fit_arima(series)
-    fc  = res.get_forecast(steps=horizon)
-    forecast = fc.predicted_mean
-    ci = fc.conf_int()
 except Exception as e:
-    st.subheader(f"🔮 {metric_col} Forecast")
     st.warning(f"Forecast failed: {e}")
-    series = forecast = ci = None
 if forecast is not None:
-    # Plot with CI
     fig = go.Figure()
-    fig.add_scatter(x=series.index,   y=series,  mode="lines", name=metric_col)
     fig.add_scatter(x=forecast.index, y=forecast, mode="lines+markers", name="Forecast")
-    fig.add_scatter(x=ci.index, y=ci.iloc[:,1], mode="lines",
-                    line=dict(width=0), showlegend=False)
-    fig.add_scatter(x=ci.index, y=ci.iloc[:,0], mode="lines",
-                    line=dict(width=0), fill="tonexty",
-                    fillcolor="rgba(255,0,0,0.25)", showlegend=False)
-    fig.update_layout(title=f"{metric_col} Forecast ({horizon} steps)",
-                      template="plotly_dark", xaxis_title=date_col,
-                      yaxis_title=metric_col)
-    st.subheader(f"🔮 {metric_col} Forecast")
     st.plotly_chart(fig, use_container_width=True)
-    # ---------------- summary & interpretation ----------------
-    st.subheader("📄 Model Summary")
-    st.code(res.summary().as_text(), language="text")
-    st.subheader("🗒 Coefficient Interpretation")
-    ar = res.arparams
-    ma = res.maparams
-    interp: List[str] = []
     if ar.size:
-        interp.append(f"• AR(1) ={ar[0]:.2f} → "
-                      f"{'strong' if abs(ar[0])>0.5 else 'moderate'} "
-                      "persistence in the series.")
     if ma.size:
-        interp.append(f"• MA(1) ={ma[0]:.2f} → "
-                      f"{'large' if abs(ma[0])>0.5 else 'modest'} "
-                      "shock adjustment.")
     st.markdown("\n".join(interp) or "N/A")
-    # ---------------- Residual ACF ----------------
-    st.subheader("🔍 Residual Autocorrelation (ACF)")
-    plt.figure(figsize=(6,3))
-    plot_acf(res.resid.dropna(), lags=30, alpha=0.05)
     acf_png = os.path.join(TMP, "acf.png")
     plt.tight_layout()
     plt.savefig(acf_png, dpi=120)
     plt.close()
     st.image(acf_png, use_container_width=True)
-    # ---------------- Back‑test ----------------
-    k = max(int(len(series)*0.2), 10)
     train, test = series[:-k], series[-k:]
-    bt_res   = ARIMA(train, order=(1,1,1)).fit()
-    bt_pred  = bt_res.forecast(k)
-    mape = (abs(bt_pred - test)/test).mean()*100
-    rmse = np.sqrt(((bt_pred - test)**2).mean())
-    st.subheader("🧪 Back‑test (last 20 %)")
-    colA, colB = st.columns(2)
-    colA.metric("MAPE", f"{mape:.2f} %")
-    colB.metric("RMSE", f"{rmse:,.0f}")
-    # ---------------- Optional seasonal decomposition -------
     with st.expander("Seasonal Decomposition"):
         try:
-            period = {"D":7, "H":24, "M":12}.get(freq, None)
             if period:
                 dec = seasonal_decompose(series, period=period, model="additive")
-                for comp in ["trend","seasonal","resid"]:
-                    st.line_chart(getattr(dec, comp), height=150)
             else:
                 st.info("Frequency not suited for decomposition.")
         except Exception as e:
             st.info(f"Decomposition failed: {e}")
 # ──────────────────────────────────────────────────────────────
-# 8)  Gemini strategy report
 # ──────────────────────────────────────────────────────────────
 prompt = (
     "You are **BizIntel Strategist AI**.\n\n"
-    f"### Dataset Summary\n```\n{summary}\n```\n\n"
     f"### {metric_col} Forecast\n```\n"
-    f"{forecast.to_string() if forecast is not None else 'N/A'}\n```\n\n"
-    "Craft a Markdown report:\n"
-    "1. Five insights\n2. Three actionable strategies\n"
-    "3. Risks / anomalies\n4. Extra visuals to consider."
 )
-with st.spinner("Gemini generating strategy…"):
     md = gemini.generate_content(prompt).text
-st.subheader("🚀 Strategy Recommendations (Gemini 1.5 Pro)")
 st.markdown(md)
-st.download_button("⬇️ Download Strategy (.md)", md, file_name="strategy.md")
 # ──────────────────────────────────────────────────────────────
-# 9)  KPI cards + detailed stats + optional EDA  (unchanged)
 # ──────────────────────────────────────────────────────────────
 fulldf = pd.read_csv(csv_path, low_memory=False)
 rows, cols = fulldf.shape
-miss_pct = fulldf.isna().mean().mean()*100
 st.markdown("---")
-st.subheader("📑 Dataset Overview")
-c1,c2,c3 = st.columns(3)
-c1.metric("Rows", f"{rows:,}")
-c2.metric("Columns", cols)
-c3.metric("Missing %", f"{miss_pct:.1f}%")
-with st.expander("Descriptive Statistics"):
-    st.dataframe(fulldf.describe().T.style.format(precision=2).background_gradient("Blues"),
-                 use_container_width=True)
 st.markdown("---")
-st.subheader("🔍 Optional Exploratory Visuals")
-num_cols = fulldf.select_dtypes("number").columns.tolist()
 if st.checkbox("Histogram"):
-    st.plotly_chart(histogram_tool(csv_path, st.selectbox("Var", num_cols, key="hist")),
-                    use_container_width=True)
-if st.checkbox("Scatter Matrix"):
-    sel = st.multiselect("Columns", num_cols, default=num_cols[:3])
     if sel:
-        st.plotly_chart(scatter_matrix_tool(csv_path, sel), use_container_width=True)
-if st.checkbox("Correlation Heat‑map"):
-    st.plotly_chart(corr_heatmap_tool(csv_path), use_container_width=True)

+# app.py  – BizIntel AI Ultra v2.1
 # =============================================================
+# • Upload CSV / Excel  • SQL–DB fetch  • Trend + ARIMA forecast
+# • Model explainability (summary, coef interp, ACF, back-test)
+# • Gemini 1.5 Pro strategy generation
+# • Optional EDA visuals  • Safe Plotly PNG write to /tmp
 # =============================================================
+import os
+import tempfile
+import warnings
+from typing import List, Tuple
 import numpy as np
 import pandas as pd
 import plotly.graph_objects as go
+import streamlit as st
 from statsmodels.tsa.arima.model import ARIMA
 from statsmodels.graphics.tsaplots import plot_acf
 from statsmodels.tsa.seasonal import seasonal_decompose
 from statsmodels.tools.sm_exceptions import ConvergenceWarning
 import google.generativeai as genai
 # ──────────────────────────────────────────────────────────────
+# Local helper modules
+# ──────────────────────────────────────────────────────────────
+from tools.csv_parser      import parse_csv_tool
+from tools.plot_generator  import plot_metric_tool
+from tools.forecaster      import forecast_metric_tool   # only for png path if needed
+from tools.visuals         import (
+    histogram_tool, scatter_matrix_tool, corr_heatmap_tool
+)
+from db_connector          import fetch_data_from_db, list_tables, SUPPORTED_ENGINES
+# ──────────────────────────────────────────────────────────────
+# Plotly safe write — ensure PNGs go to writable /tmp
 # ──────────────────────────────────────────────────────────────
 TMP = tempfile.gettempdir()
 orig_write = go.Figure.write_image
 )
 # ──────────────────────────────────────────────────────────────
+# Gemini 1.5 Pro setup
 # ──────────────────────────────────────────────────────────────
 genai.configure(api_key=os.getenv("GEMINI_APIKEY"))
 gemini = genai.GenerativeModel(
 )
 # ──────────────────────────────────────────────────────────────
+# Streamlit layout
 # ──────────────────────────────────────────────────────────────
+st.set_page_config(page_title="BizIntel AI Ultra", layout="wide")
+st.title("📊 BizIntel AI Ultra – Advanced Analytics + Gemini 1.5 Pro")
 # ──────────────────────────────────────────────────────────────
+# 1) Data source selection
 # ──────────────────────────────────────────────────────────────
+choice = st.radio("Select data source", ["Upload CSV / Excel", "Connect to SQL Database"])
 csv_path: str | None = None
 if choice.startswith("Upload"):
+    up = st.file_uploader("CSV or Excel (≤ 500 MB)", type=["csv", "xlsx", "xls"])
     if up:
         tmp = os.path.join(TMP, up.name)
+        with open(tmp, "wb") as f:
+            f.write(up.read())
         if up.name.lower().endswith(".csv"):
             csv_path = tmp
         else:
             try:
+                pd.read_excel(tmp).to_csv(tmp + ".csv", index=False)
+                csv_path = tmp + ".csv"
             except Exception as e:
                 st.error(f"Excel parse failed: {e}")
 else:
+    eng  = st.selectbox("DB engine", SUPPORTED_ENGINES, key="db_eng")
+    conn = st.text_input("SQLAlchemy connection string")
     if conn:
         try:
             tbl = st.selectbox("Table", list_tables(conn))
     st.stop()
 with open(csv_path, "rb") as f:
+    st.download_button("⬇️ Download working CSV", f, file_name=os.path.basename(csv_path))
 # ──────────────────────────────────────────────────────────────
+# 2) Column pickers
 # ──────────────────────────────────────────────────────────────
 df_head = pd.read_csv(csv_path, nrows=5)
 st.dataframe(df_head)
+date_col   = st.selectbox("Date/time column", df_head.columns)
+numeric_df = df_head.select_dtypes("number")
+metric_col = st.selectbox(
+    "Numeric metric column",
+    [c for c in numeric_df.columns if c != date_col] or numeric_df.columns
+)
+if metric_col is None:
+    st.warning("Need at least one numeric column.")
     st.stop()
 # ──────────────────────────────────────────────────────────────
+# 3) Quick data summary & trend chart
 # ──────────────────────────────────────────────────────────────
+summary_md = parse_csv_tool(csv_path)
+trend_res = plot_metric_tool(csv_path, date_col, metric_col)
+if isinstance(trend_res, tuple):
+    trend_fig, _ = trend_res
+elif isinstance(trend_res, go.Figure):
+    trend_fig = trend_res
+else:  # error message str
+    st.warning(trend_res)
+    trend_fig = None
+if trend_fig is not None:
+    st.subheader("📈 Trend")
     st.plotly_chart(trend_fig, use_container_width=True)
 # ──────────────────────────────────────────────────────────────
+# 4) Build clean series & ARIMA helpers
 # ──────────────────────────────────────────────────────────────
+@st.cache_data(show_spinner="Preparing series…")
 def build_series(path, dcol, vcol):
     df = pd.read_csv(path, usecols=[dcol, vcol])
     df[dcol] = pd.to_datetime(df[dcol], errors="coerce")
     df[vcol] = pd.to_numeric(df[vcol], errors="coerce")
     df = df.dropna(subset=[dcol, vcol]).sort_values(dcol)
+    if df.empty:
+        raise ValueError("Not enough valid data.")
     s = df.set_index(dcol)[vcol].groupby(level=0).mean().sort_index()
     freq = pd.infer_freq(s.index) or "D"
     s = s.asfreq(freq).interpolate()
 @st.cache_data(show_spinner="Fitting ARIMA…")
 def fit_arima(series):
     warnings.simplefilter("ignore", ConvergenceWarning)
+    return ARIMA(series, order=(1, 1, 1)).fit()
 try:
     series, freq = build_series(csv_path, date_col, metric_col)
     horizon = 90 if freq == "D" else 3
+    model_res = fit_arima(series)
+    fc_obj    = model_res.get_forecast(horizon)
+    forecast  = fc_obj.predicted_mean
+    ci        = fc_obj.conf_int()
 except Exception as e:
+    st.subheader(f"🔮 {metric_col} Forecast")
     st.warning(f"Forecast failed: {e}")
+    forecast = ci = model_res = None
+# ──────────────────────────────────────────────────────────────
+# 5) Forecast plot & explainability
+# ──────────────────────────────────────────────────────────────
 if forecast is not None:
     fig = go.Figure()
+    fig.add_scatter(x=series.index, y=series, mode="lines", name=metric_col)
     fig.add_scatter(x=forecast.index, y=forecast, mode="lines+markers", name="Forecast")
+    fig.add_scatter(
+        x=ci.index, y=ci.iloc[:, 1], mode="lines", line=dict(width=0), showlegend=False
+    )
+    fig.add_scatter(
+        x=ci.index,
+        y=ci.iloc[:, 0],
+        mode="lines",
+        line=dict(width=0),
+        fill="tonexty",
+        fillcolor="rgba(255,0,0,0.25)",
+        showlegend=False,
+    )
+    fig.update_layout(
+        title=f"{metric_col} Forecast ({horizon} steps)",
+        xaxis_title=date_col,
+        yaxis_title=metric_col,
+        template="plotly_dark",
+    )
+    st.subheader(f"🔮 {metric_col} Forecast")
     st.plotly_chart(fig, use_container_width=True)
+    # -- model summary -----------------------------------------------------
+    st.subheader("📄 ARIMA Model Summary")
+    st.code(model_res.summary().as_text())
+    # -- coefficient interpretation ---------------------------------------
+    ar, ma = model_res.arparams, model_res.maparams
+    interp = []
     if ar.size:
+        interp.append(
+            f"• AR(1) ={ar[0]:.2f} → "
+            f"{'strong' if abs(ar[0]) > 0.5 else 'moderate'} persistence."
+        )
     if ma.size:
+        interp.append(
+            f"• MA(1) ={ma[0]:.2f} → "
+            f"{'large' if abs(ma[0]) > 0.5 else 'modest'} shock adjustment."
+        )
+    st.subheader("🗒 Coefficient Interpretation")
     st.markdown("\n".join(interp) or "N/A")
+    # -- residual ACF ------------------------------------------------------
+    st.subheader("🔍 Residual ACF")
     acf_png = os.path.join(TMP, "acf.png")
+    plot_acf(model_res.resid.dropna(), lags=30, alpha=0.05)
+    import matplotlib.pyplot as plt
     plt.tight_layout()
     plt.savefig(acf_png, dpi=120)
     plt.close()
     st.image(acf_png, use_container_width=True)
+    # -- back-test ---------------------------------------------------------
+    k = max(int(len(series) * 0.2), 10)
     train, test = series[:-k], series[-k:]
+    bt_res = ARIMA(train, order=(1, 1, 1)).fit()
+    bt_pred = bt_res.forecast(k)
+    mape = (abs(bt_pred - test) / test).mean() * 100
+    rmse = np.sqrt(((bt_pred - test) ** 2).mean())
+    st.subheader("🧪 Back-test (last 20 %)")
+    col1, col2 = st.columns(2)
+    col1.metric("MAPE", f"{mape:.2f}%")
+    col2.metric("RMSE", f"{rmse:,.0f}")
+    # -- seasonal decomposition (optional) --------------------------------
     with st.expander("Seasonal Decomposition"):
         try:
+            period = {"D": 7, "H": 24, "M": 12}.get(freq)
             if period:
                 dec = seasonal_decompose(series, period=period, model="additive")
+                for comp in ["trend", "seasonal", "resid"]:
+                    st.line_chart(getattr(dec, comp).dropna(), height=150)
             else:
                 st.info("Frequency not suited for decomposition.")
         except Exception as e:
             st.info(f"Decomposition failed: {e}")
 # ──────────────────────────────────────────────────────────────
+# 6) Gemini strategy report
 # ──────────────────────────────────────────────────────────────
 prompt = (
     "You are **BizIntel Strategist AI**.\n\n"
+    f"### Dataset Summary\n```\n{summary_md}\n```\n\n"
     f"### {metric_col} Forecast\n```\n"
+    f"{forecast.to_string() if forecast is not None else 'N/A'}\n```"
+    "\nGenerate a Markdown report with:\n"
+    "• 5 insights\n• 3 actionable strategies\n• Risks / anomalies\n• Additional visuals."
 )
+with st.spinner("Gemini 1.5 Pro is thinking…"):
     md = gemini.generate_content(prompt).text
+st.subheader("🚀 Strategy Recommendations (Gemini 1.5 Pro)")
 st.markdown(md)
+st.download_button("⬇️ Download Strategy (.md)", md, file_name="strategy.md")
 # ──────────────────────────────────────────────────────────────
+# 7) High-level dataset KPIs + optional EDA
 # ──────────────────────────────────────────────────────────────
 fulldf = pd.read_csv(csv_path, low_memory=False)
 rows, cols = fulldf.shape
+miss_pct = fulldf.isna().mean().mean() * 100
 st.markdown("---")
+st.subheader("📑 Dataset KPIs")
+k1, k2, k3 = st.columns(3)
+k1.metric("Rows", f"{rows:,}")
+k2.metric("Columns", cols)
+k3.metric("Missing %", f"{miss_pct:.1f}%")
+with st.expander("Descriptive Statistics (numeric)"):
+    st.dataframe(
+        fulldf.describe().T.round(2).style.format(precision=2).background_gradient("Blues"),
+        use_container_width=True,
+    )
 st.markdown("---")
+st.subheader("🔍 Optional EDA Visuals")
 if st.checkbox("Histogram"):
+    col = st.selectbox("Variable", fulldf.select_dtypes("number").columns)
+    hr = histogram_tool(csv_path, col)
+    if isinstance(hr, tuple):
+        st.plotly_chart(hr[0], use_container_width=True)
+    else:
+        st.warning(hr)
+if st.checkbox("Scatter Matrix"):
+    opts = fulldf.select_dtypes("number").columns.tolist()
+    sel = st.multiselect("Columns", opts, default=opts[:3])
     if sel:
+        sm = scatter_matrix_tool(csv_path, sel)
+        if isinstance(sm, tuple):
+            st.plotly_chart(sm[0], use_container_width=True)
+        else:
+            st.warning(sm)
+if st.checkbox("Correlation Heat-map"):
+    hm = corr_heatmap_tool(csv_path)
+    if isinstance(hm, tuple):
+        st.plotly_chart(hm[0], use_container_width=True)
+    else:
+        st.warning(hm)