mgbam committed on
Commit
a7d25a1
·
verified ·
1 Parent(s): 7453b19

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +170 -125
app.py CHANGED
@@ -1,26 +1,40 @@
1
- # app.py — BizIntel AI Ultra v2
2
  # =============================================================
3
- # CSV / Excel / DB ingestion • Trend + ARIMA forecast (90 d or 3 steps)
4
- # Confidence bands • Model explainability • Gemini 1.5 Pro strategy
5
- # Safe Plotly writes -> /tmp • KPI cards • Optional EDA visuals
 
6
  # =============================================================
7
 
8
- import os, tempfile, warnings
9
- from typing import List
 
 
10
 
11
  import numpy as np
12
  import pandas as pd
13
- import streamlit as st
14
  import plotly.graph_objects as go
 
15
  from statsmodels.tsa.arima.model import ARIMA
16
  from statsmodels.graphics.tsaplots import plot_acf
17
  from statsmodels.tsa.seasonal import seasonal_decompose
18
  from statsmodels.tools.sm_exceptions import ConvergenceWarning
 
19
  import google.generativeai as genai
20
- import matplotlib.pyplot as plt
21
 
22
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
23
- # 0) Plotly safe write → /tmp
 
 
 
 
 
 
 
 
 
 
 
24
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
25
  TMP = tempfile.gettempdir()
26
  orig_write = go.Figure.write_image
@@ -29,15 +43,7 @@ go.Figure.write_image = lambda self, p, *a, **k: orig_write(
29
  )
30
 
31
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
32
- # 1) Local helpers & DB connector
33
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
34
- from tools.csv_parser import parse_csv_tool
35
- from tools.plot_generator import plot_metric_tool
36
- from tools.visuals import histogram_tool, scatter_matrix_tool, corr_heatmap_tool
37
- from db_connector import fetch_data_from_db, list_tables, SUPPORTED_ENGINES
38
-
39
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
40
- # 2) Gemini 1.5 Pro
41
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
42
  genai.configure(api_key=os.getenv("GEMINI_APIKEY"))
43
  gemini = genai.GenerativeModel(
@@ -46,33 +52,34 @@ gemini = genai.GenerativeModel(
46
  )
47
 
48
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
49
- # 3) Streamlit setup
50
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
51
- st.set_page_config(page_title="BizIntelย AIย Ultra", layout="wide")
52
- st.title("๐Ÿ“Šย BizIntelย AIย Ultraย โ€“ Advanced Analyticsย +ย Geminiย 1.5ย Pro")
53
 
54
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
55
- # 4) Data source
56
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
57
- choice = st.radio("Select data source", ["Upload CSV๏ฟฝ๏ฟฝ/ย Excel", "Connect to SQL Database"])
58
  csv_path: str | None = None
59
 
60
  if choice.startswith("Upload"):
61
- up = st.file_uploader("CSVย orย Excelย (โ‰คโ€ฏ500โ€ฏMB)", type=["csv","xlsx","xls"])
62
  if up:
63
  tmp = os.path.join(TMP, up.name)
64
- with open(tmp, "wb") as f: f.write(up.read())
 
65
  if up.name.lower().endswith(".csv"):
66
  csv_path = tmp
67
  else:
68
  try:
69
- pd.read_excel(tmp, sheet_name=0).to_csv(tmp+".csv", index=False)
70
- csv_path = tmp+".csv"
71
  except Exception as e:
72
  st.error(f"Excel parse failed: {e}")
73
  else:
74
- eng = st.selectbox("DB engine", SUPPORTED_ENGINES)
75
- conn = st.text_input("SQLAlchemyย connection string")
76
  if conn:
77
  try:
78
  tbl = st.selectbox("Table", list_tables(conn))
@@ -86,43 +93,53 @@ if not csv_path:
86
  st.stop()
87
 
88
  with open(csv_path, "rb") as f:
89
- st.download_button("โฌ‡๏ธย Download working CSV", f, file_name=os.path.basename(csv_path))
90
 
91
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
92
- # 5) Column selectors
93
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
94
  df_head = pd.read_csv(csv_path, nrows=5)
95
  st.dataframe(df_head)
96
 
97
- date_col = st.selectbox("Date/time column", df_head.columns)
98
- numeric_cols = df_head.select_dtypes("number").columns.tolist()
99
- metric_options = [c for c in numeric_cols if c != date_col]
100
- if not metric_options:
101
- st.error("No numeric columns available apart from the date column.")
 
 
 
102
  st.stop()
103
- metric_col = st.selectbox("Numeric metric column", metric_options)
104
 
105
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
106
- # 6) Summary & trend chart
107
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
108
- summary = parse_csv_tool(csv_path)
109
- trend_fig = plot_metric_tool(csv_path, date_col, metric_col)
110
- if isinstance(trend_fig, go.Figure):
111
- st.subheader("๐Ÿ“ˆย Trend")
 
 
 
 
 
 
 
 
 
112
  st.plotly_chart(trend_fig, use_container_width=True)
113
- else:
114
- st.warning(trend_fig)
115
 
116
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
117
- # 7) Robust ARIMA + explainability
118
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
 
119
  def build_series(path, dcol, vcol):
120
  df = pd.read_csv(path, usecols=[dcol, vcol])
121
  df[dcol] = pd.to_datetime(df[dcol], errors="coerce")
122
  df[vcol] = pd.to_numeric(df[vcol], errors="coerce")
123
  df = df.dropna(subset=[dcol, vcol]).sort_values(dcol)
124
- if df.empty or df[dcol].nunique() < 2:
125
- raise ValueError("Need โ‰ฅโ€ฏ2 valid timestamps.")
126
  s = df.set_index(dcol)[vcol].groupby(level=0).mean().sort_index()
127
  freq = pd.infer_freq(s.index) or "D"
128
  s = s.asfreq(freq).interpolate()
@@ -131,139 +148,167 @@ def build_series(path, dcol, vcol):
131
  @st.cache_data(show_spinner="Fitting ARIMAโ€ฆ")
132
  def fit_arima(series):
133
  warnings.simplefilter("ignore", ConvergenceWarning)
134
- model = ARIMA(series, order=(1,1,1))
135
- return model.fit()
136
 
137
  try:
138
  series, freq = build_series(csv_path, date_col, metric_col)
139
  horizon = 90 if freq == "D" else 3
140
- res = fit_arima(series)
141
- fc = res.get_forecast(steps=horizon)
142
- forecast = fc.predicted_mean
143
- ci = fc.conf_int()
144
  except Exception as e:
145
- st.subheader(f"๐Ÿ”ฎย {metric_col}ย Forecast")
146
  st.warning(f"Forecast failed: {e}")
147
- series = forecast = ci = None
148
 
 
 
 
149
  if forecast is not None:
150
- # Plot with CI
151
  fig = go.Figure()
152
- fig.add_scatter(x=series.index, y=series, mode="lines", name=metric_col)
153
  fig.add_scatter(x=forecast.index, y=forecast, mode="lines+markers", name="Forecast")
154
- fig.add_scatter(x=ci.index, y=ci.iloc[:,1], mode="lines",
155
- line=dict(width=0), showlegend=False)
156
- fig.add_scatter(x=ci.index, y=ci.iloc[:,0], mode="lines",
157
- line=dict(width=0), fill="tonexty",
158
- fillcolor="rgba(255,0,0,0.25)", showlegend=False)
159
- fig.update_layout(title=f"{metric_col} Forecast ({horizon}ย steps)",
160
- template="plotly_dark", xaxis_title=date_col,
161
- yaxis_title=metric_col)
162
- st.subheader(f"๐Ÿ”ฎย {metric_col}ย Forecast")
 
 
 
 
 
 
 
 
 
 
 
163
  st.plotly_chart(fig, use_container_width=True)
164
 
165
- # ---------------- summary & interpretation ----------------
166
- st.subheader("๐Ÿ“„ย Model Summary")
167
- st.code(res.summary().as_text(), language="text")
168
 
169
- st.subheader("๐Ÿ—’ย Coefficient Interpretation")
170
- ar = res.arparams
171
- ma = res.maparams
172
- interp: List[str] = []
173
  if ar.size:
174
- interp.append(f"โ€ขย AR(1)ย ={ar[0]:.2f} โ†’ "
175
- f"{'strong' if abs(ar[0])>0.5 else 'moderate'} "
176
- "persistence in the series.")
 
177
  if ma.size:
178
- interp.append(f"โ€ขย MA(1)ย ={ma[0]:.2f} โ†’ "
179
- f"{'large' if abs(ma[0])>0.5 else 'modest'} "
180
- "shock adjustment.")
 
 
181
  st.markdown("\n".join(interp) or "N/A")
182
 
183
- # ---------------- Residual ACF ----------------
184
- st.subheader("๐Ÿ”ย Residual Autocorrelation (ACF)")
185
- plt.figure(figsize=(6,3))
186
- plot_acf(res.resid.dropna(), lags=30, alpha=0.05)
187
  acf_png = os.path.join(TMP, "acf.png")
 
 
188
  plt.tight_layout()
189
  plt.savefig(acf_png, dpi=120)
190
  plt.close()
191
  st.image(acf_png, use_container_width=True)
192
 
193
- # ---------------- Backโ€‘test ----------------
194
- k = max(int(len(series)*0.2), 10)
195
  train, test = series[:-k], series[-k:]
196
- bt_res = ARIMA(train, order=(1,1,1)).fit()
197
- bt_pred = bt_res.forecast(k)
198
- mape = (abs(bt_pred - test)/test).mean()*100
199
- rmse = np.sqrt(((bt_pred - test)**2).mean())
200
 
201
- st.subheader("๐Ÿงชย Backโ€‘test (last 20โ€ฏ%)")
202
- colA, colB = st.columns(2)
203
- colA.metric("MAPE", f"{mape:.2f}ย %")
204
- colB.metric("RMSE", f"{rmse:,.0f}")
205
 
206
- # ---------------- Optional seasonal decomposition -------
207
  with st.expander("Seasonal Decomposition"):
208
  try:
209
- period = {"D":7, "H":24, "M":12}.get(freq, None)
210
  if period:
211
  dec = seasonal_decompose(series, period=period, model="additive")
212
- for comp in ["trend","seasonal","resid"]:
213
- st.line_chart(getattr(dec, comp), height=150)
214
  else:
215
  st.info("Frequency not suited for decomposition.")
216
  except Exception as e:
217
  st.info(f"Decomposition failed: {e}")
218
 
219
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
220
- # 8) Gemini strategy report
221
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
222
  prompt = (
223
  "You are **BizIntel Strategist AI**.\n\n"
224
- f"### Dataset Summary\n```\n{summary}\n```\n\n"
225
  f"### {metric_col} Forecast\n```\n"
226
- f"{forecast.to_string() if forecast is not None else 'N/A'}\n```\n\n"
227
- "Craft a Markdown report:\n"
228
- "1. Five insights\n2. Three actionable strategies\n"
229
- "3. Risksย / anomalies\n4. Extra visuals to consider."
230
  )
231
- with st.spinner("Gemini generating strategyโ€ฆ"):
232
  md = gemini.generate_content(prompt).text
233
- st.subheader("๐Ÿš€ย Strategyย Recommendationsย (Geminiย 1.5ย Pro)")
 
234
  st.markdown(md)
235
- st.download_button("โฌ‡๏ธย Downloadย Strategy (.md)", md, file_name="strategy.md")
236
 
237
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
238
- # 9) KPI cards + detailed stats + optional EDA (unchanged)
239
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
240
  fulldf = pd.read_csv(csv_path, low_memory=False)
241
  rows, cols = fulldf.shape
242
- miss_pct = fulldf.isna().mean().mean()*100
243
 
244
  st.markdown("---")
245
- st.subheader("๐Ÿ“‘ย Datasetย Overview")
246
- c1,c2,c3 = st.columns(3)
247
- c1.metric("Rows", f"{rows:,}")
248
- c2.metric("Columns", cols)
249
- c3.metric("Missingย %", f"{miss_pct:.1f}%")
250
 
251
- with st.expander("Descriptiveย Statistics"):
252
- st.dataframe(fulldf.describe().T.style.format(precision=2).background_gradient("Blues"),
253
- use_container_width=True)
 
 
254
 
255
  st.markdown("---")
256
- st.subheader("๐Ÿ”ย Optionalย Exploratoryย Visuals")
257
- num_cols = fulldf.select_dtypes("number").columns.tolist()
258
 
259
  if st.checkbox("Histogram"):
260
- st.plotly_chart(histogram_tool(csv_path, st.selectbox("Var", num_cols, key="hist")),
261
- use_container_width=True)
 
 
 
 
262
 
263
- if st.checkbox("Scatterย Matrix"):
264
- sel = st.multiselect("Columns", num_cols, default=num_cols[:3])
 
265
  if sel:
266
- st.plotly_chart(scatter_matrix_tool(csv_path, sel), use_container_width=True)
 
 
 
 
267
 
268
- if st.checkbox("Correlationย Heatโ€‘map"):
269
- st.plotly_chart(corr_heatmap_tool(csv_path), use_container_width=True)
 
 
 
 
 
1
+ # app.py – BizIntel AI Ultra v2.1
2
  # =============================================================
3
+ # • Upload CSV / Excel • SQL–DB fetch • Trend + ARIMA forecast
4
+ # • Model explainability (summary, coef interp, ACF, back-test)
5
+ # • Gemini 1.5 Pro strategy generation
6
+ # • Optional EDA visuals • Safe Plotly PNG write to /tmp
7
  # =============================================================
8
 
9
+ import os
10
+ import tempfile
11
+ import warnings
12
+ from typing import List, Tuple
13
 
14
  import numpy as np
15
  import pandas as pd
 
16
  import plotly.graph_objects as go
17
+ import streamlit as st
18
  from statsmodels.tsa.arima.model import ARIMA
19
  from statsmodels.graphics.tsaplots import plot_acf
20
  from statsmodels.tsa.seasonal import seasonal_decompose
21
  from statsmodels.tools.sm_exceptions import ConvergenceWarning
22
+
23
  import google.generativeai as genai
 
24
 
25
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
26
+ # Local helper modules
27
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
28
+ from tools.csv_parser import parse_csv_tool
29
+ from tools.plot_generator import plot_metric_tool
30
+ from tools.forecaster import forecast_metric_tool # only for png path if needed
31
+ from tools.visuals import (
32
+ histogram_tool, scatter_matrix_tool, corr_heatmap_tool
33
+ )
34
+ from db_connector import fetch_data_from_db, list_tables, SUPPORTED_ENGINES
35
+
36
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
37
+ # Plotly safe write — ensure PNGs go to writable /tmp
38
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
39
  TMP = tempfile.gettempdir()
40
  orig_write = go.Figure.write_image
 
43
  )
44
 
45
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
46
+ # Gemini 1.5 Pro setup
 
 
 
 
 
 
 
 
47
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
48
  genai.configure(api_key=os.getenv("GEMINI_APIKEY"))
49
  gemini = genai.GenerativeModel(
 
52
  )
53
 
54
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
55
+ # Streamlit layout
56
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
57
+ st.set_page_config(page_title="BizIntel AI Ultra", layout="wide")
58
+ st.title("๐Ÿ“Š BizIntel AI Ultra โ€“ Advanced Analytics + Gemini 1.5 Pro")
59
 
60
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
61
+ # 1) Data source selection
62
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
63
+ choice = st.radio("Select data source", ["Upload CSV / Excel", "Connect to SQL Database"])
64
  csv_path: str | None = None
65
 
66
  if choice.startswith("Upload"):
67
+ up = st.file_uploader("CSV or Excel (โ‰ค 500 MB)", type=["csv", "xlsx", "xls"])
68
  if up:
69
  tmp = os.path.join(TMP, up.name)
70
+ with open(tmp, "wb") as f:
71
+ f.write(up.read())
72
  if up.name.lower().endswith(".csv"):
73
  csv_path = tmp
74
  else:
75
  try:
76
+ pd.read_excel(tmp).to_csv(tmp + ".csv", index=False)
77
+ csv_path = tmp + ".csv"
78
  except Exception as e:
79
  st.error(f"Excel parse failed: {e}")
80
  else:
81
+ eng = st.selectbox("DB engine", SUPPORTED_ENGINES, key="db_eng")
82
+ conn = st.text_input("SQLAlchemy connection string")
83
  if conn:
84
  try:
85
  tbl = st.selectbox("Table", list_tables(conn))
 
93
  st.stop()
94
 
95
  with open(csv_path, "rb") as f:
96
+ st.download_button("โฌ‡๏ธ Download working CSV", f, file_name=os.path.basename(csv_path))
97
 
98
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
99
+ # 2) Column pickers
100
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
101
  df_head = pd.read_csv(csv_path, nrows=5)
102
  st.dataframe(df_head)
103
 
104
+ date_col = st.selectbox("Date/time column", df_head.columns)
105
+ numeric_df = df_head.select_dtypes("number")
106
+ metric_col = st.selectbox(
107
+ "Numeric metric column",
108
+ [c for c in numeric_df.columns if c != date_col] or numeric_df.columns
109
+ )
110
+ if metric_col is None:
111
+ st.warning("Need at least one numeric column.")
112
  st.stop()
 
113
 
114
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
115
+ # 3) Quick data summary & trend chart
116
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
117
+ summary_md = parse_csv_tool(csv_path)
118
+
119
+ trend_res = plot_metric_tool(csv_path, date_col, metric_col)
120
+ if isinstance(trend_res, tuple):
121
+ trend_fig, _ = trend_res
122
+ elif isinstance(trend_res, go.Figure):
123
+ trend_fig = trend_res
124
+ else: # error message str
125
+ st.warning(trend_res)
126
+ trend_fig = None
127
+
128
+ if trend_fig is not None:
129
+ st.subheader("๐Ÿ“ˆ Trend")
130
  st.plotly_chart(trend_fig, use_container_width=True)
 
 
131
 
132
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
133
+ # 4) Build clean series & ARIMA helpers
134
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
135
+ @st.cache_data(show_spinner="Preparing seriesโ€ฆ")
136
  def build_series(path, dcol, vcol):
137
  df = pd.read_csv(path, usecols=[dcol, vcol])
138
  df[dcol] = pd.to_datetime(df[dcol], errors="coerce")
139
  df[vcol] = pd.to_numeric(df[vcol], errors="coerce")
140
  df = df.dropna(subset=[dcol, vcol]).sort_values(dcol)
141
+ if df.empty:
142
+ raise ValueError("Not enough valid data.")
143
  s = df.set_index(dcol)[vcol].groupby(level=0).mean().sort_index()
144
  freq = pd.infer_freq(s.index) or "D"
145
  s = s.asfreq(freq).interpolate()
 
148
  @st.cache_data(show_spinner="Fitting ARIMAโ€ฆ")
149
  def fit_arima(series):
150
  warnings.simplefilter("ignore", ConvergenceWarning)
151
+ return ARIMA(series, order=(1, 1, 1)).fit()
 
152
 
153
  try:
154
  series, freq = build_series(csv_path, date_col, metric_col)
155
  horizon = 90 if freq == "D" else 3
156
+ model_res = fit_arima(series)
157
+ fc_obj = model_res.get_forecast(horizon)
158
+ forecast = fc_obj.predicted_mean
159
+ ci = fc_obj.conf_int()
160
  except Exception as e:
161
+ st.subheader(f"๐Ÿ”ฎ {metric_col} Forecast")
162
  st.warning(f"Forecast failed: {e}")
163
+ forecast = ci = model_res = None
164
 
165
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
166
+ # 5) Forecast plot & explainability
167
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
168
  if forecast is not None:
 
169
  fig = go.Figure()
170
+ fig.add_scatter(x=series.index, y=series, mode="lines", name=metric_col)
171
  fig.add_scatter(x=forecast.index, y=forecast, mode="lines+markers", name="Forecast")
172
+ fig.add_scatter(
173
+ x=ci.index, y=ci.iloc[:, 1], mode="lines", line=dict(width=0), showlegend=False
174
+ )
175
+ fig.add_scatter(
176
+ x=ci.index,
177
+ y=ci.iloc[:, 0],
178
+ mode="lines",
179
+ line=dict(width=0),
180
+ fill="tonexty",
181
+ fillcolor="rgba(255,0,0,0.25)",
182
+ showlegend=False,
183
+ )
184
+ fig.update_layout(
185
+ title=f"{metric_col} Forecast ({horizon} steps)",
186
+ xaxis_title=date_col,
187
+ yaxis_title=metric_col,
188
+ template="plotly_dark",
189
+ )
190
+
191
+ st.subheader(f"๐Ÿ”ฎ {metric_col} Forecast")
192
  st.plotly_chart(fig, use_container_width=True)
193
 
194
+ # -- model summary -----------------------------------------------------
195
+ st.subheader("๐Ÿ“„ ARIMA Model Summary")
196
+ st.code(model_res.summary().as_text())
197
 
198
+ # -- coefficient interpretation ---------------------------------------
199
+ ar, ma = model_res.arparams, model_res.maparams
200
+ interp = []
 
201
  if ar.size:
202
+ interp.append(
203
+ f"โ€ข AR(1) ={ar[0]:.2f} โ†’ "
204
+ f"{'strong' if abs(ar[0]) > 0.5 else 'moderate'} persistence."
205
+ )
206
  if ma.size:
207
+ interp.append(
208
+ f"โ€ข MA(1) ={ma[0]:.2f} โ†’ "
209
+ f"{'large' if abs(ma[0]) > 0.5 else 'modest'} shock adjustment."
210
+ )
211
+ st.subheader("๐Ÿ—’ Coefficient Interpretation")
212
  st.markdown("\n".join(interp) or "N/A")
213
 
214
+ # -- residual ACF ------------------------------------------------------
215
+ st.subheader("๐Ÿ” Residual ACF")
 
 
216
  acf_png = os.path.join(TMP, "acf.png")
217
+ plot_acf(model_res.resid.dropna(), lags=30, alpha=0.05)
218
+ import matplotlib.pyplot as plt
219
  plt.tight_layout()
220
  plt.savefig(acf_png, dpi=120)
221
  plt.close()
222
  st.image(acf_png, use_container_width=True)
223
 
224
+ # -- back-test ---------------------------------------------------------
225
+ k = max(int(len(series) * 0.2), 10)
226
  train, test = series[:-k], series[-k:]
227
+ bt_res = ARIMA(train, order=(1, 1, 1)).fit()
228
+ bt_pred = bt_res.forecast(k)
229
+ mape = (abs(bt_pred - test) / test).mean() * 100
230
+ rmse = np.sqrt(((bt_pred - test) ** 2).mean())
231
 
232
+ st.subheader("๐Ÿงช Back-test (last 20 %)")
233
+ col1, col2 = st.columns(2)
234
+ col1.metric("MAPE", f"{mape:.2f}%")
235
+ col2.metric("RMSE", f"{rmse:,.0f}")
236
 
237
+ # -- seasonal decomposition (optional) --------------------------------
238
  with st.expander("Seasonal Decomposition"):
239
  try:
240
+ period = {"D": 7, "H": 24, "M": 12}.get(freq)
241
  if period:
242
  dec = seasonal_decompose(series, period=period, model="additive")
243
+ for comp in ["trend", "seasonal", "resid"]:
244
+ st.line_chart(getattr(dec, comp).dropna(), height=150)
245
  else:
246
  st.info("Frequency not suited for decomposition.")
247
  except Exception as e:
248
  st.info(f"Decomposition failed: {e}")
249
 
250
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
251
+ # 6) Gemini strategy report
252
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
253
  prompt = (
254
  "You are **BizIntel Strategist AI**.\n\n"
255
+ f"### Dataset Summary\n```\n{summary_md}\n```\n\n"
256
  f"### {metric_col} Forecast\n```\n"
257
+ f"{forecast.to_string() if forecast is not None else 'N/A'}\n```"
258
+ "\nGenerate a Markdown report with:\n"
259
+ "โ€ข 5 insights\nโ€ข 3 actionable strategies\nโ€ข Risks / anomalies\nโ€ข Additional visuals."
 
260
  )
261
+ with st.spinner("Gemini 1.5 Pro is thinkingโ€ฆ"):
262
  md = gemini.generate_content(prompt).text
263
+
264
+ st.subheader("๐Ÿš€ Strategy Recommendations (Gemini 1.5 Pro)")
265
  st.markdown(md)
266
+ st.download_button("โฌ‡๏ธ Download Strategy (.md)", md, file_name="strategy.md")
267
 
268
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
269
+ # 7) High-level dataset KPIs + optional EDA
270
  # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
271
  fulldf = pd.read_csv(csv_path, low_memory=False)
272
  rows, cols = fulldf.shape
273
+ miss_pct = fulldf.isna().mean().mean() * 100
274
 
275
  st.markdown("---")
276
+ st.subheader("๐Ÿ“‘ Dataset KPIs")
277
+ k1, k2, k3 = st.columns(3)
278
+ k1.metric("Rows", f"{rows:,}")
279
+ k2.metric("Columns", cols)
280
+ k3.metric("Missing %", f"{miss_pct:.1f}%")
281
 
282
+ with st.expander("Descriptive Statistics (numeric)"):
283
+ st.dataframe(
284
+ fulldf.describe().T.round(2).style.format(precision=2).background_gradient("Blues"),
285
+ use_container_width=True,
286
+ )
287
 
288
  st.markdown("---")
289
+ st.subheader("๐Ÿ” Optional EDA Visuals")
 
290
 
291
  if st.checkbox("Histogram"):
292
+ col = st.selectbox("Variable", fulldf.select_dtypes("number").columns)
293
+ hr = histogram_tool(csv_path, col)
294
+ if isinstance(hr, tuple):
295
+ st.plotly_chart(hr[0], use_container_width=True)
296
+ else:
297
+ st.warning(hr)
298
 
299
+ if st.checkbox("Scatter Matrix"):
300
+ opts = fulldf.select_dtypes("number").columns.tolist()
301
+ sel = st.multiselect("Columns", opts, default=opts[:3])
302
  if sel:
303
+ sm = scatter_matrix_tool(csv_path, sel)
304
+ if isinstance(sm, tuple):
305
+ st.plotly_chart(sm[0], use_container_width=True)
306
+ else:
307
+ st.warning(sm)
308
 
309
+ if st.checkbox("Correlation Heat-map"):
310
+ hm = corr_heatmap_tool(csv_path)
311
+ if isinstance(hm, tuple):
312
+ st.plotly_chart(hm[0], use_container_width=True)
313
+ else:
314
+ st.warning(hm)