Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,26 +1,40 @@
|
|
1 |
-
# app.py
|
2 |
# =============================================================
|
3 |
-
# CSV
|
4 |
-
#
|
5 |
-
#
|
|
|
6 |
# =============================================================
|
7 |
|
8 |
-
import os
|
9 |
-
|
|
|
|
|
10 |
|
11 |
import numpy as np
|
12 |
import pandas as pd
|
13 |
-
import streamlit as st
|
14 |
import plotly.graph_objects as go
|
|
|
15 |
from statsmodels.tsa.arima.model import ARIMA
|
16 |
from statsmodels.graphics.tsaplots import plot_acf
|
17 |
from statsmodels.tsa.seasonal import seasonal_decompose
|
18 |
from statsmodels.tools.sm_exceptions import ConvergenceWarning
|
|
|
19 |
import google.generativeai as genai
|
20 |
-
import matplotlib.pyplot as plt
|
21 |
|
22 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
23 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
25 |
TMP = tempfile.gettempdir()
|
26 |
orig_write = go.Figure.write_image
|
@@ -29,15 +43,7 @@ go.Figure.write_image = lambda self, p, *a, **k: orig_write(
|
|
29 |
)
|
30 |
|
31 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
32 |
-
# 1
|
33 |
-
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
34 |
-
from tools.csv_parser import parse_csv_tool
|
35 |
-
from tools.plot_generator import plot_metric_tool
|
36 |
-
from tools.visuals import histogram_tool, scatter_matrix_tool, corr_heatmap_tool
|
37 |
-
from db_connector import fetch_data_from_db, list_tables, SUPPORTED_ENGINES
|
38 |
-
|
39 |
-
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
40 |
-
# 2) Gemini 1.5ย Pro
|
41 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
42 |
genai.configure(api_key=os.getenv("GEMINI_APIKEY"))
|
43 |
gemini = genai.GenerativeModel(
|
@@ -46,33 +52,34 @@ gemini = genai.GenerativeModel(
|
|
46 |
)
|
47 |
|
48 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
49 |
-
#
|
50 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
51 |
-
st.set_page_config(page_title="BizIntel
|
52 |
-
st.title("๐
|
53 |
|
54 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
55 |
-
#
|
56 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
57 |
-
choice = st.radio("Select data source", ["Upload CSV
|
58 |
csv_path: str | None = None
|
59 |
|
60 |
if choice.startswith("Upload"):
|
61 |
-
up = st.file_uploader("CSV
|
62 |
if up:
|
63 |
tmp = os.path.join(TMP, up.name)
|
64 |
-
with open(tmp, "wb") as f:
|
|
|
65 |
if up.name.lower().endswith(".csv"):
|
66 |
csv_path = tmp
|
67 |
else:
|
68 |
try:
|
69 |
-
pd.read_excel(tmp
|
70 |
-
csv_path = tmp+".csv"
|
71 |
except Exception as e:
|
72 |
st.error(f"Excel parse failed: {e}")
|
73 |
else:
|
74 |
-
eng = st.selectbox("DB engine", SUPPORTED_ENGINES)
|
75 |
-
conn = st.text_input("SQLAlchemy
|
76 |
if conn:
|
77 |
try:
|
78 |
tbl = st.selectbox("Table", list_tables(conn))
|
@@ -86,43 +93,53 @@ if not csv_path:
|
|
86 |
st.stop()
|
87 |
|
88 |
with open(csv_path, "rb") as f:
|
89 |
-
st.download_button("โฌ๏ธ
|
90 |
|
91 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
92 |
-
#
|
93 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
94 |
df_head = pd.read_csv(csv_path, nrows=5)
|
95 |
st.dataframe(df_head)
|
96 |
|
97 |
-
date_col
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
|
|
|
|
|
|
102 |
st.stop()
|
103 |
-
metric_col = st.selectbox("Numeric metric column", metric_options)
|
104 |
|
105 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
106 |
-
#
|
107 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
112 |
st.plotly_chart(trend_fig, use_container_width=True)
|
113 |
-
else:
|
114 |
-
st.warning(trend_fig)
|
115 |
|
116 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
117 |
-
#
|
118 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
|
|
119 |
def build_series(path, dcol, vcol):
|
120 |
df = pd.read_csv(path, usecols=[dcol, vcol])
|
121 |
df[dcol] = pd.to_datetime(df[dcol], errors="coerce")
|
122 |
df[vcol] = pd.to_numeric(df[vcol], errors="coerce")
|
123 |
df = df.dropna(subset=[dcol, vcol]).sort_values(dcol)
|
124 |
-
if df.empty
|
125 |
-
raise ValueError("
|
126 |
s = df.set_index(dcol)[vcol].groupby(level=0).mean().sort_index()
|
127 |
freq = pd.infer_freq(s.index) or "D"
|
128 |
s = s.asfreq(freq).interpolate()
|
@@ -131,139 +148,167 @@ def build_series(path, dcol, vcol):
|
|
131 |
@st.cache_data(show_spinner="Fitting ARIMAโฆ")
|
132 |
def fit_arima(series):
|
133 |
warnings.simplefilter("ignore", ConvergenceWarning)
|
134 |
-
|
135 |
-
return model.fit()
|
136 |
|
137 |
try:
|
138 |
series, freq = build_series(csv_path, date_col, metric_col)
|
139 |
horizon = 90 if freq == "D" else 3
|
140 |
-
|
141 |
-
|
142 |
-
forecast
|
143 |
-
ci
|
144 |
except Exception as e:
|
145 |
-
st.subheader(f"๐ฎ
|
146 |
st.warning(f"Forecast failed: {e}")
|
147 |
-
|
148 |
|
|
|
|
|
|
|
149 |
if forecast is not None:
|
150 |
-
# Plot with CI
|
151 |
fig = go.Figure()
|
152 |
-
fig.add_scatter(x=series.index,
|
153 |
fig.add_scatter(x=forecast.index, y=forecast, mode="lines+markers", name="Forecast")
|
154 |
-
fig.add_scatter(
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
163 |
st.plotly_chart(fig, use_container_width=True)
|
164 |
|
165 |
-
#
|
166 |
-
st.subheader("๐
|
167 |
-
st.code(
|
168 |
|
169 |
-
|
170 |
-
ar =
|
171 |
-
|
172 |
-
interp: List[str] = []
|
173 |
if ar.size:
|
174 |
-
interp.append(
|
175 |
-
|
176 |
-
|
|
|
177 |
if ma.size:
|
178 |
-
interp.append(
|
179 |
-
|
180 |
-
|
|
|
|
|
181 |
st.markdown("\n".join(interp) or "N/A")
|
182 |
|
183 |
-
#
|
184 |
-
st.subheader("๐
|
185 |
-
plt.figure(figsize=(6,3))
|
186 |
-
plot_acf(res.resid.dropna(), lags=30, alpha=0.05)
|
187 |
acf_png = os.path.join(TMP, "acf.png")
|
|
|
|
|
188 |
plt.tight_layout()
|
189 |
plt.savefig(acf_png, dpi=120)
|
190 |
plt.close()
|
191 |
st.image(acf_png, use_container_width=True)
|
192 |
|
193 |
-
#
|
194 |
-
k = max(int(len(series)*0.2), 10)
|
195 |
train, test = series[:-k], series[-k:]
|
196 |
-
bt_res
|
197 |
-
bt_pred
|
198 |
-
mape = (abs(bt_pred - test)/test).mean()*100
|
199 |
-
rmse = np.sqrt(((bt_pred - test)**2).mean())
|
200 |
|
201 |
-
st.subheader("๐งช
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
|
206 |
-
#
|
207 |
with st.expander("Seasonal Decomposition"):
|
208 |
try:
|
209 |
-
period = {"D":7, "H":24, "M":12}.get(freq
|
210 |
if period:
|
211 |
dec = seasonal_decompose(series, period=period, model="additive")
|
212 |
-
for comp in ["trend","seasonal","resid"]:
|
213 |
-
st.line_chart(getattr(dec, comp), height=150)
|
214 |
else:
|
215 |
st.info("Frequency not suited for decomposition.")
|
216 |
except Exception as e:
|
217 |
st.info(f"Decomposition failed: {e}")
|
218 |
|
219 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
220 |
-
#
|
221 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
222 |
prompt = (
|
223 |
"You are **BizIntel Strategist AI**.\n\n"
|
224 |
-
f"### Dataset Summary\n```\n{
|
225 |
f"### {metric_col} Forecast\n```\n"
|
226 |
-
f"{forecast.to_string() if forecast is not None else 'N/A'}\n
|
227 |
-
"
|
228 |
-
"
|
229 |
-
"3. Risksย / anomalies\n4. Extra visuals to consider."
|
230 |
)
|
231 |
-
with st.spinner("Gemini
|
232 |
md = gemini.generate_content(prompt).text
|
233 |
-
|
|
|
234 |
st.markdown(md)
|
235 |
-
st.download_button("โฌ๏ธ
|
236 |
|
237 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
238 |
-
#
|
239 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
240 |
fulldf = pd.read_csv(csv_path, low_memory=False)
|
241 |
rows, cols = fulldf.shape
|
242 |
-
miss_pct = fulldf.isna().mean().mean()*100
|
243 |
|
244 |
st.markdown("---")
|
245 |
-
st.subheader("๐
|
246 |
-
|
247 |
-
|
248 |
-
|
249 |
-
|
250 |
|
251 |
-
with st.expander("Descriptive
|
252 |
-
st.dataframe(
|
253 |
-
|
|
|
|
|
254 |
|
255 |
st.markdown("---")
|
256 |
-
st.subheader("๐
|
257 |
-
num_cols = fulldf.select_dtypes("number").columns.tolist()
|
258 |
|
259 |
if st.checkbox("Histogram"):
|
260 |
-
|
261 |
-
|
|
|
|
|
|
|
|
|
262 |
|
263 |
-
if st.checkbox("Scatter
|
264 |
-
|
|
|
265 |
if sel:
|
266 |
-
|
|
|
|
|
|
|
|
|
267 |
|
268 |
-
if st.checkbox("Correlation
|
269 |
-
|
|
|
|
|
|
|
|
|
|
1 |
+
# app.py — BizIntel AI Ultra v2.1
|
2 |
# =============================================================
|
3 |
+
# • Upload CSV / Excel • SQL-DB fetch • Trend + ARIMA forecast
|
4 |
+
# • Model explainability (summary, coef interp, ACF, back-test)
|
5 |
+
# • Gemini 1.5 Pro strategy generation
|
6 |
+
# • Optional EDA visuals • Safe Plotly PNG write to /tmp
|
7 |
# =============================================================
|
8 |
|
9 |
+
import os
|
10 |
+
import tempfile
|
11 |
+
import warnings
|
12 |
+
from typing import List, Tuple
|
13 |
|
14 |
import numpy as np
|
15 |
import pandas as pd
|
|
|
16 |
import plotly.graph_objects as go
|
17 |
+
import streamlit as st
|
18 |
from statsmodels.tsa.arima.model import ARIMA
|
19 |
from statsmodels.graphics.tsaplots import plot_acf
|
20 |
from statsmodels.tsa.seasonal import seasonal_decompose
|
21 |
from statsmodels.tools.sm_exceptions import ConvergenceWarning
|
22 |
+
|
23 |
import google.generativeai as genai
|
|
|
24 |
|
25 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
26 |
+
# Local helper modules
|
27 |
+
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
28 |
+
from tools.csv_parser import parse_csv_tool
|
29 |
+
from tools.plot_generator import plot_metric_tool
|
30 |
+
from tools.forecaster import forecast_metric_tool # only for png path if needed
|
31 |
+
from tools.visuals import (
|
32 |
+
histogram_tool, scatter_matrix_tool, corr_heatmap_tool
|
33 |
+
)
|
34 |
+
from db_connector import fetch_data_from_db, list_tables, SUPPORTED_ENGINES
|
35 |
+
|
36 |
+
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
37 |
+
# Plotly safe write — ensure PNGs go to writable /tmp
|
38 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
39 |
TMP = tempfile.gettempdir()
|
40 |
orig_write = go.Figure.write_image
|
|
|
43 |
)
|
44 |
|
45 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
46 |
+
# Gemini 1.5 Pro setup
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
48 |
genai.configure(api_key=os.getenv("GEMINI_APIKEY"))
|
49 |
gemini = genai.GenerativeModel(
|
|
|
52 |
)
|
53 |
|
54 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
55 |
+
# Streamlit layout
|
56 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
57 |
+
st.set_page_config(page_title="BizIntel AI Ultra", layout="wide")
|
58 |
+
st.title("๐ BizIntel AI Ultra โ Advanced Analytics + Gemini 1.5 Pro")
|
59 |
|
60 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
61 |
+
# 1) Data source selection
|
62 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
63 |
+
choice = st.radio("Select data source", ["Upload CSV / Excel", "Connect to SQL Database"])
|
64 |
csv_path: str | None = None
|
65 |
|
66 |
if choice.startswith("Upload"):
|
67 |
+
up = st.file_uploader("CSV or Excel (โค 500 MB)", type=["csv", "xlsx", "xls"])
|
68 |
if up:
|
69 |
tmp = os.path.join(TMP, up.name)
|
70 |
+
with open(tmp, "wb") as f:
|
71 |
+
f.write(up.read())
|
72 |
if up.name.lower().endswith(".csv"):
|
73 |
csv_path = tmp
|
74 |
else:
|
75 |
try:
|
76 |
+
pd.read_excel(tmp).to_csv(tmp + ".csv", index=False)
|
77 |
+
csv_path = tmp + ".csv"
|
78 |
except Exception as e:
|
79 |
st.error(f"Excel parse failed: {e}")
|
80 |
else:
|
81 |
+
eng = st.selectbox("DB engine", SUPPORTED_ENGINES, key="db_eng")
|
82 |
+
conn = st.text_input("SQLAlchemy connection string")
|
83 |
if conn:
|
84 |
try:
|
85 |
tbl = st.selectbox("Table", list_tables(conn))
|
|
|
93 |
st.stop()
|
94 |
|
95 |
with open(csv_path, "rb") as f:
|
96 |
+
st.download_button("โฌ๏ธ Download working CSV", f, file_name=os.path.basename(csv_path))
|
97 |
|
98 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
99 |
+
# 2) Column pickers
|
100 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
101 |
df_head = pd.read_csv(csv_path, nrows=5)
|
102 |
st.dataframe(df_head)
|
103 |
|
104 |
+
date_col = st.selectbox("Date/time column", df_head.columns)
|
105 |
+
numeric_df = df_head.select_dtypes("number")
|
106 |
+
metric_col = st.selectbox(
|
107 |
+
"Numeric metric column",
|
108 |
+
[c for c in numeric_df.columns if c != date_col] or numeric_df.columns
|
109 |
+
)
|
110 |
+
if metric_col is None:
|
111 |
+
st.warning("Need at least one numeric column.")
|
112 |
st.stop()
|
|
|
113 |
|
114 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
115 |
+
# 3) Quick data summary & trend chart
|
116 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
117 |
+
summary_md = parse_csv_tool(csv_path)
|
118 |
+
|
119 |
+
trend_res = plot_metric_tool(csv_path, date_col, metric_col)
|
120 |
+
if isinstance(trend_res, tuple):
|
121 |
+
trend_fig, _ = trend_res
|
122 |
+
elif isinstance(trend_res, go.Figure):
|
123 |
+
trend_fig = trend_res
|
124 |
+
else: # error message str
|
125 |
+
st.warning(trend_res)
|
126 |
+
trend_fig = None
|
127 |
+
|
128 |
+
if trend_fig is not None:
|
129 |
+
st.subheader("๐ Trend")
|
130 |
st.plotly_chart(trend_fig, use_container_width=True)
|
|
|
|
|
131 |
|
132 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
133 |
+
# 4) Build clean series & ARIMA helpers
|
134 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
135 |
+
@st.cache_data(show_spinner="Preparing seriesโฆ")
|
136 |
def build_series(path, dcol, vcol):
|
137 |
df = pd.read_csv(path, usecols=[dcol, vcol])
|
138 |
df[dcol] = pd.to_datetime(df[dcol], errors="coerce")
|
139 |
df[vcol] = pd.to_numeric(df[vcol], errors="coerce")
|
140 |
df = df.dropna(subset=[dcol, vcol]).sort_values(dcol)
|
141 |
+
if df.empty:
|
142 |
+
raise ValueError("Not enough valid data.")
|
143 |
s = df.set_index(dcol)[vcol].groupby(level=0).mean().sort_index()
|
144 |
freq = pd.infer_freq(s.index) or "D"
|
145 |
s = s.asfreq(freq).interpolate()
|
|
|
148 |
@st.cache_data(show_spinner="Fitting ARIMA…")
def fit_arima(series):
    """Fit an ARIMA(1, 1, 1) model to ``series`` and return the fitted result.

    The function is decorated with ``st.cache_data``; see the Streamlit
    documentation for how the cache is keyed on the argument.

    ``ConvergenceWarning`` is suppressed only while the model is being fitted.
    ``warnings.catch_warnings()`` restores the previous filter list on exit,
    whereas a bare ``warnings.simplefilter`` call would leave the "ignore"
    filter installed process-wide for all later code.

    Args:
        series: the time series handed straight to ``ARIMA``.

    Returns:
        Whatever ``ARIMA(series, order=(1, 1, 1)).fit()`` returns.
    """
    # NOTE(review): catch_warnings() swaps module-global state and is not
    # thread-safe per the Python docs — confirm this is acceptable if several
    # sessions can fit models concurrently.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", ConvergenceWarning)
        return ARIMA(series, order=(1, 1, 1)).fit()
|
|
|
152 |
|
153 |
try:
|
154 |
series, freq = build_series(csv_path, date_col, metric_col)
|
155 |
horizon = 90 if freq == "D" else 3
|
156 |
+
model_res = fit_arima(series)
|
157 |
+
fc_obj = model_res.get_forecast(horizon)
|
158 |
+
forecast = fc_obj.predicted_mean
|
159 |
+
ci = fc_obj.conf_int()
|
160 |
except Exception as e:
|
161 |
+
st.subheader(f"๐ฎ {metric_col} Forecast")
|
162 |
st.warning(f"Forecast failed: {e}")
|
163 |
+
forecast = ci = model_res = None
|
164 |
|
165 |
+
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
166 |
+
# 5) Forecast plot & explainability
|
167 |
+
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
168 |
if forecast is not None:
|
|
|
169 |
fig = go.Figure()
|
170 |
+
fig.add_scatter(x=series.index, y=series, mode="lines", name=metric_col)
|
171 |
fig.add_scatter(x=forecast.index, y=forecast, mode="lines+markers", name="Forecast")
|
172 |
+
fig.add_scatter(
|
173 |
+
x=ci.index, y=ci.iloc[:, 1], mode="lines", line=dict(width=0), showlegend=False
|
174 |
+
)
|
175 |
+
fig.add_scatter(
|
176 |
+
x=ci.index,
|
177 |
+
y=ci.iloc[:, 0],
|
178 |
+
mode="lines",
|
179 |
+
line=dict(width=0),
|
180 |
+
fill="tonexty",
|
181 |
+
fillcolor="rgba(255,0,0,0.25)",
|
182 |
+
showlegend=False,
|
183 |
+
)
|
184 |
+
fig.update_layout(
|
185 |
+
title=f"{metric_col} Forecast ({horizon} steps)",
|
186 |
+
xaxis_title=date_col,
|
187 |
+
yaxis_title=metric_col,
|
188 |
+
template="plotly_dark",
|
189 |
+
)
|
190 |
+
|
191 |
+
st.subheader(f"๐ฎ {metric_col} Forecast")
|
192 |
st.plotly_chart(fig, use_container_width=True)
|
193 |
|
194 |
+
# -- model summary -----------------------------------------------------
|
195 |
+
st.subheader("๐ ARIMA Model Summary")
|
196 |
+
st.code(model_res.summary().as_text())
|
197 |
|
198 |
+
# -- coefficient interpretation ---------------------------------------
|
199 |
+
ar, ma = model_res.arparams, model_res.maparams
|
200 |
+
interp = []
|
|
|
201 |
if ar.size:
|
202 |
+
interp.append(
|
203 |
+
f"โข AR(1) ={ar[0]:.2f} โ "
|
204 |
+
f"{'strong' if abs(ar[0]) > 0.5 else 'moderate'} persistence."
|
205 |
+
)
|
206 |
if ma.size:
|
207 |
+
interp.append(
|
208 |
+
f"โข MA(1) ={ma[0]:.2f} โ "
|
209 |
+
f"{'large' if abs(ma[0]) > 0.5 else 'modest'} shock adjustment."
|
210 |
+
)
|
211 |
+
st.subheader("๐ Coefficient Interpretation")
|
212 |
st.markdown("\n".join(interp) or "N/A")
|
213 |
|
214 |
+
# -- residual ACF ------------------------------------------------------
|
215 |
+
st.subheader("๐ Residual ACF")
|
|
|
|
|
216 |
acf_png = os.path.join(TMP, "acf.png")
|
217 |
+
plot_acf(model_res.resid.dropna(), lags=30, alpha=0.05)
|
218 |
+
import matplotlib.pyplot as plt
|
219 |
plt.tight_layout()
|
220 |
plt.savefig(acf_png, dpi=120)
|
221 |
plt.close()
|
222 |
st.image(acf_png, use_container_width=True)
|
223 |
|
224 |
+
# -- back-test ---------------------------------------------------------
|
225 |
+
k = max(int(len(series) * 0.2), 10)
|
226 |
train, test = series[:-k], series[-k:]
|
227 |
+
bt_res = ARIMA(train, order=(1, 1, 1)).fit()
|
228 |
+
bt_pred = bt_res.forecast(k)
|
229 |
+
mape = (abs(bt_pred - test) / test).mean() * 100
|
230 |
+
rmse = np.sqrt(((bt_pred - test) ** 2).mean())
|
231 |
|
232 |
+
st.subheader("๐งช Back-test (last 20 %)")
|
233 |
+
col1, col2 = st.columns(2)
|
234 |
+
col1.metric("MAPE", f"{mape:.2f}%")
|
235 |
+
col2.metric("RMSE", f"{rmse:,.0f}")
|
236 |
|
237 |
+
# -- seasonal decomposition (optional) --------------------------------
|
238 |
with st.expander("Seasonal Decomposition"):
|
239 |
try:
|
240 |
+
period = {"D": 7, "H": 24, "M": 12}.get(freq)
|
241 |
if period:
|
242 |
dec = seasonal_decompose(series, period=period, model="additive")
|
243 |
+
for comp in ["trend", "seasonal", "resid"]:
|
244 |
+
st.line_chart(getattr(dec, comp).dropna(), height=150)
|
245 |
else:
|
246 |
st.info("Frequency not suited for decomposition.")
|
247 |
except Exception as e:
|
248 |
st.info(f"Decomposition failed: {e}")
|
249 |
|
250 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
251 |
+
# 6) Gemini strategy report
|
252 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
253 |
prompt = (
|
254 |
"You are **BizIntel Strategist AI**.\n\n"
|
255 |
+
f"### Dataset Summary\n```\n{summary_md}\n```\n\n"
|
256 |
f"### {metric_col} Forecast\n```\n"
|
257 |
+
f"{forecast.to_string() if forecast is not None else 'N/A'}\n```"
|
258 |
+
"\nGenerate a Markdown report with:\n"
|
259 |
+
"โข 5 insights\nโข 3 actionable strategies\nโข Risks / anomalies\nโข Additional visuals."
|
|
|
260 |
)
|
261 |
+
with st.spinner("Gemini 1.5 Pro is thinkingโฆ"):
|
262 |
md = gemini.generate_content(prompt).text
|
263 |
+
|
264 |
+
st.subheader("๐ Strategy Recommendations (Gemini 1.5 Pro)")
|
265 |
st.markdown(md)
|
266 |
+
st.download_button("โฌ๏ธ Download Strategy (.md)", md, file_name="strategy.md")
|
267 |
|
268 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
269 |
+
# 7) High-level dataset KPIs + optional EDA
|
270 |
# โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ
|
271 |
fulldf = pd.read_csv(csv_path, low_memory=False)
|
272 |
rows, cols = fulldf.shape
|
273 |
+
miss_pct = fulldf.isna().mean().mean() * 100
|
274 |
|
275 |
st.markdown("---")
|
276 |
+
st.subheader("๐ Dataset KPIs")
|
277 |
+
k1, k2, k3 = st.columns(3)
|
278 |
+
k1.metric("Rows", f"{rows:,}")
|
279 |
+
k2.metric("Columns", cols)
|
280 |
+
k3.metric("Missing %", f"{miss_pct:.1f}%")
|
281 |
|
282 |
+
with st.expander("Descriptive Statistics (numeric)"):
|
283 |
+
st.dataframe(
|
284 |
+
fulldf.describe().T.round(2).style.format(precision=2).background_gradient("Blues"),
|
285 |
+
use_container_width=True,
|
286 |
+
)
|
287 |
|
288 |
st.markdown("---")
|
289 |
+
st.subheader("๐ Optional EDA Visuals")
|
|
|
290 |
|
291 |
if st.checkbox("Histogram"):
|
292 |
+
col = st.selectbox("Variable", fulldf.select_dtypes("number").columns)
|
293 |
+
hr = histogram_tool(csv_path, col)
|
294 |
+
if isinstance(hr, tuple):
|
295 |
+
st.plotly_chart(hr[0], use_container_width=True)
|
296 |
+
else:
|
297 |
+
st.warning(hr)
|
298 |
|
299 |
+
if st.checkbox("Scatter Matrix"):
|
300 |
+
opts = fulldf.select_dtypes("number").columns.tolist()
|
301 |
+
sel = st.multiselect("Columns", opts, default=opts[:3])
|
302 |
if sel:
|
303 |
+
sm = scatter_matrix_tool(csv_path, sel)
|
304 |
+
if isinstance(sm, tuple):
|
305 |
+
st.plotly_chart(sm[0], use_container_width=True)
|
306 |
+
else:
|
307 |
+
st.warning(sm)
|
308 |
|
309 |
+
if st.checkbox("Correlation Heat-map"):
|
310 |
+
hm = corr_heatmap_tool(csv_path)
|
311 |
+
if isinstance(hm, tuple):
|
312 |
+
st.plotly_chart(hm[0], use_container_width=True)
|
313 |
+
else:
|
314 |
+
st.warning(hm)
|