# src/mis.py
from __future__ import annotations
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime, date, timedelta
import sqlite3
# -----------------------------
# Lightweight data helpers
# -----------------------------
def _load_distinct_intents(conn) -> list[str]:
"""Return distinct non-empty intents (sorted)."""
try:
rows = conn.execute(
"SELECT DISTINCT intent FROM chat_history WHERE intent IS NOT NULL"
).fetchall()
return sorted([r[0] for r in rows if r and r[0]])
except Exception:
return []
def _load_users_for_picker(conn) -> pd.DataFrame:
"""Return DataFrame with id + label (username/email) for user picker."""
try:
df = pd.read_sql_query(
"SELECT id, username, email FROM users ORDER BY username COLLATE NOCASE",
conn,
)
df["label"] = df["username"].fillna(df["email"])
return df[["id", "label"]]
except Exception:
return pd.DataFrame(columns=["id", "label"])
def _apply_filters_base_query(
start_str: str | None,
end_str: str | None,
chosen_intents: list[str],
user_id_or_none: int | None,
) -> tuple[str, list]:
"""
Build WHERE clause for chat_history + users join, using column aliases:
ch.
for chat_history, u. for users.
"""
where = ["1=1"]
params: list = []
if start_str:
where.append("date(ch.timestamp) >= ?")
params.append(start_str)
if end_str:
where.append("date(ch.timestamp) <= ?")
params.append(end_str)
if chosen_intents:
q = ",".join(["?"] * len(chosen_intents))
where.append(f"ch.intent IN ({q})")
params.extend(chosen_intents)
if user_id_or_none:
where.append("ch.user_id = ?")
params.append(int(user_id_or_none))
return " WHERE " + " AND ".join(where) + " ", params
def _metric_card(label: str, value):
"""Render a small st.metric safely."""
if value is None:
value = "-"
st.metric(label, value)
# -----------------------------
# Caching layer for data query
# -----------------------------
@st.cache_data(show_spinner="Fetching data...")
def _load_mis_data(
_conn: sqlite3.Connection,
start_str: str | None,
end_str: str | None,
chosen_intents: list[str],
user_id_or_none: int | None,
) -> pd.DataFrame:
"""
Fetches filtered data from the database and caches it.
"""
where_sql, params = _apply_filters_base_query(
start_str, end_str, chosen_intents, user_id_or_none
)
base_sql = """
SELECT ch.id, ch.user_id, u.username,
ch.timestamp, ch.intent, ch.sentiment, ch.resolution, ch.ftr_ms,
substr(ch.user_message, 1, 200) AS user_message
FROM chat_history ch
JOIN users u ON u.id = ch.user_id
"""
return pd.read_sql_query(
base_sql + where_sql + " ORDER BY ch.timestamp DESC", _conn, params=params
)
# -----------------------------
# Compact Admin MIS
# -----------------------------
def render_mis_admin(conn):
"""
Compact Admin MIS:
- Preserves your filters and KPIs.
- Uses usernames (not IDs) in charts & tables.
- Four mini charts (~200px tall).
- Filtered table tucked in an expander.
"""
st.markdown("### 📊 MIS (Admin) — Compact")
# ---------- Filters ----------
intents = _load_distinct_intents(conn)
users_df = _load_users_for_picker(conn)
f1, f2, f3 = st.columns([1, 1, 1])
with f1:
start_date = st.date_input(
"From", value=(date.today() - timedelta(days=30)), key="mis_admin_from"
)
with f2:
end_date = st.date_input("To", value=date.today(), key="mis_admin_to")
with f3:
chosen_intents = st.multiselect(
"Intent(s)", intents, default=[], key="mis_admin_intents"
)
user_picker = None
if not users_df.empty:
label_to_id = {row.label: int(row.id) for _, row in users_df.iterrows()}
sel = st.selectbox(
"User", options=["(All)"] + users_df["label"].tolist(), index=0, key="mis_admin_user"
)
user_picker = label_to_id.get(sel, None)
else:
st.info("No users available to filter.")
start_str = start_date.strftime("%Y-%m-%d") if start_date else None
end_str = end_date.strftime("%Y-%m-%d") if end_date else None
# ---------- Load filtered dataset with USERNAME join (using cached function) ----------
df = _load_mis_data(conn, start_str, end_str, chosen_intents, user_picker)
if df.empty:
st.info("No chats found for the selected filters.")
return
# ---------- KPIs (preserved; rendered as compact cards) ----------
total = len(df)
resolved_pct = (df["resolution"] == "Resolved").mean() if "resolution" in df else None
avg_ftr = df["ftr_ms"].dropna().mean() if "ftr_ms" in df else None
p95_ftr = (
df["ftr_ms"].dropna().quantile(0.95)
if "ftr_ms" in df and not df["ftr_ms"].dropna().empty
else None
)
k1, k2, k3, k4 = st.columns(4)
with k1:
st.markdown('', unsafe_allow_html=True)
_metric_card("Total chats", f"{total}")
st.markdown("
", unsafe_allow_html=True)
with k2:
st.markdown('', unsafe_allow_html=True)
_metric_card(
"Resolved %", f"{resolved_pct*100:.1f}%" if resolved_pct is not None else None
)
st.markdown("
", unsafe_allow_html=True)
with k3:
st.markdown('', unsafe_allow_html=True)
_metric_card("Avg FTR (ms)", f"{avg_ftr:.0f}" if avg_ftr is not None else None)
st.markdown("
", unsafe_allow_html=True)
with k4:
st.markdown('', unsafe_allow_html=True)
_metric_card("P95 FTR (ms)", f"{p95_ftr:.0f}" if p95_ftr is not None else None)
st.markdown("
", unsafe_allow_html=True)
st.markdown("")
# ---------- Mini charts (4 panels, compact) ----------
cA, cB = st.columns(2, gap="small")
cC, cD = st.columns(2, gap="small")
# A) Volume by Day
with cA:
st.markdown(
'Volume by day
', unsafe_allow_html=True
)
_plot_bar_from_series(_series_count_by_day(df), height_in=2.0, rotate=30)
# B) Intents (Top 10)
with cB:
st.markdown(
'Intents (Top 10)
', unsafe_allow_html=True
)
top_intents = df["intent"].fillna("UNMAPPED").value_counts().head(10)
_plot_bar_from_series(top_intents, height_in=2.0, rotate=30)
# C) Sentiment distribution
with cC:
st.markdown(
'Sentiment distribution
',
unsafe_allow_html=True,
)
sent = df["sentiment"].fillna("Unknown").value_counts()
_plot_bar_from_series(sent, height_in=2.0, rotate=0)
# D) Top users by chat volume (username, truncated labels to keep compact)
with cD:
st.markdown(
'Top users by chat volume
',
unsafe_allow_html=True,
)
top_users = (
df["username"]
.fillna("(unknown)")
.apply(lambda s: s if len(s) <= 20 else s[:17] + "…")
.value_counts()
.head(10)
)
_plot_bar_from_series(top_users, height_in=2.0, rotate=0)
# ---------- Filtered table (use USERNAME, not user_id) ----------
# Manual state management without 'key'
if 'admin_expander_expanded' not in st.session_state:
st.session_state.admin_expander_expanded = False
with st.expander("Filtered chats (first 100)", expanded=st.session_state.admin_expander_expanded):
st.session_state.admin_expander_expanded = True
show_cols = [
"timestamp",
"username",
"intent",
"sentiment",
"resolution",
"user_message",
"ftr_ms",
]
show_cols = [c for c in show_cols if c in df.columns]
st.dataframe(df[show_cols].head(100), use_container_width=True, height=320)
# -----------------------------
# Tiny plotting helpers
# -----------------------------
def _series_count_by_day(df: pd.DataFrame) -> pd.Series:
"""Return a series: day(str) -> count."""
s = pd.to_datetime(df["timestamp"]).dt.date.value_counts().sort_index()
s.index = s.index.astype(str)
return s
def _plot_bar_from_series(s: pd.Series, height_in: float = 2.0, rotate: int = 0):
"""Render a compact bar chart from a series."""
if s is None or s.empty:
st.caption("No data for current filters.")
return
fig, ax = plt.subplots(figsize=(5.6, height_in))
ax.bar(s.index.astype(str), s.values)
ax.set_ylabel("Count")
ax.set_title("")
plt.setp(
ax.get_xticklabels(),
rotation=rotate,
ha="right" if rotate else "center",
fontsize=8,
)
st.pyplot(fig, use_container_width=True)
def _load_distinct_intents_for_user(conn, user_id: int) -> list[str]:
"""Return distinct intents used by this user."""
try:
rows = conn.execute(
"SELECT DISTINCT intent FROM chat_history WHERE user_id = ? AND intent IS NOT NULL",
(user_id,),
).fetchall()
return sorted([r[0] for r in rows if r and r[0]])
except Exception:
return []
def _apply_filters_base_query_user(
start_str: str | None,
end_str: str | None,
chosen_intents: list[str],
user_id: int,
) -> tuple[str, list]:
"""Construct WHERE clause and params for filtering user MIS queries."""
where = ["ch.user_id = ?"]
params = [int(user_id)]
if start_str:
where.append("date(ch.timestamp) >= ?"); params.append(start_str)
if end_str:
where.append("date(ch.timestamp) <= ?"); params.append(end_str)
if chosen_intents:
q = ",".join(["?"] * len(chosen_intents))
where.append(f"ch.intent IN ({q})"); params.extend(chosen_intents)
return " WHERE " + " AND ".join(where) + " ", params
def _one_val(sql: str, conn: sqlite3.Connection):
return pd.read_sql_query(sql, conn).iloc[0,0]
def kpi_avg_delivery_days(conn: sqlite3.Connection):
avg_days = _one_val("""
SELECT AVG(julianday(order_delivered_customer_date) - julianday(order_purchase_timestamp))
FROM olist_orders
WHERE order_delivered_customer_date IS NOT NULL
AND order_purchase_timestamp >= date('now','-30 day')
""", conn)
st.metric("Avg delivery time (30d)", f"{(avg_days or 0):.2f} days")
def _plot_resolution_rate_by_day(df, height_in=2.5):
"""
Plot daily resolution rate = (# resolved / total) per day.
"""
if df.empty or "timestamp" not in df or "resolution" not in df:
st.info("No resolution data to plot.")
return
# Ensure datetime
df["date_only"] = pd.to_datetime(df["timestamp"]).dt.date
grouped = df.groupby("date_only")["resolution"].apply(
lambda x: (x == "Resolved").mean()
)
fig, ax = plt.subplots(figsize=(5, height_in))
grouped.plot(kind="line", marker="o", ax=ax, color="#2563eb")
ax.set_title("Resolution Rate by Day")
ax.set_ylabel("Resolved %")
ax.set_xlabel("Date")
ax.set_ylim(0, 1)
ax.grid(True, alpha=0.3)
st.pyplot(fig)
def render_mis_user(conn, user_id: int):
"""
Compact MIS for a single end user.
- Filters: From / To, (optional) Intent(s)
- 4 small KPI cards
- 4 compact charts (~200px tall)
- Filtered table with username (not user_id)
"""
st.markdown("### 📊 My MIS — Compact")
# ---------- Filters ----------
intents = _load_distinct_intents_for_user(conn, user_id)
f1, f2, f3 = st.columns([1, 1, 1])
with f1:
start_date = st.date_input(
"From", value=(date.today() - timedelta(days=30)), key="mis_user_from"
)
with f2:
end_date = st.date_input("To", value=date.today(), key="mis_user_to")
with f3:
chosen_intents = st.multiselect(
"Intent(s)", intents, default=[], key="mis_user_intents"
)
start_str = start_date.strftime("%Y-%m-%d") if start_date else None
end_str = end_date.strftime("%Y-%m-%d") if end_date else None
# WHERE limited to this user
where_sql, params = _apply_filters_base_query_user(
start_str=start_str,
end_str=end_str,
chosen_intents=chosen_intents,
user_id=user_id,
)
# ---------- Load filtered dataset with USERNAME join (using cached function) ----------
df = _load_mis_data(conn, start_str, end_str, chosen_intents, user_id)
if df.empty:
st.info("No chats found for the selected filters.")
return
# ---------- KPIs (compact cards) ----------
total = len(df)
resolved_pct = (df["resolution"] == "Resolved").mean() if "resolution" in df else None
avg_ftr = df["ftr_ms"].dropna().mean() if "ftr_ms" in df else None
p95_ftr = (
df["ftr_ms"].dropna().quantile(0.95)
if "ftr_ms" in df and not df["ftr_ms"].dropna().empty
else None
)
k1, k2, k3, k4 = st.columns(4)
with k1:
st.markdown('', unsafe_allow_html=True)
_metric_card("My total chats", f"{total}")
st.markdown("
", unsafe_allow_html=True)
with k2:
st.markdown('', unsafe_allow_html=True)
_metric_card(
"Resolved %", f"{resolved_pct*100:.1f}%" if resolved_pct is not None else None
)
st.markdown("
", unsafe_allow_html=True)
with k3:
st.markdown('', unsafe_allow_html=True)
_metric_card("Avg FTR (ms)", f"{avg_ftr:.0f}" if avg_ftr is not None else None)
st.markdown("
", unsafe_allow_html=True)
with k4:
st.markdown('', unsafe_allow_html=True)
_metric_card("P95 FTR (ms)", f"{p95_ftr:.0f}" if p95_ftr is not None else None)
st.markdown("
", unsafe_allow_html=True)
st.markdown("")
# ---------- Mini charts (4 panels) ----------
cA, cB = st.columns(2, gap="small")
cC, cD = st.columns(2, gap="small")
# A) Volume by Day (my activity)
with cA:
st.markdown('My volume by day
', unsafe_allow_html=True)
_plot_bar_from_series(_series_count_by_day(df), height_in=2.0, rotate=30)
# B) My Intents (Top 10)
with cB:
st.markdown('My intents (Top 10)
', unsafe_allow_html=True)
top_intents = df["intent"].fillna("UNMAPPED").value_counts().head(10)
_plot_bar_from_series(top_intents, height_in=2.0, rotate=30)
# C) Sentiment distribution
with cC:
st.markdown('My sentiment distribution
', unsafe_allow_html=True)
sent = df["sentiment"].fillna("Unknown").value_counts()
_plot_bar_from_series(sent, height_in=2.0, rotate=0)
# D) Resolution by day (industry-favorite view)
with cD:
st.markdown('Average Delivery Time in Days (Last 30 days)
', unsafe_allow_html=True)
#_plot_resolution_rate_by_day(df, height_in=2.0)
kpi_avg_delivery_days(conn)
# ---------- Filtered table (username, not ID) ----------
# Manual state management without 'key'
if 'user_expander_expanded' not in st.session_state:
st.session_state.user_expander_expanded = False
with st.expander("My filtered chats (first 50)", expanded=st.session_state.user_expander_expanded):
st.session_state.user_expander_expanded = True
show_cols = [
"timestamp",
"username",
"intent",
"sentiment",
"resolution",
"user_message",
"ftr_ms",
]
show_cols = [c for c in show_cols if c in df.columns]
st.dataframe(df[show_cols].head(50), use_container_width=True, height=320)