# src/mis.py from __future__ import annotations import streamlit as st import pandas as pd import matplotlib.pyplot as plt from datetime import datetime, date, timedelta import sqlite3 # ----------------------------- # Lightweight data helpers # ----------------------------- def _load_distinct_intents(conn) -> list[str]: """Return distinct non-empty intents (sorted).""" try: rows = conn.execute( "SELECT DISTINCT intent FROM chat_history WHERE intent IS NOT NULL" ).fetchall() return sorted([r[0] for r in rows if r and r[0]]) except Exception: return [] def _load_users_for_picker(conn) -> pd.DataFrame: """Return DataFrame with id + label (username/email) for user picker.""" try: df = pd.read_sql_query( "SELECT id, username, email FROM users ORDER BY username COLLATE NOCASE", conn, ) df["label"] = df["username"].fillna(df["email"]) return df[["id", "label"]] except Exception: return pd.DataFrame(columns=["id", "label"]) def _apply_filters_base_query( start_str: str | None, end_str: str | None, chosen_intents: list[str], user_id_or_none: int | None, ) -> tuple[str, list]: """ Build WHERE clause for chat_history + users join, using column aliases: ch. for chat_history, u. for users. """ where = ["1=1"] params: list = [] if start_str: where.append("date(ch.timestamp) >= ?") params.append(start_str) if end_str: where.append("date(ch.timestamp) <= ?") params.append(end_str) if chosen_intents: q = ",".join(["?"] * len(chosen_intents)) where.append(f"ch.intent IN ({q})") params.extend(chosen_intents) if user_id_or_none: where.append("ch.user_id = ?") params.append(int(user_id_or_none)) return " WHERE " + " AND ".join(where) + " ", params def _metric_card(label: str, value): """Render a small st.metric safely.""" if value is None: value = "-" st.metric(label, value) # ----------------------------- # Caching layer for data query # ----------------------------- @st.cache_data(show_spinner="Fetching data...") def _load_mis_data( _conn: sqlite3.Connection, start_str: str | None, end_str: str | None, chosen_intents: list[str], user_id_or_none: int | None, ) -> pd.DataFrame: """ Fetches filtered data from the database and caches it. """ where_sql, params = _apply_filters_base_query( start_str, end_str, chosen_intents, user_id_or_none ) base_sql = """ SELECT ch.id, ch.user_id, u.username, ch.timestamp, ch.intent, ch.sentiment, ch.resolution, ch.ftr_ms, substr(ch.user_message, 1, 200) AS user_message FROM chat_history ch JOIN users u ON u.id = ch.user_id """ return pd.read_sql_query( base_sql + where_sql + " ORDER BY ch.timestamp DESC", _conn, params=params ) # ----------------------------- # Compact Admin MIS # ----------------------------- def render_mis_admin(conn): """ Compact Admin MIS: - Preserves your filters and KPIs. - Uses usernames (not IDs) in charts & tables. - Four mini charts (~200px tall). - Filtered table tucked in an expander. """ st.markdown("### 📊 MIS (Admin) — Compact") # ---------- Filters ---------- intents = _load_distinct_intents(conn) users_df = _load_users_for_picker(conn) f1, f2, f3 = st.columns([1, 1, 1]) with f1: start_date = st.date_input( "From", value=(date.today() - timedelta(days=30)), key="mis_admin_from" ) with f2: end_date = st.date_input("To", value=date.today(), key="mis_admin_to") with f3: chosen_intents = st.multiselect( "Intent(s)", intents, default=[], key="mis_admin_intents" ) user_picker = None if not users_df.empty: label_to_id = {row.label: int(row.id) for _, row in users_df.iterrows()} sel = st.selectbox( "User", options=["(All)"] + users_df["label"].tolist(), index=0, key="mis_admin_user" ) user_picker = label_to_id.get(sel, None) else: st.info("No users available to filter.") start_str = start_date.strftime("%Y-%m-%d") if start_date else None end_str = end_date.strftime("%Y-%m-%d") if end_date else None # ---------- Load filtered dataset with USERNAME join (using cached function) ---------- df = _load_mis_data(conn, start_str, end_str, chosen_intents, user_picker) if df.empty: st.info("No chats found for the selected filters.") return # ---------- KPIs (preserved; rendered as compact cards) ---------- total = len(df) resolved_pct = (df["resolution"] == "Resolved").mean() if "resolution" in df else None avg_ftr = df["ftr_ms"].dropna().mean() if "ftr_ms" in df else None p95_ftr = ( df["ftr_ms"].dropna().quantile(0.95) if "ftr_ms" in df and not df["ftr_ms"].dropna().empty else None ) k1, k2, k3, k4 = st.columns(4) with k1: st.markdown('
', unsafe_allow_html=True) _metric_card("Total chats", f"{total}") st.markdown("
", unsafe_allow_html=True) with k2: st.markdown('
', unsafe_allow_html=True) _metric_card( "Resolved %", f"{resolved_pct*100:.1f}%" if resolved_pct is not None else None ) st.markdown("
", unsafe_allow_html=True) with k3: st.markdown('
', unsafe_allow_html=True) _metric_card("Avg FTR (ms)", f"{avg_ftr:.0f}" if avg_ftr is not None else None) st.markdown("
", unsafe_allow_html=True) with k4: st.markdown('
', unsafe_allow_html=True) _metric_card("P95 FTR (ms)", f"{p95_ftr:.0f}" if p95_ftr is not None else None) st.markdown("
", unsafe_allow_html=True) st.markdown("") # ---------- Mini charts (4 panels, compact) ---------- cA, cB = st.columns(2, gap="small") cC, cD = st.columns(2, gap="small") # A) Volume by Day with cA: st.markdown( '
Volume by day
', unsafe_allow_html=True ) _plot_bar_from_series(_series_count_by_day(df), height_in=2.0, rotate=30) # B) Intents (Top 10) with cB: st.markdown( '
Intents (Top 10)
', unsafe_allow_html=True ) top_intents = df["intent"].fillna("UNMAPPED").value_counts().head(10) _plot_bar_from_series(top_intents, height_in=2.0, rotate=30) # C) Sentiment distribution with cC: st.markdown( '
Sentiment distribution
', unsafe_allow_html=True, ) sent = df["sentiment"].fillna("Unknown").value_counts() _plot_bar_from_series(sent, height_in=2.0, rotate=0) # D) Top users by chat volume (username, truncated labels to keep compact) with cD: st.markdown( '
Top users by chat volume
', unsafe_allow_html=True, ) top_users = ( df["username"] .fillna("(unknown)") .apply(lambda s: s if len(s) <= 20 else s[:17] + "…") .value_counts() .head(10) ) _plot_bar_from_series(top_users, height_in=2.0, rotate=0) # ---------- Filtered table (use USERNAME, not user_id) ---------- # Manual state management without 'key' if 'admin_expander_expanded' not in st.session_state: st.session_state.admin_expander_expanded = False with st.expander("Filtered chats (first 100)", expanded=st.session_state.admin_expander_expanded): st.session_state.admin_expander_expanded = True show_cols = [ "timestamp", "username", "intent", "sentiment", "resolution", "user_message", "ftr_ms", ] show_cols = [c for c in show_cols if c in df.columns] st.dataframe(df[show_cols].head(100), use_container_width=True, height=320) # ----------------------------- # Tiny plotting helpers # ----------------------------- def _series_count_by_day(df: pd.DataFrame) -> pd.Series: """Return a series: day(str) -> count.""" s = pd.to_datetime(df["timestamp"]).dt.date.value_counts().sort_index() s.index = s.index.astype(str) return s def _plot_bar_from_series(s: pd.Series, height_in: float = 2.0, rotate: int = 0): """Render a compact bar chart from a series.""" if s is None or s.empty: st.caption("No data for current filters.") return fig, ax = plt.subplots(figsize=(5.6, height_in)) ax.bar(s.index.astype(str), s.values) ax.set_ylabel("Count") ax.set_title("") plt.setp( ax.get_xticklabels(), rotation=rotate, ha="right" if rotate else "center", fontsize=8, ) st.pyplot(fig, use_container_width=True) def _load_distinct_intents_for_user(conn, user_id: int) -> list[str]: """Return distinct intents used by this user.""" try: rows = conn.execute( "SELECT DISTINCT intent FROM chat_history WHERE user_id = ? AND intent IS NOT NULL", (user_id,), ).fetchall() return sorted([r[0] for r in rows if r and r[0]]) except Exception: return [] def _apply_filters_base_query_user( start_str: str | None, end_str: str | None, chosen_intents: list[str], user_id: int, ) -> tuple[str, list]: """Construct WHERE clause and params for filtering user MIS queries.""" where = ["ch.user_id = ?"] params = [int(user_id)] if start_str: where.append("date(ch.timestamp) >= ?"); params.append(start_str) if end_str: where.append("date(ch.timestamp) <= ?"); params.append(end_str) if chosen_intents: q = ",".join(["?"] * len(chosen_intents)) where.append(f"ch.intent IN ({q})"); params.extend(chosen_intents) return " WHERE " + " AND ".join(where) + " ", params def _one_val(sql: str, conn: sqlite3.Connection): return pd.read_sql_query(sql, conn).iloc[0,0] def kpi_avg_delivery_days(conn: sqlite3.Connection): avg_days = _one_val(""" SELECT AVG(julianday(order_delivered_customer_date) - julianday(order_purchase_timestamp)) FROM olist_orders WHERE order_delivered_customer_date IS NOT NULL AND order_purchase_timestamp >= date('now','-30 day') """, conn) st.metric("Avg delivery time (30d)", f"{(avg_days or 0):.2f} days") def _plot_resolution_rate_by_day(df, height_in=2.5): """ Plot daily resolution rate = (# resolved / total) per day. """ if df.empty or "timestamp" not in df or "resolution" not in df: st.info("No resolution data to plot.") return # Ensure datetime df["date_only"] = pd.to_datetime(df["timestamp"]).dt.date grouped = df.groupby("date_only")["resolution"].apply( lambda x: (x == "Resolved").mean() ) fig, ax = plt.subplots(figsize=(5, height_in)) grouped.plot(kind="line", marker="o", ax=ax, color="#2563eb") ax.set_title("Resolution Rate by Day") ax.set_ylabel("Resolved %") ax.set_xlabel("Date") ax.set_ylim(0, 1) ax.grid(True, alpha=0.3) st.pyplot(fig) def render_mis_user(conn, user_id: int): """ Compact MIS for a single end user. - Filters: From / To, (optional) Intent(s) - 4 small KPI cards - 4 compact charts (~200px tall) - Filtered table with username (not user_id) """ st.markdown("### 📊 My MIS — Compact") # ---------- Filters ---------- intents = _load_distinct_intents_for_user(conn, user_id) f1, f2, f3 = st.columns([1, 1, 1]) with f1: start_date = st.date_input( "From", value=(date.today() - timedelta(days=30)), key="mis_user_from" ) with f2: end_date = st.date_input("To", value=date.today(), key="mis_user_to") with f3: chosen_intents = st.multiselect( "Intent(s)", intents, default=[], key="mis_user_intents" ) start_str = start_date.strftime("%Y-%m-%d") if start_date else None end_str = end_date.strftime("%Y-%m-%d") if end_date else None # WHERE limited to this user where_sql, params = _apply_filters_base_query_user( start_str=start_str, end_str=end_str, chosen_intents=chosen_intents, user_id=user_id, ) # ---------- Load filtered dataset with USERNAME join (using cached function) ---------- df = _load_mis_data(conn, start_str, end_str, chosen_intents, user_id) if df.empty: st.info("No chats found for the selected filters.") return # ---------- KPIs (compact cards) ---------- total = len(df) resolved_pct = (df["resolution"] == "Resolved").mean() if "resolution" in df else None avg_ftr = df["ftr_ms"].dropna().mean() if "ftr_ms" in df else None p95_ftr = ( df["ftr_ms"].dropna().quantile(0.95) if "ftr_ms" in df and not df["ftr_ms"].dropna().empty else None ) k1, k2, k3, k4 = st.columns(4) with k1: st.markdown('
', unsafe_allow_html=True) _metric_card("My total chats", f"{total}") st.markdown("
", unsafe_allow_html=True) with k2: st.markdown('
', unsafe_allow_html=True) _metric_card( "Resolved %", f"{resolved_pct*100:.1f}%" if resolved_pct is not None else None ) st.markdown("
", unsafe_allow_html=True) with k3: st.markdown('
', unsafe_allow_html=True) _metric_card("Avg FTR (ms)", f"{avg_ftr:.0f}" if avg_ftr is not None else None) st.markdown("
", unsafe_allow_html=True) with k4: st.markdown('
', unsafe_allow_html=True) _metric_card("P95 FTR (ms)", f"{p95_ftr:.0f}" if p95_ftr is not None else None) st.markdown("
", unsafe_allow_html=True) st.markdown("") # ---------- Mini charts (4 panels) ---------- cA, cB = st.columns(2, gap="small") cC, cD = st.columns(2, gap="small") # A) Volume by Day (my activity) with cA: st.markdown('
My volume by day
', unsafe_allow_html=True) _plot_bar_from_series(_series_count_by_day(df), height_in=2.0, rotate=30) # B) My Intents (Top 10) with cB: st.markdown('
My intents (Top 10)
', unsafe_allow_html=True) top_intents = df["intent"].fillna("UNMAPPED").value_counts().head(10) _plot_bar_from_series(top_intents, height_in=2.0, rotate=30) # C) Sentiment distribution with cC: st.markdown('
My sentiment distribution
', unsafe_allow_html=True) sent = df["sentiment"].fillna("Unknown").value_counts() _plot_bar_from_series(sent, height_in=2.0, rotate=0) # D) Resolution by day (industry-favorite view) with cD: st.markdown('
Average Delivery Time in Days (Last 30 days)
', unsafe_allow_html=True) #_plot_resolution_rate_by_day(df, height_in=2.0) kpi_avg_delivery_days(conn) # ---------- Filtered table (username, not ID) ---------- # Manual state management without 'key' if 'user_expander_expanded' not in st.session_state: st.session_state.user_expander_expanded = False with st.expander("My filtered chats (first 50)", expanded=st.session_state.user_expander_expanded): st.session_state.user_expander_expanded = True show_cols = [ "timestamp", "username", "intent", "sentiment", "resolution", "user_message", "ftr_ms", ] show_cols = [c for c in show_cols if c in df.columns] st.dataframe(df[show_cols].head(50), use_container_width=True, height=320)