Spaces:
Sleeping
Sleeping
Update main.py
Browse files
main.py
CHANGED
|
@@ -1,54 +1,58 @@
|
|
| 1 |
"""
|
| 2 |
-
main.py —
|
| 3 |
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
ENV VARS:
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
- PORT=5000
|
| 16 |
"""
|
| 17 |
|
| 18 |
import os
|
|
|
|
| 19 |
import re
|
| 20 |
import json
|
| 21 |
import time
|
| 22 |
-
import math
|
| 23 |
import logging
|
| 24 |
import base64
|
|
|
|
| 25 |
from datetime import datetime, timezone
|
| 26 |
-
from typing import Any, Dict, List, Optional
|
| 27 |
|
| 28 |
import requests
|
| 29 |
-
import pandas as pd
|
| 30 |
from flask import Flask, request, jsonify
|
| 31 |
from flask_cors import CORS
|
| 32 |
|
| 33 |
-
#
|
| 34 |
|
| 35 |
logging.basicConfig(
|
| 36 |
level=logging.INFO,
|
| 37 |
format="%(asctime)s | %(levelname)s | %(message)s"
|
| 38 |
)
|
| 39 |
-
logger = logging.getLogger("
|
| 40 |
|
| 41 |
-
#
|
| 42 |
|
| 43 |
try:
|
| 44 |
from google import genai
|
| 45 |
-
from google.genai import types
|
| 46 |
except Exception as e:
|
| 47 |
genai = None
|
| 48 |
-
logger.error("google-genai not installed
|
| 49 |
|
| 50 |
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")
|
| 51 |
-
GEMINI_MODEL
|
| 52 |
|
| 53 |
_gemini_client = None
|
| 54 |
if genai and GOOGLE_API_KEY:
|
|
@@ -58,14 +62,26 @@ if genai and GOOGLE_API_KEY:
|
|
| 58 |
except Exception as e:
|
| 59 |
logger.error("Failed to init Gemini client: %s", e)
|
| 60 |
|
| 61 |
-
#
|
|
|
|
|
|
|
|
|
|
| 62 |
|
| 63 |
-
|
| 64 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
|
| 66 |
FIREBASE_ENV = os.environ.get("FIREBASE", "")
|
| 67 |
|
| 68 |
-
def
|
|
|
|
|
|
|
| 69 |
if firebase_admin._apps:
|
| 70 |
return firestore.client()
|
| 71 |
if not FIREBASE_ENV:
|
|
@@ -78,807 +94,838 @@ def init_firestore_from_env() -> Optional[firestore.Client]:
|
|
| 78 |
logger.info("Firebase initialized.")
|
| 79 |
return firestore.client()
|
| 80 |
except Exception as e:
|
| 81 |
-
logger.critical("
|
| 82 |
return None
|
| 83 |
|
| 84 |
-
db =
|
| 85 |
-
|
| 86 |
-
# ––––– External API –––––
|
| 87 |
|
| 88 |
-
|
| 89 |
-
HTTP_TIMEOUT = 30
|
| 90 |
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
"gas_lpg": 2.00,
|
| 97 |
-
"bread_avg": 1.10,
|
| 98 |
-
"zesa_step_1": {"limit": 50, "rate": 0.04},
|
| 99 |
-
"zesa_step_2": {"limit": 150, "rate": 0.09},
|
| 100 |
-
"zesa_step_3": {"limit": 9999, "rate": 0.14},
|
| 101 |
-
"zesa_levy": 0.06
|
| 102 |
-
}
|
| 103 |
|
| 104 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
|
| 106 |
-
|
| 107 |
-
_data_cache: Dict[str, Any] = {
|
| 108 |
-
"ts": 0,
|
| 109 |
-
"df": pd.DataFrame(),
|
| 110 |
-
"raw_count": 0
|
| 111 |
-
}
|
| 112 |
|
| 113 |
app = Flask(__name__)
|
| 114 |
CORS(app)
|
| 115 |
|
| 116 |
-
#
|
| 117 |
-
#
|
| 118 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
|
| 120 |
-
def _norm(s: Any) -> str:
|
| 121 |
-
if not s: return ""
|
| 122 |
-
return str(s).strip().lower()
|
| 123 |
|
| 124 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
try:
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
|
| 130 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
try:
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
elif "```" in s:
|
| 135 |
-
s = s.split("```")[0]
|
| 136 |
-
return json.loads(s)
|
| 137 |
except Exception as e:
|
| 138 |
-
logger.error(
|
| 139 |
-
return
|
| 140 |
|
| 141 |
-
def fetch_and_flatten_data() -> pd.DataFrame:
|
| 142 |
-
all_products = []
|
| 143 |
-
page = 1
|
| 144 |
-
|
| 145 |
-
logger.info("ETL: Starting fetch from /api/v1/product-listing")
|
| 146 |
-
|
| 147 |
-
while True:
|
| 148 |
-
try:
|
| 149 |
-
url = f"{PRICE_API_BASE}/api/v1/product-listing"
|
| 150 |
-
r = requests.get(url, params={"page": page, "perPage": 50}, timeout=HTTP_TIMEOUT)
|
| 151 |
-
r.raise_for_status()
|
| 152 |
-
payload = r.json()
|
| 153 |
-
data = payload.get("data") or []
|
| 154 |
-
if not data: break
|
| 155 |
-
|
| 156 |
-
all_products.extend(data)
|
| 157 |
-
|
| 158 |
-
meta = payload
|
| 159 |
-
if page >= (meta.get("totalPages") or 99):
|
| 160 |
-
break
|
| 161 |
-
page += 1
|
| 162 |
-
except Exception as e:
|
| 163 |
-
logger.error(f"ETL Error on page {page}: {e}")
|
| 164 |
-
break
|
| 165 |
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
search_vector = _norm(f"{p_name} {brand_name} {cat_str}")
|
| 182 |
-
|
| 183 |
-
views = int(p.get("view_count") or 0)
|
| 184 |
-
image = str(p.get("thumbnail") or p.get("image") or "")
|
| 185 |
-
|
| 186 |
-
prices = p.get("prices") or []
|
| 187 |
-
|
| 188 |
-
if not prices:
|
| 189 |
-
rows.append({
|
| 190 |
-
"product_id": p_id,
|
| 191 |
-
"product_name": p_name,
|
| 192 |
-
"search_vector": search_vector,
|
| 193 |
-
"brand": brand_name,
|
| 194 |
-
"category": primary_cat,
|
| 195 |
-
"retailer": "Listing",
|
| 196 |
-
"price": 0.0,
|
| 197 |
-
"views": views,
|
| 198 |
-
"image": image,
|
| 199 |
-
"is_offer": False
|
| 200 |
-
})
|
| 201 |
-
continue
|
| 202 |
-
|
| 203 |
-
for offer in prices:
|
| 204 |
-
retailer = offer.get("retailer") or {}
|
| 205 |
-
r_name = str(retailer.get("name") or "Unknown Store")
|
| 206 |
-
price_val = _coerce_price(offer.get("price"))
|
| 207 |
-
|
| 208 |
-
if price_val > 0:
|
| 209 |
-
rows.append({
|
| 210 |
-
"product_id": p_id,
|
| 211 |
-
"product_name": p_name,
|
| 212 |
-
"search_vector": search_vector,
|
| 213 |
-
"brand": brand_name,
|
| 214 |
-
"category": primary_cat,
|
| 215 |
-
"retailer": r_name,
|
| 216 |
-
"price": price_val,
|
| 217 |
-
"views": views,
|
| 218 |
-
"image": image,
|
| 219 |
-
"is_offer": True
|
| 220 |
-
})
|
| 221 |
-
except:
|
| 222 |
continue
|
| 223 |
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 243 |
"""
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
1. Exact sequential match in Name/Vector (Highest Score)
|
| 247 |
-
2. Token overlap (Medium Score)
|
| 248 |
-
3. Views/Popularity (Tie-breaker)
|
| 249 |
"""
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 291 |
"""
|
| 292 |
-
|
|
|
|
|
|
|
| 293 |
"""
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
# 1. Resolve Items & Check Brand Fidelity
|
| 302 |
-
for item in item_names:
|
| 303 |
-
hits = search_products_deep(df[df['is_offer']==True], item, limit=10)
|
| 304 |
-
|
| 305 |
-
if hits.empty:
|
| 306 |
-
missing_global.append(item)
|
| 307 |
-
continue
|
| 308 |
-
|
| 309 |
-
best_match = hits.iloc[0]
|
| 310 |
-
|
| 311 |
-
# --- Brand Fidelity Check ---
|
| 312 |
-
q_norm = _norm(item)
|
| 313 |
-
res_norm = _norm(best_match['product_name'] + " " + best_match['brand'])
|
| 314 |
-
q_tokens = q_norm.split()
|
| 315 |
-
|
| 316 |
-
is_substitute = False
|
| 317 |
-
found_tokens = sum(1 for t in q_tokens if t in res_norm)
|
| 318 |
-
if len(q_tokens) > 1 and found_tokens < len(q_tokens):
|
| 319 |
-
is_substitute = True
|
| 320 |
-
|
| 321 |
-
# Aggregate all offers
|
| 322 |
-
product_offers = hits[hits['product_name'] == best_match['product_name']].sort_values('price')
|
| 323 |
-
|
| 324 |
-
offers_list = []
|
| 325 |
-
for _, r in product_offers.iterrows():
|
| 326 |
-
offers_list.append({"retailer": r['retailer'], "price": float(r['price'])})
|
| 327 |
-
|
| 328 |
-
best_price = offers_list[0]['price']
|
| 329 |
-
max_price = offers_list[-1]['price']
|
| 330 |
-
potential_savings = max_price - best_price
|
| 331 |
-
|
| 332 |
-
found_items.append({
|
| 333 |
-
"query": item,
|
| 334 |
-
"product_name": str(best_match['product_name']),
|
| 335 |
-
"is_substitute": is_substitute,
|
| 336 |
-
"offers": offers_list,
|
| 337 |
-
"best_price": best_price,
|
| 338 |
-
"potential_savings": potential_savings
|
| 339 |
-
})
|
| 340 |
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
store_comparison = []
|
| 351 |
-
|
| 352 |
-
for retailer in all_involved_retailers:
|
| 353 |
-
total_price = 0.0
|
| 354 |
-
found_count = 0
|
| 355 |
-
missing_in_store = []
|
| 356 |
-
|
| 357 |
-
for item in found_items:
|
| 358 |
-
price = next((o['price'] for o in item['offers'] if o['retailer'] == retailer), None)
|
| 359 |
-
if price:
|
| 360 |
-
total_price += price
|
| 361 |
-
found_count += 1
|
| 362 |
else:
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
store_comparison.append({
|
| 366 |
-
"retailer": retailer,
|
| 367 |
-
"total_price": total_price,
|
| 368 |
-
"found_count": found_count,
|
| 369 |
-
"total_items": len(found_items),
|
| 370 |
-
"missing_items": missing_in_store
|
| 371 |
-
})
|
| 372 |
|
| 373 |
-
|
| 374 |
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
|
| 390 |
-
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 394 |
|
| 395 |
-
|
| 396 |
-
|
| 397 |
-
units = 0.0
|
| 398 |
-
|
| 399 |
-
t1 = ZIM_CONTEXT["zesa_step_1"]
|
| 400 |
-
cost_t1 = t1["limit"] * t1["rate"]
|
| 401 |
-
|
| 402 |
-
if remaining > cost_t1:
|
| 403 |
-
units += t1["limit"]
|
| 404 |
-
remaining -= cost_t1
|
| 405 |
-
|
| 406 |
-
t2 = ZIM_CONTEXT["zesa_step_2"]
|
| 407 |
-
cost_t2 = t2["limit"] * t2["rate"]
|
| 408 |
-
|
| 409 |
-
if remaining > cost_t2:
|
| 410 |
-
units += t2["limit"]
|
| 411 |
-
remaining -= cost_t2
|
| 412 |
-
units += remaining / ZIM_CONTEXT["zesa_step_3"]["rate"]
|
| 413 |
-
else:
|
| 414 |
-
units += remaining / t2["rate"]
|
| 415 |
else:
|
| 416 |
-
|
| 417 |
|
| 418 |
-
|
| 419 |
-
"
|
| 420 |
-
"est_units_kwh": float(round(units, 1))
|
| 421 |
-
}
|
| 422 |
|
| 423 |
-
|
| 424 |
-
|
| 425 |
-
|
| 426 |
-
|
| 427 |
-
|
| 428 |
-
|
| 429 |
-
|
| 430 |
-
PROMPT = """
|
| 431 |
-
Analyze transcript. Return STRICT JSON.
|
| 432 |
-
Classify intent:
|
| 433 |
-
- CASUAL_CHAT: Greetings, "hi".
|
| 434 |
-
- SHOPPING_BASKET: Looking for prices, products, "cheapest X".
|
| 435 |
-
- UTILITY_CALC: Electricity/ZESA questions.
|
| 436 |
-
- STORE_DECISION: "Where should I buy?", "Which store is cheapest?".
|
| 437 |
-
- EVENT_PLANNING: "Plan a braai", "Wedding list", "Dinner for 5" (Implicit lists).
|
| 438 |
-
|
| 439 |
-
Extract:
|
| 440 |
-
- items: list of specific products found. **TRANSLATE ALL ITEMS TO ENGLISH** (e.g. 'Hupfu' -> 'Maize Meal').
|
| 441 |
-
- utility_amount: number
|
| 442 |
-
- store_preference: if a specific store is named (e.g. "at OK Mart").
|
| 443 |
-
- is_event_planning: boolean (true if user asks to plan an event but lists no items).
|
| 444 |
-
- language: Detected user language (e.g., "Shona", "Ndebele", "English").
|
| 445 |
-
|
| 446 |
-
JSON Schema:
|
| 447 |
-
{
|
| 448 |
-
"actionable": boolean,
|
| 449 |
-
"intent": "string",
|
| 450 |
-
"items": ["string"],
|
| 451 |
-
"utility_amount": number,
|
| 452 |
-
"store_preference": "string",
|
| 453 |
-
"is_event_planning": boolean,
|
| 454 |
-
"language": "string"
|
| 455 |
-
}
|
| 456 |
-
"""
|
| 457 |
-
try:
|
| 458 |
-
resp = _gemini_client.models.generate_content(
|
| 459 |
-
model=GEMINI_MODEL,
|
| 460 |
-
contents=PROMPT + "\nTranscript: " + transcript,
|
| 461 |
-
config=types.GenerateContentConfig(response_mime_type="application/json")
|
| 462 |
-
)
|
| 463 |
-
return _safe_json_loads(resp.text, {"actionable": False, "intent": "CASUAL_CHAT", "language": "English"})
|
| 464 |
-
except Exception as e:
|
| 465 |
-
logger.error(f"Intent Detect Error: {e}")
|
| 466 |
-
return {"actionable": False, "intent": "CASUAL_CHAT", "language": "English"}
|
| 467 |
|
| 468 |
-
|
| 469 |
-
|
| 470 |
-
|
| 471 |
-
|
| 472 |
-
|
| 473 |
-
|
| 474 |
-
|
| 475 |
-
|
| 476 |
-
|
| 477 |
-
|
| 478 |
-
|
| 479 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 480 |
try:
|
| 481 |
-
|
| 482 |
-
|
| 483 |
-
|
| 484 |
-
|
|
|
|
| 485 |
)
|
| 486 |
-
|
|
|
|
| 487 |
except Exception as e:
|
| 488 |
-
logger.error(
|
| 489 |
-
return
|
| 490 |
|
| 491 |
-
|
| 492 |
-
if not _gemini_client: return {"error": "AI Offline"}
|
| 493 |
-
|
| 494 |
-
PROMPT = f"""
|
| 495 |
-
Analyze this image. Context: {caption}
|
| 496 |
-
1. SHOPPING LIST? -> Extract items.
|
| 497 |
-
2. SINGLE PRODUCT? -> Extract BRAND + NAME (e.g. "Pepsi 500ml").
|
| 498 |
-
3. MEAL/DISH? -> Identify dish + ingredients.
|
| 499 |
-
4. IRRELEVANT? -> Return type "IRRELEVANT".
|
| 500 |
-
|
| 501 |
-
Return STRICT JSON:
|
| 502 |
-
{{
|
| 503 |
-
"type": "LIST" | "PRODUCT" | "MEAL" | "IRRELEVANT",
|
| 504 |
-
"items": ["item1"],
|
| 505 |
-
"description": "Short description"
|
| 506 |
-
}}
|
| 507 |
-
"""
|
| 508 |
try:
|
| 509 |
-
|
| 510 |
-
|
| 511 |
-
|
| 512 |
-
|
| 513 |
-
|
| 514 |
-
types.Part.from_bytes(data=image_bytes, mime_type="image/jpeg")
|
| 515 |
-
],
|
| 516 |
-
config=types.GenerateContentConfig(response_mime_type="application/json")
|
| 517 |
)
|
| 518 |
-
|
| 519 |
-
|
| 520 |
except Exception as e:
|
| 521 |
-
logger.error(
|
| 522 |
-
return
|
| 523 |
-
|
| 524 |
-
|
| 525 |
-
|
| 526 |
-
|
| 527 |
-
|
| 528 |
-
|
| 529 |
-
|
| 530 |
-
|
| 531 |
-
|
| 532 |
-
|
| 533 |
-
|
| 534 |
-
|
| 535 |
-
|
| 536 |
-
|
| 537 |
-
|
| 538 |
-
|
| 539 |
-
|
| 540 |
-
|
| 541 |
-
|
| 542 |
-
|
| 543 |
-
|
| 544 |
-
|
| 545 |
-
|
| 546 |
-
|
| 547 |
-
|
| 548 |
-
1. **LANGUAGE**: Reply in **{language}**. If Shona, use Shona. If English, use English.
|
| 549 |
-
|
| 550 |
-
2. **BASKET COMPARISON**:
|
| 551 |
-
- If `market_matrix` has multiple stores, compare totals and explicitly state the savings using the pre-calculated `basket_savings`.
|
| 552 |
-
- Example: "Spar is **$6.95**, OK Mart is **$4.00** (but missing Oil). You save **$2.95** by getting the basket at OK Mart!"
|
| 553 |
-
|
| 554 |
-
3. **BRAND SUBSTITUTES (Phrasing)**:
|
| 555 |
-
- If `is_substitute` is TRUE for an item, say:
|
| 556 |
-
"I couldn't find **[Query]**, but the **nearest match is** **[Found]** ($Price)."
|
| 557 |
-
|
| 558 |
-
4. **SINGLE ITEMS**:
|
| 559 |
-
- State the best price first, then others. Explicitly state how much is saved by choosing the cheapest option over the most expensive one based on `potential_savings`.
|
| 560 |
-
- Example: "The cheapest is **$2.00** at OK. You save **$0.50** compared to the most expensive store!"
|
| 561 |
-
|
| 562 |
-
5. **CASUAL**:
|
| 563 |
-
- Reset if user says "Hi".
|
| 564 |
-
|
| 565 |
-
TONE: Helpful, direct, Zimbabwean. Use Markdown.
|
| 566 |
"""
|
| 567 |
-
|
| 568 |
-
|
| 569 |
-
resp = _gemini_client.models.generate_content(
|
| 570 |
-
model=GEMINI_MODEL,
|
| 571 |
-
contents=PROMPT
|
| 572 |
-
)
|
| 573 |
-
return resp.text
|
| 574 |
-
except Exception as e:
|
| 575 |
-
logger.error(f"Chat Gen Error: {e}")
|
| 576 |
-
return "I checked the prices, but I'm having trouble displaying them right now."
|
| 577 |
-
|
| 578 |
-
def gemini_generate_4step_plan(transcript: str, analyst_result: Dict) -> str:
|
| 579 |
-
if not _gemini_client: return "# Error\nAI Offline."
|
| 580 |
-
|
| 581 |
-
PROMPT = f"""
|
| 582 |
-
Generate a formatted Markdown Shopping Plan.
|
| 583 |
-
|
| 584 |
-
USER REQUEST: "{transcript}"
|
| 585 |
-
DATA: {json.dumps(analyst_result, indent=2, default=str)}
|
| 586 |
-
|
| 587 |
-
CRITICAL INSTRUCTION:
|
| 588 |
-
For items in 'global_missing', you MUST provide a Realistic USD Estimate (e.g. Chicken ~$6.00).
|
| 589 |
-
Do not leave them as "Unknown".
|
| 590 |
-
|
| 591 |
-
SECTIONS:
|
| 592 |
-
|
| 593 |
-
1. **In Our Catalogue ✅**
|
| 594 |
-
(Markdown Table: | Item | Retailer | Price (USD) | Potential Savings |)
|
| 595 |
-
|
| 596 |
-
2. **Not in Catalogue (Estimates) 😔**
|
| 597 |
-
(Markdown Table: | Item | Estimated Price (USD) |)
|
| 598 |
-
*Fill in estimated prices for missing items based on Zimbabwe market knowledge.*
|
| 599 |
-
|
| 600 |
-
3. **Totals & Savings 💰**
|
| 601 |
-
- Confirmed Total (Catalogue)
|
| 602 |
-
- Total Basket Savings (From cheapest vs most expensive store)
|
| 603 |
-
- Estimated Total (Missing Items)
|
| 604 |
-
- **Grand Total Estimate**
|
| 605 |
-
|
| 606 |
-
4. **Ideas & Tips 💡**
|
| 607 |
-
- 3 Creative ideas based on the specific event/meal (e.g. Braai tips, Cooking hacks).
|
| 608 |
-
|
| 609 |
-
Tone: Warm, Professional, Zimbabwean.
|
| 610 |
"""
|
| 611 |
-
|
| 612 |
-
|
| 613 |
-
|
| 614 |
-
except Exception as e:
|
| 615 |
-
return "# Error\nCould not generate plan."
|
| 616 |
|
| 617 |
-
|
| 618 |
-
|
| 619 |
-
# =========================
|
| 620 |
|
| 621 |
-
|
| 622 |
-
|
| 623 |
-
|
| 624 |
-
|
| 625 |
-
"ok":
|
| 626 |
-
"offers_indexed": len(df),
|
| 627 |
-
"api_source": PRICE_API_BASE,
|
| 628 |
-
"persona": "April v3.1 (Babel Fish)"
|
| 629 |
-
})
|
| 630 |
|
| 631 |
-
|
| 632 |
-
def chat():
|
| 633 |
-
body = request.get_json(silent=True) or {}
|
| 634 |
-
msg = body.get("message", "")
|
| 635 |
-
pid = body.get("profile_id")
|
| 636 |
-
|
| 637 |
-
if not pid: return jsonify({"ok": False, "error": "Missing profile_id"}), 400
|
| 638 |
-
|
| 639 |
-
# History
|
| 640 |
-
history_str = ""
|
| 641 |
if db:
|
| 642 |
-
|
| 643 |
-
|
| 644 |
-
|
| 645 |
-
|
| 646 |
-
|
| 647 |
-
|
| 648 |
-
|
| 649 |
-
|
| 650 |
-
|
| 651 |
-
|
| 652 |
-
|
| 653 |
-
|
| 654 |
-
|
| 655 |
-
|
| 656 |
-
|
| 657 |
-
|
| 658 |
-
|
| 659 |
-
|
| 660 |
-
|
| 661 |
-
|
| 662 |
-
|
| 663 |
-
|
| 664 |
-
|
| 665 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 666 |
|
| 667 |
if db:
|
| 668 |
-
db.collection("
|
| 669 |
-
"
|
| 670 |
-
"
|
| 671 |
-
"
|
| 672 |
-
"
|
|
|
|
| 673 |
})
|
| 674 |
|
| 675 |
-
return jsonify({"ok": True, "
|
| 676 |
-
|
| 677 |
-
|
| 678 |
-
|
| 679 |
-
|
| 680 |
-
|
| 681 |
-
|
| 682 |
-
|
| 683 |
-
|
| 684 |
-
|
| 685 |
-
|
| 686 |
-
|
| 687 |
-
|
| 688 |
-
|
| 689 |
-
|
| 690 |
-
|
| 691 |
-
|
| 692 |
-
|
| 693 |
-
|
| 694 |
-
|
| 695 |
-
|
| 696 |
-
|
| 697 |
-
|
| 698 |
-
|
| 699 |
-
|
| 700 |
-
|
| 701 |
-
|
| 702 |
-
|
| 703 |
-
|
| 704 |
-
|
| 705 |
-
|
| 706 |
-
|
| 707 |
-
|
| 708 |
-
|
| 709 |
-
|
| 710 |
-
|
| 711 |
-
|
| 712 |
-
|
| 713 |
-
|
| 714 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 715 |
|
| 716 |
return jsonify({
|
| 717 |
-
"ok":
|
| 718 |
-
"
|
| 719 |
-
"
|
| 720 |
-
"
|
| 721 |
-
"
|
| 722 |
})
|
| 723 |
|
| 724 |
-
|
| 725 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 726 |
"""
|
| 727 |
-
|
| 728 |
-
|
| 729 |
"""
|
| 730 |
-
body
|
| 731 |
-
|
| 732 |
-
|
| 733 |
-
|
| 734 |
-
|
| 735 |
-
|
| 736 |
-
|
| 737 |
-
|
| 738 |
-
|
| 739 |
-
|
| 740 |
-
|
| 741 |
-
|
| 742 |
-
|
| 743 |
-
|
| 744 |
-
|
| 745 |
-
|
| 746 |
-
|
| 747 |
-
|
| 748 |
-
|
| 749 |
-
|
| 750 |
-
|
| 751 |
-
|
| 752 |
-
|
| 753 |
-
|
| 754 |
-
|
| 755 |
-
|
| 756 |
-
|
| 757 |
-
|
| 758 |
-
|
| 759 |
-
|
|
|
|
|
|
|
| 760 |
"""
|
| 761 |
-
|
| 762 |
-
|
| 763 |
-
|
| 764 |
-
|
| 765 |
-
|
| 766 |
-
|
| 767 |
-
|
| 768 |
-
|
| 769 |
-
|
| 770 |
-
|
| 771 |
-
|
| 772 |
-
|
| 773 |
-
|
| 774 |
-
|
| 775 |
-
|
| 776 |
-
|
| 777 |
-
|
| 778 |
-
|
| 779 |
-
|
| 780 |
-
|
| 781 |
-
|
| 782 |
-
|
| 783 |
-
|
| 784 |
-
|
| 785 |
-
|
| 786 |
-
|
| 787 |
return jsonify({
|
| 788 |
-
"ok":
|
| 789 |
-
"
|
| 790 |
-
"
|
| 791 |
-
"
|
| 792 |
})
|
| 793 |
|
| 794 |
-
|
| 795 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 796 |
"""
|
| 797 |
-
|
| 798 |
-
|
| 799 |
"""
|
| 800 |
-
|
| 801 |
-
|
| 802 |
-
|
| 803 |
-
|
| 804 |
-
|
| 805 |
-
|
| 806 |
-
|
| 807 |
-
|
| 808 |
-
|
| 809 |
-
|
| 810 |
-
|
| 811 |
-
|
| 812 |
-
|
| 813 |
-
|
| 814 |
-
|
| 815 |
-
|
| 816 |
-
|
| 817 |
-
|
| 818 |
-
|
| 819 |
-
|
| 820 |
-
|
| 821 |
-
|
| 822 |
-
|
| 823 |
-
|
| 824 |
-
|
| 825 |
-
|
| 826 |
-
|
| 827 |
-
|
| 828 |
-
|
| 829 |
-
|
| 830 |
-
|
| 831 |
-
|
| 832 |
-
|
| 833 |
-
|
| 834 |
-
|
| 835 |
-
|
| 836 |
-
|
| 837 |
-
|
| 838 |
-
|
| 839 |
-
|
| 840 |
-
|
| 841 |
-
|
| 842 |
-
|
| 843 |
-
|
| 844 |
-
|
| 845 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 846 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 847 |
if db:
|
| 848 |
-
db.collection("
|
| 849 |
-
"
|
| 850 |
-
"
|
| 851 |
-
"
|
| 852 |
-
"
|
|
|
|
|
|
|
| 853 |
})
|
| 854 |
|
| 855 |
return jsonify({
|
| 856 |
-
"ok":
|
| 857 |
-
"
|
|
|
|
| 858 |
})
|
| 859 |
|
| 860 |
-
|
| 861 |
-
|
| 862 |
-
|
| 863 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 864 |
try:
|
| 865 |
-
|
| 866 |
-
|
| 867 |
-
|
| 868 |
-
|
| 869 |
-
|
| 870 |
-
|
| 871 |
-
|
| 872 |
-
|
| 873 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 874 |
try:
|
| 875 |
-
db.collection("
|
| 876 |
return jsonify({"ok": True})
|
| 877 |
-
except
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 878 |
|
|
|
|
|
|
|
|
|
|
| 879 |
|
| 880 |
if __name__ == "__main__":
|
| 881 |
port = int(os.environ.get("PORT", 7860))
|
| 882 |
-
|
| 883 |
-
except: pass
|
| 884 |
app.run(host="0.0.0.0", port=port)
|
|
|
|
| 1 |
"""
|
| 2 |
+
main.py — Iris AI Service (v1.0 - April 2026)
|
| 3 |
|
| 4 |
+
AI layer for the Iris Support Portal (IrisPlus / Unified Spark Desk).
|
| 5 |
+
Deployed as a HuggingFace Space monofile (Flask + Gemini + AssemblyAI + Firebase).
|
| 6 |
+
|
| 7 |
+
FEATURES:
|
| 8 |
+
1. WhatsApp Export → Knowledge Base (intelligent Gemini extraction, additive only)
|
| 9 |
+
2. Bulk KB Upload (CSV / Excel / PDF)
|
| 10 |
+
3. Natural Language + Voice Ticket Submission (AssemblyAI transcription → Gemini extraction)
|
| 11 |
+
4. System Tutorial Ingestion (video transcript → timestamped KB articles)
|
| 12 |
+
5. Agent NL/Voice Solution Writing (same pipeline, agent role)
|
| 13 |
+
6. Iris Chatbot (KB + tutorial source RAG, Firebase persistence)
|
| 14 |
|
| 15 |
ENV VARS:
|
| 16 |
+
GOOGLE_API_KEY — Gemini API key
|
| 17 |
+
ASSEMBLYAI_API_KEY — AssemblyAI API key
|
| 18 |
+
FIREBASE — JSON string of Firebase service account
|
| 19 |
+
PORT — Server port (default 7860)
|
|
|
|
| 20 |
"""
|
| 21 |
|
| 22 |
import os
|
| 23 |
+
import io
|
| 24 |
import re
|
| 25 |
import json
|
| 26 |
import time
|
|
|
|
| 27 |
import logging
|
| 28 |
import base64
|
| 29 |
+
import hashlib
|
| 30 |
from datetime import datetime, timezone
|
| 31 |
+
from typing import Any, Dict, List, Optional
|
| 32 |
|
| 33 |
import requests
|
|
|
|
| 34 |
from flask import Flask, request, jsonify
|
| 35 |
from flask_cors import CORS
|
| 36 |
|
| 37 |
+
# ─── Logging ──────────────────────────────────────────────────────────────────
|
| 38 |
|
| 39 |
logging.basicConfig(
|
| 40 |
level=logging.INFO,
|
| 41 |
format="%(asctime)s | %(levelname)s | %(message)s"
|
| 42 |
)
|
| 43 |
+
logger = logging.getLogger("iris-ai-service")
|
| 44 |
|
| 45 |
+
# ─── Gemini SDK ───────────────────────────────────────────────────────────────
|
| 46 |
|
| 47 |
try:
|
| 48 |
from google import genai
|
| 49 |
+
from google.genai import types as genai_types
|
| 50 |
except Exception as e:
|
| 51 |
genai = None
|
| 52 |
+
logger.error("google-genai not installed: %s", e)
|
| 53 |
|
| 54 |
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")
|
| 55 |
+
GEMINI_MODEL = os.environ.get("GEMINI_MODEL", "gemini-2.0-flash")
|
| 56 |
|
| 57 |
_gemini_client = None
|
| 58 |
if genai and GOOGLE_API_KEY:
|
|
|
|
| 62 |
except Exception as e:
|
| 63 |
logger.error("Failed to init Gemini client: %s", e)
|
| 64 |
|
| 65 |
+
# ─── AssemblyAI ───────────────────────────────────────────────────────────────
|
| 66 |
+
|
| 67 |
+
ASSEMBLYAI_API_KEY = os.environ.get("ASSEMBLYAI_API_KEY", "")
|
| 68 |
+
ASSEMBLYAI_BASE = "https://api.assemblyai.com/v2"
|
| 69 |
|
| 70 |
+
# ─── Firebase ─────────────────────────────────────────────────────────────────
|
| 71 |
+
|
| 72 |
+
try:
|
| 73 |
+
import firebase_admin
|
| 74 |
+
from firebase_admin import credentials, firestore
|
| 75 |
+
FIREBASE_AVAILABLE = True
|
| 76 |
+
except ImportError:
|
| 77 |
+
FIREBASE_AVAILABLE = False
|
| 78 |
+
logger.warning("firebase-admin not installed. Persistence disabled.")
|
| 79 |
|
| 80 |
FIREBASE_ENV = os.environ.get("FIREBASE", "")
|
| 81 |
|
| 82 |
+
def init_firestore() -> Optional[Any]:
|
| 83 |
+
if not FIREBASE_AVAILABLE:
|
| 84 |
+
return None
|
| 85 |
if firebase_admin._apps:
|
| 86 |
return firestore.client()
|
| 87 |
if not FIREBASE_ENV:
|
|
|
|
| 94 |
logger.info("Firebase initialized.")
|
| 95 |
return firestore.client()
|
| 96 |
except Exception as e:
|
| 97 |
+
logger.critical("Firebase init failed: %s", e)
|
| 98 |
return None
|
| 99 |
|
| 100 |
+
db = init_firestore()
|
|
|
|
|
|
|
| 101 |
|
| 102 |
+
# ──��� Optional file-parsing libs ───────────────────────────────────────────────
|
|
|
|
| 103 |
|
| 104 |
+
try:
|
| 105 |
+
import pandas as pd
|
| 106 |
+
PANDAS_AVAILABLE = True
|
| 107 |
+
except ImportError:
|
| 108 |
+
PANDAS_AVAILABLE = False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
|
| 110 |
+
try:
|
| 111 |
+
import pypdf
|
| 112 |
+
PYPDF_AVAILABLE = True
|
| 113 |
+
except ImportError:
|
| 114 |
+
PYPDF_AVAILABLE = False
|
| 115 |
|
| 116 |
+
# ─── Flask App ────────────────────────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
|
| 118 |
app = Flask(__name__)
|
| 119 |
CORS(app)
|
| 120 |
|
| 121 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 122 |
+
# HELPERS
|
| 123 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 124 |
+
|
| 125 |
+
def _safe_json(text: str, fallback: Any) -> Any:
|
| 126 |
+
"""Strip markdown fences and parse JSON safely."""
|
| 127 |
+
try:
|
| 128 |
+
clean = text.strip()
|
| 129 |
+
if "```json" in clean:
|
| 130 |
+
clean = clean.split("```json")[1].split("```")[0]
|
| 131 |
+
elif "```" in clean:
|
| 132 |
+
clean = clean.split("```")[1].split("```")[0]
|
| 133 |
+
return json.loads(clean)
|
| 134 |
+
except Exception as e:
|
| 135 |
+
logger.error("JSON parse error: %s | text: %s", e, text[:200])
|
| 136 |
+
return fallback
|
| 137 |
|
|
|
|
|
|
|
|
|
|
| 138 |
|
| 139 |
+
def _gemini_text(prompt: str, json_mode: bool = False) -> str:
|
| 140 |
+
"""Call Gemini and return raw text."""
|
| 141 |
+
if not _gemini_client:
|
| 142 |
+
return ""
|
| 143 |
+
cfg = genai_types.GenerateContentConfig(response_mime_type="application/json") if json_mode else None
|
| 144 |
try:
|
| 145 |
+
resp = _gemini_client.models.generate_content(
|
| 146 |
+
model=GEMINI_MODEL,
|
| 147 |
+
contents=prompt,
|
| 148 |
+
config=cfg
|
| 149 |
+
)
|
| 150 |
+
return resp.text or ""
|
| 151 |
+
except Exception as e:
|
| 152 |
+
logger.error("Gemini call error: %s", e)
|
| 153 |
+
return ""
|
| 154 |
|
| 155 |
+
|
| 156 |
+
def _article_fingerprint(title: str, content: str) -> str:
|
| 157 |
+
"""Stable hash to detect duplicate KB articles."""
|
| 158 |
+
raw = f"{title.strip().lower()}::{content.strip().lower()[:300]}"
|
| 159 |
+
return hashlib.sha256(raw.encode()).hexdigest()[:16]
|
| 160 |
+
|
| 161 |
+
|
| 162 |
+
def _get_existing_fingerprints() -> set:
|
| 163 |
+
"""Fetch all fingerprints already in Firestore KB."""
|
| 164 |
+
if not db:
|
| 165 |
+
return set()
|
| 166 |
try:
|
| 167 |
+
docs = db.collection("iris_kb_articles").select(["fingerprint"]).stream()
|
| 168 |
+
return {d.to_dict().get("fingerprint") for d in docs if d.to_dict().get("fingerprint")}
|
|
|
|
|
|
|
|
|
|
| 169 |
except Exception as e:
|
| 170 |
+
logger.error("Fingerprint fetch error: %s", e)
|
| 171 |
+
return set()
|
| 172 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 173 |
|
| 174 |
+
def _save_kb_articles(articles: List[Dict], source_label: str) -> Dict:
|
| 175 |
+
"""Save articles to Firestore, skip duplicates. Returns stats."""
|
| 176 |
+
if not db:
|
| 177 |
+
return {"saved": 0, "skipped": 0, "error": "Firebase unavailable"}
|
| 178 |
+
|
| 179 |
+
existing = _get_existing_fingerprints()
|
| 180 |
+
saved, skipped = 0, 0
|
| 181 |
+
|
| 182 |
+
for article in articles:
|
| 183 |
+
title = article.get("title", "Untitled")
|
| 184 |
+
content = article.get("content", "")
|
| 185 |
+
fp = _article_fingerprint(title, content)
|
| 186 |
+
|
| 187 |
+
if fp in existing:
|
| 188 |
+
skipped += 1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 189 |
continue
|
| 190 |
|
| 191 |
+
doc = {
|
| 192 |
+
"title": title,
|
| 193 |
+
"content": content,
|
| 194 |
+
"category": article.get("category", "General"),
|
| 195 |
+
"tags": article.get("tags", []),
|
| 196 |
+
"source": source_label,
|
| 197 |
+
"fingerprint": fp,
|
| 198 |
+
"created_at": datetime.now(timezone.utc).isoformat(),
|
| 199 |
+
}
|
| 200 |
+
# Carry timestamp crop info from tutorial ingestion if present
|
| 201 |
+
if article.get("timestamp_start") is not None:
|
| 202 |
+
doc["timestamp_start"] = article["timestamp_start"]
|
| 203 |
+
doc["timestamp_end"] = article.get("timestamp_end")
|
| 204 |
+
doc["video_url"] = article.get("video_url", "")
|
| 205 |
+
|
| 206 |
+
db.collection("iris_kb_articles").add(doc)
|
| 207 |
+
existing.add(fp)
|
| 208 |
+
saved += 1
|
| 209 |
+
|
| 210 |
+
return {"saved": saved, "skipped": skipped}
|
| 211 |
+
|
| 212 |
+
|
| 213 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 214 |
+
# FEATURE 1 — WhatsApp Export → Knowledge Base
|
| 215 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 216 |
+
|
| 217 |
+
WHATSAPP_EXTRACTION_PROMPT = """
|
| 218 |
+
You are a support knowledge base curator.
|
| 219 |
+
|
| 220 |
+
You have been given a raw WhatsApp group chat export from a support team.
|
| 221 |
+
Your job is to extract ONLY clear problem→solution pairs.
|
| 222 |
+
|
| 223 |
+
Rules:
|
| 224 |
+
- Ignore greetings, off-topic chatter, emoji-only messages, system notifications.
|
| 225 |
+
- Extract only exchanges where a user described an issue AND a support agent (or another user) provided a working solution.
|
| 226 |
+
- Each article must be self-contained and searchable.
|
| 227 |
+
- Merge follow-up messages that belong to the same resolution thread.
|
| 228 |
+
|
| 229 |
+
Return a STRICT JSON array. Each object:
|
| 230 |
+
{
|
| 231 |
+
"title": "Short, searchable title of the issue",
|
| 232 |
+
"content": "Full explanation: what the problem was and the step-by-step solution",
|
| 233 |
+
"category": "One of: Account, Billing, Technical, Feature, Other",
|
| 234 |
+
"tags": ["array", "of", "relevant", "keywords"]
|
| 235 |
+
}
|
| 236 |
+
|
| 237 |
+
Return ONLY the JSON array, no other text.
|
| 238 |
+
|
| 239 |
+
WhatsApp Export:
|
| 240 |
+
"""
|
| 241 |
+
|
| 242 |
+
@app.post("/api/kb/whatsapp-import")
def whatsapp_import():
    """
    POST body: { "chat_text": "<raw WhatsApp export text>" }

    Runs the WhatsApp extraction prompt through Gemini and saves any new
    problem→solution articles additively (duplicates skipped, never overwritten).
    """
    payload = request.get_json(silent=True) or {}
    chat_text = payload.get("chat_text", "").strip()

    # Guard clauses: reject missing or trivially short exports up front.
    if not chat_text:
        return jsonify({"ok": False, "error": "chat_text is required"}), 400
    if len(chat_text) < 100:
        return jsonify({"ok": False, "error": "Chat export too short to process"}), 400

    logger.info("WhatsApp import: %d chars received", len(chat_text))

    # Cap the prompt payload at 50k chars to stay within model limits.
    raw_answer = _gemini_text(WHATSAPP_EXTRACTION_PROMPT + chat_text[:50000], json_mode=True)
    extracted = _safe_json(raw_answer, [])

    if not isinstance(extracted, list):
        return jsonify({"ok": False, "error": "Gemini returned unexpected format", "raw": raw_answer[:500]}), 500

    stats = _save_kb_articles(extracted, source_label="whatsapp_export")
    logger.info("WhatsApp import complete: %s", stats)

    return jsonify({
        "ok": True,
        "articles_found": len(extracted),
        "saved": stats["saved"],
        "skipped_dupes": stats["skipped"],
    })
|
| 274 |
+
|
| 275 |
+
|
| 276 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 277 |
+
# FEATURE 2 — Bulk KB Upload (CSV / Excel / PDF)
|
| 278 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 279 |
+
|
| 280 |
+
def _extract_text_from_pdf_bytes(pdf_bytes: bytes) -> str:
    """Extract text from a PDF.

    Tries pypdf first (fast, works for text-based PDFs); if that yields no
    text, falls back to Gemini with the raw PDF bytes inlined, which also
    handles scanned/image-only documents.

    Returns the extracted text, or "" when both strategies fail.
    """
    if PYPDF_AVAILABLE:
        try:
            reader = pypdf.PdfReader(io.BytesIO(pdf_bytes))
            pages = [p.extract_text() or "" for p in reader.pages]
            text = "\n\n".join(pages).strip()
            if text:
                return text
        except Exception as e:
            logger.warning("pypdf extraction failed: %s", e)

    # Gemini inline_data fallback for scanned PDFs.
    # Fix: removed an unused base64-encoded copy of the PDF that was computed
    # here but never sent — Part.from_bytes takes the raw bytes directly.
    if _gemini_client:
        try:
            resp = _gemini_client.models.generate_content(
                model=GEMINI_MODEL,
                contents=[
                    "Extract all text from this PDF document. Return plain text only.",
                    genai_types.Part.from_bytes(data=pdf_bytes, mime_type="application/pdf")
                ]
            )
            return resp.text or ""
        except Exception as e:
            logger.error("Gemini PDF extraction failed: %s", e)
    return ""
|
| 307 |
+
|
| 308 |
+
|
| 309 |
+
PDF_KB_PROMPT = """
|
| 310 |
+
You are a support knowledge base curator.
|
| 311 |
+
Convert the following document content into structured KB articles.
|
| 312 |
+
Each article should cover one distinct topic, issue, or procedure.
|
| 313 |
+
|
| 314 |
+
Return a STRICT JSON array. Each object:
|
| 315 |
+
{
|
| 316 |
+
"title": "Short, searchable title",
|
| 317 |
+
"content": "Complete explanation in clear language",
|
| 318 |
+
"category": "One of: Account, Billing, Technical, Feature, Other",
|
| 319 |
+
"tags": ["keyword1", "keyword2"]
|
| 320 |
+
}
|
| 321 |
+
|
| 322 |
+
Return ONLY the JSON array.
|
| 323 |
+
|
| 324 |
+
Document content:
|
| 325 |
+
"""
|
| 326 |
+
|
| 327 |
+
@app.post("/api/kb/bulk-upload")
def bulk_upload():
    """
    Accepts a multipart file upload. Supports: .csv, .xlsx, .xls, .pdf

    CSV/Excel expected columns: title, content (+ optional: category, tags)
    PDF: text is extracted, then Gemini structures it into articles.
    """
    if "file" not in request.files:
        return jsonify({"ok": False, "error": "No file uploaded"}), 400

    f = request.files["file"]
    filename = f.filename or ""
    ext = filename.rsplit(".", 1)[-1].lower()
    file_data = f.read()

    articles = []

    if ext in ("csv", "xlsx", "xls"):
        if not PANDAS_AVAILABLE:
            return jsonify({"ok": False, "error": "pandas not installed on server"}), 500
        try:
            if ext == "csv":
                df = pd.read_csv(io.BytesIO(file_data))
            else:
                df = pd.read_excel(io.BytesIO(file_data))

            # Normalize headers so "Title" / " TITLE " etc. all match.
            df.columns = [c.strip().lower() for c in df.columns]

            if "title" not in df.columns or "content" not in df.columns:
                return jsonify({"ok": False, "error": "CSV/Excel must have 'title' and 'content' columns"}), 400

            for _, row in df.iterrows():
                tags = []
                if "tags" in df.columns and pd.notna(row.get("tags")):
                    raw_tags = str(row["tags"])
                    # Accept comma-, semicolon-, or pipe-separated tags.
                    tags = [t.strip() for t in re.split(r"[,;|]", raw_tags) if t.strip()]

                articles.append({
                    "title": str(row["title"]).strip(),
                    "content": str(row["content"]).strip(),
                    "category": str(row.get("category", "General")).strip() if pd.notna(row.get("category")) else "General",
                    "tags": tags,
                })
        except Exception as e:
            logger.error("Spreadsheet parse error: %s", e)
            return jsonify({"ok": False, "error": f"Could not parse file: {e}"}), 400

    elif ext == "pdf":
        text = _extract_text_from_pdf_bytes(file_data)
        if not text:
            return jsonify({"ok": False, "error": "Could not extract text from PDF"}), 400

        # Cap at 50k chars to keep the prompt within model limits.
        gemini_out = _gemini_text(PDF_KB_PROMPT + text[:50000], json_mode=True)
        articles = _safe_json(gemini_out, [])

        if not isinstance(articles, list):
            return jsonify({"ok": False, "error": "Gemini PDF structuring failed"}), 500

    else:
        return jsonify({"ok": False, "error": f"Unsupported file type: .{ext}. Use csv, xlsx, or pdf"}), 400

    if not articles:
        return jsonify({"ok": False, "error": "No articles extracted from file"}), 400

    # Fix: record the actual uploaded filename in the provenance label — the
    # previous code used an f-string containing a literal placeholder with no
    # interpolation, so every upload was labelled identically.
    stats = _save_kb_articles(articles, source_label=f"bulk_upload:{filename}")
    return jsonify({
        "ok": True,
        "articles_found": len(articles),
        "saved": stats["saved"],
        "skipped_dupes": stats["skipped"],
    })
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 397 |
|
| 398 |
+
|
| 399 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 400 |
+
# FEATURE 3 — Ticket Submission via NL Text or Voice
|
| 401 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 402 |
+
|
| 403 |
+
TICKET_EXTRACTION_PROMPT = """
|
| 404 |
+
You are a support ticket intake system for a software support portal.
|
| 405 |
+
|
| 406 |
+
A user has described their issue in natural language. Extract structured ticket fields.
|
| 407 |
+
|
| 408 |
+
Return STRICT JSON (no other text):
|
| 409 |
+
{
|
| 410 |
+
"title": "Concise ticket title (max 80 chars)",
|
| 411 |
+
"description": "Full detailed description of the issue, rewritten clearly in third person",
|
| 412 |
+
"category_hint": "Best matching category: Account | Billing | Technical | Feature | Other",
|
| 413 |
+
"priority_hint": "One of: low | medium | high | critical (based on urgency language)",
|
| 414 |
+
"keywords": ["array", "of", "technical", "keywords"]
|
| 415 |
+
}
|
| 416 |
+
|
| 417 |
+
User's message:
|
| 418 |
+
"""
|
| 419 |
+
|
| 420 |
+
def _transcribe_audio_assemblyai(audio_b64: str, audio_format: str = "wav") -> str:
    """Upload base64 audio to AssemblyAI, request a transcript, and poll until done.

    Args:
        audio_b64: Base64-encoded audio payload.
        audio_format: Declared container format. Currently unused — AssemblyAI
            auto-detects the format from the uploaded bytes; kept for
            interface compatibility with existing callers.

    Returns:
        The transcript text, or "" on any failure (missing key, upload error,
        transcription error, or polling timeout).
    """
    if not ASSEMBLYAI_API_KEY:
        return ""

    audio_bytes = base64.b64decode(audio_b64)
    headers = {"authorization": ASSEMBLYAI_API_KEY}

    # 1. Upload raw audio bytes.
    try:
        upload_resp = requests.post(
            f"{ASSEMBLYAI_BASE}/upload",
            headers={**headers, "Content-Type": "application/octet-stream"},
            data=audio_bytes,
            timeout=30
        )
        upload_resp.raise_for_status()
        upload_url = upload_resp.json().get("upload_url")
    except Exception as e:
        logger.error("AssemblyAI upload error: %s", e)
        return ""

    # 2. Request a transcription job for the uploaded audio.
    try:
        tx_resp = requests.post(
            f"{ASSEMBLYAI_BASE}/transcript",
            headers={**headers, "Content-Type": "application/json"},
            json={"audio_url": upload_url, "language_detection": True},
            timeout=15
        )
        tx_resp.raise_for_status()
        tx_id = tx_resp.json().get("id")
    except Exception as e:
        logger.error("AssemblyAI transcript request error: %s", e)
        return ""

    # 3. Poll for completion (30 attempts x 3s ≈ 90s window).
    for _ in range(30):
        time.sleep(3)
        try:
            poll = requests.get(
                f"{ASSEMBLYAI_BASE}/transcript/{tx_id}",
                headers=headers,
                timeout=15
            )
            poll.raise_for_status()
            result = poll.json()
            status = result.get("status")
            if status == "completed":
                return result.get("text", "")
            elif status == "error":
                logger.error("AssemblyAI error: %s", result.get("error"))
                return ""
        except Exception as e:
            logger.error("AssemblyAI poll error: %s", e)
            return ""

    # Fix: previously fell off the end of the loop and implicitly returned
    # None after the polling window elapsed, violating the -> str contract.
    logger.error("AssemblyAI transcription timed out after polling window")
    return ""
|
| 476 |
+
|
| 477 |
+
|
| 478 |
+
@app.post("/api/tickets/submit-nl")
def submit_ticket_nl():
    """
    POST body: { "message": "I can't log in, it says my account is locked...", "user_id": "..." }
    Returns structured ticket fields for the frontend to pre-fill and submit.
    """
    body = request.get_json(silent=True) or {}
    message = body.get("message", "").strip()
    user_id = body.get("user_id", "anonymous")

    if not message:
        return jsonify({"ok": False, "error": "message is required"}), 400

    gemini_out = _gemini_text(TICKET_EXTRACTION_PROMPT + message, json_mode=True)
    ticket = _safe_json(gemini_out, {})

    # Robustness fix: Gemini can return a non-object (e.g. a JSON array);
    # calling .get on it would raise AttributeError and produce an unhandled
    # 500. Treat anything other than a dict with a title as a failure.
    if not isinstance(ticket, dict) or not ticket.get("title"):
        return jsonify({"ok": False, "error": "Could not extract ticket info from message"}), 500

    # Log submission attempt (best-effort; skipped when Firebase is down).
    if db:
        db.collection("iris_ai_ticket_drafts").add({
            "user_id": user_id,
            "raw_input": message,
            "extracted": ticket,
            "channel": "nl_text",
            "created_at": datetime.now(timezone.utc).isoformat(),
        })

    return jsonify({"ok": True, "ticket": ticket})
|
| 508 |
+
|
| 509 |
+
|
| 510 |
+
@app.post("/api/tickets/submit-voice")
def submit_ticket_voice():
    """
    POST body: { "audio_b64": "<base64 audio>", "audio_format": "wav", "user_id": "..." }
    Transcribes audio via AssemblyAI, then extracts ticket via Gemini.
    """
    body = request.get_json(silent=True) or {}
    audio_b64 = body.get("audio_b64", "")
    audio_format = body.get("audio_format", "wav")
    user_id = body.get("user_id", "anonymous")

    if not audio_b64:
        return jsonify({"ok": False, "error": "audio_b64 is required"}), 400

    if not ASSEMBLYAI_API_KEY:
        return jsonify({"ok": False, "error": "AssemblyAI not configured on server"}), 500

    logger.info("Voice ticket: transcribing audio for user=%s", user_id)
    transcript = _transcribe_audio_assemblyai(audio_b64, audio_format)

    if not transcript:
        return jsonify({"ok": False, "error": "Transcription failed or returned empty result"}), 500

    gemini_out = _gemini_text(TICKET_EXTRACTION_PROMPT + transcript, json_mode=True)
    ticket = _safe_json(gemini_out, {})

    # Robustness fix: guard against a non-dict Gemini payload (e.g. a JSON
    # array) before calling .get, which would otherwise raise AttributeError.
    if not isinstance(ticket, dict) or not ticket.get("title"):
        return jsonify({"ok": False, "error": "Could not extract ticket info from transcript"}), 500

    # Log the draft (best-effort; skipped when Firebase is down).
    if db:
        db.collection("iris_ai_ticket_drafts").add({
            "user_id": user_id,
            "raw_input": transcript,
            "extracted": ticket,
            "channel": "voice",
            "created_at": datetime.now(timezone.utc).isoformat(),
        })

    return jsonify({"ok": True, "transcript": transcript, "ticket": ticket})
|
| 549 |
+
|
| 550 |
+
|
| 551 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 552 |
+
# FEATURE 4 — System Tutorial Ingestion
|
| 553 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 554 |
+
|
| 555 |
+
TUTORIAL_EXTRACTION_PROMPT = """
|
| 556 |
+
You are a knowledge base curator for a software support system.
|
| 557 |
+
|
| 558 |
+
You have been given a timestamped transcript from a video tutorial about the Iris Support Portal.
|
| 559 |
+
Your job is to extract discrete how-to articles, one per distinct feature or task demonstrated.
|
| 560 |
+
|
| 561 |
+
For each article, identify the best timestamp range where the solution or demonstration occurs.
|
| 562 |
+
|
| 563 |
+
Return a STRICT JSON array. Each object:
|
| 564 |
+
{
|
| 565 |
+
"title": "How to <do something> in Iris",
|
| 566 |
+
"content": "Step-by-step instructions based on the tutorial",
|
| 567 |
+
"category": "One of: Account | Tickets | Agents | Reports | Admin | Other",
|
| 568 |
+
"tags": ["keyword1", "keyword2"],
|
| 569 |
+
"timestamp_start": <seconds as integer>,
|
| 570 |
+
"timestamp_end": <seconds as integer>
|
| 571 |
+
}
|
| 572 |
+
|
| 573 |
+
Return ONLY the JSON array.
|
| 574 |
+
|
| 575 |
+
Transcript (with timestamps in [MM:SS] or [HH:MM:SS] format):
|
| 576 |
+
"""
|
| 577 |
+
|
| 578 |
+
def _parse_timestamp_to_seconds(ts: str) -> int:
|
| 579 |
+
"""Convert MM:SS or HH:MM:SS string to integer seconds."""
|
| 580 |
+
parts = ts.strip("[]").split(":")
|
| 581 |
+
try:
|
| 582 |
+
if len(parts) == 2:
|
| 583 |
+
return int(parts[0]) * 60 + int(parts[1])
|
| 584 |
+
elif len(parts) == 3:
|
| 585 |
+
return int(parts[0]) * 3600 + int(parts[1]) * 60 + int(parts[2])
|
| 586 |
+
except Exception:
|
| 587 |
+
pass
|
| 588 |
+
return 0
|
| 589 |
+
|
| 590 |
+
|
| 591 |
+
@app.post("/api/kb/tutorial-ingest")
def tutorial_ingest():
    """
    POST body: {
      "transcript": "<timestamped transcript text>",
      "video_url": "https://...",   (optional, for linking crop timestamps)
      "video_title": "Getting Started with Iris"
    }
    Gemini extracts how-to articles with timestamp ranges.
    """
    body = request.get_json(silent=True) or {}
    transcript = body.get("transcript", "").strip()
    video_url = body.get("video_url", "")
    video_title = body.get("video_title", "Tutorial")

    if not transcript:
        return jsonify({"ok": False, "error": "transcript is required"}), 400

    logger.info("Tutorial ingest: %d chars, title=%s", len(transcript), video_title)

    # Cap at 50k chars to keep the prompt within model limits.
    gemini_out = _gemini_text(TUTORIAL_EXTRACTION_PROMPT + transcript[:50000], json_mode=True)
    articles = _safe_json(gemini_out, [])

    if not isinstance(articles, list):
        return jsonify({"ok": False, "error": "Gemini returned unexpected format"}), 500

    # Inject video metadata into each article and normalize timestamps to
    # integer seconds.
    for a in articles:
        a["video_url"] = video_url
        a["video_title"] = video_title
        for ts_key in ("timestamp_start", "timestamp_end"):
            val = a.get(ts_key)
            if isinstance(val, str):
                # Gemini sometimes returns "[MM:SS]" strings instead of seconds.
                a[ts_key] = _parse_timestamp_to_seconds(val)
            elif isinstance(val, float):
                # Fix: numeric-but-float values were previously discarded
                # (reset to 0); truncate them to whole seconds instead.
                a[ts_key] = int(val)
            elif not isinstance(val, int):
                a[ts_key] = 0

    stats = _save_kb_articles(articles, source_label=f"tutorial:{video_title}")

    return jsonify({
        "ok": True,
        "video_title": video_title,
        "articles_found": len(articles),
        "saved": stats["saved"],
        "skipped_dupes": stats["skipped"],
    })
|
| 638 |
|
| 639 |
+
|
| 640 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 641 |
+
# FEATURE 5 — Agent Solution Writing (NL Text + Voice)
|
| 642 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 643 |
+
|
| 644 |
+
SOLUTION_EXTRACTION_PROMPT = """
|
| 645 |
+
You are a support knowledge base curator.
|
| 646 |
+
|
| 647 |
+
An agent or support staff has described a solution they discovered while resolving a ticket.
|
| 648 |
+
Structure this into a reusable KB article.
|
| 649 |
+
|
| 650 |
+
Return STRICT JSON:
|
| 651 |
+
{
|
| 652 |
+
"title": "Short, searchable problem title",
|
| 653 |
+
"content": "Clear step-by-step solution, rewritten for future reference",
|
| 654 |
+
"category": "One of: Account | Billing | Technical | Feature | Other",
|
| 655 |
+
"tags": ["relevant", "keywords"]
|
| 656 |
+
}
|
| 657 |
+
|
| 658 |
+
Agent's description:
|
| 659 |
+
"""
|
| 660 |
+
|
| 661 |
+
@app.post("/api/kb/agent-solution-nl")
def agent_solution_nl():
    """
    POST body: { "message": "I fixed ticket #123 by...", "agent_id": "...", "ticket_id": "..." }
    Creates a KB article from agent's natural language solution description.
    """
    body = request.get_json(silent=True) or {}
    message = body.get("message", "").strip()
    agent_id = body.get("agent_id", "unknown")
    ticket_id = body.get("ticket_id", "")

    if not message:
        return jsonify({"ok": False, "error": "message is required"}), 400

    gemini_out = _gemini_text(SOLUTION_EXTRACTION_PROMPT + message, json_mode=True)
    article = _safe_json(gemini_out, {})

    # Robustness fix: guard against Gemini returning a non-object (e.g. a
    # JSON array), which would make .get raise AttributeError.
    if not isinstance(article, dict) or not article.get("title"):
        return jsonify({"ok": False, "error": "Could not structure solution"}), 500

    # Add ticket reference tag for traceability back to the originating ticket.
    if ticket_id:
        article.setdefault("tags", []).append(f"ticket:{ticket_id}")

    stats = _save_kb_articles([article], source_label=f"agent:{agent_id}")

    return jsonify({
        "ok": True,
        "saved": stats["saved"],
        "article": article,
    })
|
| 692 |
+
|
| 693 |
+
|
| 694 |
+
@app.post("/api/kb/agent-solution-voice")
def agent_solution_voice():
    """
    POST body: { "audio_b64": "...", "audio_format": "wav", "agent_id": "...", "ticket_id": "..." }
    Transcribes agent's voice note, structures into KB article.
    """
    body = request.get_json(silent=True) or {}
    audio_b64 = body.get("audio_b64", "")
    audio_format = body.get("audio_format", "wav")
    agent_id = body.get("agent_id", "unknown")
    ticket_id = body.get("ticket_id", "")

    if not audio_b64:
        return jsonify({"ok": False, "error": "audio_b64 is required"}), 400

    transcript = _transcribe_audio_assemblyai(audio_b64, audio_format)

    if not transcript:
        return jsonify({"ok": False, "error": "Transcription failed"}), 500

    gemini_out = _gemini_text(SOLUTION_EXTRACTION_PROMPT + transcript, json_mode=True)
    article = _safe_json(gemini_out, {})

    # Robustness fix: guard against a non-dict Gemini payload before .get,
    # which would otherwise raise AttributeError and surface as a traceback.
    if not isinstance(article, dict) or not article.get("title"):
        return jsonify({"ok": False, "error": "Could not structure solution from transcript"}), 500

    # Add ticket reference tag for traceability back to the originating ticket.
    if ticket_id:
        article.setdefault("tags", []).append(f"ticket:{ticket_id}")

    stats = _save_kb_articles([article], source_label=f"agent:{agent_id}")

    return jsonify({
        "ok": True,
        "transcript": transcript,
        "saved": stats["saved"],
        "article": article,
    })
|
| 731 |
|
| 732 |
+
|
| 733 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 734 |
+
# FEATURE 6 — Iris Support Chatbot (RAG over KB + Tutorials)
|
| 735 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 736 |
+
|
| 737 |
+
def _search_kb(query: str, limit: int = 5) -> List[Dict]:
    """
    Lightweight keyword search over Firestore KB articles.

    Scores the most recent 200 articles by how many query terms occur in
    their title/content/tags and returns the top `limit` matches.
    Production upgrade: swap with a vector DB (e.g. Qdrant) or Vertex AI Search.
    """
    if not db:
        return []

    # Ignore very short tokens — they would match nearly everything.
    terms = [token.lower() for token in query.split() if len(token) > 2]

    try:
        # Firestore has no full-text search; scan a bounded window of the
        # newest articles instead.
        snapshot = db.collection("iris_kb_articles").order_by(
            "created_at", direction=firestore.Query.DESCENDING
        ).limit(200).stream()

        scored = []
        for snap in snapshot:
            data = snap.to_dict()
            haystack = f"{data.get('title','')} {data.get('content','')} {' '.join(data.get('tags',[]))}".lower()
            hits = sum(1 for term in terms if term in haystack)
            if hits:
                scored.append({"score": hits, **data})

        scored.sort(key=lambda item: item["score"], reverse=True)
        return scored[:limit]

    except Exception as e:
        logger.error("KB search error: %s", e)
        return []
|
| 767 |
+
|
| 768 |
+
|
| 769 |
+
CHATBOT_SYSTEM_PROMPT = """
|
| 770 |
+
You are Iris, an intelligent support assistant for the Iris Support Portal.
|
| 771 |
+
|
| 772 |
+
Your role: Help users resolve issues quickly using the knowledge base and tutorial content provided.
|
| 773 |
+
|
| 774 |
+
Rules:
|
| 775 |
+
- Answer ONLY from the provided context. Do not hallucinate solutions.
|
| 776 |
+
- If the answer is in a tutorial with a timestamp, mention the video and timestamp so the user can jump to that moment.
|
| 777 |
+
- Be concise, clear, and friendly.
|
| 778 |
+
- If you cannot find the answer, say so honestly and suggest submitting a ticket.
|
| 779 |
+
- Format step-by-step answers as numbered lists.
|
| 780 |
+
"""
|
| 781 |
+
|
| 782 |
+
@app.post("/api/chatbot/query")
def chatbot_query():
    """
    POST body: {
      "message": "How do I reset a user's password?",
      "session_id": "...",
      "user_id": "..."
    }
    RAG: searches KB, then uses Gemini to synthesize an answer.
    """
    body = request.get_json(silent=True) or {}
    message = body.get("message", "").strip()
    session_id = body.get("session_id", "default")
    user_id = body.get("user_id", "anonymous")

    if not message:
        return jsonify({"ok": False, "error": "message is required"}), 400

    # Retrieve relevant KB context.
    kb_results = _search_kb(message, limit=5)

    context_blocks = []
    sources = []
    for r in kb_results:
        block = f"[Article: {r.get('title')}]\n{r.get('content', '')}"
        if r.get("timestamp_start") is not None:
            # Fix: Firestore may hand numbers back as floats; the :02d format
            # spec below raises ValueError on a float, so coerce explicitly.
            ts = int(r["timestamp_start"])
            mm = ts // 60
            ss = ts % 60
            url = r.get("video_url", "")
            block += f"\n(Tutorial: {r.get('video_title','Video')} at {mm:02d}:{ss:02d}"
            block += f" — {url})" if url else ")"
        context_blocks.append(block)
        sources.append({
            "title": r.get("title"),
            "category": r.get("category"),
            "source": r.get("source"),
            "ts_start": r.get("timestamp_start"),
            "video_url": r.get("video_url"),
        })

    context_str = "\n\n---\n\n".join(context_blocks) if context_blocks else "No relevant articles found."

    full_prompt = f"""{CHATBOT_SYSTEM_PROMPT}

KNOWLEDGE BASE CONTEXT:
{context_str}

USER QUESTION: {message}

Answer:"""

    answer = _gemini_text(full_prompt)

    if not answer:
        answer = "I'm sorry, I couldn't process your question right now. Please try again or submit a support ticket."

    # Persist chat log (best-effort; skipped when Firebase is down).
    if db:
        db.collection("iris_chatbot_logs").add({
            "user_id": user_id,
            "session_id": session_id,
            "message": message,
            "answer": answer,
            "sources": sources,
            "created_at": datetime.now(timezone.utc).isoformat(),
        })

    return jsonify({
        "ok": True,
        "answer": answer,
        "sources": sources,
    })
|
| 855 |
|
| 856 |
+
|
| 857 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 858 |
+
# KB READ ENDPOINTS (for frontend display)
|
| 859 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 860 |
+
|
| 861 |
+
@app.get("/api/kb/articles")
def list_kb_articles():
    """
    GET /api/kb/articles?category=Technical&limit=50
    Lists KB articles, optionally filtered by category.
    """
    category = request.args.get("category", "")
    # Fix: a non-numeric ?limit= previously raised an unhandled ValueError
    # (HTTP 500 with traceback); fall back to the default and clamp to a
    # sane range so one request can't pull the whole collection.
    try:
        limit = int(request.args.get("limit", 50))
    except (TypeError, ValueError):
        limit = 50
    limit = max(1, min(limit, 200))

    if not db:
        return jsonify({"ok": False, "error": "Firebase unavailable"}), 500

    try:
        query = db.collection("iris_kb_articles").order_by(
            "created_at", direction=firestore.Query.DESCENDING
        )
        if category:
            query = query.where("category", "==", category)

        docs = query.limit(limit).stream()
        articles = [{"id": d.id, **d.to_dict()} for d in docs]
        return jsonify({"ok": True, "articles": articles, "count": len(articles)})
    except Exception as e:
        logger.error("KB list error: %s", e)
        return jsonify({"ok": False, "error": str(e)}), 500
|
| 886 |
+
|
| 887 |
+
|
| 888 |
+
@app.delete("/api/kb/articles/<article_id>")
def delete_kb_article(article_id: str):
    """DELETE /api/kb/articles/<id> — Admin only (JWT check to be enforced at gateway)"""
    if not db:
        return jsonify({"ok": False, "error": "Firebase unavailable"}), 500
    try:
        db.collection("iris_kb_articles").document(article_id).delete()
    except Exception as e:
        return jsonify({"ok": False, "error": str(e)}), 500
    return jsonify({"ok": True})
|
| 898 |
+
|
| 899 |
+
|
| 900 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 901 |
+
# HEALTH
|
| 902 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 903 |
+
|
| 904 |
+
@app.get("/health")
def health():
    """Health probe: reports which integrations are configured and the KB size."""
    kb_count = 0
    if db:
        try:
            agg = db.collection("iris_kb_articles").count().get()
            kb_count = agg[0][0].value
        except Exception:
            # The count is informational only — never fail the health check.
            pass

    return jsonify({
        "ok": True,
        "service": "Iris AI Service v1.0",
        "gemini": bool(_gemini_client),
        "assemblyai": bool(ASSEMBLYAI_API_KEY),
        "firebase": bool(db),
        "kb_articles": kb_count,
    })
|
| 922 |
+
|
| 923 |
|
| 924 |
+
# ══════════════════════════��═══════════════════════════════════════════════════
|
| 925 |
+
# ENTRYPOINT
|
| 926 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 927 |
|
| 928 |
if __name__ == "__main__":
    # Default port 7860 matches the Hugging Face Spaces convention;
    # override with the PORT environment variable.
    port = int(os.environ.get("PORT", 7860))
    logger.info("Iris AI Service starting on port %d", port)
    app.run(host="0.0.0.0", port=port)
|