Spaces:
Sleeping
Sleeping
Update main.py
Browse files
main.py
CHANGED
|
@@ -1,54 +1,58 @@
|
|
| 1 |
"""
|
| 2 |
-
main.py —
|
| 3 |
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
ENV VARS:
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
- PORT=5000
|
| 16 |
"""
|
| 17 |
|
| 18 |
import os
|
|
|
|
| 19 |
import re
|
| 20 |
import json
|
| 21 |
import time
|
| 22 |
-
import math
|
| 23 |
import logging
|
| 24 |
import base64
|
|
|
|
| 25 |
from datetime import datetime, timezone
|
| 26 |
-
from typing import Any, Dict, List, Optional
|
| 27 |
|
| 28 |
import requests
|
| 29 |
-
import pandas as pd
|
| 30 |
from flask import Flask, request, jsonify
|
| 31 |
from flask_cors import CORS
|
| 32 |
|
| 33 |
-
#
|
| 34 |
|
| 35 |
logging.basicConfig(
|
| 36 |
level=logging.INFO,
|
| 37 |
format="%(asctime)s | %(levelname)s | %(message)s"
|
| 38 |
)
|
| 39 |
-
logger = logging.getLogger("
|
| 40 |
|
| 41 |
-
#
|
| 42 |
|
| 43 |
try:
|
| 44 |
from google import genai
|
| 45 |
-
from google.genai import types
|
| 46 |
except Exception as e:
|
| 47 |
genai = None
|
| 48 |
-
logger.error("google-genai not installed
|
| 49 |
|
| 50 |
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")
|
| 51 |
-
GEMINI_MODEL
|
| 52 |
|
| 53 |
_gemini_client = None
|
| 54 |
if genai and GOOGLE_API_KEY:
|
|
@@ -58,14 +62,26 @@ if genai and GOOGLE_API_KEY:
|
|
| 58 |
except Exception as e:
|
| 59 |
logger.error("Failed to init Gemini client: %s", e)
|
| 60 |
|
| 61 |
-
#
|
|
|
|
|
|
|
|
|
|
| 62 |
|
| 63 |
-
|
| 64 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
|
| 66 |
FIREBASE_ENV = os.environ.get("FIREBASE", "")
|
| 67 |
|
| 68 |
-
def
|
|
|
|
|
|
|
| 69 |
if firebase_admin._apps:
|
| 70 |
return firestore.client()
|
| 71 |
if not FIREBASE_ENV:
|
|
@@ -78,807 +94,838 @@ def init_firestore_from_env() -> Optional[firestore.Client]:
|
|
| 78 |
logger.info("Firebase initialized.")
|
| 79 |
return firestore.client()
|
| 80 |
except Exception as e:
|
| 81 |
-
logger.critical("
|
| 82 |
return None
|
| 83 |
|
| 84 |
-
db =
|
| 85 |
-
|
| 86 |
-
# ––––– External API –––––
|
| 87 |
|
| 88 |
-
|
| 89 |
-
HTTP_TIMEOUT = 30
|
| 90 |
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
"gas_lpg": 2.00,
|
| 97 |
-
"bread_avg": 1.10,
|
| 98 |
-
"zesa_step_1": {"limit": 50, "rate": 0.04},
|
| 99 |
-
"zesa_step_2": {"limit": 150, "rate": 0.09},
|
| 100 |
-
"zesa_step_3": {"limit": 9999, "rate": 0.14},
|
| 101 |
-
"zesa_levy": 0.06
|
| 102 |
-
}
|
| 103 |
|
| 104 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
|
| 106 |
-
|
| 107 |
-
_data_cache: Dict[str, Any] = {
|
| 108 |
-
"ts": 0,
|
| 109 |
-
"df": pd.DataFrame(),
|
| 110 |
-
"raw_count": 0
|
| 111 |
-
}
|
| 112 |
|
| 113 |
app = Flask(__name__)
|
| 114 |
CORS(app)
|
| 115 |
|
| 116 |
-
#
|
| 117 |
-
#
|
| 118 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
|
| 120 |
-
def _norm(s: Any) -> str:
|
| 121 |
-
if not s: return ""
|
| 122 |
-
return str(s).strip().lower()
|
| 123 |
|
| 124 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
try:
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
|
| 130 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
try:
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
elif "```" in s:
|
| 135 |
-
s = s.split("```")[0]
|
| 136 |
-
return json.loads(s)
|
| 137 |
except Exception as e:
|
| 138 |
-
logger.error(
|
| 139 |
-
return
|
| 140 |
|
| 141 |
-
def fetch_and_flatten_data() -> pd.DataFrame:
|
| 142 |
-
all_products = []
|
| 143 |
-
page = 1
|
| 144 |
-
|
| 145 |
-
logger.info("ETL: Starting fetch from /api/v1/product-listing")
|
| 146 |
-
|
| 147 |
-
while True:
|
| 148 |
-
try:
|
| 149 |
-
url = f"{PRICE_API_BASE}/api/v1/product-listing"
|
| 150 |
-
r = requests.get(url, params={"page": page, "perPage": 50}, timeout=HTTP_TIMEOUT)
|
| 151 |
-
r.raise_for_status()
|
| 152 |
-
payload = r.json()
|
| 153 |
-
data = payload.get("data") or []
|
| 154 |
-
if not data: break
|
| 155 |
-
|
| 156 |
-
all_products.extend(data)
|
| 157 |
-
|
| 158 |
-
meta = payload
|
| 159 |
-
if page >= (meta.get("totalPages") or 99):
|
| 160 |
-
break
|
| 161 |
-
page += 1
|
| 162 |
-
except Exception as e:
|
| 163 |
-
logger.error(f"ETL Error on page {page}: {e}")
|
| 164 |
-
break
|
| 165 |
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
search_vector = _norm(f"{p_name} {brand_name} {cat_str}")
|
| 182 |
-
|
| 183 |
-
views = int(p.get("view_count") or 0)
|
| 184 |
-
image = str(p.get("thumbnail") or p.get("image") or "")
|
| 185 |
-
|
| 186 |
-
prices = p.get("prices") or []
|
| 187 |
-
|
| 188 |
-
if not prices:
|
| 189 |
-
rows.append({
|
| 190 |
-
"product_id": p_id,
|
| 191 |
-
"product_name": p_name,
|
| 192 |
-
"search_vector": search_vector,
|
| 193 |
-
"brand": brand_name,
|
| 194 |
-
"category": primary_cat,
|
| 195 |
-
"retailer": "Listing",
|
| 196 |
-
"price": 0.0,
|
| 197 |
-
"views": views,
|
| 198 |
-
"image": image,
|
| 199 |
-
"is_offer": False
|
| 200 |
-
})
|
| 201 |
-
continue
|
| 202 |
-
|
| 203 |
-
for offer in prices:
|
| 204 |
-
retailer = offer.get("retailer") or {}
|
| 205 |
-
r_name = str(retailer.get("name") or "Unknown Store")
|
| 206 |
-
price_val = _coerce_price(offer.get("price"))
|
| 207 |
-
|
| 208 |
-
if price_val > 0:
|
| 209 |
-
rows.append({
|
| 210 |
-
"product_id": p_id,
|
| 211 |
-
"product_name": p_name,
|
| 212 |
-
"search_vector": search_vector,
|
| 213 |
-
"brand": brand_name,
|
| 214 |
-
"category": primary_cat,
|
| 215 |
-
"retailer": r_name,
|
| 216 |
-
"price": price_val,
|
| 217 |
-
"views": views,
|
| 218 |
-
"image": image,
|
| 219 |
-
"is_offer": True
|
| 220 |
-
})
|
| 221 |
-
except:
|
| 222 |
continue
|
| 223 |
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 243 |
"""
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
1. Exact sequential match in Name/Vector (Highest Score)
|
| 247 |
-
2. Token overlap (Medium Score)
|
| 248 |
-
3. Views/Popularity (Tie-breaker)
|
| 249 |
"""
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 291 |
"""
|
| 292 |
-
|
|
|
|
|
|
|
| 293 |
"""
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
# 1. Resolve Items & Check Brand Fidelity
|
| 302 |
-
for item in item_names:
|
| 303 |
-
hits = search_products_deep(df[df['is_offer']==True], item, limit=10)
|
| 304 |
-
|
| 305 |
-
if hits.empty:
|
| 306 |
-
missing_global.append(item)
|
| 307 |
-
continue
|
| 308 |
-
|
| 309 |
-
best_match = hits.iloc[0]
|
| 310 |
-
|
| 311 |
-
# --- Brand Fidelity Check ---
|
| 312 |
-
q_norm = _norm(item)
|
| 313 |
-
res_norm = _norm(best_match['product_name'] + " " + best_match['brand'])
|
| 314 |
-
q_tokens = q_norm.split()
|
| 315 |
-
|
| 316 |
-
is_substitute = False
|
| 317 |
-
found_tokens = sum(1 for t in q_tokens if t in res_norm)
|
| 318 |
-
if len(q_tokens) > 1 and found_tokens < len(q_tokens):
|
| 319 |
-
is_substitute = True
|
| 320 |
-
|
| 321 |
-
# Aggregate all offers
|
| 322 |
-
product_offers = hits[hits['product_name'] == best_match['product_name']].sort_values('price')
|
| 323 |
-
|
| 324 |
-
offers_list = []
|
| 325 |
-
for _, r in product_offers.iterrows():
|
| 326 |
-
offers_list.append({"retailer": r['retailer'], "price": float(r['price'])})
|
| 327 |
-
|
| 328 |
-
best_price = offers_list[0]['price']
|
| 329 |
-
max_price = offers_list[-1]['price']
|
| 330 |
-
potential_savings = max_price - best_price
|
| 331 |
-
|
| 332 |
-
found_items.append({
|
| 333 |
-
"query": item,
|
| 334 |
-
"product_name": str(best_match['product_name']),
|
| 335 |
-
"is_substitute": is_substitute,
|
| 336 |
-
"offers": offers_list,
|
| 337 |
-
"best_price": best_price,
|
| 338 |
-
"potential_savings": potential_savings
|
| 339 |
-
})
|
| 340 |
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
store_comparison = []
|
| 351 |
-
|
| 352 |
-
for retailer in all_involved_retailers:
|
| 353 |
-
total_price = 0.0
|
| 354 |
-
found_count = 0
|
| 355 |
-
missing_in_store = []
|
| 356 |
-
|
| 357 |
-
for item in found_items:
|
| 358 |
-
price = next((o['price'] for o in item['offers'] if o['retailer'] == retailer), None)
|
| 359 |
-
if price:
|
| 360 |
-
total_price += price
|
| 361 |
-
found_count += 1
|
| 362 |
else:
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
store_comparison.append({
|
| 366 |
-
"retailer": retailer,
|
| 367 |
-
"total_price": total_price,
|
| 368 |
-
"found_count": found_count,
|
| 369 |
-
"total_items": len(found_items),
|
| 370 |
-
"missing_items": missing_in_store
|
| 371 |
-
})
|
| 372 |
|
| 373 |
-
|
| 374 |
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
|
| 390 |
-
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 394 |
|
| 395 |
-
|
| 396 |
-
|
| 397 |
-
units = 0.0
|
| 398 |
-
|
| 399 |
-
t1 = ZIM_CONTEXT["zesa_step_1"]
|
| 400 |
-
cost_t1 = t1["limit"] * t1["rate"]
|
| 401 |
-
|
| 402 |
-
if remaining > cost_t1:
|
| 403 |
-
units += t1["limit"]
|
| 404 |
-
remaining -= cost_t1
|
| 405 |
-
|
| 406 |
-
t2 = ZIM_CONTEXT["zesa_step_2"]
|
| 407 |
-
cost_t2 = t2["limit"] * t2["rate"]
|
| 408 |
-
|
| 409 |
-
if remaining > cost_t2:
|
| 410 |
-
units += t2["limit"]
|
| 411 |
-
remaining -= cost_t2
|
| 412 |
-
units += remaining / ZIM_CONTEXT["zesa_step_3"]["rate"]
|
| 413 |
-
else:
|
| 414 |
-
units += remaining / t2["rate"]
|
| 415 |
else:
|
| 416 |
-
|
| 417 |
|
| 418 |
-
|
| 419 |
-
"
|
| 420 |
-
"est_units_kwh": float(round(units, 1))
|
| 421 |
-
}
|
| 422 |
|
| 423 |
-
|
| 424 |
-
|
| 425 |
-
|
| 426 |
-
|
| 427 |
-
|
| 428 |
-
|
| 429 |
-
|
| 430 |
-
PROMPT = """
|
| 431 |
-
Analyze transcript. Return STRICT JSON.
|
| 432 |
-
Classify intent:
|
| 433 |
-
- CASUAL_CHAT: Greetings, "hi".
|
| 434 |
-
- SHOPPING_BASKET: Looking for prices, products, "cheapest X".
|
| 435 |
-
- UTILITY_CALC: Electricity/ZESA questions.
|
| 436 |
-
- STORE_DECISION: "Where should I buy?", "Which store is cheapest?".
|
| 437 |
-
- EVENT_PLANNING: "Plan a braai", "Wedding list", "Dinner for 5" (Implicit lists).
|
| 438 |
-
|
| 439 |
-
Extract:
|
| 440 |
-
- items: list of specific products found. **TRANSLATE ALL ITEMS TO ENGLISH** (e.g. 'Hupfu' -> 'Maize Meal').
|
| 441 |
-
- utility_amount: number
|
| 442 |
-
- store_preference: if a specific store is named (e.g. "at OK Mart").
|
| 443 |
-
- is_event_planning: boolean (true if user asks to plan an event but lists no items).
|
| 444 |
-
- language: Detected user language (e.g., "Shona", "Ndebele", "English").
|
| 445 |
-
|
| 446 |
-
JSON Schema:
|
| 447 |
-
{
|
| 448 |
-
"actionable": boolean,
|
| 449 |
-
"intent": "string",
|
| 450 |
-
"items": ["string"],
|
| 451 |
-
"utility_amount": number,
|
| 452 |
-
"store_preference": "string",
|
| 453 |
-
"is_event_planning": boolean,
|
| 454 |
-
"language": "string"
|
| 455 |
-
}
|
| 456 |
-
"""
|
| 457 |
-
try:
|
| 458 |
-
resp = _gemini_client.models.generate_content(
|
| 459 |
-
model=GEMINI_MODEL,
|
| 460 |
-
contents=PROMPT + "\nTranscript: " + transcript,
|
| 461 |
-
config=types.GenerateContentConfig(response_mime_type="application/json")
|
| 462 |
-
)
|
| 463 |
-
return _safe_json_loads(resp.text, {"actionable": False, "intent": "CASUAL_CHAT", "language": "English"})
|
| 464 |
-
except Exception as e:
|
| 465 |
-
logger.error(f"Intent Detect Error: {e}")
|
| 466 |
-
return {"actionable": False, "intent": "CASUAL_CHAT", "language": "English"}
|
| 467 |
|
| 468 |
-
|
| 469 |
-
|
| 470 |
-
|
| 471 |
-
|
| 472 |
-
|
| 473 |
-
|
| 474 |
-
|
| 475 |
-
|
| 476 |
-
|
| 477 |
-
|
| 478 |
-
|
| 479 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 480 |
try:
|
| 481 |
-
|
| 482 |
-
|
| 483 |
-
|
| 484 |
-
|
|
|
|
| 485 |
)
|
| 486 |
-
|
|
|
|
| 487 |
except Exception as e:
|
| 488 |
-
logger.error(
|
| 489 |
-
return
|
| 490 |
|
| 491 |
-
|
| 492 |
-
if not _gemini_client: return {"error": "AI Offline"}
|
| 493 |
-
|
| 494 |
-
PROMPT = f"""
|
| 495 |
-
Analyze this image. Context: {caption}
|
| 496 |
-
1. SHOPPING LIST? -> Extract items.
|
| 497 |
-
2. SINGLE PRODUCT? -> Extract BRAND + NAME (e.g. "Pepsi 500ml").
|
| 498 |
-
3. MEAL/DISH? -> Identify dish + ingredients.
|
| 499 |
-
4. IRRELEVANT? -> Return type "IRRELEVANT".
|
| 500 |
-
|
| 501 |
-
Return STRICT JSON:
|
| 502 |
-
{{
|
| 503 |
-
"type": "LIST" | "PRODUCT" | "MEAL" | "IRRELEVANT",
|
| 504 |
-
"items": ["item1"],
|
| 505 |
-
"description": "Short description"
|
| 506 |
-
}}
|
| 507 |
-
"""
|
| 508 |
try:
|
| 509 |
-
|
| 510 |
-
|
| 511 |
-
|
| 512 |
-
|
| 513 |
-
|
| 514 |
-
types.Part.from_bytes(data=image_bytes, mime_type="image/jpeg")
|
| 515 |
-
],
|
| 516 |
-
config=types.GenerateContentConfig(response_mime_type="application/json")
|
| 517 |
)
|
| 518 |
-
|
| 519 |
-
|
| 520 |
except Exception as e:
|
| 521 |
-
logger.error(
|
| 522 |
-
return
|
| 523 |
-
|
| 524 |
-
|
| 525 |
-
|
| 526 |
-
|
| 527 |
-
|
| 528 |
-
|
| 529 |
-
|
| 530 |
-
|
| 531 |
-
|
| 532 |
-
|
| 533 |
-
|
| 534 |
-
|
| 535 |
-
|
| 536 |
-
|
| 537 |
-
|
| 538 |
-
|
| 539 |
-
|
| 540 |
-
|
| 541 |
-
|
| 542 |
-
|
| 543 |
-
|
| 544 |
-
|
| 545 |
-
|
| 546 |
-
|
| 547 |
-
|
| 548 |
-
1. **LANGUAGE**: Reply in **{language}**. If Shona, use Shona. If English, use English.
|
| 549 |
-
|
| 550 |
-
2. **BASKET COMPARISON**:
|
| 551 |
-
- If `market_matrix` has multiple stores, compare totals and explicitly state the savings using the pre-calculated `basket_savings`.
|
| 552 |
-
- Example: "Spar is **$6.95**, OK Mart is **$4.00** (but missing Oil). You save **$2.95** by getting the basket at OK Mart!"
|
| 553 |
-
|
| 554 |
-
3. **BRAND SUBSTITUTES (Phrasing)**:
|
| 555 |
-
- If `is_substitute` is TRUE for an item, say:
|
| 556 |
-
"I couldn't find **[Query]**, but the **nearest match is** **[Found]** ($Price)."
|
| 557 |
-
|
| 558 |
-
4. **SINGLE ITEMS**:
|
| 559 |
-
- State the best price first, then others. Explicitly state how much is saved by choosing the cheapest option over the most expensive one based on `potential_savings`.
|
| 560 |
-
- Example: "The cheapest is **$2.00** at OK. You save **$0.50** compared to the most expensive store!"
|
| 561 |
-
|
| 562 |
-
5. **CASUAL**:
|
| 563 |
-
- Reset if user says "Hi".
|
| 564 |
-
|
| 565 |
-
TONE: Helpful, direct, Zimbabwean. Use Markdown.
|
| 566 |
"""
|
| 567 |
-
|
| 568 |
-
|
| 569 |
-
resp = _gemini_client.models.generate_content(
|
| 570 |
-
model=GEMINI_MODEL,
|
| 571 |
-
contents=PROMPT
|
| 572 |
-
)
|
| 573 |
-
return resp.text
|
| 574 |
-
except Exception as e:
|
| 575 |
-
logger.error(f"Chat Gen Error: {e}")
|
| 576 |
-
return "I checked the prices, but I'm having trouble displaying them right now."
|
| 577 |
-
|
| 578 |
-
def gemini_generate_4step_plan(transcript: str, analyst_result: Dict) -> str:
|
| 579 |
-
if not _gemini_client: return "# Error\nAI Offline."
|
| 580 |
-
|
| 581 |
-
PROMPT = f"""
|
| 582 |
-
Generate a formatted Markdown Shopping Plan.
|
| 583 |
-
|
| 584 |
-
USER REQUEST: "{transcript}"
|
| 585 |
-
DATA: {json.dumps(analyst_result, indent=2, default=str)}
|
| 586 |
-
|
| 587 |
-
CRITICAL INSTRUCTION:
|
| 588 |
-
For items in 'global_missing', you MUST provide a Realistic USD Estimate (e.g. Chicken ~$6.00).
|
| 589 |
-
Do not leave them as "Unknown".
|
| 590 |
-
|
| 591 |
-
SECTIONS:
|
| 592 |
-
|
| 593 |
-
1. **In Our Catalogue ✅**
|
| 594 |
-
(Markdown Table: | Item | Retailer | Price (USD) | Potential Savings |)
|
| 595 |
-
|
| 596 |
-
2. **Not in Catalogue (Estimates) 😔**
|
| 597 |
-
(Markdown Table: | Item | Estimated Price (USD) |)
|
| 598 |
-
*Fill in estimated prices for missing items based on Zimbabwe market knowledge.*
|
| 599 |
-
|
| 600 |
-
3. **Totals & Savings 💰**
|
| 601 |
-
- Confirmed Total (Catalogue)
|
| 602 |
-
- Total Basket Savings (From cheapest vs most expensive store)
|
| 603 |
-
- Estimated Total (Missing Items)
|
| 604 |
-
- **Grand Total Estimate**
|
| 605 |
-
|
| 606 |
-
4. **Ideas & Tips 💡**
|
| 607 |
-
- 3 Creative ideas based on the specific event/meal (e.g. Braai tips, Cooking hacks).
|
| 608 |
-
|
| 609 |
-
Tone: Warm, Professional, Zimbabwean.
|
| 610 |
"""
|
| 611 |
-
|
| 612 |
-
|
| 613 |
-
|
| 614 |
-
except Exception as e:
|
| 615 |
-
return "# Error\nCould not generate plan."
|
| 616 |
|
| 617 |
-
|
| 618 |
-
|
| 619 |
-
# =========================
|
| 620 |
|
| 621 |
-
|
| 622 |
-
|
| 623 |
-
|
| 624 |
-
|
| 625 |
-
"ok":
|
| 626 |
-
"offers_indexed": len(df),
|
| 627 |
-
"api_source": PRICE_API_BASE,
|
| 628 |
-
"persona": "April v3.1 (Babel Fish)"
|
| 629 |
-
})
|
| 630 |
|
| 631 |
-
|
| 632 |
-
def chat():
|
| 633 |
-
body = request.get_json(silent=True) or {}
|
| 634 |
-
msg = body.get("message", "")
|
| 635 |
-
pid = body.get("profile_id")
|
| 636 |
-
|
| 637 |
-
if not pid: return jsonify({"ok": False, "error": "Missing profile_id"}), 400
|
| 638 |
-
|
| 639 |
-
# History
|
| 640 |
-
history_str = ""
|
| 641 |
if db:
|
| 642 |
-
|
| 643 |
-
|
| 644 |
-
|
| 645 |
-
|
| 646 |
-
|
| 647 |
-
|
| 648 |
-
|
| 649 |
-
|
| 650 |
-
|
| 651 |
-
|
| 652 |
-
|
| 653 |
-
|
| 654 |
-
|
| 655 |
-
|
| 656 |
-
|
| 657 |
-
|
| 658 |
-
|
| 659 |
-
|
| 660 |
-
|
| 661 |
-
|
| 662 |
-
|
| 663 |
-
|
| 664 |
-
|
| 665 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 666 |
|
| 667 |
if db:
|
| 668 |
-
db.collection("
|
| 669 |
-
"
|
| 670 |
-
"
|
| 671 |
-
"
|
| 672 |
-
"
|
|
|
|
| 673 |
})
|
| 674 |
|
| 675 |
-
return jsonify({"ok": True, "
|
| 676 |
-
|
| 677 |
-
|
| 678 |
-
|
| 679 |
-
|
| 680 |
-
|
| 681 |
-
|
| 682 |
-
|
| 683 |
-
|
| 684 |
-
|
| 685 |
-
|
| 686 |
-
|
| 687 |
-
|
| 688 |
-
|
| 689 |
-
|
| 690 |
-
|
| 691 |
-
|
| 692 |
-
|
| 693 |
-
|
| 694 |
-
|
| 695 |
-
|
| 696 |
-
|
| 697 |
-
|
| 698 |
-
|
| 699 |
-
|
| 700 |
-
|
| 701 |
-
|
| 702 |
-
|
| 703 |
-
|
| 704 |
-
|
| 705 |
-
|
| 706 |
-
|
| 707 |
-
|
| 708 |
-
|
| 709 |
-
|
| 710 |
-
|
| 711 |
-
|
| 712 |
-
|
| 713 |
-
|
| 714 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 715 |
|
| 716 |
return jsonify({
|
| 717 |
-
"ok":
|
| 718 |
-
"
|
| 719 |
-
"
|
| 720 |
-
"
|
| 721 |
-
"
|
| 722 |
})
|
| 723 |
|
| 724 |
-
|
| 725 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 726 |
"""
|
| 727 |
-
|
| 728 |
-
|
| 729 |
"""
|
| 730 |
-
body
|
| 731 |
-
|
| 732 |
-
|
| 733 |
-
|
| 734 |
-
|
| 735 |
-
|
| 736 |
-
|
| 737 |
-
|
| 738 |
-
|
| 739 |
-
|
| 740 |
-
|
| 741 |
-
|
| 742 |
-
|
| 743 |
-
|
| 744 |
-
|
| 745 |
-
|
| 746 |
-
|
| 747 |
-
|
| 748 |
-
|
| 749 |
-
|
| 750 |
-
|
| 751 |
-
|
| 752 |
-
|
| 753 |
-
|
| 754 |
-
|
| 755 |
-
|
| 756 |
-
|
| 757 |
-
|
| 758 |
-
|
| 759 |
-
|
|
|
|
|
|
|
| 760 |
"""
|
| 761 |
-
|
| 762 |
-
|
| 763 |
-
|
| 764 |
-
|
| 765 |
-
|
| 766 |
-
|
| 767 |
-
|
| 768 |
-
|
| 769 |
-
|
| 770 |
-
|
| 771 |
-
|
| 772 |
-
|
| 773 |
-
|
| 774 |
-
|
| 775 |
-
|
| 776 |
-
|
| 777 |
-
|
| 778 |
-
|
| 779 |
-
|
| 780 |
-
|
| 781 |
-
|
| 782 |
-
|
| 783 |
-
|
| 784 |
-
|
| 785 |
-
|
| 786 |
-
|
| 787 |
return jsonify({
|
| 788 |
-
"ok":
|
| 789 |
-
"
|
| 790 |
-
"
|
| 791 |
-
"
|
| 792 |
})
|
| 793 |
|
| 794 |
-
|
| 795 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 796 |
"""
|
| 797 |
-
|
| 798 |
-
|
| 799 |
"""
|
| 800 |
-
|
| 801 |
-
|
| 802 |
-
|
| 803 |
-
|
| 804 |
-
|
| 805 |
-
|
| 806 |
-
|
| 807 |
-
|
| 808 |
-
|
| 809 |
-
|
| 810 |
-
|
| 811 |
-
|
| 812 |
-
|
| 813 |
-
|
| 814 |
-
|
| 815 |
-
|
| 816 |
-
|
| 817 |
-
|
| 818 |
-
|
| 819 |
-
|
| 820 |
-
|
| 821 |
-
|
| 822 |
-
|
| 823 |
-
|
| 824 |
-
|
| 825 |
-
|
| 826 |
-
|
| 827 |
-
|
| 828 |
-
|
| 829 |
-
|
| 830 |
-
|
| 831 |
-
|
| 832 |
-
|
| 833 |
-
|
| 834 |
-
|
| 835 |
-
|
| 836 |
-
|
| 837 |
-
|
| 838 |
-
|
| 839 |
-
|
| 840 |
-
|
| 841 |
-
|
| 842 |
-
|
| 843 |
-
|
| 844 |
-
|
| 845 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 846 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 847 |
if db:
|
| 848 |
-
db.collection("
|
| 849 |
-
"
|
| 850 |
-
"
|
| 851 |
-
"
|
| 852 |
-
"
|
|
|
|
|
|
|
| 853 |
})
|
| 854 |
|
| 855 |
return jsonify({
|
| 856 |
-
"ok":
|
| 857 |
-
"
|
|
|
|
| 858 |
})
|
| 859 |
|
| 860 |
-
|
| 861 |
-
|
| 862 |
-
|
| 863 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 864 |
try:
|
| 865 |
-
|
| 866 |
-
|
| 867 |
-
|
| 868 |
-
|
| 869 |
-
|
| 870 |
-
|
| 871 |
-
|
| 872 |
-
|
| 873 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 874 |
try:
|
| 875 |
-
db.collection("
|
| 876 |
return jsonify({"ok": True})
|
| 877 |
-
except
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 878 |
|
|
|
|
|
|
|
|
|
|
| 879 |
|
| 880 |
if __name__ == "__main__":
|
| 881 |
port = int(os.environ.get("PORT", 7860))
|
| 882 |
-
|
| 883 |
-
except: pass
|
| 884 |
app.run(host="0.0.0.0", port=port)
|
|
|
|
| 1 |
"""
|
| 2 |
+
main.py — Iris AI Service (v1.0 - April 2026)
|
| 3 |
|
| 4 |
+
AI layer for the Iris Support Portal (IrisPlus / Unified Spark Desk).
|
| 5 |
+
Deployed as a HuggingFace Space monofile (Flask + Gemini + AssemblyAI + Firebase).
|
| 6 |
+
|
| 7 |
+
FEATURES:
|
| 8 |
+
1. WhatsApp Export → Knowledge Base (intelligent Gemini extraction, additive only)
|
| 9 |
+
2. Bulk KB Upload (CSV / Excel / PDF)
|
| 10 |
+
3. Natural Language + Voice Ticket Submission (AssemblyAI transcription → Gemini extraction)
|
| 11 |
+
4. System Tutorial Ingestion (video transcript → timestamped KB articles)
|
| 12 |
+
5. Agent NL/Voice Solution Writing (same pipeline, agent role)
|
| 13 |
+
6. Iris Chatbot (KB + tutorial source RAG, Firebase persistence)
|
| 14 |
|
| 15 |
ENV VARS:
|
| 16 |
+
GOOGLE_API_KEY — Gemini API key
|
| 17 |
+
ASSEMBLYAI_API_KEY — AssemblyAI API key
|
| 18 |
+
FIREBASE — JSON string of Firebase service account
|
| 19 |
+
PORT — Server port (default 7860)
|
|
|
|
| 20 |
"""
|
| 21 |
|
| 22 |
import os
|
| 23 |
+
import io
|
| 24 |
import re
|
| 25 |
import json
|
| 26 |
import time
|
|
|
|
| 27 |
import logging
|
| 28 |
import base64
|
| 29 |
+
import hashlib
|
| 30 |
from datetime import datetime, timezone
|
| 31 |
+
from typing import Any, Dict, List, Optional
|
| 32 |
|
| 33 |
import requests
|
|
|
|
| 34 |
from flask import Flask, request, jsonify
|
| 35 |
from flask_cors import CORS
|
| 36 |
|
| 37 |
+
# ─── Logging ──────────────────────────────────────────────────────────────────
|
| 38 |
|
| 39 |
logging.basicConfig(
|
| 40 |
level=logging.INFO,
|
| 41 |
format="%(asctime)s | %(levelname)s | %(message)s"
|
| 42 |
)
|
| 43 |
+
logger = logging.getLogger("iris-ai-service")
|
| 44 |
|
| 45 |
+
# ─── Gemini SDK ───────────────────────────────────────────────────────────────
|
| 46 |
|
| 47 |
try:
|
| 48 |
from google import genai
|
| 49 |
+
from google.genai import types as genai_types
|
| 50 |
except Exception as e:
|
| 51 |
genai = None
|
| 52 |
+
logger.error("google-genai not installed: %s", e)
|
| 53 |
|
| 54 |
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")
|
| 55 |
+
GEMINI_MODEL = os.environ.get("GEMINI_MODEL", "gemini-2.0-flash")
|
| 56 |
|
| 57 |
_gemini_client = None
|
| 58 |
if genai and GOOGLE_API_KEY:
|
|
|
|
| 62 |
except Exception as e:
|
| 63 |
logger.error("Failed to init Gemini client: %s", e)
|
| 64 |
|
| 65 |
+
# ─── AssemblyAI ───────────────────────────────────────────────────────────────
|
| 66 |
+
|
| 67 |
+
ASSEMBLYAI_API_KEY = os.environ.get("ASSEMBLYAI_API_KEY", "")
|
| 68 |
+
ASSEMBLYAI_BASE = "https://api.assemblyai.com/v2"
|
| 69 |
|
| 70 |
+
# ─── Firebase ─────────────────────────────────────────────────────────────────
|
| 71 |
+
|
| 72 |
+
try:
|
| 73 |
+
import firebase_admin
|
| 74 |
+
from firebase_admin import credentials, firestore
|
| 75 |
+
FIREBASE_AVAILABLE = True
|
| 76 |
+
except ImportError:
|
| 77 |
+
FIREBASE_AVAILABLE = False
|
| 78 |
+
logger.warning("firebase-admin not installed. Persistence disabled.")
|
| 79 |
|
| 80 |
FIREBASE_ENV = os.environ.get("FIREBASE", "")
|
| 81 |
|
| 82 |
+
def init_firestore() -> Optional[Any]:
|
| 83 |
+
if not FIREBASE_AVAILABLE:
|
| 84 |
+
return None
|
| 85 |
if firebase_admin._apps:
|
| 86 |
return firestore.client()
|
| 87 |
if not FIREBASE_ENV:
|
|
|
|
| 94 |
logger.info("Firebase initialized.")
|
| 95 |
return firestore.client()
|
| 96 |
except Exception as e:
|
| 97 |
+
logger.critical("Firebase init failed: %s", e)
|
| 98 |
return None
|
| 99 |
|
| 100 |
+
db = init_firestore()
|
|
|
|
|
|
|
| 101 |
|
| 102 |
+
# ──��� Optional file-parsing libs ───────────────────────────────────────────────
|
|
|
|
| 103 |
|
| 104 |
+
try:
|
| 105 |
+
import pandas as pd
|
| 106 |
+
PANDAS_AVAILABLE = True
|
| 107 |
+
except ImportError:
|
| 108 |
+
PANDAS_AVAILABLE = False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
|
| 110 |
+
try:
|
| 111 |
+
import pypdf
|
| 112 |
+
PYPDF_AVAILABLE = True
|
| 113 |
+
except ImportError:
|
| 114 |
+
PYPDF_AVAILABLE = False
|
| 115 |
|
| 116 |
+
# ─── Flask App ────────────────────────────────────────────────────────────────
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
|
| 118 |
app = Flask(__name__)
|
| 119 |
CORS(app)
|
| 120 |
|
| 121 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 122 |
+
# HELPERS
|
| 123 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 124 |
+
|
| 125 |
+
def _safe_json(text: str, fallback: Any) -> Any:
|
| 126 |
+
"""Strip markdown fences and parse JSON safely."""
|
| 127 |
+
try:
|
| 128 |
+
clean = text.strip()
|
| 129 |
+
if "```json" in clean:
|
| 130 |
+
clean = clean.split("```json")[1].split("```")[0]
|
| 131 |
+
elif "```" in clean:
|
| 132 |
+
clean = clean.split("```")[1].split("```")[0]
|
| 133 |
+
return json.loads(clean)
|
| 134 |
+
except Exception as e:
|
| 135 |
+
logger.error("JSON parse error: %s | text: %s", e, text[:200])
|
| 136 |
+
return fallback
|
| 137 |
|
|
|
|
|
|
|
|
|
|
| 138 |
|
| 139 |
+
def _gemini_text(prompt: str, json_mode: bool = False) -> str:
|
| 140 |
+
"""Call Gemini and return raw text."""
|
| 141 |
+
if not _gemini_client:
|
| 142 |
+
return ""
|
| 143 |
+
cfg = genai_types.GenerateContentConfig(response_mime_type="application/json") if json_mode else None
|
| 144 |
try:
|
| 145 |
+
resp = _gemini_client.models.generate_content(
|
| 146 |
+
model=GEMINI_MODEL,
|
| 147 |
+
contents=prompt,
|
| 148 |
+
config=cfg
|
| 149 |
+
)
|
| 150 |
+
return resp.text or ""
|
| 151 |
+
except Exception as e:
|
| 152 |
+
logger.error("Gemini call error: %s", e)
|
| 153 |
+
return ""
|
| 154 |
|
| 155 |
+
|
| 156 |
+
def _article_fingerprint(title: str, content: str) -> str:
|
| 157 |
+
"""Stable hash to detect duplicate KB articles."""
|
| 158 |
+
raw = f"{title.strip().lower()}::{content.strip().lower()[:300]}"
|
| 159 |
+
return hashlib.sha256(raw.encode()).hexdigest()[:16]
|
| 160 |
+
|
| 161 |
+
|
| 162 |
+
def _get_existing_fingerprints() -> set:
|
| 163 |
+
"""Fetch all fingerprints already in Firestore KB."""
|
| 164 |
+
if not db:
|
| 165 |
+
return set()
|
| 166 |
try:
|
| 167 |
+
docs = db.collection("iris_kb_articles").select(["fingerprint"]).stream()
|
| 168 |
+
return {d.to_dict().get("fingerprint") for d in docs if d.to_dict().get("fingerprint")}
|
|
|
|
|
|
|
|
|
|
| 169 |
except Exception as e:
|
| 170 |
+
logger.error("Fingerprint fetch error: %s", e)
|
| 171 |
+
return set()
|
| 172 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 173 |
|
| 174 |
+
def _save_kb_articles(articles: List[Dict], source_label: str) -> Dict:
|
| 175 |
+
"""Save articles to Firestore, skip duplicates. Returns stats."""
|
| 176 |
+
if not db:
|
| 177 |
+
return {"saved": 0, "skipped": 0, "error": "Firebase unavailable"}
|
| 178 |
+
|
| 179 |
+
existing = _get_existing_fingerprints()
|
| 180 |
+
saved, skipped = 0, 0
|
| 181 |
+
|
| 182 |
+
for article in articles:
|
| 183 |
+
title = article.get("title", "Untitled")
|
| 184 |
+
content = article.get("content", "")
|
| 185 |
+
fp = _article_fingerprint(title, content)
|
| 186 |
+
|
| 187 |
+
if fp in existing:
|
| 188 |
+
skipped += 1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 189 |
continue
|
| 190 |
|
| 191 |
+
doc = {
|
| 192 |
+
"title": title,
|
| 193 |
+
"content": content,
|
| 194 |
+
"category": article.get("category", "General"),
|
| 195 |
+
"tags": article.get("tags", []),
|
| 196 |
+
"source": source_label,
|
| 197 |
+
"fingerprint": fp,
|
| 198 |
+
"created_at": datetime.now(timezone.utc).isoformat(),
|
| 199 |
+
}
|
| 200 |
+
# Carry timestamp crop info from tutorial ingestion if present
|
| 201 |
+
if article.get("timestamp_start") is not None:
|
| 202 |
+
doc["timestamp_start"] = article["timestamp_start"]
|
| 203 |
+
doc["timestamp_end"] = article.get("timestamp_end")
|
| 204 |
+
doc["video_url"] = article.get("video_url", "")
|
| 205 |
+
|
| 206 |
+
db.collection("iris_kb_articles").add(doc)
|
| 207 |
+
existing.add(fp)
|
| 208 |
+
saved += 1
|
| 209 |
+
|
| 210 |
+
return {"saved": saved, "skipped": skipped}
|
| 211 |
+
|
| 212 |
+
|
| 213 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 214 |
+
# FEATURE 1 — WhatsApp Export → Knowledge Base
|
| 215 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 216 |
+
|
| 217 |
+
WHATSAPP_EXTRACTION_PROMPT = """
|
| 218 |
+
You are a support knowledge base curator.
|
| 219 |
+
|
| 220 |
+
You have been given a raw WhatsApp group chat export from a support team.
|
| 221 |
+
Your job is to extract ONLY clear problem→solution pairs.
|
| 222 |
+
|
| 223 |
+
Rules:
|
| 224 |
+
- Ignore greetings, off-topic chatter, emoji-only messages, system notifications.
|
| 225 |
+
- Extract only exchanges where a user described an issue AND a support agent (or another user) provided a working solution.
|
| 226 |
+
- Each article must be self-contained and searchable.
|
| 227 |
+
- Merge follow-up messages that belong to the same resolution thread.
|
| 228 |
+
|
| 229 |
+
Return a STRICT JSON array. Each object:
|
| 230 |
+
{
|
| 231 |
+
"title": "Short, searchable title of the issue",
|
| 232 |
+
"content": "Full explanation: what the problem was and the step-by-step solution",
|
| 233 |
+
"category": "One of: Account, Billing, Technical, Feature, Other",
|
| 234 |
+
"tags": ["array", "of", "relevant", "keywords"]
|
| 235 |
+
}
|
| 236 |
+
|
| 237 |
+
Return ONLY the JSON array, no other text.
|
| 238 |
+
|
| 239 |
+
WhatsApp Export:
|
| 240 |
+
"""
|
| 241 |
+
|
| 242 |
+
@app.post("/api/kb/whatsapp-import")
def whatsapp_import():
    """
    POST body: { "chat_text": "<raw WhatsApp export text>" }

    Runs the WhatsApp extraction prompt through Gemini and saves any new
    problem→solution articles additively (duplicates skipped, never overwritten).
    """
    payload = request.get_json(silent=True) or {}
    chat_text = payload.get("chat_text", "").strip()

    # Guard clauses: reject missing or trivially short exports up front.
    if not chat_text:
        return jsonify({"ok": False, "error": "chat_text is required"}), 400
    if len(chat_text) < 100:
        return jsonify({"ok": False, "error": "Chat export too short to process"}), 400

    logger.info("WhatsApp import: %d chars received", len(chat_text))

    # Cap the prompt payload at 50k chars to stay within model limits.
    raw_answer = _gemini_text(WHATSAPP_EXTRACTION_PROMPT + chat_text[:50000], json_mode=True)
    extracted = _safe_json(raw_answer, [])

    if not isinstance(extracted, list):
        return jsonify({"ok": False, "error": "Gemini returned unexpected format", "raw": raw_answer[:500]}), 500

    stats = _save_kb_articles(extracted, source_label="whatsapp_export")
    logger.info("WhatsApp import complete: %s", stats)

    return jsonify({
        "ok": True,
        "articles_found": len(extracted),
        "saved": stats["saved"],
        "skipped_dupes": stats["skipped"],
    })
|
| 274 |
+
|
| 275 |
+
|
| 276 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 277 |
+
# FEATURE 2 — Bulk KB Upload (CSV / Excel / PDF)
|
| 278 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 279 |
+
|
| 280 |
+
def _extract_text_from_pdf_bytes(pdf_bytes: bytes) -> str:
    """Extract text from a PDF.

    Tries pypdf first (fast, works for text-based PDFs); if that yields no
    text, falls back to Gemini with the raw PDF bytes inlined, which also
    handles scanned/image-only documents.

    Returns the extracted text, or "" when both strategies fail.
    """
    if PYPDF_AVAILABLE:
        try:
            reader = pypdf.PdfReader(io.BytesIO(pdf_bytes))
            pages = [p.extract_text() or "" for p in reader.pages]
            text = "\n\n".join(pages).strip()
            if text:
                return text
        except Exception as e:
            logger.warning("pypdf extraction failed: %s", e)

    # Gemini inline_data fallback for scanned PDFs.
    # Fix: removed an unused base64-encoded copy of the PDF that was computed
    # here but never sent — Part.from_bytes takes the raw bytes directly.
    if _gemini_client:
        try:
            resp = _gemini_client.models.generate_content(
                model=GEMINI_MODEL,
                contents=[
                    "Extract all text from this PDF document. Return plain text only.",
                    genai_types.Part.from_bytes(data=pdf_bytes, mime_type="application/pdf")
                ]
            )
            return resp.text or ""
        except Exception as e:
            logger.error("Gemini PDF extraction failed: %s", e)
    return ""
|
| 307 |
+
|
| 308 |
+
|
| 309 |
+
PDF_KB_PROMPT = """
|
| 310 |
+
You are a support knowledge base curator.
|
| 311 |
+
Convert the following document content into structured KB articles.
|
| 312 |
+
Each article should cover one distinct topic, issue, or procedure.
|
| 313 |
+
|
| 314 |
+
Return a STRICT JSON array. Each object:
|
| 315 |
+
{
|
| 316 |
+
"title": "Short, searchable title",
|
| 317 |
+
"content": "Complete explanation in clear language",
|
| 318 |
+
"category": "One of: Account, Billing, Technical, Feature, Other",
|
| 319 |
+
"tags": ["keyword1", "keyword2"]
|
| 320 |
+
}
|
| 321 |
+
|
| 322 |
+
Return ONLY the JSON array.
|
| 323 |
+
|
| 324 |
+
Document content:
|
| 325 |
+
"""
|
| 326 |
+
|
| 327 |
+
@app.post("/api/kb/bulk-upload")
def bulk_upload():
    """
    Accepts a multipart file upload. Supports: .csv, .xlsx, .xls, .pdf

    CSV/Excel expected columns: title, content (+ optional: category, tags)
    PDF: text is extracted, then Gemini structures it into articles.
    """
    if "file" not in request.files:
        return jsonify({"ok": False, "error": "No file uploaded"}), 400

    f = request.files["file"]
    filename = f.filename or ""
    ext = filename.rsplit(".", 1)[-1].lower()
    file_data = f.read()

    articles = []

    if ext in ("csv", "xlsx", "xls"):
        if not PANDAS_AVAILABLE:
            return jsonify({"ok": False, "error": "pandas not installed on server"}), 500
        try:
            if ext == "csv":
                df = pd.read_csv(io.BytesIO(file_data))
            else:
                df = pd.read_excel(io.BytesIO(file_data))

            # Normalize headers so "Title" / " TITLE " etc. all match.
            df.columns = [c.strip().lower() for c in df.columns]

            if "title" not in df.columns or "content" not in df.columns:
                return jsonify({"ok": False, "error": "CSV/Excel must have 'title' and 'content' columns"}), 400

            for _, row in df.iterrows():
                tags = []
                if "tags" in df.columns and pd.notna(row.get("tags")):
                    raw_tags = str(row["tags"])
                    # Accept comma-, semicolon-, or pipe-separated tags.
                    tags = [t.strip() for t in re.split(r"[,;|]", raw_tags) if t.strip()]

                articles.append({
                    "title": str(row["title"]).strip(),
                    "content": str(row["content"]).strip(),
                    "category": str(row.get("category", "General")).strip() if pd.notna(row.get("category")) else "General",
                    "tags": tags,
                })
        except Exception as e:
            logger.error("Spreadsheet parse error: %s", e)
            return jsonify({"ok": False, "error": f"Could not parse file: {e}"}), 400

    elif ext == "pdf":
        text = _extract_text_from_pdf_bytes(file_data)
        if not text:
            return jsonify({"ok": False, "error": "Could not extract text from PDF"}), 400

        # Cap at 50k chars to keep the prompt within model limits.
        gemini_out = _gemini_text(PDF_KB_PROMPT + text[:50000], json_mode=True)
        articles = _safe_json(gemini_out, [])

        if not isinstance(articles, list):
            return jsonify({"ok": False, "error": "Gemini PDF structuring failed"}), 500

    else:
        return jsonify({"ok": False, "error": f"Unsupported file type: .{ext}. Use csv, xlsx, or pdf"}), 400

    if not articles:
        return jsonify({"ok": False, "error": "No articles extracted from file"}), 400

    # Fix: record the actual uploaded filename in the provenance label — the
    # previous code used an f-string containing a literal placeholder with no
    # interpolation, so every upload was labelled identically.
    stats = _save_kb_articles(articles, source_label=f"bulk_upload:{filename}")
    return jsonify({
        "ok": True,
        "articles_found": len(articles),
        "saved": stats["saved"],
        "skipped_dupes": stats["skipped"],
    })
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 397 |
|
| 398 |
+
|
| 399 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 400 |
+
# FEATURE 3 — Ticket Submission via NL Text or Voice
|
| 401 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 402 |
+
|
| 403 |
+
TICKET_EXTRACTION_PROMPT = """
|
| 404 |
+
You are a support ticket intake system for a software support portal.
|
| 405 |
+
|
| 406 |
+
A user has described their issue in natural language. Extract structured ticket fields.
|
| 407 |
+
|
| 408 |
+
Return STRICT JSON (no other text):
|
| 409 |
+
{
|
| 410 |
+
"title": "Concise ticket title (max 80 chars)",
|
| 411 |
+
"description": "Full detailed description of the issue, rewritten clearly in third person",
|
| 412 |
+
"category_hint": "Best matching category: Account | Billing | Technical | Feature | Other",
|
| 413 |
+
"priority_hint": "One of: low | medium | high | critical (based on urgency language)",
|
| 414 |
+
"keywords": ["array", "of", "technical", "keywords"]
|
| 415 |
+
}
|
| 416 |
+
|
| 417 |
+
User's message:
|
| 418 |
+
"""
|
| 419 |
+
|
| 420 |
+
def _transcribe_audio_assemblyai(audio_b64: str, audio_format: str = "wav") -> str:
    """Upload base64 audio to AssemblyAI, request a transcript, and poll until done.

    Args:
        audio_b64: Base64-encoded audio payload.
        audio_format: Declared container format. Currently unused — AssemblyAI
            auto-detects the format from the uploaded bytes; kept for
            interface compatibility with existing callers.

    Returns:
        The transcript text, or "" on any failure (missing key, upload error,
        transcription error, or polling timeout).
    """
    if not ASSEMBLYAI_API_KEY:
        return ""

    audio_bytes = base64.b64decode(audio_b64)
    headers = {"authorization": ASSEMBLYAI_API_KEY}

    # 1. Upload raw audio bytes.
    try:
        upload_resp = requests.post(
            f"{ASSEMBLYAI_BASE}/upload",
            headers={**headers, "Content-Type": "application/octet-stream"},
            data=audio_bytes,
            timeout=30
        )
        upload_resp.raise_for_status()
        upload_url = upload_resp.json().get("upload_url")
    except Exception as e:
        logger.error("AssemblyAI upload error: %s", e)
        return ""

    # 2. Request a transcription job for the uploaded audio.
    try:
        tx_resp = requests.post(
            f"{ASSEMBLYAI_BASE}/transcript",
            headers={**headers, "Content-Type": "application/json"},
            json={"audio_url": upload_url, "language_detection": True},
            timeout=15
        )
        tx_resp.raise_for_status()
        tx_id = tx_resp.json().get("id")
    except Exception as e:
        logger.error("AssemblyAI transcript request error: %s", e)
        return ""

    # 3. Poll for completion (30 attempts x 3s ≈ 90s window).
    for _ in range(30):
        time.sleep(3)
        try:
            poll = requests.get(
                f"{ASSEMBLYAI_BASE}/transcript/{tx_id}",
                headers=headers,
                timeout=15
            )
            poll.raise_for_status()
            result = poll.json()
            status = result.get("status")
            if status == "completed":
                return result.get("text", "")
            elif status == "error":
                logger.error("AssemblyAI error: %s", result.get("error"))
                return ""
        except Exception as e:
            logger.error("AssemblyAI poll error: %s", e)
            return ""

    # Fix: previously fell off the end of the loop and implicitly returned
    # None after the polling window elapsed, violating the -> str contract.
    logger.error("AssemblyAI transcription timed out after polling window")
    return ""
|
| 476 |
+
|
| 477 |
+
|
| 478 |
+
@app.post("/api/tickets/submit-nl")
def submit_ticket_nl():
    """
    POST body: { "message": "I can't log in, it says my account is locked...", "user_id": "..." }
    Returns structured ticket fields for the frontend to pre-fill and submit.
    """
    body = request.get_json(silent=True) or {}
    message = body.get("message", "").strip()
    user_id = body.get("user_id", "anonymous")

    if not message:
        return jsonify({"ok": False, "error": "message is required"}), 400

    gemini_out = _gemini_text(TICKET_EXTRACTION_PROMPT + message, json_mode=True)
    ticket = _safe_json(gemini_out, {})

    # Robustness fix: Gemini can return a non-object (e.g. a JSON array);
    # calling .get on it would raise AttributeError and produce an unhandled
    # 500. Treat anything other than a dict with a title as a failure.
    if not isinstance(ticket, dict) or not ticket.get("title"):
        return jsonify({"ok": False, "error": "Could not extract ticket info from message"}), 500

    # Log submission attempt (best-effort; skipped when Firebase is down).
    if db:
        db.collection("iris_ai_ticket_drafts").add({
            "user_id": user_id,
            "raw_input": message,
            "extracted": ticket,
            "channel": "nl_text",
            "created_at": datetime.now(timezone.utc).isoformat(),
        })

    return jsonify({"ok": True, "ticket": ticket})
|
| 508 |
+
|
| 509 |
+
|
| 510 |
+
@app.post("/api/tickets/submit-voice")
def submit_ticket_voice():
    """
    POST body: { "audio_b64": "<base64 audio>", "audio_format": "wav", "user_id": "..." }
    Transcribes audio via AssemblyAI, then extracts ticket via Gemini.
    """
    body = request.get_json(silent=True) or {}
    audio_b64 = body.get("audio_b64", "")
    audio_format = body.get("audio_format", "wav")
    user_id = body.get("user_id", "anonymous")

    if not audio_b64:
        return jsonify({"ok": False, "error": "audio_b64 is required"}), 400

    if not ASSEMBLYAI_API_KEY:
        return jsonify({"ok": False, "error": "AssemblyAI not configured on server"}), 500

    logger.info("Voice ticket: transcribing audio for user=%s", user_id)
    transcript = _transcribe_audio_assemblyai(audio_b64, audio_format)

    if not transcript:
        return jsonify({"ok": False, "error": "Transcription failed or returned empty result"}), 500

    gemini_out = _gemini_text(TICKET_EXTRACTION_PROMPT + transcript, json_mode=True)
    ticket = _safe_json(gemini_out, {})

    # Robustness fix: guard against a non-dict Gemini payload (e.g. a JSON
    # array) before calling .get, which would otherwise raise AttributeError.
    if not isinstance(ticket, dict) or not ticket.get("title"):
        return jsonify({"ok": False, "error": "Could not extract ticket info from transcript"}), 500

    # Log the draft (best-effort; skipped when Firebase is down).
    if db:
        db.collection("iris_ai_ticket_drafts").add({
            "user_id": user_id,
            "raw_input": transcript,
            "extracted": ticket,
            "channel": "voice",
            "created_at": datetime.now(timezone.utc).isoformat(),
        })

    return jsonify({"ok": True, "transcript": transcript, "ticket": ticket})
|
| 549 |
+
|
| 550 |
+
|
| 551 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 552 |
+
# FEATURE 4 — System Tutorial Ingestion
|
| 553 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 554 |
+
|
| 555 |
+
TUTORIAL_EXTRACTION_PROMPT = """
|
| 556 |
+
You are a knowledge base curator for a software support system.
|
| 557 |
+
|
| 558 |
+
You have been given a timestamped transcript from a video tutorial about the Iris Support Portal.
|
| 559 |
+
Your job is to extract discrete how-to articles, one per distinct feature or task demonstrated.
|
| 560 |
+
|
| 561 |
+
For each article, identify the best timestamp range where the solution or demonstration occurs.
|
| 562 |
+
|
| 563 |
+
Return a STRICT JSON array. Each object:
|
| 564 |
+
{
|
| 565 |
+
"title": "How to <do something> in Iris",
|
| 566 |
+
"content": "Step-by-step instructions based on the tutorial",
|
| 567 |
+
"category": "One of: Account | Tickets | Agents | Reports | Admin | Other",
|
| 568 |
+
"tags": ["keyword1", "keyword2"],
|
| 569 |
+
"timestamp_start": <seconds as integer>,
|
| 570 |
+
"timestamp_end": <seconds as integer>
|
| 571 |
+
}
|
| 572 |
+
|
| 573 |
+
Return ONLY the JSON array.
|
| 574 |
+
|
| 575 |
+
Transcript (with timestamps in [MM:SS] or [HH:MM:SS] format):
|
| 576 |
+
"""
|
| 577 |
+
|
| 578 |
+
def _parse_timestamp_to_seconds(ts: str) -> int:
|
| 579 |
+
"""Convert MM:SS or HH:MM:SS string to integer seconds."""
|
| 580 |
+
parts = ts.strip("[]").split(":")
|
| 581 |
+
try:
|
| 582 |
+
if len(parts) == 2:
|
| 583 |
+
return int(parts[0]) * 60 + int(parts[1])
|
| 584 |
+
elif len(parts) == 3:
|
| 585 |
+
return int(parts[0]) * 3600 + int(parts[1]) * 60 + int(parts[2])
|
| 586 |
+
except Exception:
|
| 587 |
+
pass
|
| 588 |
+
return 0
|
| 589 |
+
|
| 590 |
+
|
| 591 |
+
@app.post("/api/kb/tutorial-ingest")
def tutorial_ingest():
    """
    POST body: {
      "transcript": "<timestamped transcript text>",
      "video_url": "https://...",   (optional, for linking crop timestamps)
      "video_title": "Getting Started with Iris"
    }
    Gemini extracts how-to articles with timestamp ranges.
    """
    body = request.get_json(silent=True) or {}
    transcript = body.get("transcript", "").strip()
    video_url = body.get("video_url", "")
    video_title = body.get("video_title", "Tutorial")

    if not transcript:
        return jsonify({"ok": False, "error": "transcript is required"}), 400

    logger.info("Tutorial ingest: %d chars, title=%s", len(transcript), video_title)

    # Cap at 50k chars to keep the prompt within model limits.
    gemini_out = _gemini_text(TUTORIAL_EXTRACTION_PROMPT + transcript[:50000], json_mode=True)
    articles = _safe_json(gemini_out, [])

    if not isinstance(articles, list):
        return jsonify({"ok": False, "error": "Gemini returned unexpected format"}), 500

    # Inject video metadata into each article and normalize timestamps to
    # integer seconds.
    for a in articles:
        a["video_url"] = video_url
        a["video_title"] = video_title
        for ts_key in ("timestamp_start", "timestamp_end"):
            val = a.get(ts_key)
            if isinstance(val, str):
                # Gemini sometimes returns "[MM:SS]" strings instead of seconds.
                a[ts_key] = _parse_timestamp_to_seconds(val)
            elif isinstance(val, float):
                # Fix: numeric-but-float values were previously discarded
                # (reset to 0); truncate them to whole seconds instead.
                a[ts_key] = int(val)
            elif not isinstance(val, int):
                a[ts_key] = 0

    stats = _save_kb_articles(articles, source_label=f"tutorial:{video_title}")

    return jsonify({
        "ok": True,
        "video_title": video_title,
        "articles_found": len(articles),
        "saved": stats["saved"],
        "skipped_dupes": stats["skipped"],
    })
|
| 638 |
|
| 639 |
+
|
| 640 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 641 |
+
# FEATURE 5 — Agent Solution Writing (NL Text + Voice)
|
| 642 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 643 |
+
|
| 644 |
+
SOLUTION_EXTRACTION_PROMPT = """
|
| 645 |
+
You are a support knowledge base curator.
|
| 646 |
+
|
| 647 |
+
An agent or support staff has described a solution they discovered while resolving a ticket.
|
| 648 |
+
Structure this into a reusable KB article.
|
| 649 |
+
|
| 650 |
+
Return STRICT JSON:
|
| 651 |
+
{
|
| 652 |
+
"title": "Short, searchable problem title",
|
| 653 |
+
"content": "Clear step-by-step solution, rewritten for future reference",
|
| 654 |
+
"category": "One of: Account | Billing | Technical | Feature | Other",
|
| 655 |
+
"tags": ["relevant", "keywords"]
|
| 656 |
+
}
|
| 657 |
+
|
| 658 |
+
Agent's description:
|
| 659 |
+
"""
|
| 660 |
+
|
| 661 |
+
@app.post("/api/kb/agent-solution-nl")
def agent_solution_nl():
    """
    POST body: { "message": "I fixed ticket #123 by...", "agent_id": "...", "ticket_id": "..." }
    Creates a KB article from agent's natural language solution description.
    """
    body = request.get_json(silent=True) or {}
    message = body.get("message", "").strip()
    agent_id = body.get("agent_id", "unknown")
    ticket_id = body.get("ticket_id", "")

    if not message:
        return jsonify({"ok": False, "error": "message is required"}), 400

    gemini_out = _gemini_text(SOLUTION_EXTRACTION_PROMPT + message, json_mode=True)
    article = _safe_json(gemini_out, {})

    # Robustness fix: guard against Gemini returning a non-object (e.g. a
    # JSON array), which would make .get raise AttributeError.
    if not isinstance(article, dict) or not article.get("title"):
        return jsonify({"ok": False, "error": "Could not structure solution"}), 500

    # Add ticket reference tag for traceability back to the originating ticket.
    if ticket_id:
        article.setdefault("tags", []).append(f"ticket:{ticket_id}")

    stats = _save_kb_articles([article], source_label=f"agent:{agent_id}")

    return jsonify({
        "ok": True,
        "saved": stats["saved"],
        "article": article,
    })
|
| 692 |
+
|
| 693 |
+
|
| 694 |
+
@app.post("/api/kb/agent-solution-voice")
def agent_solution_voice():
    """
    POST body: { "audio_b64": "...", "audio_format": "wav", "agent_id": "...", "ticket_id": "..." }
    Transcribes agent's voice note, structures into KB article.
    """
    body = request.get_json(silent=True) or {}
    audio_b64 = body.get("audio_b64", "")
    audio_format = body.get("audio_format", "wav")
    agent_id = body.get("agent_id", "unknown")
    ticket_id = body.get("ticket_id", "")

    if not audio_b64:
        return jsonify({"ok": False, "error": "audio_b64 is required"}), 400

    transcript = _transcribe_audio_assemblyai(audio_b64, audio_format)

    if not transcript:
        return jsonify({"ok": False, "error": "Transcription failed"}), 500

    gemini_out = _gemini_text(SOLUTION_EXTRACTION_PROMPT + transcript, json_mode=True)
    article = _safe_json(gemini_out, {})

    # Robustness fix: guard against a non-dict Gemini payload before .get,
    # which would otherwise raise AttributeError and surface as a traceback.
    if not isinstance(article, dict) or not article.get("title"):
        return jsonify({"ok": False, "error": "Could not structure solution from transcript"}), 500

    # Add ticket reference tag for traceability back to the originating ticket.
    if ticket_id:
        article.setdefault("tags", []).append(f"ticket:{ticket_id}")

    stats = _save_kb_articles([article], source_label=f"agent:{agent_id}")

    return jsonify({
        "ok": True,
        "transcript": transcript,
        "saved": stats["saved"],
        "article": article,
    })
|
| 731 |
|
| 732 |
+
|
| 733 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 734 |
+
# FEATURE 6 — Iris Support Chatbot (RAG over KB + Tutorials)
|
| 735 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 736 |
+
|
| 737 |
+
def _search_kb(query: str, limit: int = 5) -> List[Dict]:
    """
    Lightweight keyword search over Firestore KB articles.

    Scores the most recent 200 articles by how many query terms occur in
    their title/content/tags and returns the top `limit` matches.
    Production upgrade: swap with a vector DB (e.g. Qdrant) or Vertex AI Search.
    """
    if not db:
        return []

    # Ignore very short tokens — they would match nearly everything.
    terms = [token.lower() for token in query.split() if len(token) > 2]

    try:
        # Firestore has no full-text search; scan a bounded window of the
        # newest articles instead.
        snapshot = db.collection("iris_kb_articles").order_by(
            "created_at", direction=firestore.Query.DESCENDING
        ).limit(200).stream()

        scored = []
        for snap in snapshot:
            data = snap.to_dict()
            haystack = f"{data.get('title','')} {data.get('content','')} {' '.join(data.get('tags',[]))}".lower()
            hits = sum(1 for term in terms if term in haystack)
            if hits:
                scored.append({"score": hits, **data})

        scored.sort(key=lambda item: item["score"], reverse=True)
        return scored[:limit]

    except Exception as e:
        logger.error("KB search error: %s", e)
        return []
|
| 767 |
+
|
| 768 |
+
|
| 769 |
+
CHATBOT_SYSTEM_PROMPT = """
|
| 770 |
+
You are Iris, an intelligent support assistant for the Iris Support Portal.
|
| 771 |
+
|
| 772 |
+
Your role: Help users resolve issues quickly using the knowledge base and tutorial content provided.
|
| 773 |
+
|
| 774 |
+
Rules:
|
| 775 |
+
- Answer ONLY from the provided context. Do not hallucinate solutions.
|
| 776 |
+
- If the answer is in a tutorial with a timestamp, mention the video and timestamp so the user can jump to that moment.
|
| 777 |
+
- Be concise, clear, and friendly.
|
| 778 |
+
- If you cannot find the answer, say so honestly and suggest submitting a ticket.
|
| 779 |
+
- Format step-by-step answers as numbered lists.
|
| 780 |
+
"""
|
| 781 |
+
|
| 782 |
+
@app.post("/api/chatbot/query")
def chatbot_query():
    """
    POST body: {
      "message": "How do I reset a user's password?",
      "session_id": "...",
      "user_id": "..."
    }
    RAG: searches KB, then uses Gemini to synthesize an answer.
    """
    body = request.get_json(silent=True) or {}
    message = body.get("message", "").strip()
    session_id = body.get("session_id", "default")
    user_id = body.get("user_id", "anonymous")

    if not message:
        return jsonify({"ok": False, "error": "message is required"}), 400

    # Retrieve relevant KB context.
    kb_results = _search_kb(message, limit=5)

    context_blocks = []
    sources = []
    for r in kb_results:
        block = f"[Article: {r.get('title')}]\n{r.get('content', '')}"
        if r.get("timestamp_start") is not None:
            # Fix: Firestore may hand numbers back as floats; the :02d format
            # spec below raises ValueError on a float, so coerce explicitly.
            ts = int(r["timestamp_start"])
            mm = ts // 60
            ss = ts % 60
            url = r.get("video_url", "")
            block += f"\n(Tutorial: {r.get('video_title','Video')} at {mm:02d}:{ss:02d}"
            block += f" — {url})" if url else ")"
        context_blocks.append(block)
        sources.append({
            "title": r.get("title"),
            "category": r.get("category"),
            "source": r.get("source"),
            "ts_start": r.get("timestamp_start"),
            "video_url": r.get("video_url"),
        })

    context_str = "\n\n---\n\n".join(context_blocks) if context_blocks else "No relevant articles found."

    full_prompt = f"""{CHATBOT_SYSTEM_PROMPT}

KNOWLEDGE BASE CONTEXT:
{context_str}

USER QUESTION: {message}

Answer:"""

    answer = _gemini_text(full_prompt)

    if not answer:
        answer = "I'm sorry, I couldn't process your question right now. Please try again or submit a support ticket."

    # Persist chat log (best-effort; skipped when Firebase is down).
    if db:
        db.collection("iris_chatbot_logs").add({
            "user_id": user_id,
            "session_id": session_id,
            "message": message,
            "answer": answer,
            "sources": sources,
            "created_at": datetime.now(timezone.utc).isoformat(),
        })

    return jsonify({
        "ok": True,
        "answer": answer,
        "sources": sources,
    })
|
| 855 |
|
| 856 |
+
|
| 857 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 858 |
+
# KB READ ENDPOINTS (for frontend display)
|
| 859 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 860 |
+
|
| 861 |
+
@app.get("/api/kb/articles")
def list_kb_articles():
    """
    GET /api/kb/articles?category=Technical&limit=50
    Lists KB articles, optionally filtered by category.
    """
    category = request.args.get("category", "")
    # Fix: a non-numeric ?limit= previously raised an unhandled ValueError
    # (HTTP 500 with traceback); fall back to the default and clamp to a
    # sane range so one request can't pull the whole collection.
    try:
        limit = int(request.args.get("limit", 50))
    except (TypeError, ValueError):
        limit = 50
    limit = max(1, min(limit, 200))

    if not db:
        return jsonify({"ok": False, "error": "Firebase unavailable"}), 500

    try:
        query = db.collection("iris_kb_articles").order_by(
            "created_at", direction=firestore.Query.DESCENDING
        )
        if category:
            query = query.where("category", "==", category)

        docs = query.limit(limit).stream()
        articles = [{"id": d.id, **d.to_dict()} for d in docs]
        return jsonify({"ok": True, "articles": articles, "count": len(articles)})
    except Exception as e:
        logger.error("KB list error: %s", e)
        return jsonify({"ok": False, "error": str(e)}), 500
|
| 886 |
+
|
| 887 |
+
|
| 888 |
+
@app.delete("/api/kb/articles/<article_id>")
def delete_kb_article(article_id: str):
    """DELETE /api/kb/articles/<id> — Admin only (JWT check to be enforced at gateway)"""
    if not db:
        return jsonify({"ok": False, "error": "Firebase unavailable"}), 500
    try:
        db.collection("iris_kb_articles").document(article_id).delete()
    except Exception as e:
        return jsonify({"ok": False, "error": str(e)}), 500
    return jsonify({"ok": True})
|
| 898 |
+
|
| 899 |
+
|
| 900 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 901 |
+
# HEALTH
|
| 902 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 903 |
+
|
| 904 |
+
@app.get("/health")
def health():
    """Health probe: reports which integrations are configured and the KB size."""
    kb_count = 0
    if db:
        try:
            agg = db.collection("iris_kb_articles").count().get()
            kb_count = agg[0][0].value
        except Exception:
            # The count is informational only — never fail the health check.
            pass

    return jsonify({
        "ok": True,
        "service": "Iris AI Service v1.0",
        "gemini": bool(_gemini_client),
        "assemblyai": bool(ASSEMBLYAI_API_KEY),
        "firebase": bool(db),
        "kb_articles": kb_count,
    })
|
| 922 |
+
|
| 923 |
|
| 924 |
+
# ══════════════════════════��═══════════════════════════════════════════════════
|
| 925 |
+
# ENTRYPOINT
|
| 926 |
+
# ══════════════════════════════════════════════════════════════════════════════
|
| 927 |
|
| 928 |
if __name__ == "__main__":
    # Default port 7860 matches the Hugging Face Spaces convention;
    # override with the PORT environment variable.
    port = int(os.environ.get("PORT", 7860))
    logger.info("Iris AI Service starting on port %d", port)
    app.run(host="0.0.0.0", port=port)
|