rairo committed on
Commit
4393c22
·
verified ·
1 Parent(s): 18fb538

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +798 -751
main.py CHANGED
@@ -1,54 +1,58 @@
1
  """
2
- main.py — Pricelyst Shopping Advisor (April Edition 2026 - Upgrade v3.1)
3
 
4
- Feature: "Vernacular Engine" (Shona/Ndebele/English Input -> Native Response).
5
- Feature: "Precision Search" (Prioritizes exact phrase matches over popularity).
6
- ✅ Feature: "Concept Exploder" (Event Planning -> Shopping List).
7
- ✅ UI/UX: "Nearest Match" phrasing for substitutions.
8
- Core: Deep Vector Search + Market Matrix + Store Preferences + Savings Calculator.
 
 
 
 
 
9
 
10
  ENV VARS:
11
- - GOOGLE_API_KEY=...
12
- - FIREBASE='{"type":"service_account", ...}'
13
- - PRICE_API_BASE=https://api.pricelyst.co.zw
14
- - GEMINI_MODEL=gemini-2.5-flash
15
- - PORT=5000
16
  """
17
 
18
  import os
 
19
  import re
20
  import json
21
  import time
22
- import math
23
  import logging
24
  import base64
 
25
  from datetime import datetime, timezone
26
- from typing import Any, Dict, List, Optional, Tuple
27
 
28
  import requests
29
- import pandas as pd
30
  from flask import Flask, request, jsonify
31
  from flask_cors import CORS
32
 
33
- # ––––– Logging –––––
34
 
35
  logging.basicConfig(
36
  level=logging.INFO,
37
  format="%(asctime)s | %(levelname)s | %(message)s"
38
  )
39
- logger = logging.getLogger("pricelyst-advisor")
40
 
41
- # ––––– Gemini SDK –––––
42
 
43
  try:
44
  from google import genai
45
- from google.genai import types
46
  except Exception as e:
47
  genai = None
48
- logger.error("google-genai not installed. pip install google-genai. Error=%s", e)
49
 
50
  GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")
51
- GEMINI_MODEL = os.environ.get("GEMINI_MODEL", "gemini-2.5-flash")
52
 
53
  _gemini_client = None
54
  if genai and GOOGLE_API_KEY:
@@ -58,14 +62,26 @@ if genai and GOOGLE_API_KEY:
58
  except Exception as e:
59
  logger.error("Failed to init Gemini client: %s", e)
60
 
61
- # ––––– Firebase Admin –––––
 
 
 
62
 
63
- import firebase_admin
64
- from firebase_admin import credentials, firestore
 
 
 
 
 
 
 
65
 
66
  FIREBASE_ENV = os.environ.get("FIREBASE", "")
67
 
68
- def init_firestore_from_env() -> Optional[firestore.Client]:
 
 
69
  if firebase_admin._apps:
70
  return firestore.client()
71
  if not FIREBASE_ENV:
@@ -78,807 +94,838 @@ def init_firestore_from_env() -> Optional[firestore.Client]:
78
  logger.info("Firebase initialized.")
79
  return firestore.client()
80
  except Exception as e:
81
- logger.critical("Failed to initialize Firebase: %s", e)
82
  return None
83
 
84
- db = init_firestore_from_env()
85
-
86
- # ––––– External API –––––
87
 
88
# ----- External API -----

# Base URL of the pricing API, without a trailing slash so paths can be appended.
PRICE_API_BASE = os.environ.get(
    "PRICE_API_BASE", "https://api.pricelyst.co.zw"
).rstrip("/")
# Passed as ``timeout=`` to ``requests.get``.
HTTP_TIMEOUT = 30

# ----- Static Data (Zim Context) -----

# Reference figures injected into prompts and used by the ZESA calculator.
# Each ``zesa_step_*`` entry is a tariff band; band cost is ``limit * rate``.
ZIM_CONTEXT = {
    "fuel_petrol": 1.58,
    "fuel_diesel": 1.65,
    "gas_lpg": 2.00,
    "bread_avg": 1.10,
    "zesa_step_1": dict(limit=50, rate=0.04),
    "zesa_step_2": dict(limit=150, rate=0.09),
    "zesa_step_3": dict(limit=9999, rate=0.14),
    "zesa_levy": 0.06,
}

# ----- Cache -----

# How long a fetched product index stays fresh, in seconds (20 minutes).
PRODUCT_CACHE_TTL = 20 * 60
# In-memory product index: fetch timestamp, flattened frame, and row count.
_data_cache: Dict[str, Any] = dict(ts=0, df=pd.DataFrame(), raw_count=0)
112
 
113
  app = Flask(__name__)
114
  CORS(app)
115
 
116
- # =========================
117
- # 1. ETL Layer (Deep Search Indexing)
118
- # =========================
 
 
 
 
 
 
 
 
 
 
 
 
 
119
 
120
def _norm(s: Any) -> str:
    """Return *s* as a trimmed, lower-cased string; any falsy value becomes ""."""
    return str(s).strip().lower() if s else ""
123
 
124
def _coerce_price(v: Any) -> float:
    """Convert an API price value to a float, returning 0.0 when unusable.

    ``None``, unparsable values, and non-finite numbers (NaN / infinity) all
    map to 0.0, which callers treat as "no price".
    """
    import math  # local import keeps this helper self-contained

    if v is None:
        return 0.0
    try:
        price = float(v)
    except (TypeError, ValueError):
        return 0.0
    # float("inf") would otherwise pass the caller's ``price > 0`` filter.
    return price if math.isfinite(price) else 0.0
 
 
 
 
 
 
129
 
130
def _safe_json_loads(s: str, fallback: Any):
    """Parse JSON from model output, tolerating Markdown code fences.

    If the text contains a ``` fence, the content between the first pair of
    fences is parsed (an optional leading ``json`` language tag is dropped).
    Returns *fallback* when the text cannot be parsed.
    """
    try:
        text = s.strip()
        if "```" in text:
            # Index 1 is the text *inside* the first fence; index 0 is whatever
            # preceded it (usually empty), which is not the JSON payload.
            text = text.split("```", 2)[1].lstrip()
            if text[:4].lower() == "json":
                text = text[4:]
        return json.loads(text)
    except Exception as e:
        logger.error("JSON Parse Error: %s", e)
        return fallback
140
 
141
def _flatten_product(p: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Flatten one API product record into index rows.

    Returns one row per retailer offer with a positive price, or a single
    placeholder "Listing" row (price 0.0, ``is_offer`` False) when the
    product has no ``prices`` entries at all.
    """
    p_id = int(p.get("id") or 0)
    p_name = str(p.get("name") or "Unknown")
    brand_name = str((p.get("brand") or {}).get("brand_name") or "")

    cat_names = [str(c.get("name") or "") for c in (p.get("categories") or [])]
    cat_str = " ".join(cat_names)
    primary_cat = cat_names[0] if cat_names else "General"

    # Deep search vector: name + brand + every category, normalised.
    search_vector = _norm(f"{p_name} {brand_name} {cat_str}")
    views = int(p.get("view_count") or 0)
    image = str(p.get("thumbnail") or p.get("image") or "")

    def make_row(retailer: str, price: float, is_offer: bool) -> Dict[str, Any]:
        return {
            "product_id": p_id,
            "product_name": p_name,
            "search_vector": search_vector,
            "brand": brand_name,
            "category": primary_cat,
            "retailer": retailer,
            "price": price,
            "views": views,
            "image": image,
            "is_offer": is_offer,
        }

    prices = p.get("prices") or []
    if not prices:
        return [make_row("Listing", 0.0, False)]

    rows = []
    for offer in prices:
        price_val = _coerce_price(offer.get("price"))
        if price_val > 0:
            retailer = offer.get("retailer") or {}
            rows.append(make_row(str(retailer.get("name") or "Unknown Store"), price_val, True))
    return rows


def fetch_and_flatten_data() -> pd.DataFrame:
    """Download the paginated product listing and flatten it into a DataFrame.

    Pages are fetched from ``/api/v1/product-listing`` until an empty page,
    the reported ``totalPages`` (99 if absent), or a request error. On error
    the pages fetched so far are still used. Malformed product records are
    logged and skipped rather than aborting the whole ETL run.
    """
    all_products = []
    page = 1
    url = f"{PRICE_API_BASE}/api/v1/product-listing"

    logger.info("ETL: Starting fetch from /api/v1/product-listing")

    while True:
        try:
            r = requests.get(url, params={"page": page, "perPage": 50}, timeout=HTTP_TIMEOUT)
            r.raise_for_status()
            payload = r.json()
            data = payload.get("data") or []
            if not data:
                break

            all_products.extend(data)

            if page >= (payload.get("totalPages") or 99):
                break
            page += 1
        except Exception as e:
            logger.error("ETL Error on page %s: %s", page, e)
            break

    rows = []
    skipped = 0
    for p in all_products:
        try:
            rows.extend(_flatten_product(p))
        except (AttributeError, KeyError, TypeError, ValueError) as e:
            # One bad record must not sink the index, but it should be visible.
            skipped += 1
            logger.debug("ETL: skipping malformed product: %s", e)

    if skipped:
        logger.warning("ETL: Skipped %d malformed product records.", skipped)

    df = pd.DataFrame(rows)
    logger.info("ETL: Flattened into %d rows.", len(df))
    return df
227
-
228
def get_market_index(force_refresh: bool = False) -> pd.DataFrame:
    """Return the cached product index, refreshing it when needed.

    A refresh runs when *force_refresh* is set, the cache is empty, or the
    cached data is older than ``PRODUCT_CACHE_TTL``. If a refresh yields no
    rows while usable data is already cached, the cached data is kept so a
    transient upstream failure does not wipe the index.
    """
    cached = _data_cache["df"]
    expired = time.time() - _data_cache["ts"] > PRODUCT_CACHE_TTL

    if force_refresh or cached.empty or expired:
        logger.info("ETL: Refreshing Market Index...")
        fresh = fetch_and_flatten_data()
        if fresh.empty and not cached.empty:
            logger.warning("ETL: Refresh returned no rows; keeping previous index.")
        else:
            _data_cache["df"] = fresh
            _data_cache["raw_count"] = len(fresh)
        # Stamp the attempt either way so a failing upstream is retried after
        # the TTL instead of on every request.
        _data_cache["ts"] = time.time()

    return _data_cache["df"]
237
-
238
- # =========================
239
- # 2. Analyst Engine (Precision Search & Matrix)
240
- # =========================
241
-
242
def search_products_deep(df: pd.DataFrame, query: str, limit: int = 15) -> pd.DataFrame:
    """Rank rows of *df* against *query* and return the best *limit* matches.

    Scoring per row:
      * +1000 when the query equals the product name,
      * +500 when the query appears verbatim in ``search_vector``,
      * +200 when the row's (non-empty) brand appears in the query,
      * +50 per query token present in ``search_vector``.

    Rows scoring 0 are dropped. Results are ordered by score (desc), views
    (desc), then price (asc). An empty *df* or empty/blank *query* returns
    *df* unchanged.
    """
    q_norm = str(query).strip().lower() if query else ""
    if df.empty or not q_norm:
        return df
    q_tokens = set(q_norm.split())

    def scoring_algo(row) -> int:
        vector = row["search_vector"]
        name = str(row["product_name"]).strip().lower() if row["product_name"] else ""
        brand = str(row["brand"]).strip().lower() if row["brand"] else ""

        score = 0
        if q_norm == name:
            score += 1000
        if q_norm in vector:
            score += 500
        # An empty brand is a substring of every query; without this guard
        # every brandless product would "match" every search.
        if brand and brand in q_norm:
            score += 200
        score += 50 * len(q_tokens & set(vector.split()))
        return score

    df_scored = df.copy()
    df_scored["match_score"] = df_scored.apply(scoring_algo, axis=1)

    matches = df_scored[df_scored["match_score"] > 0]
    if matches.empty:
        return matches

    matches = matches.sort_values(
        by=["match_score", "views", "price"],
        ascending=[False, False, True],
    )
    return matches.head(limit)
289
-
290
def calculate_basket_optimization(item_names: List[str], preferred_retailer: Optional[str] = None) -> Dict[str, Any]:
    """Resolve shopping items to catalogue products and compare stores.

    For each item the best search hit is chosen and all of that product's
    offers (among the hits) are collected, cheapest first. The "market
    matrix" then totals the basket per retailer.

    Returns a dict with:
      * ``actionable`` - False only when no market data is available,
      * ``found_items`` - per-item match, offers, best price, savings,
      * ``global_missing`` - items with no hit at all,
      * ``market_matrix`` - up to 4 retailers, most complete/cheapest first,
      * ``best_store`` - first entry of the matrix (or None),
      * ``is_basket`` / ``preferred_retailer`` - passthrough metadata.
    """
    df = get_market_index()
    if df.empty:
        return {"actionable": False, "error": "No data"}

    # Only priced offers are searchable; computed once for all items.
    offers_df = df[df["is_offer"]]

    found_items = []
    missing_global = []

    # 1. Resolve items and check brand fidelity.
    for item in item_names or []:
        # A blank query makes the search return every row, which would
        # "resolve" to an arbitrary product - skip such entries.
        if not _norm(item):
            continue

        hits = search_products_deep(offers_df, item, limit=10)
        if hits.empty:
            missing_global.append(item)
            continue

        best_match = hits.iloc[0]

        # A multi-word query whose words are not all present in the matched
        # name + brand is presented as a substitute ("nearest match").
        q_tokens = _norm(item).split()
        res_norm = _norm(best_match["product_name"] + " " + best_match["brand"])
        found_tokens = sum(1 for t in q_tokens if t in res_norm)
        is_substitute = len(q_tokens) > 1 and found_tokens < len(q_tokens)

        product_offers = hits[hits["product_name"] == best_match["product_name"]].sort_values("price")
        offers_list = [
            {"retailer": r["retailer"], "price": float(r["price"])}
            for _, r in product_offers.iterrows()
        ]

        best_price = offers_list[0]["price"]
        max_price = offers_list[-1]["price"]

        found_items.append({
            "query": item,
            "product_name": str(best_match["product_name"]),
            "is_substitute": is_substitute,
            "offers": offers_list,
            "best_price": best_price,
            "potential_savings": max_price - best_price,
        })

    if not found_items:
        return {"actionable": True, "found_items": [], "global_missing": missing_global}

    # 2. Market matrix: basket total per retailer.
    all_involved_retailers = {o["retailer"] for f in found_items for o in f["offers"]}

    store_comparison = []
    for retailer in all_involved_retailers:
        total_price = 0.0
        found_count = 0
        missing_in_store = []

        for item in found_items:
            # Offers are sorted by price, so the first hit is the cheapest.
            price = next((o["price"] for o in item["offers"] if o["retailer"] == retailer), None)
            if price is not None:
                total_price += price
                found_count += 1
            else:
                missing_in_store.append(item["product_name"])

        store_comparison.append({
            "retailer": retailer,
            "total_price": total_price,
            "found_count": found_count,
            "total_items": len(found_items),
            "missing_items": missing_in_store,
        })

    # Retailer name is the final key so ties are ordered deterministically
    # (the retailers come from a set).
    store_comparison.sort(key=lambda x: (-x["found_count"], x["total_price"], str(x["retailer"])))

    # 3. Basket-level savings, measured against the most expensive store
    #    that carries as many items as the top-ranked store.
    top_count = store_comparison[0]["found_count"]
    if len(store_comparison) > 1:
        most_expensive_total = max(
            s["total_price"] for s in store_comparison if s["found_count"] == top_count
        )
        for store in store_comparison:
            if store["found_count"] == top_count:
                store["basket_savings"] = most_expensive_total - store["total_price"]
            else:
                store["basket_savings"] = 0.0
    else:
        for store in store_comparison:
            store["basket_savings"] = 0.0

    return {
        "actionable": True,
        "is_basket": len(found_items) > 1,
        "found_items": found_items,
        "global_missing": missing_global,
        "market_matrix": store_comparison[:4],
        "best_store": store_comparison[0] if store_comparison else None,
        "preferred_retailer": preferred_retailer,
    }
 
 
 
 
 
 
 
394
 
395
def calculate_zesa_units(amount_usd: float, tariff: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    """Estimate electricity units bought for *amount_usd* under stepped tariffs.

    The levy is removed first (``amount / (1 + zesa_levy)``); the remainder
    is spent band by band, each band costing ``limit * rate``. Whatever is
    left after the first two bands is converted at the third band's rate.

    Args:
        amount_usd: Purchase amount. ``None``, non-numeric, negative, or
            non-finite values are treated as 0.
        tariff: Optional mapping with ``zesa_step_1..3`` and ``zesa_levy``;
            defaults to the module-level ``ZIM_CONTEXT``.

    Returns:
        ``{"amount_usd": float, "est_units_kwh": float}`` (units rounded to
        one decimal place).
    """
    ctx = ZIM_CONTEXT if tariff is None else tariff

    try:
        amount = float(amount_usd)
    except (TypeError, ValueError):
        amount = 0.0
    # Also rejects NaN: every comparison with NaN is False.
    if not (0.0 <= amount < float("inf")):
        amount = 0.0

    remaining = amount / (1 + ctx.get("zesa_levy", 0.06))
    units = 0.0

    for key in ("zesa_step_1", "zesa_step_2"):
        band = ctx[key]
        band_cost = band["limit"] * band["rate"]
        if remaining > band_cost:
            units += band["limit"]
            remaining -= band_cost
        else:
            units += remaining / band["rate"]
            break
    else:
        # Both lower bands were exhausted; the rest is bought at the top rate.
        units += remaining / ctx["zesa_step_3"]["rate"]

    return {
        "amount_usd": amount,
        "est_units_kwh": float(round(units, 1)),
    }
422
 
423
- # =========================
424
- # 3. Gemini Helpers (Vernacular & Intelligence)
425
- # =========================
426
-
427
def gemini_detect_intent(transcript: str) -> Dict[str, Any]:
    """Ask Gemini to classify the user's intent and extract shopping items.

    Always returns a dict. When the client is unavailable the result is
    ``{"actionable": False}``; on any error, or when the model returns JSON
    that is not an object, a CASUAL_CHAT / English fallback is returned.
    """
    if not _gemini_client:
        return {"actionable": False}

    fallback = {"actionable": False, "intent": "CASUAL_CHAT", "language": "English"}

    PROMPT = """
    Analyze transcript. Return STRICT JSON.
    Classify intent:
    - CASUAL_CHAT: Greetings, "hi".
    - SHOPPING_BASKET: Looking for prices, products, "cheapest X".
    - UTILITY_CALC: Electricity/ZESA questions.
    - STORE_DECISION: "Where should I buy?", "Which store is cheapest?".
    - EVENT_PLANNING: "Plan a braai", "Wedding list", "Dinner for 5" (Implicit lists).

    Extract:
    - items: list of specific products found. **TRANSLATE ALL ITEMS TO ENGLISH** (e.g. 'Hupfu' -> 'Maize Meal').
    - utility_amount: number
    - store_preference: if a specific store is named (e.g. "at OK Mart").
    - is_event_planning: boolean (true if user asks to plan an event but lists no items).
    - language: Detected user language (e.g., "Shona", "Ndebele", "English").

    JSON Schema:
    {
    "actionable": boolean,
    "intent": "string",
    "items": ["string"],
    "utility_amount": number,
    "store_preference": "string",
    "is_event_planning": boolean,
    "language": "string"
    }
    """
    try:
        resp = _gemini_client.models.generate_content(
            model=GEMINI_MODEL,
            contents=PROMPT + "\nTranscript: " + transcript,
            config=types.GenerateContentConfig(response_mime_type="application/json"),
        )
        parsed = _safe_json_loads(resp.text, fallback)
        # Callers use ``.get`` on the result, so a JSON array/scalar from the
        # model must not leak through.
        return parsed if isinstance(parsed, dict) else fallback
    except Exception as e:
        logger.error("Intent Detect Error: %s", e)
        return fallback
467
 
468
def gemini_explode_concept(transcript: str) -> List[str]:
    """Expand an event concept ("Braai for 10") into concrete item names.

    Returns a list of non-empty strings; an empty list when the client is
    unavailable, the call fails, or the model does not return a JSON list.
    """
    if not _gemini_client:
        return []

    PROMPT = f"""
    User wants to plan an event: "{transcript}".
    Generate a STRICT list of 10-15 essential Zimbabwean shopping items for this.
    Use English terms for database lookup (e.g. 'Maize Meal', 'Cooking Oil').
    Return ONLY a JSON list of strings.
    """
    try:
        resp = _gemini_client.models.generate_content(
            model=GEMINI_MODEL,
            contents=PROMPT,
            config=types.GenerateContentConfig(response_mime_type="application/json"),
        )
        parsed = _safe_json_loads(resp.text, [])
        if not isinstance(parsed, list):
            return []
        # Downstream search expects plain item names.
        return [entry.strip() for entry in parsed if isinstance(entry, str) and entry.strip()]
    except Exception as e:
        logger.error("Explode Concept Error: %s", e)
        return []
490
 
491
def gemini_analyze_image(image_b64: str, caption: str = "") -> Dict[str, Any]:
    """Classify a base64-encoded image and extract shopping items from it.

    *image_b64* may be raw base64 or a ``data:image/...;base64,`` URL; in
    the latter case the prefix is stripped and its MIME type is forwarded
    (JPEG is assumed otherwise).

    Returns the model's JSON object (``type``, ``items``, ``description``),
    ``{"error": "AI Offline"}`` when the client is unavailable, or an
    IRRELEVANT fallback on any failure.
    """
    if not _gemini_client:
        return {"error": "AI Offline"}

    fallback = {"type": "IRRELEVANT", "items": []}

    PROMPT = f"""
    Analyze this image. Context: {caption}
    1. SHOPPING LIST? -> Extract items.
    2. SINGLE PRODUCT? -> Extract BRAND + NAME (e.g. "Pepsi 500ml").
    3. MEAL/DISH? -> Identify dish + ingredients.
    4. IRRELEVANT? -> Return type "IRRELEVANT".

    Return STRICT JSON:
    {{
    "type": "LIST" | "PRODUCT" | "MEAL" | "IRRELEVANT",
    "items": ["item1"],
    "description": "Short description"
    }}
    """
    try:
        mime_type = "image/jpeg"
        # Browsers commonly send data URLs; decoding the prefix as base64
        # would corrupt the image bytes.
        prefix = re.match(r"^data:(image/[\w.+-]+);base64,", image_b64)
        if prefix:
            mime_type = prefix.group(1)
            image_b64 = image_b64[prefix.end():]

        image_bytes = base64.b64decode(image_b64)
        resp = _gemini_client.models.generate_content(
            model=GEMINI_MODEL,
            contents=[
                PROMPT,
                types.Part.from_bytes(data=image_bytes, mime_type=mime_type),
            ],
            config=types.GenerateContentConfig(response_mime_type="application/json"),
        )
        result = _safe_json_loads(resp.text, fallback)
        return result if isinstance(result, dict) else fallback
    except Exception as e:
        logger.error("Vision Error: %s", e)
        return fallback
523
-
524
def gemini_chat_response(transcript: str, intent: Dict, analyst_data: Dict, chat_history: str = "") -> str:
    """Generate April's chat reply from the user message and analyst data.

    The prompt carries recent chat history (if any), a snapshot of
    ``ZIM_CONTEXT``, the analyst data as JSON, and the detected language.
    Always returns a non-empty string: a canned message is used when the
    client is unavailable, the call fails, or the model returns no text.
    """
    if not _gemini_client:
        return "I'm having trouble connecting to my brain right now."

    failure_reply = "I checked the prices, but I'm having trouble displaying them right now."

    context_str = f"RECENT CHAT HISTORY (Last 6 messages):\n{chat_history}\n" if chat_history else ""
    context_str += f"ZIMBABWE CONTEXT: Fuel={ZIM_CONTEXT['fuel_petrol']}, ZESA Rate={ZIM_CONTEXT['zesa_step_1']['rate']}\n"

    if analyst_data:
        context_str += f"ANALYST DATA: {json.dumps(analyst_data, default=str)}\n"

    language = intent.get("language", "English")

    PROMPT = f"""
    You are April, Pricelyst's Shopping Advisor (Zimbabwe).
    Role: Intelligent Shopping Companion.
    Goal: Shortest path to value. Complete Transparency.

    INPUT: "{transcript}"
    USER LANGUAGE: {language}
    INTENT: {intent.get('intent')}
    CONTEXT:
    {context_str}

    LOGIC RULES:

    1. **LANGUAGE**: Reply in **{language}**. If Shona, use Shona. If English, use English.

    2. **BASKET COMPARISON**:
    - If `market_matrix` has multiple stores, compare totals and explicitly state the savings using the pre-calculated `basket_savings`.
    - Example: "Spar is **$6.95**, OK Mart is **$4.00** (but missing Oil). You save **$2.95** by getting the basket at OK Mart!"

    3. **BRAND SUBSTITUTES (Phrasing)**:
    - If `is_substitute` is TRUE for an item, say:
    "I couldn't find **[Query]**, but the **nearest match is** **[Found]** ($Price)."

    4. **SINGLE ITEMS**:
    - State the best price first, then others. Explicitly state how much is saved by choosing the cheapest option over the most expensive one based on `potential_savings`.
    - Example: "The cheapest is **$2.00** at OK. You save **$0.50** compared to the most expensive store!"

    5. **CASUAL**:
    - Reset if user says "Hi".

    TONE: Helpful, direct, Zimbabwean. Use Markdown.
    """

    try:
        resp = _gemini_client.models.generate_content(
            model=GEMINI_MODEL,
            contents=PROMPT,
        )
        # ``resp.text`` can be empty/None (e.g. no candidates returned);
        # callers send and store this value as the reply.
        return resp.text or failure_reply
    except Exception as e:
        logger.error("Chat Gen Error: %s", e)
        return failure_reply
577
-
578
def gemini_generate_4step_plan(transcript: str, analyst_result: Dict) -> str:
    """Generate a four-section Markdown shopping plan via Gemini.

    Returns the model's Markdown, or a short ``# Error`` Markdown message
    when the client is unavailable, the call fails, or no text comes back.
    """
    if not _gemini_client:
        return "# Error\nAI Offline."

    failure_reply = "# Error\nCould not generate plan."

    PROMPT = f"""
    Generate a formatted Markdown Shopping Plan.

    USER REQUEST: "{transcript}"
    DATA: {json.dumps(analyst_result, indent=2, default=str)}

    CRITICAL INSTRUCTION:
    For items in 'global_missing', you MUST provide a Realistic USD Estimate (e.g. Chicken ~$6.00).
    Do not leave them as "Unknown".

    SECTIONS:

    1. **In Our Catalogue ✅**
    (Markdown Table: | Item | Retailer | Price (USD) | Potential Savings |)

    2. **Not in Catalogue (Estimates) 😔**
    (Markdown Table: | Item | Estimated Price (USD) |)
    *Fill in estimated prices for missing items based on Zimbabwe market knowledge.*

    3. **Totals & Savings 💰**
    - Confirmed Total (Catalogue)
    - Total Basket Savings (From cheapest vs most expensive store)
    - Estimated Total (Missing Items)
    - **Grand Total Estimate**

    4. **Ideas & Tips 💡**
    - 3 Creative ideas based on the specific event/meal (e.g. Braai tips, Cooking hacks).

    Tone: Warm, Professional, Zimbabwean.
    """
    try:
        resp = _gemini_client.models.generate_content(model=GEMINI_MODEL, contents=PROMPT)
        return resp.text or failure_reply
    except Exception as e:
        # Previously swallowed silently, which hid every plan failure.
        logger.error("Plan Gen Error: %s", e)
        return failure_reply
616
 
617
- # =========================
618
- # 4. Endpoints
619
- # =========================
620
 
621
@app.get("/health")
def health():
    """Report service status and the number of rows in the market index."""
    market_df = get_market_index()
    payload = {
        "ok": True,
        "offers_indexed": len(market_df),
        "api_source": PRICE_API_BASE,
        "persona": "April v3.1 (Babel Fish)",
    }
    return jsonify(payload)
630
 
631
@app.post("/chat")
def chat():
    """Handle one chat turn: detect intent, run the analyst, reply, and log.

    Request JSON: ``message`` (text) and ``profile_id`` (required; 400 if
    missing). Chat history and logging use Firestore when available and are
    best-effort: a storage failure is logged and never fails the request.
    """
    body = request.get_json(silent=True) or {}
    msg = str(body.get("message") or "")
    pid = body.get("profile_id")

    if not pid:
        return jsonify({"ok": False, "error": "Missing profile_id"}), 400

    # Recent history, oldest first, for conversational context.
    history_str = ""
    if db:
        try:
            docs = (
                db.collection("pricelyst_profiles").document(pid).collection("chat_logs")
                .order_by("ts", direction=firestore.Query.DESCENDING).limit(6).stream()
            )
            msgs = []
            for d in docs:
                entry = d.to_dict() or {}
                msgs.append(f"User: {entry.get('message')}\nApril: {entry.get('response')}")
            if msgs:
                history_str = "\n".join(reversed(msgs))
        except Exception as e:
            logger.warning("Chat history load failed: %s", e)

    intent_data = gemini_detect_intent(msg)
    if not isinstance(intent_data, dict):
        intent_data = {}
    intent_type = intent_data.get("intent", "CASUAL_CHAT")
    # The model may emit ``"items": null``.
    items = intent_data.get("items") or []
    store_pref = intent_data.get("store_preference")

    analyst_data = {}

    if items or intent_type in ["SHOPPING_BASKET", "STORE_DECISION", "TRUST_CHECK"]:
        analyst_data = calculate_basket_optimization(items, preferred_retailer=store_pref)

    elif intent_type == "UTILITY_CALC":
        try:
            amount = float(intent_data.get("utility_amount", 20))
        except (TypeError, ValueError):
            # Missing/null/non-numeric amount: use the default purchase size.
            amount = 20
        analyst_data = calculate_zesa_units(amount)

    reply = gemini_chat_response(msg, intent_data, analyst_data, history_str)

    if db:
        try:
            db.collection("pricelyst_profiles").document(pid).collection("chat_logs").add({
                "message": msg,
                "response": reply,
                "intent": intent_data,
                "ts": datetime.now(timezone.utc).isoformat(),
            })
        except Exception as e:
            # The reply is already computed; losing the log must not turn
            # the response into a 500.
            logger.error("Chat log write failed: %s", e)

    return jsonify({"ok": True, "data": {"message": reply, "analyst_debug": analyst_data if items else None}})
676
-
677
@app.post("/api/analyze-image")
def analyze_image():
    """Identify items in an uploaded image and price them.

    Request JSON: ``image_data`` (base64), ``profile_id`` (both required;
    400 otherwise) and optional ``caption``. The reply wording depends on
    whether the image is a list, a product, a meal, or irrelevant.
    """
    payload = request.get_json(silent=True) or {}
    image_b64 = payload.get("image_data")
    caption = payload.get("caption", "")
    pid = payload.get("profile_id")

    if not (image_b64 and pid):
        return jsonify({"ok": False}), 400

    vision_result = gemini_analyze_image(image_b64, caption)
    img_type = vision_result.get("type", "IRRELEVANT")
    items = vision_result.get("items", [])
    description = vision_result.get("description", "an image")

    # A product/meal with no extracted items falls back to its description.
    if img_type in ["PRODUCT", "MEAL"] and not items and description:
        items = [description]

    analyst_data = {}

    if img_type == "IRRELEVANT" and not items:
        prompt = f"User uploaded photo of {description}. Compliment it if appropriate, then explain you are a shopping bot."
        response_text = gemini_chat_response(prompt, {"intent": "CASUAL_CHAT"}, {}, "")
    elif items:
        analyst_data = calculate_basket_optimization(items)

        joined_items = ", ".join(items)
        if img_type == "MEAL":
            sim_msg = f"I want to cook {description}. Cost of ingredients: {joined_items}?"
        elif img_type == "LIST":
            sim_msg = f"Price of list: {joined_items}?"
        else:
            sim_msg = f"Cheapest price for {joined_items}?"

        response_text = gemini_chat_response(sim_msg, {"intent": "STORE_DECISION"}, analyst_data, "")
    else:
        response_text = "I couldn't identify the product. Could you type the name?"

    result = {
        "ok": True,
        "image_type": img_type,
        "items_identified": items,
        "message": response_text,
        "analyst_data": analyst_data,
    }
    return jsonify(result)
723
 
724
@app.post("/api/call-briefing")
def call_briefing():
    """Build the market-data briefing injected into the voice bot's context.

    The briefing contains fuel/bread/ZESA figures, the cheapest offer for a
    few staples, and the 60 most-viewed catalogue products with all their
    offers. Requires ``profile_id`` (400 otherwise). Profile reads/writes
    are best-effort: Firestore failures are logged and the briefing is
    still returned.
    """
    body = request.get_json(silent=True) or {}
    pid = body.get("profile_id")
    username = body.get("username", "Friend")

    if not pid:
        return jsonify({"ok": False}), 400

    # 1. Memory profile (created on first contact; username kept in sync).
    prof = {}
    if db:
        try:
            ref = db.collection("pricelyst_profiles").document(pid)
            doc = ref.get()
            if doc.exists:
                prof = doc.to_dict() or {}
            else:
                ref.set({"created_at": datetime.now(timezone.utc).isoformat()})

            if username != "Friend" and username != prof.get("username"):
                ref.set({"username": username}, merge=True)
        except Exception as e:
            logger.error("Call briefing profile access failed: %s", e)

    # 2. Market intelligence.
    df = get_market_index()
    # Priced offers only; computed once and reused by every section below.
    offers = df[df["is_offer"]] if not df.empty else df

    # A. ZESA & fuel
    zesa_10 = calculate_zesa_units(10.0)
    zesa_20 = calculate_zesa_units(20.0)

    context_section = "\n".join([
        "",
        "[CRITICAL CONTEXT - ZIMBABWE]",
        f"FUEL: Petrol=${ZIM_CONTEXT['fuel_petrol']}, Diesel=${ZIM_CONTEXT['fuel_diesel']}",
        f"BREAD: ~${ZIM_CONTEXT['bread_avg']}",
        f"ZESA (Electricity): $10 = {zesa_10['est_units_kwh']}u, $20 = {zesa_20['est_units_kwh']}u",
        "",
    ])

    # B. Staples index: cheapest hit per staple.
    staples = ["Cooking Oil", "Maize Meal", "Sugar", "Rice"]
    staple_summary = []
    if not df.empty:
        for s in staples:
            hits = search_products_deep(offers, s, limit=5)
            if not hits.empty:
                cheapest = hits.sort_values("price").iloc[0]
                staple_summary.append(f"- {s}: ${cheapest['price']} @ {cheapest['retailer']}")

    staples_section = "\n[STAPLES - LOWEST]\n" + "\n".join(staple_summary)

    # C. Top 60 catalogue products by views, each with every offer.
    catalogue_lines = []
    if not df.empty:
        top_items = offers.sort_values("views", ascending=False).drop_duplicates("product_name").head(60)
        for p_name in top_items["product_name"]:
            all_offers = offers[offers["product_name"] == p_name]
            prices_str = ", ".join(
                f"${o['price']} ({o['retailer']})" for _, o in all_offers.iterrows()
            )
            catalogue_lines.append(f"- {p_name}: {prices_str}")

    catalogue_section = "\n[CATALOGUE - TOP 60]\n" + "\n".join(catalogue_lines)

    return jsonify({
        "ok": True,
        "username": username,
        "memory_summary": prof.get("memory_summary", ""),
        "kpi_snapshot": context_section + staples_section + catalogue_section,
    })
793
 
794
@app.post("/api/log-call-usage")
def log_call_usage():
    """Post-call orchestrator: update memory, build a shopping plan, log the call.

    Request JSON: ``profile_id`` (required; 400 otherwise) and
    ``transcript``. For an actionable transcript the items (or, for event
    planning without items, a generated item list) are priced and turned
    into a Markdown plan. Memory updates and Firestore writes are
    best-effort: failures are logged and do not fail the request.
    """
    body = request.get_json(silent=True) or {}
    pid = body.get("profile_id")
    transcript = body.get("transcript") or ""

    if not pid:
        return jsonify({"ok": False}), 400

    # 1. Update long-term memory (needs both storage and the model).
    if len(transcript) > 20 and db and _gemini_client:
        try:
            profile_ref = db.collection("pricelyst_profiles").document(pid)
            # ``to_dict()`` is None for a profile that does not exist yet.
            curr_mem = (profile_ref.get().to_dict() or {}).get("memory_summary", "")
            mem_prompt = f"Update user memory (budget, family size) based on: {transcript}\nOLD: {curr_mem}"
            mem_resp = _gemini_client.models.generate_content(model=GEMINI_MODEL, contents=mem_prompt)
            if mem_resp.text:
                profile_ref.set({"memory_summary": mem_resp.text}, merge=True)
        except Exception as e:
            logger.warning("Memory update failed: %s", e)

    # 2. Plan generation.
    intent_data = gemini_detect_intent(transcript)
    if not isinstance(intent_data, dict):
        intent_data = {}
    plan_data = {}

    if intent_data.get("actionable"):
        target_items = intent_data.get("items") or []

        # Event planning with no explicit items: let the model propose a list.
        if intent_data.get("is_event_planning") and not target_items:
            logger.info("💥 Exploding Concept for Event...")
            target_items = gemini_explode_concept(transcript)

        if target_items:
            analyst_result = calculate_basket_optimization(target_items)
            md_content = gemini_generate_4step_plan(transcript, analyst_result)

            plan_data = {
                "is_actionable": True,
                "title": f"Plan ({datetime.now().strftime('%d %b')})",
                "markdown_content": md_content,
                "items": target_items,
                "created_at": datetime.now(timezone.utc).isoformat(),
            }

            if db:
                try:
                    doc_ref = (
                        db.collection("pricelyst_profiles").document(pid)
                        .collection("shopping_plans").document()
                    )
                    plan_data["id"] = doc_ref.id
                    doc_ref.set(plan_data)
                except Exception as e:
                    logger.error("Shopping plan save failed: %s", e)

    if db:
        try:
            db.collection("pricelyst_profiles").document(pid).collection("call_logs").add({
                "transcript": transcript,
                "intent": intent_data,
                "plan_generated": bool(plan_data),
                "ts": datetime.now(timezone.utc).isoformat(),
            })
        except Exception as e:
            logger.error("Call log write failed: %s", e)

    return jsonify({
        "ok": True,
        "shopping_plan": plan_data if plan_data.get("is_actionable") else None,
    })
859
 
860
@app.get("/api/shopping-plans")
def list_plans():
    """Return the 10 most recent shopping plans for ``profile_id``.

    Responds 400 when ``profile_id`` is missing or Firestore is unavailable,
    and 500 (after logging the cause) when the query fails.
    """
    pid = request.args.get("profile_id")
    if not pid or not db:
        return jsonify({"ok": False}), 400

    try:
        docs = (
            db.collection("pricelyst_profiles").document(pid).collection("shopping_plans")
            .order_by("created_at", direction=firestore.Query.DESCENDING).limit(10).stream()
        )
        plans = [{"id": d.id, **(d.to_dict() or {})} for d in docs]
        return jsonify({"ok": True, "plans": plans})
    except Exception as e:
        logger.error("List plans failed: %s", e)
        return jsonify({"ok": False}), 500
869
-
870
@app.delete("/api/shopping-plans/<plan_id>")
def delete_plan(plan_id):
    """Delete shopping plan *plan_id* belonging to ``profile_id``.

    Responds 400 when ``profile_id`` is missing or Firestore is unavailable,
    and 500 (after logging the cause) when the delete fails.
    """
    pid = request.args.get("profile_id")
    if not pid or not db:
        return jsonify({"ok": False}), 400

    try:
        (
            db.collection("pricelyst_profiles").document(pid)
            .collection("shopping_plans").document(plan_id).delete()
        )
        return jsonify({"ok": True})
    except Exception as e:
        logger.error("Delete plan %s failed: %s", plan_id, e)
        return jsonify({"ok": False}), 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
878
 
 
 
 
879
 
880
if __name__ == "__main__":
    port = int(os.environ.get("PORT", 7860))
    # Warm the product index before serving. Best-effort: the first request
    # refreshes the index anyway, so a failure here only needs to be visible.
    try:
        get_market_index(force_refresh=True)
    except Exception as e:
        logger.warning("Initial market index warm-up failed: %s", e)
    app.run(host="0.0.0.0", port=port)
 
1
  """
2
+ main.py — Iris AI Service (v1.0 - April 2026)
3
 
4
+ AI layer for the Iris Support Portal (IrisPlus / Unified Spark Desk).
5
+ Deployed as a HuggingFace Space monofile (Flask + Gemini + AssemblyAI + Firebase).
6
+
7
+ FEATURES:
8
+ 1. WhatsApp Export Knowledge Base (intelligent Gemini extraction, additive only)
9
+ 2. Bulk KB Upload (CSV / Excel / PDF)
10
+ 3. Natural Language + Voice Ticket Submission (AssemblyAI transcription → Gemini extraction)
11
+ 4. System Tutorial Ingestion (video transcript → timestamped KB articles)
12
+ 5. Agent NL/Voice Solution Writing (same pipeline, agent role)
13
+ 6. Iris Chatbot (KB + tutorial source RAG, Firebase persistence)
14
 
15
  ENV VARS:
16
+ GOOGLE_API_KEY — Gemini API key
17
+ ASSEMBLYAI_API_KEY — AssemblyAI API key
18
+ FIREBASE — JSON string of Firebase service account
19
+ PORT — Server port (default 7860)
 
20
  """
21
 
22
  import os
23
+ import io
24
  import re
25
  import json
26
  import time
 
27
  import logging
28
  import base64
29
+ import hashlib
30
  from datetime import datetime, timezone
31
+ from typing import Any, Dict, List, Optional
32
 
33
  import requests
 
34
  from flask import Flask, request, jsonify
35
  from flask_cors import CORS
36
 
37
+ # ─── Logging ──────────────────────────────────────────────────────────────────
38
 
39
  logging.basicConfig(
40
  level=logging.INFO,
41
  format="%(asctime)s | %(levelname)s | %(message)s"
42
  )
43
+ logger = logging.getLogger("iris-ai-service")
44
 
45
+ # ─── Gemini SDK ───────────────────────────────────────────────────────────────
46
 
47
  try:
48
  from google import genai
49
+ from google.genai import types as genai_types
50
  except Exception as e:
51
  genai = None
52
+ logger.error("google-genai not installed: %s", e)
53
 
54
  GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")
55
+ GEMINI_MODEL = os.environ.get("GEMINI_MODEL", "gemini-2.0-flash")
56
 
57
  _gemini_client = None
58
  if genai and GOOGLE_API_KEY:
 
62
  except Exception as e:
63
  logger.error("Failed to init Gemini client: %s", e)
64
 
65
+ # ─── AssemblyAI ───────────────────────────────────────────────────────────────
66
+
67
+ ASSEMBLYAI_API_KEY = os.environ.get("ASSEMBLYAI_API_KEY", "")
68
+ ASSEMBLYAI_BASE = "https://api.assemblyai.com/v2"
69
 
70
+ # ─── Firebase ─────────────────────────────────────────────────────────────────
71
+
72
+ try:
73
+ import firebase_admin
74
+ from firebase_admin import credentials, firestore
75
+ FIREBASE_AVAILABLE = True
76
+ except ImportError:
77
+ FIREBASE_AVAILABLE = False
78
+ logger.warning("firebase-admin not installed. Persistence disabled.")
79
 
80
  FIREBASE_ENV = os.environ.get("FIREBASE", "")
81
 
82
+ def init_firestore() -> Optional[Any]:
83
+ if not FIREBASE_AVAILABLE:
84
+ return None
85
  if firebase_admin._apps:
86
  return firestore.client()
87
  if not FIREBASE_ENV:
 
94
  logger.info("Firebase initialized.")
95
  return firestore.client()
96
  except Exception as e:
97
+ logger.critical("Firebase init failed: %s", e)
98
  return None
99
 
100
+ db = init_firestore()
 
 
101
 
102
+ # ─── Optional file-parsing libs ───────────────────────────────────────────────
 
103
 
104
+ try:
105
+ import pandas as pd
106
+ PANDAS_AVAILABLE = True
107
+ except ImportError:
108
+ PANDAS_AVAILABLE = False
 
 
 
 
 
 
 
109
 
110
+ try:
111
+ import pypdf
112
+ PYPDF_AVAILABLE = True
113
+ except ImportError:
114
+ PYPDF_AVAILABLE = False
115
 
116
+ # ─── Flask App ────────────────────────────────────────────────────────────────
 
 
 
 
 
117
 
118
  app = Flask(__name__)
119
  CORS(app)
120
 
121
+ # ══════════════════════════════════════════════════════════════════════════════
122
+ # HELPERS
123
+ # ══════════════════════════════════════════════════════════════════════════════
124
+
125
+ def _safe_json(text: str, fallback: Any) -> Any:
126
+ """Strip markdown fences and parse JSON safely."""
127
+ try:
128
+ clean = text.strip()
129
+ if "```json" in clean:
130
+ clean = clean.split("```json")[1].split("```")[0]
131
+ elif "```" in clean:
132
+ clean = clean.split("```")[1].split("```")[0]
133
+ return json.loads(clean)
134
+ except Exception as e:
135
+ logger.error("JSON parse error: %s | text: %s", e, text[:200])
136
+ return fallback
137
 
 
 
 
138
 
139
def _gemini_text(prompt: str, json_mode: bool = False) -> str:
    """Send ``prompt`` to Gemini and return the response text.

    Args:
        prompt: Full prompt text passed as the request contents.
        json_mode: When true, request an ``application/json`` response.

    Returns:
        The response text, or an empty string when the client is not
        configured, the call raises, or the response has no text.
    """
    if not _gemini_client:
        return ""

    request_config = None
    if json_mode:
        request_config = genai_types.GenerateContentConfig(
            response_mime_type="application/json"
        )

    try:
        response = _gemini_client.models.generate_content(
            model=GEMINI_MODEL,
            contents=prompt,
            config=request_config,
        )
        body = response.text
        return body if body else ""
    except Exception as e:
        logger.error("Gemini call error: %s", e)
        return ""
154
 
155
+
156
+ def _article_fingerprint(title: str, content: str) -> str:
157
+ """Stable hash to detect duplicate KB articles."""
158
+ raw = f"{title.strip().lower()}::{content.strip().lower()[:300]}"
159
+ return hashlib.sha256(raw.encode()).hexdigest()[:16]
160
+
161
+
162
def _get_existing_fingerprints() -> set:
    """Collect the fingerprints of all articles in ``iris_kb_articles``.

    Returns:
        A set of non-empty fingerprint values; an empty set when Firestore
        is not configured or the query fails (the failure is logged).
    """
    if not db:
        return set()
    try:
        snapshots = db.collection("iris_kb_articles").select(["fingerprint"]).stream()
        found = set()
        for snap in snapshots:
            # Deserialize each snapshot once instead of twice per document.
            fp = (snap.to_dict() or {}).get("fingerprint")
            if fp:
                found.add(fp)
        return found
    except Exception as e:
        logger.error("Fingerprint fetch error: %s", e)
        return set()
172
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
 
174
def _save_kb_articles(articles: List[Dict], source_label: str) -> Dict:
    """Persist new KB articles to Firestore, skipping duplicates.

    Duplicates are detected by fingerprint, both against what is already
    stored and within the current batch.

    Args:
        articles: Article dicts (``title``, ``content``, optional
            ``category``, ``tags``, and tutorial fields ``timestamp_start``,
            ``timestamp_end``, ``video_url``, ``video_title``).
        source_label: Stored in each document's ``source`` field.

    Returns:
        ``{"saved": int, "skipped": int}``; when Firestore is unavailable the
        dict also carries an ``"error"`` message and both counts are 0.
        Entries that are not dicts are counted as skipped.
    """
    if not db:
        return {"saved": 0, "skipped": 0, "error": "Firebase unavailable"}

    existing = _get_existing_fingerprints()
    saved, skipped = 0, 0

    for article in articles:
        # Model output is untrusted: ignore anything that is not an object.
        if not isinstance(article, dict):
            skipped += 1
            continue

        # ``or`` also covers explicit JSON nulls, which ``.get(key, default)``
        # would pass through as None.
        title = str(article.get("title") or "Untitled")
        content = str(article.get("content") or "")
        fp = _article_fingerprint(title, content)

        if fp in existing:
            skipped += 1
            continue

        tags = article.get("tags") or []
        if isinstance(tags, str):
            tags = [tags]

        doc = {
            "title": title,
            "content": content,
            "category": article.get("category") or "General",
            "tags": tags,
            "source": source_label,
            "fingerprint": fp,
            "created_at": datetime.now(timezone.utc).isoformat(),
        }
        # Carry timestamp crop info from tutorial ingestion if present.
        if article.get("timestamp_start") is not None:
            doc["timestamp_start"] = article["timestamp_start"]
            doc["timestamp_end"] = article.get("timestamp_end")
            doc["video_url"] = article.get("video_url", "")
            # The chatbot reads ``video_title`` when citing a tutorial, so it
            # must be stored alongside the other video fields.
            if article.get("video_title"):
                doc["video_title"] = article["video_title"]

        db.collection("iris_kb_articles").add(doc)
        existing.add(fp)
        saved += 1

    return {"saved": saved, "skipped": skipped}
211
+
212
+
213
+ # ══════════════════════════════════════════════════════════════════════════════
214
+ # FEATURE 1 — WhatsApp Export → Knowledge Base
215
+ # ══════════════════════════════════════════════════════════════════════════════
216
+
217
+ WHATSAPP_EXTRACTION_PROMPT = """
218
+ You are a support knowledge base curator.
219
+
220
+ You have been given a raw WhatsApp group chat export from a support team.
221
+ Your job is to extract ONLY clear problem→solution pairs.
222
+
223
+ Rules:
224
+ - Ignore greetings, off-topic chatter, emoji-only messages, system notifications.
225
+ - Extract only exchanges where a user described an issue AND a support agent (or another user) provided a working solution.
226
+ - Each article must be self-contained and searchable.
227
+ - Merge follow-up messages that belong to the same resolution thread.
228
+
229
+ Return a STRICT JSON array. Each object:
230
+ {
231
+ "title": "Short, searchable title of the issue",
232
+ "content": "Full explanation: what the problem was and the step-by-step solution",
233
+ "category": "One of: Account, Billing, Technical, Feature, Other",
234
+ "tags": ["array", "of", "relevant", "keywords"]
235
+ }
236
+
237
+ Return ONLY the JSON array, no other text.
238
+
239
+ WhatsApp Export:
240
+ """
241
+
242
@app.post("/api/kb/whatsapp-import")
def whatsapp_import():
    """Extract problem→solution articles from a WhatsApp chat export.

    POST body: { "chat_text": "<raw WhatsApp export text>" }

    New articles are added to the KB; existing ones are never overwritten.
    Responds 400 for a missing/too-short export, 503 when the model returns
    nothing or storage is unavailable, and 500 when the model output is not
    a JSON array.
    """
    body = request.get_json(silent=True) or {}
    chat_text = body.get("chat_text") if isinstance(body, dict) else None

    # A non-string value (number, list, null) must be rejected here rather
    # than crash on ``.strip()``.
    if not isinstance(chat_text, str) or not chat_text.strip():
        return jsonify({"ok": False, "error": "chat_text is required"}), 400
    raw_chat = chat_text.strip()

    if len(raw_chat) < 100:
        return jsonify({"ok": False, "error": "Chat export too short to process"}), 400

    logger.info("WhatsApp import: %d chars received", len(raw_chat))

    gemini_out = _gemini_text(WHATSAPP_EXTRACTION_PROMPT + raw_chat[:50000], json_mode=True)
    if not gemini_out:
        # An empty reply means the model call failed or is not configured;
        # do not report that as a successful import of zero articles.
        return jsonify({"ok": False, "error": "AI extraction unavailable"}), 503

    articles = _safe_json(gemini_out, [])

    if not isinstance(articles, list):
        return jsonify({"ok": False, "error": "Gemini returned unexpected format", "raw": gemini_out[:500]}), 500

    stats = _save_kb_articles(articles, source_label="whatsapp_export")
    logger.info("WhatsApp import complete: %s", stats)

    if stats.get("error"):
        return jsonify({"ok": False, "error": stats["error"]}), 503

    return jsonify({
        "ok": True,
        "articles_found": len(articles),
        "saved": stats["saved"],
        "skipped_dupes": stats["skipped"],
    })
274
+
275
+
276
+ # ══════════════════════════════════════════════════════════════════════════════
277
+ # FEATURE 2 — Bulk KB Upload (CSV / Excel / PDF)
278
+ # ══════════════════════════════════════════════════════════════════════════════
279
+
280
def _extract_text_from_pdf_bytes(pdf_bytes: bytes) -> str:
    """Extract plain text from a PDF.

    Tries ``pypdf`` first (when installed). If that fails or yields no text,
    for example on a scanned document, the raw PDF is sent to Gemini for
    extraction.

    Returns:
        The extracted text, or an empty string when neither path succeeds.
    """
    if PYPDF_AVAILABLE:
        try:
            reader = pypdf.PdfReader(io.BytesIO(pdf_bytes))
            pages = [p.extract_text() or "" for p in reader.pages]
            text = "\n\n".join(pages).strip()
            if text:
                return text
        except Exception as e:
            logger.warning("pypdf extraction failed: %s", e)

    # Gemini fallback for scanned PDFs. The bytes are passed directly, so
    # the previous (unused) base64 copy of the whole file is not needed.
    if _gemini_client:
        try:
            resp = _gemini_client.models.generate_content(
                model=GEMINI_MODEL,
                contents=[
                    "Extract all text from this PDF document. Return plain text only.",
                    genai_types.Part.from_bytes(data=pdf_bytes, mime_type="application/pdf"),
                ],
            )
            return resp.text or ""
        except Exception as e:
            logger.error("Gemini PDF extraction failed: %s", e)
    return ""
307
+
308
+
309
+ PDF_KB_PROMPT = """
310
+ You are a support knowledge base curator.
311
+ Convert the following document content into structured KB articles.
312
+ Each article should cover one distinct topic, issue, or procedure.
313
+
314
+ Return a STRICT JSON array. Each object:
315
+ {
316
+ "title": "Short, searchable title",
317
+ "content": "Complete explanation in clear language",
318
+ "category": "One of: Account, Billing, Technical, Feature, Other",
319
+ "tags": ["keyword1", "keyword2"]
320
+ }
321
+
322
+ Return ONLY the JSON array.
323
+
324
+ Document content:
325
+ """
326
+
327
@app.post("/api/kb/bulk-upload")
def bulk_upload():
    """Import KB articles from an uploaded file.

    Accepts a multipart upload under the ``file`` field.
    Supported types: .csv, .xlsx, .xls, .pdf
      - CSV/Excel: requires ``title`` and ``content`` columns; optional
        ``category`` and ``tags`` (tags split on ``,``, ``;`` or ``|``).
      - PDF: text is extracted, then structured into articles by Gemini.

    Responds 400 for missing/unsupported/unparseable files or when no
    articles are found, 500 when a required library or the model step
    fails, and 503 when storage is unavailable.
    """
    if "file" not in request.files:
        return jsonify({"ok": False, "error": "No file uploaded"}), 400

    f = request.files["file"]
    filename = f.filename or ""
    # Only text after a dot is an extension; a file literally named "csv"
    # has none.
    ext = filename.rsplit(".", 1)[-1].lower() if "." in filename else ""
    file_data = f.read()

    articles = []

    if ext in ("csv", "xlsx", "xls"):
        if not PANDAS_AVAILABLE:
            return jsonify({"ok": False, "error": "pandas not installed on server"}), 500
        try:
            if ext == "csv":
                df = pd.read_csv(io.BytesIO(file_data))
            else:
                df = pd.read_excel(io.BytesIO(file_data))

            df.columns = [str(c).strip().lower() for c in df.columns]

            if "title" not in df.columns or "content" not in df.columns:
                return jsonify({"ok": False, "error": "CSV/Excel must have 'title' and 'content' columns"}), 400

            has_tags = "tags" in df.columns
            for _, row in df.iterrows():
                # Blank cells arrive as NaN; str(NaN) would be stored as the
                # literal text "nan", so skip incomplete rows instead.
                if pd.isna(row["title"]) or pd.isna(row["content"]):
                    continue
                title = str(row["title"]).strip()
                content = str(row["content"]).strip()
                if not title or not content:
                    continue

                tags = []
                if has_tags and pd.notna(row.get("tags")):
                    raw_tags = str(row["tags"])
                    tags = [t.strip() for t in re.split(r"[,;|]", raw_tags) if t.strip()]

                articles.append({
                    "title": title,
                    "content": content,
                    "category": str(row.get("category", "General")).strip() if pd.notna(row.get("category")) else "General",
                    "tags": tags,
                })
        except Exception as e:
            logger.error("Spreadsheet parse error: %s", e)
            return jsonify({"ok": False, "error": f"Could not parse file: {e}"}), 400

    elif ext == "pdf":
        text = _extract_text_from_pdf_bytes(file_data)
        if not text:
            return jsonify({"ok": False, "error": "Could not extract text from PDF"}), 400

        gemini_out = _gemini_text(PDF_KB_PROMPT + text[:50000], json_mode=True)
        articles = _safe_json(gemini_out, [])

        if not isinstance(articles, list):
            return jsonify({"ok": False, "error": "Gemini PDF structuring failed"}), 500

    else:
        return jsonify({"ok": False, "error": f"Unsupported file type: .{ext}. Use csv, xlsx, or pdf"}), 400

    if not articles:
        return jsonify({"ok": False, "error": "No articles extracted from file"}), 400

    stats = _save_kb_articles(articles, source_label=f"bulk_upload:{filename}")
    if stats.get("error"):
        # Storage being down must not be reported as a successful import.
        return jsonify({"ok": False, "error": stats["error"]}), 503

    return jsonify({
        "ok": True,
        "articles_found": len(articles),
        "saved": stats["saved"],
        "skipped_dupes": stats["skipped"],
    })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
397
 
398
+
399
+ # ══════════════════════════════════════════════════════════════════════════════
400
+ # FEATURE 3 — Ticket Submission via NL Text or Voice
401
+ # ══════════════════════════════════════════════════════════════════════════════
402
+
403
+ TICKET_EXTRACTION_PROMPT = """
404
+ You are a support ticket intake system for a software support portal.
405
+
406
+ A user has described their issue in natural language. Extract structured ticket fields.
407
+
408
+ Return STRICT JSON (no other text):
409
+ {
410
+ "title": "Concise ticket title (max 80 chars)",
411
+ "description": "Full detailed description of the issue, rewritten clearly in third person",
412
+ "category_hint": "Best matching category: Account | Billing | Technical | Feature | Other",
413
+ "priority_hint": "One of: low | medium | high | critical (based on urgency language)",
414
+ "keywords": ["array", "of", "technical", "keywords"]
415
+ }
416
+
417
+ User's message:
418
+ """
419
+
420
def _transcribe_audio_assemblyai(audio_b64: str, audio_format: str = "wav") -> str:
    """Transcribe base64-encoded audio with AssemblyAI.

    Uploads the decoded bytes, requests a transcript with language
    detection, then polls every 3 seconds (up to 30 attempts).

    Args:
        audio_b64: Base64 audio, optionally as a ``data:...;base64,`` URL.
        audio_format: Accepted for interface compatibility; the upload is
            sent as a raw octet stream, so the value is not used.

    Returns:
        The transcript text, or an empty string on any failure or timeout.
    """
    if not ASSEMBLYAI_API_KEY:
        return ""

    try:
        # Strip a data-URL header if present: b64decode silently discards
        # non-alphabet characters, so the header would otherwise be decoded
        # into garbage bytes instead of being rejected.
        if audio_b64.startswith("data:"):
            _, _, audio_b64 = audio_b64.partition(",")
        audio_bytes = base64.b64decode(audio_b64)
    except (ValueError, TypeError, AttributeError) as e:
        # binascii.Error is a ValueError subclass.
        logger.error("Invalid base64 audio payload: %s", e)
        return ""

    if not audio_bytes:
        logger.error("Audio payload decoded to zero bytes")
        return ""

    headers = {"authorization": ASSEMBLYAI_API_KEY}

    # 1. Upload
    try:
        upload_resp = requests.post(
            f"{ASSEMBLYAI_BASE}/upload",
            headers={**headers, "Content-Type": "application/octet-stream"},
            data=audio_bytes,
            timeout=30,
        )
        upload_resp.raise_for_status()
        upload_url = upload_resp.json().get("upload_url")
    except Exception as e:
        logger.error("AssemblyAI upload error: %s", e)
        return ""

    if not upload_url:
        logger.error("AssemblyAI upload returned no upload_url")
        return ""

    # 2. Request transcript
    try:
        tx_resp = requests.post(
            f"{ASSEMBLYAI_BASE}/transcript",
            headers={**headers, "Content-Type": "application/json"},
            json={"audio_url": upload_url, "language_detection": True},
            timeout=15,
        )
        tx_resp.raise_for_status()
        tx_id = tx_resp.json().get("id")
    except Exception as e:
        logger.error("AssemblyAI transcript request error: %s", e)
        return ""

    if not tx_id:
        # Without an id the poll loop would hit /transcript/None for 90 s.
        logger.error("AssemblyAI transcript request returned no id")
        return ""

    # 3. Poll; transient poll errors are logged and the loop keeps trying.
    for _ in range(30):
        time.sleep(3)
        try:
            poll = requests.get(
                f"{ASSEMBLYAI_BASE}/transcript/{tx_id}",
                headers=headers,
                timeout=15,
            )
            poll.raise_for_status()
            result = poll.json()
            status = result.get("status")
            if status == "completed":
                return result.get("text", "") or ""
            elif status == "error":
                logger.error("AssemblyAI error: %s", result.get("error"))
                return ""
        except Exception as e:
            logger.error("AssemblyAI poll error: %s", e)

    logger.error("AssemblyAI transcript %s timed out", tx_id)
    return ""
476
+
477
+
478
@app.post("/api/tickets/submit-nl")
def submit_ticket_nl():
    """Turn a free-text issue description into structured ticket fields.

    POST body: { "message": "I can't log in, it says my account is locked...", "user_id": "..." }

    Returns the extracted fields so the frontend can pre-fill and submit the
    ticket. Responds 400 when ``message`` is missing and 500 when no ticket
    could be extracted.
    """
    body = request.get_json(silent=True) or {}
    if not isinstance(body, dict):
        body = {}
    message = body.get("message", "")
    user_id = body.get("user_id", "anonymous")

    if not isinstance(message, str) or not message.strip():
        return jsonify({"ok": False, "error": "message is required"}), 400
    message = message.strip()

    gemini_out = _gemini_text(TICKET_EXTRACTION_PROMPT + message, json_mode=True)
    ticket = _safe_json(gemini_out, {})

    # The model may answer with an array or scalar; only an object with a
    # title is a usable ticket.
    if not isinstance(ticket, dict) or not ticket.get("title"):
        return jsonify({"ok": False, "error": "Could not extract ticket info from message"}), 500

    # Log the submission attempt. This is best-effort: a storage hiccup must
    # not throw away a ticket that was extracted successfully.
    if db:
        try:
            db.collection("iris_ai_ticket_drafts").add({
                "user_id": user_id,
                "raw_input": message,
                "extracted": ticket,
                "channel": "nl_text",
                "created_at": datetime.now(timezone.utc).isoformat(),
            })
        except Exception as e:
            logger.error("Ticket draft log failed: %s", e)

    return jsonify({"ok": True, "ticket": ticket})
508
+
509
+
510
@app.post("/api/tickets/submit-voice")
def submit_ticket_voice():
    """Turn a voice recording into structured ticket fields.

    POST body: { "audio_b64": "<base64 audio>", "audio_format": "wav", "user_id": "..." }

    The audio is transcribed via AssemblyAI, then the transcript goes
    through the same Gemini extraction as text submissions. Responds 400
    when ``audio_b64`` is missing and 500 when transcription is not
    configured, fails, or no ticket could be extracted.
    """
    body = request.get_json(silent=True) or {}
    if not isinstance(body, dict):
        body = {}
    audio_b64 = body.get("audio_b64", "")
    audio_format = body.get("audio_format", "wav")
    user_id = body.get("user_id", "anonymous")

    if not isinstance(audio_b64, str) or not audio_b64:
        return jsonify({"ok": False, "error": "audio_b64 is required"}), 400

    if not ASSEMBLYAI_API_KEY:
        return jsonify({"ok": False, "error": "AssemblyAI not configured on server"}), 500

    logger.info("Voice ticket: transcribing audio for user=%s", user_id)
    transcript = _transcribe_audio_assemblyai(audio_b64, audio_format)

    if not transcript:
        return jsonify({"ok": False, "error": "Transcription failed or returned empty result"}), 500

    gemini_out = _gemini_text(TICKET_EXTRACTION_PROMPT + transcript, json_mode=True)
    ticket = _safe_json(gemini_out, {})

    # Guard against array/scalar model output before calling ``.get``.
    if not isinstance(ticket, dict) or not ticket.get("title"):
        return jsonify({"ok": False, "error": "Could not extract ticket info from transcript"}), 500

    # Draft logging is best-effort; never fail the request because of it.
    if db:
        try:
            db.collection("iris_ai_ticket_drafts").add({
                "user_id": user_id,
                "raw_input": transcript,
                "extracted": ticket,
                "channel": "voice",
                "created_at": datetime.now(timezone.utc).isoformat(),
            })
        except Exception as e:
            logger.error("Voice ticket draft log failed: %s", e)

    return jsonify({"ok": True, "transcript": transcript, "ticket": ticket})
549
+
550
+
551
+ # ══════════════════════════════════════════════════════════════════════════════
552
+ # FEATURE 4 — System Tutorial Ingestion
553
+ # ══════════════════════════════════════════════════════════════════════════════
554
+
555
+ TUTORIAL_EXTRACTION_PROMPT = """
556
+ You are a knowledge base curator for a software support system.
557
+
558
+ You have been given a timestamped transcript from a video tutorial about the Iris Support Portal.
559
+ Your job is to extract discrete how-to articles, one per distinct feature or task demonstrated.
560
+
561
+ For each article, identify the best timestamp range where the solution or demonstration occurs.
562
+
563
+ Return a STRICT JSON array. Each object:
564
+ {
565
+ "title": "How to <do something> in Iris",
566
+ "content": "Step-by-step instructions based on the tutorial",
567
+ "category": "One of: Account | Tickets | Agents | Reports | Admin | Other",
568
+ "tags": ["keyword1", "keyword2"],
569
+ "timestamp_start": <seconds as integer>,
570
+ "timestamp_end": <seconds as integer>
571
+ }
572
+
573
+ Return ONLY the JSON array.
574
+
575
+ Transcript (with timestamps in [MM:SS] or [HH:MM:SS] format):
576
+ """
577
+
578
+ def _parse_timestamp_to_seconds(ts: str) -> int:
579
+ """Convert MM:SS or HH:MM:SS string to integer seconds."""
580
+ parts = ts.strip("[]").split(":")
581
+ try:
582
+ if len(parts) == 2:
583
+ return int(parts[0]) * 60 + int(parts[1])
584
+ elif len(parts) == 3:
585
+ return int(parts[0]) * 3600 + int(parts[1]) * 60 + int(parts[2])
586
+ except Exception:
587
+ pass
588
+ return 0
589
+
590
+
591
@app.post("/api/kb/tutorial-ingest")
def tutorial_ingest():
    """Create how-to KB articles from a timestamped tutorial transcript.

    POST body: {
        "transcript": "<timestamped transcript text>",
        "video_url": "https://...",        (optional, for linking crop timestamps)
        "video_title": "Getting Started with Iris"
    }

    Gemini extracts one article per demonstrated task, each with a
    timestamp range. Responds 400 when ``transcript`` is missing and 500
    when the model output is not a JSON array.
    """
    body = request.get_json(silent=True) or {}
    if not isinstance(body, dict):
        body = {}
    transcript = body.get("transcript", "")
    video_url = body.get("video_url", "")
    video_title = body.get("video_title", "Tutorial")

    if not isinstance(transcript, str) or not transcript.strip():
        return jsonify({"ok": False, "error": "transcript is required"}), 400
    transcript = transcript.strip()

    logger.info("Tutorial ingest: %d chars, title=%s", len(transcript), video_title)

    gemini_out = _gemini_text(TUTORIAL_EXTRACTION_PROMPT + transcript[:50000], json_mode=True)
    articles = _safe_json(gemini_out, [])

    if not isinstance(articles, list):
        return jsonify({"ok": False, "error": "Gemini returned unexpected format"}), 500

    # Keep only JSON objects; item assignment below would fail on anything else.
    articles = [a for a in articles if isinstance(a, dict)]

    # Inject video metadata into each article and normalise its timestamps.
    for a in articles:
        a["video_url"] = video_url
        a["video_title"] = video_title
        for ts_key in ("timestamp_start", "timestamp_end"):
            val = a.get(ts_key)
            if isinstance(val, str):
                a[ts_key] = _parse_timestamp_to_seconds(val)
            elif isinstance(val, float):
                # The model sometimes emits 90.0; keep the value rather than
                # zeroing it. nan/inf cannot be converted and fall back to 0.
                try:
                    a[ts_key] = int(val)
                except (ValueError, OverflowError):
                    a[ts_key] = 0
            elif not isinstance(val, int):
                a[ts_key] = 0

    stats = _save_kb_articles(articles, source_label=f"tutorial:{video_title}")

    return jsonify({
        "ok": True,
        "video_title": video_title,
        "articles_found": len(articles),
        "saved": stats["saved"],
        "skipped_dupes": stats["skipped"],
    })
638
 
639
+
640
+ # ══════════════════════════════════════════════════════════════════════════════
641
+ # FEATURE 5 — Agent Solution Writing (NL Text + Voice)
642
+ # ══════════════════════════════════════════════════════════════════════════════
643
+
644
+ SOLUTION_EXTRACTION_PROMPT = """
645
+ You are a support knowledge base curator.
646
+
647
+ An agent or support staff has described a solution they discovered while resolving a ticket.
648
+ Structure this into a reusable KB article.
649
+
650
+ Return STRICT JSON:
651
+ {
652
+ "title": "Short, searchable problem title",
653
+ "content": "Clear step-by-step solution, rewritten for future reference",
654
+ "category": "One of: Account | Billing | Technical | Feature | Other",
655
+ "tags": ["relevant", "keywords"]
656
+ }
657
+
658
+ Agent's description:
659
+ """
660
+
661
@app.post("/api/kb/agent-solution-nl")
def agent_solution_nl():
    """Create a KB article from an agent's written solution description.

    POST body: { "message": "I fixed ticket #123 by...", "agent_id": "...", "ticket_id": "..." }

    Responds 400 when ``message`` is missing, 500 when the model output
    cannot be structured, and 503 when storage is unavailable.
    """
    body = request.get_json(silent=True) or {}
    if not isinstance(body, dict):
        body = {}
    message = body.get("message", "")
    agent_id = body.get("agent_id", "unknown")
    ticket_id = body.get("ticket_id", "")

    if not isinstance(message, str) or not message.strip():
        return jsonify({"ok": False, "error": "message is required"}), 400
    message = message.strip()

    gemini_out = _gemini_text(SOLUTION_EXTRACTION_PROMPT + message, json_mode=True)
    article = _safe_json(gemini_out, {})

    if not isinstance(article, dict) or not article.get("title"):
        return jsonify({"ok": False, "error": "Could not structure solution"}), 500

    # Add ticket reference tag. ``tags`` may be missing, null or a bare
    # string in model output, so normalise it to a list before extending.
    if ticket_id:
        tags = article.get("tags")
        if isinstance(tags, str):
            tags = [tags]
        elif not isinstance(tags, list):
            tags = []
        article["tags"] = tags + [f"ticket:{ticket_id}"]

    stats = _save_kb_articles([article], source_label=f"agent:{agent_id}")
    if stats.get("error"):
        # Do not claim success when nothing could be written.
        return jsonify({"ok": False, "error": stats["error"]}), 503

    return jsonify({
        "ok": True,
        "saved": stats["saved"],
        "article": article,
    })
692
+
693
+
694
@app.post("/api/kb/agent-solution-voice")
def agent_solution_voice():
    """Create a KB article from an agent's voice note.

    POST body: { "audio_b64": "...", "audio_format": "wav", "agent_id": "...", "ticket_id": "..." }

    The note is transcribed via AssemblyAI and the transcript is structured
    by Gemini. Responds 400 when ``audio_b64`` is missing, 500 when
    transcription is not configured or fails or the output cannot be
    structured, and 503 when storage is unavailable.
    """
    body = request.get_json(silent=True) or {}
    if not isinstance(body, dict):
        body = {}
    audio_b64 = body.get("audio_b64", "")
    audio_format = body.get("audio_format", "wav")
    agent_id = body.get("agent_id", "unknown")
    ticket_id = body.get("ticket_id", "")

    if not isinstance(audio_b64, str) or not audio_b64:
        return jsonify({"ok": False, "error": "audio_b64 is required"}), 400

    # Same explicit configuration check as the voice ticket endpoint, so a
    # missing key is not misreported as a failed transcription.
    if not ASSEMBLYAI_API_KEY:
        return jsonify({"ok": False, "error": "AssemblyAI not configured on server"}), 500

    transcript = _transcribe_audio_assemblyai(audio_b64, audio_format)

    if not transcript:
        return jsonify({"ok": False, "error": "Transcription failed"}), 500

    gemini_out = _gemini_text(SOLUTION_EXTRACTION_PROMPT + transcript, json_mode=True)
    article = _safe_json(gemini_out, {})

    if not isinstance(article, dict) or not article.get("title"):
        return jsonify({"ok": False, "error": "Could not structure solution from transcript"}), 500

    # ``tags`` may be missing, null or a bare string; normalise before
    # appending the ticket reference.
    if ticket_id:
        tags = article.get("tags")
        if isinstance(tags, str):
            tags = [tags]
        elif not isinstance(tags, list):
            tags = []
        article["tags"] = tags + [f"ticket:{ticket_id}"]

    stats = _save_kb_articles([article], source_label=f"agent:{agent_id}")
    if stats.get("error"):
        return jsonify({"ok": False, "error": stats["error"]}), 503

    return jsonify({
        "ok": True,
        "transcript": transcript,
        "saved": stats["saved"],
        "article": article,
    })
731
 
732
+
733
+ # ══════════════════════════════════════════════════════════════════════════════
734
+ # FEATURE 6 — Iris Support Chatbot (RAG over KB + Tutorials)
735
+ # ══════════════════════════════════════════════════════════════════════════════
736
+
737
def _search_kb(query: str, limit: int = 5) -> List[Dict]:
    """Keyword search over the most recent KB articles.

    Loads up to 200 of the newest ``iris_kb_articles`` documents and scores
    each by how many query terms (longer than two characters, lower-cased)
    occur as substrings of its title, content and tags.

    Production upgrade: swap with a vector DB (e.g. Qdrant) or Vertex AI Search.

    Args:
        query: The user's question.
        limit: Maximum number of results to return.

    Returns:
        Matching article dicts with an added ``score`` key, best first;
        an empty list when nothing matches, Firestore is unavailable, or
        the query fails.
    """
    if not db:
        return []

    query_terms = [t.lower() for t in query.split() if len(t) > 2]
    if not query_terms:
        # Nothing can score above zero, so skip the 200-document read.
        return []

    try:
        # Firestore has no full-text search, so fetch recent articles and
        # score them in memory.
        docs = db.collection("iris_kb_articles").order_by(
            "created_at", direction=firestore.Query.DESCENDING
        ).limit(200).stream()

        results = []
        for doc in docs:
            d = doc.to_dict() or {}

            # Tolerate null or non-list tags and non-string entries: one
            # malformed document must not abort the whole search.
            tags = d.get("tags") or []
            if isinstance(tags, str):
                tags = [tags]
            tag_text = " ".join(str(t) for t in tags)

            text = f"{d.get('title') or ''} {d.get('content') or ''} {tag_text}".lower()
            score = sum(1 for term in query_terms if term in text)
            if score > 0:
                results.append({"score": score, **d})

        results.sort(key=lambda x: x["score"], reverse=True)
        return results[:limit]

    except Exception as e:
        logger.error("KB search error: %s", e)
        return []
767
+
768
+
769
+ CHATBOT_SYSTEM_PROMPT = """
770
+ You are Iris, an intelligent support assistant for the Iris Support Portal.
771
+
772
+ Your role: Help users resolve issues quickly using the knowledge base and tutorial content provided.
773
+
774
+ Rules:
775
+ - Answer ONLY from the provided context. Do not hallucinate solutions.
776
+ - If the answer is in a tutorial with a timestamp, mention the video and timestamp so the user can jump to that moment.
777
+ - Be concise, clear, and friendly.
778
+ - If you cannot find the answer, say so honestly and suggest submitting a ticket.
779
+ - Format step-by-step answers as numbered lists.
780
+ """
781
+
782
@app.post("/api/chatbot/query")
def chatbot_query():
    """Answer a support question from the knowledge base (RAG).

    POST body: {
        "message": "How do I reset a user's password?",
        "session_id": "...",
        "user_id": "..."
    }

    Searches the KB, builds a context block per hit (with tutorial
    timestamp where available), and asks Gemini to synthesize an answer.
    Responds 400 when ``message`` is missing.
    """
    body = request.get_json(silent=True) or {}
    if not isinstance(body, dict):
        body = {}
    message = body.get("message", "")
    session_id = body.get("session_id", "default")
    user_id = body.get("user_id", "anonymous")

    if not isinstance(message, str) or not message.strip():
        return jsonify({"ok": False, "error": "message is required"}), 400
    message = message.strip()

    # Retrieve relevant KB context
    kb_results = _search_kb(message, limit=5)

    context_blocks = []
    sources = []
    for r in kb_results:
        block = f"[Article: {r.get('title')}]\n{r.get('content', '')}"

        ts = r.get("timestamp_start")
        # Only format real numbers: a stored string or other type would
        # crash the integer arithmetic / ``:02d`` formatting.
        if isinstance(ts, (int, float)) and not isinstance(ts, bool):
            mm, ss = divmod(int(ts), 60)
            url = r.get("video_url", "")
            block += f"\n(Tutorial: {r.get('video_title','Video')} at {mm:02d}:{ss:02d}"
            block += f" — {url})" if url else ")"

        context_blocks.append(block)
        sources.append({
            "title": r.get("title"),
            "category": r.get("category"),
            "source": r.get("source"),
            "ts_start": r.get("timestamp_start"),
            "video_url": r.get("video_url"),
        })

    context_str = "\n\n---\n\n".join(context_blocks) if context_blocks else "No relevant articles found."

    full_prompt = f"""{CHATBOT_SYSTEM_PROMPT}

KNOWLEDGE BASE CONTEXT:
{context_str}

USER QUESTION: {message}

Answer:"""

    answer = _gemini_text(full_prompt)

    if not answer:
        answer = "I'm sorry, I couldn't process your question right now. Please try again or submit a support ticket."

    # Persist chat log. Best-effort: the user should still get the answer
    # even if the log write fails.
    if db:
        try:
            db.collection("iris_chatbot_logs").add({
                "user_id": user_id,
                "session_id": session_id,
                "message": message,
                "answer": answer,
                "sources": sources,
                "created_at": datetime.now(timezone.utc).isoformat(),
            })
        except Exception as e:
            logger.error("Chat log write failed: %s", e)

    return jsonify({
        "ok": True,
        "answer": answer,
        "sources": sources,
    })
855
 
856
+
857
+ # ══════════════════════════════════════════════════════════════════════════════
858
+ # KB READ ENDPOINTS (for frontend display)
859
+ # ══════════════════════════════════════════════════════════════════════════════
860
+
861
@app.get("/api/kb/articles")
def list_kb_articles():
    """List KB articles, newest first.

    GET /api/kb/articles?category=Technical&limit=50

    Query params:
        category: optional exact-match filter.
        limit: page size; defaults to 50 when missing or non-numeric and
            is clamped to the range 1-500.

    Responds 500 when Firestore is unavailable or the query fails.
    """
    category = request.args.get("category", "")

    # ``type=int`` falls back to the default on a bad value instead of
    # raising ValueError (which surfaced as an unhandled 500).
    limit = request.args.get("limit", 50, type=int)
    limit = max(1, min(limit, 500))

    if not db:
        return jsonify({"ok": False, "error": "Firebase unavailable"}), 500

    try:
        query = db.collection("iris_kb_articles").order_by(
            "created_at", direction=firestore.Query.DESCENDING
        )
        if category:
            query = query.where("category", "==", category)

        docs = query.limit(limit).stream()
        articles = [{"id": d.id, **d.to_dict()} for d in docs]
        return jsonify({"ok": True, "articles": articles, "count": len(articles)})
    except Exception as e:
        logger.error("KB list error: %s", e)
        return jsonify({"ok": False, "error": str(e)}), 500
886
+
887
+
888
@app.delete("/api/kb/articles/<article_id>")
def delete_kb_article(article_id: str):
    """Delete one KB article by document id.

    DELETE /api/kb/articles/<id> — Admin only (JWT check to be enforced at gateway)

    Responds 500 when Firestore is unavailable or the delete call fails.
    """
    if not db:
        return jsonify({"ok": False, "error": "Firebase unavailable"}), 500
    try:
        db.collection("iris_kb_articles").document(article_id).delete()
        return jsonify({"ok": True})
    except Exception as e:
        # Log like the other KB handlers do, so failed deletes are traceable.
        logger.error("KB delete error (id=%s): %s", article_id, e)
        return jsonify({"ok": False, "error": str(e)}), 500
898
+
899
+
900
+ # ══════════════════════════════════════════════════════════════════════════════
901
+ # HEALTH
902
+ # ══════════════════════════════════════════════════════════════════════════════
903
+
904
@app.get("/health")
def health():
    """Report service status and which integrations are configured.

    ``kb_articles`` is the Firestore aggregation count of
    ``iris_kb_articles``; it stays 0 when Firestore is unavailable or the
    count query fails.
    """
    article_count = 0
    if db:
        try:
            docs = db.collection("iris_kb_articles").count().get()
            article_count = docs[0][0].value
        except Exception as e:
            # Keep the endpoint healthy, but record why the count is missing
            # instead of swallowing the error silently.
            logger.warning("Health check: KB count failed: %s", e)

    return jsonify({
        "ok": True,
        "service": "Iris AI Service v1.0",
        "gemini": bool(_gemini_client),
        "assemblyai": bool(ASSEMBLYAI_API_KEY),
        "firebase": bool(db),
        "kb_articles": article_count,
    })
922
+
923
 
924
+ # ══════════════════════════════════════════════════════════════════════════════
925
+ # ENTRYPOINT
926
+ # ══════════════════════════════════════════════════════════════════════════════
927
 
928
if __name__ == "__main__":
    # Port comes from the environment, defaulting to 7860.
    listen_port = int(os.environ.get("PORT", 7860))
    logger.info("Iris AI Service starting on port %d", listen_port)
    app.run(host="0.0.0.0", port=listen_port)