pythonprincess committed on
Commit
22eeb7e
·
verified ·
1 Parent(s): 49b82a8

Upload 9 files

Browse files
app/__init__.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
# PENNY app package initialization
"""
PENNY Application Package

This package contains the core orchestration, routing, and agent logic
for the PENNY civic engagement assistant.
"""

# Package metadata (conventional module dunder attributes).
__version__ = "2.2.0"
__author__ = "CyberShawties"
app/event_weather.py ADDED
@@ -0,0 +1,761 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app/event_weather.py
2
+ """
3
+ 🌤️ Penny's Event + Weather Matchmaker
4
+ Helps residents find the perfect community activity based on real-time weather.
5
+ Penny always suggests what's actually enjoyable — not just what exists.
6
+
7
+ Production-ready version with structured logging, performance tracking, and robust error handling.
8
+ """
9
+
10
+ import json
11
+ import time
12
+ from pathlib import Path
13
+ from typing import Dict, Any, List, Optional, Tuple
14
+ from datetime import datetime
15
+ from enum import Enum
16
+
17
+ from app.weather_agent import get_weather_for_location
18
+ from app.location_utils import load_city_events
19
+ from app.logging_utils import log_interaction, sanitize_for_logging
20
+
21
+ # --- LOGGING SETUP (Structured, Azure-compatible) ---
22
+ import logging
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
+ # --- CONFIGURATION CONSTANTS ---
27
class EventWeatherConfig:
    """Tunable limits for the event recommendation system.

    All values are simple class-level constants so they can be referenced
    without instantiation (e.g. ``EventWeatherConfig.MAX_RECOMMENDATIONS``).
    """

    # Cap on events listed when the weather service is down.
    MAX_FALLBACK_EVENTS = 10
    # Cap on weather-ranked suggestions returned to the caller.
    MAX_RECOMMENDATIONS = 20
    # Budget for the weather lookup, in seconds.
    WEATHER_TIMEOUT_SECONDS = 5.0
    # Operations slower than this (ms) trigger a warning log.
    SLOW_OPERATION_THRESHOLD_MS = 2000
33
+
34
+
35
+ # --- PENNY'S WEATHER WISDOM (Personality-Driven Thresholds) ---
36
class WeatherThresholds:
    """
    Penny's practical weather rules for event recommendations.

    Temperatures are in Fahrenheit; keyword lists are matched as
    substrings against a lowercased weather phrase.
    """

    WARM_THRESHOLD = 70   # F° — comfortable for outdoor events
    HOT_THRESHOLD = 85    # F° — possibly too hot for some activities
    COOL_THRESHOLD = 60   # F° — jacket weather
    COLD_THRESHOLD = 40   # F° — indoor events preferred

    # Phrase fragments that indicate precipitation / winter conditions.
    RAINY_KEYWORDS = ["rain", "shower", "storm", "drizzle", "thunderstorm"]
    SNOWY_KEYWORDS = ["snow", "flurries", "blizzard", "ice"]
    # Phrase fragments that indicate pleasant conditions.
    NICE_KEYWORDS = ["clear", "sunny", "fair", "partly cloudy"]
49
+
50
+
51
class ErrorType(str, Enum):
    """Structured error codes emitted by the event weather system.

    Inherits from ``str`` so values compare equal to their string codes
    and serialize cleanly into logs and JSON responses.
    """

    NOT_FOUND = "event_data_not_found"
    PARSE_ERROR = "json_parse_error"
    WEATHER_ERROR = "weather_service_error"
    UNKNOWN = "unknown_error"
57
+
58
+
59
class EventWeatherException(Exception):
    """Base exception for the event weather system.

    Carries a structured :class:`ErrorType` plus the optional underlying
    exception so callers can branch and log without string matching.
    """

    def __init__(self, error_type: ErrorType, message: str, original_error: Optional[Exception] = None):
        super().__init__(message)
        self.error_type = error_type
        self.message = message
        self.original_error = original_error
66
+
67
+
68
+ # --- MAIN RECOMMENDATION FUNCTION ---
69
async def get_event_recommendations_with_weather(
    tenant_id: str,
    lat: float,
    lon: float,
    include_all_events: bool = False,
    session_id: Optional[str] = None,
    user_id: Optional[str] = None
) -> Dict[str, Any]:
    """
    🌤️ Penny's Event + Weather Intelligence System

    Combines real-time weather with community events to give residents
    smart, helpful suggestions about what to do today.

    Args:
        tenant_id: City identifier (e.g., 'atlanta_ga', 'seattle_wa')
        lat: Latitude for weather lookup
        lon: Longitude for weather lookup
        include_all_events: If True, returns all events regardless of weather fit
        session_id: Optional session identifier for logging
        user_id: Optional user identifier for logging

    Returns:
        Dict containing:
            - weather: Current conditions
            - suggestions: Penny's prioritized recommendations
            - all_events: Optional full event list
            - metadata: Useful context (timestamp, event count, etc.)

    Raises:
        EventWeatherException: When critical errors occur.
            NOTE: in practice both exception paths below are caught and
            converted into an error-response dict, so callers normally
            receive a dict either way.

    Example:
        >>> recommendations = await get_event_recommendations_with_weather(
        ...     tenant_id="norfolk_va",
        ...     lat=36.8508,
        ...     lon=-76.2859
        ... )
        >>> print(recommendations["suggestions"][0])
        🌟 **Outdoor Concert** at Town Point Park — Perfect outdoor weather! This is the one.
    """
    start_time = time.time()

    # Sanitize inputs for logging
    safe_tenant_id = sanitize_for_logging(tenant_id)
    safe_coords = f"({lat:.4f}, {lon:.4f})"

    logger.info(
        f"🌤️ Event weather recommendation request: tenant={safe_tenant_id}, coords={safe_coords}"
    )

    try:
        # --- STEP 1: Load City Events (Standardized) ---
        events, event_load_time = await _load_events_with_timing(tenant_id)

        # Zero events is a successful-but-empty outcome, not an error:
        # log success and return a friendly "nothing loaded" response.
        if not events:
            response = _create_no_events_response(tenant_id)
            _log_operation(
                operation="event_weather_recommendations",
                tenant_id=tenant_id,
                session_id=session_id,
                user_id=user_id,
                success=True,
                event_count=0,
                response_time_ms=_calculate_response_time(start_time),
                fallback_used=False,
                weather_available=False
            )
            return response

        logger.info(f"✅ Loaded {len(events)} events for {safe_tenant_id} in {event_load_time:.2f}s")

        # --- STEP 2: Get Live Weather Data ---
        # Never raises: returns (payload, False) when the service fails.
        weather, weather_available = await _get_weather_with_fallback(lat, lon)

        # --- STEP 3: Generate Recommendations ---
        if weather_available:
            response = await _generate_weather_optimized_recommendations(
                tenant_id=tenant_id,
                events=events,
                weather=weather,
                include_all_events=include_all_events
            )
        else:
            # Graceful degradation: Still show events without weather optimization
            response = _create_fallback_response(tenant_id, events)

        # --- STEP 4: Calculate Performance Metrics ---
        response_time_ms = _calculate_response_time(start_time)

        # Add performance metadata
        response["performance"] = {
            "response_time_ms": response_time_ms,
            "event_load_time_ms": int(event_load_time * 1000),
            "weather_available": weather_available
        }

        # Warn if operation was slow
        if response_time_ms > EventWeatherConfig.SLOW_OPERATION_THRESHOLD_MS:
            logger.warning(
                f"⚠️ Slow event weather operation: {response_time_ms}ms for {safe_tenant_id}"
            )

        # --- STEP 5: Log Structured Interaction ---
        _log_operation(
            operation="event_weather_recommendations",
            tenant_id=tenant_id,
            session_id=session_id,
            user_id=user_id,
            success=True,
            event_count=len(events),
            response_time_ms=response_time_ms,
            fallback_used=not weather_available,
            weather_available=weather_available
        )

        logger.info(
            f"✅ Returning {len(response.get('suggestions', []))} recommendations "
            f"for {safe_tenant_id} in {response_time_ms}ms"
        )

        return response

    except EventWeatherException as e:
        # Known error with structured handling
        response_time_ms = _calculate_response_time(start_time)

        _log_operation(
            operation="event_weather_recommendations",
            tenant_id=tenant_id,
            session_id=session_id,
            user_id=user_id,
            success=False,
            event_count=0,
            response_time_ms=response_time_ms,
            fallback_used=False,
            weather_available=False,
            error_type=e.error_type.value,
            error_message=str(e)
        )

        return _create_error_response(
            tenant_id=tenant_id,
            error_type=e.error_type.value,
            message=e.message
        )

    except Exception as e:
        # Unexpected error — broad catch is deliberate here: this is the
        # top-level boundary of the feature, and the contract is to always
        # return a response dict rather than propagate.
        response_time_ms = _calculate_response_time(start_time)

        logger.error(
            f"❌ Unexpected error in event weather recommendations: {str(e)}",
            exc_info=True
        )

        _log_operation(
            operation="event_weather_recommendations",
            tenant_id=tenant_id,
            session_id=session_id,
            user_id=user_id,
            success=False,
            event_count=0,
            response_time_ms=response_time_ms,
            fallback_used=False,
            weather_available=False,
            error_type=ErrorType.UNKNOWN.value,
            error_message="Unexpected system error"
        )

        return _create_error_response(
            tenant_id=tenant_id,
            error_type=ErrorType.UNKNOWN.value,
            message="Something unexpected happened. Please try again in a moment."
        )
244
+
245
+
246
+ # --- EVENT LOADING WITH TIMING ---
247
async def _load_events_with_timing(tenant_id: str) -> Tuple[List[Dict[str, Any]], float]:
    """
    Load city events with performance timing.

    Args:
        tenant_id: City identifier

    Returns:
        Tuple of (events list, load time in seconds)

    Raises:
        EventWeatherException: When event loading fails
    """
    # FIX: use a monotonic clock for durations. time.time() is wall-clock
    # and can jump (NTP sync, manual clock changes), yielding negative or
    # inflated load times; perf_counter() is guaranteed monotonic.
    load_start = time.perf_counter()

    try:
        loaded_data = load_city_events(tenant_id)
        events = loaded_data.get("events", [])
        load_time = time.perf_counter() - load_start

        return events, load_time

    except FileNotFoundError as e:
        logger.error(f"❌ Event data file not found for tenant: {tenant_id}")
        raise EventWeatherException(
            error_type=ErrorType.NOT_FOUND,
            message=f"I don't have event data for {tenant_id} yet. Let me know if you'd like me to add it!",
            original_error=e
        )

    except json.JSONDecodeError as e:
        logger.error(f"❌ Invalid JSON in event data for {tenant_id}: {e}")
        raise EventWeatherException(
            error_type=ErrorType.PARSE_ERROR,
            message="There's an issue with the event data format. Our team has been notified!",
            original_error=e
        )

    except Exception as e:
        logger.error(f"❌ Unexpected error loading events: {e}", exc_info=True)
        raise EventWeatherException(
            error_type=ErrorType.UNKNOWN,
            message="Something went wrong loading events. Please try again in a moment.",
            original_error=e
        )
292
+
293
+
294
+ # --- WEATHER RETRIEVAL WITH FALLBACK ---
295
async def _get_weather_with_fallback(
    lat: float,
    lon: float
) -> Tuple[Dict[str, Any], bool]:
    """
    Fetch live weather, degrading gracefully when the service fails.

    Args:
        lat: Latitude
        lon: Longitude

    Returns:
        ``(weather payload, True)`` on success, or
        ``({"error": ...}, False)`` when the weather call raised.
    """
    try:
        weather = await get_weather_for_location(lat, lon)
        reading = weather.get("temperature", {}).get("value")
        description = weather.get("phrase", "N/A")
        logger.info(f"✅ Weather retrieved: {description} at {reading}°F")
        return weather, True
    except Exception as exc:
        # Deliberate broad catch: any failure switches the caller into
        # fallback mode instead of breaking the whole recommendation flow.
        logger.warning(f"⚠️ Weather service unavailable: {str(exc)}")
        return {"error": "Weather service unavailable"}, False
322
+
323
+
324
+ # --- WEATHER-OPTIMIZED RECOMMENDATIONS ---
325
async def _generate_weather_optimized_recommendations(
    tenant_id: str,
    events: List[Dict[str, Any]],
    weather: Dict[str, Any],
    include_all_events: bool
) -> Dict[str, Any]:
    """
    Generate event recommendations optimized for current weather conditions.

    Args:
        tenant_id: City identifier
        events: List of available events
        weather: Weather data dictionary (expects optional keys
            ``temperature.value`` and ``phrase``)
        include_all_events: Whether to include full event list in response

    Returns:
        Structured response with weather-optimized suggestions
    """
    # NOTE(review): declared async but contains no awaits — presumably kept
    # async for interface symmetry with its caller; confirm before changing.
    temp = weather.get("temperature", {}).get("value")
    phrase = weather.get("phrase", "").lower()

    # Analyze weather conditions
    weather_analysis = _analyze_weather_conditions(temp, phrase)

    # Generate Penny's smart suggestions (already sorted best-fit first)
    suggestions = _generate_recommendations(
        events=events,
        weather_analysis=weather_analysis,
        temp=temp,
        phrase=phrase
    )

    # Build response; suggestions are capped at the configured maximum.
    # NOTE(review): datetime.utcnow() is deprecated since Python 3.12 and
    # produces a naive timestamp; consider datetime.now(timezone.utc)
    # (changes the isoformat output — verify downstream consumers first).
    response = {
        "weather": weather,
        "weather_summary": _create_weather_summary(temp, phrase),
        "suggestions": suggestions[:EventWeatherConfig.MAX_RECOMMENDATIONS],
        "tenant_id": tenant_id,
        "event_count": len(events),
        "timestamp": datetime.utcnow().isoformat(),
        "weather_analysis": weather_analysis
    }

    # Optionally include full event list
    if include_all_events:
        response["all_events"] = events

    return response
373
+
374
+
375
+ # --- HELPER FUNCTIONS (Penny's Intelligence Layer) ---
376
+
377
def _analyze_weather_conditions(temp: Optional[float], phrase: str) -> Dict[str, Any]:
    """
    🧠 Penny's weather interpretation logic.
    Returns structured analysis of current conditions.

    Args:
        temp: Temperature in Fahrenheit, or None when unavailable
        phrase: Weather description phrase (matched lowercased by callers)

    Returns:
        Dictionary with weather analysis including outdoor suitability.
        When temp is None, temp_category stays None and both
        outdoor_friendly/indoor_preferred remain False.
    """
    analysis = {
        "is_rainy": any(keyword in phrase for keyword in WeatherThresholds.RAINY_KEYWORDS),
        "is_snowy": any(keyword in phrase for keyword in WeatherThresholds.SNOWY_KEYWORDS),
        "is_nice": any(keyword in phrase for keyword in WeatherThresholds.NICE_KEYWORDS),
        "temp_category": None,
        "outdoor_friendly": False,
        "indoor_preferred": False
    }

    # FIX: explicit None check. The previous `if temp:` treated a valid
    # 0°F reading as "no data", skipping categorization entirely and
    # leaving indoor_preferred False in freezing conditions.
    if temp is not None:
        if temp >= WeatherThresholds.HOT_THRESHOLD:
            analysis["temp_category"] = "hot"
        elif temp >= WeatherThresholds.WARM_THRESHOLD:
            analysis["temp_category"] = "warm"
        elif temp >= WeatherThresholds.COOL_THRESHOLD:
            analysis["temp_category"] = "mild"
        elif temp >= WeatherThresholds.COLD_THRESHOLD:
            analysis["temp_category"] = "cool"
        else:
            analysis["temp_category"] = "cold"

        # Outdoor-friendly = warm/mild + not rainy/snowy
        analysis["outdoor_friendly"] = (
            temp >= WeatherThresholds.COOL_THRESHOLD and
            not analysis["is_rainy"] and
            not analysis["is_snowy"]
        )

        # Indoor preferred = cold or rainy or snowy
        analysis["indoor_preferred"] = (
            temp < WeatherThresholds.COOL_THRESHOLD or
            analysis["is_rainy"] or
            analysis["is_snowy"]
        )

    return analysis
425
+
426
+
427
def _generate_recommendations(
    events: List[Dict[str, Any]],
    weather_analysis: Dict[str, Any],
    temp: Optional[float],
    phrase: str
) -> List[str]:
    """
    🎯 Penny's event recommendation engine.

    Scores every event against the current weather, orders them
    best-fit-first, and renders each one as a friendly suggestion line.

    Args:
        events: List of available events
        weather_analysis: Weather condition analysis
        temp: Current temperature
        phrase: Weather description

    Returns:
        List of formatted event suggestions, best weather fit first
    """
    # Rank by weather-fit score, highest first (stable sort keeps the
    # original order among equally-scored events).
    ranked = sorted(
        ((_calculate_event_weather_score(ev, weather_analysis), ev) for ev in events),
        key=lambda pair: pair[0],
        reverse=True,
    )

    # Render each ranked event in Penny's voice.
    return [
        _create_suggestion_message(
            event_name=ev.get("name", "Unnamed Event"),
            event_category=ev.get("category", "").lower(),
            event_location=ev.get("location", ""),
            score=score,
            weather_analysis=weather_analysis,
            temp=temp,
            phrase=phrase
        )
        for score, ev in ranked
    ]
477
+
478
+
479
+ def _calculate_event_weather_score(
480
+ event: Dict[str, Any],
481
+ weather_analysis: Dict[str, Any]
482
+ ) -> int:
483
+ """
484
+ 📊 Scores event suitability based on weather (0-100).
485
+ Higher = better match for current conditions.
486
+
487
+ Args:
488
+ event: Event dictionary with category information
489
+ weather_analysis: Weather condition analysis
490
+
491
+ Returns:
492
+ Integer score from 0-100
493
+ """
494
+ category = event.get("category", "").lower()
495
+ score = 50 # Neutral baseline
496
+
497
+ # Perfect matches
498
+ if "outdoor" in category and weather_analysis["outdoor_friendly"]:
499
+ score = 95
500
+ elif "indoor" in category and weather_analysis["indoor_preferred"]:
501
+ score = 90
502
+
503
+ # Good matches
504
+ elif "indoor" in category and not weather_analysis["outdoor_friendly"]:
505
+ score = 75
506
+ elif "outdoor" in category and weather_analysis["temp_category"] in ["warm", "mild"]:
507
+ score = 70
508
+
509
+ # Acceptable matches
510
+ elif "civic" in category or "community" in category:
511
+ score = 60 # Usually indoor, weather-neutral
512
+
513
+ # Poor matches (but still list them)
514
+ elif "outdoor" in category and weather_analysis["indoor_preferred"]:
515
+ score = 30
516
+
517
+ return score
518
+
519
+
520
+ def _create_suggestion_message(
521
+ event_name: str,
522
+ event_category: str,
523
+ event_location: str,
524
+ score: int,
525
+ weather_analysis: Dict[str, Any],
526
+ temp: Optional[float],
527
+ phrase: str
528
+ ) -> str:
529
+ """
530
+ 💬 Penny's voice: Generates natural, helpful event suggestions.
531
+ Adapts tone based on weather fit score.
532
+
533
+ Args:
534
+ event_name: Name of the event
535
+ event_category: Event category (outdoor, indoor, etc.)
536
+ event_location: Event location/venue
537
+ score: Weather suitability score (0-100)
538
+ weather_analysis: Weather condition analysis
539
+ temp: Current temperature
540
+ phrase: Weather description
541
+
542
+ Returns:
543
+ Formatted suggestion string with emoji and helpful context
544
+ """
545
+ location_text = f" at {event_location}" if event_location else ""
546
+
547
+ # PERFECT MATCHES (90-100)
548
+ if score >= 90:
549
+ if "outdoor" in event_category:
550
+ return f"🌟 **{event_name}**{location_text} — Perfect outdoor weather! This is the one."
551
+ else:
552
+ return f"🏛️ **{event_name}**{location_text} — Ideal indoor activity for today's weather!"
553
+
554
+ # GOOD MATCHES (70-89)
555
+ elif score >= 70:
556
+ if "outdoor" in event_category:
557
+ return f"☀️ **{event_name}**{location_text} — Great day for outdoor activities!"
558
+ else:
559
+ return f"🔵 **{event_name}**{location_text} — Solid indoor option!"
560
+
561
+ # DECENT MATCHES (50-69)
562
+ elif score >= 50:
563
+ if "outdoor" in event_category:
564
+ temp_text = f" (It's {int(temp)}°F)" if temp else ""
565
+ return f"🌤️ **{event_name}**{location_text} — Weather's okay for outdoor events{temp_text}."
566
+ else:
567
+ return f"⚪ **{event_name}**{location_text} — Weather-neutral activity."
568
+
569
+ # POOR MATCHES (Below 50)
570
+ else:
571
+ if "outdoor" in event_category and weather_analysis["is_rainy"]:
572
+ return f"🌧️ **{event_name}**{location_text} — Outdoor event, but it's rainy. Bring an umbrella or check if it's postponed!"
573
+ elif "outdoor" in event_category and weather_analysis.get("temp_category") == "cold":
574
+ return f"❄️ **{event_name}**{location_text} — Outdoor event, but bundle up — it's chilly!"
575
+ else:
576
+ return f"⚪ **{event_name}**{location_text} — Check weather before heading out."
577
+
578
+
579
+ def _create_weather_summary(temp: Optional[float], phrase: str) -> str:
580
+ """
581
+ 🌤️ Penny's plain-English weather summary.
582
+
583
+ Args:
584
+ temp: Temperature in Fahrenheit
585
+ phrase: Weather description phrase
586
+
587
+ Returns:
588
+ Human-readable weather summary
589
+ """
590
+ if not temp:
591
+ return f"Current conditions: {phrase.title()}"
592
+
593
+ temp_desc = ""
594
+ if temp >= 85:
595
+ temp_desc = "hot"
596
+ elif temp >= 70:
597
+ temp_desc = "warm"
598
+ elif temp >= 60:
599
+ temp_desc = "mild"
600
+ elif temp >= 40:
601
+ temp_desc = "cool"
602
+ else:
603
+ temp_desc = "cold"
604
+
605
+ return f"It's {temp_desc} at {int(temp)}°F — {phrase.lower()}."
606
+
607
+
608
+ # --- ERROR RESPONSE HELPERS (Penny stays helpful even in failures) ---
609
+
610
+ def _create_no_events_response(tenant_id: str) -> Dict[str, Any]:
611
+ """
612
+ Returns friendly response when no events are found.
613
+
614
+ Args:
615
+ tenant_id: City identifier
616
+
617
+ Returns:
618
+ Structured response with helpful message
619
+ """
620
+ return {
621
+ "weather": {},
622
+ "suggestions": [
623
+ f"🤔 I don't have any events loaded for {tenant_id} right now. "
624
+ "Let me know if you'd like me to check again or add some!"
625
+ ],
626
+ "tenant_id": tenant_id,
627
+ "event_count": 0,
628
+ "timestamp": datetime.utcnow().isoformat()
629
+ }
630
+
631
+
632
def _create_error_response(
    tenant_id: str,
    error_type: str,
    message: str
) -> Dict[str, Any]:
    """
    Build a structured error payload while keeping Penny's tone helpful.

    Args:
        tenant_id: City identifier
        error_type: Structured error type code
        message: User-friendly error message

    Returns:
        Error response dictionary with an empty weather block and a
        single warning-style suggestion.
    """
    logger.error(f"Error in event_weather: {error_type} - {message}")

    payload: Dict[str, Any] = {
        "weather": {},
        "suggestions": [f"⚠️ {message}"],
        "tenant_id": tenant_id,
        "event_count": 0,
    }
    payload["error_type"] = error_type
    payload["timestamp"] = datetime.utcnow().isoformat()
    return payload
657
+
658
+
659
def _create_fallback_response(
    tenant_id: str,
    events: List[Dict[str, Any]]
) -> Dict[str, Any]:
    """
    Degrade gracefully: list today's events even with no weather data.

    Args:
        tenant_id: City identifier
        events: List of available events

    Returns:
        Fallback response — events without weather optimization, with
        ``fallback_mode`` set so callers can tell the difference.
    """
    lines = ["⚠️ Weather service is temporarily unavailable, but here are today's events:"]
    for event in events[:EventWeatherConfig.MAX_FALLBACK_EVENTS]:
        lines.append(
            f"📅 **{event.get('name', 'Event')}** — {event.get('category', 'Community event')}"
        )

    return {
        "weather": {"error": "Weather service unavailable"},
        "suggestions": lines,
        "tenant_id": tenant_id,
        "event_count": len(events),
        "timestamp": datetime.utcnow().isoformat(),
        "fallback_mode": True
    }
692
+
693
+
694
+ # --- STRUCTURED LOGGING HELPER ---
695
+
696
def _log_operation(
    operation: str,
    tenant_id: str,
    success: bool,
    event_count: int,
    response_time_ms: int,
    fallback_used: bool,
    weather_available: bool,
    session_id: Optional[str] = None,
    user_id: Optional[str] = None,
    error_type: Optional[str] = None,
    error_message: Optional[str] = None
) -> None:
    """
    Emit one structured log record for an event weather operation.

    Args:
        operation: Operation name
        tenant_id: City identifier
        success: Whether operation succeeded
        event_count: Number of events processed
        response_time_ms: Total response time in milliseconds
        fallback_used: Whether fallback mode was used
        weather_available: Whether weather data was available
        session_id: Optional session identifier
        user_id: Optional user identifier
        error_type: Optional error type if failed
        error_message: Optional error message if failed
    """
    payload = {
        "operation": operation,
        "tenant_id": sanitize_for_logging(tenant_id),
        "success": success,
        "event_count": event_count,
        "response_time_ms": response_time_ms,
        "fallback_used": fallback_used,
        "weather_available": weather_available,
        "timestamp": datetime.utcnow().isoformat()
    }

    # Optional fields: only attach when truthy; scrub user-supplied ones.
    # error_type is a controlled code, so it is logged verbatim.
    for key, value, scrub in (
        ("session_id", session_id, True),
        ("user_id", user_id, True),
        ("error_type", error_type, False),
        ("error_message", error_message, True),
    ):
        if value:
            payload[key] = sanitize_for_logging(value) if scrub else value

    log_interaction(payload)
749
+
750
+
751
+ def _calculate_response_time(start_time: float) -> int:
752
+ """
753
+ Calculate response time in milliseconds.
754
+
755
+ Args:
756
+ start_time: Operation start time from time.time()
757
+
758
+ Returns:
759
+ Response time in milliseconds
760
+ """
761
+ return int((time.time() - start_time) * 1000)
app/intents.py ADDED
@@ -0,0 +1,481 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app/intents.py
2
+ """
3
+ 🎯 Penny's Intent Classification System
4
+ Rule-based intent classifier designed for civic engagement queries.
5
+
6
+ CURRENT: Simple keyword matching (fast, predictable, debuggable)
7
+ FUTURE: Will upgrade to ML/embedding-based classification (Gemma/LayoutLM)
8
+
9
+ This approach allows Penny to understand resident needs and route them
10
+ to the right civic systems — weather, resources, events, translation, etc.
11
+ """
12
+
13
+ import logging
14
+ from typing import Dict, List, Optional
15
+ from dataclasses import dataclass, field
16
+ from enum import Enum
17
+
18
+ # --- LOGGING SETUP (Azure-friendly) ---
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
+ # --- INTENT CATEGORIES (Enumerated for type safety) ---
23
class IntentType(str, Enum):
    """
    Penny's supported intent categories.

    Each member maps to a specific civic assistance pathway; inheriting
    from ``str`` lets values compare equal to their string codes.
    """

    WEATHER = "weather"
    GREETING = "greeting"
    LOCAL_RESOURCES = "local_resources"
    EVENTS = "events"
    TRANSLATION = "translation"
    SENTIMENT_ANALYSIS = "sentiment_analysis"
    BIAS_DETECTION = "bias_detection"
    DOCUMENT_PROCESSING = "document_processing"
    HELP = "help"
    # Critical safety routing — checked before all other intents.
    EMERGENCY = "emergency"
    UNKNOWN = "unknown"
39
+
40
+
41
@dataclass
class IntentMatch:
    """
    Structured intent classification result.

    Includes a confidence score and the matched keywords so routing
    decisions can be debugged after the fact.
    """

    intent: IntentType
    confidence: float  # 0.0 - 1.0
    matched_keywords: List[str]
    is_compound: bool = False  # True if query spans multiple intents
    secondary_intents: List[IntentType] = field(default_factory=list)

    def to_dict(self) -> Dict:
        """Serialize for logging and API responses (enum values as strings)."""
        payload = {
            "intent": self.intent.value,
            "confidence": self.confidence,
            "matched_keywords": self.matched_keywords,
            "is_compound": self.is_compound,
        }
        payload["secondary_intents"] = [secondary.value for secondary in self.secondary_intents]
        return payload
62
+
63
+
64
+ # --- INTENT KEYWORD PATTERNS (Organized by priority) ---
65
class IntentPatterns:
    """
    Penny's keyword patterns for intent matching.

    Lists are grouped by priority — critical intents are checked first
    by the classifier, so ordering of these groups matters.
    """

    # 🚨 PRIORITY 1: EMERGENCY & SAFETY — always evaluated before
    # anything else so safety routing can never be shadowed.
    EMERGENCY = [
        "911", "emergency", "urgent", "crisis", "danger", "help me",
        "suicide", "overdose", "assault", "abuse", "threatening",
        "hurt myself", "hurt someone", "life threatening"
    ]

    # 🌍 PRIORITY 2: TRANSLATION — high civic value for multilingual residents.
    TRANSLATION = [
        "translate", "in spanish", "in french", "in portuguese",
        "in german", "in chinese", "in arabic", "in vietnamese",
        "in russian", "in korean", "in japanese", "in tagalog",
        "convert to", "say this in", "how do i say", "what is", "in hindi"
    ]

    # 📄 PRIORITY 3: DOCUMENT PROCESSING — forms, PDFs, permit paperwork.
    DOCUMENT_PROCESSING = [
        "process this document", "extract data", "analyze pdf",
        "upload form", "read this file", "scan this", "form help",
        "fill out", "document", "pdf", "application", "permit"
    ]

    # 🔍 PRIORITY 4: ANALYSIS TOOLS — sentiment and bias checks.
    SENTIMENT_ANALYSIS = [
        "how does this sound", "is this positive", "is this negative",
        "analyze", "sentiment", "feel about", "mood", "tone"
    ]

    BIAS_DETECTION = [
        "is this biased", "check bias", "check fairness", "is this neutral",
        "biased", "objective", "subjective", "fair", "discriminatory"
    ]

    # 🌤️ PRIORITY 5: WEATHER + EVENTS — these two can form compound intents.
    WEATHER = [
        "weather", "rain", "snow", "sunny", "forecast", "temperature",
        "hot", "cold", "storm", "wind", "outside", "climate",
        "degrees", "celsius", "fahrenheit"
    ]

    # Date/time phrases that usually signal an event-oriented question.
    DATE_TIME = [
        "today", "tomorrow", "this weekend", "next week",
        "sunday", "monday", "tuesday", "wednesday", "thursday", "friday", "saturday",
        "tonight", "this morning", "this afternoon", "this evening"
    ]

    EVENTS = [
        "event", "things to do", "what's happening", "activities",
        "festival", "concert", "activity", "community event",
        "show", "performance", "gathering", "meetup", "celebration"
    ]

    # 🏛️ PRIORITY 6: LOCAL RESOURCES — the core civic-assistance mission.
    LOCAL_RESOURCES = [
        "resource", "shelter", "library", "help center",
        "food bank", "warming center", "cooling center", "csb",
        "mental health", "housing", "community service",
        "trash", "recycling", "transit", "bus", "schedule",
        "clinic", "hospital", "pharmacy", "assistance",
        "utility", "water", "electric", "gas", "bill"
    ]

    # 💬 PRIORITY 7: CONVERSATIONAL — greetings and general help.
    GREETING = [
        "hi", "hello", "hey", "what's up", "good morning",
        "good afternoon", "good evening", "howdy", "yo",
        "greetings", "sup", "hiya"
    ]

    HELP = [
        "help", "how do i", "can you help", "i need help",
        "what can you do", "how does this work", "instructions",
        "guide", "tutorial", "show me how"
    ]
146
+
147
+
148
def classify_intent(message: str) -> str:
    """
    🎯 Backward-compatible entry point for intent classification.

    Delegates to classify_intent_detailed() and flattens the result to the
    plain intent string the existing API expects.

    Args:
        message: User's query text

    Returns:
        Intent string (e.g., "weather", "events", "translation"); falls back
        to the UNKNOWN intent value if classification raises.
    """
    try:
        detailed = classify_intent_detailed(message)
        return detailed.intent.value
    except Exception as e:
        # Never let a classification failure bubble up to the caller.
        logger.error(f"Intent classification failed: {e}", exc_info=True)
        return IntentType.UNKNOWN.value
165
+
166
+
167
def classify_intent_detailed(message: str) -> IntentMatch:
    """
    🧠 Enhanced classification with confidence scores and metadata.

    This function:
    1. Checks for emergency keywords FIRST (safety routing)
    2. Detects compound intents (e.g., "weather + events")
    3. Returns structured result with confidence + matched keywords

    Checks run in a fixed priority order, so an earlier match short-circuits
    all later ones (e.g., an emergency keyword wins over everything else).

    Args:
        message: User's query text

    Returns:
        IntentMatch object with full classification details; UNKNOWN with
        confidence 0.0 on empty input or internal error.
    """

    # Guard: empty/whitespace-only input can never match a keyword list.
    if not message or not message.strip():
        logger.warning("Empty message received for intent classification")
        return IntentMatch(
            intent=IntentType.UNKNOWN,
            confidence=0.0,
            matched_keywords=[]
        )

    try:
        # Normalize once; all pattern lists are lowercase.
        text = message.lower().strip()
        logger.debug(f"Classifying intent for: '{text[:50]}...'")

        # --- PRIORITY 1: EMERGENCY (Critical safety routing) ---
        emergency_matches = _find_keyword_matches(text, IntentPatterns.EMERGENCY)
        if emergency_matches:
            logger.warning(f"🚨 EMERGENCY intent detected: {emergency_matches}")
            return IntentMatch(
                intent=IntentType.EMERGENCY,
                confidence=1.0,  # Always high confidence for safety
                matched_keywords=emergency_matches
            )

        # --- PRIORITY 2: TRANSLATION ---
        translation_matches = _find_keyword_matches(text, IntentPatterns.TRANSLATION)
        if translation_matches:
            return IntentMatch(
                intent=IntentType.TRANSLATION,
                confidence=0.9,
                matched_keywords=translation_matches
            )

        # --- PRIORITY 3: DOCUMENT PROCESSING ---
        doc_matches = _find_keyword_matches(text, IntentPatterns.DOCUMENT_PROCESSING)
        if doc_matches:
            return IntentMatch(
                intent=IntentType.DOCUMENT_PROCESSING,
                confidence=0.9,
                matched_keywords=doc_matches
            )

        # --- PRIORITY 4: ANALYSIS TOOLS ---
        sentiment_matches = _find_keyword_matches(text, IntentPatterns.SENTIMENT_ANALYSIS)
        if sentiment_matches:
            return IntentMatch(
                intent=IntentType.SENTIMENT_ANALYSIS,
                confidence=0.85,
                matched_keywords=sentiment_matches
            )

        bias_matches = _find_keyword_matches(text, IntentPatterns.BIAS_DETECTION)
        if bias_matches:
            return IntentMatch(
                intent=IntentType.BIAS_DETECTION,
                confidence=0.85,
                matched_keywords=bias_matches
            )

        # --- PRIORITY 5: COMPOUND INTENT HANDLING (Weather + Events) ---
        # All three lists are computed up front because the compound check
        # below needs to see weather/date matches alongside event matches.
        weather_matches = _find_keyword_matches(text, IntentPatterns.WEATHER)
        event_matches = _find_keyword_matches(text, IntentPatterns.EVENTS)
        date_matches = _find_keyword_matches(text, IntentPatterns.DATE_TIME)

        # Compound detection: "What events are happening this weekend?"
        # or "What's the weather like for Sunday's festival?"
        if event_matches and (weather_matches or date_matches):
            logger.info("Compound intent detected: events + weather/date")
            return IntentMatch(
                intent=IntentType.EVENTS,  # Primary intent
                confidence=0.85,
                matched_keywords=event_matches + weather_matches + date_matches,
                is_compound=True,
                secondary_intents=[IntentType.WEATHER]
            )

        # --- PRIORITY 6: SIMPLE WEATHER INTENT ---
        if weather_matches:
            return IntentMatch(
                intent=IntentType.WEATHER,
                confidence=0.9,
                matched_keywords=weather_matches
            )

        # --- PRIORITY 7: LOCAL RESOURCES ---
        resource_matches = _find_keyword_matches(text, IntentPatterns.LOCAL_RESOURCES)
        if resource_matches:
            return IntentMatch(
                intent=IntentType.LOCAL_RESOURCES,
                confidence=0.9,
                matched_keywords=resource_matches
            )

        # --- PRIORITY 8: EVENTS (Simple check) ---
        # Reached only when no weather/date context accompanied the event words.
        if event_matches:
            return IntentMatch(
                intent=IntentType.EVENTS,
                confidence=0.85,
                matched_keywords=event_matches
            )

        # --- PRIORITY 9: CONVERSATIONAL ---
        greeting_matches = _find_keyword_matches(text, IntentPatterns.GREETING)
        if greeting_matches:
            return IntentMatch(
                intent=IntentType.GREETING,
                confidence=0.8,
                matched_keywords=greeting_matches
            )

        help_matches = _find_keyword_matches(text, IntentPatterns.HELP)
        if help_matches:
            return IntentMatch(
                intent=IntentType.HELP,
                confidence=0.9,
                matched_keywords=help_matches
            )

        # --- FALLBACK: UNKNOWN ---
        logger.info(f"No clear intent match for: '{text[:50]}...'")
        return IntentMatch(
            intent=IntentType.UNKNOWN,
            confidence=0.0,
            matched_keywords=[]
        )

    except Exception as e:
        # Defensive catch-all: classification must never crash the caller.
        logger.error(f"Error during intent classification: {e}", exc_info=True)
        return IntentMatch(
            intent=IntentType.UNKNOWN,
            confidence=0.0,
            matched_keywords=[],
        )
314
+
315
+
316
+ # --- HELPER FUNCTIONS ---
317
+
318
+ def _find_keyword_matches(text: str, keywords: List[str]) -> List[str]:
319
+ """
320
+ Finds which keywords from a pattern list appear in the user's message.
321
+
322
+ Args:
323
+ text: Normalized user message (lowercase)
324
+ keywords: List of keywords to search for
325
+
326
+ Returns:
327
+ List of matched keywords (for debugging/logging)
328
+ """
329
+ try:
330
+ matches = []
331
+ for keyword in keywords:
332
+ if keyword in text:
333
+ matches.append(keyword)
334
+ return matches
335
+ except Exception as e:
336
+ logger.error(f"Error finding keyword matches: {e}", exc_info=True)
337
+ return []
338
+
339
+
340
def get_intent_description(intent: IntentType) -> str:
    """
    🗣️ Plain-English explanation of what each intent does.

    Handy for help endpoints and debugging output.

    Args:
        intent: IntentType enum value

    Returns:
        Human-readable description, or a generic fallback string for
        unrecognized intent values.
    """
    intent_descriptions = {
        IntentType.WEATHER: "Get current weather conditions and forecasts for your area",
        IntentType.GREETING: "Start a conversation with Penny",
        IntentType.LOCAL_RESOURCES: "Find community resources like shelters, libraries, and services",
        IntentType.EVENTS: "Discover local events and activities happening in your city",
        IntentType.TRANSLATION: "Translate text between 27 languages",
        IntentType.SENTIMENT_ANALYSIS: "Analyze the emotional tone of text",
        IntentType.BIAS_DETECTION: "Check text for potential bias or fairness issues",
        IntentType.DOCUMENT_PROCESSING: "Process PDFs and forms to extract information",
        IntentType.HELP: "Learn how to use Penny's features",
        IntentType.EMERGENCY: "Connect with emergency services and crisis support",
        IntentType.UNKNOWN: "I'm not sure what you're asking — can you rephrase?",
    }
    # .get() keeps this total: unexpected values map to the fallback text.
    return intent_descriptions.get(intent, "Unknown intent type")
365
+
366
+
367
def get_all_supported_intents() -> Dict[str, str]:
    """
    📋 Builds a mapping of every supported intent to its description.

    UNKNOWN is deliberately excluded — it is a fallback state, not a
    feature users can invoke. Useful for /help endpoints and docs.

    Returns:
        Dictionary mapping intent values to descriptions; empty dict on error.
    """
    try:
        supported: Dict[str, str] = {}
        for intent in IntentType:
            if intent is IntentType.UNKNOWN:
                continue
            supported[intent.value] = get_intent_description(intent)
        return supported
    except Exception as e:
        logger.error(f"Error getting supported intents: {e}", exc_info=True)
        return {}
384
+
385
+
386
+ # --- FUTURE ML UPGRADE HOOK ---
387
def classify_intent_ml(message: str, use_embedding_model: bool = False) -> IntentMatch:
    """
    🔮 Reserved hook for a future ML-based classifier.

    Planned upgrade path from keyword matching to embeddings:
    1. Load Gemma-7B or sentence-transformers model
    2. Generate message embeddings
    3. Compare to intent prototype embeddings
    4. Return top match with confidence score

    Args:
        message: User's query
        use_embedding_model: If True, request the ML path (not implemented yet)

    Returns:
        IntentMatch object — currently always produced by the rule-based
        classifier, regardless of the flag.
    """
    if use_embedding_model:
        logger.warning("ML-based classification not yet implemented. Falling back to rules.")

    # Until the embedding path exists, everything routes through the rules.
    return classify_intent_detailed(message)
410
+
411
+
412
+ # --- TESTING & VALIDATION ---
413
def validate_intent_patterns() -> Dict[str, List[str]]:
    """
    🧪 Validates that all intent patterns are properly configured.
    Returns any overlapping keywords that might cause conflicts.

    Each unordered intent pair is checked exactly once; result keys are
    "<a>_vs_<b>" with the names in lexicographic order (same key format as
    the previous ordered-comparison implementation).

    Returns:
        Dictionary of overlapping keywords between intent pairs; empty dict
        if there are no overlaps or on internal error.
    """
    from itertools import combinations  # stdlib; used only by this check

    try:
        all_patterns = {
            "emergency": IntentPatterns.EMERGENCY,
            "translation": IntentPatterns.TRANSLATION,
            "document": IntentPatterns.DOCUMENT_PROCESSING,
            "sentiment": IntentPatterns.SENTIMENT_ANALYSIS,
            "bias": IntentPatterns.BIAS_DETECTION,
            "weather": IntentPatterns.WEATHER,
            "events": IntentPatterns.EVENTS,
            "resources": IntentPatterns.LOCAL_RESOURCES,
            "greeting": IntentPatterns.GREETING,
            "help": IntentPatterns.HELP
        }

        overlaps = {}

        # combinations() visits each unordered pair once, replacing the old
        # full n×n scan with the string-comparison skip.
        for (name_a, kws_a), (name_b, kws_b) in combinations(all_patterns.items(), 2):
            shared = set(kws_a) & set(kws_b)
            if not shared:
                continue
            # Sort the names so keys match the old "<smaller>_vs_<larger>" form.
            first, second = sorted((name_a, name_b))
            overlaps[f"{first}_vs_{second}"] = list(shared)

        if overlaps:
            logger.warning(f"Found keyword overlaps between intents: {overlaps}")

        return overlaps

    except Exception as e:
        logger.error(f"Error validating intent patterns: {e}", exc_info=True)
        return {}
456
+
457
+
458
+ # --- LOGGING SAMPLE CLASSIFICATIONS (For monitoring) ---
459
def log_intent_classification(message: str, result: IntentMatch) -> None:
    """
    📊 Logs one classification result for Azure Application Insights.

    Keeps the log line PII-safe by truncating the message preview and
    capping how many matched keywords are emitted.

    Args:
        message: Original user message (truncated for PII safety)
        result: IntentMatch classification result
    """
    try:
        # PII safety: never log more than a 50-char preview of the message.
        preview = message[:50] + "..." if len(message) > 50 else message

        fields = [
            "Intent classified",
            f"intent={result.intent.value}",
            f"confidence={result.confidence:.2f}",
            f"compound={result.is_compound}",
            f"keywords={result.matched_keywords[:5]}",  # Limit logged keywords
            f"message_preview='{preview}'",
        ]
        logger.info(" | ".join(fields))
    except Exception as e:
        logger.error(f"Error logging intent classification: {e}", exc_info=True)
app/location_utils.py ADDED
@@ -0,0 +1,717 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app/location_utils.py
2
+ """
3
+ 🗺️ Penny's Location Intelligence System
4
+ Handles city detection, tenant routing, and geographic data loading.
5
+
6
+ MISSION: Connect residents to the right local resources, regardless of how
7
+ they describe their location — whether it's "Atlanta", "ATL", "30303", or "near me".
8
+
9
+ CURRENT: Rule-based city matching with 6 supported cities
10
+ FUTURE: Will add ZIP→city mapping, geocoding API, and user location preferences
11
+ """
12
+
13
+ import re
14
+ import json
15
+ import os
16
+ import logging
17
+ from typing import Dict, Any, Optional, List, Tuple
18
+ from pathlib import Path
19
+ from dataclasses import dataclass
20
+ from enum import Enum
21
+
22
# --- LOGGING SETUP (Azure-friendly) ---
# Module-level logger; configuration is expected to come from the app entrypoint.
logger = logging.getLogger(__name__)

# --- BASE PATHS (OS-agnostic for Azure/Windows/Linux) ---
# BASE_DIR resolves to the repository root (two levels above this file).
BASE_DIR = Path(__file__).parent.parent.resolve()
DATA_PATH = BASE_DIR / "data"
EVENTS_PATH = DATA_PATH / "events"
RESOURCES_PATH = DATA_PATH / "resources"

# Ensure critical directories exist (Azure deployment safety).
# exist_ok=True makes this idempotent across restarts/imports.
for path in [DATA_PATH, EVENTS_PATH, RESOURCES_PATH]:
    path.mkdir(parents=True, exist_ok=True)
34
+
35
+
36
+ # ============================================================
37
+ # CITY REGISTRY (Penny's Supported Cities)
38
+ # ============================================================
39
+
40
@dataclass
class CityInfo:
    """
    Metadata record for a single city Penny supports.

    Aliases are canonicalized (lowercased + trimmed) at construction time so
    downstream matching code can compare against normalized strings directly.
    """
    tenant_id: str               # Standard format: cityname_state (e.g., "atlanta_ga")
    full_name: str               # Display name: "Atlanta, GA"
    state: str                   # Two-letter state code
    aliases: List[str]           # Common variations users might say
    timezone: str                # IANA timezone (e.g., "America/New_York")
    lat: Optional[float] = None  # For weather API fallback
    lon: Optional[float] = None

    def __post_init__(self):
        # Canonicalize every alias for case-insensitive matching.
        cleaned = [entry.lower().strip() for entry in self.aliases]
        self.aliases = cleaned
57
+
58
+
59
class SupportedCities:
    """
    🏙️ Penny's city registry.
    Each city gets standardized metadata for consistent routing.

    Adding a new city: define a new CityInfo class attribute and append it
    to the list returned by get_all_cities() — alias patterns and lookups
    are derived from that list.
    """

    ATLANTA = CityInfo(
        tenant_id="atlanta_ga",
        full_name="Atlanta, GA",
        state="GA",
        timezone="America/New_York",
        lat=33.7490,
        lon=-84.3880,
        aliases=[
            "atlanta", "atl", "atlanta ga", "atlanta, ga",
            "city of atlanta", "hotlanta", "the atl"
        ]
    )

    BIRMINGHAM = CityInfo(
        tenant_id="birmingham_al",
        full_name="Birmingham, AL",
        state="AL",
        timezone="America/Chicago",
        lat=33.5207,
        lon=-86.8025,
        aliases=[
            "birmingham", "birmingham al", "birmingham, al",
            "city of birmingham", "bham"
        ]
    )

    CHESTERFIELD = CityInfo(
        tenant_id="chesterfield_va",
        full_name="Chesterfield, VA",
        state="VA",
        timezone="America/New_York",
        lat=37.3771,
        lon=-77.5047,
        aliases=[
            "chesterfield", "chesterfield va", "chesterfield, va",
            "chesterfield county"
        ]
    )

    EL_PASO = CityInfo(
        tenant_id="el_paso_tx",
        full_name="El Paso, TX",
        state="TX",
        timezone="America/Denver",
        lat=31.7619,
        lon=-106.4850,
        aliases=[
            "el paso", "el paso tx", "el paso, tx",
            "city of el paso", "elpaso"
        ]
    )

    PROVIDENCE = CityInfo(
        tenant_id="providence_ri",
        full_name="Providence, RI",
        state="RI",
        timezone="America/New_York",
        lat=41.8240,
        lon=-71.4128,
        aliases=[
            "providence", "providence ri", "providence, ri",
            "city of providence", "pvd"
        ]
    )

    SEATTLE = CityInfo(
        tenant_id="seattle_wa",
        full_name="Seattle, WA",
        state="WA",
        timezone="America/Los_Angeles",
        lat=47.6062,
        lon=-122.3321,
        aliases=[
            "seattle", "seattle wa", "seattle, wa",
            "city of seattle", "emerald city", "sea"
        ]
    )

    @classmethod
    def get_all_cities(cls) -> List[CityInfo]:
        """Returns list of all supported cities (a fresh list on each call)."""
        return [
            cls.ATLANTA,
            cls.BIRMINGHAM,
            cls.CHESTERFIELD,
            cls.EL_PASO,
            cls.PROVIDENCE,
            cls.SEATTLE
        ]

    @classmethod
    def get_city_by_tenant_id(cls, tenant_id: str) -> Optional[CityInfo]:
        """Lookup city info by tenant ID; returns None if unsupported."""
        # Linear scan is fine at this scale (6 cities).
        for city in cls.get_all_cities():
            if city.tenant_id == tenant_id:
                return city
        return None
162
+
163
+
164
+ # ============================================================
165
+ # BUILD DYNAMIC CITY PATTERNS (from CityInfo registry)
166
+ # ============================================================
167
+
168
def _build_city_patterns() -> Dict[str, str]:
    """
    Generates the alias → tenant_id matching dictionary from the CityInfo
    registry, keeping pattern matching backward-compatible with existing code.

    Returns:
        Dict mapping each (already-normalized) alias to its tenant ID.
    """
    # Flatten every city's alias list into one lookup table.
    return {
        alias: city.tenant_id
        for city in SupportedCities.get_all_cities()
        for alias in city.aliases
    }
178
+
179
+
180
# Dynamic pattern dictionary (auto-generated from city registry).
# Built once at import time; stale if cities are added at runtime.
REAL_CITY_PATTERNS = _build_city_patterns()
182
+
183
+
184
+ # ============================================================
185
+ # LOCATION DETECTION ENUMS
186
+ # ============================================================
187
+
188
class LocationStatus(str, Enum):
    """
    Status codes for location detection results.

    Subclasses str so values serialize cleanly (e.g., into JSON responses).
    """
    FOUND = "found"  # Valid city matched
    ZIP_DETECTED = "zip_detected"  # ZIP code found (needs mapping)
    USER_LOCATION_NEEDED = "user_location_needed"  # "near me" detected
    UNKNOWN = "unknown"  # No match found
    AMBIGUOUS = "ambiguous"  # Multiple possible matches
197
+
198
+
199
@dataclass
class LocationMatch:
    """
    Structured result from location detection.
    Includes confidence and matched patterns for debugging.
    """
    status: LocationStatus
    tenant_id: Optional[str] = None
    city_info: Optional[CityInfo] = None
    confidence: float = 0.0  # 0.0 - 1.0
    matched_pattern: Optional[str] = None
    # Optional at construction; __post_init__ guarantees it is a list afterwards.
    alternatives: Optional[List[str]] = None

    def __post_init__(self):
        # Coerce None to a fresh list (avoids the mutable-default pitfall).
        if self.alternatives is None:
            self.alternatives = []
215
+
216
+
217
+ # ============================================================
218
+ # ZIP CODE PATTERNS (for future expansion)
219
+ # ============================================================
220
+
221
# Matches 5-digit ZIPs (12345) and ZIP+4 (12345-6789) anywhere in a string.
ZIP_PATTERN = re.compile(r"\b\d{5}(?:-\d{4})?\b")  # Matches 12345 or 12345-6789

# Partial ZIP → tenant_id mapping; only a handful of ZIPs per metro are
# covered today. Unmapped ZIPs fall through to the ZIP_DETECTED status.
ZIP_TO_CITY_MAP: Dict[str, str] = {
    # Atlanta metro
    "30303": "atlanta_ga",
    "30318": "atlanta_ga",
    "30309": "atlanta_ga",

    # Birmingham metro
    "35203": "birmingham_al",
    "35233": "birmingham_al",

    # Chesterfield County
    "23832": "chesterfield_va",
    "23838": "chesterfield_va",

    # El Paso
    "79901": "el_paso_tx",
    "79936": "el_paso_tx",

    # Providence
    "02903": "providence_ri",
    "02904": "providence_ri",

    # Seattle metro
    "98101": "seattle_wa",
    "98104": "seattle_wa",
    "98122": "seattle_wa",
}
251
+
252
+
253
+ # ============================================================
254
+ # MAIN CITY EXTRACTION LOGIC (Enhanced)
255
+ # ============================================================
256
+
257
def extract_city_name(text: str) -> str:
    """
    🎯 BACKWARD-COMPATIBLE location extraction (returns a plain string).

    Thin wrapper over extract_location_detailed() for callers that only
    want a tenant ID or status code.

    Args:
        text: User's location input (e.g., "Atlanta", "30303", "near me")

    Returns:
        Tenant ID string or status code:
        - Valid tenant_id (e.g., "atlanta_ga")
        - "zip_detected" (ZIP code found, needs mapping)
        - "user_location_needed" ("near me" detected)
        - "unknown" (no match)
    """
    match = extract_location_detailed(text)
    if match.tenant_id:
        return match.tenant_id
    # No concrete city — surface the detection status instead.
    return match.status.value
275
+
276
+
277
def extract_location_detailed(text: str) -> LocationMatch:
    """
    🧠 ENHANCED location extraction with confidence scoring.

    Detection order matters: "near me" phrases win over ZIPs, which win over
    city-name patterns. Each step returns immediately on a hit.

    Args:
        text: User's location input

    Returns:
        LocationMatch object with full detection details
    """

    # Guard: nothing to match on empty/whitespace input.
    if not text or not text.strip():
        logger.warning("Empty text provided to location extraction")
        return LocationMatch(
            status=LocationStatus.UNKNOWN,
            confidence=0.0
        )

    lowered = text.lower().strip()
    logger.debug(f"Extracting location from: '{lowered}'")

    # --- STEP 1: Check for "near me" / location services needed ---
    near_me_phrases = [
        "near me", "my area", "my city", "my neighborhood",
        "where i am", "current location", "my location",
        "around here", "locally", "in my town"
    ]

    if any(phrase in lowered for phrase in near_me_phrases):
        logger.info("User location services required")
        return LocationMatch(
            status=LocationStatus.USER_LOCATION_NEEDED,
            confidence=1.0,
            matched_pattern="near_me_detected"
        )

    # --- STEP 2: Check for ZIP codes ---
    # NOTE: searches the original text (not lowered) — ZIPs are digits, so
    # case is irrelevant here.
    zip_matches = ZIP_PATTERN.findall(text)
    if zip_matches:
        zip_code = zip_matches[0]  # Take first ZIP if multiple

        # Try to map ZIP to known city
        if zip_code in ZIP_TO_CITY_MAP:
            tenant_id = ZIP_TO_CITY_MAP[zip_code]
            city_info = SupportedCities.get_city_by_tenant_id(tenant_id)
            logger.info(f"ZIP {zip_code} mapped to {tenant_id}")
            return LocationMatch(
                status=LocationStatus.FOUND,
                tenant_id=tenant_id,
                city_info=city_info,
                confidence=0.95,
                matched_pattern=f"zip:{zip_code}"
            )
        else:
            # Valid-looking ZIP but outside our coverage map.
            logger.info(f"ZIP code detected but not mapped: {zip_code}")
            return LocationMatch(
                status=LocationStatus.ZIP_DETECTED,
                confidence=0.5,
                matched_pattern=f"zip:{zip_code}"
            )

    # --- STEP 3: Match against city patterns ---
    matches = []
    for pattern, tenant_id in REAL_CITY_PATTERNS.items():
        if pattern in lowered:
            matches.append((pattern, tenant_id))

    if not matches:
        logger.info(f"No city match found for: '{lowered}'")
        return LocationMatch(
            status=LocationStatus.UNKNOWN,
            confidence=0.0
        )

    # If multiple matches, pick the longest pattern (most specific)
    # Example: "atlanta" vs "city of atlanta" — pick the longer one
    matches.sort(key=lambda x: len(x[0]), reverse=True)
    best_pattern, best_tenant_id = matches[0]

    city_info = SupportedCities.get_city_by_tenant_id(best_tenant_id)

    # Calculate confidence based on match specificity: the more of the input
    # the pattern covers, the higher the confidence (capped at 1.0).
    confidence = min(len(best_pattern) / len(lowered), 1.0)

    result = LocationMatch(
        status=LocationStatus.FOUND,
        tenant_id=best_tenant_id,
        city_info=city_info,
        confidence=confidence,
        matched_pattern=best_pattern
    )

    # Check for ambiguity (multiple different cities matched).
    # The best match is still reported; the rest go into `alternatives`.
    unique_tenant_ids = set(tid for _, tid in matches)
    if len(unique_tenant_ids) > 1:
        result.status = LocationStatus.AMBIGUOUS
        result.alternatives = [tid for _, tid in matches if tid != best_tenant_id]
        logger.warning(f"Ambiguous location match: {unique_tenant_ids}")

    logger.info(f"Location matched: {best_tenant_id} (confidence: {confidence:.2f})")
    return result
381
+
382
+
383
+ # ============================================================
384
+ # DATA LOADING UTILITIES (Enhanced with error handling)
385
+ # ============================================================
386
+
387
def load_city_data(directory: Path, tenant_id: str) -> Dict[str, Any]:
    """
    🗄️ Generic utility to load JSON data for a given tenant ID.

    Args:
        directory: Base path (EVENTS_PATH or RESOURCES_PATH)
        tenant_id: City identifier (e.g., 'atlanta_ga')

    Returns:
        Parsed JSON content as dictionary

    Raises:
        FileNotFoundError: If the JSON file doesn't exist
        json.JSONDecodeError: If the file is malformed
    """
    file_path = directory / f"{tenant_id}.json"

    # Fail fast with a clear message rather than letting open() raise later.
    if not file_path.exists():
        logger.error(f"Data file not found: {file_path}")
        raise FileNotFoundError(f"Data file not found: {file_path}")

    try:
        with file_path.open("r", encoding="utf-8") as handle:
            payload = json.load(handle)
    except json.JSONDecodeError as e:
        logger.error(f"Invalid JSON in {file_path}: {e}")
        raise
    except Exception as e:
        logger.error(f"Error reading {file_path}: {e}", exc_info=True)
        raise

    logger.debug(f"Loaded data from {file_path}")
    return payload
420
+
421
+
422
def load_city_events(tenant_id: str) -> Dict[str, Any]:
    """
    📅 Loads structured event data for a given city.

    Convenience wrapper around load_city_data() bound to EVENTS_PATH.

    Args:
        tenant_id: City identifier (e.g., 'atlanta_ga')

    Returns:
        Event data structure with 'events' key containing list of events

    Example:
        {
            "city": "Atlanta, GA",
            "events": [
                {"name": "Jazz Festival", "category": "outdoor", ...},
                ...
            ]
        }
    """
    logger.info(f"Loading events for {tenant_id}")
    return load_city_data(EVENTS_PATH, tenant_id)
443
+
444
+
445
def load_city_resources(tenant_id: str) -> Dict[str, Any]:
    """
    🏛️ Loads civic resource data for a given city.

    Convenience wrapper around load_city_data() bound to RESOURCES_PATH.

    Args:
        tenant_id: City identifier (e.g., 'atlanta_ga')

    Returns:
        Resource data structure with categorized resources

    Example:
        {
            "city": "Atlanta, GA",
            "resources": {
                "shelters": [...],
                "food_banks": [...],
                "libraries": [...]
            }
        }
    """
    logger.info(f"Loading resources for {tenant_id}")
    return load_city_data(RESOURCES_PATH, tenant_id)
467
+
468
+
469
+ # ============================================================
470
+ # UTILITY FUNCTIONS
471
+ # ============================================================
472
+
473
def normalize_location_name(text: str) -> str:
    """
    🧹 Normalize a location name into a compact lowercase token.

    Strips whitespace, hyphens, commas, and periods.

    Example:
        "El Paso, TX" → "elpasotx"
        "Chesterfield County" → "chesterfieldcounty"
    """
    if not text:
        return ""

    lowered = text.lower().strip()
    # Collapse whitespace/punctuation runs to nothing.
    return re.sub(r"[\s\-,\.]+", "", lowered)
488
+
489
+
490
def get_city_coordinates(tenant_id: str) -> Optional[Dict[str, float]]:
    """
    🗺️ Returns a city's coordinates as a dictionary (for weather API calls).

    Args:
        tenant_id: City identifier

    Returns:
        Dictionary with "lat" and "lon" keys, or None if the city is unknown
        or has no stored coordinates.

    Note: This function returns a dict for consistency with orchestrator usage.
    Use tuple unpacking: coords = get_city_coordinates(tenant_id); lat, lon = coords["lat"], coords["lon"]
    """
    city = SupportedCities.get_city_by_tenant_id(tenant_id)
    # Guard clauses: unknown city, or registry entry missing coordinates.
    if city is None:
        return None
    if city.lat is None or city.lon is None:
        return None
    return {"lat": city.lat, "lon": city.lon}
508
+
509
+
510
def get_city_info(tenant_id: str) -> Optional[Dict[str, Any]]:
    """
    🏙️ Returns a plain-dict snapshot of a city's registry entry.

    Args:
        tenant_id: City identifier

    Returns:
        Dictionary with city information (name, state, coordinates, etc.)
        or None when the tenant_id is not supported.
    """
    city = SupportedCities.get_city_by_tenant_id(tenant_id)
    if city is None:
        return None
    return {
        "tenant_id": city.tenant_id,
        "full_name": city.full_name,
        "state": city.state,
        "timezone": city.timezone,
        "lat": city.lat,
        "lon": city.lon,
        "aliases": city.aliases,
    }
532
+
533
+
534
def detect_location_from_text(text: str) -> Dict[str, Any]:
    """
    🔍 Detects a location reference in free text and returns a flat summary.

    Args:
        text: User input text

    Returns:
        Dictionary with keys:
        - found: bool (whether location was detected)
        - tenant_id: str (if found)
        - city_info: dict (if found)
        - confidence: float (0.0-1.0)
        - status: str (LocationStatus value)
    """
    match = extract_location_detailed(text)

    # Flatten the CityInfo dataclass into a JSON-friendly dict (or None).
    city_summary = None
    if match.city_info:
        city_summary = {
            "tenant_id": match.city_info.tenant_id,
            "full_name": match.city_info.full_name,
            "state": match.city_info.state
        }

    return {
        "found": match.status == LocationStatus.FOUND,
        "tenant_id": match.tenant_id,
        "city_info": city_summary,
        "confidence": match.confidence,
        "status": match.status.value
    }
561
+
562
+
563
def validate_coordinates(lat: float, lon: float) -> Tuple[bool, Optional[str]]:
    """
    ✅ Validates latitude and longitude coordinates.

    Args:
        lat: Latitude (-90 to 90)
        lon: Longitude (-180 to 180)

    Returns:
        Tuple of (is_valid, error_message)
        - is_valid: True if coordinates are valid
        - error_message: None if valid, error description if invalid
    """
    both_numeric = isinstance(lat, (int, float)) and isinstance(lon, (int, float))
    if not both_numeric:
        return False, "Coordinates must be numeric values"

    # Chained comparisons also reject NaN (NaN fails every comparison).
    if not (-90 <= lat <= 90):
        return False, f"Latitude must be between -90 and 90, got {lat}"

    if not (-180 <= lon <= 180):
        return False, f"Longitude must be between -180 and 180, got {lon}"

    return True, None
586
+
587
+
588
def get_city_timezone(tenant_id: str) -> Optional[str]:
    """
    🕐 Returns the IANA timezone string for a supported city.

    Useful for time-sensitive features (events, business hours).

    Args:
        tenant_id: City identifier

    Returns:
        IANA timezone string (e.g., "America/New_York") or None when the
        tenant_id is not supported.
    """
    city = SupportedCities.get_city_by_tenant_id(tenant_id)
    if city is None:
        return None
    return city.timezone
601
+
602
+
603
def validate_tenant_id(tenant_id: str) -> bool:
    """
    ✅ Checks whether a tenant_id refers to a supported city.

    Args:
        tenant_id: City identifier to validate

    Returns:
        True if valid and supported, False otherwise
    """
    # A registry hit is the definition of "supported".
    return SupportedCities.get_city_by_tenant_id(tenant_id) is not None
615
+
616
+
617
def get_all_supported_cities() -> List[Dict[str, str]]:
    """
    📋 Returns the supported-city roster in API-response form.

    Returns:
        List of city info dictionaries with tenant_id, display name and state

    Example:
        [
            {"tenant_id": "atlanta_ga", "name": "Atlanta, GA"},
            {"tenant_id": "seattle_wa", "name": "Seattle, WA"},
            ...
        ]
    """
    roster = [
        {"tenant_id": entry.tenant_id, "name": entry.full_name, "state": entry.state}
        for entry in SupportedCities.get_all_cities()
    ]
    return roster
639
+
640
+
641
+ # ============================================================
642
+ # DATA VALIDATION (For startup checks)
643
+ # ============================================================
644
+
645
def validate_city_data_files() -> Dict[str, Dict[str, bool]]:
    """
    🧪 Checks that the expected JSON data files exist for every city.

    Useful for startup checks and deployment verification.

    Returns:
        Dictionary mapping tenant_id to per-file existence flags

    Example:
        {
            "atlanta_ga": {"events": True, "resources": True},
            "seattle_wa": {"events": False, "resources": True}
        }
    """
    report: Dict[str, Dict[str, bool]] = {}

    for city in SupportedCities.get_all_cities():
        tid = city.tenant_id
        has_events = (EVENTS_PATH / f"{tid}.json").exists()
        has_resources = (RESOURCES_PATH / f"{tid}.json").exists()

        report[tid] = {"events": has_events, "resources": has_resources}

        # Surface gaps immediately in the logs so deploys catch them early.
        if not has_events:
            logger.warning(f"Missing events file for {tid}")
        if not has_resources:
            logger.warning(f"Missing resources file for {tid}")

    return report
677
+
678
+
679
+ # ============================================================
680
+ # INITIALIZATION CHECK (Call on app startup)
681
+ # ============================================================
682
+
683
def initialize_location_system() -> bool:
    """
    🚀 Validates location system is ready.
    Should be called during app startup.

    Missing per-city data files are logged as warnings but do NOT fail
    startup; only a missing data directory returns False.

    Returns:
        True if system is ready, False if critical files missing
    """
    logger.info("🗺️ Initializing Penny's location system...")

    # Check directories exist — this is the only hard failure condition.
    if not DATA_PATH.exists():
        logger.error(f"Data directory not found: {DATA_PATH}")
        return False

    # Validate city data files
    validation = validate_city_data_files()

    total_cities = len(SupportedCities.get_all_cities())
    cities_with_events = sum(1 for v in validation.values() if v["events"])
    cities_with_resources = sum(1 for v in validation.values() if v["resources"])

    logger.info(f"✅ {total_cities} cities registered")
    logger.info(f"✅ {cities_with_events}/{total_cities} cities have event data")
    logger.info(f"✅ {cities_with_resources}/{total_cities} cities have resource data")

    # Warn about missing data but don't fail
    missing_data = [tid for tid, status in validation.items()
                    if not status["events"] or not status["resources"]]

    if missing_data:
        logger.warning(f"⚠️ Incomplete data for cities: {missing_data}")

    logger.info("🗺️ Location system initialized successfully")
    return True
app/logging_utils.py ADDED
@@ -0,0 +1,778 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app/logging_utils.py
2
+ """
3
+ 📊 Penny's Logging & Analytics System
4
+ Tracks user interactions, system performance, and civic engagement patterns.
5
+
6
+ MISSION: Create an audit trail that helps improve Penny's service while
7
+ respecting user privacy and meeting compliance requirements.
8
+
9
+ FEATURES:
10
+ - Structured JSON logging for Azure Application Insights
11
+ - Daily log rotation for long-term storage
12
+ - Privacy-safe request/response tracking
13
+ - Performance monitoring
14
+ - Error tracking with context
15
+ - Optional Azure Blob Storage integration
16
+ """
17
+
18
+ import json
19
+ import logging
20
+ from datetime import datetime, timezone
21
+ from pathlib import Path
22
+ import os
23
+ from typing import Dict, Any, Optional, List
24
+ from dataclasses import dataclass, asdict
25
+ from enum import Enum
26
+ import hashlib
27
+
28
# --- LOGGING SETUP ---
# Module-level logger; handlers/level are configured by the application entry point.
logger = logging.getLogger(__name__)

# ============================================================
# LOG PATH CONFIGURATION (Environment-aware)
# ============================================================

# Base directories (use pathlib for OS compatibility).
# PROJECT_ROOT resolves to the repository root (one level above app/).
PROJECT_ROOT = Path(__file__).parent.parent.resolve()
LOGS_BASE_DIR = PROJECT_ROOT / "data" / "logs"
DEFAULT_LOG_PATH = LOGS_BASE_DIR / "penny_combined.jsonl"

# Environment-configurable log path (PENNY_LOG_PATH overrides the default;
# used only when daily rotation is disabled in log_request)
LOG_PATH = Path(os.getenv("PENNY_LOG_PATH", str(DEFAULT_LOG_PATH)))

# Ensure log directory exists on import so the first write cannot fail
# on a missing directory
LOGS_BASE_DIR.mkdir(parents=True, exist_ok=True)
+
46
+
47
+ # ============================================================
48
+ # LOG LEVEL ENUM (For categorizing log entries)
49
+ # ============================================================
50
+
51
class LogLevel(str, Enum):
    """
    Categorizes the importance/type of log entries.

    Inherits from ``str`` so members serialize naturally to JSON and compare
    equal to their string values. Maps to Azure Application Insights
    severity levels (DEBUG..CRITICAL), plus AUDIT for compliance records.
    """
    DEBUG = "debug"          # Detailed diagnostic info
    INFO = "info"            # General informational messages
    WARNING = "warning"      # Potential issues
    ERROR = "error"          # Error events
    CRITICAL = "critical"    # Critical failures
    AUDIT = "audit"          # Compliance/audit trail
+
63
+
64
class InteractionType(str, Enum):
    """
    Categorizes the type of user interaction for analytics.

    Inherits from ``str`` for JSON-friendly serialization. Values are
    produced by ``_classify_interaction`` from the detected intent and help
    track which features residents use most.
    """
    QUERY = "query"                      # General question
    RESOURCE_LOOKUP = "resource_lookup"  # Finding civic resources
    TRANSLATION = "translation"          # Language translation
    EVENT_SEARCH = "event_search"        # Looking for events
    WEATHER = "weather"                  # Weather inquiry
    DOCUMENT = "document_processing"     # PDF/form processing
    EMERGENCY = "emergency"              # Crisis/emergency routing
    GREETING = "greeting"                # Conversational greeting
    HELP = "help"                        # Help request
    UNKNOWN = "unknown"                  # Unclassified fallback
+
80
+
81
+ # ============================================================
82
+ # STRUCTURED LOG ENTRY (Type-safe logging)
83
+ # ============================================================
84
+
85
@dataclass
class PennyLogEntry:
    """
    📋 One structured record of a Penny interaction.

    Designed to be Azure Application Insights compatible, privacy-safe
    (no raw PII unless explicitly flagged), analytics-ready, and suitable
    for compliance audit trails.
    """
    # Timestamp (ISO-8601, UTC)
    timestamp: str

    # Request Context
    input: str
    input_length: int
    tenant_id: str
    user_role: str
    interaction_type: InteractionType

    # Response Context
    intent: str
    tool_used: Optional[str]
    model_id: Optional[str]
    response_summary: str
    response_length: int
    response_time_ms: Optional[float]

    # Technical Context
    log_level: LogLevel
    success: bool
    error_message: Optional[str] = None

    # Location Context (Optional)
    lat: Optional[float] = None
    lon: Optional[float] = None
    location_detected: Optional[str] = None

    # Privacy & Compliance
    session_id: Optional[str] = None  # Hashed session identifier
    contains_pii: bool = False

    # Performance Metrics
    tokens_used: Optional[int] = None
    cache_hit: bool = False

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a plain dict, flattening Enum members to their values."""
        flattened: Dict[str, Any] = {}
        for field_name, value in asdict(self).items():
            flattened[field_name] = value.value if isinstance(value, Enum) else value
        return flattened
+
137
+
138
+ # ============================================================
139
+ # DAILY LOG ROTATION
140
+ # ============================================================
141
+
142
def get_daily_log_path() -> Path:
    """
    🗓️ Return today's (UTC) rotating log file path, creating its directory.

    Produces one file per day under LOGS_BASE_DIR, e.g.::

        data/logs/2025-02-01.jsonl

    Daily rotation keeps files small, simplifies archival, and maps cleanly
    onto one-blob-per-day Azure uploads.
    """
    today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
    daily_path = LOGS_BASE_DIR / f"{today}.jsonl"

    # Defensive: recreate the directory if it was removed after import.
    daily_path.parent.mkdir(parents=True, exist_ok=True)
    return daily_path
+
163
+
164
+ # ============================================================
165
+ # MAIN LOGGING FUNCTION (Enhanced)
166
+ # ============================================================
167
+
168
def log_request(
    payload: Dict[str, Any],
    response: Dict[str, Any],
    rotate_daily: bool = True,
    log_level: LogLevel = LogLevel.INFO
) -> None:
    """
    📝 Logs a user interaction with Penny.

    This is the primary logging function called by router.py after
    processing each request. It creates a structured, privacy-safe
    record of the interaction: session IDs are hashed, and inputs that
    appear to contain PII are masked before being written.

    Args:
        payload: Incoming request data from router.py (keys read: input,
            tenant_id, role, session_id, lat, lon)
        response: Final response dictionary from orchestrator (keys read:
            intent, tool, model_id, response, response_time_ms, success,
            error, location_detected, tokens_used, cache_hit)
        rotate_daily: If True, writes to the per-day log file; otherwise
            appends to the single combined LOG_PATH file
        log_level: Severity level recorded on this entry

    Example:
        log_request(
            payload={"input": "What's the weather?", "tenant_id": "atlanta_ga"},
            response={"intent": "weather", "response": "..."}
        )
    """

    try:
        # --- Extract Core Fields (all reads are defensive with defaults) ---
        user_input = payload.get("input", "")
        tenant_id = payload.get("tenant_id", "unknown")
        user_role = payload.get("role", "resident")

        # --- Determine Interaction Type (analytics bucket from intent) ---
        intent = response.get("intent", "unknown")
        interaction_type = _classify_interaction(intent)

        # --- Privacy: Hash Session ID (if provided) ---
        # One-way SHA-256 prefix so sessions are trackable without raw IDs.
        session_id = payload.get("session_id")
        if session_id:
            session_id = _hash_identifier(session_id)

        # --- Detect PII (Simple regex check - can be enhanced) ---
        contains_pii = _check_for_pii(user_input)

        # --- Create Structured Log Entry ---
        log_entry = PennyLogEntry(
            timestamp=datetime.now(timezone.utc).isoformat(),
            # Input is masked only when PII was detected above.
            input=_sanitize_input(user_input, contains_pii),
            input_length=len(user_input),
            tenant_id=tenant_id,
            user_role=user_role,
            interaction_type=interaction_type,
            intent=intent,
            tool_used=response.get("tool", "none"),
            model_id=response.get("model_id"),
            # Truncated (<=250 chars) summary to keep log files small.
            response_summary=_summarize_response(response.get("response")),
            response_length=len(str(response.get("response", ""))),
            response_time_ms=response.get("response_time_ms"),
            log_level=log_level,
            success=response.get("success", True),
            error_message=response.get("error"),
            lat=payload.get("lat"),
            lon=payload.get("lon"),
            location_detected=response.get("location_detected"),
            session_id=session_id,
            contains_pii=contains_pii,
            tokens_used=response.get("tokens_used"),
            cache_hit=response.get("cache_hit", False)
        )

        # --- Write to File (daily-rotated or combined) ---
        log_path = get_daily_log_path() if rotate_daily else LOG_PATH
        _write_log_entry(log_path, log_entry)

        # --- Optional: Send to Azure (if enabled; case-insensitive flag) ---
        if os.getenv("AZURE_LOGS_ENABLED", "false").lower() == "true":
            _send_to_azure(log_entry)

        # --- Log to console (captured by Azure Application Insights) ---
        logger.info(
            f"Request logged | "
            f"tenant={tenant_id} | "
            f"intent={intent} | "
            f"interaction={interaction_type.value} | "
            f"success={log_entry.success}"
        )

    except Exception as e:
        # Failsafe: Never let logging failures crash the application
        logger.error(f"Failed to log request: {e}", exc_info=True)
        _emergency_log(payload, response, str(e))
+
260
+
261
+ # ============================================================
262
+ # LOG WRITING (With error handling)
263
+ # ============================================================
264
+
265
+ def _write_log_entry(log_path: Path, log_entry: PennyLogEntry) -> None:
266
+ """
267
+ 📁 Writes log entry to JSONL file.
268
+ Handles file I/O errors gracefully.
269
+ """
270
+ try:
271
+ # Ensure parent directory exists
272
+ log_path.parent.mkdir(parents=True, exist_ok=True)
273
+
274
+ # Write as JSON Lines (append mode)
275
+ with open(log_path, "a", encoding="utf-8") as f:
276
+ json_str = json.dumps(log_entry.to_dict(), ensure_ascii=False)
277
+ f.write(json_str + "\n")
278
+
279
+ except IOError as e:
280
+ logger.error(f"Failed to write to log file {log_path}: {e}")
281
+ _emergency_log_to_console(log_entry)
282
+ except Exception as e:
283
+ logger.error(f"Unexpected error writing log: {e}", exc_info=True)
284
+ _emergency_log_to_console(log_entry)
285
+
286
+
287
+ def _emergency_log_to_console(log_entry: PennyLogEntry) -> None:
288
+ """
289
+ 🚨 Emergency fallback: Print log to console if file writing fails.
290
+ Azure Application Insights will capture console output.
291
+ """
292
+ print(f"[EMERGENCY LOG] {json.dumps(log_entry.to_dict())}")
293
+
294
+
295
+ def _emergency_log(payload: Dict, response: Dict, error: str) -> None:
296
+ """
297
+ 🚨 Absolute fallback for when structured logging fails entirely.
298
+ """
299
+ emergency_entry = {
300
+ "timestamp": datetime.now(timezone.utc).isoformat(),
301
+ "level": "CRITICAL",
302
+ "message": "Logging system failure",
303
+ "error": error,
304
+ "input_preview": str(payload.get("input", ""))[:100],
305
+ "response_preview": str(response.get("response", ""))[:100]
306
+ }
307
+ print(f"[LOGGING FAILURE] {json.dumps(emergency_entry)}")
308
+
309
+
310
+ # ============================================================
311
+ # HELPER FUNCTIONS
312
+ # ============================================================
313
+
314
+ def _classify_interaction(intent: str) -> InteractionType:
315
+ """
316
+ 🏷️ Maps intent to interaction type for analytics.
317
+ """
318
+ intent_mapping = {
319
+ "weather": InteractionType.WEATHER,
320
+ "events": InteractionType.EVENT_SEARCH,
321
+ "local_resources": InteractionType.RESOURCE_LOOKUP,
322
+ "translation": InteractionType.TRANSLATION,
323
+ "document_processing": InteractionType.DOCUMENT,
324
+ "emergency": InteractionType.EMERGENCY,
325
+ "greeting": InteractionType.GREETING,
326
+ "help": InteractionType.HELP,
327
+ }
328
+ return intent_mapping.get(intent.lower(), InteractionType.UNKNOWN)
329
+
330
+
331
+ def _summarize_response(resp: Optional[Any]) -> str:
332
+ """
333
+ ✂️ Creates a truncated summary of the response for logging.
334
+ Prevents log files from becoming bloated with full responses.
335
+ """
336
+ if resp is None:
337
+ return "No response content"
338
+
339
+ if isinstance(resp, dict):
340
+ # Try to extract the most meaningful part
341
+ summary = (
342
+ resp.get("response") or
343
+ resp.get("summary") or
344
+ resp.get("message") or
345
+ str(resp)
346
+ )
347
+ return str(summary)[:250]
348
+
349
+ return str(resp)[:250]
350
+
351
+
352
+ def _hash_identifier(identifier: str) -> str:
353
+ """
354
+ 🔒 Creates a privacy-safe hash of identifiers (session IDs, user IDs).
355
+
356
+ Uses SHA256 for one-way hashing. This allows:
357
+ - Session tracking without storing raw IDs
358
+ - Privacy compliance (GDPR, CCPA)
359
+ - Anonymized analytics
360
+ """
361
+ return hashlib.sha256(identifier.encode()).hexdigest()[:16]
362
+
363
+
364
+ def _check_for_pii(text: str) -> bool:
365
+ """
366
+ 🔍 Simple PII detection (can be enhanced with NER models).
367
+
368
+ Checks for common PII patterns:
369
+ - Social Security Numbers
370
+ - Email addresses
371
+ - Phone numbers
372
+
373
+ Returns True if potential PII detected.
374
+ """
375
+ import re
376
+
377
+ # SSN pattern: XXX-XX-XXXX
378
+ ssn_pattern = r'\b\d{3}-\d{2}-\d{4}\b'
379
+
380
+ # Email pattern
381
+ email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b'
382
+
383
+ # Phone pattern: various formats
384
+ phone_pattern = r'\b\d{3}[-.\s]?\d{3}[-.\s]?\d{4}\b'
385
+
386
+ patterns = [ssn_pattern, email_pattern, phone_pattern]
387
+
388
+ for pattern in patterns:
389
+ if re.search(pattern, text):
390
+ return True
391
+
392
+ return False
393
+
394
+
395
+ def _sanitize_input(text: str, contains_pii: bool) -> str:
396
+ """
397
+ 🧹 Sanitizes user input for logging.
398
+
399
+ If PII detected:
400
+ - Masks the input for privacy
401
+ - Keeps first/last few characters for debugging
402
+
403
+ Args:
404
+ text: Original user input
405
+ contains_pii: Whether PII was detected
406
+
407
+ Returns:
408
+ Sanitized text safe for logging
409
+ """
410
+ if not contains_pii:
411
+ return text
412
+
413
+ # Mask middle portion if PII detected
414
+ if len(text) <= 20:
415
+ return "[PII_DETECTED]"
416
+
417
+ # Keep first 10 and last 10 chars, mask middle
418
+ return f"{text[:10]}...[PII_MASKED]...{text[-10:]}"
419
+
420
+
421
+ # ============================================================
422
+ # AZURE INTEGRATION (Placeholder for future)
423
+ # ============================================================
424
+
425
+ def _send_to_azure(log_entry: PennyLogEntry) -> None:
426
+ """
427
+ ☁️ Sends log entry to Azure services.
428
+
429
+ Options:
430
+ 1. Azure Application Insights (custom events)
431
+ 2. Azure Blob Storage (long-term archival)
432
+ 3. Azure Table Storage (queryable logs)
433
+
434
+ TODO: Implement when Azure integration is ready
435
+ """
436
+ try:
437
+ # Example: Send to Application Insights
438
+ # from applicationinsights import TelemetryClient
439
+ # tc = TelemetryClient(os.getenv("APPINSIGHTS_INSTRUMENTATION_KEY"))
440
+ # tc.track_event(
441
+ # "PennyInteraction",
442
+ # properties=log_entry.to_dict()
443
+ # )
444
+ # tc.flush()
445
+
446
+ logger.debug("Azure logging not yet implemented")
447
+
448
+ except Exception as e:
449
+ logger.error(f"Failed to send log to Azure: {e}")
450
+ # Don't raise - logging failures should never crash the app
451
+
452
+
453
+ # ============================================================
454
+ # LOG ANALYSIS UTILITIES
455
+ # ============================================================
456
+
457
+ def get_logs_for_date(date: str) -> List[Dict[str, Any]]:
458
+ """
459
+ 📊 Retrieves all log entries for a specific date.
460
+
461
+ Args:
462
+ date: Date string in YYYY-MM-DD format
463
+
464
+ Returns:
465
+ List of log entry dictionaries
466
+
467
+ Example:
468
+ logs = get_logs_for_date("2025-02-01")
469
+ """
470
+ log_file = LOGS_BASE_DIR / f"{date}.jsonl"
471
+
472
+ if not log_file.exists():
473
+ logger.warning(f"No logs found for date: {date}")
474
+ return []
475
+
476
+ logs = []
477
+ try:
478
+ with open(log_file, "r", encoding="utf-8") as f:
479
+ for line in f:
480
+ if line.strip():
481
+ logs.append(json.loads(line))
482
+ except Exception as e:
483
+ logger.error(f"Error reading logs for {date}: {e}")
484
+
485
+ return logs
486
+
487
+
488
+ def get_interaction_stats(date: str) -> Dict[str, Any]:
489
+ """
490
+ 📈 Generates usage statistics for a given date.
491
+
492
+ Returns metrics like:
493
+ - Total interactions
494
+ - Interactions by type
495
+ - Average response time
496
+ - Success rate
497
+ - Most common intents
498
+
499
+ Args:
500
+ date: Date string in YYYY-MM-DD format
501
+
502
+ Returns:
503
+ Statistics dictionary
504
+ """
505
+ logs = get_logs_for_date(date)
506
+
507
+ if not logs:
508
+ return {"error": "No logs found for date", "date": date}
509
+
510
+ # Calculate statistics
511
+ total = len(logs)
512
+ successful = sum(1 for log in logs if log.get("success", False))
513
+
514
+ # Response time statistics
515
+ response_times = [
516
+ log["response_time_ms"]
517
+ for log in logs
518
+ if log.get("response_time_ms") is not None
519
+ ]
520
+ avg_response_time = sum(response_times) / len(response_times) if response_times else 0
521
+
522
+ # Interaction type breakdown
523
+ interaction_counts = {}
524
+ for log in logs:
525
+ itype = log.get("interaction_type", "unknown")
526
+ interaction_counts[itype] = interaction_counts.get(itype, 0) + 1
527
+
528
+ # Intent breakdown
529
+ intent_counts = {}
530
+ for log in logs:
531
+ intent = log.get("intent", "unknown")
532
+ intent_counts[intent] = intent_counts.get(intent, 0) + 1
533
+
534
+ return {
535
+ "date": date,
536
+ "total_interactions": total,
537
+ "successful_interactions": successful,
538
+ "success_rate": f"{(successful/total*100):.1f}%",
539
+ "avg_response_time_ms": round(avg_response_time, 2),
540
+ "interactions_by_type": interaction_counts,
541
+ "top_intents": dict(sorted(
542
+ intent_counts.items(),
543
+ key=lambda x: x[1],
544
+ reverse=True
545
+ )[:5])
546
+ }
547
+
548
+
549
+ # ============================================================
550
+ # LOG CLEANUP (For maintenance)
551
+ # ============================================================
552
+
553
+ def cleanup_old_logs(days_to_keep: int = 90) -> int:
554
+ """
555
+ 🧹 Removes log files older than specified days.
556
+
557
+ Args:
558
+ days_to_keep: Number of days to retain logs
559
+
560
+ Returns:
561
+ Number of files deleted
562
+
563
+ Example:
564
+ # Delete logs older than 90 days
565
+ deleted = cleanup_old_logs(90)
566
+ """
567
+ from datetime import timedelta
568
+
569
+ cutoff_date = datetime.now(timezone.utc) - timedelta(days=days_to_keep)
570
+ deleted_count = 0
571
+
572
+ try:
573
+ for log_file in LOGS_BASE_DIR.glob("*.jsonl"):
574
+ try:
575
+ # Parse date from filename (YYYY-MM-DD.jsonl)
576
+ date_str = log_file.stem
577
+ file_date = datetime.strptime(date_str, "%Y-%m-%d").replace(tzinfo=timezone.utc)
578
+
579
+ if file_date < cutoff_date:
580
+ log_file.unlink()
581
+ deleted_count += 1
582
+ logger.info(f"Deleted old log file: {log_file.name}")
583
+
584
+ except ValueError:
585
+ # Skip files that don't match date format
586
+ continue
587
+
588
+ except Exception as e:
589
+ logger.error(f"Error during log cleanup: {e}")
590
+
591
+ logger.info(f"Log cleanup complete: {deleted_count} files deleted")
592
+ return deleted_count
593
+
594
+
595
+ # ============================================================
596
+ # PUBLIC API FUNCTIONS (Used by other modules)
597
+ # ============================================================
598
+
599
+ def log_interaction(
600
+ tenant_id: Optional[str] = None,
601
+ interaction_type: Optional[str] = None,
602
+ intent: Optional[str] = None,
603
+ response_time_ms: Optional[float] = None,
604
+ success: Optional[bool] = None,
605
+ metadata: Optional[Dict[str, Any]] = None,
606
+ **kwargs
607
+ ) -> None:
608
+ """
609
+ 📝 Simplified logging function used throughout Penny's codebase.
610
+
611
+ This is the main logging function called by orchestrator, router, agents, and model utils.
612
+ It creates a structured log entry and writes it to the log file.
613
+
614
+ Args:
615
+ tenant_id: City/location identifier (optional)
616
+ interaction_type: Type of interaction (e.g., "weather", "events", "orchestration") (optional)
617
+ intent: Detected intent (e.g., "weather", "emergency") (optional)
618
+ response_time_ms: Response time in milliseconds (optional)
619
+ success: Whether the operation succeeded (optional)
620
+ metadata: Optional additional metadata dictionary
621
+ **kwargs: Additional fields to include in log entry (e.g., error, details, fallback_used)
622
+
623
+ Example:
624
+ log_interaction(
625
+ tenant_id="atlanta_ga",
626
+ interaction_type="weather",
627
+ intent="weather",
628
+ response_time_ms=150.5,
629
+ success=True,
630
+ metadata={"temperature": 72, "condition": "sunny"}
631
+ )
632
+
633
+ # Or with keyword arguments:
634
+ log_interaction(
635
+ intent="translation_initialization",
636
+ success=False,
637
+ error="model_loader unavailable"
638
+ )
639
+ """
640
+ try:
641
+ # Build log entry dictionary from provided parameters
642
+ log_entry_dict = {
643
+ "timestamp": datetime.now(timezone.utc).isoformat()
644
+ }
645
+
646
+ # Add standard fields if provided
647
+ if tenant_id is not None:
648
+ log_entry_dict["tenant_id"] = sanitize_for_logging(tenant_id)
649
+ if interaction_type is not None:
650
+ log_entry_dict["interaction_type"] = interaction_type
651
+ if intent is not None:
652
+ log_entry_dict["intent"] = intent
653
+ if response_time_ms is not None:
654
+ log_entry_dict["response_time_ms"] = round(response_time_ms, 2)
655
+ if success is not None:
656
+ log_entry_dict["success"] = success
657
+
658
+ # Add metadata if provided
659
+ if metadata:
660
+ # Sanitize metadata values
661
+ sanitized_metadata = {}
662
+ for key, value in metadata.items():
663
+ if isinstance(value, str):
664
+ sanitized_metadata[key] = sanitize_for_logging(value)
665
+ else:
666
+ sanitized_metadata[key] = value
667
+ log_entry_dict["metadata"] = sanitized_metadata
668
+
669
+ # Add any additional kwargs (for backward compatibility with model utils)
670
+ for key, value in kwargs.items():
671
+ if key not in log_entry_dict: # Don't overwrite standard fields
672
+ if isinstance(value, str):
673
+ log_entry_dict[key] = sanitize_for_logging(value)
674
+ else:
675
+ log_entry_dict[key] = value
676
+
677
+ # Write to log file
678
+ log_path = get_daily_log_path()
679
+ _write_log_entry_dict(log_path, log_entry_dict)
680
+
681
+ except Exception as e:
682
+ # Failsafe: Never let logging failures crash the application
683
+ logger.error(f"Failed to log interaction: {e}", exc_info=True)
684
+ _emergency_log_to_console_dict(log_entry_dict if 'log_entry_dict' in locals() else {})
685
+
686
+
687
+ def sanitize_for_logging(text: str) -> str:
688
+ """
689
+ 🧹 Sanitizes text for safe logging (removes PII).
690
+
691
+ This function is used throughout Penny to ensure sensitive information
692
+ is not logged. It checks for PII and masks it appropriately.
693
+
694
+ Args:
695
+ text: Text to sanitize
696
+
697
+ Returns:
698
+ Sanitized text safe for logging
699
+
700
+ Example:
701
+ safe_text = sanitize_for_logging("My email is user@example.com")
702
+ # Returns: "My email is [PII_DETECTED]"
703
+ """
704
+ if not text or not isinstance(text, str):
705
+ return str(text) if text else ""
706
+
707
+ # Check for PII
708
+ contains_pii = _check_for_pii(text)
709
+
710
+ if contains_pii:
711
+ # Mask PII
712
+ if len(text) <= 20:
713
+ return "[PII_DETECTED]"
714
+ return f"{text[:10]}...[PII_MASKED]...{text[-10:]}"
715
+
716
+ return text
717
+
718
+
719
+ def _write_log_entry_dict(log_path: Path, log_entry_dict: Dict[str, Any]) -> None:
720
+ """
721
+ 📁 Writes log entry dictionary to JSONL file.
722
+ Helper function for simplified logging.
723
+ """
724
+ try:
725
+ log_path.parent.mkdir(parents=True, exist_ok=True)
726
+ with open(log_path, "a", encoding="utf-8") as f:
727
+ json_str = json.dumps(log_entry_dict, ensure_ascii=False)
728
+ f.write(json_str + "\n")
729
+ except Exception as e:
730
+ logger.error(f"Failed to write log entry: {e}")
731
+ _emergency_log_to_console_dict(log_entry_dict)
732
+
733
+
734
+ def _emergency_log_to_console_dict(log_entry_dict: Dict[str, Any]) -> None:
735
+ """
736
+ 🚨 Emergency fallback: Print log to console if file writing fails.
737
+ """
738
+ print(f"[EMERGENCY LOG] {json.dumps(log_entry_dict)}")
739
+
740
+
741
+ # ============================================================
742
+ # INITIALIZATION
743
+ # ============================================================
744
+
745
+ def initialize_logging_system() -> bool:
746
+ """
747
+ 🚀 Initializes the logging system.
748
+ Should be called during app startup.
749
+
750
+ Returns:
751
+ True if initialization successful
752
+ """
753
+ logger.info("📊 Initializing Penny's logging system...")
754
+
755
+ try:
756
+ # Ensure log directory exists
757
+ LOGS_BASE_DIR.mkdir(parents=True, exist_ok=True)
758
+
759
+ # Test write permissions
760
+ test_file = LOGS_BASE_DIR / ".write_test"
761
+ test_file.write_text("test")
762
+ test_file.unlink()
763
+
764
+ logger.info(f"✅ Logging system initialized")
765
+ logger.info(f"📁 Log directory: {LOGS_BASE_DIR}")
766
+ logger.info(f"🔄 Daily rotation: Enabled")
767
+
768
+ # Log Azure status
769
+ if os.getenv("AZURE_LOGS_ENABLED") == "true":
770
+ logger.info("☁️ Azure logging: Enabled")
771
+ else:
772
+ logger.info("💾 Azure logging: Disabled (local only)")
773
+
774
+ return True
775
+
776
+ except Exception as e:
777
+ logger.error(f"❌ Failed to initialize logging system: {e}")
778
+ return False
app/main.py ADDED
@@ -0,0 +1,660 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app/main.py
2
+ """
3
+ 🤖 PENNY - People's Engagement Network Navigator for You
4
+ FastAPI Entry Point with Azure-Ready Configuration
5
+
6
+ This is Penny's front door. She loads her environment, registers all her endpoints,
7
+ and makes sure she's ready to help residents find what they need.
8
+
9
+ MISSION: Connect residents to civic resources through a warm, multilingual interface
10
+ that removes barriers and empowers communities.
11
+ """
12
+
13
+ from fastapi import FastAPI, Request, status
14
+ from fastapi.responses import JSONResponse
15
+ from fastapi.middleware.cors import CORSMiddleware
16
+ import logging
17
+ import sys
18
+ import os
19
+ from dotenv import load_dotenv
20
+ import pathlib
21
+ from typing import Dict, Any, Optional, List
22
+ from datetime import datetime, timedelta
23
+
24
+ # --- LOGGING CONFIGURATION (Must be set up before other imports) ---
25
+ logging.basicConfig(
26
+ level=logging.INFO,
27
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
28
+ handlers=[
29
+ logging.StreamHandler(sys.stdout)
30
+ ]
31
+ )
32
+ logger = logging.getLogger(__name__)
33
+
34
# --- CRITICAL: FORCE .ENV LOADING BEFORE ANY OTHER IMPORTS ---
# Determine the absolute path to the project root (one level above app/)
PROJECT_ROOT = pathlib.Path(__file__).parent.parent

# Load environment variables into the active Python session IMMEDIATELY.
# This ensures Azure Maps keys, API tokens, and model paths are available
# to every module imported below.
try:
    load_dotenv(PROJECT_ROOT / ".env")

    # Verify critical environment variables are loaded; missing keys are a
    # warning (not fatal) so local development without Azure still works.
    REQUIRED_ENV_VARS = ["AZURE_MAPS_KEY"]
    missing_vars = [var for var in REQUIRED_ENV_VARS if not os.getenv(var)]
    if missing_vars:
        logger.warning(f"⚠️ WARNING: Missing required environment variables: {missing_vars}")
        logger.warning(f"📁 Looking for .env file at: {PROJECT_ROOT / '.env'}")
    else:
        logger.info("✅ Environment variables loaded successfully")
except Exception as e:
    logger.error(f"❌ Error loading environment variables: {e}")
    logger.error(f"📁 Expected .env location: {PROJECT_ROOT / '.env'}")

# --- NOW SAFE TO IMPORT MODULES THAT DEPEND ON ENV VARS ---
# These app modules read env vars at import time, hence the ordering above.
try:
    from app.weather_agent import get_weather_for_location
    from app.router import router as api_router
    from app.location_utils import (
        initialize_location_system,
        get_all_supported_cities,
        validate_city_data_files,
        SupportedCities,
        get_city_coordinates
    )
except ImportError as e:
    # Core modules are mandatory — abort startup rather than run degraded.
    logger.error(f"❌ Critical import error: {e}")
    logger.error("⚠️ Penny cannot start without core modules")
    sys.exit(1)
70
+
71
+ # --- FASTAPI APP INITIALIZATION ---
72
+ app = FastAPI(
73
+ title="PENNY - Civic Engagement Assistant",
74
+ description=(
75
+ "💛 Multilingual civic chatbot connecting residents with local services, "
76
+ "government programs, and community resources.\n\n"
77
+ "**Powered by:**\n"
78
+ "- Transformer models for natural language understanding\n"
79
+ "- Azure ML infrastructure for scalable deployment\n"
80
+ "- 27-language translation support\n"
81
+ "- Real-time weather integration\n"
82
+ "- Multi-city civic resource databases\n\n"
83
+ "**Supported Cities:** Atlanta, Birmingham, Chesterfield, El Paso, Providence, Seattle"
84
+ ),
85
+ version="1.0.0",
86
+ docs_url="/docs",
87
+ redoc_url="/redoc",
88
+ contact={
89
+ "name": "Penny Support",
90
+ "email": "support@pennyai.example"
91
+ },
92
+ license_info={
93
+ "name": "Proprietary",
94
+ }
95
+ )
96
+
97
# --- CORS MIDDLEWARE (Configure for your deployment) ---
# Production: set ALLOWED_ORIGINS to a comma-separated list of exact origins.
# FIX: strip whitespace around each origin and drop empty entries, so that
# ALLOWED_ORIGINS="https://a.com, https://b.com" matches the browser's Origin
# header (the raw split used to keep the leading space, which never matches).
# NOTE(review): wildcard "*" combined with allow_credentials=True is rejected
# by browsers per the CORS spec — restrict origins before enabling credentials.
allowed_origins = [
    origin.strip()
    for origin in os.getenv("ALLOWED_ORIGINS", "*").split(",")
    if origin.strip()
]
app.add_middleware(
    CORSMiddleware,
    allow_origins=allowed_origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
107
+
108
# --- APPLICATION STATE (For health checks and monitoring) ---
# Mutable flags set during startup and read by /health and /debug/env.
app.state.startup_time = None
app.state.startup_errors: List[str] = []
app.state.location_system_healthy = False
112
+
113
# --- GLOBAL EXCEPTION HANDLER ---
@app.exception_handler(Exception)
async def global_exception_handler(request: Request, exc: Exception) -> JSONResponse:
    """
    🛡️ Last-resort handler for exceptions no route handled itself.

    Logs the full traceback for operators, then returns a sanitized JSON
    payload. Raw exception text is exposed only when DEBUG_MODE=true.

    Args:
        request: The incoming FastAPI request.
        exc: The exception that escaped the route handler.

    Returns:
        A 500 JSONResponse safe to show end users.
    """
    logger.error(
        f"Unhandled exception on {request.url.path} | "
        f"method={request.method} | "
        f"error={exc}",
        exc_info=True
    )

    # Only leak exception details when the operator explicitly enabled debug.
    show_detail = os.getenv("DEBUG_MODE", "false").lower() == "true"

    return JSONResponse(
        status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
        content={
            "error": "An unexpected error occurred. Penny's on it!",
            "message": "Our team has been notified and we're working to fix this.",
            "detail": str(exc) if show_detail else None,
            "request_path": str(request.url.path),
            "timestamp": datetime.utcnow().isoformat()
        }
    )
149
+
150
# --- STARTUP EVENT ---
@app.on_event("startup")
async def startup_event() -> None:
    """
    🚀 Runs once when the application boots.

    Steps:
      1. Record startup time and reset the error list.
      2. Log environment details.
      3. Check Azure Maps configuration.
      4. Initialize the location/city system and validate its data files.
      5. Emit a startup summary banner.
    """
    try:
        app.state.startup_time = datetime.utcnow()
        app.state.startup_errors = []

        logger.info("=" * 60)
        logger.info("🤖 PENNY STARTUP INITIALIZED")
        logger.info("=" * 60)

        # Environment details for the operator.
        logger.info(f"📂 Project Root: {PROJECT_ROOT}")
        logger.info(f"🌍 Environment: {os.getenv('ENVIRONMENT', 'development')}")
        logger.info(f"🐍 Python Version: {sys.version.split()[0]}")

        # Azure Maps powers weather lookups; warn but keep serving without it.
        maps_key = os.getenv("AZURE_MAPS_KEY")
        if maps_key:
            logger.info("🗺️ Azure Maps: ✅ Configured")
        else:
            problem = "Azure Maps key missing - weather features will be limited"
            logger.warning(f"🗺️ Azure Maps: ⚠️ {problem}")
            app.state.startup_errors.append(problem)

        # Bring up the location/city subsystem.
        logger.info("🗺️ Initializing location system...")
        try:
            location_ready = initialize_location_system()
            app.state.location_system_healthy = location_ready

            if location_ready:
                logger.info("✅ Location system initialized successfully")

                supported = SupportedCities.get_all_cities()
                logger.info(f"📍 Supported cities: {len(supported)}")
                for city in supported:
                    logger.info(f" - {city.full_name} ({city.tenant_id})")

                # Flag any city missing its events/resources data files.
                file_checks = validate_city_data_files()
                incomplete = [
                    tid for tid, state in file_checks.items()
                    if not state["events"] or not state["resources"]
                ]
                if incomplete:
                    problem = f"Incomplete data for cities: {incomplete}"
                    logger.warning(f"⚠️ {problem}")
                    app.state.startup_errors.append(problem)
            else:
                problem = "Location system initialization failed"
                logger.error(f"❌ {problem}")
                app.state.startup_errors.append(problem)

        except Exception as e:
            problem = f"Error initializing location system: {e}"
            logger.error(f"❌ {problem}", exc_info=True)
            app.state.location_system_healthy = False
            app.state.startup_errors.append(problem)

        # Summary banner: list accumulated warnings, or celebrate.
        logger.info("=" * 60)
        if app.state.startup_errors:
            logger.warning(f"⚠️ PENNY STARTED WITH {len(app.state.startup_errors)} WARNING(S)")
            for error in app.state.startup_errors:
                logger.warning(f" - {error}")
        else:
            logger.info("🎉 PENNY IS READY TO HELP RESIDENTS!")
            logger.info("📖 API Documentation: http://localhost:8000/docs")
        logger.info("=" * 60)

    except Exception as e:
        logger.error(f"❌ Critical startup error: {e}", exc_info=True)
        app.state.startup_errors.append(f"Critical startup failure: {e}")
235
# --- SHUTDOWN EVENT ---
@app.on_event("shutdown")
async def shutdown_event() -> None:
    """
    👋 Logs a shutdown banner and total uptime.

    Placeholder for future cleanup work (database connections, state
    persistence, model resource release).
    """
    try:
        logger.info("=" * 60)
        logger.info("👋 PENNY SHUTTING DOWN")
        logger.info("=" * 60)

        # Report how long this instance served traffic.
        if app.state.startup_time:
            uptime = datetime.utcnow() - app.state.startup_time
            logger.info(f"⏱️ Total uptime: {uptime}")

        # TODO: close database connections, save state, release model resources.

        logger.info("✅ Shutdown complete. Goodbye for now!")
    except Exception as e:
        logger.error(f"Error during shutdown: {e}", exc_info=True)
259
+
260
# --- ROUTER INCLUSION ---
# Registers every endpoint defined in app/router.py on this application.
try:
    app.include_router(api_router)
    logger.info("✅ API router registered successfully")
except Exception as e:
    logger.error(f"❌ Failed to register API router: {e}", exc_info=True)
267
+
268
+ # ============================================================
269
+ # CORE HEALTH & STATUS ENDPOINTS
270
+ # ============================================================
271
+
272
@app.get("/", tags=["Health"])
async def root() -> Dict[str, Any]:
    """
    🏠 Liveness endpoint — friendly banner plus a feature summary.

    The first thing users and load balancers hit. Degrades gracefully
    to a minimal payload if any lookup fails.

    Returns:
        Basic status and feature information.
    """
    try:
        return {
            "message": "💛 Hi! I'm Penny, your civic engagement assistant.",
            "status": "operational",
            "tagline": "Connecting residents to community resources since 2024",
            "docs": "/docs",
            "api_version": "1.0.0",
            "supported_cities": len(SupportedCities.get_all_cities()),
            "features": [
                "27-language translation",
                "Real-time weather",
                "Community events",
                "Local resource finder",
                "Document processing"
            ],
            "timestamp": datetime.utcnow().isoformat()
        }
    except Exception as e:
        logger.error(f"Error in root endpoint: {e}", exc_info=True)
        return {
            "message": "💛 Hi! I'm Penny, your civic engagement assistant.",
            "status": "degraded",
            "error": "Some features may be unavailable"
        }
307
+
308
@app.get("/health", tags=["Health"])
async def health_check() -> JSONResponse:
    """
    🏥 Health probe for Azure load balancers and monitoring.

    Reports environment configuration, location-system status, data-file
    coverage, and component availability.

    Returns:
        200 JSONResponse when all critical checks pass, 503 otherwise.
    """
    try:
        # Human-readable uptime (microseconds trimmed), or None pre-startup.
        uptime = None
        if app.state.startup_time:
            elapsed = datetime.utcnow() - app.state.startup_time
            uptime = str(elapsed).split('.')[0]

        # Hoisted env lookups used in several places below.
        maps_key = os.getenv("AZURE_MAPS_KEY")
        debug_on = os.getenv("DEBUG_MODE", "false").lower() == "true"

        # Count cities that have both events and resources data on disk.
        data_checks = validate_city_data_files()
        fully_stocked = sum(
            1 for check in data_checks.values()
            if check.get("events", False) and check.get("resources", False)
        )
        city_count = len(SupportedCities.get_all_cities())

        report = {
            "status": "healthy",
            "timestamp": datetime.utcnow().isoformat(),
            "uptime": uptime,
            "environment": {
                "azure_maps_configured": bool(maps_key),
                "debug_mode": debug_on,
                "environment_type": os.getenv("ENVIRONMENT", "development")
            },
            "location_system": {
                "status": "operational" if app.state.location_system_healthy else "degraded",
                "supported_cities": city_count,
                "cities_with_full_data": fully_stocked
            },
            "api_components": {
                "router": "operational",
                "weather_agent": "operational" if maps_key else "degraded",
                "translation": "operational",
                "document_processing": "operational"
            },
            "startup_errors": app.state.startup_errors if app.state.startup_errors else None,
            "api_version": "1.0.0"
        }

        # Either critical subsystem failing downgrades the whole service.
        if not (app.state.location_system_healthy and bool(maps_key)):
            report["status"] = "degraded"
            logger.warning(f"Health check: System degraded - {report}")
            return JSONResponse(
                status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
                content=report
            )

        return JSONResponse(
            status_code=status.HTTP_200_OK,
            content=report
        )

    except Exception as e:
        logger.error(f"Health check failed: {e}", exc_info=True)
        return JSONResponse(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            content={
                "status": "error",
                "timestamp": datetime.utcnow().isoformat(),
                "error": "Health check failed",
                "detail": str(e) if os.getenv("DEBUG_MODE", "false").lower() == "true" else None
            }
        )
393
+
394
@app.get("/cities", tags=["Location"])
async def list_supported_cities() -> JSONResponse:
    """
    📍 Enumerate every city Penny supports, with per-city data status.

    Each entry carries its tenant_id, display name, and whether its
    events/resources data files exist — handy for frontend dropdowns
    and API clients.

    Returns:
        200 with the city list, or 500 with a friendly error payload.
    """
    try:
        cities = get_all_supported_cities()

        # Attach data-file availability so clients can grey out sparse cities.
        checks = validate_city_data_files()
        for entry in cities:
            entry["data_status"] = checks.get(entry["tenant_id"], {
                "events": False,
                "resources": False
            })

        return JSONResponse(
            status_code=status.HTTP_200_OK,
            content={
                "total": len(cities),
                "cities": cities,
                "message": "These are the cities where Penny can help you find resources!",
                "timestamp": datetime.utcnow().isoformat()
            }
        )
    except Exception as e:
        logger.error(f"Error listing cities: {e}", exc_info=True)
        return JSONResponse(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            content={
                "error": "Unable to retrieve city list",
                "message": "I'm having trouble loading the city list right now. Please try again in a moment!",
                "detail": str(e) if os.getenv("DEBUG_MODE", "false").lower() == "true" else None,
                "timestamp": datetime.utcnow().isoformat()
            }
        )
448
+
449
+ # ============================================================
450
+ # WEATHER ENDPOINTS
451
+ # ============================================================
452
+
453
@app.get("/weather_direct", tags=["Weather"])
async def weather_direct_endpoint(lat: float, lon: float) -> JSONResponse:
    """
    🌤️ Current weather for an arbitrary coordinate pair.

    Args:
        lat: Latitude in degrees; must lie within [-90, 90].
        lon: Longitude in degrees; must lie within [-180, 180].

    Returns:
        200 with current conditions, 400 for out-of-range coordinates,
        503 when the upstream weather service fails.

    Example:
        GET /weather_direct?lat=36.8508&lon=-76.2859 (Norfolk, VA)
    """
    # Reject out-of-range coordinates before touching the weather service.
    # (`not (lo <= v <= hi)` also rejects NaN.)
    for value, low, high, label in (
        (lat, -90, 90, "latitude"),
        (lon, -180, 180, "longitude"),
    ):
        if not (low <= value <= high):
            return JSONResponse(
                status_code=status.HTTP_400_BAD_REQUEST,
                content={
                    "error": f"Invalid {label}",
                    "message": f"{label.capitalize()} must be between {low} and {high}",
                    "provided_value": value
                }
            )

    try:
        weather = await get_weather_for_location(lat=lat, lon=lon)
        return JSONResponse(
            status_code=status.HTTP_200_OK,
            content={
                "latitude": lat,
                "longitude": lon,
                "weather": weather,
                "source": "Azure Maps Weather API",
                "message": "Current weather conditions at your location",
                "timestamp": datetime.utcnow().isoformat()
            }
        )
    except Exception as e:
        logger.error(f"Weather lookup failed for ({lat}, {lon}): {e}", exc_info=True)
        return JSONResponse(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            content={
                "error": "Weather service temporarily unavailable",
                "message": "We're having trouble reaching the weather service. Please try again in a moment.",
                "latitude": lat,
                "longitude": lon,
                "timestamp": datetime.utcnow().isoformat()
            }
        )
513
+
514
@app.get("/weather/{tenant_id}", tags=["Weather"])
async def weather_by_city(tenant_id: str) -> JSONResponse:
    """
    🌤️ Current weather for a supported city, looked up by tenant ID.

    Args:
        tenant_id: City identifier (e.g., 'atlanta_ga', 'seattle_wa').

    Returns:
        200 with conditions, 404 for unknown cities, 500 when the city
        has no coordinates on file, 503 when the weather service fails.

    Example:
        GET /weather/atlanta_ga
    """
    try:
        # Unknown tenant → 404 listing the valid IDs.
        city_info = SupportedCities.get_city_by_tenant_id(tenant_id)
        if not city_info:
            supported = [c["tenant_id"] for c in get_all_supported_cities()]
            return JSONResponse(
                status_code=status.HTTP_404_NOT_FOUND,
                content={
                    "error": f"City not found: {tenant_id}",
                    "message": f"I don't have data for '{tenant_id}' yet. Try one of the supported cities!",
                    "supported_cities": supported,
                    "timestamp": datetime.utcnow().isoformat()
                }
            )

        # Known city without coordinates is a server-side data problem.
        coords = get_city_coordinates(tenant_id)
        if not coords:
            return JSONResponse(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                content={
                    "error": "City coordinates not available",
                    "city": city_info.full_name,
                    "tenant_id": tenant_id,
                    "timestamp": datetime.utcnow().isoformat()
                }
            )

        lat, lon = coords["lat"], coords["lon"]
        weather = await get_weather_for_location(lat=lat, lon=lon)
        return JSONResponse(
            status_code=status.HTTP_200_OK,
            content={
                "city": city_info.full_name,
                "tenant_id": tenant_id,
                "coordinates": {"latitude": lat, "longitude": lon},
                "weather": weather,
                "source": "Azure Maps Weather API",
                "timestamp": datetime.utcnow().isoformat()
            }
        )
    except Exception as e:
        logger.error(f"Weather lookup failed for {tenant_id}: {e}", exc_info=True)
        return JSONResponse(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            content={
                "error": "Weather service temporarily unavailable",
                "message": "We're having trouble getting the weather right now. Please try again in a moment!",
                "tenant_id": tenant_id,
                "timestamp": datetime.utcnow().isoformat()
            }
        )
581
+
582
+ # ============================================================
583
+ # DEBUG ENDPOINTS (Only available in debug mode)
584
+ # ============================================================
585
+
586
@app.get("/debug/validation", tags=["Debug"], include_in_schema=False)
async def debug_validation() -> JSONResponse:
    """
    🧪 Debug-only view of per-city data-file validation results.

    Returns 403 unless DEBUG_MODE=true; hidden from the OpenAPI schema.
    """
    # Hard gate: never expose internals unless debug mode is explicitly on.
    if os.getenv("DEBUG_MODE", "false").lower() != "true":
        return JSONResponse(
            status_code=status.HTTP_403_FORBIDDEN,
            content={"error": "Debug endpoints are disabled in production"}
        )

    try:
        checks = validate_city_data_files()
        return JSONResponse(
            status_code=status.HTTP_200_OK,
            content={
                "validation": checks,
                "summary": {
                    "total_cities": len(checks),
                    "cities_with_events": sum(1 for c in checks.values() if c.get("events", False)),
                    "cities_with_resources": sum(1 for c in checks.values() if c.get("resources", False))
                },
                "timestamp": datetime.utcnow().isoformat()
            }
        )
    except Exception as e:
        logger.error(f"Debug validation failed: {e}", exc_info=True)
        return JSONResponse(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            content={"error": str(e)}
        )
618
+
619
@app.get("/debug/env", tags=["Debug"], include_in_schema=False)
async def debug_environment() -> JSONResponse:
    """
    🧪 Debug-only dump of PENNY-related environment configuration.

    Secret-looking values are masked. Returns 403 unless DEBUG_MODE=true;
    hidden from the OpenAPI schema.
    """
    # Hard gate: never expose internals unless debug mode is explicitly on.
    if os.getenv("DEBUG_MODE", "false").lower() != "true":
        return JSONResponse(
            status_code=status.HTTP_403_FORBIDDEN,
            content={"error": "Debug endpoints are disabled in production"}
        )

    def mask_sensitive(key: str, value: str) -> str:
        """Hide most of any value whose name looks secret-bearing."""
        if any(hint in key.lower() for hint in ("key", "secret", "password", "token")):
            return f"{value[:4]}...{value[-4:]}" if len(value) > 8 else "***"
        return value

    try:
        # Only surface namespaced variables, never the whole environment.
        env_vars = {
            key: mask_sensitive(key, value)
            for key, value in os.environ.items()
            if key.startswith(("AZURE_", "PENNY_", "DEBUG_", "ENVIRONMENT"))
        }

        return JSONResponse(
            status_code=status.HTTP_200_OK,
            content={
                "environment_variables": env_vars,
                "project_root": str(PROJECT_ROOT),
                "location_system_healthy": app.state.location_system_healthy,
                "startup_errors": app.state.startup_errors,
                "timestamp": datetime.utcnow().isoformat()
            }
        )
    except Exception as e:
        logger.error(f"Debug environment check failed: {e}", exc_info=True)
        return JSONResponse(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            content={"error": str(e)}
        )
app/model_loader.py ADDED
@@ -0,0 +1,861 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app/model_loader.py
2
+ """
3
+ 🧠 PENNY Model Loader - Azure-Ready Multi-Model Orchestration
4
+
5
+ This is Penny's brain loader. She manages multiple specialized models:
6
+ - Gemma 7B for conversational reasoning
7
+ - NLLB-200 for 27-language translation
8
+ - Sentiment analysis for resident wellbeing
9
+ - Bias detection for equitable service
10
+ - LayoutLM for civic document processing
11
+
12
+ MISSION: Load AI models efficiently in memory-constrained environments while
13
+ maintaining Penny's warm, civic-focused personality across all interactions.
14
+
15
+ FEATURES:
16
+ - Lazy loading (models only load when needed)
17
+ - 8-bit quantization for memory efficiency
18
+ - GPU/CPU auto-detection
19
+ - Model caching and reuse
20
+ - Graceful fallbacks for Azure ML deployment
21
+ - Memory monitoring and cleanup
22
+ """
23
+
24
+ import json
25
+ import os
26
+ import torch
27
+ from typing import Dict, Any, Callable, Optional, Union, List
28
+ from pathlib import Path
29
+ import logging
30
+ from dataclasses import dataclass
31
+ from enum import Enum
32
+ from datetime import datetime
33
+
34
+ from transformers import (
35
+ AutoTokenizer,
36
+ AutoModelForCausalLM,
37
+ AutoModelForSeq2SeqLM,
38
+ pipeline,
39
+ PreTrainedModel,
40
+ PreTrainedTokenizer
41
+ )
42
+
43
# --- LOGGING SETUP ---
logger = logging.getLogger(__name__)

# --- PATH CONFIGURATION (Environment-Aware) ---
# Azure ML mounts model artifacts under AZUREML_MODEL_DIR; local development
# uses the repo's models/ directory instead.
if azureml_dir := os.getenv("AZUREML_MODEL_DIR"):
    MODEL_ROOT = Path(azureml_dir)
    logger.info("☁️ Running in Azure ML environment")
else:
    PROJECT_ROOT = Path(__file__).parent.parent
    MODEL_ROOT = PROJECT_ROOT / "models"
    logger.info("💻 Running in local development environment")

# The model registry file lives alongside the models in either environment.
CONFIG_PATH = MODEL_ROOT / "model_config.json"

logger.info(f"📂 Model config path: {CONFIG_PATH}")
61
+
62
+ # ============================================================
63
+ # PENNY'S CIVIC IDENTITY & PERSONALITY
64
+ # ============================================================
65
+
66
# Penny's identity prompt: the system preamble prepended to every chat
# model call so responses stay in persona and within the civic mission.
PENNY_SYSTEM_PROMPT = (
    "You are Penny, a smart, civic-focused AI assistant serving local communities. "
    "You help residents navigate city services, government programs, and community resources. "
    "You're warm, professional, accurate, and always stay within your civic mission.\n\n"
    "Your expertise includes:\n"
    "- Connecting people with local services (food banks, shelters, libraries)\n"
    "- Translating information into 27 languages\n"
    "- Explaining public programs and eligibility\n"
    "- Guiding residents through civic processes\n"
    "- Providing emergency resources when needed\n\n"
    "YOUR PERSONALITY:\n"
    "- Warm and approachable, like a helpful community center staff member\n"
    "- Clear and practical, avoiding jargon\n"
    "- Culturally sensitive and inclusive\n"
    "- Patient with repetition or clarification\n"
    "- Funny when appropriate, but never at anyone's expense\n\n"
    "CRITICAL RULES:\n"
    "- When residents greet you by name (e.g., 'Hi Penny'), respond warmly and personally\n"
    "- You are ALWAYS Penny - never ChatGPT, Assistant, Claude, or any other name\n"
    "- If you don't know something, say so clearly and help find the right resource\n"
    "- NEVER make up information about services, eligibility, or contacts\n"
    "- Stay within your civic mission - you don't provide legal, medical, or financial advice\n"
    "- For emergencies, immediately connect to appropriate services (911, crisis lines)\n\n"
)
93
+
94
+ # --- GLOBAL STATE ---
95
+ _MODEL_CACHE: Dict[str, Any] = {} # Memory-efficient model reuse
96
+ _LOAD_TIMES: Dict[str, float] = {} # Track model loading performance
97
+
98
+
99
+ # ============================================================
100
+ # DEVICE MANAGEMENT
101
+ # ============================================================
102
+
103
class DeviceType(str, Enum):
    """Compute devices Penny can run inference on."""
    CUDA = "cuda"  # NVIDIA GPU
    CPU = "cpu"    # Portable fallback
    MPS = "mps"    # Apple Silicon
108
+
109
+
110
def get_optimal_device() -> str:
    """
    🎮 Pick the best available inference device.

    Preference order: CUDA GPU → Apple MPS → CPU. Logs what was found
    and warns when falling back to CPU.

    Returns:
        Device string: "cuda", "mps", or "cpu".
    """
    if torch.cuda.is_available():
        gpu_name = torch.cuda.get_device_name(0)
        gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1e9
        logger.info(f"🎮 GPU detected: {gpu_name} ({gpu_memory:.1f}GB)")
        return DeviceType.CUDA.value

    # hasattr guard: older torch builds lack the mps backend entirely.
    if hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
        logger.info("🍎 Apple Silicon (MPS) detected")
        return DeviceType.MPS.value

    logger.info("💻 Using CPU for inference")
    logger.warning("⚠️ GPU not available - inference will be slower")
    return DeviceType.CPU.value
139
+
140
+
141
def get_memory_stats() -> Dict[str, float]:
    """
    📊 Snapshot of current memory usage, in GB.

    Includes GPU figures when CUDA is available and CPU figures when the
    optional psutil package is installed; a missing psutil is tolerated
    silently.

    Returns:
        Mapping of stat name to value (GB values, plus cpu_percent).
    """
    stats: Dict[str, float] = {}

    if torch.cuda.is_available():
        stats["gpu_allocated_gb"] = torch.cuda.memory_allocated() / 1e9
        stats["gpu_reserved_gb"] = torch.cuda.memory_reserved() / 1e9
        stats["gpu_total_gb"] = torch.cuda.get_device_properties(0).total_memory / 1e9

    try:
        import psutil  # optional dependency; skip CPU stats when absent
    except ImportError:
        return stats

    mem = psutil.virtual_memory()
    stats["cpu_used_gb"] = mem.used / 1e9
    stats["cpu_total_gb"] = mem.total / 1e9
    stats["cpu_percent"] = mem.percent
    return stats
166
+
167
+
168
+ # ============================================================
169
+ # MODEL CLIENT (Individual Model Handler)
170
+ # ============================================================
171
+
172
@dataclass
class ModelMetadata:
    """
    📋 Bookkeeping for one loaded model: identity, placement, and
    runtime performance counters.
    """
    name: str                                  # client identifier (e.g. "penny-core-agent")
    task: str                                  # pipeline task type
    model_name: str                            # HuggingFace model ID
    device: str                                # "cuda" / "mps" / "cpu"
    loaded_at: Optional[datetime] = None       # when loading finished
    load_time_seconds: Optional[float] = None  # wall-clock load duration
    memory_usage_gb: Optional[float] = None    # GPU memory observed after load
    inference_count: int = 0                   # number of predict() calls
    total_inference_time_ms: float = 0.0       # cumulative inference time

    @property
    def avg_inference_time_ms(self) -> float:
        """Mean inference latency in ms; 0.0 before the first call."""
        if not self.inference_count:
            return 0.0
        return self.total_inference_time_ms / self.inference_count
194
+
195
+
196
+ class ModelClient:
197
+ """
198
+ 🤖 Manages a single HuggingFace model with optimized loading and inference.
199
+
200
+ Features:
201
+ - Lazy loading (load on first use)
202
+ - Memory optimization (8-bit quantization)
203
+ - Performance tracking
204
+ - Graceful error handling
205
+ - Automatic device placement
206
+ """
207
+
208
+ def __init__(
209
+ self,
210
+ name: str,
211
+ model_name: str,
212
+ task: str,
213
+ device: str = None,
214
+ config: Optional[Dict[str, Any]] = None
215
+ ):
216
+ """
217
+ Initialize model client (doesn't load the model yet).
218
+
219
+ Args:
220
+ name: Model identifier (e.g., "penny-core-agent")
221
+ model_name: HuggingFace model ID
222
+ task: Task type (text-generation, translation, etc.)
223
+ device: Target device (auto-detected if None)
224
+ config: Additional model configuration
225
+ """
226
+ self.name = name
227
+ self.model_name = model_name
228
+ self.task = task
229
+ self.device = device or get_optimal_device()
230
+ self.config = config or {}
231
+ self.pipeline = None
232
+ self._load_attempted = False
233
+ self.metadata = ModelMetadata(
234
+ name=name,
235
+ task=task,
236
+ model_name=model_name,
237
+ device=self.device
238
+ )
239
+
240
+ logger.info(f"📦 Initialized ModelClient: {name}")
241
+ logger.debug(f" Model: {model_name}")
242
+ logger.debug(f" Task: {task}")
243
+ logger.debug(f" Device: {self.device}")
244
+
245
+ def load_pipeline(self) -> bool:
246
+ """
247
+ 🔄 Loads the HuggingFace pipeline with Azure-optimized settings.
248
+
249
+ Features:
250
+ - 8-bit quantization for large models (saves ~50% memory)
251
+ - Automatic device placement
252
+ - Memory monitoring
253
+ - Cache checking
254
+
255
+ Returns:
256
+ True if successful, False otherwise
257
+ """
258
+ if self.pipeline is not None:
259
+ logger.debug(f"✅ {self.name} already loaded")
260
+ return True
261
+
262
+ if self._load_attempted:
263
+ logger.warning(f"⚠️ Previous load attempt failed for {self.name}")
264
+ return False
265
+
266
+ global _MODEL_CACHE, _LOAD_TIMES
267
+
268
+ # Check cache first
269
+ if self.name in _MODEL_CACHE:
270
+ logger.info(f"♻️ Using cached pipeline for {self.name}")
271
+ self.pipeline = _MODEL_CACHE[self.name]
272
+ return True
273
+
274
+ logger.info(f"🔄 Loading {self.name} from HuggingFace...")
275
+ self._load_attempted = True
276
+
277
+ start_time = datetime.now()
278
+
279
+ try:
280
+ # === TEXT GENERATION (Gemma 7B, GPT-2, etc.) ===
281
+ if self.task == "text-generation":
282
+ logger.info(" Using 8-bit quantization for memory efficiency...")
283
+
284
+ # Check if model supports 8-bit loading
285
+ use_8bit = self.device == DeviceType.CUDA.value
286
+
287
+ if use_8bit:
288
+ self.pipeline = pipeline(
289
+ "text-generation",
290
+ model=self.model_name,
291
+ tokenizer=self.model_name,
292
+ device_map="auto",
293
+ load_in_8bit=True, # Reduces ~14GB to ~7GB
294
+ trust_remote_code=True,
295
+ torch_dtype=torch.float16
296
+ )
297
+ else:
298
+ # CPU fallback
299
+ self.pipeline = pipeline(
300
+ "text-generation",
301
+ model=self.model_name,
302
+ tokenizer=self.model_name,
303
+ device=-1, # CPU
304
+ trust_remote_code=True,
305
+ torch_dtype=torch.float32
306
+ )
307
+
308
+ # === TRANSLATION (NLLB-200, M2M-100, etc.) ===
309
+ elif self.task == "translation":
310
+ self.pipeline = pipeline(
311
+ "translation",
312
+ model=self.model_name,
313
+ device=0 if self.device == DeviceType.CUDA.value else -1,
314
+ src_lang=self.config.get("default_src_lang", "eng_Latn"),
315
+ tgt_lang=self.config.get("default_tgt_lang", "spa_Latn")
316
+ )
317
+
318
+ # === SENTIMENT ANALYSIS ===
319
+ elif self.task == "sentiment-analysis":
320
+ self.pipeline = pipeline(
321
+ "sentiment-analysis",
322
+ model=self.model_name,
323
+ device=0 if self.device == DeviceType.CUDA.value else -1,
324
+ truncation=True,
325
+ max_length=512
326
+ )
327
+
328
+ # === BIAS DETECTION (Zero-Shot Classification) ===
329
+ elif self.task == "bias-detection":
330
+ self.pipeline = pipeline(
331
+ "zero-shot-classification",
332
+ model=self.model_name,
333
+ device=0 if self.device == DeviceType.CUDA.value else -1
334
+ )
335
+
336
+ # === TEXT CLASSIFICATION (Generic) ===
337
+ elif self.task == "text-classification":
338
+ self.pipeline = pipeline(
339
+ "text-classification",
340
+ model=self.model_name,
341
+ device=0 if self.device == DeviceType.CUDA.value else -1,
342
+ truncation=True
343
+ )
344
+
345
+ # === PDF/DOCUMENT EXTRACTION (LayoutLMv3) ===
346
+ elif self.task == "pdf-extraction":
347
+ logger.warning("⚠️ PDF extraction requires additional OCR setup")
348
+ logger.info(" Consider using Azure Form Recognizer as alternative")
349
+ # Placeholder - requires pytesseract/OCR infrastructure
350
+ self.pipeline = None
351
+ return False
352
+
353
+ else:
354
+ raise ValueError(f"Unknown task type: {self.task}")
355
+
356
+ # === SUCCESS HANDLING ===
357
+ if self.pipeline is not None:
358
+ # Calculate load time
359
+ load_time = (datetime.now() - start_time).total_seconds()
360
+ self.metadata.loaded_at = datetime.now()
361
+ self.metadata.load_time_seconds = load_time
362
+
363
+ # Cache the pipeline
364
+ _MODEL_CACHE[self.name] = self.pipeline
365
+ _LOAD_TIMES[self.name] = load_time
366
+
367
+ # Log memory usage
368
+ mem_stats = get_memory_stats()
369
+ self.metadata.memory_usage_gb = mem_stats.get("gpu_allocated_gb", 0)
370
+
371
+ logger.info(f"✅ {self.name} loaded successfully!")
372
+ logger.info(f" Load time: {load_time:.2f}s")
373
+
374
+ if "gpu_allocated_gb" in mem_stats:
375
+ logger.info(
376
+ f" GPU Memory: {mem_stats['gpu_allocated_gb']:.2f}GB / "
377
+ f"{mem_stats['gpu_total_gb']:.2f}GB"
378
+ )
379
+
380
+ return True
381
+
382
+ except Exception as e:
383
+ logger.error(f"❌ Failed to load {self.name}: {e}", exc_info=True)
384
+ self.pipeline = None
385
+ return False
386
+
387
+ def predict(
388
+ self,
389
+ input_data: Union[str, Dict[str, Any]],
390
+ **kwargs
391
+ ) -> Dict[str, Any]:
392
+ """
393
+ 🎯 Runs inference with the loaded model pipeline.
394
+
395
+ Features:
396
+ - Automatic pipeline loading
397
+ - Error handling with fallback responses
398
+ - Performance tracking
399
+ - Penny's personality injection (for text-generation)
400
+
401
+ Args:
402
+ input_data: Text or structured input for the model
403
+ **kwargs: Task-specific parameters
404
+
405
+ Returns:
406
+ Model output dict with results or error information
407
+ """
408
+ # Track inference start time
409
+ start_time = datetime.now()
410
+
411
+ # Ensure pipeline is loaded
412
+ if self.pipeline is None:
413
+ success = self.load_pipeline()
414
+ if not success:
415
+ return {
416
+ "error": f"{self.name} pipeline unavailable",
417
+ "detail": "Model failed to load. Check logs for details.",
418
+ "model": self.name
419
+ }
420
+
421
+ try:
422
+ # === TEXT GENERATION ===
423
+ if self.task == "text-generation":
424
+ # Inject Penny's civic identity
425
+ if not kwargs.get("skip_system_prompt", False):
426
+ full_prompt = PENNY_SYSTEM_PROMPT + input_data
427
+ else:
428
+ full_prompt = input_data
429
+
430
+ # Extract generation parameters with safe defaults
431
+ max_new_tokens = kwargs.get("max_new_tokens", 256)
432
+ temperature = kwargs.get("temperature", 0.7)
433
+ top_p = kwargs.get("top_p", 0.9)
434
+ do_sample = kwargs.get("do_sample", temperature > 0.0)
435
+
436
+ result = self.pipeline(
437
+ full_prompt,
438
+ max_new_tokens=max_new_tokens,
439
+ temperature=temperature,
440
+ top_p=top_p,
441
+ do_sample=do_sample,
442
+ return_full_text=False,
443
+ pad_token_id=self.pipeline.tokenizer.eos_token_id,
444
+ truncation=True
445
+ )
446
+
447
+ output = {
448
+ "generated_text": result[0]["generated_text"],
449
+ "model": self.name,
450
+ "success": True
451
+ }
452
+
453
+ # === TRANSLATION ===
454
+ elif self.task == "translation":
455
+ src_lang = kwargs.get("source_lang", "eng_Latn")
456
+ tgt_lang = kwargs.get("target_lang", "spa_Latn")
457
+
458
+ result = self.pipeline(
459
+ input_data,
460
+ src_lang=src_lang,
461
+ tgt_lang=tgt_lang,
462
+ max_length=512
463
+ )
464
+
465
+ output = {
466
+ "translation": result[0]["translation_text"],
467
+ "source_lang": src_lang,
468
+ "target_lang": tgt_lang,
469
+ "model": self.name,
470
+ "success": True
471
+ }
472
+
473
+ # === SENTIMENT ANALYSIS ===
474
+ elif self.task == "sentiment-analysis":
475
+ result = self.pipeline(input_data)
476
+
477
+ output = {
478
+ "sentiment": result[0]["label"],
479
+ "confidence": result[0]["score"],
480
+ "model": self.name,
481
+ "success": True
482
+ }
483
+
484
+ # === BIAS DETECTION ===
485
+ elif self.task == "bias-detection":
486
+ candidate_labels = kwargs.get("candidate_labels", [
487
+ "neutral and objective",
488
+ "contains political bias",
489
+ "uses emotional language",
490
+ "culturally insensitive"
491
+ ])
492
+
493
+ result = self.pipeline(
494
+ input_data,
495
+ candidate_labels=candidate_labels,
496
+ multi_label=True
497
+ )
498
+
499
+ output = {
500
+ "labels": result["labels"],
501
+ "scores": result["scores"],
502
+ "model": self.name,
503
+ "success": True
504
+ }
505
+
506
+ # === TEXT CLASSIFICATION ===
507
+ elif self.task == "text-classification":
508
+ result = self.pipeline(input_data)
509
+
510
+ output = {
511
+ "label": result[0]["label"],
512
+ "confidence": result[0]["score"],
513
+ "model": self.name,
514
+ "success": True
515
+ }
516
+
517
+ else:
518
+ output = {
519
+ "error": f"Task '{self.task}' not implemented",
520
+ "model": self.name,
521
+ "success": False
522
+ }
523
+
524
+ # Track performance
525
+ inference_time = (datetime.now() - start_time).total_seconds() * 1000
526
+ self.metadata.inference_count += 1
527
+ self.metadata.total_inference_time_ms += inference_time
528
+ output["inference_time_ms"] = round(inference_time, 2)
529
+
530
+ return output
531
+
532
+ except Exception as e:
533
+ logger.error(f"❌ Inference error in {self.name}: {e}", exc_info=True)
534
+ return {
535
+ "error": "Inference failed",
536
+ "detail": str(e),
537
+ "model": self.name,
538
+ "success": False
539
+ }
540
+
541
+ def unload(self) -> None:
542
+ """
543
+ 🗑️ Unloads the model to free memory.
544
+ Critical for Azure environments with limited resources.
545
+ """
546
+ if self.pipeline is not None:
547
+ logger.info(f"🗑️ Unloading {self.name}...")
548
+
549
+ # Delete pipeline
550
+ del self.pipeline
551
+ self.pipeline = None
552
+
553
+ # Remove from cache
554
+ if self.name in _MODEL_CACHE:
555
+ del _MODEL_CACHE[self.name]
556
+
557
+ # Force GPU memory release
558
+ if torch.cuda.is_available():
559
+ torch.cuda.empty_cache()
560
+
561
+ logger.info(f"✅ {self.name} unloaded successfully")
562
+
563
+ # Log memory stats after unload
564
+ mem_stats = get_memory_stats()
565
+ if "gpu_allocated_gb" in mem_stats:
566
+ logger.info(f" GPU Memory: {mem_stats['gpu_allocated_gb']:.2f}GB remaining")
567
+
568
+ def get_metadata(self) -> Dict[str, Any]:
569
+ """
570
+ 📊 Returns model metadata and performance stats.
571
+ """
572
+ return {
573
+ "name": self.metadata.name,
574
+ "task": self.metadata.task,
575
+ "model_name": self.metadata.model_name,
576
+ "device": self.metadata.device,
577
+ "loaded": self.pipeline is not None,
578
+ "loaded_at": self.metadata.loaded_at.isoformat() if self.metadata.loaded_at else None,
579
+ "load_time_seconds": self.metadata.load_time_seconds,
580
+ "memory_usage_gb": self.metadata.memory_usage_gb,
581
+ "inference_count": self.metadata.inference_count,
582
+ "avg_inference_time_ms": round(self.metadata.avg_inference_time_ms, 2)
583
+ }
584
+
585
+
586
+ # ============================================================
587
+ # MODEL LOADER (Singleton Manager)
588
+ # ============================================================
589
+
590
class ModelLoader:
    """
    🎛️ Singleton manager for all Penny's specialized models.

    Features:
    - Centralized model configuration
    - Lazy loading (models only load when needed)
    - Memory management
    - Health monitoring
    - Unified access interface
    """

    _instance: Optional['ModelLoader'] = None

    def __new__(cls, *args, **kwargs):
        """Singleton pattern - only one ModelLoader instance."""
        if cls._instance is None:
            cls._instance = super(ModelLoader, cls).__new__(cls)
        return cls._instance

    def __init__(self, config_path: Optional[str] = None):
        """
        Initialize ModelLoader (only runs once due to singleton).

        Args:
            config_path: Path to model_config.json (optional)
        """
        if hasattr(self, '_models_loaded'):
            return  # already initialized; later constructor calls are no-ops

        self.models: Dict[str, ModelClient] = {}
        self._models_loaded = True
        self._initialization_time = datetime.now()

        # Caller-supplied path wins; otherwise fall back to the module default.
        cfg_file = Path(config_path) if config_path else CONFIG_PATH

        try:
            logger.info(f"📖 Loading model configuration from {cfg_file}")

            if not cfg_file.exists():
                logger.warning(f"⚠️ Configuration file not found: {cfg_file}")
                logger.info(" Create model_config.json with your model definitions")
                return

            with open(cfg_file, "r") as fh:
                cfg = json.load(fh)

            # Register a lightweight client per entry; weights load lazily later.
            for mid, info in cfg.items():
                self.models[mid] = ModelClient(
                    name=mid,
                    model_name=info["model_name"],
                    task=info["task"],
                    config=info.get("config", {})
                )

            logger.info(f"✅ ModelLoader initialized with {len(self.models)} models:")
            for mid in self.models:
                logger.info(f" - {mid}")

        except json.JSONDecodeError as e:
            logger.error(f"❌ Invalid JSON in model_config.json: {e}")
        except Exception as e:
            logger.error(f"❌ Failed to initialize ModelLoader: {e}", exc_info=True)

    def get(self, model_id: str) -> Optional[ModelClient]:
        """
        🎯 Retrieves a configured ModelClient by ID.

        Args:
            model_id: Model identifier from config

        Returns:
            ModelClient instance or None if not found
        """
        return self.models.get(model_id)

    def list_models(self) -> List[str]:
        """📋 Returns list of all available model IDs."""
        return [*self.models]

    def get_loaded_models(self) -> List[str]:
        """📋 Returns list of currently loaded model IDs."""
        loaded = []
        for mid, client in self.models.items():
            if client.pipeline is not None:
                loaded.append(mid)
        return loaded

    def unload_all(self) -> None:
        """
        🗑️ Unloads all models to free memory.
        Useful for Azure environments when switching workloads.
        """
        logger.info("🗑️ Unloading all models...")
        for client in self.models.values():
            client.unload()
        logger.info("✅ All models unloaded")

    def get_status(self) -> Dict[str, Any]:
        """
        📊 Returns comprehensive status of all models.
        Useful for health checks and monitoring.
        """
        return {
            "initialization_time": self._initialization_time.isoformat(),
            "total_models": len(self.models),
            "loaded_models": len(self.get_loaded_models()),
            "device": get_optimal_device(),
            "memory": get_memory_stats(),
            "models": {
                mid: client.get_metadata()
                for mid, client in self.models.items()
            }
        }
706
+
707
+
708
+ # ============================================================
709
+ # PUBLIC INTERFACE (Used by all *_utils.py modules)
710
+ # ============================================================
711
+
712
def load_model_pipeline(agent_name: str) -> Callable[..., Dict[str, Any]]:
    """
    🚀 Loads a model client and returns its inference function.

    This is the main entry point used by the *_utils.py modules
    (translation_utils.py, sentiment_utils.py, ...) to access Penny's
    models without touching ModelLoader directly.

    Args:
        agent_name: Model ID from model_config.json

    Returns:
        Callable inference function

    Raises:
        ValueError: If agent_name not found in configuration

    Example:
        >>> translator = load_model_pipeline("penny-translate-agent")
        >>> result = translator("Hello world", target_lang="spa_Latn")
    """
    registry = ModelLoader()
    client = registry.get(agent_name)

    if client is None:
        available = registry.list_models()
        raise ValueError(
            f"Agent ID '{agent_name}' not found in model configuration. "
            f"Available models: {available}"
        )

    # Trigger lazy loading now so the first real call doesn't pay for it.
    client.load_pipeline()

    # Closure keeps the client alive and forwards all call arguments.
    def run(input_data, **kwargs):
        return client.predict(input_data, **kwargs)

    return run
750
+
751
+
752
+ # === CONVENIENCE FUNCTIONS ===
753
+
754
def get_model_status() -> Dict[str, Any]:
    """
    📊 Returns status of all configured models.
    Useful for health checks and monitoring endpoints.
    """
    # ModelLoader is a singleton, so this re-uses the existing instance.
    return ModelLoader().get_status()
761
+
762
+
763
def preload_models(model_ids: Optional[List[str]] = None) -> None:
    """
    🚀 Preloads specified models during startup.

    Args:
        model_ids: List of model IDs to preload (None = all models)
    """
    registry = ModelLoader()

    # None means "warm everything that is configured".
    targets = registry.list_models() if model_ids is None else model_ids

    logger.info(f"🚀 Preloading {len(targets)} models...")

    for mid in targets:
        client = registry.get(mid)
        if client:
            logger.info(f" Loading {mid}...")
            client.load_pipeline()

    logger.info("✅ Model preloading complete")
784
+
785
+
786
def initialize_model_system() -> bool:
    """
    🏁 Initializes the model system.
    Should be called during app startup.

    Returns:
        True if initialization successful
    """
    logger.info("🧠 Initializing Penny's model system...")

    try:
        # Constructing the singleton parses model_config.json as a side effect.
        registry = ModelLoader()

        device = get_optimal_device()
        mem_stats = get_memory_stats()

        logger.info(f"✅ Model system initialized")
        logger.info(f"🎮 Compute device: {device}")

        if "gpu_total_gb" in mem_stats:
            logger.info(
                f"💾 GPU Memory: {mem_stats['gpu_total_gb']:.1f}GB total"
            )

        logger.info(f"📦 {len(registry.models)} models configured")

        # Optional: preload critical models at startup, e.g.
        # preload_models(["penny-core-agent"])

        return True

    except Exception as e:
        logger.error(f"❌ Failed to initialize model system: {e}", exc_info=True)
        return False
823
+
824
+
825
+ # ============================================================
826
+ # CLI TESTING & DEBUGGING
827
+ # ============================================================
828
+
829
if __name__ == "__main__":
    """
    🧪 Test script for model loading and inference.
    Run with: python -m app.model_loader
    """
    banner = "=" * 60
    print(banner)
    print("🧪 Testing Penny's Model System")
    print(banner)

    # Initialize the singleton and list what is configured.
    registry = ModelLoader()
    print(f"\n📋 Available models: {registry.list_models()}")

    # Dump a full status report (default=str handles datetimes).
    print(f"\n📊 System status:")
    print(json.dumps(get_model_status(), indent=2, default=str))

    # Smoke-test the first configured model, if any.
    if registry.models:
        first_id = next(iter(registry.models))
        print(f"\n🧪 Testing model: {first_id}")

        client = registry.get(first_id)
        if client:
            print(f" Loading pipeline...")
            if client.load_pipeline():
                print(f" ✅ Model loaded successfully!")
                print(f" Metadata: {json.dumps(client.get_metadata(), indent=2, default=str)}")
            else:
                print(f"  ❌ Model loading failed")
app/router.py ADDED
@@ -0,0 +1,802 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ 🚦 PENNY Request Router - Enhanced for Azure ML Production
3
+ Routes incoming requests to appropriate agents and tools based on intent classification.
4
+ Integrates with enhanced logging, location detection, and intent classification.
5
+
6
+ Mission: Ensure every resident request reaches the right civic service with proper tracking.
7
+ """
8
+
9
+ import logging
10
+ import time
11
+ import asyncio
12
+ import os
13
+ from typing import Dict, Any, Optional, List
14
+ from pathlib import Path
15
+ from fastapi import APIRouter, HTTPException
16
+ from fastapi.responses import JSONResponse
17
+
18
+ from app.model_loader import ModelLoader
19
+ from app.tool_agent import handle_tool_request
20
+ from app.weather_agent import (
21
+ get_weather_for_location,
22
+ weather_to_event_recommendations,
23
+ recommend_outfit
24
+ )
25
+ from app.intents import classify_intent_detailed, IntentType
26
+ from app.event_weather import get_event_recommendations_with_weather
27
+ from app.location_utils import (
28
+ detect_location_from_text,
29
+ get_city_info,
30
+ validate_coordinates
31
+ )
32
+ from app.logging_utils import log_interaction, sanitize_for_logging
33
+
34
+ logger = logging.getLogger(__name__)
35
+
36
+ # Initialize FastAPI router
37
+ router = APIRouter(prefix="/api", tags=["Penny API"])
38
+
39
+ # Initialize model loader
40
+ models = ModelLoader()
41
+
42
+ # Supported languages for translation routing
43
+ SUPPORTED_LANGUAGES = [
44
+ "arabic", "french", "german", "hindi", "mandarin",
45
+ "portuguese", "russian", "spanish", "swahili",
46
+ "tagalog", "urdu", "vietnamese", "translate", "translation"
47
+ ]
48
+
49
def validate_request_payload(payload: dict) -> tuple[bool, Optional[str]]:
    """
    Validate incoming request payload for required fields and data types.

    Checks that 'input' is a non-empty string, that coordinates (if any)
    arrive as a numeric lat/lon pair, and that 'tenant_id' (if any) is a
    non-empty string.

    Args:
        payload: Request payload dictionary

    Returns:
        Tuple of (is_valid, error_message); error_message is None on success.
    """
    if not isinstance(payload, dict):
        return False, "Payload must be a dictionary"

    if "input" not in payload:
        return False, "Missing required field: 'input'"

    text = payload.get("input")
    if not isinstance(text, str):
        return False, "Field 'input' must be a string"
    if not text.strip():
        return False, "Input cannot be empty"

    lat, lon = payload.get("lat"), payload.get("lon")
    if not (lat is None and lon is None):
        # Coordinates are all-or-nothing: a lone lat or lon is rejected.
        if lat is None or lon is None:
            return False, "Both 'lat' and 'lon' must be provided together"
        try:
            ok, reason = validate_coordinates(float(lat), float(lon))
            if not ok:
                return False, f"Invalid coordinates: {reason}"
        except (ValueError, TypeError):
            return False, "Coordinates must be numeric values"

    tenant = payload.get("tenant_id")
    if tenant is not None:
        if not isinstance(tenant, str):
            return False, "Field 'tenant_id' must be a string"
        if not tenant.strip():
            return False, "Field 'tenant_id' cannot be empty"

    return True, None
99
+
100
+
101
def extract_location_info(payload: dict, user_input: str) -> Dict[str, Any]:
    """
    Extract and validate location information from payload or user input.

    Resolution order:
    1. Explicit coordinates in the payload (location_source="coordinates").
    2. A location mentioned in the text (location_source="text_detection").
    3. Fall back to the payload's tenant_id (or "default").

    Args:
        payload: Request payload
        user_input: User's input text

    Returns:
        Dictionary with location info: {lat, lon, tenant_id, city_info, location_source}
    """
    location_info = {
        "lat": payload.get("lat"),
        "lon": payload.get("lon"),
        "tenant_id": payload.get("tenant_id", "default"),
        "city_info": None,
        "location_source": "none"
    }

    try:
        # Try to get location from coordinates
        if location_info["lat"] is not None and location_info["lon"] is not None:
            location_info["location_source"] = "coordinates"

            # Try to map coordinates to a tenant city
            if location_info["tenant_id"] == "default":
                city_info = get_city_info(location_info["tenant_id"])
                if city_info:
                    location_info["city_info"] = city_info

        else:
            # BUGFIX: the previous check used substring matching
            # ("in" in user_input.lower()), so words like "evening" or
            # "raining" triggered text-based location detection on almost
            # every sentence. Match whole words instead. Note: simple
            # split() leaves punctuation attached ("around," won't match),
            # which is an acceptable trade-off here.
            words = user_input.lower().split()
            if "near me" in user_input.lower() or any(
                keyword in words for keyword in ["in", "at", "near", "around"]
            ):
                detected = detect_location_from_text(user_input)
                if detected.get("found"):
                    location_info["tenant_id"] = detected.get("tenant_id", "default")
                    location_info["city_info"] = detected.get("city_info")
                    location_info["location_source"] = "text_detection"
                    logger.info(
                        f"Detected location from text: {location_info['tenant_id']}"
                    )

        # Get city info for tenant_id if we have it
        if not location_info["city_info"] and location_info["tenant_id"] != "default":
            location_info["city_info"] = get_city_info(location_info["tenant_id"])

    except Exception as e:
        # Location enrichment is best-effort; never fail the request over it.
        logger.warning(f"Error extracting location info: {e}")

    return location_info
153
+
154
+
155
def route_request(payload: dict) -> dict:
    """
    Main routing function for PENNY requests.
    Routes requests to appropriate agents based on intent classification.

    Routing priority (first match wins): validation error -> emergency ->
    weather -> weather+events (compound) -> events -> tool-based intents ->
    translation -> document -> sentiment -> bias -> general fallback.

    Args:
        payload: Request payload with user input and metadata
            (keys used here: "input", "role", "lat", "lon", "tenant_id")

    Returns:
        Response dictionary with agent output and metadata
    """
    # Wall-clock start used for the response_time_ms field on every exit path.
    start_time = time.time()

    try:
        # Validate request payload
        is_valid, error_msg = validate_request_payload(payload)
        if not is_valid:
            logger.warning(f"Invalid request payload: {error_msg}")
            return {
                "error": "Oops! I couldn't understand that request. " + error_msg,
                "status": "validation_error",
                "response_time_ms": round((time.time() - start_time) * 1000)
            }

        # Extract basic request info
        user_input = payload.get("input", "").strip()
        role = payload.get("role", "unknown")

        # Sanitize input for logging (remove PII)
        sanitized_input = sanitize_for_logging(user_input)

        # Extract location information
        location_info = extract_location_info(payload, user_input)
        tenant_id = location_info["tenant_id"]
        lat = location_info["lat"]
        lon = location_info["lon"]

        logger.info(
            f"Routing request from tenant '{tenant_id}', role '{role}', "
            f"location_source: {location_info['location_source']}"
        )

        # Classify intent using enhanced intent classifier
        try:
            intent_result = classify_intent_detailed(user_input)
            intent = intent_result["intent"]
            confidence = intent_result["confidence"]
            is_compound = intent_result["is_compound"]

            logger.info(
                f"Intent classified: {intent} (confidence: {confidence:.2f}, "
                f"compound: {is_compound})"
            )

        except Exception as e:
            # Classification failure degrades gracefully to the general agent.
            # NOTE: intent_result is undefined on this path, but the only later
            # read of it is guarded by is_compound, which is False here.
            logger.error(f"Intent classification failed: {e}")
            intent = IntentType.GENERAL
            confidence = 0.0
            is_compound = False

        # EMERGENCY ROUTING - Highest priority
        if intent == IntentType.EMERGENCY:
            logger.critical(
                f"EMERGENCY intent detected from tenant '{tenant_id}'. "
                f"Routing to safety protocols."
            )

            # Log emergency interaction for compliance
            log_interaction(
                tenant_id=tenant_id,
                interaction_type="emergency",
                intent="emergency",
                response_time_ms=round((time.time() - start_time) * 1000),
                success=True,
                metadata={
                    "sanitized_input": sanitized_input,
                    "requires_followup": True,
                    "escalation_level": "critical"
                }
            )

            # Static crisis-resource response; no model call is made here.
            return {
                "response": (
                    "I can see you might need urgent help. Please contact:\n\n"
                    "🚨 **Emergency Services**: 911\n"
                    "💚 **National Crisis Hotline**: 988\n"
                    "💬 **Crisis Text Line**: Text HOME to 741741\n\n"
                    "You're not alone, and help is available 24/7."
                ),
                "intent": "emergency",
                "model_id": "safety-agent",
                "tenant_id": tenant_id,
                "user_role": role,
                "response_time_ms": round((time.time() - start_time) * 1000),
                "escalation_required": True
            }

        # WEATHER ROUTING
        if intent == IntentType.WEATHER:
            return handle_weather_request(
                user_input, lat, lon, tenant_id, role, start_time
            )

        # WEATHER + EVENTS ROUTING (compound intent)
        if intent == IntentType.WEATHER_EVENTS or (
            is_compound and "weather" in intent_result.get("components", [])
        ):
            return handle_weather_events_request(
                user_input, lat, lon, tenant_id, role, start_time
            )

        # EVENTS ROUTING
        if intent == IntentType.EVENTS:
            return handle_events_request(
                user_input, tenant_id, role, start_time
            )

        # TOOL-BASED ROUTING (transit, alerts, resources, etc.)
        if intent in [
            IntentType.TRANSIT, IntentType.ALERTS, IntentType.RESOURCES,
            IntentType.PUBLIC_WORKS
        ]:
            return handle_tool_based_request(
                user_input, intent, tenant_id, role, start_time
            )

        # TRANSLATION ROUTING
        # NOTE(review): the keyword fallback is substring-based, so any input
        # containing a language name routes here regardless of classifier output.
        if intent == IntentType.TRANSLATION or any(
            lang in user_input.lower() for lang in SUPPORTED_LANGUAGES
        ):
            return handle_translation_request(
                user_input, tenant_id, role, start_time
            )

        # DOCUMENT/PDF ROUTING
        if any(term in user_input.lower() for term in ["form", "upload", "document", "pdf"]):
            return handle_document_request(
                user_input, tenant_id, role, start_time
            )

        # SENTIMENT ANALYSIS ROUTING
        if any(term in user_input.lower() for term in ["angry", "sentiment", "how do i feel"]):
            return handle_sentiment_request(
                user_input, tenant_id, role, start_time
            )

        # BIAS DETECTION ROUTING
        if any(term in user_input.lower() for term in ["bias", "is this fair", "offensive"]):
            return handle_bias_request(
                user_input, tenant_id, role, start_time
            )

        # GENERAL/FALLBACK ROUTING
        return handle_general_request(
            user_input, tenant_id, role, start_time
        )

    except Exception as e:
        # Catch-all boundary: log with traceback, return a friendly error.
        logger.error(f"Unexpected error in route_request: {e}", exc_info=True)

        return {
            "error": (
                "I'm having trouble processing that right now. "
                "Could you try rephrasing your question? 💛"
            ),
            "status": "server_error",
            "response_time_ms": round((time.time() - start_time) * 1000)
        }
323
+
324
+
325
def handle_weather_request(
    user_input: str, lat: Optional[float], lon: Optional[float],
    tenant_id: str, role: str, start_time: float
) -> dict:
    """Handle weather-specific requests."""
    def _elapsed_ms() -> int:
        # Milliseconds since the router stamped start_time.
        return round((time.time() - start_time) * 1000)

    try:
        if lat is None or lon is None:
            # No usable coordinates — ask the resident for their location.
            return {
                "response": (
                    "I'd love to help with the weather! To give you accurate info, "
                    "I need your location. Can you share your coordinates or tell me "
                    "what city you're in? 🌤️"
                ),
                "intent": "weather",
                "model_id": "weather-agent",
                "tenant_id": tenant_id,
                "user_role": role,
                "response_time_ms": _elapsed_ms(),
                "location_required": True
            }

        # Fetch current conditions, then derive activity and outfit advice.
        weather = asyncio.run(get_weather_for_location(lat, lon))
        recs = weather_to_event_recommendations(weather)
        outfit = recommend_outfit(
            weather.get("temperature", {}).get("value"),
            weather.get("phrase", "")
        )

        response_time = _elapsed_ms()

        # Log successful interaction
        log_interaction(
            tenant_id=tenant_id,
            interaction_type="weather",
            intent="weather",
            response_time_ms=response_time,
            success=True
        )

        return {
            "response": {
                "weather": weather,
                "recommendations": recs,
                "outfit": outfit
            },
            "intent": "weather",
            "model_id": "weather-agent",
            "tenant_id": tenant_id,
            "user_role": role,
            "response_time_ms": response_time
        }

    except Exception as e:
        logger.error(f"Error handling weather request: {e}")

        return {
            "response": (
                "I'm having trouble getting the weather right now. "
                "The weather service might be down. Want to try again in a moment? 🌦️"
            ),
            "intent": "weather",
            "model_id": "weather-agent",
            "tenant_id": tenant_id,
            "user_role": role,
            "response_time_ms": _elapsed_ms(),
            "error": "weather_service_unavailable"
        }
396
+
397
+
398
def handle_weather_events_request(
    user_input: str, lat: Optional[float], lon: Optional[float],
    tenant_id: str, role: str, start_time: float
) -> dict:
    """Handle combined weather and events requests."""
    def _elapsed_ms() -> int:
        return round((time.time() - start_time) * 1000)

    try:
        if lat is None or lon is None:
            # Weather-aware event suggestions need a location first.
            return {
                "response": (
                    "I can suggest events based on the weather! "
                    "To do that, I need your location. Can you share your coordinates "
                    "or tell me what city you're in? 🎉☀️"
                ),
                "intent": "weather_events",
                "model_id": "event-weather-agent",
                "tenant_id": tenant_id,
                "user_role": role,
                "response_time_ms": _elapsed_ms(),
                "location_required": True
            }

        # Get combined weather and event recommendations
        combined = asyncio.run(
            get_event_recommendations_with_weather(tenant_id, lat, lon)
        )

        response_time = _elapsed_ms()

        # Log successful interaction
        log_interaction(
            tenant_id=tenant_id,
            interaction_type="weather_events",
            intent="weather_events",
            response_time_ms=response_time,
            success=True
        )

        return {
            "response": combined,
            "intent": "weather_events",
            "model_id": "event-weather-agent",
            "tenant_id": tenant_id,
            "user_role": role,
            "response_time_ms": response_time
        }

    except Exception as e:
        logger.error(f"Error handling weather_events request: {e}")

        return {
            "response": (
                "I'm having trouble combining weather and events right now. "
                "Let me try to help you with just one or the other! 🤔"
            ),
            "intent": "weather_events",
            "model_id": "event-weather-agent",
            "tenant_id": tenant_id,
            "user_role": role,
            "response_time_ms": _elapsed_ms(),
            "error": "combined_service_unavailable"
        }
460
+
461
+
462
def handle_events_request(
    user_input: str, tenant_id: str, role: str, start_time: float
) -> dict:
    """Route an events-only request through the tool agent.

    Args:
        user_input: Raw user request text.
        tenant_id: City/tenant identifier; may be replaced by the city
            the tool agent detects in the text.
        role: Caller's role (resident, official, ...).
        start_time: time.time() captured when routing began.

    Returns:
        Response dict with the tool agent's answer, or a friendly
        fallback (with an "error" key) when the lookup fails.
    """
    # NOTE(review): handle_tool_request is invoked synchronously here with
    # (input, role, tenant_id, intent); confirm this matches the imported
    # implementation — an async variant exists elsewhere that takes
    # lat/lon as its trailing arguments instead.
    shared = {
        "intent": "events",
        "model_id": "event-agent",
        "user_role": role,
    }

    try:
        tool_response = handle_tool_request(user_input, role, tenant_id, "events")
        return {
            "response": tool_response.get("response"),
            "tenant_id": tool_response.get("city", tenant_id),
            "response_time_ms": round((time.time() - start_time) * 1000),
            **shared,
        }

    except Exception as exc:
        logger.error(f"Error handling events request: {exc}")
        return {
            "response": (
                "I'm having trouble finding events right now. "
                "Let me know what you're interested in and I'll do my best! 🎭"
            ),
            "tenant_id": tenant_id,
            "response_time_ms": round((time.time() - start_time) * 1000),
            "error": "events_service_unavailable",
            **shared,
        }
494
+
495
+
496
def handle_tool_based_request(
    user_input: str, intent: str, tenant_id: str, role: str, start_time: float
) -> dict:
    """Route a tool-based request (transit, alerts, resources, etc.).

    Args:
        user_input: Raw user request text.
        intent: Detected intent name; forwarded to the tool agent and
            echoed back in the response.
        tenant_id: City/tenant identifier; may be replaced by the city
            the tool agent detects in the text.
        role: Caller's role (resident, official, ...).
        start_time: time.time() captured when routing began.

    Returns:
        Response dict with the tool's answer, or a friendly fallback
        (with an "error" key) when the tool fails.
    """
    # NOTE(review): handle_tool_request is invoked synchronously here with
    # (input, role, tenant_id, intent); confirm this matches the imported
    # implementation — an async variant exists elsewhere with a
    # (input, role, lat, lon) signature.
    try:
        tool_response = handle_tool_request(user_input, role, tenant_id, intent)
        return {
            "response": tool_response.get("response"),
            "intent": str(intent),
            "model_id": tool_response.get("tool", "tool-agent"),
            "tenant_id": tool_response.get("city", tenant_id),
            "user_role": role,
            "response_time_ms": round((time.time() - start_time) * 1000),
        }

    except Exception as exc:
        logger.error(f"Error handling tool request for {intent}: {exc}")
        return {
            "response": (
                f"I'm having trouble with that {intent} request right now. "
                "Could you try again or ask me something else? 💛"
            ),
            "intent": str(intent),
            "model_id": "tool-agent",
            "tenant_id": tenant_id,
            "user_role": role,
            "response_time_ms": round((time.time() - start_time) * 1000),
            "error": f"{intent}_service_unavailable",
        }
528
+
529
+
530
def handle_translation_request(
    user_input: str, tenant_id: str, role: str, start_time: float
) -> dict:
    """Handle translation requests via the penny-translate-agent model.

    Args:
        user_input: Text the user wants translated.
        tenant_id: City/tenant identifier.
        role: Caller's role (resident, official, ...).
        start_time: time.time() captured when routing began.

    Returns:
        Response dict with the model output, or a friendly fallback
        (with an "error" key) if the model is missing or fails.
    """
    model_id = "penny-translate-agent"
    common = {
        "intent": "translation",
        "model_id": model_id,
        "tenant_id": tenant_id,
        "user_role": role,
    }

    try:
        model = models.get(model_id)
        if not model:
            raise ValueError(f"Translation model not found: {model_id}")

        output = model.predict(user_input)
        return {
            "response": output,
            "response_time_ms": round((time.time() - start_time) * 1000),
            **common,
        }

    except Exception as exc:
        logger.error(f"Error handling translation request: {exc}")
        return {
            "response": (
                "I'm having trouble with translation right now. "
                "Which language would you like help with? 🌍"
            ),
            "response_time_ms": round((time.time() - start_time) * 1000),
            "error": "translation_service_unavailable",
            **common,
        }
568
+
569
+
570
def handle_document_request(
    user_input: str, tenant_id: str, role: str, start_time: float
) -> dict:
    """Handle document/PDF processing requests via penny-doc-agent.

    Args:
        user_input: Document-related request text.
        tenant_id: City/tenant identifier.
        role: Caller's role (resident, official, ...).
        start_time: time.time() captured when routing began.

    Returns:
        Response dict with the model output, or a friendly fallback
        (with an "error" key) if the model is missing or fails.
    """
    model_id = "penny-doc-agent"
    common = {
        "intent": "document",
        "model_id": model_id,
        "tenant_id": tenant_id,
        "user_role": role,
    }

    try:
        model = models.get(model_id)
        if not model:
            raise ValueError(f"Document model not found: {model_id}")

        output = model.predict(user_input)
        return {
            "response": output,
            "response_time_ms": round((time.time() - start_time) * 1000),
            **common,
        }

    except Exception as exc:
        logger.error(f"Error handling document request: {exc}")
        return {
            "response": (
                "I'm having trouble processing documents right now. "
                "What kind of form or document do you need help with? 📄"
            ),
            "response_time_ms": round((time.time() - start_time) * 1000),
            "error": "document_service_unavailable",
            **common,
        }
608
+
609
+
610
def handle_sentiment_request(
    user_input: str, tenant_id: str, role: str, start_time: float
) -> dict:
    """Handle sentiment analysis requests via penny-sentiment-agent.

    Args:
        user_input: Text to analyze for sentiment.
        tenant_id: City/tenant identifier.
        role: Caller's role (resident, official, ...).
        start_time: time.time() captured when routing began.

    Returns:
        Response dict with the model output, or a friendly fallback
        (with an "error" key) if the model is missing or fails.
    """
    model_id = "penny-sentiment-agent"
    common = {
        "intent": "sentiment",
        "model_id": model_id,
        "tenant_id": tenant_id,
        "user_role": role,
    }

    try:
        model = models.get(model_id)
        if not model:
            raise ValueError(f"Sentiment model not found: {model_id}")

        output = model.predict(user_input)
        return {
            "response": output,
            "response_time_ms": round((time.time() - start_time) * 1000),
            **common,
        }

    except Exception as exc:
        logger.error(f"Error handling sentiment request: {exc}")
        return {
            "response": (
                "I'm having trouble analyzing sentiment right now. "
                "How are you feeling about things? 💭"
            ),
            "response_time_ms": round((time.time() - start_time) * 1000),
            "error": "sentiment_service_unavailable",
            **common,
        }
648
+
649
+
650
def handle_bias_request(
    user_input: str, tenant_id: str, role: str, start_time: float
) -> dict:
    """Handle bias detection requests via penny-bias-checker.

    Args:
        user_input: Content to review for bias.
        tenant_id: City/tenant identifier.
        role: Caller's role (resident, official, ...).
        start_time: time.time() captured when routing began.

    Returns:
        Response dict with the model output, or a friendly fallback
        (with an "error" key) if the model is missing or fails.
    """
    model_id = "penny-bias-checker"
    common = {
        "intent": "bias_check",
        "model_id": model_id,
        "tenant_id": tenant_id,
        "user_role": role,
    }

    try:
        model = models.get(model_id)
        if not model:
            raise ValueError(f"Bias model not found: {model_id}")

        output = model.predict(user_input)
        return {
            "response": output,
            "response_time_ms": round((time.time() - start_time) * 1000),
            **common,
        }

    except Exception as exc:
        logger.error(f"Error handling bias request: {exc}")
        return {
            "response": (
                "I'm having trouble checking for bias right now. "
                "What content would you like me to review? ⚖️"
            ),
            "response_time_ms": round((time.time() - start_time) * 1000),
            "error": "bias_service_unavailable",
            **common,
        }
688
+
689
+
690
def handle_general_request(
    user_input: str, tenant_id: str, role: str, start_time: float
) -> dict:
    """Handle general/fallback requests via the penny-core-agent model.

    Args:
        user_input: Raw user request text.
        tenant_id: City/tenant identifier.
        role: Caller's role (resident, official, ...).
        start_time: time.time() captured when routing began.

    Returns:
        Response dict with the model output, or a friendly fallback
        (with an "error" key) if the model is missing or fails.
    """
    model_id = "penny-core-agent"
    common = {
        "intent": "general",
        "model_id": model_id,
        "tenant_id": tenant_id,
        "user_role": role,
    }

    try:
        model = models.get(model_id)
        if not model:
            raise ValueError(f"Core model not found: {model_id}")

        output = model.predict(user_input)
        return {
            "response": output,
            "response_time_ms": round((time.time() - start_time) * 1000),
            **common,
        }

    except Exception as exc:
        logger.error(f"Error handling general request: {exc}")
        return {
            "response": (
                "I'm having some technical difficulties right now. "
                "Can you try asking your question in a different way? "
                "Or let me know if you need help with weather, events, or services! 💛"
            ),
            "response_time_ms": round((time.time() - start_time) * 1000),
            "error": "general_service_unavailable",
            **common,
        }
729
+
730
+
731
@router.post("/chat", response_model=Dict[str, Any])
async def chat_endpoint(payload: Dict[str, Any]) -> JSONResponse:
    """
    💬 Main chat endpoint for Penny.

    Routes the request payload to the appropriate handler via
    route_request().

    Args:
        payload: Request payload with 'input', 'tenant_id', 'lat', 'lon', etc.

    Returns:
        200 with the handler's response dict, or 500 with a friendly
        error (detail only exposed when DEBUG_MODE=true).
    """
    try:
        return JSONResponse(status_code=200, content=route_request(payload))
    except Exception as exc:
        logger.error(f"Error in chat endpoint: {exc}", exc_info=True)
        debug_enabled = os.getenv("DEBUG_MODE", "false").lower() == "true"
        return JSONResponse(
            status_code=500,
            content={
                "error": "I'm having trouble processing that right now. Please try again! 💛",
                # Raw exception text is only exposed in debug mode.
                "detail": str(exc) if debug_enabled else None,
            },
        )
756
+
757
+
758
@router.get("/health/router", response_model=Dict[str, Any])
async def router_health_endpoint() -> JSONResponse:
    """
    📊 Router health check endpoint.

    Returns:
        200 with the router health payload from get_router_health(),
        or 500 with a degraded status when the check itself fails.
    """
    try:
        return JSONResponse(status_code=200, content=get_router_health())
    except Exception as exc:
        logger.error(f"Router health check failed: {exc}")
        return JSONResponse(
            status_code=500,
            content={"status": "degraded", "error": str(exc)},
        )
778
+
779
+
780
def get_router_health() -> dict:
    """
    Check router health status.

    Returns:
        Dict with "status": "operational" plus model-loader state,
        supported-language count and routing capabilities; on failure,
        "status": "degraded" with an "error" message.
    """
    try:
        loader_state = "initialized" if models else "not_initialized"
        return {
            "status": "operational",
            "model_loader": loader_state,
            "supported_languages": len(SUPPORTED_LANGUAGES),
            "routing_capabilities": [
                "weather", "events", "weather_events", "translation",
                "documents", "sentiment", "bias_detection", "general"
            ],
        }
    except Exception as exc:
        logger.error(f"Router health check failed: {exc}")
        return {"status": "degraded", "error": str(exc)}
app/tool_agent.py ADDED
@@ -0,0 +1,666 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app/tool_agent.py
2
+ """
3
+ 🛠️ PENNY Tool Agent - Civic Data & Services Handler
4
+
5
+ Routes requests to civic data sources (events, resources, transit, etc.)
6
+ and integrates with real-time weather information.
7
+
8
+ MISSION: Connect residents to local civic services by intelligently
9
+ processing their requests and returning relevant, actionable information.
10
+
11
+ FEATURES:
12
+ - Real-time weather integration with outfit recommendations
13
+ - Event discovery with weather-aware suggestions
14
+ - Resource lookup (trash, transit, emergency services)
15
+ - City-specific data routing
16
+ - Graceful fallback for missing data
17
+
18
+ ENHANCEMENTS (Phase 1):
19
+ - ✅ Structured logging with performance tracking
20
+ - ✅ Enhanced error handling with user-friendly messages
21
+ - ✅ Type hints for all functions
22
+ - ✅ Health check integration
23
+ - ✅ Service availability tracking
24
+ - ✅ Integration with enhanced modules
25
+ - ✅ Penny's friendly voice throughout
26
+ """
27
+
28
+ import logging
29
+ import time
30
+ from typing import Optional, Dict, Any
31
+
32
+ # --- ENHANCED MODULE IMPORTS ---
33
+ from app.logging_utils import log_interaction, sanitize_for_logging
34
+
35
+ # --- AGENT IMPORTS (with availability tracking) ---
36
+ try:
37
+ from app.weather_agent import (
38
+ get_weather_for_location,
39
+ weather_to_event_recommendations,
40
+ recommend_outfit,
41
+ format_weather_summary
42
+ )
43
+ WEATHER_AGENT_AVAILABLE = True
44
+ except ImportError as e:
45
+ logging.getLogger(__name__).warning(f"Weather agent not available: {e}")
46
+ WEATHER_AGENT_AVAILABLE = False
47
+
48
+ # --- UTILITY IMPORTS (with availability tracking) ---
49
+ try:
50
+ from app.location_utils import (
51
+ extract_city_name,
52
+ load_city_events,
53
+ load_city_resources,
54
+ get_city_coordinates
55
+ )
56
+ LOCATION_UTILS_AVAILABLE = True
57
+ except ImportError as e:
58
+ logging.getLogger(__name__).warning(f"Location utils not available: {e}")
59
+ LOCATION_UTILS_AVAILABLE = False
60
+
61
+ # --- LOGGING SETUP ---
62
+ logger = logging.getLogger(__name__)
63
+
64
+ # --- TRACKING COUNTERS ---
65
+ _tool_request_count = 0
66
+ _weather_request_count = 0
67
+ _event_request_count = 0
68
+ _resource_request_count = 0
69
+
70
+
71
+ # ============================================================
72
+ # MAIN TOOL REQUEST HANDLER (ENHANCED)
73
+ # ============================================================
74
+
75
async def handle_tool_request(
    user_input: str,
    role: str = "unknown",
    lat: Optional[float] = None,
    lon: Optional[float] = None
) -> Dict[str, Any]:
    """
    🛠️ Route a civic-services request to the matching tool handler.

    Dispatches on keywords in the request text — weather, events, or
    city resources (trash, transit, emergency) — and attaches timing,
    role metadata and analytics logging to every result.

    Args:
        user_input: The user's request text.
        role: User's role (resident, official, etc.).
        lat: Optional latitude for location-aware tools.
        lon: Optional longitude for location-aware tools.

    Returns:
        Dict with at least "tool" and "response"; successful lookups also
        include "city", "tenant_id", optional raw "data",
        "response_time_ms" and "role".
    """
    global _tool_request_count
    _tool_request_count += 1

    started = time.time()

    # Sanitize before logging so PII never reaches the logs.
    safe_input = sanitize_for_logging(user_input)
    logger.info(f"🛠️ Tool request #{_tool_request_count}: '{safe_input[:50]}...'")

    try:
        if not LOCATION_UTILS_AVAILABLE:
            # Without city lookups nothing below can work — bail out early.
            logger.error("Location utilities not available")
            return {
                "tool": "error",
                "response": (
                    "I'm having trouble accessing city data right now. "
                    "Try again in a moment! 💛"
                ),
                "error": "Location utilities not loaded"
            }

        lowered = user_input.lower()
        city_name = extract_city_name(user_input)

        # Standardize tenant ID (e.g., "Atlanta" -> "atlanta_ga").
        # TODO: Enhance city_name extraction to detect state.
        tenant_id = f"{city_name.lower().replace(' ', '_')}_ga"
        logger.info(f"Detected city: {city_name} (tenant_id: {tenant_id})")

        def mentions(keywords) -> bool:
            # True when any routing keyword occurs in the lowered input.
            return any(word in lowered for word in keywords)

        # Route to the appropriate handler by keyword match.
        if mentions(("weather", "forecast", "temperature", "rain", "sunny")):
            result = await _handle_weather_query(
                user_input=user_input,
                city_name=city_name,
                tenant_id=tenant_id,
                lat=lat,
                lon=lon
            )
        elif mentions(("events", "meetings", "city hall", "happening", "activities")):
            result = await _handle_events_query(
                user_input=user_input,
                city_name=city_name,
                tenant_id=tenant_id,
                lat=lat,
                lon=lon
            )
        elif mentions(("trash", "recycling", "garbage", "bus", "train",
                       "schedule", "alert", "warning", "non emergency")):
            result = await _handle_resource_query(
                user_input=user_input,
                city_name=city_name,
                tenant_id=tenant_id,
                lowered=lowered
            )
        else:
            result = _handle_unknown_query(city_name)

        # Attach metadata and record the interaction.
        elapsed_ms = (time.time() - started) * 1000
        result["response_time_ms"] = round(elapsed_ms, 2)
        result["role"] = role

        log_interaction(
            tenant_id=tenant_id,
            interaction_type="tool_request",
            intent=result.get("tool", "unknown"),
            response_time_ms=elapsed_ms,
            success=result.get("error") is None,
            metadata={
                "city": city_name,
                "tool": result.get("tool"),
                "role": role,
                "has_location": lat is not None and lon is not None
            }
        )

        logger.info(
            f"✅ Tool request complete: {result.get('tool')} "
            f"({elapsed_ms:.0f}ms)"
        )
        return result

    except Exception as exc:
        elapsed_ms = (time.time() - started) * 1000
        logger.error(f"❌ Tool agent error: {exc}", exc_info=True)

        log_interaction(
            tenant_id="unknown",
            interaction_type="tool_error",
            intent="error",
            response_time_ms=elapsed_ms,
            success=False,
            metadata={
                "error": str(exc),
                "error_type": type(exc).__name__
            }
        )

        return {
            "tool": "error",
            "response": (
                "I ran into trouble processing that request. "
                "Could you try rephrasing? 💛"
            ),
            "error": str(exc),
            "response_time_ms": round(elapsed_ms, 2)
        }
227
+
228
+
229
+ # ============================================================
230
+ # WEATHER QUERY HANDLER (ENHANCED)
231
+ # ============================================================
232
+
233
async def _handle_weather_query(
    user_input: str,
    city_name: str,
    tenant_id: str,
    lat: Optional[float],
    lon: Optional[float]
) -> Dict[str, Any]:
    """
    🌤️ Handles weather-related queries with outfit recommendations.

    Falls back to the city's stored coordinates when lat/lon are not
    supplied, then fetches current conditions, an outfit suggestion, and
    a weather-aware activity recommendation.

    Args:
        user_input: The user's request text (not used directly here).
        city_name: Human-readable city name for messages.
        tenant_id: Standardized city identifier used for coordinate lookup.
        lat: Latitude, or None to use the city's stored coordinates.
        lon: Longitude, or None to use the city's stored coordinates.

    Returns:
        Dict with "tool", "city" and "response"; successful lookups also
        include "tenant_id" and raw "data", failures an "error" key.
    """
    global _weather_request_count
    _weather_request_count += 1

    logger.info(f"🌤️ Weather query #{_weather_request_count} for {city_name}")

    # Check weather agent availability before doing any work.
    if not WEATHER_AGENT_AVAILABLE:
        logger.warning("Weather agent not available")
        return {
            "tool": "weather",
            "city": city_name,
            "response": "Weather service isn't available right now. Try again soon! 🌤️"
        }

    # Get coordinates if not provided.
    if lat is None or lon is None:
        coords = get_city_coordinates(tenant_id)
        if coords:
            lat, lon = coords["lat"], coords["lon"]
            logger.info(f"Using city coordinates: {lat}, {lon}")

    if lat is None or lon is None:
        # Still no coordinates — ask the user for a location.
        return {
            "tool": "weather",
            "city": city_name,
            "response": (
                f"To get weather for {city_name}, I need location coordinates. "
                f"Can you share your location? 📍"
            )
        }

    try:
        # Fetch weather data for the resolved coordinates.
        weather = await get_weather_for_location(lat, lon)

        # Get weather-based event recommendations.
        recommendations = weather_to_event_recommendations(weather)

        # Outfit recommendation; defaults assume mild/clear conditions
        # when the weather payload lacks those fields.
        temp = weather.get("temperature", {}).get("value", 70)
        phrase = weather.get("phrase", "Clear")
        outfit = recommend_outfit(temp, phrase)

        # Format weather summary.
        weather_summary = format_weather_summary(weather)

        # Build user-friendly response.
        # BUGFIX: the "What to wear" label previously contained mis-encoded
        # replacement characters (����) instead of an emoji.
        response_text = (
            f"🌤️ **Weather for {city_name}:**\n"
            f"{weather_summary}\n\n"
            f"👕 **What to wear:** {outfit}"
        )

        # Add the top event recommendation if any are available.
        if recommendations:
            rec = recommendations[0]  # Get top recommendation
            response_text += f"\n\n📅 **Activity suggestion:** {rec['reason']}"

        return {
            "tool": "weather",
            "city": city_name,
            "tenant_id": tenant_id,
            "response": response_text,
            "data": {
                "weather": weather,
                "recommendations": recommendations,
                "outfit": outfit
            }
        }

    except Exception as e:
        logger.error(f"Weather query error: {e}", exc_info=True)
        return {
            "tool": "weather",
            "city": city_name,
            "response": (
                f"I couldn't get the weather for {city_name} right now. "
                f"Try again in a moment! 🌤️"
            ),
            "error": str(e)
        }
324
+
325
+
326
+ # ============================================================
327
+ # EVENTS QUERY HANDLER (ENHANCED)
328
+ # ============================================================
329
+
330
async def _handle_events_query(
    user_input: str,
    city_name: str,
    tenant_id: str,
    lat: Optional[float],
    lon: Optional[float]
) -> Dict[str, Any]:
    """
    📅 Look up upcoming civic events for a city.

    Loads the structured event file for *tenant_id* and summarizes the
    top event. lat/lon are accepted for interface parity with the other
    handlers but are not used here.
    """
    global _event_request_count
    _event_request_count += 1

    logger.info(f"📅 Event query #{_event_request_count} for {city_name}")

    try:
        event_data = load_city_events(tenant_id)
        events = event_data.get("events", [])

        if not events:
            # Nothing scheduled — tell the user rather than returning data.
            return {
                "tool": "civic_events",
                "city": city_name,
                "tenant_id": tenant_id,
                "response": (
                    f"I don't have any upcoming events for {city_name} right now. "
                    f"Check back soon! 📅"
                )
            }

        headline = events[0].get("name", "Upcoming event")

        if len(events) == 1:
            summary = (
                f"📅 **Upcoming event in {city_name}:**\n"
                f"• {headline}\n\n"
                f"Check the full details in the attached data!"
            )
        else:
            summary = (
                f"📅 **Found {len(events)} upcoming events in {city_name}!**\n"
                f"Top event: {headline}\n\n"
                f"Check the full list in the attached data!"
            )

        return {
            "tool": "civic_events",
            "city": city_name,
            "tenant_id": tenant_id,
            "response": summary,
            "data": event_data
        }

    except FileNotFoundError:
        # No event file for this tenant yet — soft failure.
        logger.warning(f"Event data file not found for {tenant_id}")
        return {
            "tool": "civic_events",
            "city": city_name,
            "response": (
                f"Event data for {city_name} isn't available yet. "
                f"I'm still learning about events in your area! 📅"
            ),
            "error": "Event data file not found"
        }

    except Exception as exc:
        logger.error(f"Events query error: {exc}", exc_info=True)
        return {
            "tool": "civic_events",
            "city": city_name,
            "response": (
                f"I had trouble loading events for {city_name}. "
                f"Try again soon! 📅"
            ),
            "error": str(exc)
        }
411
+
412
+
413
+ # ============================================================
414
+ # RESOURCE QUERY HANDLER (ENHANCED)
415
+ # ============================================================
416
+
417
async def _handle_resource_query(
    user_input: str,
    city_name: str,
    tenant_id: str,
    lowered: str
) -> Dict[str, Any]:
    """
    ♻️ Look up city resources (trash, transit, emergency).

    Maps keywords in *lowered* to a resource category, loads the tenant's
    resource file, and formats a category-specific reply.
    """
    global _resource_request_count
    _resource_request_count += 1

    logger.info(f"♻️ Resource query #{_resource_request_count} for {city_name}")

    # Keyword -> resource category mapping.
    keyword_to_resource = {
        "trash": "trash_and_recycling",
        "recycling": "trash_and_recycling",
        "garbage": "trash_and_recycling",
        "bus": "transit",
        "train": "transit",
        "schedule": "transit",
        "alert": "emergency",
        "warning": "emergency",
        "non emergency": "emergency"
    }

    # First keyword found in the input wins (dict order is insertion order).
    resource_key = None
    for keyword, category in keyword_to_resource.items():
        if keyword in lowered:
            resource_key = category
            break

    if not resource_key:
        return {
            "tool": "unknown",
            "city": city_name,
            "response": (
                "I'm not sure which resource you're asking about. "
                "Try asking about trash, transit, or emergency services! 💬"
            )
        }

    try:
        resource_data = load_city_resources(tenant_id)
        service_info = resource_data["services"].get(resource_key, {})

        if not service_info:
            # Category known, but this city has no data for it yet.
            return {
                "tool": resource_key,
                "city": city_name,
                "response": (
                    f"I don't have {resource_key.replace('_', ' ')} information "
                    f"for {city_name} yet. Check the city's official website! 🏛️"
                )
            }

        # Build a category-specific reply.
        if resource_key == "trash_and_recycling":
            pickup_days = service_info.get('pickup_days', 'Varies by address')
            reply = (
                f"♻️ **Trash & Recycling for {city_name}:**\n"
                f"Pickup days: {pickup_days}\n\n"
                f"Check the official link for your specific schedule!"
            )
        elif resource_key == "transit":
            provider = service_info.get('provider', 'The local transit authority')
            reply = (
                f"🚌 **Transit for {city_name}:**\n"
                f"Provider: {provider}\n\n"
                f"Use the provided links to find routes and schedules!"
            )
        elif resource_key == "emergency":
            non_emergency = service_info.get('non_emergency_phone', 'N/A')
            reply = (
                f"🚨 **Emergency Info for {city_name}:**\n"
                f"Non-emergency: {non_emergency}\n\n"
                f"**For life-threatening emergencies, always call 911.**"
            )
        else:
            reply = f"Information found for {resource_key.replace('_', ' ')}, but details aren't available yet."

        return {
            "tool": resource_key,
            "city": city_name,
            "tenant_id": tenant_id,
            "response": reply,
            "data": service_info
        }

    except FileNotFoundError:
        logger.warning(f"Resource data file not found for {tenant_id}")
        return {
            "tool": "resource_loader",
            "city": city_name,
            "response": (
                f"Resource data for {city_name} isn't available yet. "
                f"Check back soon! 🏛️"
            ),
            "error": "Resource data file not found"
        }

    except Exception as exc:
        logger.error(f"Resource query error: {exc}", exc_info=True)
        return {
            "tool": "resource_loader",
            "city": city_name,
            "response": (
                f"I had trouble loading resource data for {city_name}. "
                f"Try again soon! 🏛️"
            ),
            "error": str(exc)
        }
534
+
535
+
536
+ # ============================================================
537
+ # UNKNOWN QUERY HANDLER
538
+ # ============================================================
539
+
540
def _handle_unknown_query(city_name: str) -> Dict[str, Any]:
    """
    ❓ Fallback response when no tool keyword matches the request.
    """
    logger.info(f"❓ Unknown query for {city_name}")

    message = (
        "I'm not sure which civic service you're asking about. "
        "Try asking about weather, events, trash, or transit! 💬"
    )
    return {"tool": "unknown", "city": city_name, "response": message}
554
+
555
+
556
+ # ============================================================
557
+ # HEALTH CHECK & DIAGNOSTICS
558
+ # ============================================================
559
+
560
def get_tool_agent_health() -> Dict[str, Any]:
    """
    📊 Report tool-agent health: dependency availability, per-tool
    request counters, and the supported query categories.

    Consumed by the main application's health-check endpoint.
    """
    stats = {
        "total_requests": _tool_request_count,
        "weather_requests": _weather_request_count,
        "event_requests": _event_request_count,
        "resource_requests": _resource_request_count,
    }
    return {
        "status": "operational",
        "service_availability": {
            "weather_agent": WEATHER_AGENT_AVAILABLE,
            "location_utils": LOCATION_UTILS_AVAILABLE,
        },
        "statistics": stats,
        "supported_queries": [
            "weather",
            "events",
            "trash_and_recycling",
            "transit",
            "emergency",
        ],
    }
586
+
587
+
588
+ # ============================================================
589
+ # TESTING
590
+ # ============================================================
591
+
592
if __name__ == "__main__":
    """🧪 Smoke-test the tool agent from the command line."""
    import asyncio

    banner = "=" * 60
    print(banner)
    print("🧪 Testing Tool Agent")
    print(banner)

    # Display service availability before running queries.
    print("\n📊 Service Availability:")
    print(f"   Weather Agent: {'✅' if WEATHER_AGENT_AVAILABLE else '❌'}")
    print(f"   Location Utils: {'✅' if LOCATION_UTILS_AVAILABLE else '❌'}")

    print("\n" + banner)

    # (name, input text, lat, lon) per scenario.
    test_queries = [
        ("Weather query", "What's the weather in Atlanta?", 33.7490, -84.3880),
        ("Events query", "Events in Atlanta", None, None),
        ("Trash query", "When is trash pickup?", None, None),
    ]

    async def run_tests():
        for i, (name, text, qlat, qlon) in enumerate(test_queries, 1):
            print(f"\n--- Test {i}: {name} ---")
            print(f"Query: {text}")

            try:
                result = await handle_tool_request(
                    user_input=text,
                    role="test_user",
                    lat=qlat,
                    lon=qlon
                )

                print(f"Tool: {result.get('tool')}")
                print(f"City: {result.get('city')}")

                response = result.get('response')
                if isinstance(response, str):
                    print(f"Response: {response[:150]}...")
                else:
                    print(f"Response: [Dict with {len(response)} keys]")

                if result.get('response_time_ms'):
                    print(f"Response time: {result['response_time_ms']:.0f}ms")

            except Exception as e:
                print(f"❌ Error: {e}")

    asyncio.run(run_tests())

    print("\n" + banner)
    print("📊 Final Statistics:")
    for key, value in get_tool_agent_health()["statistics"].items():
        print(f"   {key}: {value}")

    print("\n" + banner)
    print("✅ Tests complete")
    print(banner)