pythonprincess commited on
Commit
f28ac7b
·
verified ·
1 Parent(s): 6f38cee

Delete app/logging_utils.py

Browse files
Files changed (1) hide show
  1. app/logging_utils.py +0 -778
app/logging_utils.py DELETED
@@ -1,778 +0,0 @@
1
- # app/logging_utils.py
2
- """
3
- 📊 Penny's Logging & Analytics System
4
- Tracks user interactions, system performance, and civic engagement patterns.
5
-
6
- MISSION: Create an audit trail that helps improve Penny's service while
7
- respecting user privacy and meeting compliance requirements.
8
-
9
- FEATURES:
10
- - Structured JSON logging for Azure Application Insights
11
- - Daily log rotation for long-term storage
12
- - Privacy-safe request/response tracking
13
- - Performance monitoring
14
- - Error tracking with context
15
- - Optional Azure Blob Storage integration
16
- """
17
-
18
- import json
19
- import logging
20
- from datetime import datetime, timezone
21
- from pathlib import Path
22
- import os
23
- from typing import Dict, Any, Optional, List
24
- from dataclasses import dataclass, asdict
25
- from enum import Enum
26
- import hashlib
27
-
28
# --- LOGGING SETUP ---
# Module-level logger, named after the module so handlers (and Azure
# Application Insights) can filter on "app.logging_utils".
logger = logging.getLogger(__name__)

# ============================================================
# LOG PATH CONFIGURATION (Environment-aware)
# ============================================================

# Base directories (use pathlib for OS compatibility).
# PROJECT_ROOT is the repository root: two levels up from this file
# (app/logging_utils.py -> app/ -> project root).
PROJECT_ROOT = Path(__file__).parent.parent.resolve()
LOGS_BASE_DIR = PROJECT_ROOT / "data" / "logs"
DEFAULT_LOG_PATH = LOGS_BASE_DIR / "penny_combined.jsonl"

# Environment-configurable log path.
# PENNY_LOG_PATH overrides only the combined (non-rotated) log file;
# daily-rotated files always live under LOGS_BASE_DIR.
LOG_PATH = Path(os.getenv("PENNY_LOG_PATH", str(DEFAULT_LOG_PATH)))

# Ensure log directory exists on import.
# NOTE(review): deliberate import-time side effect so later writes never
# fail on a missing directory — confirm this is acceptable at startup.
LOGS_BASE_DIR.mkdir(parents=True, exist_ok=True)
45
-
46
-
47
- # ============================================================
48
- # LOG LEVEL ENUM (For categorizing log entries)
49
- # ============================================================
50
-
51
class LogLevel(str, Enum):
    """
    Severity/category tag attached to each structured log entry.

    String-valued (str subclass) so entries serialize directly to JSON
    and map onto Azure Application Insights severity levels.
    """
    DEBUG = "debug"        # detailed diagnostic output
    INFO = "info"          # routine informational events
    WARNING = "warning"    # something looks off, service continues
    ERROR = "error"        # a request or subsystem failed
    CRITICAL = "critical"  # severe failure needing attention
    AUDIT = "audit"        # compliance / audit-trail records
62
-
63
-
64
class InteractionType(str, Enum):
    """
    Category of a user interaction, used for usage analytics.

    String-valued so it serializes cleanly to JSON; helps track which
    features residents use most.
    """
    QUERY = "query"                       # general question
    RESOURCE_LOOKUP = "resource_lookup"   # finding civic resources
    TRANSLATION = "translation"           # language translation
    EVENT_SEARCH = "event_search"         # looking for events
    WEATHER = "weather"                   # weather inquiry
    DOCUMENT = "document_processing"      # PDF/form processing
    EMERGENCY = "emergency"               # crisis/emergency routing
    GREETING = "greeting"                 # conversational greeting
    HELP = "help"                         # help request
    UNKNOWN = "unknown"                   # unclassified
79
-
80
-
81
- # ============================================================
82
- # STRUCTURED LOG ENTRY (Type-safe logging)
83
- # ============================================================
84
-
85
@dataclass
class PennyLogEntry:
    """
    📋 One structured record of a Penny interaction.

    Designed to be Azure Application Insights compatible, privacy-safe
    (no raw PII unless explicitly allowed), analytics-ready, and
    compliance-friendly.
    """
    # Timestamp (ISO-8601, UTC)
    timestamp: str

    # Request context
    input: str
    input_length: int
    tenant_id: str
    user_role: str
    interaction_type: InteractionType

    # Response context
    intent: str
    tool_used: Optional[str]
    model_id: Optional[str]
    response_summary: str
    response_length: int
    response_time_ms: Optional[float]

    # Technical context
    log_level: LogLevel
    success: bool
    error_message: Optional[str] = None

    # Location context (optional)
    lat: Optional[float] = None
    lon: Optional[float] = None
    location_detected: Optional[str] = None

    # Privacy & compliance
    session_id: Optional[str] = None  # hashed session identifier
    contains_pii: bool = False

    # Performance metrics
    tokens_used: Optional[int] = None
    cache_hit: bool = False

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serializable dict, collapsing enum members to their values."""
        plain: Dict[str, Any] = {}
        for field_name, value in asdict(self).items():
            plain[field_name] = value.value if isinstance(value, Enum) else value
        return plain
136
-
137
-
138
- # ============================================================
139
- # DAILY LOG ROTATION
140
- # ============================================================
141
-
142
def get_daily_log_path() -> Path:
    """
    🗓️ Return today's (UTC) rotated log file path, creating its directory.

    Produces one file per UTC day, e.g.:
        data/logs/2025-02-01.jsonl
        data/logs/2025-02-02.jsonl

    Daily rotation keeps files small, simplifies archival of old logs,
    and matches a one-file-per-day Azure Blob Storage upload scheme.
    """
    today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
    daily_path = LOGS_BASE_DIR / (today + ".jsonl")

    # Guard against the directory having been removed since import.
    daily_path.parent.mkdir(parents=True, exist_ok=True)
    return daily_path
162
-
163
-
164
- # ============================================================
165
- # MAIN LOGGING FUNCTION (Enhanced)
166
- # ============================================================
167
-
168
def log_request(
    payload: Dict[str, Any],
    response: Dict[str, Any],
    rotate_daily: bool = True,
    log_level: LogLevel = LogLevel.INFO
) -> None:
    """
    📝 Logs a user interaction with Penny.

    This is the primary logging function called by router.py after
    processing each request. It creates a structured, privacy-safe
    record of the interaction: session IDs are hashed, input text is
    masked when PII is detected, and the response is truncated to a
    short summary.

    Args:
        payload: Incoming request data from router.py
        response: Final response dictionary from orchestrator
        rotate_daily: If True, writes to the daily-rotated file;
            otherwise appends to the combined LOG_PATH file
        log_level: Severity level recorded on this entry

    Example:
        log_request(
            payload={"input": "What's the weather?", "tenant_id": "atlanta_ga"},
            response={"intent": "weather", "response": "..."}
        )
    """

    try:
        # --- Extract Core Fields ---
        user_input = payload.get("input", "")
        tenant_id = payload.get("tenant_id", "unknown")
        user_role = payload.get("role", "resident")

        # --- Determine Interaction Type ---
        intent = response.get("intent", "unknown")
        interaction_type = _classify_interaction(intent)

        # --- Privacy: Hash Session ID (if provided) ---
        # Only a truncated SHA-256 digest is stored; the raw session ID
        # never reaches the log file (see _hash_identifier).
        session_id = payload.get("session_id")
        if session_id:
            session_id = _hash_identifier(session_id)

        # --- Detect PII (Simple check - can be enhanced) ---
        contains_pii = _check_for_pii(user_input)

        # --- Create Structured Log Entry ---
        log_entry = PennyLogEntry(
            timestamp=datetime.now(timezone.utc).isoformat(),
            input=_sanitize_input(user_input, contains_pii),  # masked when PII flagged
            input_length=len(user_input),  # length of the ORIGINAL, unmasked input
            tenant_id=tenant_id,
            user_role=user_role,
            interaction_type=interaction_type,
            intent=intent,
            tool_used=response.get("tool", "none"),
            model_id=response.get("model_id"),
            response_summary=_summarize_response(response.get("response")),
            response_length=len(str(response.get("response", ""))),
            response_time_ms=response.get("response_time_ms"),
            log_level=log_level,
            success=response.get("success", True),  # absent "success" is treated as success
            error_message=response.get("error"),
            lat=payload.get("lat"),
            lon=payload.get("lon"),
            location_detected=response.get("location_detected"),
            session_id=session_id,
            contains_pii=contains_pii,
            tokens_used=response.get("tokens_used"),
            cache_hit=response.get("cache_hit", False)
        )

        # --- Write to File ---
        log_path = get_daily_log_path() if rotate_daily else LOG_PATH
        _write_log_entry(log_path, log_entry)

        # --- Optional: Send to Azure (if enabled) ---
        if os.getenv("AZURE_LOGS_ENABLED", "false").lower() == "true":
            _send_to_azure(log_entry)

        # --- Log to console (for Azure Application Insights) ---
        logger.info(
            f"Request logged | "
            f"tenant={tenant_id} | "
            f"intent={intent} | "
            f"interaction={interaction_type.value} | "
            f"success={log_entry.success}"
        )

    except Exception as e:
        # Failsafe: Never let logging failures crash the application.
        # Fall back to a minimal console record so the event isn't lost.
        logger.error(f"Failed to log request: {e}", exc_info=True)
        _emergency_log(payload, response, str(e))
259
-
260
-
261
- # ============================================================
262
- # LOG WRITING (With error handling)
263
- # ============================================================
264
-
265
def _write_log_entry(log_path: Path, log_entry: PennyLogEntry) -> None:
    """
    📁 Append one entry to *log_path* as a single JSON line.

    All I/O failures are logged and the entry is echoed to the console
    instead, so the record is never silently lost.
    """
    try:
        # The directory may have been removed between calls.
        log_path.parent.mkdir(parents=True, exist_ok=True)

        with open(log_path, "a", encoding="utf-8") as handle:
            handle.write(json.dumps(log_entry.to_dict(), ensure_ascii=False) + "\n")

    except IOError as e:
        logger.error(f"Failed to write to log file {log_path}: {e}")
        _emergency_log_to_console(log_entry)
    except Exception as e:
        logger.error(f"Unexpected error writing log: {e}", exc_info=True)
        _emergency_log_to_console(log_entry)
285
-
286
-
287
- def _emergency_log_to_console(log_entry: PennyLogEntry) -> None:
288
- """
289
- 🚨 Emergency fallback: Print log to console if file writing fails.
290
- Azure Application Insights will capture console output.
291
- """
292
- print(f"[EMERGENCY LOG] {json.dumps(log_entry.to_dict())}")
293
-
294
-
295
- def _emergency_log(payload: Dict, response: Dict, error: str) -> None:
296
- """
297
- 🚨 Absolute fallback for when structured logging fails entirely.
298
- """
299
- emergency_entry = {
300
- "timestamp": datetime.now(timezone.utc).isoformat(),
301
- "level": "CRITICAL",
302
- "message": "Logging system failure",
303
- "error": error,
304
- "input_preview": str(payload.get("input", ""))[:100],
305
- "response_preview": str(response.get("response", ""))[:100]
306
- }
307
- print(f"[LOGGING FAILURE] {json.dumps(emergency_entry)}")
308
-
309
-
310
- # ============================================================
311
- # HELPER FUNCTIONS
312
- # ============================================================
313
-
314
def _classify_interaction(intent: str) -> InteractionType:
    """
    🏷️ Translate a raw intent string into an analytics category.

    Matching is case-insensitive; unrecognized intents fall back to
    InteractionType.UNKNOWN.
    """
    known = {
        "weather": InteractionType.WEATHER,
        "events": InteractionType.EVENT_SEARCH,
        "local_resources": InteractionType.RESOURCE_LOOKUP,
        "translation": InteractionType.TRANSLATION,
        "document_processing": InteractionType.DOCUMENT,
        "emergency": InteractionType.EMERGENCY,
        "greeting": InteractionType.GREETING,
        "help": InteractionType.HELP,
    }
    try:
        return known[intent.lower()]
    except KeyError:
        return InteractionType.UNKNOWN
329
-
330
-
331
- def _summarize_response(resp: Optional[Any]) -> str:
332
- """
333
- ✂️ Creates a truncated summary of the response for logging.
334
- Prevents log files from becoming bloated with full responses.
335
- """
336
- if resp is None:
337
- return "No response content"
338
-
339
- if isinstance(resp, dict):
340
- # Try to extract the most meaningful part
341
- summary = (
342
- resp.get("response") or
343
- resp.get("summary") or
344
- resp.get("message") or
345
- str(resp)
346
- )
347
- return str(summary)[:250]
348
-
349
- return str(resp)[:250]
350
-
351
-
352
- def _hash_identifier(identifier: str) -> str:
353
- """
354
- 🔒 Creates a privacy-safe hash of identifiers (session IDs, user IDs).
355
-
356
- Uses SHA256 for one-way hashing. This allows:
357
- - Session tracking without storing raw IDs
358
- - Privacy compliance (GDPR, CCPA)
359
- - Anonymized analytics
360
- """
361
- return hashlib.sha256(identifier.encode()).hexdigest()[:16]
362
-
363
-
364
- def _check_for_pii(text: str) -> bool:
365
- """
366
- 🔍 Simple PII detection (can be enhanced with NER models).
367
-
368
- Checks for common PII patterns:
369
- - Social Security Numbers
370
- - Email addresses
371
- - Phone numbers
372
-
373
- Returns True if potential PII detected.
374
- """
375
- import re
376
-
377
- # SSN pattern: XXX-XX-XXXX
378
- ssn_pattern = r'\b\d{3}-\d{2}-\d{4}\b'
379
-
380
- # Email pattern
381
- email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b'
382
-
383
- # Phone pattern: various formats
384
- phone_pattern = r'\b\d{3}[-.\s]?\d{3}[-.\s]?\d{4}\b'
385
-
386
- patterns = [ssn_pattern, email_pattern, phone_pattern]
387
-
388
- for pattern in patterns:
389
- if re.search(pattern, text):
390
- return True
391
-
392
- return False
393
-
394
-
395
- def _sanitize_input(text: str, contains_pii: bool) -> str:
396
- """
397
- 🧹 Sanitizes user input for logging.
398
-
399
- If PII detected:
400
- - Masks the input for privacy
401
- - Keeps first/last few characters for debugging
402
-
403
- Args:
404
- text: Original user input
405
- contains_pii: Whether PII was detected
406
-
407
- Returns:
408
- Sanitized text safe for logging
409
- """
410
- if not contains_pii:
411
- return text
412
-
413
- # Mask middle portion if PII detected
414
- if len(text) <= 20:
415
- return "[PII_DETECTED]"
416
-
417
- # Keep first 10 and last 10 chars, mask middle
418
- return f"{text[:10]}...[PII_MASKED]...{text[-10:]}"
419
-
420
-
421
- # ============================================================
422
- # AZURE INTEGRATION (Placeholder for future)
423
- # ============================================================
424
-
425
def _send_to_azure(log_entry: PennyLogEntry) -> None:
    """
    ☁️ Sends log entry to Azure services. (Currently a stub.)

    Intended integration options:
    1. Azure Application Insights (custom events)
    2. Azure Blob Storage (long-term archival)
    3. Azure Table Storage (queryable logs)

    TODO: Implement when Azure integration is ready. Until then this
    only emits a debug message; the caller gates it on AZURE_LOGS_ENABLED.
    """
    try:
        # Example: Send to Application Insights
        # from applicationinsights import TelemetryClient
        # tc = TelemetryClient(os.getenv("APPINSIGHTS_INSTRUMENTATION_KEY"))
        # tc.track_event(
        #     "PennyInteraction",
        #     properties=log_entry.to_dict()
        # )
        # tc.flush()

        logger.debug("Azure logging not yet implemented")

    except Exception as e:
        logger.error(f"Failed to send log to Azure: {e}")
        # Don't raise - logging failures should never crash the app
451
-
452
-
453
- # ============================================================
454
- # LOG ANALYSIS UTILITIES
455
- # ============================================================
456
-
457
def get_logs_for_date(date: str) -> List[Dict[str, Any]]:
    """
    📊 Load every JSONL log entry recorded on a given day.

    Args:
        date: Date string in YYYY-MM-DD format

    Returns:
        List of log entry dictionaries; empty when the daily file is
        missing. Read errors are logged and whatever was parsed so far
        is returned.

    Example:
        logs = get_logs_for_date("2025-02-01")
    """
    log_file = LOGS_BASE_DIR / f"{date}.jsonl"

    if not log_file.exists():
        logger.warning(f"No logs found for date: {date}")
        return []

    entries: List[Dict[str, Any]] = []
    try:
        with open(log_file, "r", encoding="utf-8") as handle:
            for raw_line in handle:
                if raw_line.strip():  # skip blank lines
                    entries.append(json.loads(raw_line))
    except Exception as e:
        logger.error(f"Error reading logs for {date}: {e}")

    return entries
486
-
487
-
488
def get_interaction_stats(date: str) -> Dict[str, Any]:
    """
    📈 Summarize one day's logs into usage statistics.

    Reports total interactions, success count and rate, average response
    time, a per-interaction-type breakdown, and the five most common
    intents.

    Args:
        date: Date string in YYYY-MM-DD format

    Returns:
        Statistics dictionary, or {"error": ..., "date": ...} when no
        logs exist for that day.
    """
    logs = get_logs_for_date(date)

    if not logs:
        return {"error": "No logs found for date", "date": date}

    total = len(logs)
    successful = sum(1 for entry in logs if entry.get("success", False))

    # Average over only the entries that actually recorded a latency.
    times = [
        entry["response_time_ms"]
        for entry in logs
        if entry.get("response_time_ms") is not None
    ]
    avg_time = sum(times) / len(times) if times else 0

    def _tally(field: str) -> Dict[str, int]:
        # Count occurrences of a field's value across all entries.
        counts: Dict[str, int] = {}
        for entry in logs:
            key = entry.get(field, "unknown")
            counts[key] = counts.get(key, 0) + 1
        return counts

    interaction_counts = _tally("interaction_type")
    intent_counts = _tally("intent")

    ranked_intents = sorted(
        intent_counts.items(),
        key=lambda item: item[1],
        reverse=True,
    )

    return {
        "date": date,
        "total_interactions": total,
        "successful_interactions": successful,
        "success_rate": f"{(successful/total*100):.1f}%",
        "avg_response_time_ms": round(avg_time, 2),
        "interactions_by_type": interaction_counts,
        "top_intents": dict(ranked_intents[:5]),
    }
547
-
548
-
549
- # ============================================================
550
- # LOG CLEANUP (For maintenance)
551
- # ============================================================
552
-
553
def cleanup_old_logs(days_to_keep: int = 90) -> int:
    """
    🧹 Delete daily log files older than *days_to_keep* days.

    Only files named like YYYY-MM-DD.jsonl are considered; anything else
    in the log directory is left untouched.

    Args:
        days_to_keep: Number of days to retain logs

    Returns:
        Number of files deleted

    Example:
        # Delete logs older than 90 days
        deleted = cleanup_old_logs(90)
    """
    from datetime import timedelta

    cutoff = datetime.now(timezone.utc) - timedelta(days=days_to_keep)
    deleted = 0

    try:
        for candidate in LOGS_BASE_DIR.glob("*.jsonl"):
            # Parse the date out of the filename stem (YYYY-MM-DD).
            try:
                stamped = datetime.strptime(candidate.stem, "%Y-%m-%d").replace(tzinfo=timezone.utc)
            except ValueError:
                continue  # not a dated log file — skip it

            if stamped < cutoff:
                candidate.unlink()
                deleted += 1
                logger.info(f"Deleted old log file: {candidate.name}")

    except Exception as e:
        logger.error(f"Error during log cleanup: {e}")

    logger.info(f"Log cleanup complete: {deleted} files deleted")
    return deleted
593
-
594
-
595
- # ============================================================
596
- # PUBLIC API FUNCTIONS (Used by other modules)
597
- # ============================================================
598
-
599
def log_interaction(
    tenant_id: Optional[str] = None,
    interaction_type: Optional[str] = None,
    intent: Optional[str] = None,
    response_time_ms: Optional[float] = None,
    success: Optional[bool] = None,
    metadata: Optional[Dict[str, Any]] = None,
    **kwargs
) -> None:
    """
    📝 Simplified logging function used throughout Penny's codebase.

    This is the main logging entry point called by the orchestrator,
    router, agents, and model utils. It builds a flat dict entry and
    appends it to the daily log file. String values (tenant_id, string
    metadata/kwargs values) are PII-sanitized before being written.

    Args:
        tenant_id: City/location identifier (optional)
        interaction_type: Type of interaction (e.g., "weather", "events", "orchestration") (optional)
        intent: Detected intent (e.g., "weather", "emergency") (optional)
        response_time_ms: Response time in milliseconds (optional)
        success: Whether the operation succeeded (optional)
        metadata: Optional additional metadata dictionary
        **kwargs: Additional fields to include in the entry (e.g. error,
            details, fallback_used); standard fields are never overwritten.

    Example:
        log_interaction(
            tenant_id="atlanta_ga",
            interaction_type="weather",
            intent="weather",
            response_time_ms=150.5,
            success=True,
            metadata={"temperature": 72, "condition": "sunny"}
        )

        # Or with keyword arguments:
        log_interaction(
            intent="translation_initialization",
            success=False,
            error="model_loader unavailable"
        )
    """
    def _sanitized(value: Any) -> Any:
        # Only strings can carry user-supplied PII; pass others through.
        return sanitize_for_logging(value) if isinstance(value, str) else value

    # Built BEFORE the try block so the except handler can always
    # reference it (the original used a fragile `'x' in locals()` check).
    log_entry_dict: Dict[str, Any] = {
        "timestamp": datetime.now(timezone.utc).isoformat()
    }

    try:
        # Standard fields, only when explicitly provided.
        if tenant_id is not None:
            log_entry_dict["tenant_id"] = sanitize_for_logging(tenant_id)
        if interaction_type is not None:
            log_entry_dict["interaction_type"] = interaction_type
        if intent is not None:
            log_entry_dict["intent"] = intent
        if response_time_ms is not None:
            log_entry_dict["response_time_ms"] = round(response_time_ms, 2)
        if success is not None:
            log_entry_dict["success"] = success

        # Metadata dict, with string values sanitized.
        if metadata:
            log_entry_dict["metadata"] = {
                key: _sanitized(value) for key, value in metadata.items()
            }

        # Extra kwargs for backward compatibility with model utils;
        # never overwrite a standard field already set above.
        for key, value in kwargs.items():
            if key not in log_entry_dict:
                log_entry_dict[key] = _sanitized(value)

        _write_log_entry_dict(get_daily_log_path(), log_entry_dict)

    except Exception as e:
        # Failsafe: Never let logging failures crash the application.
        logger.error(f"Failed to log interaction: {e}", exc_info=True)
        _emergency_log_to_console_dict(log_entry_dict)
685
-
686
-
687
def sanitize_for_logging(text: str) -> str:
    """
    🧹 Sanitizes text for safe logging (removes PII).

    Used throughout Penny to keep sensitive information out of logs.
    Non-string or empty/falsy input is stringified (falsy → ""); text
    with detected PII is masked, keeping only the first and last 10
    characters of longer strings.

    Args:
        text: Text to sanitize

    Returns:
        Sanitized text safe for logging

    Example:
        safe_text = sanitize_for_logging("My email is user@example.com")
        # Returns a masked form such as "My email i...[PII_MASKED]...xample.com"
    """
    if not text or not isinstance(text, str):
        return str(text) if text else ""

    if not _check_for_pii(text):
        return text

    # PII found — mask it, same scheme as _sanitize_input.
    if len(text) <= 20:
        return "[PII_DETECTED]"
    return f"{text[:10]}...[PII_MASKED]...{text[-10:]}"
717
-
718
-
719
def _write_log_entry_dict(log_path: Path, log_entry_dict: Dict[str, Any]) -> None:
    """
    📁 Append a raw dict entry to *log_path* as one JSON line.

    Helper for the simplified logging path; on any failure the entry is
    echoed to the console instead of being lost.
    """
    try:
        log_path.parent.mkdir(parents=True, exist_ok=True)
        with open(log_path, "a", encoding="utf-8") as handle:
            handle.write(json.dumps(log_entry_dict, ensure_ascii=False) + "\n")
    except Exception as e:
        logger.error(f"Failed to write log entry: {e}")
        _emergency_log_to_console_dict(log_entry_dict)
732
-
733
-
734
- def _emergency_log_to_console_dict(log_entry_dict: Dict[str, Any]) -> None:
735
- """
736
- 🚨 Emergency fallback: Print log to console if file writing fails.
737
- """
738
- print(f"[EMERGENCY LOG] {json.dumps(log_entry_dict)}")
739
-
740
-
741
- # ============================================================
742
- # INITIALIZATION
743
- # ============================================================
744
-
745
def initialize_logging_system() -> bool:
    """
    🚀 Prepare the logging directory and verify write access.

    Should be called once during app startup. Creates the log directory
    if missing and probes it with a throwaway file to confirm the
    process can actually write there.

    Returns:
        True on success, False when the directory can't be created or
        written to.
    """
    logger.info("📊 Initializing Penny's logging system...")

    try:
        LOGS_BASE_DIR.mkdir(parents=True, exist_ok=True)

        # Probe write permissions with a throwaway file.
        probe = LOGS_BASE_DIR / ".write_test"
        probe.write_text("test")
        probe.unlink()

        logger.info(f"✅ Logging system initialized")
        logger.info(f"📁 Log directory: {LOGS_BASE_DIR}")
        logger.info(f"🔄 Daily rotation: Enabled")

        # Report whether the (stubbed) Azure path is switched on.
        if os.getenv("AZURE_LOGS_ENABLED") == "true":
            logger.info("☁️ Azure logging: Enabled")
        else:
            logger.info("💾 Azure logging: Disabled (local only)")

        return True

    except Exception as e:
        logger.error(f"❌ Failed to initialize logging system: {e}")
        return False
- return False