| """ |
| Structured logging configuration for the application. |
| |
| This module sets up consistent logging across all components with support for |
| different log levels, formatters, and handlers. |
| """ |
|
|
import functools
import logging
import sys
from datetime import datetime
from pathlib import Path
from typing import Optional


class LogColors:
    """ANSI escape sequences used to colorize terminal log output."""

    RESET = "\x1b[0m"   # restore default terminal attributes
    BOLD = "\x1b[1m"
    RED = "\x1b[91m"
    GREEN = "\x1b[92m"
    YELLOW = "\x1b[93m"
    BLUE = "\x1b[94m"
    MAGENTA = "\x1b[95m"
    CYAN = "\x1b[96m"
    GRAY = "\x1b[90m"


class ColoredFormatter(logging.Formatter):
    """Custom formatter with colors for different log levels.

    The per-level ``logging.Formatter`` instances are built once at class
    definition time instead of on every ``format()`` call (the original
    constructed a fresh Formatter per record).
    """

    # Per-level format strings: colorized level name, then logger name and message.
    FORMATS = {
        logging.DEBUG: LogColors.GRAY + "%(levelname)s" + LogColors.RESET + " | %(name)s | %(message)s",
        logging.INFO: LogColors.GREEN + "%(levelname)s" + LogColors.RESET + " | %(name)s | %(message)s",
        logging.WARNING: LogColors.YELLOW + "%(levelname)s" + LogColors.RESET + " | %(name)s | %(message)s",
        logging.ERROR: LogColors.RED + "%(levelname)s" + LogColors.RESET + " | %(name)s | %(message)s",
        logging.CRITICAL: LogColors.BOLD + LogColors.RED + "%(levelname)s" + LogColors.RESET + " | %(name)s | %(message)s",
    }

    # Pre-built formatters, one per known level (hoists the invariant work
    # out of the per-record hot path).
    _FORMATTERS = {
        level: logging.Formatter(fmt, datefmt="%Y-%m-%d %H:%M:%S")
        for level, fmt in FORMATS.items()
    }

    # Fallback for unrecognized levels; Formatter(None) uses logging's default
    # "%(message)s" format, matching the original behavior of FORMATS.get()
    # returning None.
    _DEFAULT_FORMATTER = logging.Formatter(None, datefmt="%Y-%m-%d %H:%M:%S")

    def format(self, record):
        """Format *record* with the pre-built formatter for its level."""
        formatter = self._FORMATTERS.get(record.levelno, self._DEFAULT_FORMATTER)
        return formatter.format(record)


class FileFormatter(logging.Formatter):
    """File formatter with timestamps and detailed information.

    Layout: timestamp | padded level | logger:line | message.
    """

    _FMT = "%(asctime)s | %(levelname)-8s | %(name)s:%(lineno)d | %(message)s"
    _DATEFMT = "%Y-%m-%d %H:%M:%S"

    def __init__(self):
        super().__init__(fmt=self._FMT, datefmt=self._DATEFMT)


def setup_logging(
    log_level: str = "INFO",
    log_file: Optional[str] = None,
    log_to_console: bool = True,
) -> None:
    """
    Set up logging configuration for the application.

    Replaces any handlers on the root logger, then quiets known-chatty
    third-party libraries down to WARNING.

    Args:
        log_level: Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL).
            Unrecognized names silently fall back to INFO.
        log_file: Optional path to log file. If None, logs only to console.
            Parent directories are created as needed.
        log_to_console: Whether to log to console (default: True)
    """
    # Unknown level names fall back to INFO rather than raising.
    numeric_level = getattr(logging, log_level.upper(), logging.INFO)

    root_logger = logging.getLogger()
    root_logger.setLevel(numeric_level)

    # Drop handlers from any previous setup_logging() call so repeated
    # configuration does not duplicate output.
    root_logger.handlers.clear()

    if log_to_console:
        console_handler = logging.StreamHandler(sys.stdout)
        console_handler.setLevel(numeric_level)
        console_handler.setFormatter(ColoredFormatter())
        root_logger.addHandler(console_handler)

    if log_file:
        # Ensure the log directory exists before FileHandler opens the file.
        log_path = Path(log_file)
        log_path.parent.mkdir(parents=True, exist_ok=True)

        file_handler = logging.FileHandler(log_file, encoding="utf-8")
        file_handler.setLevel(numeric_level)
        file_handler.setFormatter(FileFormatter())
        root_logger.addHandler(file_handler)

    # Quiet noisy third-party libraries down to WARNING.
    for noisy in (
        "chromadb",
        "sentence_transformers",
        "urllib3",
        "httpx",
        "httpcore",
        "openai",
        "anthropic",
    ):
        logging.getLogger(noisy).setLevel(logging.WARNING)

    # Report the *effective* level, not the raw argument: the original echoed
    # the caller's string even when an invalid name had fallen back to INFO.
    root_logger.info("Logging initialized at %s level", logging.getLevelName(numeric_level))


def get_logger(name: str) -> logging.Logger:
    """Return the ``logging.Logger`` registered under *name*.

    Thin convenience wrapper over :func:`logging.getLogger`; *name* is
    typically the calling module's ``__name__``.
    """
    return logging.getLogger(name)


def log_function_call(logger: logging.Logger):
    """
    Decorator factory to log function calls with arguments and return values.

    Calls and successful completions are logged at DEBUG; exceptions are
    logged at ERROR (with traceback) and re-raised unchanged.

    Usage:
        @log_function_call(logger)
        def my_function(arg1, arg2):
            return result
    """
    def decorator(func):
        # functools.wraps preserves __name__/__doc__/etc. of the wrapped
        # function (the original decorator clobbered them).
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            func_name = func.__name__
            # Lazy %-formatting: arguments are only rendered if DEBUG is enabled.
            logger.debug("Calling %s with args=%s, kwargs=%s", func_name, args, kwargs)
            try:
                result = func(*args, **kwargs)
                logger.debug("%s completed successfully", func_name)
                return result
            except Exception as e:
                logger.error("%s failed with error: %s", func_name, e, exc_info=True)
                raise
        return wrapper
    return decorator


def log_pdf_processing(logger: logging.Logger, filename: str, pages: int, chunks: int):
    """Log PDF processing completion.

    Args:
        logger: Destination logger.
        filename: Name of the processed PDF.
        pages: Number of pages extracted.
        chunks: Number of text chunks produced.
    """
    # Bug fix: the original message hard-coded a placeholder string instead of
    # interpolating ``filename``, so the parameter was silently ignored.
    logger.info("Processed PDF: %s | Pages: %s | Chunks: %s", filename, pages, chunks)


def log_retrieval(logger: logging.Logger, query: str, num_results: int, duration_ms: float):
    """Log a retrieval operation.

    The result count and latency are logged at INFO; the query text
    (truncated to 100 characters) at DEBUG.
    """
    logger.info("Retrieved %s chunks for query in %.2fms", num_results, duration_ms)
    # Only append an ellipsis when the query was actually truncated — the
    # original added "..." unconditionally, misrepresenting short queries.
    preview = query[:100] + "..." if len(query) > 100 else query
    logger.debug("Query: %s", preview)


def log_llm_call(logger: logging.Logger, model: str, tokens_in: int, tokens_out: int, duration_s: float):
    """Record one LLM API round-trip: model, token counts, wall-clock time."""
    # Lazy %-args: the message is only rendered if INFO is enabled.
    logger.info(
        "LLM call: %s | In: %s tokens | Out: %s tokens | Duration: %.2fs",
        model,
        tokens_in,
        tokens_out,
        duration_s,
    )


def log_embedding_generation(logger: logging.Logger, num_chunks: int, duration_s: float):
    """Record embedding throughput for a batch of chunks."""
    # Guard against a zero (or non-positive) duration to avoid ZeroDivisionError.
    if duration_s > 0:
        rate = num_chunks / duration_s
    else:
        rate = 0
    logger.info(
        "Generated embeddings for %s chunks in %.2fs (%.1f chunks/s)",
        num_chunks,
        duration_s,
        rate,
    )


def log_cache_hit(logger: logging.Logger, cache_type: str, key: str):
    """Record a cache hit at DEBUG level (key truncated to 50 characters)."""
    logger.debug("Cache hit: %s | Key: %s", cache_type, key[:50])


def log_cache_miss(logger: logging.Logger, cache_type: str, key: str):
    """Record a cache miss at DEBUG level (key truncated to 50 characters)."""
    logger.debug("Cache miss: %s | Key: %s", cache_type, key[:50])


def log_error(logger: logging.Logger, operation: str, error: Exception):
    """Record *error* raised during *operation*, including the traceback."""
    # exc_info=True attaches the active exception's traceback to the record.
    logger.error("Error in %s: %s", operation, error, exc_info=True)