""" Device and Hardware Management Module ==================================== Handles device detection, CUDA compatibility, memory management, and threading configuration for BackgroundFX Pro. Fixes: - CUDA multiprocessor_count compatibility error - OpenMP threading issues (OMP_NUM_THREADS) - GPU memory optimization - Automatic device selection Author: BackgroundFX Pro Team License: MIT """ import os import logging import warnings from typing import Dict, Optional, List, Tuple import platform import psutil # Fix threading issues immediately at module import os.environ.setdefault('OMP_NUM_THREADS', '4') os.environ.setdefault('MKL_NUM_THREADS', '4') os.environ.setdefault('NUMEXPR_NUM_THREADS', '4') try: import torch TORCH_AVAILABLE = True except ImportError: TORCH_AVAILABLE = False warnings.warn("PyTorch not available - using CPU-only processing") try: import cv2 OPENCV_AVAILABLE = True except ImportError: OPENCV_AVAILABLE = False warnings.warn("OpenCV not available") logger = logging.getLogger(__name__) class DeviceManager: """Manages device detection, selection and optimization""" def __init__(self): self.device = None self.device_info = {} self.cuda_available = False self.gpu_count = 0 self.memory_info = {} self.threading_configured = False def initialize(self) -> bool: """Initialize device manager and configure optimal settings""" try: logger.info("🔧 Initializing Device Manager...") # Fix threading first self._configure_threading() # Detect available devices self._detect_devices() # Configure CUDA if available if self.cuda_available: self._configure_cuda() # Select optimal device self.device = self._select_optimal_device() # Log system information self._log_system_info() logger.info(f"✅ Device Manager initialized - Using: {self.device}") return True except Exception as e: logger.error(f"❌ Device Manager initialization failed: {e}") self.device = 'cpu' return False def _configure_threading(self): """Configure threading for optimal performance""" try: # Set OpenMP threads if 'OMP_NUM_THREADS' not in os.environ: os.environ['OMP_NUM_THREADS'] = '4' # Set MKL threads if 'MKL_NUM_THREADS' not in os.environ: os.environ['MKL_NUM_THREADS'] = '4' # Set NumExpr threads if 'NUMEXPR_NUM_THREADS' not in os.environ: os.environ['NUMEXPR_NUM_THREADS'] = '4' # Configure PyTorch threads if TORCH_AVAILABLE: torch.set_num_threads(4) torch.set_num_interop_threads(4) # Configure OpenCV threads if OPENCV_AVAILABLE: cv2.setNumThreads(4) self.threading_configured = True logger.info(f"✅ Threading configured: OMP={os.environ.get('OMP_NUM_THREADS')}") except Exception as e: logger.warning(f"âš ī¸ Threading configuration warning: {e}") def _detect_devices(self): """Detect available computing devices""" try: if not TORCH_AVAILABLE: self.cuda_available = False self.gpu_count = 0 return # Check CUDA availability self.cuda_available = torch.cuda.is_available() self.gpu_count = torch.cuda.device_count() if self.cuda_available else 0 if self.cuda_available: logger.info(f"✅ CUDA available: {self.gpu_count} GPU(s)") # Get device properties for each GPU for i in range(self.gpu_count): try: props = self._get_cuda_properties_safe(i) self.device_info[f'cuda:{i}'] = props logger.info(f" GPU {i}: {props['name']} ({props['memory_gb']:.1f} GB)") except Exception as e: logger.warning(f" GPU {i}: Properties unavailable ({e})") else: logger.info("â„šī¸ CUDA not available - using CPU") except Exception as e: logger.error(f"❌ Device detection failed: {e}") self.cuda_available = False self.gpu_count = 0 def 
    def _get_cuda_properties_safe(self, device_id: int) -> Dict:
        """Safely get CUDA device properties with compatibility handling."""
        try:
            if not TORCH_AVAILABLE or not torch.cuda.is_available():
                return {}

            props = torch.cuda.get_device_properties(device_id)

            # Handle different PyTorch versions for multiprocessor count
            if hasattr(props, 'multi_processor_count'):
                sm_count = props.multi_processor_count
            elif hasattr(props, 'multiprocessor_count'):
                sm_count = props.multiprocessor_count
            else:
                # Fallback for PyTorch builds that expose neither attribute
                try:
                    major, minor = torch.cuda.get_device_capability(device_id)
                    # Rough estimate based on compute capability
                    sm_count = major * 8 if major >= 6 else major * 4
                except Exception:
                    sm_count = 'Unknown'

            device_props = {
                'name': props.name,
                'memory_gb': props.total_memory / (1024 ** 3),
                'memory_bytes': props.total_memory,
                'multiprocessor_count': sm_count,
                'major': props.major,
                'minor': props.minor,
                'compute_capability': f"{props.major}.{props.minor}",
            }
            return device_props

        except Exception as e:
            logger.error(f"❌ Error getting CUDA properties for device {device_id}: {e}")
            return {
                'name': 'Unknown GPU',
                'memory_gb': 0.0,
                'memory_bytes': 0,
                'multiprocessor_count': 'Unknown',
                'error': str(e),
            }

    def _configure_cuda(self):
        """Configure CUDA for optimal performance."""
        try:
            if not self.cuda_available or not TORCH_AVAILABLE:
                return

            # Enable the cuDNN autotuner; trades determinism for speed
            torch.backends.cudnn.benchmark = True
            torch.backends.cudnn.deterministic = False

            # Release any cached GPU memory
            torch.cuda.empty_cache()

            # Check whether automatic mixed precision is available
            try:
                from torch.cuda.amp import autocast  # noqa: F401 - availability check
                logger.info("✅ Mixed precision available")
            except ImportError:
                logger.info("ℹ️ Mixed precision not available")

            logger.info("✅ CUDA optimization configured")

        except Exception as e:
            logger.warning(f"⚠️ CUDA configuration warning: {e}")

    def _select_optimal_device(self) -> str:
        """Select the optimal device for processing."""
        try:
            if not TORCH_AVAILABLE:
                return 'cpu'

            if not self.cuda_available or self.gpu_count == 0:
                return 'cpu'

            # Select the GPU with the most memory
            best_device = 'cuda:0'
            best_memory = 0

            for device_name, props in self.device_info.items():
                if device_name.startswith('cuda:'):
                    memory = props.get('memory_gb', 0)
                    if memory > best_memory:
                        best_memory = memory
                        best_device = device_name

            # Minimum memory check: require at least 2 GB
            if best_memory < 2.0:
                logger.warning(f"⚠️ GPU memory ({best_memory:.1f}GB) may be insufficient, using CPU")
                return 'cpu'

            return best_device

        except Exception as e:
            logger.error(f"❌ Device selection failed: {e}")
            return 'cpu'

    def _log_system_info(self):
        """Log comprehensive system information."""
        try:
            # System information
            logger.info(f"📊 System: {platform.system()} {platform.release()}")
            logger.info(f"💾 CPU: {platform.processor()}")
            logger.info(f"🧠 RAM: {psutil.virtual_memory().total / (1024 ** 3):.1f} GB")

            # Python and package versions
            logger.info(f"🐍 Python: {platform.python_version()}")
            if TORCH_AVAILABLE:
                logger.info(f"🔥 PyTorch: {torch.__version__}")
                if torch.cuda.is_available():
                    logger.info(f"⚡ CUDA: {torch.version.cuda}")
            if OPENCV_AVAILABLE:
                logger.info(f"📷 OpenCV: {cv2.__version__}")

        except Exception as e:
            logger.warning(f"⚠️ System info logging failed: {e}")

    def get_device(self) -> str:
        """Get the selected device."""
        return self.device or 'cpu'

    def get_device_info(self) -> Dict:
        """Get device information."""
        return {
            'device': self.device,
            'cuda_available': self.cuda_available,
            'gpu_count': self.gpu_count,
            'device_info': self.device_info,
            'threading_configured': self.threading_configured,
        }
    def get_memory_usage(self) -> Dict:
        """Get current memory usage."""
        vm = psutil.virtual_memory()
        memory_info = {
            'system_memory_gb': vm.total / (1024 ** 3),
            'system_memory_used_gb': vm.used / (1024 ** 3),
            'system_memory_percent': vm.percent,
        }

        if self.cuda_available and TORCH_AVAILABLE:
            try:
                for i in range(self.gpu_count):
                    allocated = torch.cuda.memory_allocated(i) / (1024 ** 3)
                    reserved = torch.cuda.memory_reserved(i) / (1024 ** 3)
                    total = self.device_info.get(f'cuda:{i}', {}).get('memory_gb', 0)

                    memory_info[f'gpu_{i}_allocated_gb'] = allocated
                    memory_info[f'gpu_{i}_reserved_gb'] = reserved
                    memory_info[f'gpu_{i}_total_gb'] = total
                    memory_info[f'gpu_{i}_percent'] = (allocated / max(total, 1)) * 100
            except Exception as e:
                logger.warning(f"⚠️ GPU memory info failed: {e}")

        return memory_info

    def optimize_for_model(self, model_name: str) -> Dict:
        """Optimize device settings for a specific model."""
        optimizations = {
            'device': self.device,
            'mixed_precision': False,
            'gradient_checkpointing': False,
            'batch_size': 1,
        }

        try:
            # Model-specific optimizations
            if model_name.lower() == 'sam2':
                if self.cuda_available and self._get_gpu_memory_gb() >= 8:
                    optimizations.update({
                        'mixed_precision': True,
                        'batch_size': 2,
                    })
            elif model_name.lower() == 'matanyone':
                if self.cuda_available and self._get_gpu_memory_gb() >= 6:
                    optimizations.update({
                        'mixed_precision': True,
                    })

            logger.info(f"⚙️ Optimizations for {model_name}: {optimizations}")

        except Exception as e:
            logger.warning(f"⚠️ Model optimization failed: {e}")

        return optimizations

    def _get_gpu_memory_gb(self) -> float:
        """Get GPU memory in GB for the selected device."""
        if not self.cuda_available or not self.device_info:
            return 0.0

        device_key = self.device if self.device in self.device_info else 'cuda:0'
        return self.device_info.get(device_key, {}).get('memory_gb', 0.0)

    def cleanup(self):
        """Clean up device resources."""
        try:
            if self.cuda_available and TORCH_AVAILABLE:
                torch.cuda.empty_cache()
                logger.info("✅ GPU cache cleared")
        except Exception as e:
            logger.warning(f"⚠️ Cleanup warning: {e}")


# Global device manager instance
_device_manager = None


def get_device_manager() -> DeviceManager:
    """Get the global device manager instance."""
    global _device_manager
    if _device_manager is None:
        _device_manager = DeviceManager()
        _device_manager.initialize()
    return _device_manager


def get_optimal_device() -> str:
    """Get the optimal device for processing."""
    return get_device_manager().get_device()


def fix_cuda_compatibility():
    """Fix CUDA compatibility issues."""
    try:
        dm = get_device_manager()
        logger.info("✅ CUDA compatibility checked and fixed")
        return dm.get_device_info()
    except Exception as e:
        logger.error(f"❌ CUDA compatibility fix failed: {e}")
        return {'device': 'cpu', 'error': str(e)}


def setup_optimal_threading():
    """Set up the optimal threading configuration."""
    try:
        dm = get_device_manager()
        if dm.threading_configured:
            logger.info("✅ Threading already configured optimally")
        else:
            dm._configure_threading()
        return True
    except Exception as e:
        logger.error(f"❌ Threading setup failed: {e}")
        return False


def get_system_diagnostics() -> Dict:
    """Get comprehensive system diagnostics."""
    dm = get_device_manager()
    return {
        'device_info': dm.get_device_info(),
        'memory_usage': dm.get_memory_usage(),
        'system_ready': dm.device is not None,
    }
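# Eager initialization: importing this module configures threading and
# selects a device immediately; get_device_manager() then finds the
# instance created below already set and reuses it rather than building
# a second one.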
# Initialize on module import
try:
    _device_manager = DeviceManager()
    _device_manager.initialize()
    logger.info("✅ Device manager initialized on import")
except Exception as e:
    logger.warning(f"⚠️ Device manager initialization warning: {e}")
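
# A minimal usage sketch, assuming the module is run directly as a script:
# it prints the selected device, current memory usage, and the per-model
# settings that optimize_for_model() would hand back for 'sam2'.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    manager = get_device_manager()
    print(f"Selected device: {get_optimal_device()}")
    print(f"Memory usage: {manager.get_memory_usage()}")
    print(f"Per-model settings: {manager.optimize_for_model('sam2')}")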