""" Device and Hardware Management Module ==================================== Handles device detection, CUDA compatibility, memory management, and threading configuration for BackgroundFX Pro. Fixes: - CUDA multiprocessor_count compatibility error - OpenMP threading issues (OMP_NUM_THREADS) - GPU memory optimization - Automatic device selection Author: BackgroundFX Pro Team License: MIT """ import os import logging import warnings from typing import Dict, Optional, List, Tuple import platform import psutil # Fix threading issues immediately at module import os.environ.setdefault('OMP_NUM_THREADS', '4') os.environ.setdefault('MKL_NUM_THREADS', '4') os.environ.setdefault('NUMEXPR_NUM_THREADS', '4') try: import torch TORCH_AVAILABLE = True except ImportError: TORCH_AVAILABLE = False warnings.warn("PyTorch not available - using CPU-only processing") try: import cv2 OPENCV_AVAILABLE = True except ImportError: OPENCV_AVAILABLE = False warnings.warn("OpenCV not available") logger = logging.getLogger(__name__) class DeviceManager: """Manages device detection, selection and optimization""" def __init__(self): self.device = None self.device_info = {} self.cuda_available = False self.gpu_count = 0 self.memory_info = {} self.threading_configured = False def initialize(self) -> bool: """Initialize device manager and configure optimal settings""" try: logger.info("🔧 Initializing Device Manager...") # Fix threading first self._configure_threading() # Detect available devices self._detect_devices() # Configure CUDA if available if self.cuda_available: self._configure_cuda() # Select optimal device self.device = self._select_optimal_device() # Log system information self._log_system_info() logger.info(f"✅ Device Manager initialized - Using: {self.device}") return True except Exception as e: logger.error(f"❌ Device Manager initialization failed: {e}") self.device = 'cpu' return False def _configure_threading(self): """Configure threading for optimal performance""" try: # Set OpenMP threads if 'OMP_NUM_THREADS' not in os.environ: os.environ['OMP_NUM_THREADS'] = '4' # Set MKL threads if 'MKL_NUM_THREADS' not in os.environ: os.environ['MKL_NUM_THREADS'] = '4' # Set NumExpr threads if 'NUMEXPR_NUM_THREADS' not in os.environ: os.environ['NUMEXPR_NUM_THREADS'] = '4' # Configure PyTorch threads if TORCH_AVAILABLE: torch.set_num_threads(4) torch.set_num_interop_threads(4) # Configure OpenCV threads if OPENCV_AVAILABLE: cv2.setNumThreads(4) self.threading_configured = True logger.info(f"✅ Threading configured: OMP={os.environ.get('OMP_NUM_THREADS')}") except Exception as e: logger.warning(f"âš ī¸ Threading configuration warning: {e}") def _detect_devices(self): """Detect available computing devices""" try: if not TORCH_AVAILABLE: self.cuda_available = False self.gpu_count = 0 return # Check CUDA availability self.cuda_available = torch.cuda.is_available() self.gpu_count = torch.cuda.device_count() if self.cuda_available else 0 if self.cuda_available: logger.info(f"✅ CUDA available: {self.gpu_count} GPU(s)") # Get device properties for each GPU for i in range(self.gpu_count): try: props = self._get_cuda_properties_safe(i) self.device_info[f'cuda:{i}'] = props logger.info(f" GPU {i}: {props['name']} ({props['memory_gb']:.1f} GB)") except Exception as e: logger.warning(f" GPU {i}: Properties unavailable ({e})") else: logger.info("â„šī¸ CUDA not available - using CPU") except Exception as e: logger.error(f"❌ Device detection failed: {e}") self.cuda_available = False self.gpu_count = 0 def 
    def _get_cuda_properties_safe(self, device_id: int) -> Dict:
        """Safely get CUDA device properties with compatibility handling."""
        try:
            if not TORCH_AVAILABLE or not torch.cuda.is_available():
                return {}

            props = torch.cuda.get_device_properties(device_id)

            # Handle different PyTorch versions for multiprocessor count
            if hasattr(props, 'multi_processor_count'):
                sm_count = props.multi_processor_count
            elif hasattr(props, 'multiprocessor_count'):
                sm_count = props.multiprocessor_count
            else:
                # Fallback for PyTorch builds that expose neither attribute
                try:
                    major, minor = torch.cuda.get_device_capability(device_id)
                    # Rough estimate based on compute capability
                    sm_count = major * 8 if major >= 6 else major * 4
                except Exception:
                    sm_count = 'Unknown'

            device_props = {
                'name': props.name,
                'memory_gb': props.total_memory / (1024 ** 3),
                'memory_bytes': props.total_memory,
                'multiprocessor_count': sm_count,
                'major': props.major,
                'minor': props.minor,
                'compute_capability': f"{props.major}.{props.minor}",
            }
            return device_props

        except Exception as e:
            logger.error(f"❌ Error getting CUDA properties for device {device_id}: {e}")
            return {
                'name': 'Unknown GPU',
                'memory_gb': 0.0,
                'memory_bytes': 0,
                'multiprocessor_count': 'Unknown',
                'error': str(e),
            }

    def _configure_cuda(self):
        """Configure CUDA for optimal performance."""
        try:
            if not self.cuda_available or not TORCH_AVAILABLE:
                return

            # Enable the cuDNN autotuner; trades determinism for speed
            torch.backends.cudnn.benchmark = True
            torch.backends.cudnn.deterministic = False

            # Release any cached GPU memory
            torch.cuda.empty_cache()

            # Check whether automatic mixed precision is available
            try:
                from torch.cuda.amp import autocast  # noqa: F401 - availability check
                logger.info("✅ Mixed precision available")
            except ImportError:
                logger.info("ℹ️ Mixed precision not available")

            logger.info("✅ CUDA optimization configured")

        except Exception as e:
            logger.warning(f"⚠️ CUDA configuration warning: {e}")

    def _select_optimal_device(self) -> str:
        """Select the optimal device for processing."""
        try:
            if not TORCH_AVAILABLE:
                return 'cpu'

            if not self.cuda_available or self.gpu_count == 0:
                return 'cpu'

            # Select the GPU with the most memory
            best_device = 'cuda:0'
            best_memory = 0

            for device_name, props in self.device_info.items():
                if device_name.startswith('cuda:'):
                    memory = props.get('memory_gb', 0)
                    if memory > best_memory:
                        best_memory = memory
                        best_device = device_name

            # Minimum memory check: require at least 2 GB
            if best_memory < 2.0:
                logger.warning(f"⚠️ GPU memory ({best_memory:.1f}GB) may be insufficient, using CPU")
                return 'cpu'

            return best_device

        except Exception as e:
            logger.error(f"❌ Device selection failed: {e}")
            return 'cpu'

    def _log_system_info(self):
        """Log comprehensive system information."""
        try:
            # System information
            logger.info(f"📊 System: {platform.system()} {platform.release()}")
            logger.info(f"💾 CPU: {platform.processor()}")
            logger.info(f"🧠 RAM: {psutil.virtual_memory().total / (1024 ** 3):.1f} GB")

            # Python and package versions
            logger.info(f"🐍 Python: {platform.python_version()}")
            if TORCH_AVAILABLE:
                logger.info(f"🔥 PyTorch: {torch.__version__}")
                if torch.cuda.is_available():
                    logger.info(f"⚡ CUDA: {torch.version.cuda}")
            if OPENCV_AVAILABLE:
                logger.info(f"📷 OpenCV: {cv2.__version__}")

        except Exception as e:
            logger.warning(f"⚠️ System info logging failed: {e}")

    def get_device(self) -> str:
        """Get the selected device."""
        return self.device or 'cpu'

    def get_device_info(self) -> Dict:
        """Get device information."""
        return {
            'device': self.device,
            'cuda_available': self.cuda_available,
            'gpu_count': self.gpu_count,
            'device_info': self.device_info,
            'threading_configured': self.threading_configured,
        }
    def get_memory_usage(self) -> Dict:
        """Get current memory usage."""
        vm = psutil.virtual_memory()
        memory_info = {
            'system_memory_gb': vm.total / (1024 ** 3),
            'system_memory_used_gb': vm.used / (1024 ** 3),
            'system_memory_percent': vm.percent,
        }

        if self.cuda_available and TORCH_AVAILABLE:
            try:
                for i in range(self.gpu_count):
                    allocated = torch.cuda.memory_allocated(i) / (1024 ** 3)
                    reserved = torch.cuda.memory_reserved(i) / (1024 ** 3)
                    total = self.device_info.get(f'cuda:{i}', {}).get('memory_gb', 0)

                    memory_info[f'gpu_{i}_allocated_gb'] = allocated
                    memory_info[f'gpu_{i}_reserved_gb'] = reserved
                    memory_info[f'gpu_{i}_total_gb'] = total
                    memory_info[f'gpu_{i}_percent'] = (allocated / max(total, 1)) * 100
            except Exception as e:
                logger.warning(f"⚠️ GPU memory info failed: {e}")

        return memory_info

    def optimize_for_model(self, model_name: str) -> Dict:
        """Optimize device settings for a specific model."""
        optimizations = {
            'device': self.device,
            'mixed_precision': False,
            'gradient_checkpointing': False,
            'batch_size': 1,
        }

        try:
            # Model-specific optimizations
            if model_name.lower() == 'sam2':
                if self.cuda_available and self._get_gpu_memory_gb() >= 8:
                    optimizations.update({
                        'mixed_precision': True,
                        'batch_size': 2,
                    })
            elif model_name.lower() == 'matanyone':
                if self.cuda_available and self._get_gpu_memory_gb() >= 6:
                    optimizations.update({
                        'mixed_precision': True,
                    })

            logger.info(f"⚙️ Optimizations for {model_name}: {optimizations}")

        except Exception as e:
            logger.warning(f"⚠️ Model optimization failed: {e}")

        return optimizations

    def _get_gpu_memory_gb(self) -> float:
        """Get GPU memory in GB for the selected device."""
        if not self.cuda_available or not self.device_info:
            return 0.0

        device_key = self.device if self.device in self.device_info else 'cuda:0'
        return self.device_info.get(device_key, {}).get('memory_gb', 0.0)

    def cleanup(self):
        """Clean up device resources."""
        try:
            if self.cuda_available and TORCH_AVAILABLE:
                torch.cuda.empty_cache()
                logger.info("✅ GPU cache cleared")
        except Exception as e:
            logger.warning(f"⚠️ Cleanup warning: {e}")


# Global device manager instance
_device_manager = None


def get_device_manager() -> DeviceManager:
    """Get the global device manager instance."""
    global _device_manager
    if _device_manager is None:
        _device_manager = DeviceManager()
        _device_manager.initialize()
    return _device_manager


def get_optimal_device() -> str:
    """Get the optimal device for processing."""
    return get_device_manager().get_device()


def fix_cuda_compatibility():
    """Fix CUDA compatibility issues."""
    try:
        dm = get_device_manager()
        logger.info("✅ CUDA compatibility checked and fixed")
        return dm.get_device_info()
    except Exception as e:
        logger.error(f"❌ CUDA compatibility fix failed: {e}")
        return {'device': 'cpu', 'error': str(e)}


def setup_optimal_threading():
    """Set up the optimal threading configuration."""
    try:
        dm = get_device_manager()
        if dm.threading_configured:
            logger.info("✅ Threading already configured optimally")
        else:
            dm._configure_threading()
        return True
    except Exception as e:
        logger.error(f"❌ Threading setup failed: {e}")
        return False


def get_system_diagnostics() -> Dict:
    """Get comprehensive system diagnostics."""
    dm = get_device_manager()
    return {
        'device_info': dm.get_device_info(),
        'memory_usage': dm.get_memory_usage(),
        'system_ready': dm.device is not None,
    }
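# Eager initialization: importing this module configures threading and
# selects a device immediately; get_device_manager() then finds the
# instance created below already set and reuses it rather than building
# a second one.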
# Initialize on module import
try:
    _device_manager = DeviceManager()
    _device_manager.initialize()
    logger.info("✅ Device manager initialized on import")
except Exception as e:
    logger.warning(f"⚠️ Device manager initialization warning: {e}")
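
# A minimal usage sketch, assuming the module is run directly as a script:
# it prints the selected device, current memory usage, and the per-model
# settings that optimize_for_model() would hand back for 'sam2'.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    manager = get_device_manager()
    print(f"Selected device: {get_optimal_device()}")
    print(f"Memory usage: {manager.get_memory_usage()}")
    print(f"Per-model settings: {manager.optimize_for_model('sam2')}")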