Spaces:

MogensR
/

VideoBackgroundReplacer

Paused

App Files Files Community

MogensR committed on Aug 24

Commit

9015f7f

1 Parent(s): 4142570

Create core/models.py

Browse files

Files changed (1) hide show

core/models.py +559 -0

core/models.py ADDED Viewed

	@@ -0,0 +1,559 @@

+"""
+Model management and optimization for BackgroundFX Pro.
+Fixes MatAnyone quality issues and manages model loading.
+"""
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from typing import Dict, Any, Optional, Tuple, List
+from dataclasses import dataclass
+import numpy as np
+from pathlib import Path
+import logging
+import gc
+from functools import lru_cache
+import warnings
+logger = logging.getLogger(__name__)
+@dataclass
+class ModelConfig:
+    """Configuration for model management."""
+    sam2_checkpoint: str = "checkpoints/sam2_hiera_large.pt"  # passed to build_sam2 as ckpt_path
+    matanyone_checkpoint: str = "checkpoints/matanyone_v2.pth"  # read by MatAnyoneModel.load; a missing file only logs a warning
+    device: str = "cuda"  # used for torch.load map_location, model placement and input tensors
+    dtype: torch.dtype = torch.float16  # float16 on a non-"cpu" device makes _optimize_model call .half()
+    optimize_memory: bool = True  # gates MatAnyoneModel._optimize_model after the weights are loaded
+    use_amp: bool = True  # forwarded as the `enabled` flag of the autocast context in forward
+    cache_size: int = 5  # becomes ModelCache.max_size inside ModelManager
+    enable_quality_fixes: bool = True  # when True, MatAnyoneModel creates a QualityEnhancer
+    matanyone_enhancement: bool = True  # gates MatAnyoneModel._preprocess_input in forward
+    use_tensorrt: bool = False  # gates the TensorRT step in _optimize_model
+    batch_size: int = 1  # NOTE(review): not read by any code in this module - confirm it is used elsewhere
+class ModelCache:
+    """Bounded model cache with least-recently-used (LRU) eviction.
+
+    Attributes:
+        cache: Maps key -> cached model object.
+        max_size: Maximum number of entries kept at once; a value <= 0
+            disables caching entirely.
+        access_count: Maps key -> number of successful ``get`` calls.
+            Kept for statistics; eviction is driven by recency, not by
+            this counter.
+        memory_usage: Maps key -> caller-reported memory size.
+    """
+    def __init__(self, max_size: int = 5):
+        self.cache = {}
+        self.max_size = max_size
+        self.access_count = {}
+        self.memory_usage = {}
+        # Logical clock: every add/get stamps the key with a fresh tick so
+        # the entry with the smallest stamp is the least recently used one.
+        self._last_used = {}
+        self._tick = 0
+    def _touch(self, key: str) -> None:
+        """Mark ``key`` as the most recently used entry."""
+        self._tick += 1
+        self._last_used[key] = self._tick
+    def add(self, key: str, model: Any, memory_size: float):
+        """Insert or replace ``key``, evicting the LRU entry when full.
+
+        Args:
+            key: Cache key.
+            model: Object to cache.
+            memory_size: Caller-reported memory footprint, stored as-is.
+        """
+        if self.max_size <= 0:
+            # Nothing can be stored; evicting from an empty cache would fail.
+            return
+        if key not in self.cache:
+            # Only a genuinely new key needs room; replacing an existing
+            # key must not push out an unrelated entry.
+            while len(self.cache) >= self.max_size:
+                lru_key = min(self.cache, key=lambda k: self._last_used.get(k, 0))
+                self.remove(lru_key)
+        self.cache[key] = model
+        self.access_count[key] = 0
+        self.memory_usage[key] = memory_size
+        self._touch(key)
+    def get(self, key: str) -> Optional[Any]:
+        """Return the cached model for ``key`` (or None) and refresh its recency."""
+        if key not in self.cache:
+            return None
+        self.access_count[key] = self.access_count.get(key, 0) + 1
+        self._touch(key)
+        return self.cache[key]
+    def remove(self, key: str):
+        """Drop ``key`` from the cache and release memory; unknown keys are ignored."""
+        if key not in self.cache:
+            return
+        model = self.cache.pop(key)
+        self.access_count.pop(key, None)
+        self.memory_usage.pop(key, None)
+        self._last_used.pop(key, None)
+        # Drop the local reference before collecting so the model can be freed.
+        del model
+        gc.collect()
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+    def clear(self):
+        """Remove every entry from the cache."""
+        # Iterate over a snapshot because remove() mutates the dict.
+        for key in list(self.cache):
+            self.remove(key)
+class MatAnyoneModel(nn.Module):
+    """MatAnyone wrapper adding pre- and post-processing around a base matting network.
+
+    The base network is loaded lazily from ``config.matanyone_checkpoint``.
+    When no usable checkpoint is available, ``forward`` passes its inputs
+    through unchanged. Images are consumed as (B, 3, H, W) tensors; masks
+    may be (H, W), (B, H, W) or (B, 1, H, W) and are normalised to
+    (B, 1, H, W) internally.
+    """
+    def __init__(self, config: "ModelConfig"):
+        super().__init__()
+        self.config = config
+        self.base_model = None
+        self.quality_enhancer = QualityEnhancer() if config.enable_quality_fixes else None
+        self.loaded = False
+    def load(self):
+        """Load the checkpoint and prepare the network for inference.
+
+        Failures are logged rather than raised; afterwards ``self.loaded``
+        tells whether a usable network is available.
+        """
+        if self.loaded:
+            return
+        checkpoint_path = Path(self.config.matanyone_checkpoint)
+        if not checkpoint_path.exists():
+            logger.warning(f"MatAnyone checkpoint not found at {checkpoint_path}")
+            return
+        try:
+            # NOTE(security): torch.load unpickles the file; only load
+            # checkpoints from a trusted source.
+            state_dict = torch.load(
+                checkpoint_path,
+                map_location=self.config.device
+            )
+            # Initialize base model (placeholder - replace with actual MatAnyone architecture)
+            self.base_model = self._build_matanyone_architecture()
+            self._load_weights_safe(state_dict)
+            if self.config.optimize_memory:
+                self._optimize_model()
+            # The enhancer is created on the CPU in __init__; it must live on
+            # the same device as the tensors it refines.
+            if self.quality_enhancer is not None:
+                self.quality_enhancer.to(self.config.device)
+            self.loaded = True
+            logger.info("MatAnyone model loaded successfully")
+        except Exception as e:
+            logger.error(f"Failed to load MatAnyone model: {e}")
+            # Never keep a half-initialised (randomly weighted) network around.
+            self.base_model = None
+            self.loaded = False
+    def _build_matanyone_architecture(self) -> nn.Module:
+        """Build the network on ``config.device``.
+
+        This is a placeholder encoder/decoder; replace it with the actual
+        MatAnyone architecture. It maps 4 input channels (RGB + mask) to 4
+        output channels (RGB foreground + alpha) and downsamples twice, so
+        H and W should be multiples of 4 for the output size to match.
+        """
+        class MatAnyoneBase(nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.encoder = nn.Sequential(
+                    nn.Conv2d(4, 64, 3, padding=1),
+                    nn.ReLU(),
+                    nn.Conv2d(64, 128, 3, stride=2, padding=1),
+                    nn.ReLU(),
+                    nn.Conv2d(128, 256, 3, stride=2, padding=1),
+                    nn.ReLU(),
+                )
+                self.decoder = nn.Sequential(
+                    nn.ConvTranspose2d(256, 128, 4, stride=2, padding=1),
+                    nn.ReLU(),
+                    nn.ConvTranspose2d(128, 64, 4, stride=2, padding=1),
+                    nn.ReLU(),
+                    nn.Conv2d(64, 4, 3, padding=1),
+                    nn.Sigmoid()
+                )
+            def forward(self, x):
+                return self.decoder(self.encoder(x))
+        return MatAnyoneBase().to(self.config.device)
+    def _load_weights_safe(self, state_dict: Dict):
+        """Copy every shape-compatible weight from ``state_dict`` into the base model.
+
+        Raises:
+            RuntimeError: If not a single weight matches, because running a
+                randomly initialised network would only produce noise.
+        """
+        # Training checkpoints commonly nest the weights under "state_dict".
+        nested = state_dict.get('state_dict') if isinstance(state_dict, dict) else None
+        if isinstance(nested, dict):
+            state_dict = nested
+        model_dict = self.base_model.state_dict()
+        compatible_dict = {}
+        for raw_key, value in state_dict.items():
+            # Strip the prefix added by nn.DataParallel, if present.
+            key = raw_key[7:] if raw_key.startswith('module.') else raw_key
+            if key in model_dict and getattr(value, 'shape', None) == model_dict[key].shape:
+                compatible_dict[key] = value
+            else:
+                logger.warning(f"Skipping incompatible weight: {key}")
+        if not compatible_dict:
+            raise RuntimeError("checkpoint contains no weights compatible with the model")
+        model_dict.update(compatible_dict)
+        self.base_model.load_state_dict(model_dict, strict=False)
+        logger.info(f"Loaded {len(compatible_dict)}/{len(state_dict)} weights")
+    def _optimize_model(self):
+        """Switch the base model to eval mode, optional fp16, and frozen parameters."""
+        if self.base_model is None:
+            return
+        self.base_model.eval()
+        # Convert to half precision if using GPU
+        if self.config.dtype == torch.float16 and self.config.device != "cpu":
+            self.base_model = self.base_model.half()
+        for param in self.base_model.parameters():
+            param.requires_grad = False
+        # TensorRT optimization is best-effort: a failure is only logged.
+        if self.config.use_tensorrt:
+            try:
+                self._optimize_with_tensorrt()
+            except Exception as e:
+                logger.warning(f"TensorRT optimization failed: {e}")
+    def _optimize_with_tensorrt(self):
+        """Placeholder for TensorRT conversion; always raises NotImplementedError."""
+        raise NotImplementedError("TensorRT optimization is not implemented")
+    def forward(self, image: torch.Tensor, mask: torch.Tensor) -> Dict[str, torch.Tensor]:
+        """Run matting on ``image`` guided by ``mask``.
+
+        Args:
+            image: (B, 3, H, W) tensor.
+            mask: (H, W), (B, H, W) or (B, 1, H, W) tensor.
+
+        Returns:
+            Dict with ``'alpha'`` (B, 1, H, W), ``'foreground'`` (B, 3, H, W)
+            and ``'confidence'`` (B,). Without a usable base model the inputs
+            are returned unchanged as alpha/foreground with confidence 1.
+        """
+        if not self.loaded:
+            self.load()
+        if self.base_model is None:
+            # Keep the result schema identical to the normal path so callers
+            # can always index 'confidence'.
+            return {
+                'alpha': mask,
+                'foreground': image,
+                'confidence': torch.ones(image.shape[0], device=image.device),
+            }
+        mask = self._as_bchw_mask(mask, image)
+        x = torch.cat([image, mask], dim=1)
+        if self.config.matanyone_enhancement:
+            x = self._preprocess_input(x)
+        # Autocast only exists for CUDA tensors here; without it the input
+        # must match the (possibly half precision) weights explicitly.
+        use_autocast = bool(self.config.use_amp) and x.is_cuda
+        if not use_autocast:
+            x = x.to(next(self.base_model.parameters()).dtype)
+        with torch.cuda.amp.autocast(enabled=use_autocast):
+            output = self.base_model(x)
+        # Post-processing (and the float32 enhancer) run in the input precision.
+        output = output.to(image.dtype)
+        alpha = output[:, 3:4, :, :]
+        foreground = output[:, :3, :, :]
+        if self.quality_enhancer is not None:
+            alpha = self.quality_enhancer.enhance_alpha(alpha, mask)
+            foreground = self.quality_enhancer.enhance_foreground(foreground, image)
+        alpha = self._fix_matanyone_artifacts(alpha, mask)
+        return {
+            'alpha': alpha,
+            'foreground': foreground,
+            'confidence': self._compute_confidence(alpha, mask)
+        }
+    @staticmethod
+    def _as_bchw_mask(mask: torch.Tensor, like: torch.Tensor) -> torch.Tensor:
+        """Return ``mask`` as a (B, 1, H, W) tensor on the device/dtype of ``like``.
+
+        Raises:
+            ValueError: If ``mask`` has fewer than 2 or more than 4 dimensions.
+        """
+        if mask.dim() == 2:
+            mask = mask.unsqueeze(0).unsqueeze(0)
+        elif mask.dim() == 3:
+            mask = mask.unsqueeze(1)
+        elif mask.dim() != 4:
+            raise ValueError(f"mask must have 2, 3 or 4 dimensions, got {mask.dim()}")
+        mask = mask.to(device=like.device, dtype=like.dtype)
+        # A single mask is shared across the whole batch.
+        if mask.shape[0] == 1 and like.shape[0] > 1:
+            mask = mask.expand(like.shape[0], -1, -1, -1)
+        return mask
+    @staticmethod
+    def _gaussian_blur(x: torch.Tensor, kernel_size: int, sigma: Optional[float] = None) -> torch.Tensor:
+        """Blur a (B, C, H, W) tensor with a square Gaussian kernel.
+
+        torch.nn.functional has no gaussian_blur, so the blur is a depthwise
+        convolution. ``kernel_size`` must be odd; ``sigma`` defaults to
+        ``0.15 * kernel_size + 0.35``. Borders are replicated so the output
+        keeps the input size.
+        """
+        if sigma is None:
+            sigma = 0.15 * kernel_size + 0.35
+        offsets = torch.arange(kernel_size, dtype=torch.float32, device=x.device)
+        offsets = offsets - (kernel_size - 1) / 2.0
+        taps = torch.exp(-0.5 * (offsets / sigma) ** 2)
+        taps = taps / taps.sum()
+        kernel = (taps[:, None] * taps[None, :]).to(x.dtype)
+        channels = x.shape[1]
+        weight = kernel.expand(channels, 1, kernel_size, kernel_size).contiguous()
+        pad = kernel_size // 2
+        padded = F.pad(x, (pad, pad, pad, pad), mode='replicate')
+        # groups=channels filters every channel independently.
+        return F.conv2d(padded, weight, groups=channels)
+    def _preprocess_input(self, x: torch.Tensor) -> torch.Tensor:
+        """Denoise, clamp to [0, 1] and sharpen the mask channel of a (B, 4, H, W) input."""
+        # Denoise input
+        if x.shape[2] > 64:  # Only for reasonable resolutions
+            x = self._bilateral_filter_torch(x)
+        x = torch.clamp(x, 0, 1)
+        mask_enhanced = self._enhance_mask_edges(x[:, 3:4, :, :])
+        return torch.cat([x[:, :3, :, :], mask_enhanced], dim=1)
+    def _fix_matanyone_artifacts(self, alpha: torch.Tensor,
+                                 original_mask: torch.Tensor) -> torch.Tensor:
+        """Apply the edge, transparency and mask-consistency fixes in sequence."""
+        original_mask = self._as_bchw_mask(original_mask, alpha)
+        alpha = self._fix_edge_bleeding(alpha, original_mask)
+        alpha = self._fix_transparency_issues(alpha)
+        alpha = self._ensure_mask_consistency(alpha, original_mask)
+        return alpha
+    def _fix_edge_bleeding(self, alpha: torch.Tensor,
+                          original_mask: torch.Tensor) -> torch.Tensor:
+        """Blend alpha 70/30 with the original mask in a band around mask edges."""
+        original_mask = self._as_bchw_mask(original_mask, alpha)
+        edges = self._detect_edges_torch(original_mask)
+        # Dilate the edge response so the band covers both sides of the edge.
+        edge_mask = F.max_pool2d(edges, kernel_size=5, stride=1, padding=2)
+        edge_region = edge_mask > 0.1
+        blended = 0.7 * alpha + 0.3 * original_mask.expand_as(alpha)
+        return torch.where(edge_region, blended, alpha)
+    def _fix_transparency_issues(self, alpha: torch.Tensor) -> torch.Tensor:
+        """Push mid-range alpha values toward 0 or 1, then smooth with a 3x3 blur."""
+        mid_range = (alpha > 0.2) & (alpha < 0.8)
+        pushed = torch.where(
+            alpha > 0.5,
+            torch.clamp(alpha * 1.2, max=1.0),
+            torch.clamp(alpha * 0.8, min=0.0)
+        )
+        alpha_fixed = torch.where(mid_range, pushed, alpha)
+        return self._gaussian_blur(alpha_fixed, kernel_size=3)
+    def _ensure_mask_consistency(self, alpha: torch.Tensor,
+                                original_mask: torch.Tensor) -> torch.Tensor:
+        """Force alpha to 0 where the mask is < 0.1 and to 0.95 where it is > 0.9."""
+        original_mask = self._as_bchw_mask(original_mask, alpha)
+        alpha = torch.where(original_mask < 0.1, torch.zeros_like(alpha), alpha)
+        alpha = torch.where(original_mask > 0.9, torch.full_like(alpha, 0.95), alpha)
+        return alpha
+    def _compute_confidence(self, alpha: torch.Tensor,
+                          original_mask: torch.Tensor) -> torch.Tensor:
+        """Return, per batch item, 1 minus the mean absolute alpha/mask difference."""
+        original_mask = self._as_bchw_mask(original_mask, alpha).expand_as(alpha)
+        diff = torch.abs(alpha - original_mask)
+        return 1.0 - torch.mean(diff, dim=(1, 2, 3))
+    def _bilateral_filter_torch(self, x: torch.Tensor) -> torch.Tensor:
+        """Approximate a bilateral filter with a 5x5 Gaussian blur."""
+        # For true bilateral filtering, would need custom CUDA kernel
+        return self._gaussian_blur(x, kernel_size=5)
+    def _enhance_mask_edges(self, mask: torch.Tensor) -> torch.Tensor:
+        """Add 30% of the Sobel edge magnitude to a (B, 1, H, W) mask, clamped to [0, 1]."""
+        edges = self._detect_edges_torch(mask)
+        return torch.clamp(mask + 0.3 * edges, 0, 1)
+    def _detect_edges_torch(self, x: torch.Tensor) -> torch.Tensor:
+        """Return the Sobel gradient magnitude of a single-channel (B, 1, H, W) tensor."""
+        sobel_x = torch.tensor([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]],
+                               dtype=x.dtype, device=x.device).view(1, 1, 3, 3)
+        sobel_y = torch.tensor([[-1, -2, -1], [0, 0, 0], [1, 2, 1]],
+                               dtype=x.dtype, device=x.device).view(1, 1, 3, 3)
+        edges_x = F.conv2d(x, sobel_x, padding=1)
+        edges_y = F.conv2d(x, sobel_y, padding=1)
+        return torch.sqrt(edges_x ** 2 + edges_y ** 2)
+class SAM2Model:
+    """Lazy-loading wrapper around the SAM2 image predictor."""
+    def __init__(self, config: "ModelConfig"):
+        self.config = config
+        self.model = None
+        self.predictor = None
+        self.loaded = False
+        # Built on first use by the prompt-free path in predict().
+        self._auto_generator = None
+    def load(self):
+        """Build the SAM2 model and predictor.
+
+        Failures (including a missing ``sam2`` package) are logged rather
+        than raised; ``self.loaded`` reports the outcome.
+        """
+        if self.loaded:
+            return
+        try:
+            # Import SAM2 (assuming it's installed)
+            from sam2.build_sam import build_sam2
+            from sam2.sam2_image_predictor import SAM2ImagePredictor
+            self.model = build_sam2(
+                config_file="sam2_hiera_l.yaml",
+                ckpt_path=self.config.sam2_checkpoint,
+                device=self.config.device
+            )
+            self.predictor = SAM2ImagePredictor(self.model)
+            self.loaded = True
+            logger.info("SAM2 model loaded successfully")
+        except Exception as e:
+            logger.error(f"Failed to load SAM2 model: {e}")
+            self.loaded = False
+    def predict(self, image: np.ndarray, prompts: Optional[Dict] = None) -> np.ndarray:
+        """Generate a segmentation mask for ``image``.
+
+        Args:
+            image: (H, W, C) array.
+            prompts: Optional dict with ``'points'``, ``'labels'`` and/or
+                ``'box'`` entries forwarded to the predictor. Without
+                prompts, automatic mask generation is used.
+
+        Returns:
+            An (H, W) mask. An all-zero uint8 mask is returned when the model
+            is unavailable or automatic generation yields nothing.
+        """
+        if not self.loaded:
+            self.load()
+        empty_mask = np.zeros((image.shape[0], image.shape[1]), dtype=np.uint8)
+        if self.predictor is None:
+            return empty_mask
+        if prompts:
+            self.predictor.set_image(image)
+            masks, scores, _ = self.predictor.predict(
+                point_coords=prompts.get('points'),
+                point_labels=prompts.get('labels'),
+                box=prompts.get('box'),
+                multimask_output=True
+            )
+            # Keep the candidate the predictor scored highest.
+            return masks[np.argmax(scores)]
+        records = self._generate_auto_masks(image)
+        if len(records) == 0:
+            return empty_mask
+        # NOTE(review): the first record is used, mirroring the previous
+        # "first mask" choice - confirm this is the subject that should be kept.
+        return np.asarray(records[0]['segmentation'])
+    def _generate_auto_masks(self, image: np.ndarray) -> list:
+        """Run prompt-free segmentation; return the generator's records, or [] if unavailable."""
+        if self._auto_generator is None:
+            try:
+                # The image predictor has no prompt-free mode; SAM2 ships a
+                # separate generator class for that.
+                from sam2.automatic_mask_generator import SAM2AutomaticMaskGenerator
+            except ImportError as e:
+                logger.error(f"SAM2 automatic mask generator unavailable: {e}")
+                return []
+            self._auto_generator = SAM2AutomaticMaskGenerator(self.model)
+        return self._auto_generator.generate(image)
+class QualityEnhancer(nn.Module):
+    """Small convolutional refiners for the alpha matte and the foreground."""
+    def __init__(self):
+        super().__init__()
+        # Alpha: 1 -> 16 -> 16 -> 1 channels, squashed to (0, 1) by a sigmoid.
+        self.alpha_refiner = self._conv_stack(1, 16, nn.Sigmoid())
+        # Foreground: 3 -> 32 -> 32 -> 3 channels, tanh gives a signed residual.
+        self.foreground_enhancer = self._conv_stack(3, 32, nn.Tanh())
+    @staticmethod
+    def _conv_stack(channels: int, width: int, head: nn.Module) -> nn.Sequential:
+        """Build three 3x3 same-padding convs with ReLUs in between, ending in ``head``."""
+        return nn.Sequential(
+            nn.Conv2d(channels, width, 3, padding=1),
+            nn.ReLU(),
+            nn.Conv2d(width, width, 3, padding=1),
+            nn.ReLU(),
+            nn.Conv2d(width, channels, 3, padding=1),
+            head,
+        )
+    def enhance_alpha(self, alpha: torch.Tensor,
+                     original_mask: torch.Tensor) -> torch.Tensor:
+        """Return 70% refined alpha + 30% input alpha, clamped to [0, 1].
+
+        ``original_mask`` is accepted but not used.
+        """
+        network_alpha = self.alpha_refiner(alpha)
+        # Keeping part of the input stabilises the result.
+        mixed = 0.7 * network_alpha + 0.3 * alpha
+        return mixed.clamp(0, 1)
+    def enhance_foreground(self, foreground: torch.Tensor,
+                          original_image: torch.Tensor) -> torch.Tensor:
+        """Add 10% of the predicted residual to the foreground, clamped to [0, 1].
+
+        ``original_image`` is accepted but not used.
+        """
+        correction = self.foreground_enhancer(foreground)
+        adjusted = foreground + 0.1 * correction
+        return adjusted.clamp(0, 1)
+class ModelManager:
+    """Central model management: owns the SAM2 and MatAnyone wrappers and the cache."""
+    def __init__(self, config: Optional["ModelConfig"] = None):
+        self.config = config or ModelConfig()
+        self.cache = ModelCache(max_size=self.config.cache_size)
+        self.models = {}
+        # Wrappers are created eagerly; their weights are loaded lazily.
+        self.sam2 = SAM2Model(self.config)
+        self.matanyone = MatAnyoneModel(self.config)
+    def load_all(self):
+        """Load all models."""
+        logger.info("Loading all models...")
+        self.sam2.load()
+        self.matanyone.load()
+        logger.info("All models loaded")
+    def get_sam2(self) -> "SAM2Model":
+        """Return the SAM2 wrapper, attempting a load first if needed."""
+        if not self.sam2.loaded:
+            self.sam2.load()
+        return self.sam2
+    def get_matanyone(self) -> "MatAnyoneModel":
+        """Return the MatAnyone wrapper, attempting a load first if needed."""
+        if not self.matanyone.loaded:
+            self.matanyone.load()
+        return self.matanyone
+    def process_frame(self, image: np.ndarray,
+                     mask: Optional[np.ndarray] = None) -> Dict[str, Any]:
+        """Run one frame through segmentation (if needed) and matting.
+
+        Args:
+            image: (H, W, 3) array with values in 0-255.
+            mask: Optional (H, W) mask; generated with SAM2 when omitted.
+
+        Returns:
+            Dict of numpy arrays: ``'alpha'``, ``'foreground'`` ((H, W, 3),
+            scaled back to 0-255) and ``'confidence'``.
+        """
+        # ascontiguousarray: torch.from_numpy rejects negatively strided
+        # views such as channel-reversed slices.
+        image_tensor = torch.from_numpy(np.ascontiguousarray(image))
+        image_tensor = image_tensor.permute(2, 0, 1).unsqueeze(0).float() / 255.0
+        image_tensor = image_tensor.to(self.config.device)
+        if mask is None:
+            mask = self.sam2.predict(image)
+        mask_tensor = torch.from_numpy(np.ascontiguousarray(mask)).float()
+        # assumes values above 1 mean a 0-255 mask - TODO confirm with callers
+        if mask_tensor.numel() > 0 and float(mask_tensor.max()) > 1.0:
+            mask_tensor = mask_tensor / 255.0
+        # The matting model concatenates the mask with a batched image, so a
+        # bare (H, W) mask needs a batch dimension.
+        if mask_tensor.dim() == 2:
+            mask_tensor = mask_tensor.unsqueeze(0)
+        mask_tensor = mask_tensor.to(self.config.device)
+        # Inference only: without no_grad, .numpy() below fails on tensors
+        # that require grad.
+        with torch.no_grad():
+            result = self.matanyone(image_tensor, mask_tensor)
+        confidence = result.get('confidence')
+        if confidence is None:
+            # A pass-through result may carry no confidence entry.
+            confidence = torch.ones(image_tensor.shape[0])
+        return {
+            'alpha': result['alpha'].squeeze().cpu().numpy(),
+            # squeeze(0) drops only the batch axis, so 1-pixel-wide frames survive.
+            'foreground': result['foreground'].squeeze(0).permute(1, 2, 0).cpu().numpy() * 255,
+            'confidence': confidence.cpu().numpy()
+        }
+    def cleanup(self):
+        """Clear the model cache and release cached GPU memory."""
+        self.cache.clear()
+        gc.collect()
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+# Public API: the names exported by a star-import of this module
+__all__ = [
+    'ModelManager',
+    'SAM2Model',
+    'MatAnyoneModel',
+    'ModelConfig',
+    'ModelCache',
+    'QualityEnhancer'
+]