MogensR committed on
Commit 53fdc22 · 1 Parent(s): 151a692

Update utils/refinement.py

Files changed (1)
  1. utils/refinement.py +213 -148
utils/refinement.py CHANGED
@@ -1,167 +1,232 @@
 #!/usr/bin/env python3
 """
 utils.refinement
-─────────────────────────────────────────────────────────────────────────────
-Single-frame mask refinement for BackgroundFX Pro.
-
-Public API
-----------
-refine_mask_hq(image, mask, matanyone_processor, fallback_enabled=True) -> np.ndarray
+High-quality mask refinement for BackgroundFX Pro.
 """

 from __future__ import annotations
-from typing import Any, Tuple, Optional
-import logging, cv2, torch, numpy as np
-
-log = logging.getLogger(__name__)
+from typing import Any, Optional, Tuple
+import logging

-# Quality thresholds (same as before)
-MIN_AREA_RATIO = 0.015
-MAX_AREA_RATIO = 0.97
+import cv2
+import numpy as np

-# ────────────────────────────────────────────────────────────────────────────
-# Public
-# ────────────────────────────────────────────────────────────────────────────
-__all__ = ["refine_mask_hq"]
+log = logging.getLogger(__name__)

+# ============================================================================
+# CUSTOM EXCEPTION
+# ============================================================================
+class MaskRefinementError(Exception):
+    """Custom exception for mask refinement errors"""
+    pass
+
+# ============================================================================
+# EXPORTS
+# ============================================================================
+__all__ = [
+    "refine_mask_hq",
+    "MaskRefinementError",
+]
+
+# ============================================================================
+# MAIN API
+# ============================================================================
 def refine_mask_hq(
     image: np.ndarray,
-    mask: np.ndarray,
-    matanyone_processor: Any,
-    fallback_enabled: bool = True,
+    mask: np.ndarray,
+    matanyone_model: Optional[Any] = None,
+    fallback_enabled: bool = True
 ) -> np.ndarray:
     """
-    1) Try MatAnyOne high-quality refinement.
-    2) Otherwise OpenCV “enhanced” filter.
-    3) GrabCut and saliency fallbacks.
-    Always returns uint8 mask (0/255).
+    High-quality mask refinement with multiple strategies.
+
+    Args:
+        image: Original BGR image
+        mask: Initial binary mask (0/255)
+        matanyone_model: Optional MatAnyone model for AI refinement
+        fallback_enabled: Whether to use fallback methods if AI fails
+
+    Returns:
+        Refined binary mask (0/255)
     """
-    mask = _process_mask(mask)
-
-    # 1 — MatAnyOne
-    if matanyone_processor is not None:
+    if image is None or mask is None:
+        raise MaskRefinementError("Invalid input image or mask")
+
+    if image.shape[:2] != mask.shape[:2]:
+        raise MaskRefinementError(f"Image shape {image.shape[:2]} doesn't match mask shape {mask.shape[:2]}")
+
+    # Try AI-based refinement first if model available
+    if matanyone_model is not None:
         try:
-            refined = _matanyone_refine(image, mask, matanyone_processor)
-            if refined is not None and _validate_mask_quality(refined, image.shape[:2]):
+            refined = _refine_with_matanyone(image, mask, matanyone_model)
+            if _validate_refined_mask(refined, mask):
                 return refined
-            log.warning("MatAnyOne produced poor mask; fallback")
+            log.warning("MatAnyone refinement failed validation")
         except Exception as e:
-            log.warning(f"MatAnyOne error: {e}")
-
-    # 2 — OpenCV “enhanced” bilateral+guided+MORPH
-    try:
-        refined = _opencv_enhance(image, mask)
-        if _validate_mask_quality(refined, image.shape[:2]):
-            return refined
-    except Exception as e:
-        log.debug(f"OpenCV enhance error: {e}")
-
-    # 3 — GrabCut + saliency double-fallback
-    try:
-        gc = _refine_with_grabcut(image, mask)
-        if _validate_mask_quality(gc, image.shape[:2]):
-            return gc
-        sal = _refine_with_saliency(image, mask)
-        if _validate_mask_quality(sal, image.shape[:2]):
-            return sal
-    except Exception as e:
-        log.debug(f"GrabCut/saliency fallback error: {e}")
-
-    # last resort
-    return mask if fallback_enabled else _opencv_enhance(image, mask)
-
-# ────────────────────────────────────────────────────────────────────────────
-# MatAnyOne wrapper (safe)
-# ────────────────────────────────────────────────────────────────────────────
-def _matanyone_refine(img, mask, proc) -> Optional[np.ndarray]:
-    if not (hasattr(proc, "step") and hasattr(proc, "output_prob_to_mask")):
-        return None
-    # image tensor (C,H,W) float32 0-1
-    anp = img.astype(np.float32)
-    if anp.max() > 1: anp /= 255.0
-    anp = np.transpose(anp, (2,0,1))
-    img_t = torch.from_numpy(anp).unsqueeze(0).to(proc.device if hasattr(proc,"device") else "cpu")
-    mask_f = mask.astype(np.float32)/255.0
-    mask_t = torch.from_numpy(mask_f).unsqueeze(0).to(img_t.device)
-
-    with torch.no_grad():
-        prob = proc.step(img_t, mask_t, objects=[1])
-        m = proc.output_prob_to_mask(prob).squeeze().cpu().numpy()
-    if m.max() <= 1: m *= 255
-    return m.astype(np.uint8)
-
-# ────────────────────────────────────────────────────────────────────────────
-# OpenCV enhanced filter chain
-# ────────────────────────────────────────────────────────────────────────────
-def _opencv_enhance(img, mask):
-    if mask.ndim == 3: mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
-    if mask.max()<=1: mask = (mask*255).astype(np.uint8)
-    m = cv2.bilateralFilter(mask, 9, 75, 75)
-    m = _guided_filter(img, m, r=8, eps=0.2)
-    m = cv2.morphologyEx(m, cv2.MORPH_CLOSE, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(5,5)))
-    m = cv2.morphologyEx(m, cv2.MORPH_OPEN, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(3,3)))
-    m = cv2.GaussianBlur(m,(3,3),0.8)
-    _,m = cv2.threshold(m,127,255,cv2.THRESH_BINARY)
-    return m
-
-def _guided_filter(guide, mask, r=8, eps=0.2):
-    g = cv2.cvtColor(guide, cv2.COLOR_BGR2GRAY).astype(np.float32)/255.0
-    m = mask.astype(np.float32)/255.0
-    k = 2*r+1
-    mean_g = cv2.boxFilter(g, -1, (k,k))
-    mean_m = cv2.boxFilter(m, -1, (k,k))
-    corr_gm = cv2.boxFilter(g*m, -1, (k,k))
-    cov = corr_gm - mean_g*mean_m
-    var_g = cv2.boxFilter(g*g, -1, (k,k)) - mean_g*mean_g
-    a = cov/(var_g+eps)
-    b = mean_m - a*mean_g
-    mean_a = cv2.boxFilter(a, -1, (k,k))
-    mean_b = cv2.boxFilter(b, -1, (k,k))
-    out = (mean_a*g+mean_b)*255
-    return out.astype(np.uint8)
-
-# ────────────────────────────────────────────────────────────────────────────
-# GrabCut & saliency fallbacks
-# ────────────────────────────────────────────────────────────────────────────
-def _refine_with_grabcut(img, seed):
-    h,w = img.shape[:2]
-    gc = np.full((h,w), cv2.GC_PR_BGD, np.uint8)
-    gc[seed>200] = cv2.GC_FGD
-    rect = (w//4, h//6, w//2, int(h*0.7))
-    bgd,fgd = np.zeros((1,65),np.float64), np.zeros((1,65),np.float64)
-    cv2.grabCut(img, gc, rect, bgd, fgd, 3, cv2.GC_INIT_WITH_MASK)
-    return np.where((gc==cv2.GC_FGD)|(gc==cv2.GC_PR_FGD),255,0).astype(np.uint8)
-
-def _refine_with_saliency(img, seed):
-    sal = _compute_saliency(img)
-    if sal is None: return seed
-    high = (sal>0.6).astype(np.uint8)*255
-    cy,cx = img.shape[0]//2, img.shape[1]//2
-    if np.any(seed>127):
-        ys,xs = np.where(seed>127); cy,cx=int(np.mean(ys)),int(np.mean(xs))
-    ff = high.copy(); cv2.floodFill(ff,None,(cx,cy),255,loDiff=5,upDiff=5)
-    return ff
-
-def _compute_saliency(img):
-    try:
-        if hasattr(cv2,"saliency"):
-            s=cv2.saliency.StaticSaliencySpectralResidual_create()
-            ok,sm=s.computeSaliency(img)
-            if ok: return (sm-sm.min())/max(1e-6,sm.max()-sm.min())
-    except Exception: pass
-    return None
-
-# ────────────────────────────────────────────────────────────────────────────
-# Helpers
-# ────────────────────────────────────────────────────────────────────────────
-def _process_mask(mask):
-    if mask.ndim==3: mask=cv2.cvtColor(mask,cv2.COLOR_BGR2GRAY)
-    if mask.dtype!=np.uint8:
-        mask = (mask*255).astype(np.uint8) if mask.max()<=1 else mask.astype(np.uint8)
-    _,mask=cv2.threshold(mask,127,255,cv2.THRESH_BINARY)
+            log.warning(f"MatAnyone refinement failed: {e}")
+
+    # Fallback to classical refinement methods
+    if fallback_enabled:
+        try:
+            return _classical_refinement(image, mask)
+        except Exception as e:
+            log.warning(f"Classical refinement failed: {e}")
+            return mask  # Return original if all fails
+
     return mask

-def _validate_mask_quality(mask, shape: Tuple[int,int]) -> bool:
-    h,w = shape
-    ratio = np.sum(mask>127)/(h*w)
-    return MIN_AREA_RATIO <= ratio <= MAX_AREA_RATIO
+# ============================================================================
+# AI-BASED REFINEMENT
+# ============================================================================
+def _refine_with_matanyone(
+    image: np.ndarray,
+    mask: np.ndarray,
+    model: Any
+) -> np.ndarray:
+    """Use MatAnyone model for mask refinement."""
+    # Check if model has expected interface
+    if hasattr(model, 'process'):
+        result = model.process(image, mask)
+    elif hasattr(model, 'refine'):
+        result = model.refine(image, mask)
+    elif callable(model):
+        result = model(image, mask)
+    else:
+        raise MaskRefinementError("MatAnyone model doesn't have expected interface")
+
+    # Convert result to binary mask
+    if result is None:
+        raise MaskRefinementError("MatAnyone returned None")
+
+    return _process_mask(result)
+
+# ============================================================================
+# CLASSICAL REFINEMENT
+# ============================================================================
+def _classical_refinement(image: np.ndarray, mask: np.ndarray) -> np.ndarray:
+    """Apply classical CV techniques for mask refinement."""
+    refined = mask.copy()
+
+    # 1. Morphological operations to clean up
+    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
+    refined = cv2.morphologyEx(refined, cv2.MORPH_CLOSE, kernel)
+    refined = cv2.morphologyEx(refined, cv2.MORPH_OPEN, kernel)
+
+    # 2. Edge-aware smoothing
+    refined = _edge_aware_smooth(image, refined)
+
+    # 3. Feather edges slightly
+    refined = _feather_edges(refined, radius=3)
+
+    # 4. Remove small disconnected components
+    refined = _remove_small_components(refined, min_area_ratio=0.005)
+
+    return refined
+
+# ============================================================================
+# HELPER FUNCTIONS
+# ============================================================================
+def _validate_refined_mask(refined: np.ndarray, original: np.ndarray) -> bool:
+    """Check if refined mask is reasonable."""
+    if refined is None or refined.size == 0:
+        return False
+
+    # Check if mask has reasonable coverage
+    refined_area = np.sum(refined > 127)
+    original_area = np.sum(original > 127)
+
+    if refined_area == 0:
+        return False
+
+    # Allow some variation but not extreme changes
+    ratio = refined_area / max(original_area, 1)
+    return 0.5 <= ratio <= 2.0
+
+def _process_mask(mask: np.ndarray) -> np.ndarray:
+    """Convert any mask format to binary 0/255."""
+    if mask.dtype == np.float32 or mask.dtype == np.float64:
+        if mask.max() <= 1.0:
+            mask = (mask * 255).astype(np.uint8)
+
+    if mask.dtype != np.uint8:
+        mask = mask.astype(np.uint8)
+
+    if mask.ndim == 3:
+        mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
+
+    _, binary = cv2.threshold(mask, 127, 255, cv2.THRESH_BINARY)
+    return binary
+
+def _edge_aware_smooth(image: np.ndarray, mask: np.ndarray) -> np.ndarray:
+    """Apply edge-aware smoothing using guided filter."""
+    # Convert to float for processing
+    mask_float = mask.astype(np.float32) / 255.0
+
+    # Simple guided filter approximation
+    radius = 5
+    eps = 0.01
+
+    # Use image as guide
+    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY).astype(np.float32) / 255.0
+
+    # Box filter for mean
+    mean_I = cv2.boxFilter(gray, -1, (radius, radius))
+    mean_p = cv2.boxFilter(mask_float, -1, (radius, radius))
+    mean_Ip = cv2.boxFilter(gray * mask_float, -1, (radius, radius))
+
+    # Covariance
+    cov_Ip = mean_Ip - mean_I * mean_p
+
+    # Variance
+    mean_II = cv2.boxFilter(gray * gray, -1, (radius, radius))
+    var_I = mean_II - mean_I * mean_I
+
+    # Coefficients
+    a = cov_Ip / (var_I + eps)
+    b = mean_p - a * mean_I
+
+    # Filter
+    mean_a = cv2.boxFilter(a, -1, (radius, radius))
+    mean_b = cv2.boxFilter(b, -1, (radius, radius))
+
+    refined = mean_a * gray + mean_b
+
+    # Convert back to binary
+    return (refined * 255).clip(0, 255).astype(np.uint8)
+
+def _feather_edges(mask: np.ndarray, radius: int = 3) -> np.ndarray:
+    """Slightly blur edges for smoother transitions."""
+    if radius <= 0:
+        return mask
+
+    # Blur then threshold to maintain binary nature
+    blurred = cv2.GaussianBlur(mask, (radius*2+1, radius*2+1), radius/2)
+    _, binary = cv2.threshold(blurred, 127, 255, cv2.THRESH_BINARY)
+
+    return binary
+
+def _remove_small_components(mask: np.ndarray, min_area_ratio: float = 0.005) -> np.ndarray:
+    """Remove small disconnected components."""
+    num_labels, labels, stats, _ = cv2.connectedComponentsWithStats(mask, connectivity=8)
+
+    if num_labels <= 1:
+        return mask
+
+    # Calculate minimum area
+    total_area = mask.shape[0] * mask.shape[1]
+    min_area = int(total_area * min_area_ratio)
+
+    # Find largest component (excluding background)
+    areas = stats[1:, cv2.CC_STAT_AREA]
+    if len(areas) == 0:
+        return mask
+
+    max_label = np.argmax(areas) + 1
+
+    # Keep only components above threshold or the largest one
+    cleaned = np.zeros_like(mask)
+    for label in range(1, num_labels):
+        if stats[label, cv2.CC_STAT_AREA] >= min_area or label == max_label:
+            cleaned[labels == label] = 255
+
+    return cleaned
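
For reviewers who want to exercise the new entry point, here is a minimal single-frame sketch. It only uses the API visible in this commit (refine_mask_hq and MaskRefinementError); the file names are placeholders, and no MatAnyone model is passed, so the call goes through the classical fallback path:

import cv2
from utils.refinement import refine_mask_hq, MaskRefinementError

frame = cv2.imread("frame.png")                              # placeholder BGR frame
rough = cv2.imread("rough_mask.png", cv2.IMREAD_GRAYSCALE)   # placeholder rough mask, 0/255, same H/W as frame

try:
    # matanyone_model=None skips the AI branch; fallback_enabled=True runs the classical pipeline
    refined = refine_mask_hq(frame, rough, matanyone_model=None, fallback_enabled=True)
    cv2.imwrite("refined_mask.png", refined)
except MaskRefinementError as err:
    print(f"Refinement rejected its inputs: {err}")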