MogensR committed
Commit f0dc2a6 · 1 Parent(s): a8c6577

Update utils/__init__.py

Files changed (1)
  1. utils/__init__.py +437 -0
utils/__init__.py CHANGED
@@ -0,0 +1,437 @@
+ """
+ Complete utils/__init__.py with all required functions
+ Device-safe, SAM2↔MatAnyOne interop, and compositing helpers.
+ """
+
+ from __future__ import annotations
+
+ import os
+ import logging
+ import tempfile
+ from typing import Optional, Tuple, Dict, Any, List, Iterable, Callable
+
+ import cv2
+ import numpy as np
+ from PIL import Image
+ import torch
+
+ # NEW: interop + bridge imports (add these files from the previous steps)
+ from utils.interop import ensure_image_nchw, ensure_mask_for_matanyone, log_shape
+ from utils.mask_bridge import sam2_to_matanyone_mask
+
+ logger = logging.getLogger(__name__)
+
+ # Professional backgrounds configuration
+ PROFESSIONAL_BACKGROUNDS = {
+     "office": {"color": (240, 248, 255), "gradient": True},
+     "studio": {"color": (32, 32, 32), "gradient": False},
+     "nature": {"color": (34, 139, 34), "gradient": True},
+     "abstract": {"color": (75, 0, 130), "gradient": True},
+     "white": {"color": (255, 255, 255), "gradient": False},
+     "black": {"color": (0, 0, 0), "gradient": False},
+ }
+
+ # -------------------------------
+ # Utility: device
+ # -------------------------------
+ def _default_device() -> str:
+     return "cuda" if torch.cuda.is_available() else "cpu"
+
+
+ # -------------------------------
+ # Video validation
+ # -------------------------------
+ def validate_video_file(video_path: str) -> bool:
+     """Validate that a video file exists and is readable."""
+     try:
+         if not os.path.exists(video_path):
+             return False
+         cap = cv2.VideoCapture(video_path)
+         if not cap.isOpened():
+             return False
+         ret, frame = cap.read()
+         cap.release()
+         return ret and frame is not None
+     except Exception as e:
+         logger.error(f"Video validation failed: {e}")
+         return False
+
+
+ # -------------------------------
+ # SAM2 person segmentation (first-frame bootstrapping)
+ # -------------------------------
+ def segment_person_hq(
+     frame_rgb: np.ndarray,
+     *,
+     use_sam2: bool = True,
+     sam2_predictor: Any = None,  # prefer injecting a ready predictor (from your ModelLoader)
+ ) -> Optional[np.ndarray]:
+     """
+     High-quality person segmentation for a single RGB frame.
+     Returns a float mask HxW in [0,1], or None on failure.
+
+     Preferred path: pass a ready-made SAM2 predictor (e.g., SAM2ImagePredictor).
+     Fallback path: simple color-based segmentation.
+     """
+     try:
+         if use_sam2 and sam2_predictor is not None:
+             try:
+                 # SAM2 official predictors accept RGB np.uint8; set + predict.
+                 # We use a simple center-point prompt; adapt to your UX if needed.
+                 if hasattr(sam2_predictor, "set_image"):
+                     sam2_predictor.set_image(frame_rgb)
+
+                 h, w = frame_rgb.shape[:2]
+                 center_point = np.array([[w // 2, h // 2]])
+                 center_label = np.array([1])
+
+                 # Try the SAM2 "predict" API (Meta's predictor style)
+                 if hasattr(sam2_predictor, "predict"):
+                     out = sam2_predictor.predict(
+                         point_coords=center_point,
+                         point_labels=center_label,
+                         multimask_output=True,
+                     )
+                     # Known Meta API returns (masks, scores, logits) as numpy
+                     if isinstance(out, (list, tuple)) and len(out) >= 1:
+                         masks = out[0]
+                         if masks is None or len(masks) == 0:
+                             return None
+                         # masks: (M,H,W); pick best by area
+                         areas = masks.reshape(masks.shape[0], -1).sum(axis=1)
+                         best = int(np.argmax(areas))
+                         m = masks[best].astype(np.float32)
+                         m = (m >= 0.5).astype(np.float32)
+                         return m
+
+                 # Some wrappers expose processor/post_process; if you use that, call separately
+                 logger.warning("SAM2 predictor provided but unknown API; falling back to simple segmentation")
+             except Exception as e:
+                 logger.warning(f"SAM2 segmentation failed: {e}; falling back to simple method")
+
+         # Fallback: color-based person segmentation
+         return _simple_person_segmentation(frame_rgb)
+     except Exception as e:
+         logger.error(f"Person segmentation failed: {e}")
+         return None
+
+
+ def _simple_person_segmentation(frame_rgb: np.ndarray) -> np.ndarray:
+     """Simple person segmentation using color-based methods"""
+     hsv = cv2.cvtColor(frame_rgb, cv2.COLOR_RGB2HSV)
+     # Green screen detection
+     lower_green = np.array([40, 40, 40])
+     upper_green = np.array([80, 255, 255])
+     green_mask = cv2.inRange(hsv, lower_green, upper_green)
+     # White background detection
+     lower_white = np.array([0, 0, 200])
+     upper_white = np.array([180, 30, 255])
+     white_mask = cv2.inRange(hsv, lower_white, upper_white)
+     # Combine + invert to person
+     bg_mask = cv2.bitwise_or(green_mask, white_mask)
+     person_mask = cv2.bitwise_not(bg_mask)
+     # Morph clean
+     kernel = np.ones((5, 5), np.uint8)
+     person_mask = cv2.morphologyEx(person_mask, cv2.MORPH_CLOSE, kernel)
+     person_mask = cv2.morphologyEx(person_mask, cv2.MORPH_OPEN, kernel)
+     return (person_mask.astype(np.float32) / 255.0)
+
+
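+ # Minimal usage sketch (assumes `predictor` is a SAM2 image predictor created
+ # elsewhere, e.g. by the app's ModelLoader, and `frame0_rgb` is an HxWx3 uint8 RGB frame):
+ #
+ #     mask0 = segment_person_hq(frame0_rgb, use_sam2=True, sam2_predictor=predictor)
+ #     if mask0 is not None:
+ #         logger.info("first-frame mask coverage: %.1f%%", 100.0 * float(mask0.mean()))
+
+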
+ # -------------------------------
+ # MatAnyOne integration (first-frame + per-frame)
+ # -------------------------------
+ def refine_mask_hq(
+     mask_hw_float01: np.ndarray,
+     frame_rgb: np.ndarray,
+     *,
+     use_matanyone: bool = True,
+     mat_core: Any = None,  # prefer injecting a ready InferenceCore from ModelLoader
+     first_frame: bool = True,
+     device: str | None = None,
+ ) -> np.ndarray:
+     """
+     High-quality mask refinement for a single frame + mask pair using MatAnyOne.
+     Returns refined mask HxW float in [0,1]. If use_matanyone=False or mat_core is None,
+     falls back to simple refinement.
+
+     NOTE: For videos, prefer using the seed/refine helpers below that keep temporal memory.
+     """
+     try:
+         if not use_matanyone or mat_core is None:
+             return _simple_mask_refinement(mask_hw_float01, frame_rgb)
+
+         device = device or _default_device()
+
+         # Image → (1,3,H,W)
+         img_nchw = ensure_image_nchw(torch.from_numpy(frame_rgb).to(device), device=device, want_batched=True)
+         log_shape("refine.image_nchw", img_nchw)
+
+         # Mask → (1,H,W)
+         mask_t = torch.from_numpy(mask_hw_float01).to(device)
+         mask_c_hw = ensure_mask_for_matanyone(mask_t, idx_mask=False, threshold=0.5, keep_soft=False, device=device)
+         log_shape("refine.mask_c_hw", mask_c_hw)
+
+         # MatAnyOne step (we let the global guard in ModelLoader do additional checks)
+         pred = mat_core.step(
+             image=img_nchw[0],  # CHW
+             mask=mask_c_hw if first_frame else None,
+             idx_mask=False,
+             matting=True,
+             first_frame_pred=bool(first_frame),
+         )
+
+         # Try to decode output into an alpha HxW float mask
+         refined = _coerce_pred_to_mask(pred, device=device)
+         if refined is None:
+             # If the core doesn't return alpha directly, fall back
+             return _simple_mask_refinement(mask_hw_float01, frame_rgb)
+
+         return refined
+     except Exception as e:
+         logger.warning(f"MatAnyOne refinement failed: {e}; using simple refinement")
+         return _simple_mask_refinement(mask_hw_float01, frame_rgb)
+
+
+ def _coerce_pred_to_mask(pred: Any, device: str = "cuda") -> Optional[np.ndarray]:
+     """
+     Best-effort: extract HxW float mask from MatAnyOne output variants.
+     Supports torch.Tensor, numpy, PIL, or dict with common keys.
+     """
+     try:
+         # Dict-like: look for common keys
+         if isinstance(pred, dict):
+             for k in ("alpha", "mask", "matte", "mattes"):
+                 if k in pred:
+                     v = pred[k]
+                     return _coerce_pred_to_mask(v, device=device)
+
+         # Torch tensor
+         if torch.is_tensor(pred):
+             t = pred.detach()
+             # possible shapes: (H,W), (1,H,W), (N,1,H,W)
+             if t.ndim == 4 and t.shape[1] == 1:
+                 t = t[0, 0]
+             elif t.ndim == 3 and t.shape[0] == 1:
+                 t = t[0]
+             t = t.float().clamp(0, 1).to("cpu").numpy()
+             if t.ndim == 2:
+                 return t.astype(np.float32)
+
+         # Numpy
+         if isinstance(pred, np.ndarray):
+             a = pred
+             if a.ndim == 3 and a.shape[0] == 1:
+                 a = a[0]
+             if a.ndim == 2:
+                 a = a.astype(np.float32)
+                 if a.max() > 1.0:
+                     a = a / 255.0
+                 return np.clip(a, 0.0, 1.0)
+
+         # PIL Image
+         if isinstance(pred, Image.Image):
+             a = np.array(pred).astype(np.float32)
+             if a.ndim == 3 and a.shape[2] == 1:
+                 a = a[:, :, 0]
+             if a.ndim == 2:
+                 if a.max() > 1.0:
+                     a = a / 255.0
+                 return np.clip(a, 0.0, 1.0)
+
+     except Exception as e:
+         logger.debug(f"_coerce_pred_to_mask fallback due to: {e}")
+     return None
+
+
+ def _simple_mask_refinement(mask: np.ndarray, frame_rgb: np.ndarray) -> np.ndarray:
+     """Simple mask refinement using OpenCV operations"""
+     mask_uint8 = (np.clip(mask, 0.0, 1.0) * 255).astype(np.uint8)
+     mask_blurred = cv2.GaussianBlur(mask_uint8, (5, 5), 0)
+     mask_refined = cv2.bilateralFilter(mask_blurred, 9, 75, 75)
+     return (mask_refined.astype(np.float32) / 255.0)
+
+
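+ # Minimal single-frame sketch (assumes `mat_core` is a MatAnyOne InferenceCore
+ # injected from the ModelLoader; for videos, prefer the helpers below):
+ #
+ #     alpha0 = refine_mask_hq(mask0, frame0_rgb, use_matanyone=True,
+ #                             mat_core=mat_core, first_frame=True)
+
+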
+ # -------------------------------
+ # Two-stage video helpers (seed + propagate)
+ # -------------------------------
+ @torch.inference_mode()
+ def seed_with_sam2_post_masks(
+     core: Any,
+     frame0_rgb: np.ndarray,  # HxWx3 uint8 RGB
+     sam2_post_masks: torch.Tensor,  # (1,M,H,W)
+     iou_scores: Optional[torch.Tensor] = None,
+     *,
+     device: str | None = None,
+     idx_mask: bool = False,
+     threshold: float = 0.5,
+     keep_soft: bool = False,
+ ) -> Any:
+     """
+     Seed MatAnyOne on the first frame using SAM2 post-processed masks (preferred).
+     """
+     device = device or _default_device()
+     img0 = ensure_image_nchw(torch.from_numpy(frame0_rgb).to(device), device=device, want_batched=True)
+     log_shape("seed.image_nchw", img0)
+
+     if idx_mask:
+         m_c_hw = sam2_to_matanyone_mask(sam2_post_masks.to(device), iou_scores, threshold, "single", keep_soft=False)
+         idx_hw = ensure_mask_for_matanyone(m_c_hw, idx_mask=True, device=device, threshold=threshold)
+         log_shape("seed.idx_hw", idx_hw)
+         return core.step(
+             image=img0[0],
+             mask=idx_hw,
+             idx_mask=True,
+             matting=True,
+             first_frame_pred=True,
+         )
+     else:
+         m_c_hw = sam2_to_matanyone_mask(sam2_post_masks.to(device), iou_scores, threshold, "single", keep_soft=keep_soft)
+         log_shape("seed.mask_c_hw", m_c_hw)
+         return core.step(
+             image=img0[0],
+             mask=m_c_hw,
+             idx_mask=False,
+             matting=True,
+             first_frame_pred=True,
+         )
+
+
+ @torch.inference_mode()
+ def refine_next_frame(core: Any, frame_rgb: np.ndarray, *, device: str | None = None) -> Any:
+     """Step MatAnyOne forward on a subsequent frame (no mask; uses memory)."""
+     device = device or _default_device()
+     img = ensure_image_nchw(torch.from_numpy(frame_rgb).to(device), device=device, want_batched=True)
+     log_shape("refine.image_nchw", img)
+     return core.step(
+         image=img[0],
+         mask=None,
+         idx_mask=False,
+         matting=True,
+         first_frame_pred=False,
+     )
+
+
+ @torch.inference_mode()
+ def run_two_stage_matting(
+     core: Any,
+     frames_rgb_iter: Iterable[np.ndarray],  # iterable of HxWx3 uint8 RGB
+     sam2_post_masks: torch.Tensor,  # (1,M,H,W) for the first frame
+     iou_scores: Optional[torch.Tensor] = None,
+     *,
+     device: str | None = None,
+     on_pred: Optional[Callable[[int, Any], None]] = None,
+     progress: Optional[Callable[[int, Optional[int]], None]] = None,
+     total_frames: Optional[int] = None,
+     idx_mask: bool = False,
+     threshold: float = 0.5,
+     keep_soft: bool = False,
+ ) -> None:
+     """
+     Convenience runner for videos:
+       - Seeds on the first frame using SAM2 post-process outputs
+       - Propagates across the rest (one frame per step)
+     """
+     device = device or _default_device()
+     it = iter(frames_rgb_iter)
+     try:
+         f0 = next(it)
+     except StopIteration:
+         return
+
+     pred0 = seed_with_sam2_post_masks(
+         core, f0, sam2_post_masks, iou_scores,
+         device=device, idx_mask=idx_mask, threshold=threshold, keep_soft=keep_soft,
+     )
+     if on_pred:
+         on_pred(0, pred0)
+     if progress:
+         progress(1, total_frames)
+
+     t = 1
+     for frgb in it:
+         pred = refine_next_frame(core, frgb, device=device)
+         if on_pred:
+             on_pred(t, pred)
+         t += 1
+         if progress:
+             progress(t, total_frames)
+
+
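+ # Minimal video sketch (assumes `mat_core` and the first-frame SAM2 outputs
+ # `sam2_post_masks`/`iou_scores` come from the surrounding app; `read_frames`
+ # is a hypothetical helper shown only for illustration):
+ #
+ #     def read_frames(path):
+ #         cap = cv2.VideoCapture(path)
+ #         while True:
+ #             ok, bgr = cap.read()
+ #             if not ok:
+ #                 break
+ #             yield cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
+ #         cap.release()
+ #
+ #     alphas: List[np.ndarray] = []
+ #     run_two_stage_matting(
+ #         mat_core, read_frames("input.mp4"), sam2_post_masks, iou_scores,
+ #         on_pred=lambda i, pred: alphas.append(_coerce_pred_to_mask(pred)),
+ #     )
+
+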
+ # -------------------------------
+ # Background replacement
+ # -------------------------------
+ def replace_background_hq(frame_rgb: np.ndarray, mask_hw_float01: np.ndarray, background_rgb: np.ndarray) -> np.ndarray:
+     """High-quality background replacement with proper compositing"""
+     try:
+         h, w = frame_rgb.shape[:2]
+         background_resized = cv2.resize(background_rgb, (w, h))
+
+         # Ensure mask is HxW float in [0,1]
+         if mask_hw_float01.ndim == 3:
+             mask_hw_float01 = mask_hw_float01[..., 0]
+         m = np.clip(mask_hw_float01.astype(np.float32), 0.0, 1.0)
+
+         # Feather edges lightly
+         m_uint8 = (m * 255).astype(np.uint8)
+         m_feather = cv2.GaussianBlur(m_uint8, (7, 7), 0).astype(np.float32) / 255.0
+         m3 = np.stack([m_feather] * 3, axis=-1)
+
+         result = frame_rgb.astype(np.float32) * m3 + background_resized.astype(np.float32) * (1.0 - m3)
+         return np.clip(result, 0, 255).astype(np.uint8)
+     except Exception as e:
+         logger.error(f"Background replacement failed: {e}")
+         return frame_rgb
+
+
+ # -------------------------------
+ # Background generators
+ # -------------------------------
+ def create_professional_background(bg_type: str, width: int, height: int) -> np.ndarray:
+     """Create professional background of specified type and size"""
+     try:
+         if bg_type not in PROFESSIONAL_BACKGROUNDS:
+             bg_type = "office"  # Default fallback
+
+         config = PROFESSIONAL_BACKGROUNDS[bg_type]
+         color = config["color"]
+         use_gradient = config["gradient"]
+
+         if use_gradient:
+             background = _create_gradient_background(color, width, height)
+         else:
+             background = np.full((height, width, 3), color, dtype=np.uint8)
+
+         return background
+     except Exception as e:
+         logger.error(f"Background creation failed: {e}")
+         return np.full((height, width, 3), (255, 255, 255), dtype=np.uint8)
+
+
+ def _create_gradient_background(base_color: Tuple[int, int, int], width: int, height: int) -> np.ndarray:
+     """Create a vertical gradient background from base color"""
+     r, g, b = base_color
+     dark = (int(r * 0.7), int(g * 0.7), int(b * 0.7))
+     bg = np.zeros((height, width, 3), dtype=np.uint8)
+     for y in range(height):
+         t = y / max(height, 1)
+         bg[y, :] = [
+             int(dark[0] * (1 - t) + r * t),
+             int(dark[1] * (1 - t) + g * t),
+             int(dark[2] * (1 - t) + b * t),
+         ]
+     return bg
+
+
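+ # Minimal compositing sketch (assumes `alpha` is an HxW float mask from the
+ # matting stage and `frame_rgb` is the matching RGB frame):
+ #
+ #     bg = create_professional_background("studio", frame_rgb.shape[1], frame_rgb.shape[0])
+ #     composited = replace_background_hq(frame_rgb, alpha, bg)
+
+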
+ # -------------------------------
+ # Exports
+ # -------------------------------
+ __all__ = [
+     # segment / refine (single-frame)
+     "segment_person_hq",
+     "refine_mask_hq",
+     # video runner + steps
+     "seed_with_sam2_post_masks",
+     "refine_next_frame",
+     "run_two_stage_matting",
+     # backgrounds & utils
+     "replace_background_hq",
+     "create_professional_background",
+     "PROFESSIONAL_BACKGROUNDS",
+     "validate_video_file",
+ ]