MogensR committed on
Commit 31fec2e · 1 Parent(s): 4055558

Update processing/video/video_processor.py

Files changed (1)
  1. processing/video/video_processor.py +809 -0
processing/video/video_processor.py CHANGED
@@ -60,9 +60,16 @@ def _to_rgb(c):
     return (255, 255, 255)
 
 def _create_gradient_background_local(spec: Dict[str, Any], width: int, height: int) -> np.ndarray:
+    """
+    Minimal gradient generator for backgrounds (linear with rotation).
+    spec = {"type": "linear"|"radial" (ignored), "start": (r,g,b)|"#rrggbb", "end": ..., "angle_deg": float}
+    Returns RGB np.uint8 (H, W, 3).
+    """
     start = _to_rgb(spec.get("start", "#222222"))
     end = _to_rgb(spec.get("end", "#888888"))
     angle = float(spec.get("angle_deg", 0))
+
+    # build vertical gradient
     bg = np.zeros((height, width, 3), np.uint8)
     for y in range(height):
         t = y / max(1, height - 1)
@@ -70,6 +77,808 @@ def _create_gradient_background_local(spec: Dict[str, Any], width: int, height:
         g = int(start[1]*(1-t) + end[1]*t)
         b = int(start[2]*(1-t) + end[2]*t)
         bg[y, :] = (r, g, b)
+
     if abs(angle) % 360 < 1e-6:
         return bg
+
+    # rotate by angle using OpenCV (RGB-safe)
     center = (width / 2, height / 2)
+    M = cv2.getRotationMatrix2D(center, angle, 1.0)
+    rot = cv2.warpAffine(bg, M, (width, height), flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REFLECT_101)
+    return rot
+
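For orientation, a minimal sketch of exercising this helper on its own (assumes only the module's `cv2`/`numpy` imports; the output file name is illustrative):

```python
# Hypothetical smoke test for the gradient helper.
spec = {"type": "linear", "start": (20, 20, 40), "end": (90, 90, 140), "angle_deg": 30.0}
bg_rgb = _create_gradient_background_local(spec, width=1280, height=720)

# The helper returns RGB; OpenCV expects BGR when writing to disk.
cv2.imwrite("gradient_preview.png", cv2.cvtColor(bg_rgb, cv2.COLOR_RGB2BGR))
```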
+
+@dataclass
+class ProcessorConfig:
+    background_preset: str = "office"  # key in PROFESSIONAL_BACKGROUNDS
+    write_fps: Optional[float] = None  # None -> keep source fps
+
+    # Model-only downscale (speedup without changing output resolution)
+    max_model_size: Optional[int] = 1280
+
+    # FFmpeg / NVENC output (pipe). If disabled or unavailable, use OpenCV writer.
+    use_nvenc: bool = True
+    nvenc_codec: str = "h264"       # "h264" or "hevc"
+    nvenc_preset: str = "p5"        # NVENC preset string
+    nvenc_cq: int = 18              # constant quality (lower = higher quality)
+    nvenc_tune_hq: bool = True
+    nvenc_pix_fmt: str = "yuv420p"  # browser-safe
+
+    # libx264 fallback
+    x264_preset: str = "medium"
+    x264_crf: int = 18
+    x264_pix_fmt: str = "yuv420p"
+
+    movflags_faststart: bool = True
+
+    # ---------- stability & edge quality ----------
+    temporal_ema_alpha: float = 0.75  # higher = calmer (0.6–0.85 typical)
+    min_iou_to_accept: float = 0.05   # reject sudden mask jumps
+    dilate_px: int = 6                # pad edges to keep hair/ears/shoulders
+    edge_blur_px: int = 1             # tiny blur to calm edge shimmer
+
+    # hardening (turn soft mask into crisper 0/1)
+    hard_low: float = 0.35    # values below -> 0
+    hard_high: float = 0.70   # values above -> 1
+    mask_gamma: float = 0.90  # <1 boosts mid-tones slightly
+
+    # ---------- windowed two-phase control ----------
+    use_windowed: bool = True  # enable two-phase SAM2→MatAnyone per chunk
+    window_size: int = 8       # frames per window
+
+
+# Back-compat name used elsewhere in the app (module level: the name
+# ProcessorConfig is not bound until the class body above has executed)
+ProcessingConfig = ProcessorConfig
+
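Since these are plain dataclass fields, callers can override a few knobs and keep the rest at their defaults; a sketch for a CPU-only box (values illustrative, field names as defined above):

```python
cfg = ProcessorConfig(
    use_nvenc=False,          # skip NVENC, go straight to libx264
    x264_preset="slow",
    x264_crf=17,
    temporal_ema_alpha=0.8,   # within the 0.6-0.85 range noted above
    max_model_size=960,       # smaller model input, same output resolution
)
```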
+
+class _FFmpegPipe:
+    """
+    Wrapper around an FFmpeg stdin pipe with encoder fallbacks and good error messages.
+    """
+
+    def __init__(self, width: int, height: int, fps: float, out_path: str, cfg: ProcessorConfig, log=_log):
+        self.width = int(width)
+        self.height = int(height)
+        self.fps = float(fps) if fps and fps > 0 else 25.0
+        self.out_path = out_path
+        self.cfg = cfg
+        self.log = log
+
+        self.proc: Optional[subprocess.Popen] = None
+        self.encoder_used: Optional[str] = None
+        self._stderr: Optional[bytes] = None
+
+        self._ffmpeg = shutil.which("ffmpeg")
+        if not self._ffmpeg:
+            raise RuntimeError("ffmpeg not found on PATH")
+
+        self._start_with_fallbacks()
+
+    def _cmd_for_encoder(self, encoder: str) -> List[str]:
+        base = [
+            self._ffmpeg,
+            "-hide_banner", "-loglevel", "error",
+            "-y",
+            # rawvideo input from stdin
+            "-f", "rawvideo",
+            "-vcodec", "rawvideo",
+            "-pix_fmt", "bgr24",
+            "-s", f"{self.width}x{self.height}",
+            "-r", f"{self.fps}",
+            "-i", "-",  # stdin
+            "-an",      # no audio here
+        ]
+        if self.cfg.movflags_faststart:
+            base += ["-movflags", "+faststart"]
+
+        if encoder == "h264_nvenc":
+            base += [
+                "-c:v", "h264_nvenc",
+                "-preset", self.cfg.nvenc_preset,
+                "-cq", str(int(self.cfg.nvenc_cq)),
+                "-pix_fmt", self.cfg.nvenc_pix_fmt,
+            ]
+            if self.cfg.nvenc_tune_hq:
+                base += ["-tune", "hq"]
+        elif encoder == "hevc_nvenc":
+            base += [
+                "-c:v", "hevc_nvenc",
+                "-preset", self.cfg.nvenc_preset,
+                "-cq", str(int(self.cfg.nvenc_cq)),
+                "-pix_fmt", self.cfg.nvenc_pix_fmt,
+            ]
+            if self.cfg.nvenc_tune_hq:
+                base += ["-tune", "hq"]
+        elif encoder == "libx264":
+            base += [
+                "-c:v", "libx264",
+                "-preset", self.cfg.x264_preset,
+                "-crf", str(int(self.cfg.x264_crf)),
+                "-pix_fmt", self.cfg.x264_pix_fmt,
+            ]
+        elif encoder == "mpeg4":
+            base += [
+                "-c:v", "mpeg4",
+                "-q:v", "2",
+                "-pix_fmt", "yuv420p",
+            ]
+        else:
+            base += ["-c:v", "libx264", "-preset", self.cfg.x264_preset, "-crf", str(int(self.cfg.x264_crf)), "-pix_fmt", self.cfg.x264_pix_fmt]
+
+        base += [self.out_path]
+        return base
+
+    def _try_start(self, enc: str) -> bool:
+        cmd = self._cmd_for_encoder(enc)
+        try:
+            self.proc = subprocess.Popen(
+                cmd,
+                stdin=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                bufsize=10**7,
+            )
+            self.encoder_used = enc
+            self.log.info("FFmpeg started: %s", " ".join(shlex.quote(c) for c in cmd))
+            # quick poll: if ffmpeg dies immediately, fail fast
+            time.sleep(0.05)
+            if self.proc.poll() is not None:
+                self._stderr = self.proc.stderr.read() if self.proc.stderr else b""
+                self.log.warning("FFmpeg exited on start with %s: %s", enc, (self._stderr or b"").decode(errors="ignore"))
+                self.proc = None
+                return False
+            return True
+        except Exception as e:
+            self.log.warning("Failed to start FFmpeg with %s: %s", enc, e)
+            self.proc = None
+            return False
+
+    def _start_with_fallbacks(self):
+        encoders = []
+        if self.cfg.use_nvenc:
+            encoders += ["h264_nvenc"] if self.cfg.nvenc_codec.lower() == "h264" else ["hevc_nvenc"]
+        encoders += ["libx264", "mpeg4"]
+        for enc in encoders:
+            if self._try_start(enc):
+                return
+        msg = "Could not start FFmpeg with any encoder (nvenc/libx264/mpeg4). Is ffmpeg present and codecs available?"
+        if self._stderr:
+            msg += f" Stderr: {(self._stderr or b'').decode(errors='ignore')[:500]}"
+        raise RuntimeError(msg)
+
+    def write(self, frame_bgr: np.ndarray):
+        if self.proc is None or self.proc.stdin is None:
+            raise RuntimeError("FFmpeg process is not running (stdin is None).")
+        if not isinstance(frame_bgr, np.ndarray) or frame_bgr.dtype != np.uint8:
+            raise ValueError("Frame must be a np.ndarray of dtype uint8.")
+        if frame_bgr.ndim != 3 or frame_bgr.shape[2] != 3:
+            raise ValueError("Frame must have shape (H, W, 3).")
+        if frame_bgr.shape[0] != self.height or frame_bgr.shape[1] != self.width:
+            raise ValueError(f"Frame size mismatch. Expected {self.width}x{self.height}, got {frame_bgr.shape[1]}x{frame_bgr.shape[0]}.")
+
+        # ensure contiguous for tobytes()
+        frame_bgr = np.ascontiguousarray(frame_bgr)
+        try:
+            self.proc.stdin.write(frame_bgr.tobytes())
+        except Exception as e:
+            # collect stderr for diagnostics
+            stderr = b""
+            try:
+                if self.proc and self.proc.stderr:
+                    stderr = self.proc.stderr.read()
+            except Exception:
+                pass
+            msg = f"FFmpeg pipe write failed: {e}"
+            if stderr:
+                msg += f"\nffmpeg stderr: {(stderr or b'').decode(errors='ignore')[:1000]}"
+            raise BrokenPipeError(msg)
+
+    def close(self):
+        if self.proc is None:
+            return
+        try:
+            if self.proc.stdin:
+                try:
+                    self.proc.stdin.flush()
+                except Exception:
+                    pass
+                try:
+                    self.proc.stdin.close()
+                except Exception:
+                    pass
+            # drain a bit of stderr for logs
+            if self.proc.stderr:
+                try:
+                    err = self.proc.stderr.read()
+                    if err:
+                        self.log.debug("FFmpeg stderr (tail): %s", err.decode(errors="ignore")[-2000:])
+                except Exception:
+                    pass
+            self.proc.wait(timeout=10)
+        except Exception:
+            try:
+                self.proc.kill()
+            except Exception:
+                pass
+        finally:
+            self.proc = None
+
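A minimal sketch of driving the pipe directly (assumes ffmpeg is on PATH; frames must be uint8 BGR at the declared size, per the checks in write(); output file name illustrative):

```python
import numpy as np

cfg = ProcessorConfig(use_nvenc=False)  # force the libx264/mpeg4 fallback chain
pipe = _FFmpegPipe(640, 360, 25.0, "black.mp4", cfg)
try:
    frame = np.zeros((360, 640, 3), np.uint8)  # one second of black BGR frames
    for _ in range(25):
        pipe.write(frame)
finally:
    pipe.close()  # closes stdin and waits for ffmpeg to finalize the file
```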
+
+class CoreVideoProcessor:
+    """
+    Minimal, safe implementation used by core/app.py.
+    It relies on a models provider (e.g., ModelLoader) that implements:
+      - get_sam2()
+      - get_matanyone()
+    and uses utils.cv_processing for the pipeline.
+
+    Supports progress callback and cancellation via stop_event.
+    """
+
+    def __init__(self, config: Optional[ProcessorConfig] = None, models: Optional[Any] = None):
+        self.log = _log
+        self.config = config or ProcessorConfig()
+        self.models = models  # do NOT load here; core/app handles loading
+        if self.models is None:
+            self.log.warning("CoreVideoProcessor initialized without a models provider; will use fallbacks.")
+        self._ffmpeg = shutil.which("ffmpeg")
+
+        # state for temporal smoothing
+        self._prev_mask: Optional[np.ndarray] = None
+
+        # --- ENV overrides (tunable without code change) ---
+        try:
+            if "MATANYONE_WINDOWED" in os.environ:
+                self.config.use_windowed = os.environ["MATANYONE_WINDOWED"].strip().lower() not in ("0", "false", "no")
+            if "MATANYONE_WINDOW" in os.environ:
+                self.config.window_size = max(1, int(os.environ["MATANYONE_WINDOW"]))
+            if "MAX_MODEL_SIZE" in os.environ:
+                self.config.max_model_size = max(0, int(os.environ["MAX_MODEL_SIZE"]))
+        except Exception:
+            pass
+
+        # Legacy per-frame stateful chunking (used only if use_windowed=False)
+        try:
+            self._chunk_size = max(1, int(os.environ.get("MATANYONE_CHUNK", "12")))
+        except Exception:
+            self._chunk_size = 12
+        self._chunk_idx = 0
+
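The overrides above make the windowing tunable without touching code; a sketch of setting them before construction (values illustrative, variable names as read in `__init__`):

```python
import os

os.environ["MATANYONE_WINDOWED"] = "1"  # anything except "0"/"false"/"no" enables it
os.environ["MATANYONE_WINDOW"] = "16"   # frames per window
os.environ["MAX_MODEL_SIZE"] = "960"    # cap on the model-side resolution

proc = CoreVideoProcessor()             # reads the overrides at construction
```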
+    # ---------- mask post-processing (stability + crispness) ----------
+    def _iou(self, a: np.ndarray, b: np.ndarray, thr: float = 0.5) -> float:
+        a_bin = (a >= thr).astype(np.uint8)
+        b_bin = (b >= thr).astype(np.uint8)
+        inter = np.count_nonzero(cv2.bitwise_and(a_bin, b_bin))
+        union = np.count_nonzero(cv2.bitwise_or(a_bin, b_bin))
+        return (inter / union) if union else 0.0
+
+    def _harden(self, m: np.ndarray) -> np.ndarray:
+        # optional gamma
+        g = float(self.config.mask_gamma)
+        if abs(g - 1.0) > 1e-6:
+            m = np.clip(m, 0, 1) ** g
+
+        lo = float(self.config.hard_low)
+        hi = float(self.config.hard_high)
+        if hi > lo + 1e-6:
+            m = (m - lo) / (hi - lo)
+            m = np.clip(m, 0.0, 1.0)
+
+        # pad edges then tiny blur
+        k = int(self.config.dilate_px)
+        if k > 0:
+            se = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (2*k+1, 2*k+1))
+            m = cv2.dilate(m, se, iterations=1)
+
+        eb = int(self.config.edge_blur_px)
+        if eb > 0:
+            m = cv2.GaussianBlur(m, (2*eb+1, 2*eb+1), 0)
+
+        return np.clip(m, 0.0, 1.0)
+
+    def _stabilize(self, m: np.ndarray) -> np.ndarray:
+        if self._prev_mask is None:
+            self._prev_mask = m
+            return m
+
+        # outlier rejection
+        if self._iou(self._prev_mask, m, 0.5) < float(self.config.min_iou_to_accept):
+            # ignore this frame's mask → keep previous
+            return self._prev_mask
+
+        # EMA
+        a = float(self.config.temporal_ema_alpha)
+        m_ema = a * self._prev_mask + (1.0 - a) * m
+        self._prev_mask = m_ema
+        return m_ema
+
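To see the gating and smoothing in isolation: with the default temporal_ema_alpha of 0.75 a new mask contributes only 25% per frame, and a mask whose IoU against the previous one drops below min_iou_to_accept is discarded outright. A synthetic sketch (private methods called directly, purely for illustration):

```python
import numpy as np

proc = CoreVideoProcessor()  # defaults: alpha=0.75, min_iou=0.05

a = np.zeros((64, 64), np.float32); a[16:48, 16:48] = 1.0  # stable subject
b = np.zeros((64, 64), np.float32); b[18:50, 16:48] = 1.0  # small drift
c = np.zeros((64, 64), np.float32); c[0:4, 0:4] = 1.0      # bogus outlier

proc._stabilize(a)      # first frame: stored and returned as-is
m = proc._stabilize(b)  # EMA blend: 0.75 * a + 0.25 * b
m = proc._stabilize(c)  # IoU vs. previous ~0 < 0.05 -> previous mask kept
```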
+    # ---------- Single frame (fallback path) ----------
+    def process_frame(self, frame_bgr: np.ndarray, background_rgb: np.ndarray) -> Dict[str, Any]:
+        """
+        Process one frame (legacy per-frame path):
+          - optionally downscale for model work,
+          - segment + refine,
+          - temporally stabilize + harden,
+          - upsample the mask,
+          - composite at full resolution.
+        Returns a dict with the composited frame (BGR, for the writer) and the mask (H, W float).
+        """
+        H, W = frame_bgr.shape[:2]
+        max_side = max(H, W)
+        scale = 1.0
+        proc_frame_bgr = frame_bgr
+
+        # Model-only downscale
+        if self.config.max_model_size and max_side > self.config.max_model_size:
+            scale = self.config.max_model_size / float(max_side)
+            newW = int(round(W * scale))
+            newH = int(round(H * scale))
+            proc_frame_bgr = cv2.resize(frame_bgr, (newW, newH), interpolation=cv2.INTER_AREA)
+            self.log.debug(f"Model-only downscale: {W}x{H} -> {newW}x{newH} (scale={scale:.3f})")
+
+        # RGB for models
+        proc_frame_rgb = cv2.cvtColor(proc_frame_bgr, cv2.COLOR_BGR2RGB)
+
+        predictor = None
+        try:
+            if self.models and hasattr(self.models, "get_sam2"):
+                predictor = self.models.get_sam2()
+        except Exception as e:
+            self.log.warning(f"SAM2 predictor unavailable: {e}")
+
+        # 1) segmentation (with internal fallbacks)
+        mask_small = segment_person_hq(proc_frame_rgb, predictor, use_sam2=True)
+
+        # 2) refinement (MatAnyOne if available) — stateful chunking
+        matanyone = None
+        try:
+            if self.models and hasattr(self.models, "get_matanyone"):
+                matanyone = self.models.get_matanyone()
+        except Exception as e:
+            self.log.warning(f"MatAnyOne unavailable: {e}")
+
+        if matanyone is not None and hasattr(matanyone, "reset") and self._chunk_idx == 0:
+            try:
+                matanyone.reset()
+            except Exception:
+                pass
+
+        # IMPORTANT: the call order is (frame, mask, matanyone=...)
+        mask_small_ref = refine_mask_hq(
+            proc_frame_rgb,
+            mask_small,
+            matanyone=matanyone,
+            use_matanyone=True,
+            frame_idx=self._chunk_idx,  # enable stateful first-frame + propagate
+        )
+
+        # advance chunk + optional defrag
+        self._chunk_idx = (self._chunk_idx + 1) % max(1, self._chunk_size)
+        if self._chunk_idx == 0:
+            try:
+                import torch
+                if torch.cuda.is_available():
+                    torch.cuda.empty_cache()
+            except Exception:
+                pass
+
+        # Stabilize + harden at model scale
+        mask_small_ref = np.clip(mask_small_ref.astype(np.float32), 0.0, 1.0)
+        mask_stable = self._stabilize(mask_small_ref)
+        mask_stable = self._harden(mask_stable)
+
+        # Upsample the mask back to full resolution
+        if scale != 1.0:
+            mask_full = cv2.resize(mask_stable, (W, H), interpolation=cv2.INTER_LINEAR)
+        else:
+            mask_full = mask_stable
+
+        # 3) compositing (helpers expect RGB inputs; return RGB)
+        frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
+        out_rgb = replace_background_hq(frame_rgb, mask_full, background_rgb)
+
+        # Convert to BGR for the writer
+        out_bgr = cv2.cvtColor(out_rgb, cv2.COLOR_RGB2BGR)
+        return {"frame": out_bgr, "mask": mask_full}
+
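A sketch of this per-frame fallback path on a single still image (file names hypothetical; with no models provider, segment_person_hq and refine_mask_hq fall back internally, as the comments above indicate):

```python
import cv2

proc = CoreVideoProcessor()             # warns: no models provider
frame_bgr = cv2.imread("speaker.jpg")   # hypothetical input, BGR as loaded
h, w = frame_bgr.shape[:2]
bg_rgb = _create_gradient_background_local(
    {"start": (20, 20, 40), "end": (90, 90, 140)}, w, h)
result = proc.process_frame(frame_bgr, bg_rgb)
cv2.imwrite("speaker_composited.png", result["frame"])  # already BGR
```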
+    # ---------- Build background once per video ----------
+    def _prepare_background_from_config(
+        self,
+        bg_config: Optional[Dict[str, Any]],
+        width: int,
+        height: int
+    ) -> np.ndarray:
+        """
+        Accepts any of:
+          - {"custom_path": "/path/to/image.png"}       → load image (RGB out)
+          - {"background_choice": "office"}             → preset
+          - {"gradient": {type, start, end, angle_deg}} → generated gradient
+        Returns RGB np.uint8.
+        """
+        # 1) custom image?
+        if bg_config and bg_config.get("custom_path"):
+            path = bg_config["custom_path"]
+            img_bgr = cv2.imread(path, cv2.IMREAD_COLOR)
+            if img_bgr is None:
+                self.log.warning(f"Custom background at '{path}' could not be read. Falling back to preset.")
+            else:
+                img_bgr = cv2.resize(img_bgr, (width, height), interpolation=cv2.INTER_LANCZOS4)
+                return cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
+
+        # 2) gradient?
+        if bg_config and isinstance(bg_config.get("gradient"), dict):
+            try:
+                return _create_gradient_background_local(bg_config["gradient"], width, height)
+            except Exception as e:
+                self.log.warning(f"Gradient generation failed: {e}. Falling back to preset.")
+
+        # 3) preset (explicit choice or default)
+        choice = None
+        if bg_config and "background_choice" in bg_config:
+            choice = bg_config["background_choice"]
+        if not choice:
+            choice = self.config.background_preset
+
+        if choice not in PROFESSIONAL_BACKGROUNDS:
+            self.log.warning(f"Unknown background preset '{choice}'; using 'office'.")
+            choice = "office"
+
+        return create_professional_background(choice, width, height)  # RGB
+
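The three accepted bg_config shapes side by side (paths and keys illustrative; the gradient dict is the same spec consumed by _create_gradient_background_local):

```python
bg_custom   = {"custom_path": "/path/to/backdrop.png"}      # image file
bg_preset   = {"background_choice": "office"}               # preset key
bg_gradient = {"gradient": {"type": "linear",
                            "start": "#222222",
                            "end": "#888888",
                            "angle_deg": 45.0}}             # generated
```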
+    # ---------- Windowed two-phase helpers ----------
+    def _model_downscale(self, frame_bgr: np.ndarray) -> Tuple[np.ndarray, float]:
+        """Apply model-only downscale; return (resized_bgr, scale)."""
+        H, W = frame_bgr.shape[:2]
+        max_side = max(H, W)
+        if self.config.max_model_size and max_side > self.config.max_model_size:
+            s = self.config.max_model_size / float(max_side)
+            newW = int(round(W * s))
+            newH = int(round(H * s))
+            small = cv2.resize(frame_bgr, (newW, newH), interpolation=cv2.INTER_AREA)
+            return small, s
+        return frame_bgr, 1.0
+
+    def _prepare_sam2_gpu(self, predictor):
+        """Best-effort: ensure SAM2 is on CUDA before the SAM2 phase."""
+        try:
+            import torch  # local import to avoid a hard dependency at import time
+            if predictor is None or not torch.cuda.is_available():
+                return
+            # Try common patterns
+            if hasattr(predictor, "to"):
+                try:
+                    predictor.to("cuda")  # type: ignore[attr-defined]
+                    return
+                except Exception:
+                    pass
+            if hasattr(predictor, "model") and hasattr(predictor.model, "to"):
+                try:
+                    predictor.model.to("cuda")  # type: ignore[attr-defined]
+                except Exception:
+                    pass
+        except Exception:
+            pass
+
+    def _release_sam2_gpu(self, predictor):
+        """Best-effort release of SAM2 GPU residency between phases."""
+        try:
+            if predictor is None:
+                return
+            # Clear any sticky per-image state if exposed
+            for name in ("reset_image", "release_image", "clear_image", "clear_state"):
+                if hasattr(predictor, name) and callable(getattr(predictor, name)):
+                    try:
+                        getattr(predictor, name)()
+                    except Exception:
+                        pass
+            # Try moving large parts off-GPU (best-effort, may be a no-op)
+            for name in ("to", "cpu"):
+                if hasattr(predictor, name):
+                    try:
+                        if name == "to":
+                            predictor.to("cpu")  # type: ignore[attr-defined]
+                        else:
+                            predictor.cpu()  # type: ignore[attr-defined]
+                    except Exception:
+                        pass
+        except Exception:
+            pass
+        try:
+            import torch
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
+        except Exception:
+            pass
+
+    # ---------- Full video ----------
+    def process_video(
+        self,
+        input_path: str,
+        output_path: str,
+        bg_config: Optional[Dict[str, Any]] = None,
+        progress_callback: Optional[Callable[[int, int, float], None]] = None,
+        stop_event: Optional[threading.Event] = None
+    ) -> Dict[str, Any]:
+        """
+        Process a full video with live progress and optional cancellation.
+        progress_callback(current_frame, total_frames, fps_live)
+        """
+        ok, msg = validate_video_file(input_path)
+        if not ok:
+            raise ValueError(f"Invalid or unreadable video: {msg}")
+
+        cap = cv2.VideoCapture(input_path)
+        if not cap.isOpened():
+            raise RuntimeError(f"Could not open video: {input_path}")
+
+        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+        fps = cap.get(cv2.CAP_PROP_FPS)
+        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+
+        fps_out = self.config.write_fps or (fps if fps and fps > 0 else 25.0)
+
+        # Background once (RGB)
+        background_rgb = self._prepare_background_from_config(bg_config, width, height)
+
+        # reset temporal state for a new video
+        self._prev_mask = None
+
+        # Writer selection
+        ffmpeg_pipe: Optional[_FFmpegPipe] = None
+        writer: Optional[cv2.VideoWriter] = None
+        ffmpeg_failed_reason = None
+
+        if self.config.use_nvenc and self._ffmpeg:
+            try:
+                ffmpeg_pipe = _FFmpegPipe(width, height, float(fps_out), output_path, self.config, log=self.log)
+            except Exception as e:
+                ffmpeg_failed_reason = str(e)
+                self.log.warning("FFmpeg NVENC pipeline unavailable. Falling back to OpenCV. Reason: %s", e)
+
+        if ffmpeg_pipe is None:
+            fourcc = cv2.VideoWriter_fourcc(*"mp4v")
+            writer = cv2.VideoWriter(output_path, fourcc, float(fps_out), (width, height))
+            if not writer.isOpened():
+                cap.release()
+                raise RuntimeError(f"Could not open VideoWriter for: {output_path}")
+
+        # Determine models and decide the execution mode
+        predictor = None
+        matanyone = None
+        try:
+            if self.models and hasattr(self.models, "get_sam2"):
+                predictor = self.models.get_sam2()
+        except Exception as e:
+            self.log.warning(f"SAM2 predictor unavailable: {e}")
+
+        try:
+            if self.models and hasattr(self.models, "get_matanyone"):
+                matanyone = self.models.get_matanyone()
+        except Exception as e:
+            self.log.warning(f"MatAnyOne unavailable: {e}")
+
+        use_windowed = bool(self.config.use_windowed and predictor is not None and matanyone is not None)
+
+        frame_count = 0
+        start_time = time.time()
+
+        try:
+            if not use_windowed:
+                # --------- Legacy per-frame path (fallback) ----------
+                while True:
+                    ret, frame_bgr = cap.read()
+                    if not ret:
+                        break
+                    if stop_event is not None and stop_event.is_set():
+                        self.log.info("Processing stopped by user request.")
+                        break
+
+                    result = self.process_frame(frame_bgr, background_rgb)
+                    out_bgr = np.ascontiguousarray(result["frame"])
+
+                    if ffmpeg_pipe is not None:
+                        try:
+                            ffmpeg_pipe.write(out_bgr)
+                        except Exception as e:
+                            self.log.warning("Switching to OpenCV writer after FFmpeg error at frame %d: %s", frame_count, e)
+                            try:
+                                ffmpeg_pipe.close()
+                            except Exception:
+                                pass
+                            ffmpeg_pipe = None
+                            if writer is None:
+                                fourcc = cv2.VideoWriter_fourcc(*"mp4v")
+                                writer = cv2.VideoWriter(output_path, fourcc, float(fps_out), (width, height))
+                                if not writer.isOpened():
+                                    raise RuntimeError(f"FFmpeg failed and VideoWriter could not open: {output_path}")
+                            writer.write(out_bgr)
+                    else:
+                        writer.write(out_bgr)
+
+                    frame_count += 1
+                    if progress_callback:
+                        elapsed = time.time() - start_time
+                        fps_live = frame_count / elapsed if elapsed > 0 else 0.0
+                        try:
+                            progress_callback(frame_count, total_frames, fps_live)
+                        except Exception:
+                            pass
+
+            else:
+                # --------- Windowed two-phase path ----------
+                WINDOW = max(1, int(self.config.window_size))
+
+                while True:
+                    # Read a window of frames
+                    frames_bgr: List[np.ndarray] = []
+                    for _ in range(WINDOW):
+                        ret, fr = cap.read()
+                        if not ret:
+                            break
+                        frames_bgr.append(fr)
+
+                    if not frames_bgr:
+                        break  # no more frames
+
+                    if stop_event is not None and stop_event.is_set():
+                        self.log.info("Processing stopped by user request.")
+                        break
+
+                    # Model-only downscale for model work (consistent per window)
+                    frames_small_bgr: List[np.ndarray] = []
+                    scales: List[float] = []
+                    for fr in frames_bgr:
+                        fr_small, s = self._model_downscale(fr)
+                        frames_small_bgr.append(fr_small)
+                        scales.append(s)
+                    # Use the first scale (frames are normally the same size)
+                    scale = scales[0] if scales else 1.0
+
+                    # Convert small frames to RGB for the models
+                    frames_small_rgb = [cv2.cvtColor(fb, cv2.COLOR_BGR2RGB) for fb in frames_small_bgr]
+
+                    # -------- SAM2 phase (prime with the first frame's mask) --------
+                    self._prepare_sam2_gpu(predictor)
+                    try:
+                        mask_small = segment_person_hq(frames_small_rgb[0], predictor, use_sam2=True)
+                    except Exception as e:
+                        self.log.warning(f"SAM2 segmentation error on window start: {e}")
+                        mask_small = segment_person_hq(frames_small_rgb[0], None, use_sam2=False)
+
+                    # Release SAM2 GPU residency before the MatAnyOne phase
+                    self._release_sam2_gpu(predictor)
+
+                    # -------- MatAnyOne phase (prime + propagate) --------
+                    if hasattr(matanyone, "reset"):
+                        try:
+                            matanyone.reset()
+                        except Exception:
+                            pass
+
+                    for j, fr_rgb_small in enumerate(frames_small_rgb):
+                        try:
+                            if j == 0:
+                                m2d = mask_small
+                                if m2d.ndim == 3:
+                                    m2d = m2d[..., 0]
+                                alpha_small = matanyone(fr_rgb_small, m2d)  # adapter returns float32 [h, w]
+                            else:
+                                alpha_small = matanyone(fr_rgb_small)  # propagate (no mask)
+
+                            # Stabilize + harden at model scale
+                            alpha_small = np.clip(alpha_small.astype(np.float32), 0.0, 1.0)
+                            alpha_stable = self._stabilize(alpha_small)
+                            alpha_harden = self._harden(alpha_stable)
+
+                            # Upsample back to full resolution
+                            if scale != 1.0:
+                                H, W = frames_bgr[j].shape[:2]
+                                alpha_full = cv2.resize(alpha_harden, (W, H), interpolation=cv2.INTER_LINEAR)
+                            else:
+                                alpha_full = alpha_harden
+
+                            # Composite at full resolution (expects RGB)
+                            frame_rgb_full = cv2.cvtColor(frames_bgr[j], cv2.COLOR_BGR2RGB)
+                            out_rgb = replace_background_hq(frame_rgb_full, alpha_full, background_rgb)
+                            out_bgr = cv2.cvtColor(out_rgb, cv2.COLOR_RGB2BGR)
+                            out_bgr = np.ascontiguousarray(out_bgr)
+
+                            # Write
+                            if ffmpeg_pipe is not None:
+                                try:
+                                    ffmpeg_pipe.write(out_bgr)
+                                except Exception as e:
+                                    self.log.warning("Switching to OpenCV writer after FFmpeg error at frame %d: %s", frame_count, e)
+                                    try:
+                                        ffmpeg_pipe.close()
+                                    except Exception:
+                                        pass
+                                    ffmpeg_pipe = None
+                                    if writer is None:
+                                        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
+                                        writer = cv2.VideoWriter(output_path, fourcc, float(fps_out), (width, height))
+                                        if not writer.isOpened():
+                                            raise RuntimeError(f"FFmpeg failed and VideoWriter could not open: {output_path}")
+                                    writer.write(out_bgr)
+                            else:
+                                writer.write(out_bgr)
+
+                            frame_count += 1
+
+                        except Exception as e:
+                            # If MatAnyOne fails, log and fall back to the SAM-only mask for this frame
+                            self.log.warning(f"MatAnyOne failed at window frame {j}: {e}")
+                            if j == 0:
+                                alpha_small_fb = np.clip(mask_small.astype(np.float32), 0.0, 1.0)
+                            else:
+                                # _prev_mask may be None if every frame so far failed; use an empty mask then
+                                alpha_small_fb = self._prev_mask if self._prev_mask is not None else np.zeros(fr_rgb_small.shape[:2], dtype=np.float32)
+
+                            if scale != 1.0:
+                                H, W = frames_bgr[j].shape[:2]
+                                alpha_full_fb = cv2.resize(alpha_small_fb, (W, H), interpolation=cv2.INTER_LINEAR)
+                            else:
+                                alpha_full_fb = alpha_small_fb
+
+                            frame_rgb_full = cv2.cvtColor(frames_bgr[j], cv2.COLOR_BGR2RGB)
+                            out_rgb_fb = replace_background_hq(frame_rgb_full, alpha_full_fb, background_rgb)
+                            out_bgr_fb = cv2.cvtColor(out_rgb_fb, cv2.COLOR_RGB2BGR)
+
+                            if ffmpeg_pipe is not None:
+                                try:
+                                    ffmpeg_pipe.write(np.ascontiguousarray(out_bgr_fb))
+                                except Exception:
+                                    try:
+                                        ffmpeg_pipe.close()
+                                    except Exception:
+                                        pass
+                                    ffmpeg_pipe = None
+                                    if writer is None:
+                                        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
+                                        writer = cv2.VideoWriter(output_path, fourcc, float(fps_out), (width, height))
+                                        if not writer.isOpened():
+                                            raise RuntimeError(f"FFmpeg failed and VideoWriter could not open: {output_path}")
+                                    writer.write(np.ascontiguousarray(out_bgr_fb))
+                            else:
+                                writer.write(np.ascontiguousarray(out_bgr_fb))
+                            frame_count += 1
+
+                        # Progress update
+                        if progress_callback:
+                            elapsed = time.time() - start_time
+                            fps_live = frame_count / elapsed if elapsed > 0 else 0.0
+                            try:
+                                progress_callback(frame_count, total_frames, fps_live)
+                            except Exception:
+                                pass
+
+                    # Clean per-window buffers (CPU) and let CUDA defrag
+                    del frames_bgr, frames_small_bgr, frames_small_rgb, mask_small
+                    try:
+                        import torch
+                        if torch.cuda.is_available():
+                            torch.cuda.empty_cache()
+                    except Exception:
+                        pass
+
+        finally:
+            cap.release()
+            if writer is not None:
+                writer.release()
+            if ffmpeg_pipe is not None:
+                try:
+                    ffmpeg_pipe.close()
+                except Exception:
+                    pass
+
+        if ffmpeg_failed_reason:
+            self.log.info("Completed via OpenCV writer (FFmpeg initially failed): %s", ffmpeg_failed_reason)
+
+        self.log.info("Processed %d frames → %s", frame_count, output_path)
+        return {
+            "frames": frame_count,
+            "width": width,
+            "height": height,
+            "fps_out": float(fps_out),
+            "output_path": output_path,
+        }
+
+
+# Backward-compat alias used elsewhere
+VideoProcessor = CoreVideoProcessor
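Putting it together, a sketch of a cancellable run with live progress (file names hypothetical; the models provider, if any, is whatever object exposes get_sam2()/get_matanyone()):

```python
import threading

stop = threading.Event()

def on_progress(done: int, total: int, fps_live: float) -> None:
    print(f"{done}/{total} frames ({fps_live:.1f} fps)")

proc = CoreVideoProcessor(config=ProcessorConfig(background_preset="office"))
stats = proc.process_video(
    "input.mp4", "output.mp4",
    bg_config={"gradient": {"start": "#222222", "end": "#888888", "angle_deg": 0}},
    progress_callback=on_progress,
    stop_event=stop,  # call stop.set() from another thread to cancel
)
print(stats)  # {"frames": ..., "width": ..., "height": ..., "fps_out": ..., "output_path": ...}
```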