Spaces:

MogensR
/

VideoBackgroundReplacer

Paused

App Files Files Community

MogensR committed 26 days ago

Commit

e947d62

verified ·

1 Parent(s): b894e0d

Update app.py

Browse files

Files changed (1) hide show

app.py +272 -186

app.py CHANGED Viewed

@@ -1,20 +1,17 @@
 #!/usr/bin/env python3
 """
 HuggingFace Space app: Video background replacement with MatAnyone/SAM2/rembg.
-GPU-optimized with real verification & self-tests (no mocks).
-What’s new (2025-09-06):
-- K-Governor: runtime clamp for torch.topk / torch.kthvalue so MatAnyone can’t
-  request K > available candidates. This prevents "selected index k out of range".
-- Extra diagnostics: when clamping happens, we print shape/dim and old→new K.
-- Safer MatAnyone init: try HF repo-id first (needs `safetensors`), then local
-  checkpoint fallback; both with clear error reporting.
-- SAM2 only shows Verified after a real micro-inference.
-- Self-test is 100% real (CUDA, FFmpeg/MoviePy, rembg, SAM2, MatAnyone).
-NOTE: Make sure `requirements.txt` includes:
-  safetensors>=0.4.2
-  av==10.0.0
 """
 import os
@@ -30,39 +27,43 @@
 # =========================
 # Environment configuration
 # =========================
-os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True,max_split_size_mb:512"
-os.environ["CUDA_MODULE_LOADING"] = "LAZY"
-os.environ["OMP_NUM_THREADS"] = "8"
-os.environ["MKL_NUM_THREADS"] = "8"
-os.environ["PYTHONUNBUFFERED"] = "1"
 # MatAnyone GPU prefs
-os.environ["MATANYONE_MAX_EDGE"] = "1024"
-os.environ["MATANYONE_TARGET_PIXELS"] = "1000000"
-os.environ["MATANYONE_WINDOWED"] = "1"
-os.environ["MATANYONE_WINDOW"] = "16"
-os.environ["MAX_MODEL_SIZE"] = "1920"
 # CUDA + cuDNN
-os.environ["CUDA_LAUNCH_BLOCKING"] = "0"
-os.environ["TORCH_CUDNN_V8_API_ENABLED"] = "1"
-os.environ["CUDNN_BENCHMARK"] = "1"
 # HF cache prefs
-os.environ["HF_HOME"] = "./checkpoints/hf"
-os.environ["TRANSFORMERS_CACHE"] = "./checkpoints/hf"
-os.environ["HF_DATASETS_CACHE"] = "./checkpoints/hf"
-os.environ["HF_HUB_DISABLE_SYMLINKS"] = "1"
-os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
 # Gradio binding
-os.environ["GRADIO_SERVER_NAME"] = "0.0.0.0"
-os.environ["GRADIO_SERVER_PORT"] = "7860"
 # Feature flags
-os.environ["USE_MATANYONE"] = os.getenv("USE_MATANYONE", "true")
-os.environ["USE_SAM2"] = os.getenv("USE_SAM2", "true")
-os.environ["SELF_CHECK_MODE"] = os.getenv("SELF_CHECK_MODE", "false")
 # =========================
 # Imports
@@ -71,7 +72,7 @@
 import numpy as np
 from PIL import Image
 import gradio as gr
-from moviepy.editor import VideoFileClip, ImageSequenceClip
 print("=" * 50)
 print("Application Startup at", os.popen('date').read().strip())
@@ -125,7 +126,121 @@
         print(f"Added to path: {p}")
 # =========================
-# Torch & device (+ K-Governor)
 # =========================
 TORCH_AVAILABLE = False
 CUDA_AVAILABLE = False
@@ -158,39 +273,6 @@
 except Exception as e:
     print(f"Torch not available: {e}")
-# ---- K-Governor (MUST be installed before importing MatAnyone) ----
-if TORCH_AVAILABLE:
-    _ORIG_TOPK = torch.topk
-    _ORIG_KTH = getattr(torch, "kthvalue", None)
-    def _safe_topk(input, k, dim=None, largest=True, sorted=True, *, out=None):
-        if dim is None:
-            dim = input.ndim - 1
-        size = input.size(dim)
-        if k > size:
-            print(f"[K-Governor] torch.topk: clamp k {k} -> {size} for dim={dim} shape={tuple(input.shape)}")
-            k = int(size)
-        if k <= 0:
-            k = 1
-        return _ORIG_TOPK(input, k, dim, largest, sorted, out=out)
-    torch.topk = _safe_topk  # global guard
-    if _ORIG_KTH is not None:
-        def _safe_kthvalue(input, k, dim=None, keepdim=False, *, out=None):
-            if dim is None:
-                # flat length if dim is None
-                size = int(input.numel())
-            else:
-                size = int(input.size(dim))
-            if k > size:
-                print(f"[K-Governor] torch.kthvalue: clamp k {k} -> {size} (dim={dim}) shape={tuple(input.shape)}")
-                k = size
-            if k <= 0:
-                k = 1
-            return _ORIG_KTH(input, k, dim=dim, keepdim=keepdim, out=out)
-        torch.kthvalue = _safe_kthvalue
 # =========================
 # Light GPU monitor
 # =========================
@@ -274,10 +356,11 @@ def get_stats(self):
         config_dir = str(TP_DIR / "sam2" / "sam2" / "configs")
         config_file = "sam2.1/sam2.1_hiera_t.yaml"
         initialize_config_dir(config_dir=config_dir, version_base=None)
-        _ = compose(config_name=config_file)  # just to confirm config is readable
         model = build_sam2(config_file, str(ckpt), device="cuda" if CUDA_AVAILABLE else "cpu")
         if CUDA_AVAILABLE and hasattr(torch, "compile"):
             try:
                 model = torch.compile(model, mode="max-autotune")
@@ -286,7 +369,7 @@ def get_stats(self):
         SAM2_PREDICTOR = SAM2ImagePredictor(model)
-        # Verify with real micro-inference
         try:
             dummy = np.zeros((64, 64, 3), dtype=np.uint8)
             SAM2_PREDICTOR.set_image(dummy)
@@ -326,7 +409,7 @@ def get_stats(self):
     print(f"rembg not available: {e}")
 # =========================
-# Background helpers (real)
 # =========================
 def make_solid(w, h, rgb):
     return np.full((h, w, 3), rgb, dtype=np.uint8)
@@ -352,15 +435,14 @@ def build_professional_bg(w, h, preset: str) -> np.ndarray:
     return make_solid(w, h, (240, 240, 240))
 # =========================
-# MatAnyone wrapper (with HF + local fallback)
 # =========================
 class OptimizedMatAnyoneProcessor:
     """
-    Wrapper around MatAnyone's InferenceCore.
-    1) Try HF repo-id ("PeiqingYang/MatAnyone") → needs `safetensors`.
-    2) Fallback: download local checkpoint and build default model → InferenceCore(network).
-    K-Governor above guards any internal top-k/kthvalue ops.
     """
     def __init__(self):
         self.processor = None
@@ -369,15 +451,11 @@ def __init__(self):
         self.verified = False
         self.last_error = None
-    def _clear_hydra(self, note=""):
         try:
-            from hydra.core.global_hydra import GlobalHydra
-            if GlobalHydra().is_initialized():
-                GlobalHydra.instance().clear()
-                if note:
-                    print(f"(Hydra cleared: {note})")
         except Exception:
-            pass
     def initialize(self) -> bool:
         if not MATANYONE_IMPORTED:
@@ -391,7 +469,7 @@ def initialize(self) -> bool:
         # 1) Preferred: HF repo-id
         try:
             print(f"Initializing MatAnyone (HF repo-id) on {self.device}…")
-            self.processor = MatAnyInferenceCore("PeiqingYang/MatAnyone")  # will auto-download model.safetensors
             self.verified = hasattr(self.processor, "process_video")
             if self.device == "cuda":
                 torch.cuda.empty_cache()
@@ -399,30 +477,22 @@ def initialize(self) -> bool:
             self.initialized = True
             print("✅ MatAnyone initialized via HF repo-id.")
             return True
-        except NameError as e:
-            # Common if `safetensors` is missing during HF load
-            self.last_error = f"HF init NameError: {e}"
-            print(f"HF init failed: {e}")
-        except TypeError as e:
-            if "network" in str(e).lower():
-                print("MatAnyone InferenceCore expects a `network`; switching to local checkpoint init.")
-            self.last_error = f"HF init TypeError: {e}"
         except Exception as e:
             self.last_error = f"HF init failed: {type(e).__name__}: {e}"
             print(self.last_error)
-        # 2) Fallback: local checkpoint → get_default_model → InferenceCore(network)
         try:
             print("Falling back to local checkpoint init for MatAnyone…")
-            self._clear_hydra("MatAnyone/local-pre")
             import requests
             from matanyone.utils.get_default_model import get_matanyone_model
-            ckpt_dir = Path("./pretrained_models")
-            ckpt_dir.mkdir(parents=True, exist_ok=True)
             ckpt_path = ckpt_dir / "matanyone.pth"
             if not ckpt_path.exists():
                 url = "https://github.com/pq-yang/MatAnyone/releases/download/v1.0.0/matanyone.pth"
                 print(f"Downloading MatAnyone checkpoint from: {url}")
@@ -434,28 +504,52 @@ def initialize(self) -> bool:
                                 f.write(chunk)
                 print(f"Checkpoint saved to {ckpt_path}")
-            network = get_matanyone_model(
-                str(ckpt_path),
-                device=("cuda" if CUDA_AVAILABLE else "cpu")
-            )
             self.processor = MatAnyInferenceCore(network)
             self.verified = hasattr(self.processor, "process_video")
             if self.device == "cuda":
-                torch.cuda.empty_cache()
-                _ = torch.rand(1, device="cuda") * 0.0
             self.initialized = True
             print("✅ MatAnyone initialized via local checkpoint.")
-            self._clear_hydra("MatAnyone/local-post")
             return True
         except Exception as e:
             self.last_error = f"Local init failed: {type(e).__name__}: {e}"
             print(f"MatAnyone initialization failed: {self.last_error}")
             import traceback; traceback.print_exc()
             return False
     def create_mask_optimized(self, video_path: str, output_path: str) -> str:
         cap = cv2.VideoCapture(video_path)
         ret, frame = cap.read()
@@ -477,11 +571,12 @@ def create_mask_optimized(self, video_path: str, output_path: str) -> str:
                 best = masks[np.argmax(scores)]
                 mask = (best.astype(np.uint8) * 255)
                 cv2.imwrite(output_path, mask)
                 return output_path
             except Exception as e:
                 print(f"SAM2 mask creation failed; fallback rectangle. Error: {e}")
-        # Fallback: centered box (pipeline continuity)
         h, w = frame.shape[:2]
         mask = np.zeros((h, w), dtype=np.uint8)
         mx, my = int(w * 0.15), int(h * 0.10)
@@ -497,25 +592,30 @@ def process_video_optimized(self, input_path: str, output_dir: str):
             if CUDA_AVAILABLE:
                 torch.cuda.empty_cache(); gc.collect()
-            # Lightweight video meta (frames) for logs
-            cap = cv2.VideoCapture(input_path)
-            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) or 0
-            cap.release()
-            if frame_count:
-                print(f"[MatAnyone] Input frames: {frame_count}")
             mask_path = os.path.join(output_dir, "mask.png")
             self.create_mask_optimized(input_path, mask_path)
-            # Only pass supported kwargs for MatAnyone.process_video
-            # (Typical signature: input_path, mask_path, output_path=None, n_warmup=10, r_erode=10, r_dilate=10, suffix='', save_image=False, max_size=-1)
             fg_path, alpha_path = self.processor.process_video(
-                input_path=input_path,
                 mask_path=mask_path,
                 output_path=output_dir,
-                max_size=int(os.getenv("MAX_MODEL_SIZE", "1920")),
             )
-            print("✅ MatAnyone processing complete.")
             return alpha_path
         except Exception as e:
             print(f"❌ MatAnyone processing failed: {e}")
@@ -545,7 +645,7 @@ def process_frame_rembg_optimized(frame_bgr_u8, bg_img_rgb_u8):
         return cv2.cvtColor(frame_bgr_u8, cv2.COLOR_BGR2RGB)
 # =========================
-# Compositing (fixed)
 # =========================
 def composite_with_background(original_path, alpha_path, bg_path=None, bg_preset=None):
     """Composite original video with alpha matte and a background (image or preset)."""
@@ -553,8 +653,7 @@ def composite_with_background(original_path, alpha_path, bg_path=None, bg_preset
     orig_clip = VideoFileClip(original_path)
     alpha_clip = VideoFileClip(alpha_path)
-    w, h = orig_clip.size  # MoviePy returns (w, h)
     if bg_path:
         bg_img = cv2.imread(bg_path)
         if bg_img is None:
@@ -565,15 +664,13 @@ def composite_with_background(original_path, alpha_path, bg_path=None, bg_preset
         bg_img = build_professional_bg(w, h, bg_preset)
     def process_func(get_frame, t):
-        frame = get_frame(t)  # float [0,1], shape (h, w, 3)
-        a = alpha_clip.get_frame(t)  # float [0,1], shape (h, w) or (h, w, 1..3)
         if a.ndim == 2:
             a = a[..., None]
         elif a.shape[2] > 1:
             a = a[..., :1]
         a = np.clip(a, 0.0, 1.0).astype(np.float32)
         bg_f32 = (bg_img.astype(np.float32) / 255.0)
         comp = a * frame.astype(np.float32) + (1.0 - a) * bg_f32
         return comp.astype(np.float32)
@@ -582,12 +679,11 @@ def process_func(get_frame, t):
     output_path = "final_output.mp4"
     new_clip.write_videofile(output_path, audio=False, logger=None)
-    alpha_clip.close()
-    orig_clip.close()
     return output_path
 # =========================
-# Fallback whole-video path
 # =========================
 def process_video_rembg_fallback(video_path, bg_image_path=None, bg_preset=None):
     print("🔄 Processing with rembg fallback…")
@@ -619,10 +715,11 @@ def process_func(get_frame, t):
     new_clip = clip.fl(process_func)
     output_path = "rembg_output.mp4"
     new_clip.write_videofile(output_path, audio=False, logger=None)
     return output_path
 # =========================
-# Self-test harness (real)
 # =========================
 def _ok(flag):  # visual mark in reports
     return "✅" if flag else "❌"
@@ -649,10 +746,9 @@ def self_test_ffmpeg_moviepy() -> (bool, str):
         clip = ImageSequenceClip(frames, fps=4)
         with tempfile.TemporaryDirectory() as td:
             vp = os.path.join(td, "tiny.mp4")
-            clip.write_videofile(vp, audio=False, logger=None)
             clip_r = VideoFileClip(vp)
-            _ = clip_r.get_frame(0.1)
-            clip_r.close()
         return True, "FFmpeg/MoviePy encode/decode ok"
     except Exception as e:
         return False, f"FFmpeg/MoviePy test failed: {e}"
@@ -661,10 +757,9 @@ def self_test_rembg() -> (bool, str):
     try:
         if not REMBG_AVAILABLE:
             return False, "rembg not importable"
-        img = np.zeros((64, 64, 3), dtype=np.uint8)
-        img[:,:] = (0, 255, 0)  # green
         pil = Image.fromarray(img)
-        out = remove(pil)  # real call
         ok = isinstance(out, Image.Image) and out.size == (64, 64)
         return ok, "rembg ok" if ok else "rembg returned unexpected output"
     except Exception as e:
@@ -693,53 +788,32 @@ def self_test_matanyone() -> (bool, str):
             return False, f"MatAnyone init failed: {getattr(matanyone_processor, 'last_error', 'no details')}"
         if not matanyone_processor.verified:
             return False, "MatAnyone missing process_video API"
-        # Create a tiny real video + mask, then run process_video
         with tempfile.TemporaryDirectory() as td:
             frames = []
-            for t in range(12):  # a dozen frames > typical warmup
                 frame = np.zeros((64, 64, 3), dtype=np.uint8)
-                x = 8 + t*3
-                cv2.rectangle(frame, (x, 20), (min(x+12, 63), 44), (200, 200, 200), -1)
                 frames.append(frame)
             vid_path = os.path.join(td, "tiny_input.mp4")
             clip = ImageSequenceClip(frames, fps=8)
-            clip.write_videofile(vid_path, audio=False, logger=None)
-            visible_test_clip = CHECKPOINTS_DIR / "selftest_clip.mp4"
-            clip.write_videofile(str(visible_test_clip), audio=False, logger=None)
-            clip.close()
-            print(f"📹 Self-test clip saved to {visible_test_clip}")
-            # Simple central mask (seed)
             mask = np.zeros((64, 64), dtype=np.uint8)
             cv2.rectangle(mask, (24, 24), (40, 40), 255, -1)
             mask_path = os.path.join(td, "mask.png")
             cv2.imwrite(mask_path, mask)
-            print("Self-test mask uniques:", np.unique(mask))
-            # Introspect signature for clarity
-            try:
-                import inspect
-                sig = str(inspect.signature(matanyone_processor.processor.process_video))
-                print(f"MatAnyone process_video signature: {sig}")
-            except Exception:
-                pass
-            fg_path, alpha_path = matanyone_processor.processor.process_video(
-                input_path=vid_path,
-                mask_path=mask_path,
-                output_path=td,
-                max_size=256,
-            )
-            ok = os.path.exists(alpha_path)
-            if ok:
-                try:
-                    _alpha_clip = VideoFileClip(alpha_path)
-                    _ = _alpha_clip.get_frame(0.1)
-                    _alpha_clip.close()
-                except Exception as _e:
-                    return False, f"MatAnyone alpha video unreadable: {_e}"
-            return ok, "MatAnyone process_video ok" if ok else "MatAnyone did not produce alpha video"
     except Exception as e:
         return False, f"MatAnyone test failed: {e}"
@@ -750,7 +824,6 @@ def run_self_test() -> str:
     lines.append(f"Torch: {torch.__version__ if TORCH_AVAILABLE else 'N/A'} | CUDA: {CUDA_AVAILABLE} | Device: {DEVICE} | GPU: {GPU_NAME}")
     lines.append(f"FFmpeg on PATH: {bool(shutil.which('ffmpeg'))}")
     lines.append("")
     tests = [
         ("CUDA", self_test_cuda),
         ("FFmpeg/MoviePy", self_test_ffmpeg_moviepy),
@@ -768,13 +841,20 @@ def run_self_test() -> str:
 # =========================
 # Gradio callback
 # =========================
-def gradio_interface_optimized(video_file, bg_image, use_matanyone=True, bg_preset="Office (Soft Gray)"):
     """Main entry: choose MatAnyone (if verified) or rembg fallback; show real metrics."""
     if video_file is None:
         return None, None, "Please upload a video."
     video_path = video_file.name if hasattr(video_file, "name") else video_file
     bg_path = bg_image.name if (bg_image is not None and hasattr(bg_image, "name")) else (bg_image if bg_image else None)
     start_time = time.time()
     try:
         if use_matanyone and MATANYONE_IMPORTED:
@@ -837,7 +917,7 @@ def show_matanyone_diag():
 # =========================
 with gr.Blocks(title="Video Background Replacer - GPU Optimized", theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 🎬 Video Background Replacer (GPU Optimized)")
-    gr.Markdown("All green checks are earned by real tests. No guesses. K-Governor is active for safety.")
     gpu_status = f"✅ {GPU_NAME}" if CUDA_AVAILABLE else "❌ CPU Only"
     matany_status = "✅ Module Imported" if MATANYONE_IMPORTED else "❌ Not Importable"
@@ -854,8 +934,7 @@ def show_matanyone_diag():
         <strong>MatAnyone ready:</strong> {"✅ Yes" if getattr(matanyone_processor, "verified", False) else "❌ No"}<br>
         <strong>SAM2:</strong> {sam2_status}<br>
         <strong>rembg:</strong> {rembg_status}<br>
-        <strong>PyTorch:</strong> {torch_status}<br>
-        <strong>K-Governor:</strong> ✅ Active (torch.topk / torch.kthvalue)
     </div>
     """
     gr.HTML(status_html)
@@ -880,6 +959,15 @@ def show_matanyone_diag():
                 value=MATANYONE_IMPORTED,
                 interactive=MATANYONE_IMPORTED,
             )
             process_btn = gr.Button("🚀 Process Video", variant="primary", size="lg")
             gr.Markdown("### 🔎 Self-Verification")
@@ -897,7 +985,7 @@ def show_matanyone_diag():
     process_btn.click(
         fn=gradio_interface_optimized,
-        inputs=[video_input, bg_input, use_matanyone, bg_preset],
         outputs=[output_video, download_file, status_text],
     )
@@ -916,10 +1004,9 @@ def show_matanyone_diag():
     gr.Markdown("---")
     gr.Markdown("""
     **Notes**
     - SAM2 shows ✅ only after a real micro-inference passes.
-    - K-Governor clamps unsafe K at runtime and logs the clamp (shape/dim and old→new K).
-    - MatAnyone tries HF load first (needs `safetensors`), else falls back to local checkpoint.
-    - The self-test saves a visible clip to `checkpoints/selftest_clip.mp4` for inspection.
     - FFmpeg/MoviePy, CUDA, and rembg are validated by actually running them.
     """)
@@ -930,7 +1017,6 @@ def show_matanyone_diag():
     if "--self-test" in sys.argv:
         report = run_self_test()
         print(report)
-        # Exit non-zero if any test failed
         exit_code = 0
         for line in report.splitlines():
             if line.startswith("❌"):

 #!/usr/bin/env python3
 """
 HuggingFace Space app: Video background replacement with MatAnyone/SAM2/rembg.
+GPU-optimized with real verification, K-Governor (safe_topk), and short-clip stabilizer.
+What’s new vs. your last version:
+- K-Governor: rewrites MatAnyone .topk/.kthvalue call-sites to use safe guards (no more "selected index k out of range").
+- Pre-roll stabilizer: duplicates first real frame to grow the candidate bank for very short clips; trimmed out before compositing.
+- Diagnostic prints show exactly what was patched and when clamping happens.
+Environment knobs:
+- MATANYONE_PREROLL_FRAMES (default 12) – how many frames to prepend as pre-roll.
+- MATANYONE_STABILIZE (default "true") – enable/disable pre-roll stabilizer globally.
+- MAX_MODEL_SIZE, MATANYONE_* knobs unchanged from before.
 """
 import os
 # =========================
 # Environment configuration
 # =========================
# Every entry is applied with ``setdefault``: a value that is already present
# in the process environment is kept, otherwise the default below is used.
_ENV_DEFAULTS = {
    "PYTORCH_CUDA_ALLOC_CONF": "expandable_segments:True,max_split_size_mb:512",
    "CUDA_MODULE_LOADING": "LAZY",
    "OMP_NUM_THREADS": "8",
    "MKL_NUM_THREADS": "8",
    "PYTHONUNBUFFERED": "1",
    # MatAnyone GPU prefs
    "MATANYONE_MAX_EDGE": "1024",
    "MATANYONE_TARGET_PIXELS": "1000000",
    "MATANYONE_WINDOWED": "1",
    "MATANYONE_WINDOW": "16",
    "MAX_MODEL_SIZE": "1920",
    # CUDA + cuDNN
    "CUDA_LAUNCH_BLOCKING": "0",
    "TORCH_CUDNN_V8_API_ENABLED": "1",
    "CUDNN_BENCHMARK": "1",
    # HF cache prefs
    "HF_HOME": "./checkpoints/hf",
    "TRANSFORMERS_CACHE": "./checkpoints/hf",
    "HF_DATASETS_CACHE": "./checkpoints/hf",
    "HF_HUB_DISABLE_SYMLINKS": "1",
    "HF_HUB_ENABLE_HF_TRANSFER": "1",
    # Gradio binding
    "GRADIO_SERVER_NAME": "0.0.0.0",
    "GRADIO_SERVER_PORT": "7860",
    # Feature flags
    "USE_MATANYONE": "true",
    "USE_SAM2": "true",
    "SELF_CHECK_MODE": "false",
    # Short-clip stabilizer
    "MATANYONE_STABILIZE": "true",
    "MATANYONE_PREROLL_FRAMES": "12",  # duplicated first frame count
}
for _env_name, _env_default in _ENV_DEFAULTS.items():
    os.environ.setdefault(_env_name, _env_default)
 # =========================
 # Imports
 import numpy as np
 from PIL import Image
 import gradio as gr
+from moviepy.editor import VideoFileClip, ImageSequenceClip, concatenate_videoclips
 print("=" * 50)
 print("Application Startup at", os.popen('date').read().strip())
         print(f"Added to path: {p}")
 # =========================
+# K-Governor: patch MatAnyone topk/kthvalue call-sites
+# =========================
+import re
# Source of the helper module written into the MatAnyone package.  It is kept
# as plain text because it is emitted to disk, not executed here.
_SAFE_OPS_SOURCE = """\
import os

import torch

# Any value other than an explicit "off" spelling keeps the clamp log enabled.
_VERBOSE = os.environ.get("SAFE_TOPK_VERBOSE", "1").strip().lower() not in (
    "", "0", "false", "no", "off",
)

# Use the stashed originals when the host app managed to attach them,
# otherwise fall back to the public torch functions.
_TOPK = getattr(torch.topk, "__wrapped__", torch.topk)
_KTHVALUE = getattr(torch.kthvalue, "__wrapped__", torch.kthvalue)


def _log(msg):
    if _VERBOSE:
        print(f"[K-Governor] {msg}")


def _pad_value(values, largest):
    # Padding has to rank after every real entry in the requested order.
    if values.is_floating_point():
        return float("-inf") if largest else float("inf")
    info = torch.iinfo(values.dtype)
    return info.min if largest else info.max


def safe_topk(x, k, dim=None, largest=True, sorted=True):
    if not isinstance(k, int):
        k = int(k)
    if dim is None:
        dim = -1
    n = x.size(dim)
    k_eff = max(1, min(k, int(n)))
    if k_eff != k:
        _log(f"torch.topk: clamp k {k} -> {k_eff} for dim={dim} shape={tuple(x.shape)}")
    values, indices = _TOPK(x, k_eff, dim=dim, largest=largest, sorted=sorted)
    if k_eff < k:
        # Keep the output width the caller asked for by padding the tail.
        pad_shape = list(values.shape)
        pad_shape[dim] = k - k_eff
        pad_vals = values.new_full(pad_shape, _pad_value(values, largest))
        pad_idx = indices.new_zeros(pad_shape)
        values = torch.cat([values, pad_vals], dim=dim)
        indices = torch.cat([indices, pad_idx], dim=dim)
    return values, indices


def safe_kthvalue(x, k, dim=None, keepdim=False):
    if not isinstance(k, int):
        k = int(k)
    if dim is None:
        dim = -1
    n = x.size(dim)
    k_eff = max(1, min(k, int(n)))
    if k_eff != k:
        _log(f"torch.kthvalue: clamp k {k} -> {k_eff} for dim={dim} shape={tuple(x.shape)}")
    return _KTHVALUE(x, k_eff, dim=dim, keepdim=keepdim)
"""


def _write_safe_ops_file(pkg_root: Path):
    """Write ``safe_ops.py`` (clamping ``topk``/``kthvalue`` helpers) into the package.

    The file is placed in ``<pkg_root>/matanyone/utils`` when that directory
    exists, otherwise in ``<pkg_root>/utils``; the chosen directory is created
    if missing.

    The generated module imports ``os`` itself (it reads ``SAFE_TOPK_VERBOSE``)
    and resolves the underlying torch ops with a fallback to the public
    functions, so it is importable whether or not a ``__wrapped__`` attribute
    was attached to ``torch.topk`` / ``torch.kthvalue`` beforehand.

    Args:
        pkg_root: Root directory of the MatAnyone checkout.
    """
    utils_dir = pkg_root / "matanyone" / "utils"
    # handle clones that place package directly at root
    if not utils_dir.exists():
        utils_dir = pkg_root / "utils"
    utils_dir.mkdir(parents=True, exist_ok=True)
    (utils_dir / "safe_ops.py").write_text(_SAFE_OPS_SOURCE, encoding="utf-8")
def _safe_ops_insert_index(source: str, max_line: int = 80) -> int:
    """Return the 0-based line index where a top-level import can be inserted.

    The index is just past the last top-level import statement that starts
    within the first ``max_line`` lines (using the statement's *end* line, so
    parenthesised multi-line imports are never split).  Without any import the
    index is placed after the module docstring, or after leading ``#`` lines.

    Raises:
        SyntaxError: if ``source`` is not valid Python.
    """
    import ast

    body = ast.parse(source).body
    insert_at = 0
    if body:
        first = body[0]
        if (
            isinstance(first, ast.Expr)
            and isinstance(first.value, ast.Constant)
            and isinstance(first.value.value, str)
        ):
            insert_at = first.end_lineno
        for node in body:
            if isinstance(node, (ast.Import, ast.ImportFrom)) and node.lineno <= max_line:
                insert_at = max(insert_at, node.end_lineno)
    if insert_at == 0:
        # Keep a shebang / encoding cookie / licence comment block on top.
        lines = source.splitlines()
        while insert_at < len(lines) and lines[insert_at].startswith("#"):
            insert_at += 1
    return insert_at


def _patch_matanyone_sources(repo_dir: Path) -> int:
    """Rewrite ``topk`` / ``kthvalue`` call-sites in MatAnyone to the safe wrappers.

    Every ``*.py`` file below ``<repo_dir>/matanyone`` (or ``repo_dir`` itself
    when that directory is absent) is scanned.  ``torch.topk(`` and
    ``torch.kthvalue(`` become ``safe_topk(`` / ``safe_kthvalue(``, and method
    calls ``obj.topk(`` / ``obj.kthvalue(`` become ``safe_topk(obj, `` /
    ``safe_kthvalue(obj, ``.

    Only files in which a call was actually rewritten are touched.  Such a file
    receives the ``safe_ops`` import (unless already present) after its last
    top-level import, and the result is syntax-checked before it replaces the
    original; files that cannot be read, parsed, or patched cleanly are left
    untouched and reported.

    Args:
        repo_dir: Root directory of the MatAnyone checkout.

    Returns:
        Number of files that were rewritten.
    """
    root = repo_dir / "matanyone"
    if not root.exists():  # some layouts have files directly
        root = repo_dir

    header_import = "from matanyone.utils.safe_ops import safe_topk, safe_kthvalue\n"
    import_marker = "from matanyone.utils.safe_ops import"
    # Order matters: the explicit ``torch.`` forms are rewritten first so the
    # generic method patterns never see them.
    rewrites = (
        (re.compile(r"\btorch\.topk\s*\("), "safe_topk("),
        (re.compile(r"\btorch\.kthvalue\s*\("), "safe_kthvalue("),
        (re.compile(r"(\b[\w\.]+)\.topk\s*\("), r"safe_topk(\1, "),
        (re.compile(r"(\b[\w\.]+)\.kthvalue\s*\("), r"safe_kthvalue(\1, "),
    )

    changed = 0
    for py in sorted(root.rglob("*.py")):
        if py.name == "safe_ops.py":
            # Never rewrite the wrapper module itself.
            continue
        try:
            orig = py.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError) as exc:
            print(f"[K-Governor] Skipped {py}: {exc}")
            continue

        txt = orig
        for pattern, replacement in rewrites:
            txt = pattern.sub(replacement, txt)
        if txt == orig:
            continue  # nothing to guard in this file

        try:
            if import_marker not in txt:
                insert_at = _safe_ops_insert_index(txt)
                lines = txt.splitlines(keepends=True)
                if insert_at > 0 and not lines[insert_at - 1].endswith(("\n", "\r")):
                    lines[insert_at - 1] += "\n"
                lines.insert(insert_at, header_import)
                txt = "".join(lines)
            # Refuse to overwrite a third-party file with something unparsable.
            compile(txt, str(py), "exec", dont_inherit=True)
        except (SyntaxError, ValueError) as exc:
            print(f"[K-Governor] Skipped {py}: {exc}")
            continue

        py.write_text(txt, encoding="utf-8")
        changed += 1
    return changed
# Stash original torch ops for our wrappers.
# NOTE(review): attribute assignment on C-implemented callables normally
# raises AttributeError; if torch.topk / torch.kthvalue are such builtins the
# stash cannot succeed. The failure is reported instead of being swallowed so
# a missing ``__wrapped__`` is visible in the startup log.
try:
    import torch as _torch_guard
    if not hasattr(_torch_guard.topk, "__wrapped__"):
        _torch_guard.topk.__wrapped__ = _torch_guard.topk
    if not hasattr(_torch_guard.kthvalue, "__wrapped__"):
        _torch_guard.kthvalue.__wrapped__ = _torch_guard.kthvalue
except Exception as e:
    print(f"[K-Governor] Could not stash original torch ops: {type(e).__name__}: {e}")
# Write safe ops & patch sources (best effort: a failure is logged, startup continues)
MATANY_REPO_DIR = TP_DIR / "matanyone"
try:
    _write_safe_ops_file(MATANY_REPO_DIR)
    patched_files = _patch_matanyone_sources(MATANY_REPO_DIR)
    print(f"[K-Governor] Patched MatAnyone sources: {patched_files} files updated.")
except Exception as e:
    print(f"[K-Governor] Patch failed: {e}")
+# =========================
+# Torch & device
 # =========================
 TORCH_AVAILABLE = False
 CUDA_AVAILABLE = False
 except Exception as e:
     print(f"Torch not available: {e}")
 # =========================
 # Light GPU monitor
 # =========================
         config_dir = str(TP_DIR / "sam2" / "sam2" / "configs")
         config_file = "sam2.1/sam2.1_hiera_t.yaml"
         initialize_config_dir(config_dir=config_dir, version_base=None)
+        _ = compose(config_name=config_file)
         model = build_sam2(config_file, str(ckpt), device="cuda" if CUDA_AVAILABLE else "cpu")
+        # Optional torch.compile
         if CUDA_AVAILABLE and hasattr(torch, "compile"):
             try:
                 model = torch.compile(model, mode="max-autotune")
         SAM2_PREDICTOR = SAM2ImagePredictor(model)
+        # Verify micro inference
         try:
             dummy = np.zeros((64, 64, 3), dtype=np.uint8)
             SAM2_PREDICTOR.set_image(dummy)
     print(f"rembg not available: {e}")
 # =========================
+# Background helpers
 # =========================
 def make_solid(w, h, rgb):
     return np.full((h, w, 3), rgb, dtype=np.uint8)
     return make_solid(w, h, (240, 240, 240))
 # =========================
+# MatAnyone wrapper (with pre-roll stabilizer)
 # =========================
 class OptimizedMatAnyoneProcessor:
     """
+    Wrapper around MatAnyone's InferenceCore with:
+    - HF repo-id path first (may require safetensors); fallback to local checkpoint via get_matanyone_model.
+    - K-Governor already patches library calls; no API changes needed here.
+    - Optional pre-roll: duplicate the first real frame to build a stable memory bank; trim it from alpha before compositing.
     """
     def __init__(self):
         self.processor = None
         self.verified = False
         self.last_error = None
+        self.stabilize = os.getenv("MATANYONE_STABILIZE", "true").lower() == "true"
         try:
+            self.preroll_frames = max(0, int(os.getenv("MATANYONE_PREROLL_FRAMES", "12")))
         except Exception:
+            self.preroll_frames = 12
     def initialize(self) -> bool:
         if not MATANYONE_IMPORTED:
         # 1) Preferred: HF repo-id
         try:
             print(f"Initializing MatAnyone (HF repo-id) on {self.device}…")
+            self.processor = MatAnyInferenceCore("PeiqingYang/MatAnyone")
             self.verified = hasattr(self.processor, "process_video")
             if self.device == "cuda":
                 torch.cuda.empty_cache()
             self.initialized = True
             print("✅ MatAnyone initialized via HF repo-id.")
             return True
         except Exception as e:
             self.last_error = f"HF init failed: {type(e).__name__}: {e}"
             print(self.last_error)
+        # 2) Fallback: local checkpoint → network → InferenceCore(network)
         try:
             print("Falling back to local checkpoint init for MatAnyone…")
+            from hydra.core.global_hydra import GlobalHydra
+            if hasattr(GlobalHydra, "instance") and GlobalHydra().is_initialized():
+                GlobalHydra.instance().clear()
             import requests
             from matanyone.utils.get_default_model import get_matanyone_model
+            ckpt_dir = Path("./pretrained_models"); ckpt_dir.mkdir(parents=True, exist_ok=True)
             ckpt_path = ckpt_dir / "matanyone.pth"
             if not ckpt_path.exists():
                 url = "https://github.com/pq-yang/MatAnyone/releases/download/v1.0.0/matanyone.pth"
                 print(f"Downloading MatAnyone checkpoint from: {url}")
                                 f.write(chunk)
                 print(f"Checkpoint saved to {ckpt_path}")
+            network = get_matanyone_model(str(ckpt_path), device=("cuda" if CUDA_AVAILABLE else "cpu"))
             self.processor = MatAnyInferenceCore(network)
             self.verified = hasattr(self.processor, "process_video")
             if self.device == "cuda":
+                torch.cuda.empty_cache(); _ = torch.rand(1, device="cuda") * 0.0
             self.initialized = True
             print("✅ MatAnyone initialized via local checkpoint.")
             return True
         except Exception as e:
             self.last_error = f"Local init failed: {type(e).__name__}: {e}"
             print(f"MatAnyone initialization failed: {self.last_error}")
             import traceback; traceback.print_exc()
             return False
+    # ---- Pre-roll helpers
+    @staticmethod
+    def _build_preroll_concat(input_path: str, frames: int) -> tuple[str, float]:
+        """Return (concat_path, preroll_seconds) where concat_path = [pre-roll + original]."""
+        clip = VideoFileClip(input_path)
+        fps = max(1, int(round(clip.fps or 24)))
+        preroll_frames = max(0, frames)
+        if preroll_frames == 0:
+            out = input_path
+            clip.close()
+            return out, 0.0
+        first = clip.get_frame(0)
+        pre = ImageSequenceClip([first] * preroll_frames, fps=fps)
+        concat = concatenate_videoclips([pre, clip])
+        tmp = tempfile.NamedTemporaryFile(delete=False, suffix="_concat.mp4")
+        concat.write_videofile(tmp.name, audio=False, logger=None)
+        pre.close(); concat.close(); clip.close()
+        return tmp.name, preroll_frames / fps
+    @staticmethod
+    def _trim_head(video_path: str, seconds: float) -> str:
+        if seconds <= 0:
+            return video_path
+        clip = VideoFileClip(video_path)
+        dur = clip.duration or 0
+        start = min(seconds, max(0.0, dur - 0.001))
+        trimmed = tempfile.NamedTemporaryFile(delete=False, suffix="_trim.mp4").name
+        clip.subclip(start, None).write_videofile(trimmed, audio=False, logger=None)
+        clip.close()
+        return trimmed
     def create_mask_optimized(self, video_path: str, output_path: str) -> str:
         cap = cv2.VideoCapture(video_path)
         ret, frame = cap.read()
                 best = masks[np.argmax(scores)]
                 mask = (best.astype(np.uint8) * 255)
                 cv2.imwrite(output_path, mask)
+                print(f"Self-test mask uniques: {np.unique(mask//255)}")
                 return output_path
             except Exception as e:
                 print(f"SAM2 mask creation failed; fallback rectangle. Error: {e}")
+        # Fallback: centered box
         h, w = frame.shape[:2]
         mask = np.zeros((h, w), dtype=np.uint8)
         mx, my = int(w * 0.15), int(h * 0.10)
             if CUDA_AVAILABLE:
                 torch.cuda.empty_cache(); gc.collect()
+            # Optional pre-roll stabilizer (always trimmed out later)
+            concat_path = input_path
+            preroll_sec = 0.0
+            if self.stabilize and self.preroll_frames > 0:
+                concat_path, preroll_sec = self._build_preroll_concat(input_path, self.preroll_frames)
+                print(f"[Stabilizer] Pre-rolled {self.preroll_frames} frames ({preroll_sec:.3f}s).")
             mask_path = os.path.join(output_dir, "mask.png")
+            # Create mask from the ORIGINAL first frame (not the pre-roll)
             self.create_mask_optimized(input_path, mask_path)
+            # Call MatAnyone (signature introspected earlier; pass only known-safe kwargs)
             fg_path, alpha_path = self.processor.process_video(
+                input_path=concat_path,
                 mask_path=mask_path,
                 output_path=output_dir,
+                max_size=int(os.getenv("MAX_MODEL_SIZE", "1920"))
             )
+            # If we had a pre-roll, trim it off the alpha
+            if preroll_sec > 0.0:
+                alpha_path_trimmed = self._trim_head(alpha_path, preroll_sec)
+                print(f"[Stabilizer] Trimmed {preroll_sec:.3f}s from alpha.")
+                return alpha_path_trimmed
             return alpha_path
         except Exception as e:
             print(f"❌ MatAnyone processing failed: {e}")
         return cv2.cvtColor(frame_bgr_u8, cv2.COLOR_BGR2RGB)
 # =========================
+# Compositing
 # =========================
 def composite_with_background(original_path, alpha_path, bg_path=None, bg_preset=None):
     """Composite original video with alpha matte and a background (image or preset)."""
     orig_clip = VideoFileClip(original_path)
     alpha_clip = VideoFileClip(alpha_path)
+    w, h = orig_clip.size
     if bg_path:
         bg_img = cv2.imread(bg_path)
         if bg_img is None:
         bg_img = build_professional_bg(w, h, bg_preset)
     def process_func(get_frame, t):
         """Blend the source frame at time ``t`` over ``bg_img`` using the alpha clip.

         Computes ``a * frame + (1 - a) * background`` in float32, where ``a``
         is the first channel of ``alpha_clip``'s frame at the same ``t``.
         Reads ``alpha_clip`` and ``bg_img`` from the enclosing scope.
         """
+        frame = get_frame(t)  # float [0,1]
+        a = alpha_clip.get_frame(t)
         # Force the matte to a single trailing channel so it broadcasts over
         # the frame's colour channels.
         if a.ndim == 2:
             a = a[..., None]
         elif a.shape[2] > 1:
             a = a[..., :1]
         # NOTE(review): alpha is clipped, not rescaled. This is only a soft
         # matte if get_frame() already yields values in [0, 1] -- confirm.
         a = np.clip(a, 0.0, 1.0).astype(np.float32)
         # Scale the background by 1/255 (presumably 8-bit image data -- verify
         # for the preset path). Recomputed on every call.
         bg_f32 = (bg_img.astype(np.float32) / 255.0)
         # assumes bg_img matches the frame's height/width and channel order
         # -- TODO confirm against the code outside this view
         comp = a * frame.astype(np.float32) + (1.0 - a) * bg_f32
         return comp.astype(np.float32)
     output_path = "final_output.mp4"
     new_clip.write_videofile(output_path, audio=False, logger=None)
+    alpha_clip.close(); orig_clip.close(); new_clip.close()
     return output_path
 # =========================
+# Fallback whole-video path (rembg)
 # =========================
 def process_video_rembg_fallback(video_path, bg_image_path=None, bg_preset=None):
     print("🔄 Processing with rembg fallback…")
     new_clip = clip.fl(process_func)
     output_path = "rembg_output.mp4"
     new_clip.write_videofile(output_path, audio=False, logger=None)
+    clip.close(); new_clip.close()
     return output_path
 # =========================
+# Self-test harness
 # =========================
 def _ok(flag):  # visual mark in reports
     return "✅" if flag else "❌"
         clip = ImageSequenceClip(frames, fps=4)
         with tempfile.TemporaryDirectory() as td:
             vp = os.path.join(td, "tiny.mp4")
+            clip.write_videofile(vp, audio=False, logger=None); clip.close()
             clip_r = VideoFileClip(vp)
+            _ = clip_r.get_frame(0.1); clip_r.close()
         return True, "FFmpeg/MoviePy encode/decode ok"
     except Exception as e:
         return False, f"FFmpeg/MoviePy test failed: {e}"
     try:
         if not REMBG_AVAILABLE:
             return False, "rembg not importable"
+        img = np.zeros((64, 64, 3), dtype=np.uint8); img[:,:] = (0, 255, 0)
         pil = Image.fromarray(img)
+        out = remove(pil)
         ok = isinstance(out, Image.Image) and out.size == (64, 64)
         return ok, "rembg ok" if ok else "rembg returned unexpected output"
     except Exception as e:
             return False, f"MatAnyone init failed: {getattr(matanyone_processor, 'last_error', 'no details')}"
         if not matanyone_processor.verified:
             return False, "MatAnyone missing process_video API"
         with tempfile.TemporaryDirectory() as td:
+            # tiny moving square video
             frames = []
+            for t in range(8):
                 frame = np.zeros((64, 64, 3), dtype=np.uint8)
+                x = 8 + t*4
+                cv2.rectangle(frame, (x, 20), (x+12, 44), (200, 200, 200), -1)
                 frames.append(frame)
             vid_path = os.path.join(td, "tiny_input.mp4")
             clip = ImageSequenceClip(frames, fps=8)
+            clip.write_videofile(vid_path, audio=False, logger=None); clip.close()
+            # central seed mask
             mask = np.zeros((64, 64), dtype=np.uint8)
             cv2.rectangle(mask, (24, 24), (40, 40), 255, -1)
             mask_path = os.path.join(td, "mask.png")
             cv2.imwrite(mask_path, mask)
+            # run through our stabilized path
+            alpha = matanyone_processor.process_video_optimized(vid_path, td)
+            if alpha is None or not os.path.exists(alpha):
+                return False, "MatAnyone did not produce alpha video"
+            # try open alpha
+            _alpha_clip = VideoFileClip(alpha)
+            _ = _alpha_clip.get_frame(0.1); _alpha_clip.close()
+            return True, "MatAnyone process_video ok"
     except Exception as e:
         return False, f"MatAnyone test failed: {e}"
     lines.append(f"Torch: {torch.__version__ if TORCH_AVAILABLE else 'N/A'} | CUDA: {CUDA_AVAILABLE} | Device: {DEVICE} | GPU: {GPU_NAME}")
     lines.append(f"FFmpeg on PATH: {bool(shutil.which('ffmpeg'))}")
     lines.append("")
     tests = [
         ("CUDA", self_test_cuda),
         ("FFmpeg/MoviePy", self_test_ffmpeg_moviepy),
 # =========================
 # Gradio callback
 # =========================
+def gradio_interface_optimized(video_file, bg_image, use_matanyone=True, bg_preset="Office (Soft Gray)", stabilize=True, preroll_frames=12):
     """Main entry: choose MatAnyone (if verified) or rembg fallback; show real metrics."""
     if video_file is None:
         return None, None, "Please upload a video."
     video_path = video_file.name if hasattr(video_file, "name") else video_file
     bg_path = bg_image.name if (bg_image is not None and hasattr(bg_image, "name")) else (bg_image if bg_image else None)
+    # reflect UI choices into processor for this run
+    matanyone_processor.stabilize = bool(stabilize)
+    try:
+        matanyone_processor.preroll_frames = max(0, int(preroll_frames))
+    except Exception:
+        pass
     start_time = time.time()
     try:
         if use_matanyone and MATANYONE_IMPORTED:
 # =========================
 with gr.Blocks(title="Video Background Replacer - GPU Optimized", theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 🎬 Video Background Replacer (GPU Optimized)")
+    gr.Markdown("All green checks are earned by real tests. No guesses.")
     gpu_status = f"✅ {GPU_NAME}" if CUDA_AVAILABLE else "❌ CPU Only"
     matany_status = "✅ Module Imported" if MATANYONE_IMPORTED else "❌ Not Importable"
         <strong>MatAnyone ready:</strong> {"✅ Yes" if getattr(matanyone_processor, "verified", False) else "❌ No"}<br>
         <strong>SAM2:</strong> {sam2_status}<br>
         <strong>rembg:</strong> {rembg_status}<br>
+        <strong>PyTorch:</strong> {torch_status}
     </div>
     """
     gr.HTML(status_html)
                 value=MATANYONE_IMPORTED,
                 interactive=MATANYONE_IMPORTED,
             )
+            stabilize = gr.Checkbox(
+                label="🧱 Stabilize short clips (pre-roll first frame)",
+                value=os.getenv("MATANYONE_STABILIZE", "true").lower() == "true",
+            )
+            preroll_frames = gr.Slider(
+                label="Pre-roll frames",
+                minimum=0, maximum=24, step=1,
+                value=int(os.getenv("MATANYONE_PREROLL_FRAMES", "12"))
+            )
             process_btn = gr.Button("🚀 Process Video", variant="primary", size="lg")
             gr.Markdown("### 🔎 Self-Verification")
     process_btn.click(
         fn=gradio_interface_optimized,
+        inputs=[video_input, bg_input, use_matanyone, bg_preset, stabilize, preroll_frames],
         outputs=[output_video, download_file, status_text],
     )
     gr.Markdown("---")
     gr.Markdown("""
     **Notes**
+    - K-Governor clamps and pads Top-K inside MatAnyone so it cannot crash on small pools.
+    - Short-clip stabilizer (pre-roll of the first real frame) is trimmed out of the alpha automatically.
     - SAM2 shows ✅ only after a real micro-inference passes.
     - FFmpeg/MoviePy, CUDA, and rembg are validated by actually running them.
     """)
     if "--self-test" in sys.argv:
         report = run_self_test()
         print(report)
         exit_code = 0
         for line in report.splitlines():
             if line.startswith("❌"):