MogensR committed on
Commit
e947d62
·
verified ·
1 Parent(s): b894e0d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +272 -186
app.py CHANGED
@@ -1,20 +1,17 @@
1
  #!/usr/bin/env python3
2
  """
3
  HuggingFace Space app: Video background replacement with MatAnyone/SAM2/rembg.
4
- GPU-optimized with real verification & self-tests (no mocks).
5
-
6
- What’s new (2025-09-06):
7
- - K-Governor: runtime clamp for torch.topk / torch.kthvalue so MatAnyone can’t
8
- request K > available candidates. This prevents "selected index k out of range".
9
- - Extra diagnostics: when clamping happens, we print shape/dim and old→new K.
10
- - Safer MatAnyone init: try HF repo-id first (needs `safetensors`), then local
11
- checkpoint fallback; both with clear error reporting.
12
- - SAM2 only shows Verified after a real micro-inference.
13
- - Self-test is 100% real (CUDA, FFmpeg/MoviePy, rembg, SAM2, MatAnyone).
14
-
15
- NOTE: Make sure `requirements.txt` includes:
16
- safetensors>=0.4.2
17
- av==10.0.0
18
  """
19
 
20
  import os
@@ -30,39 +27,43 @@
30
  # =========================
31
  # Environment configuration
32
  # =========================
33
- os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True,max_split_size_mb:512"
34
- os.environ["CUDA_MODULE_LOADING"] = "LAZY"
35
- os.environ["OMP_NUM_THREADS"] = "8"
36
- os.environ["MKL_NUM_THREADS"] = "8"
37
- os.environ["PYTHONUNBUFFERED"] = "1"
38
 
39
  # MatAnyone GPU prefs
40
- os.environ["MATANYONE_MAX_EDGE"] = "1024"
41
- os.environ["MATANYONE_TARGET_PIXELS"] = "1000000"
42
- os.environ["MATANYONE_WINDOWED"] = "1"
43
- os.environ["MATANYONE_WINDOW"] = "16"
44
- os.environ["MAX_MODEL_SIZE"] = "1920"
45
 
46
  # CUDA + cuDNN
47
- os.environ["CUDA_LAUNCH_BLOCKING"] = "0"
48
- os.environ["TORCH_CUDNN_V8_API_ENABLED"] = "1"
49
- os.environ["CUDNN_BENCHMARK"] = "1"
50
 
51
  # HF cache prefs
52
- os.environ["HF_HOME"] = "./checkpoints/hf"
53
- os.environ["TRANSFORMERS_CACHE"] = "./checkpoints/hf"
54
- os.environ["HF_DATASETS_CACHE"] = "./checkpoints/hf"
55
- os.environ["HF_HUB_DISABLE_SYMLINKS"] = "1"
56
- os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
57
 
58
  # Gradio binding
59
- os.environ["GRADIO_SERVER_NAME"] = "0.0.0.0"
60
- os.environ["GRADIO_SERVER_PORT"] = "7860"
61
 
62
  # Feature flags
63
- os.environ["USE_MATANYONE"] = os.getenv("USE_MATANYONE", "true")
64
- os.environ["USE_SAM2"] = os.getenv("USE_SAM2", "true")
65
- os.environ["SELF_CHECK_MODE"] = os.getenv("SELF_CHECK_MODE", "false")
 
 
 
 
66
 
67
  # =========================
68
  # Imports
@@ -71,7 +72,7 @@
71
  import numpy as np
72
  from PIL import Image
73
  import gradio as gr
74
- from moviepy.editor import VideoFileClip, ImageSequenceClip
75
 
76
  print("=" * 50)
77
  print("Application Startup at", os.popen('date').read().strip())
@@ -125,7 +126,121 @@
125
  print(f"Added to path: {p}")
126
 
127
  # =========================
128
- # Torch & device (+ K-Governor)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
  # =========================
130
  TORCH_AVAILABLE = False
131
  CUDA_AVAILABLE = False
@@ -158,39 +273,6 @@
158
  except Exception as e:
159
  print(f"Torch not available: {e}")
160
 
161
- # ---- K-Governor (MUST be installed before importing MatAnyone) ----
162
- if TORCH_AVAILABLE:
163
- _ORIG_TOPK = torch.topk
164
- _ORIG_KTH = getattr(torch, "kthvalue", None)
165
-
166
- def _safe_topk(input, k, dim=None, largest=True, sorted=True, *, out=None):
167
- if dim is None:
168
- dim = input.ndim - 1
169
- size = input.size(dim)
170
- if k > size:
171
- print(f"[K-Governor] torch.topk: clamp k {k} -> {size} for dim={dim} shape={tuple(input.shape)}")
172
- k = int(size)
173
- if k <= 0:
174
- k = 1
175
- return _ORIG_TOPK(input, k, dim, largest, sorted, out=out)
176
-
177
- torch.topk = _safe_topk # global guard
178
-
179
- if _ORIG_KTH is not None:
180
- def _safe_kthvalue(input, k, dim=None, keepdim=False, *, out=None):
181
- if dim is None:
182
- # flat length if dim is None
183
- size = int(input.numel())
184
- else:
185
- size = int(input.size(dim))
186
- if k > size:
187
- print(f"[K-Governor] torch.kthvalue: clamp k {k} -> {size} (dim={dim}) shape={tuple(input.shape)}")
188
- k = size
189
- if k <= 0:
190
- k = 1
191
- return _ORIG_KTH(input, k, dim=dim, keepdim=keepdim, out=out)
192
- torch.kthvalue = _safe_kthvalue
193
-
194
  # =========================
195
  # Light GPU monitor
196
  # =========================
@@ -274,10 +356,11 @@ def get_stats(self):
274
  config_dir = str(TP_DIR / "sam2" / "sam2" / "configs")
275
  config_file = "sam2.1/sam2.1_hiera_t.yaml"
276
  initialize_config_dir(config_dir=config_dir, version_base=None)
277
- _ = compose(config_name=config_file) # just to confirm config is readable
278
 
279
  model = build_sam2(config_file, str(ckpt), device="cuda" if CUDA_AVAILABLE else "cpu")
280
 
 
281
  if CUDA_AVAILABLE and hasattr(torch, "compile"):
282
  try:
283
  model = torch.compile(model, mode="max-autotune")
@@ -286,7 +369,7 @@ def get_stats(self):
286
 
287
  SAM2_PREDICTOR = SAM2ImagePredictor(model)
288
 
289
- # Verify with real micro-inference
290
  try:
291
  dummy = np.zeros((64, 64, 3), dtype=np.uint8)
292
  SAM2_PREDICTOR.set_image(dummy)
@@ -326,7 +409,7 @@ def get_stats(self):
326
  print(f"rembg not available: {e}")
327
 
328
  # =========================
329
- # Background helpers (real)
330
  # =========================
331
  def make_solid(w, h, rgb):
332
  return np.full((h, w, 3), rgb, dtype=np.uint8)
@@ -352,15 +435,14 @@ def build_professional_bg(w, h, preset: str) -> np.ndarray:
352
  return make_solid(w, h, (240, 240, 240))
353
 
354
  # =========================
355
- # MatAnyone wrapper (with HF + local fallback)
356
  # =========================
357
  class OptimizedMatAnyoneProcessor:
358
  """
359
- Wrapper around MatAnyone's InferenceCore.
360
- 1) Try HF repo-id ("PeiqingYang/MatAnyone") needs `safetensors`.
361
- 2) Fallback: download local checkpoint and build default model InferenceCore(network).
362
-
363
- K-Governor above guards any internal top-k/kthvalue ops.
364
  """
365
  def __init__(self):
366
  self.processor = None
@@ -369,15 +451,11 @@ def __init__(self):
369
  self.verified = False
370
  self.last_error = None
371
 
372
- def _clear_hydra(self, note=""):
373
  try:
374
- from hydra.core.global_hydra import GlobalHydra
375
- if GlobalHydra().is_initialized():
376
- GlobalHydra.instance().clear()
377
- if note:
378
- print(f"(Hydra cleared: {note})")
379
  except Exception:
380
- pass
381
 
382
  def initialize(self) -> bool:
383
  if not MATANYONE_IMPORTED:
@@ -391,7 +469,7 @@ def initialize(self) -> bool:
391
  # 1) Preferred: HF repo-id
392
  try:
393
  print(f"Initializing MatAnyone (HF repo-id) on {self.device}…")
394
- self.processor = MatAnyInferenceCore("PeiqingYang/MatAnyone") # will auto-download model.safetensors
395
  self.verified = hasattr(self.processor, "process_video")
396
  if self.device == "cuda":
397
  torch.cuda.empty_cache()
@@ -399,30 +477,22 @@ def initialize(self) -> bool:
399
  self.initialized = True
400
  print("✅ MatAnyone initialized via HF repo-id.")
401
  return True
402
- except NameError as e:
403
- # Common if `safetensors` is missing during HF load
404
- self.last_error = f"HF init NameError: {e}"
405
- print(f"HF init failed: {e}")
406
- except TypeError as e:
407
- if "network" in str(e).lower():
408
- print("MatAnyone InferenceCore expects a `network`; switching to local checkpoint init.")
409
- self.last_error = f"HF init TypeError: {e}"
410
  except Exception as e:
411
  self.last_error = f"HF init failed: {type(e).__name__}: {e}"
412
  print(self.last_error)
413
 
414
- # 2) Fallback: local checkpoint → get_default_model → InferenceCore(network)
415
  try:
416
  print("Falling back to local checkpoint init for MatAnyone…")
417
- self._clear_hydra("MatAnyone/local-pre")
 
 
418
 
419
  import requests
420
  from matanyone.utils.get_default_model import get_matanyone_model
421
 
422
- ckpt_dir = Path("./pretrained_models")
423
- ckpt_dir.mkdir(parents=True, exist_ok=True)
424
  ckpt_path = ckpt_dir / "matanyone.pth"
425
-
426
  if not ckpt_path.exists():
427
  url = "https://github.com/pq-yang/MatAnyone/releases/download/v1.0.0/matanyone.pth"
428
  print(f"Downloading MatAnyone checkpoint from: {url}")
@@ -434,28 +504,52 @@ def initialize(self) -> bool:
434
  f.write(chunk)
435
  print(f"Checkpoint saved to {ckpt_path}")
436
 
437
- network = get_matanyone_model(
438
- str(ckpt_path),
439
- device=("cuda" if CUDA_AVAILABLE else "cpu")
440
- )
441
  self.processor = MatAnyInferenceCore(network)
442
  self.verified = hasattr(self.processor, "process_video")
443
-
444
  if self.device == "cuda":
445
- torch.cuda.empty_cache()
446
- _ = torch.rand(1, device="cuda") * 0.0
447
-
448
  self.initialized = True
449
  print("✅ MatAnyone initialized via local checkpoint.")
450
- self._clear_hydra("MatAnyone/local-post")
451
  return True
452
-
453
  except Exception as e:
454
  self.last_error = f"Local init failed: {type(e).__name__}: {e}"
455
  print(f"MatAnyone initialization failed: {self.last_error}")
456
  import traceback; traceback.print_exc()
457
  return False
458
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
459
  def create_mask_optimized(self, video_path: str, output_path: str) -> str:
460
  cap = cv2.VideoCapture(video_path)
461
  ret, frame = cap.read()
@@ -477,11 +571,12 @@ def create_mask_optimized(self, video_path: str, output_path: str) -> str:
477
  best = masks[np.argmax(scores)]
478
  mask = (best.astype(np.uint8) * 255)
479
  cv2.imwrite(output_path, mask)
 
480
  return output_path
481
  except Exception as e:
482
  print(f"SAM2 mask creation failed; fallback rectangle. Error: {e}")
483
 
484
- # Fallback: centered box (pipeline continuity)
485
  h, w = frame.shape[:2]
486
  mask = np.zeros((h, w), dtype=np.uint8)
487
  mx, my = int(w * 0.15), int(h * 0.10)
@@ -497,25 +592,30 @@ def process_video_optimized(self, input_path: str, output_dir: str):
497
  if CUDA_AVAILABLE:
498
  torch.cuda.empty_cache(); gc.collect()
499
 
500
- # Lightweight video meta (frames) for logs
501
- cap = cv2.VideoCapture(input_path)
502
- frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) or 0
503
- cap.release()
504
- if frame_count:
505
- print(f"[MatAnyone] Input frames: {frame_count}")
506
 
507
  mask_path = os.path.join(output_dir, "mask.png")
 
508
  self.create_mask_optimized(input_path, mask_path)
509
 
510
- # Only pass supported kwargs for MatAnyone.process_video
511
- # (Typical signature: input_path, mask_path, output_path=None, n_warmup=10, r_erode=10, r_dilate=10, suffix='', save_image=False, max_size=-1)
512
  fg_path, alpha_path = self.processor.process_video(
513
- input_path=input_path,
514
  mask_path=mask_path,
515
  output_path=output_dir,
516
- max_size=int(os.getenv("MAX_MODEL_SIZE", "1920")),
517
  )
518
- print("✅ MatAnyone processing complete.")
 
 
 
 
 
519
  return alpha_path
520
  except Exception as e:
521
  print(f"❌ MatAnyone processing failed: {e}")
@@ -545,7 +645,7 @@ def process_frame_rembg_optimized(frame_bgr_u8, bg_img_rgb_u8):
545
  return cv2.cvtColor(frame_bgr_u8, cv2.COLOR_BGR2RGB)
546
 
547
  # =========================
548
- # Compositing (fixed)
549
  # =========================
550
  def composite_with_background(original_path, alpha_path, bg_path=None, bg_preset=None):
551
  """Composite original video with alpha matte and a background (image or preset)."""
@@ -553,8 +653,7 @@ def composite_with_background(original_path, alpha_path, bg_path=None, bg_preset
553
  orig_clip = VideoFileClip(original_path)
554
  alpha_clip = VideoFileClip(alpha_path)
555
 
556
- w, h = orig_clip.size # MoviePy returns (w, h)
557
-
558
  if bg_path:
559
  bg_img = cv2.imread(bg_path)
560
  if bg_img is None:
@@ -565,15 +664,13 @@ def composite_with_background(original_path, alpha_path, bg_path=None, bg_preset
565
  bg_img = build_professional_bg(w, h, bg_preset)
566
 
567
  def process_func(get_frame, t):
568
- frame = get_frame(t) # float [0,1], shape (h, w, 3)
569
- a = alpha_clip.get_frame(t) # float [0,1], shape (h, w) or (h, w, 1..3)
570
-
571
  if a.ndim == 2:
572
  a = a[..., None]
573
  elif a.shape[2] > 1:
574
  a = a[..., :1]
575
  a = np.clip(a, 0.0, 1.0).astype(np.float32)
576
-
577
  bg_f32 = (bg_img.astype(np.float32) / 255.0)
578
  comp = a * frame.astype(np.float32) + (1.0 - a) * bg_f32
579
  return comp.astype(np.float32)
@@ -582,12 +679,11 @@ def process_func(get_frame, t):
582
  output_path = "final_output.mp4"
583
  new_clip.write_videofile(output_path, audio=False, logger=None)
584
 
585
- alpha_clip.close()
586
- orig_clip.close()
587
  return output_path
588
 
589
  # =========================
590
- # Fallback whole-video path
591
  # =========================
592
  def process_video_rembg_fallback(video_path, bg_image_path=None, bg_preset=None):
593
  print("🔄 Processing with rembg fallback…")
@@ -619,10 +715,11 @@ def process_func(get_frame, t):
619
  new_clip = clip.fl(process_func)
620
  output_path = "rembg_output.mp4"
621
  new_clip.write_videofile(output_path, audio=False, logger=None)
 
622
  return output_path
623
 
624
  # =========================
625
- # Self-test harness (real)
626
  # =========================
627
  def _ok(flag): # visual mark in reports
628
  return "✅" if flag else "❌"
@@ -649,10 +746,9 @@ def self_test_ffmpeg_moviepy() -> (bool, str):
649
  clip = ImageSequenceClip(frames, fps=4)
650
  with tempfile.TemporaryDirectory() as td:
651
  vp = os.path.join(td, "tiny.mp4")
652
- clip.write_videofile(vp, audio=False, logger=None)
653
  clip_r = VideoFileClip(vp)
654
- _ = clip_r.get_frame(0.1)
655
- clip_r.close()
656
  return True, "FFmpeg/MoviePy encode/decode ok"
657
  except Exception as e:
658
  return False, f"FFmpeg/MoviePy test failed: {e}"
@@ -661,10 +757,9 @@ def self_test_rembg() -> (bool, str):
661
  try:
662
  if not REMBG_AVAILABLE:
663
  return False, "rembg not importable"
664
- img = np.zeros((64, 64, 3), dtype=np.uint8)
665
- img[:,:] = (0, 255, 0) # green
666
  pil = Image.fromarray(img)
667
- out = remove(pil) # real call
668
  ok = isinstance(out, Image.Image) and out.size == (64, 64)
669
  return ok, "rembg ok" if ok else "rembg returned unexpected output"
670
  except Exception as e:
@@ -693,53 +788,32 @@ def self_test_matanyone() -> (bool, str):
693
  return False, f"MatAnyone init failed: {getattr(matanyone_processor, 'last_error', 'no details')}"
694
  if not matanyone_processor.verified:
695
  return False, "MatAnyone missing process_video API"
696
- # Create a tiny real video + mask, then run process_video
697
  with tempfile.TemporaryDirectory() as td:
 
698
  frames = []
699
- for t in range(12): # a dozen frames > typical warmup
700
  frame = np.zeros((64, 64, 3), dtype=np.uint8)
701
- x = 8 + t*3
702
- cv2.rectangle(frame, (x, 20), (min(x+12, 63), 44), (200, 200, 200), -1)
703
  frames.append(frame)
704
-
705
  vid_path = os.path.join(td, "tiny_input.mp4")
706
  clip = ImageSequenceClip(frames, fps=8)
707
- clip.write_videofile(vid_path, audio=False, logger=None)
708
- visible_test_clip = CHECKPOINTS_DIR / "selftest_clip.mp4"
709
- clip.write_videofile(str(visible_test_clip), audio=False, logger=None)
710
- clip.close()
711
- print(f"📹 Self-test clip saved to {visible_test_clip}")
712
 
713
- # Simple central mask (seed)
714
  mask = np.zeros((64, 64), dtype=np.uint8)
715
  cv2.rectangle(mask, (24, 24), (40, 40), 255, -1)
716
  mask_path = os.path.join(td, "mask.png")
717
  cv2.imwrite(mask_path, mask)
718
- print("Self-test mask uniques:", np.unique(mask))
719
 
720
- # Introspect signature for clarity
721
- try:
722
- import inspect
723
- sig = str(inspect.signature(matanyone_processor.processor.process_video))
724
- print(f"MatAnyone process_video signature: {sig}")
725
- except Exception:
726
- pass
727
-
728
- fg_path, alpha_path = matanyone_processor.processor.process_video(
729
- input_path=vid_path,
730
- mask_path=mask_path,
731
- output_path=td,
732
- max_size=256,
733
- )
734
- ok = os.path.exists(alpha_path)
735
- if ok:
736
- try:
737
- _alpha_clip = VideoFileClip(alpha_path)
738
- _ = _alpha_clip.get_frame(0.1)
739
- _alpha_clip.close()
740
- except Exception as _e:
741
- return False, f"MatAnyone alpha video unreadable: {_e}"
742
- return ok, "MatAnyone process_video ok" if ok else "MatAnyone did not produce alpha video"
743
  except Exception as e:
744
  return False, f"MatAnyone test failed: {e}"
745
 
@@ -750,7 +824,6 @@ def run_self_test() -> str:
750
  lines.append(f"Torch: {torch.__version__ if TORCH_AVAILABLE else 'N/A'} | CUDA: {CUDA_AVAILABLE} | Device: {DEVICE} | GPU: {GPU_NAME}")
751
  lines.append(f"FFmpeg on PATH: {bool(shutil.which('ffmpeg'))}")
752
  lines.append("")
753
-
754
  tests = [
755
  ("CUDA", self_test_cuda),
756
  ("FFmpeg/MoviePy", self_test_ffmpeg_moviepy),
@@ -768,13 +841,20 @@ def run_self_test() -> str:
768
  # =========================
769
  # Gradio callback
770
  # =========================
771
- def gradio_interface_optimized(video_file, bg_image, use_matanyone=True, bg_preset="Office (Soft Gray)"):
772
  """Main entry: choose MatAnyone (if verified) or rembg fallback; show real metrics."""
773
  if video_file is None:
774
  return None, None, "Please upload a video."
775
  video_path = video_file.name if hasattr(video_file, "name") else video_file
776
  bg_path = bg_image.name if (bg_image is not None and hasattr(bg_image, "name")) else (bg_image if bg_image else None)
777
 
 
 
 
 
 
 
 
778
  start_time = time.time()
779
  try:
780
  if use_matanyone and MATANYONE_IMPORTED:
@@ -837,7 +917,7 @@ def show_matanyone_diag():
837
  # =========================
838
  with gr.Blocks(title="Video Background Replacer - GPU Optimized", theme=gr.themes.Soft()) as demo:
839
  gr.Markdown("# 🎬 Video Background Replacer (GPU Optimized)")
840
- gr.Markdown("All green checks are earned by real tests. No guesses. K-Governor is active for safety.")
841
 
842
  gpu_status = f"✅ {GPU_NAME}" if CUDA_AVAILABLE else "❌ CPU Only"
843
  matany_status = "✅ Module Imported" if MATANYONE_IMPORTED else "❌ Not Importable"
@@ -854,8 +934,7 @@ def show_matanyone_diag():
854
  <strong>MatAnyone ready:</strong> {"✅ Yes" if getattr(matanyone_processor, "verified", False) else "❌ No"}<br>
855
  <strong>SAM2:</strong> {sam2_status}<br>
856
  <strong>rembg:</strong> {rembg_status}<br>
857
- <strong>PyTorch:</strong> {torch_status}<br>
858
- <strong>K-Governor:</strong> ✅ Active (torch.topk / torch.kthvalue)
859
  </div>
860
  """
861
  gr.HTML(status_html)
@@ -880,6 +959,15 @@ def show_matanyone_diag():
880
  value=MATANYONE_IMPORTED,
881
  interactive=MATANYONE_IMPORTED,
882
  )
 
 
 
 
 
 
 
 
 
883
  process_btn = gr.Button("🚀 Process Video", variant="primary", size="lg")
884
 
885
  gr.Markdown("### 🔎 Self-Verification")
@@ -897,7 +985,7 @@ def show_matanyone_diag():
897
 
898
  process_btn.click(
899
  fn=gradio_interface_optimized,
900
- inputs=[video_input, bg_input, use_matanyone, bg_preset],
901
  outputs=[output_video, download_file, status_text],
902
  )
903
 
@@ -916,10 +1004,9 @@ def show_matanyone_diag():
916
  gr.Markdown("---")
917
  gr.Markdown("""
918
  **Notes**
 
 
919
  - SAM2 shows ✅ only after a real micro-inference passes.
920
- - K-Governor clamps unsafe K at runtime and logs the clamp (shape/dim and old→new K).
921
- - MatAnyone tries HF load first (needs `safetensors`), else falls back to local checkpoint.
922
- - The self-test saves a visible clip to `checkpoints/selftest_clip.mp4` for inspection.
923
  - FFmpeg/MoviePy, CUDA, and rembg are validated by actually running them.
924
  """)
925
 
@@ -930,7 +1017,6 @@ def show_matanyone_diag():
930
  if "--self-test" in sys.argv:
931
  report = run_self_test()
932
  print(report)
933
- # Exit non-zero if any test failed
934
  exit_code = 0
935
  for line in report.splitlines():
936
  if line.startswith("❌"):
 
1
  #!/usr/bin/env python3
2
  """
3
  HuggingFace Space app: Video background replacement with MatAnyone/SAM2/rembg.
4
+ GPU-optimized with real verification, K-Governor (safe_topk), and short-clip stabilizer.
5
+
6
+ What’s new vs. your last version:
7
+ - K-Governor: rewrites MatAnyone .topk/.kthvalue call-sites to use safe guards (no more "selected index k out of range").
8
+ - Pre-roll stabilizer: duplicates first real frame to grow the candidate bank for very short clips; trimmed out before compositing.
9
+ - Diagnostic prints show exactly what was patched and when clamping happens.
10
+
11
+ Environment knobs:
12
+ - MATANYONE_PREROLL_FRAMES (default 12) how many frames to prepend as pre-roll.
13
+ - MATANYONE_STABILIZE (default "true") enable/disable pre-roll stabilizer globally.
14
+ - MAX_MODEL_SIZE, MATANYONE_* knobs unchanged from before.
 
 
 
15
  """
16
 
17
  import os
 
27
  # =========================
28
  # Environment configuration
29
  # =========================
30
+ os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True,max_split_size_mb:512")
31
+ os.environ.setdefault("CUDA_MODULE_LOADING", "LAZY")
32
+ os.environ.setdefault("OMP_NUM_THREADS", "8")
33
+ os.environ.setdefault("MKL_NUM_THREADS", "8")
34
+ os.environ.setdefault("PYTHONUNBUFFERED", "1")
35
 
36
  # MatAnyone GPU prefs
37
+ os.environ.setdefault("MATANYONE_MAX_EDGE", "1024")
38
+ os.environ.setdefault("MATANYONE_TARGET_PIXELS", "1000000")
39
+ os.environ.setdefault("MATANYONE_WINDOWED", "1")
40
+ os.environ.setdefault("MATANYONE_WINDOW", "16")
41
+ os.environ.setdefault("MAX_MODEL_SIZE", "1920")
42
 
43
  # CUDA + cuDNN
44
+ os.environ.setdefault("CUDA_LAUNCH_BLOCKING", "0")
45
+ os.environ.setdefault("TORCH_CUDNN_V8_API_ENABLED", "1")
46
+ os.environ.setdefault("CUDNN_BENCHMARK", "1")
47
 
48
  # HF cache prefs
49
+ os.environ.setdefault("HF_HOME", "./checkpoints/hf")
50
+ os.environ.setdefault("TRANSFORMERS_CACHE", "./checkpoints/hf")
51
+ os.environ.setdefault("HF_DATASETS_CACHE", "./checkpoints/hf")
52
+ os.environ.setdefault("HF_HUB_DISABLE_SYMLINKS", "1")
53
+ os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "1")
54
 
55
  # Gradio binding
56
+ os.environ.setdefault("GRADIO_SERVER_NAME", "0.0.0.0")
57
+ os.environ.setdefault("GRADIO_SERVER_PORT", "7860")
58
 
59
  # Feature flags
60
+ os.environ.setdefault("USE_MATANYONE", "true")
61
+ os.environ.setdefault("USE_SAM2", "true")
62
+ os.environ.setdefault("SELF_CHECK_MODE", "false")
63
+
64
+ # Short-clip stabilizer
65
+ os.environ.setdefault("MATANYONE_STABILIZE", "true")
66
+ os.environ.setdefault("MATANYONE_PREROLL_FRAMES", "12") # duplicated first frame count
67
 
68
  # =========================
69
  # Imports
 
72
  import numpy as np
73
  from PIL import Image
74
  import gradio as gr
75
+ from moviepy.editor import VideoFileClip, ImageSequenceClip, concatenate_videoclips
76
 
77
  print("=" * 50)
78
  print("Application Startup at", os.popen('date').read().strip())
 
126
  print(f"Added to path: {p}")
127
 
128
  # =========================
129
+ # K-Governor: patch MatAnyone topk/kthvalue call-sites
130
+ # =========================
131
+ import re
132
def _write_safe_ops_file(pkg_root: Path):
    """Generate ``safe_ops.py`` inside the MatAnyone ``utils`` package.

    The generated module exposes ``safe_topk`` and ``safe_kthvalue``, which
    clamp ``k`` to the size of the selected dimension before delegating to
    the corresponding torch op.

    Args:
        pkg_root: Root of the MatAnyone checkout. The file is written to
            ``<pkg_root>/matanyone/utils`` when that directory exists,
            otherwise to ``<pkg_root>/utils`` (created if missing).
    """
    utils_dir = pkg_root / "matanyone" / "utils"
    # Handle clones that place the package contents directly at the root.
    if not utils_dir.exists():
        utils_dir = pkg_root / "utils"
    utils_dir.mkdir(parents=True, exist_ok=True)
    (utils_dir / "safe_ops.py").write_text(
        """
import os

import torch

_VERBOSE = bool(int(os.environ.get("SAFE_TOPK_VERBOSE", "1")))


def _log(msg):
    if _VERBOSE:
        print(f"[K-Governor] {msg}")


def _unwrapped(fn):
    # Builtin torch ops may not accept a ``__wrapped__`` attribute, so fall
    # back to the op itself when no stashed original is present.
    return getattr(fn, "__wrapped__", fn)


def safe_topk(x, k, dim=None, largest=True, sorted=True):
    if not isinstance(k, int):
        k = int(k)
    if dim is None:
        dim = -1
    n = x.size(dim)
    k_eff = max(1, min(k, int(n)))
    if k_eff != k:
        _log(f"torch.topk: clamp k {k} -> {k_eff} for dim={dim} shape={tuple(x.shape)}")
    values, indices = _unwrapped(torch.topk)(x, k_eff, dim=dim, largest=largest, sorted=sorted)
    if k_eff < k:
        # Pad back to the requested k so callers that index by k keep working.
        # The fill value is the worst possible score for the requested order.
        pad = k - k_eff
        pad_shape = list(values.shape); pad_shape[dim] = pad
        if values.is_floating_point():
            fill = float('-inf') if largest else float('inf')
        else:
            info = torch.iinfo(values.dtype)
            fill = info.min if largest else info.max
        pad_vals = values.new_full(pad_shape, fill)
        pad_idx = indices.new_zeros(pad_shape, dtype=indices.dtype)
        values = torch.cat([values, pad_vals], dim=dim)
        indices = torch.cat([indices, pad_idx], dim=dim)
    return values, indices


def safe_kthvalue(x, k, dim=None, keepdim=False):
    if not isinstance(k, int):
        k = int(k)
    if dim is None:
        dim = -1
    n = x.size(dim)
    k_eff = max(1, min(k, int(n)))
    if k_eff != k:
        _log(f"torch.kthvalue: clamp k {k} -> {k_eff} for dim={dim} shape={tuple(x.shape)}")
    return _unwrapped(torch.kthvalue)(x, k_eff, dim=dim, keepdim=keepdim)
""".lstrip(),
        encoding="utf-8",
    )
179
+
180
def _patch_matanyone_sources(repo_dir: Path) -> int:
    """Rewrite MatAnyone ``topk``/``kthvalue`` call-sites to the safe wrappers.

    Every ``*.py`` file under ``<repo_dir>/matanyone`` (or ``repo_dir`` itself
    when that directory is absent) is scanned. ``torch.topk(...)`` and
    ``obj.topk(...)`` become ``safe_topk(...)``; the ``kthvalue`` forms become
    ``safe_kthvalue(...)``. Files in which nothing was rewritten are left
    untouched. Files that were rewritten get the ``safe_ops`` import added
    once, placed after the module docstring and any ``from __future__``
    imports so it can never land inside a multi-line import statement.

    Args:
        repo_dir: Root of the MatAnyone checkout.

    Returns:
        Number of files whose content was modified.
    """
    import ast  # local import: only the patcher needs it

    root = repo_dir / "matanyone"
    if not root.exists():  # some layouts have files directly
        root = repo_dir
    header_import = "from matanyone.utils.safe_ops import safe_topk, safe_kthvalue\n"

    pat_torch_topk = re.compile(r"\btorch\.topk\s*\(")
    pat_method_topk = re.compile(r"(\b[\w\.]+)\.topk\s*\(")
    pat_torch_kth = re.compile(r"\btorch\.kthvalue\s*\(")
    pat_method_kth = re.compile(r"(\b[\w\.]+)\.kthvalue\s*\(")

    def _import_line_index(tree, src_lines):
        """Return the 0-based line index at which the import can be inserted."""
        body = tree.body
        line = 0
        start = 0
        first = body[0] if body else None
        if (
            isinstance(first, ast.Expr)
            and isinstance(first.value, ast.Constant)
            and isinstance(first.value.value, str)
        ):
            line = first.end_lineno  # just after the module docstring
            start = 1
        for node in body[start:]:
            if isinstance(node, ast.ImportFrom) and node.module == "__future__":
                line = node.end_lineno  # __future__ imports must stay first
            else:
                break
        if line == 0:
            # Keep a shebang / encoding cookie / header comments on top.
            while line < len(src_lines) and src_lines[line].startswith("#"):
                line += 1
        return line

    changed = 0
    for py in root.rglob("*.py"):
        if py.name == "safe_ops.py":
            continue  # never rewrite the wrapper module itself
        try:
            orig = py.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError) as exc:
            print(f"[K-Governor] Skipping unreadable file {py}: {exc}")
            continue

        # Function-style calls first, so the method patterns below do not
        # treat ``torch`` itself as the receiver object.
        txt = pat_torch_topk.sub("safe_topk(", orig)
        txt = pat_torch_kth.sub("safe_kthvalue(", txt)
        txt = pat_method_topk.sub(lambda m: f"safe_topk({m.group(1)}, ", txt)
        txt = pat_method_kth.sub(lambda m: f"safe_kthvalue({m.group(1)}, ", txt)

        if txt == orig:
            continue  # no call-sites here: leave the file alone

        if "matanyone.utils.safe_ops" not in txt:
            try:
                tree = ast.parse(txt)
            except SyntaxError as exc:
                # Do not write back something we cannot validate.
                print(f"[K-Governor] Skipping unparsable file {py}: {exc}")
                continue
            src_lines = txt.split("\n")
            src_lines.insert(
                _import_line_index(tree, src_lines), header_import.rstrip("\n")
            )
            txt = "\n".join(src_lines)

        py.write_text(txt, encoding="utf-8")
        changed += 1
    return changed
222
+
223
+ # Stash original torch ops for our wrappers
224
+ try:
225
+ import torch as _torch_guard
226
+ if not hasattr(_torch_guard.topk, "__wrapped__"):
227
+ _torch_guard.topk.__wrapped__ = _torch_guard.topk
228
+ if not hasattr(_torch_guard.kthvalue, "__wrapped__"):
229
+ _torch_guard.kthvalue.__wrapped__ = _torch_guard.kthvalue
230
+ except Exception:
231
+ pass
232
+
233
+ # Write safe ops & patch sources
234
+ MATANY_REPO_DIR = TP_DIR / "matanyone"
235
+ try:
236
+ _write_safe_ops_file(MATANY_REPO_DIR)
237
+ patched_files = _patch_matanyone_sources(MATANY_REPO_DIR)
238
+ print(f"[K-Governor] Patched MatAnyone sources: {patched_files} files updated.")
239
+ except Exception as e:
240
+ print(f"[K-Governor] Patch failed: {e}")
241
+
242
+ # =========================
243
+ # Torch & device
244
  # =========================
245
  TORCH_AVAILABLE = False
246
  CUDA_AVAILABLE = False
 
273
  except Exception as e:
274
  print(f"Torch not available: {e}")
275
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
276
  # =========================
277
  # Light GPU monitor
278
  # =========================
 
356
  config_dir = str(TP_DIR / "sam2" / "sam2" / "configs")
357
  config_file = "sam2.1/sam2.1_hiera_t.yaml"
358
  initialize_config_dir(config_dir=config_dir, version_base=None)
359
+ _ = compose(config_name=config_file)
360
 
361
  model = build_sam2(config_file, str(ckpt), device="cuda" if CUDA_AVAILABLE else "cpu")
362
 
363
+ # Optional torch.compile
364
  if CUDA_AVAILABLE and hasattr(torch, "compile"):
365
  try:
366
  model = torch.compile(model, mode="max-autotune")
 
369
 
370
  SAM2_PREDICTOR = SAM2ImagePredictor(model)
371
 
372
+ # Verify micro inference
373
  try:
374
  dummy = np.zeros((64, 64, 3), dtype=np.uint8)
375
  SAM2_PREDICTOR.set_image(dummy)
 
409
  print(f"rembg not available: {e}")
410
 
411
  # =========================
412
+ # Background helpers
413
  # =========================
414
  def make_solid(w, h, rgb):
415
  return np.full((h, w, 3), rgb, dtype=np.uint8)
 
435
  return make_solid(w, h, (240, 240, 240))
436
 
437
  # =========================
438
+ # MatAnyone wrapper (with pre-roll stabilizer)
439
  # =========================
440
  class OptimizedMatAnyoneProcessor:
441
  """
442
+ Wrapper around MatAnyone's InferenceCore with:
443
+ - HF repo-id path first (may require safetensors); fallback to local checkpoint via get_matanyone_model.
444
+ - K-Governor already patches library calls; no API changes needed here.
445
+ - Optional pre-roll: duplicate the first real frame to build a stable memory bank; trim it from alpha before compositing.
 
446
  """
447
  def __init__(self):
448
  self.processor = None
 
451
  self.verified = False
452
  self.last_error = None
453
 
454
+ self.stabilize = os.getenv("MATANYONE_STABILIZE", "true").lower() == "true"
455
  try:
456
+ self.preroll_frames = max(0, int(os.getenv("MATANYONE_PREROLL_FRAMES", "12")))
 
 
 
 
457
  except Exception:
458
+ self.preroll_frames = 12
459
 
460
  def initialize(self) -> bool:
461
  if not MATANYONE_IMPORTED:
 
469
  # 1) Preferred: HF repo-id
470
  try:
471
  print(f"Initializing MatAnyone (HF repo-id) on {self.device}…")
472
+ self.processor = MatAnyInferenceCore("PeiqingYang/MatAnyone")
473
  self.verified = hasattr(self.processor, "process_video")
474
  if self.device == "cuda":
475
  torch.cuda.empty_cache()
 
477
  self.initialized = True
478
  print("✅ MatAnyone initialized via HF repo-id.")
479
  return True
 
 
 
 
 
 
 
 
480
  except Exception as e:
481
  self.last_error = f"HF init failed: {type(e).__name__}: {e}"
482
  print(self.last_error)
483
 
484
+ # 2) Fallback: local checkpoint → network → InferenceCore(network)
485
  try:
486
  print("Falling back to local checkpoint init for MatAnyone…")
487
+ from hydra.core.global_hydra import GlobalHydra
488
+ if hasattr(GlobalHydra, "instance") and GlobalHydra().is_initialized():
489
+ GlobalHydra.instance().clear()
490
 
491
  import requests
492
  from matanyone.utils.get_default_model import get_matanyone_model
493
 
494
+ ckpt_dir = Path("./pretrained_models"); ckpt_dir.mkdir(parents=True, exist_ok=True)
 
495
  ckpt_path = ckpt_dir / "matanyone.pth"
 
496
  if not ckpt_path.exists():
497
  url = "https://github.com/pq-yang/MatAnyone/releases/download/v1.0.0/matanyone.pth"
498
  print(f"Downloading MatAnyone checkpoint from: {url}")
 
504
  f.write(chunk)
505
  print(f"Checkpoint saved to {ckpt_path}")
506
 
507
+ network = get_matanyone_model(str(ckpt_path), device=("cuda" if CUDA_AVAILABLE else "cpu"))
 
 
 
508
  self.processor = MatAnyInferenceCore(network)
509
  self.verified = hasattr(self.processor, "process_video")
 
510
  if self.device == "cuda":
511
+ torch.cuda.empty_cache(); _ = torch.rand(1, device="cuda") * 0.0
 
 
512
  self.initialized = True
513
  print("✅ MatAnyone initialized via local checkpoint.")
 
514
  return True
 
515
  except Exception as e:
516
  self.last_error = f"Local init failed: {type(e).__name__}: {e}"
517
  print(f"MatAnyone initialization failed: {self.last_error}")
518
  import traceback; traceback.print_exc()
519
  return False
520
 
521
+ # ---- Pre-roll helpers
522
@staticmethod
def _build_preroll_concat(input_path: str, frames: int) -> tuple[str, float]:
    """Return ``(concat_path, preroll_seconds)`` for ``[pre-roll + original]``.

    The pre-roll is the first frame of ``input_path`` repeated ``frames``
    times at the clip's frame rate rounded to an integer (24 when the clip
    reports none). The concatenated video is written to a new temporary
    ``*_concat.mp4`` file that the caller is responsible for deleting.

    Args:
        input_path: Path of the source video.
        frames: Number of duplicated frames to prepend; values <= 0 disable
            the pre-roll.

    Returns:
        The path of the video to process and the pre-roll duration in
        seconds. With no pre-roll this is ``(input_path, 0.0)`` and the
        source video is not opened.
    """
    preroll_frames = max(0, frames)
    if preroll_frames == 0:
        return input_path, 0.0

    clip = VideoFileClip(input_path)
    pre = None
    concat = None
    try:
        fps = max(1, int(round(clip.fps or 24)))
        first = clip.get_frame(0)
        pre = ImageSequenceClip([first] * preroll_frames, fps=fps)
        concat = concatenate_videoclips([pre, clip])
        tmp = tempfile.NamedTemporaryFile(delete=False, suffix="_concat.mp4")
        # Only the reserved name is needed; release the handle so the encoder
        # can open the path itself.
        tmp.close()
        concat.write_videofile(tmp.name, audio=False, logger=None)
        return tmp.name, preroll_frames / fps
    finally:
        # Release reader/writer resources even when encoding fails.
        for opened in (pre, concat, clip):
            if opened is not None:
                opened.close()
540
+
541
+ @staticmethod
542
+ def _trim_head(video_path: str, seconds: float) -> str:
543
+ if seconds <= 0:
544
+ return video_path
545
+ clip = VideoFileClip(video_path)
546
+ dur = clip.duration or 0
547
+ start = min(seconds, max(0.0, dur - 0.001))
548
+ trimmed = tempfile.NamedTemporaryFile(delete=False, suffix="_trim.mp4").name
549
+ clip.subclip(start, None).write_videofile(trimmed, audio=False, logger=None)
550
+ clip.close()
551
+ return trimmed
552
+
553
  def create_mask_optimized(self, video_path: str, output_path: str) -> str:
554
  cap = cv2.VideoCapture(video_path)
555
  ret, frame = cap.read()
 
571
  best = masks[np.argmax(scores)]
572
  mask = (best.astype(np.uint8) * 255)
573
  cv2.imwrite(output_path, mask)
574
+ print(f"Self-test mask uniques: {np.unique(mask//255)}")
575
  return output_path
576
  except Exception as e:
577
  print(f"SAM2 mask creation failed; fallback rectangle. Error: {e}")
578
 
579
+ # Fallback: centered box
580
  h, w = frame.shape[:2]
581
  mask = np.zeros((h, w), dtype=np.uint8)
582
  mx, my = int(w * 0.15), int(h * 0.10)
 
592
  if CUDA_AVAILABLE:
593
  torch.cuda.empty_cache(); gc.collect()
594
 
595
+ # Optional pre-roll stabilizer (always trimmed out later)
596
+ concat_path = input_path
597
+ preroll_sec = 0.0
598
+ if self.stabilize and self.preroll_frames > 0:
599
+ concat_path, preroll_sec = self._build_preroll_concat(input_path, self.preroll_frames)
600
+ print(f"[Stabilizer] Pre-rolled {self.preroll_frames} frames ({preroll_sec:.3f}s).")
601
 
602
  mask_path = os.path.join(output_dir, "mask.png")
603
+ # Create mask from the ORIGINAL first frame (not the pre-roll)
604
  self.create_mask_optimized(input_path, mask_path)
605
 
606
+ # Call MatAnyone (signature introspected earlier; pass only known-safe kwargs)
 
607
  fg_path, alpha_path = self.processor.process_video(
608
+ input_path=concat_path,
609
  mask_path=mask_path,
610
  output_path=output_dir,
611
+ max_size=int(os.getenv("MAX_MODEL_SIZE", "1920"))
612
  )
613
+
614
+ # If we had a pre-roll, trim it off the alpha
615
+ if preroll_sec > 0.0:
616
+ alpha_path_trimmed = self._trim_head(alpha_path, preroll_sec)
617
+ print(f"[Stabilizer] Trimmed {preroll_sec:.3f}s from alpha.")
618
+ return alpha_path_trimmed
619
  return alpha_path
620
  except Exception as e:
621
  print(f"❌ MatAnyone processing failed: {e}")
 
645
  return cv2.cvtColor(frame_bgr_u8, cv2.COLOR_BGR2RGB)
646
 
647
  # =========================
648
+ # Compositing
649
  # =========================
650
  def composite_with_background(original_path, alpha_path, bg_path=None, bg_preset=None):
651
  """Composite original video with alpha matte and a background (image or preset)."""
 
653
  orig_clip = VideoFileClip(original_path)
654
  alpha_clip = VideoFileClip(alpha_path)
655
 
656
+ w, h = orig_clip.size
 
657
  if bg_path:
658
  bg_img = cv2.imread(bg_path)
659
  if bg_img is None:
 
664
  bg_img = build_professional_bg(w, h, bg_preset)
665
 
666
  def process_func(get_frame, t):
667
+ frame = get_frame(t) # float [0,1]
668
+ a = alpha_clip.get_frame(t)
 
669
  if a.ndim == 2:
670
  a = a[..., None]
671
  elif a.shape[2] > 1:
672
  a = a[..., :1]
673
  a = np.clip(a, 0.0, 1.0).astype(np.float32)
 
674
  bg_f32 = (bg_img.astype(np.float32) / 255.0)
675
  comp = a * frame.astype(np.float32) + (1.0 - a) * bg_f32
676
  return comp.astype(np.float32)
 
679
  output_path = "final_output.mp4"
680
  new_clip.write_videofile(output_path, audio=False, logger=None)
681
 
682
+ alpha_clip.close(); orig_clip.close(); new_clip.close()
 
683
  return output_path
684
 
685
  # =========================
686
+ # Fallback whole-video path (rembg)
687
  # =========================
688
  def process_video_rembg_fallback(video_path, bg_image_path=None, bg_preset=None):
689
  print("🔄 Processing with rembg fallback…")
 
715
  new_clip = clip.fl(process_func)
716
  output_path = "rembg_output.mp4"
717
  new_clip.write_videofile(output_path, audio=False, logger=None)
718
+ clip.close(); new_clip.close()
719
  return output_path
720
 
721
  # =========================
722
+ # Self-test harness
723
  # =========================
724
  def _ok(flag): # visual mark in reports
725
  return "✅" if flag else "❌"
 
746
  clip = ImageSequenceClip(frames, fps=4)
747
  with tempfile.TemporaryDirectory() as td:
748
  vp = os.path.join(td, "tiny.mp4")
749
+ clip.write_videofile(vp, audio=False, logger=None); clip.close()
750
  clip_r = VideoFileClip(vp)
751
+ _ = clip_r.get_frame(0.1); clip_r.close()
 
752
  return True, "FFmpeg/MoviePy encode/decode ok"
753
  except Exception as e:
754
  return False, f"FFmpeg/MoviePy test failed: {e}"
 
757
  try:
758
  if not REMBG_AVAILABLE:
759
  return False, "rembg not importable"
760
+ img = np.zeros((64, 64, 3), dtype=np.uint8); img[:,:] = (0, 255, 0)
 
761
  pil = Image.fromarray(img)
762
+ out = remove(pil)
763
  ok = isinstance(out, Image.Image) and out.size == (64, 64)
764
  return ok, "rembg ok" if ok else "rembg returned unexpected output"
765
  except Exception as e:
 
788
  return False, f"MatAnyone init failed: {getattr(matanyone_processor, 'last_error', 'no details')}"
789
  if not matanyone_processor.verified:
790
  return False, "MatAnyone missing process_video API"
 
791
  with tempfile.TemporaryDirectory() as td:
792
+ # tiny moving square video
793
  frames = []
794
+ for t in range(8):
795
  frame = np.zeros((64, 64, 3), dtype=np.uint8)
796
+ x = 8 + t*4
797
+ cv2.rectangle(frame, (x, 20), (x+12, 44), (200, 200, 200), -1)
798
  frames.append(frame)
 
799
  vid_path = os.path.join(td, "tiny_input.mp4")
800
  clip = ImageSequenceClip(frames, fps=8)
801
+ clip.write_videofile(vid_path, audio=False, logger=None); clip.close()
 
 
 
 
802
 
803
+ # central seed mask
804
  mask = np.zeros((64, 64), dtype=np.uint8)
805
  cv2.rectangle(mask, (24, 24), (40, 40), 255, -1)
806
  mask_path = os.path.join(td, "mask.png")
807
  cv2.imwrite(mask_path, mask)
 
808
 
809
+ # run through our stabilized path
810
+ alpha = matanyone_processor.process_video_optimized(vid_path, td)
811
+ if alpha is None or not os.path.exists(alpha):
812
+ return False, "MatAnyone did not produce alpha video"
813
+ # try open alpha
814
+ _alpha_clip = VideoFileClip(alpha)
815
+ _ = _alpha_clip.get_frame(0.1); _alpha_clip.close()
816
+ return True, "MatAnyone process_video ok"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
817
  except Exception as e:
818
  return False, f"MatAnyone test failed: {e}"
819
 
 
824
  lines.append(f"Torch: {torch.__version__ if TORCH_AVAILABLE else 'N/A'} | CUDA: {CUDA_AVAILABLE} | Device: {DEVICE} | GPU: {GPU_NAME}")
825
  lines.append(f"FFmpeg on PATH: {bool(shutil.which('ffmpeg'))}")
826
  lines.append("")
 
827
  tests = [
828
  ("CUDA", self_test_cuda),
829
  ("FFmpeg/MoviePy", self_test_ffmpeg_moviepy),
 
841
  # =========================
842
  # Gradio callback
843
  # =========================
844
+ def gradio_interface_optimized(video_file, bg_image, use_matanyone=True, bg_preset="Office (Soft Gray)", stabilize=True, preroll_frames=12):
845
  """Main entry: choose MatAnyone (if verified) or rembg fallback; show real metrics."""
846
  if video_file is None:
847
  return None, None, "Please upload a video."
848
  video_path = video_file.name if hasattr(video_file, "name") else video_file
849
  bg_path = bg_image.name if (bg_image is not None and hasattr(bg_image, "name")) else (bg_image if bg_image else None)
850
 
851
+ # reflect UI choices into processor for this run
852
+ matanyone_processor.stabilize = bool(stabilize)
853
+ try:
854
+ matanyone_processor.preroll_frames = max(0, int(preroll_frames))
855
+ except Exception:
856
+ pass
857
+
858
  start_time = time.time()
859
  try:
860
  if use_matanyone and MATANYONE_IMPORTED:
 
917
  # =========================
918
  with gr.Blocks(title="Video Background Replacer - GPU Optimized", theme=gr.themes.Soft()) as demo:
919
  gr.Markdown("# 🎬 Video Background Replacer (GPU Optimized)")
920
+ gr.Markdown("All green checks are earned by real tests. No guesses.")
921
 
922
  gpu_status = f"✅ {GPU_NAME}" if CUDA_AVAILABLE else "❌ CPU Only"
923
  matany_status = "✅ Module Imported" if MATANYONE_IMPORTED else "❌ Not Importable"
 
934
  <strong>MatAnyone ready:</strong> {"✅ Yes" if getattr(matanyone_processor, "verified", False) else "❌ No"}<br>
935
  <strong>SAM2:</strong> {sam2_status}<br>
936
  <strong>rembg:</strong> {rembg_status}<br>
937
+ <strong>PyTorch:</strong> {torch_status}
 
938
  </div>
939
  """
940
  gr.HTML(status_html)
 
959
  value=MATANYONE_IMPORTED,
960
  interactive=MATANYONE_IMPORTED,
961
  )
962
+ stabilize = gr.Checkbox(
963
+ label="🧱 Stabilize short clips (pre-roll first frame)",
964
+ value=os.getenv("MATANYONE_STABILIZE", "true").lower() == "true",
965
+ )
966
+ preroll_frames = gr.Slider(
967
+ label="Pre-roll frames",
968
+ minimum=0, maximum=24, step=1,
969
+ value=int(os.getenv("MATANYONE_PREROLL_FRAMES", "12"))
970
+ )
971
  process_btn = gr.Button("🚀 Process Video", variant="primary", size="lg")
972
 
973
  gr.Markdown("### 🔎 Self-Verification")
 
985
 
986
  process_btn.click(
987
  fn=gradio_interface_optimized,
988
+ inputs=[video_input, bg_input, use_matanyone, bg_preset, stabilize, preroll_frames],
989
  outputs=[output_video, download_file, status_text],
990
  )
991
 
 
1004
  gr.Markdown("---")
1005
  gr.Markdown("""
1006
  **Notes**
1007
+ - K-Governor clamps and pads Top-K inside MatAnyone so it cannot crash on small pools.
1008
+ - Short-clip stabilizer (pre-roll of the first real frame) is trimmed out of the alpha automatically.
1009
  - SAM2 shows ✅ only after a real micro-inference passes.
 
 
 
1010
  - FFmpeg/MoviePy, CUDA, and rembg are validated by actually running them.
1011
  """)
1012
 
 
1017
  if "--self-test" in sys.argv:
1018
  report = run_self_test()
1019
  print(report)
 
1020
  exit_code = 0
1021
  for line in report.splitlines():
1022
  if line.startswith("❌"):