Update app.py
Browse files
app.py
CHANGED
@@ -1,20 +1,17 @@
|
|
1 |
#!/usr/bin/env python3
|
2 |
"""
|
3 |
HuggingFace Space app: Video background replacement with MatAnyone/SAM2/rembg.
|
4 |
-
GPU-optimized with real verification
|
5 |
-
|
6 |
-
What’s new
|
7 |
-
- K-Governor:
|
8 |
-
|
9 |
-
-
|
10 |
-
|
11 |
-
|
12 |
-
-
|
13 |
-
-
|
14 |
-
|
15 |
-
NOTE: Make sure `requirements.txt` includes:
|
16 |
-
safetensors>=0.4.2
|
17 |
-
av==10.0.0
|
18 |
"""
|
19 |
|
20 |
import os
|
@@ -30,39 +27,43 @@
|
|
30 |
# =========================
|
31 |
# Environment configuration
|
32 |
# =========================
|
33 |
-
os.environ
|
34 |
-
os.environ
|
35 |
-
os.environ
|
36 |
-
os.environ
|
37 |
-
os.environ
|
38 |
|
39 |
# MatAnyone GPU prefs
|
40 |
-
os.environ
|
41 |
-
os.environ
|
42 |
-
os.environ
|
43 |
-
os.environ
|
44 |
-
os.environ
|
45 |
|
46 |
# CUDA + cuDNN
|
47 |
-
os.environ
|
48 |
-
os.environ
|
49 |
-
os.environ
|
50 |
|
51 |
# HF cache prefs
|
52 |
-
os.environ
|
53 |
-
os.environ
|
54 |
-
os.environ
|
55 |
-
os.environ
|
56 |
-
os.environ
|
57 |
|
58 |
# Gradio binding
|
59 |
-
os.environ
|
60 |
-
os.environ
|
61 |
|
62 |
# Feature flags
|
63 |
-
os.environ
|
64 |
-
os.environ
|
65 |
-
os.environ
|
|
|
|
|
|
|
|
|
66 |
|
67 |
# =========================
|
68 |
# Imports
|
@@ -71,7 +72,7 @@
|
|
71 |
import numpy as np
|
72 |
from PIL import Image
|
73 |
import gradio as gr
|
74 |
-
from moviepy.editor import VideoFileClip, ImageSequenceClip
|
75 |
|
76 |
print("=" * 50)
|
77 |
print("Application Startup at", os.popen('date').read().strip())
|
@@ -125,7 +126,121 @@
|
|
125 |
print(f"Added to path: {p}")
|
126 |
|
127 |
# =========================
|
128 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
129 |
# =========================
|
130 |
TORCH_AVAILABLE = False
|
131 |
CUDA_AVAILABLE = False
|
@@ -158,39 +273,6 @@
|
|
158 |
except Exception as e:
|
159 |
print(f"Torch not available: {e}")
|
160 |
|
161 |
-
# ---- K-Governor (MUST be installed before importing MatAnyone) ----
# Replaces torch.topk / torch.kthvalue with wrappers that clamp an out-of-range
# ``k`` to the size of the reduced dimension and log every clamp.
if TORCH_AVAILABLE:
    _ORIG_TOPK = torch.topk
    _ORIG_KTH = getattr(torch, "kthvalue", None)

    def _safe_topk(input, k, dim=None, largest=True, sorted=True, *, out=None):
        """Call the original ``torch.topk`` with ``k`` clamped to ``input.size(dim)``.

        ``dim=None`` means the last dimension. A non-positive ``k`` is raised to 1,
        except when the dimension is empty, where 0 is the only value that fits.
        """
        if dim is None:
            dim = input.ndim - 1
        size = int(input.size(dim))
        if k > size:
            print(f"[K-Governor] torch.topk: clamp k {k} -> {size} for dim={dim} shape={tuple(input.shape)}")
            k = size
        if k <= 0:
            # A floor of 1 would exceed an empty dimension and re-trigger the
            # very error this guard exists to prevent.
            k = min(1, size)
        return _ORIG_TOPK(input, k, dim, largest, sorted, out=out)

    torch.topk = _safe_topk  # global guard

    if _ORIG_KTH is not None:
        def _safe_kthvalue(input, k, dim=None, keepdim=False, *, out=None):
            """Call the original ``torch.kthvalue`` with ``k`` clamped to ``input.size(dim)``.

            ``dim=None`` is mapped to ``-1``: kthvalue reduces over the last
            dimension by default and has no flattened mode, so the clamp must be
            computed against that dimension and an integer ``dim`` forwarded.
            """
            if dim is None:
                dim = -1
            size = int(input.size(dim))
            if k > size:
                print(f"[K-Governor] torch.kthvalue: clamp k {k} -> {size} (dim={dim}) shape={tuple(input.shape)}")
                k = size
            if k <= 0:
                k = 1
            return _ORIG_KTH(input, k, dim=dim, keepdim=keepdim, out=out)

        torch.kthvalue = _safe_kthvalue
|
193 |
-
|
194 |
# =========================
|
195 |
# Light GPU monitor
|
196 |
# =========================
|
@@ -274,10 +356,11 @@ def get_stats(self):
|
|
274 |
config_dir = str(TP_DIR / "sam2" / "sam2" / "configs")
|
275 |
config_file = "sam2.1/sam2.1_hiera_t.yaml"
|
276 |
initialize_config_dir(config_dir=config_dir, version_base=None)
|
277 |
-
_ = compose(config_name=config_file)
|
278 |
|
279 |
model = build_sam2(config_file, str(ckpt), device="cuda" if CUDA_AVAILABLE else "cpu")
|
280 |
|
|
|
281 |
if CUDA_AVAILABLE and hasattr(torch, "compile"):
|
282 |
try:
|
283 |
model = torch.compile(model, mode="max-autotune")
|
@@ -286,7 +369,7 @@ def get_stats(self):
|
|
286 |
|
287 |
SAM2_PREDICTOR = SAM2ImagePredictor(model)
|
288 |
|
289 |
-
# Verify
|
290 |
try:
|
291 |
dummy = np.zeros((64, 64, 3), dtype=np.uint8)
|
292 |
SAM2_PREDICTOR.set_image(dummy)
|
@@ -326,7 +409,7 @@ def get_stats(self):
|
|
326 |
print(f"rembg not available: {e}")
|
327 |
|
328 |
# =========================
|
329 |
-
# Background helpers
|
330 |
# =========================
|
331 |
def make_solid(w, h, rgb):
    """Build an ``h`` x ``w`` 3-channel uint8 image filled with ``rgb``."""
    canvas = np.empty((h, w, 3), dtype=np.uint8)
    # Broadcast the fill value over every pixel with an unchecked cast.
    np.copyto(canvas, rgb, casting="unsafe")
    return canvas
|
@@ -352,15 +435,14 @@ def build_professional_bg(w, h, preset: str) -> np.ndarray:
|
|
352 |
return make_solid(w, h, (240, 240, 240))
|
353 |
|
354 |
# =========================
|
355 |
-
# MatAnyone wrapper (with
|
356 |
# =========================
|
357 |
class OptimizedMatAnyoneProcessor:
|
358 |
"""
|
359 |
-
Wrapper around MatAnyone's InferenceCore
|
360 |
-
|
361 |
-
|
362 |
-
|
363 |
-
K-Governor above guards any internal top-k/kthvalue ops.
|
364 |
"""
|
365 |
def __init__(self):
|
366 |
self.processor = None
|
@@ -369,15 +451,11 @@ def __init__(self):
|
|
369 |
self.verified = False
|
370 |
self.last_error = None
|
371 |
|
372 |
-
|
373 |
try:
|
374 |
-
|
375 |
-
if GlobalHydra().is_initialized():
|
376 |
-
GlobalHydra.instance().clear()
|
377 |
-
if note:
|
378 |
-
print(f"(Hydra cleared: {note})")
|
379 |
except Exception:
|
380 |
-
|
381 |
|
382 |
def initialize(self) -> bool:
|
383 |
if not MATANYONE_IMPORTED:
|
@@ -391,7 +469,7 @@ def initialize(self) -> bool:
|
|
391 |
# 1) Preferred: HF repo-id
|
392 |
try:
|
393 |
print(f"Initializing MatAnyone (HF repo-id) on {self.device}…")
|
394 |
-
self.processor = MatAnyInferenceCore("PeiqingYang/MatAnyone")
|
395 |
self.verified = hasattr(self.processor, "process_video")
|
396 |
if self.device == "cuda":
|
397 |
torch.cuda.empty_cache()
|
@@ -399,30 +477,22 @@ def initialize(self) -> bool:
|
|
399 |
self.initialized = True
|
400 |
print("✅ MatAnyone initialized via HF repo-id.")
|
401 |
return True
|
402 |
-
except NameError as e:
|
403 |
-
# Common if `safetensors` is missing during HF load
|
404 |
-
self.last_error = f"HF init NameError: {e}"
|
405 |
-
print(f"HF init failed: {e}")
|
406 |
-
except TypeError as e:
|
407 |
-
if "network" in str(e).lower():
|
408 |
-
print("MatAnyone InferenceCore expects a `network`; switching to local checkpoint init.")
|
409 |
-
self.last_error = f"HF init TypeError: {e}"
|
410 |
except Exception as e:
|
411 |
self.last_error = f"HF init failed: {type(e).__name__}: {e}"
|
412 |
print(self.last_error)
|
413 |
|
414 |
-
# 2) Fallback: local checkpoint →
|
415 |
try:
|
416 |
print("Falling back to local checkpoint init for MatAnyone…")
|
417 |
-
|
|
|
|
|
418 |
|
419 |
import requests
|
420 |
from matanyone.utils.get_default_model import get_matanyone_model
|
421 |
|
422 |
-
ckpt_dir = Path("./pretrained_models")
|
423 |
-
ckpt_dir.mkdir(parents=True, exist_ok=True)
|
424 |
ckpt_path = ckpt_dir / "matanyone.pth"
|
425 |
-
|
426 |
if not ckpt_path.exists():
|
427 |
url = "https://github.com/pq-yang/MatAnyone/releases/download/v1.0.0/matanyone.pth"
|
428 |
print(f"Downloading MatAnyone checkpoint from: {url}")
|
@@ -434,28 +504,52 @@ def initialize(self) -> bool:
|
|
434 |
f.write(chunk)
|
435 |
print(f"Checkpoint saved to {ckpt_path}")
|
436 |
|
437 |
-
network = get_matanyone_model(
|
438 |
-
str(ckpt_path),
|
439 |
-
device=("cuda" if CUDA_AVAILABLE else "cpu")
|
440 |
-
)
|
441 |
self.processor = MatAnyInferenceCore(network)
|
442 |
self.verified = hasattr(self.processor, "process_video")
|
443 |
-
|
444 |
if self.device == "cuda":
|
445 |
-
torch.cuda.empty_cache()
|
446 |
-
_ = torch.rand(1, device="cuda") * 0.0
|
447 |
-
|
448 |
self.initialized = True
|
449 |
print("✅ MatAnyone initialized via local checkpoint.")
|
450 |
-
self._clear_hydra("MatAnyone/local-post")
|
451 |
return True
|
452 |
-
|
453 |
except Exception as e:
|
454 |
self.last_error = f"Local init failed: {type(e).__name__}: {e}"
|
455 |
print(f"MatAnyone initialization failed: {self.last_error}")
|
456 |
import traceback; traceback.print_exc()
|
457 |
return False
|
458 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
459 |
def create_mask_optimized(self, video_path: str, output_path: str) -> str:
|
460 |
cap = cv2.VideoCapture(video_path)
|
461 |
ret, frame = cap.read()
|
@@ -477,11 +571,12 @@ def create_mask_optimized(self, video_path: str, output_path: str) -> str:
|
|
477 |
best = masks[np.argmax(scores)]
|
478 |
mask = (best.astype(np.uint8) * 255)
|
479 |
cv2.imwrite(output_path, mask)
|
|
|
480 |
return output_path
|
481 |
except Exception as e:
|
482 |
print(f"SAM2 mask creation failed; fallback rectangle. Error: {e}")
|
483 |
|
484 |
-
# Fallback: centered box
|
485 |
h, w = frame.shape[:2]
|
486 |
mask = np.zeros((h, w), dtype=np.uint8)
|
487 |
mx, my = int(w * 0.15), int(h * 0.10)
|
@@ -497,25 +592,30 @@ def process_video_optimized(self, input_path: str, output_dir: str):
|
|
497 |
if CUDA_AVAILABLE:
|
498 |
torch.cuda.empty_cache(); gc.collect()
|
499 |
|
500 |
-
#
|
501 |
-
|
502 |
-
|
503 |
-
|
504 |
-
|
505 |
-
print(f"[
|
506 |
|
507 |
mask_path = os.path.join(output_dir, "mask.png")
|
|
|
508 |
self.create_mask_optimized(input_path, mask_path)
|
509 |
|
510 |
-
#
|
511 |
-
# (Typical signature: input_path, mask_path, output_path=None, n_warmup=10, r_erode=10, r_dilate=10, suffix='', save_image=False, max_size=-1)
|
512 |
fg_path, alpha_path = self.processor.process_video(
|
513 |
-
input_path=
|
514 |
mask_path=mask_path,
|
515 |
output_path=output_dir,
|
516 |
-
max_size=int(os.getenv("MAX_MODEL_SIZE", "1920"))
|
517 |
)
|
518 |
-
|
|
|
|
|
|
|
|
|
|
|
519 |
return alpha_path
|
520 |
except Exception as e:
|
521 |
print(f"❌ MatAnyone processing failed: {e}")
|
@@ -545,7 +645,7 @@ def process_frame_rembg_optimized(frame_bgr_u8, bg_img_rgb_u8):
|
|
545 |
return cv2.cvtColor(frame_bgr_u8, cv2.COLOR_BGR2RGB)
|
546 |
|
547 |
# =========================
|
548 |
-
# Compositing
|
549 |
# =========================
|
550 |
def composite_with_background(original_path, alpha_path, bg_path=None, bg_preset=None):
|
551 |
"""Composite original video with alpha matte and a background (image or preset)."""
|
@@ -553,8 +653,7 @@ def composite_with_background(original_path, alpha_path, bg_path=None, bg_preset
|
|
553 |
orig_clip = VideoFileClip(original_path)
|
554 |
alpha_clip = VideoFileClip(alpha_path)
|
555 |
|
556 |
-
w, h = orig_clip.size
|
557 |
-
|
558 |
if bg_path:
|
559 |
bg_img = cv2.imread(bg_path)
|
560 |
if bg_img is None:
|
@@ -565,15 +664,13 @@ def composite_with_background(original_path, alpha_path, bg_path=None, bg_preset
|
|
565 |
bg_img = build_professional_bg(w, h, bg_preset)
|
566 |
|
567 |
def process_func(get_frame, t):
|
568 |
-
frame = get_frame(t) # float [0,1]
|
569 |
-
a = alpha_clip.get_frame(t)
|
570 |
-
|
571 |
if a.ndim == 2:
|
572 |
a = a[..., None]
|
573 |
elif a.shape[2] > 1:
|
574 |
a = a[..., :1]
|
575 |
a = np.clip(a, 0.0, 1.0).astype(np.float32)
|
576 |
-
|
577 |
bg_f32 = (bg_img.astype(np.float32) / 255.0)
|
578 |
comp = a * frame.astype(np.float32) + (1.0 - a) * bg_f32
|
579 |
return comp.astype(np.float32)
|
@@ -582,12 +679,11 @@ def process_func(get_frame, t):
|
|
582 |
output_path = "final_output.mp4"
|
583 |
new_clip.write_videofile(output_path, audio=False, logger=None)
|
584 |
|
585 |
-
alpha_clip.close()
|
586 |
-
orig_clip.close()
|
587 |
return output_path
|
588 |
|
589 |
# =========================
|
590 |
-
# Fallback whole-video path
|
591 |
# =========================
|
592 |
def process_video_rembg_fallback(video_path, bg_image_path=None, bg_preset=None):
|
593 |
print("🔄 Processing with rembg fallback…")
|
@@ -619,10 +715,11 @@ def process_func(get_frame, t):
|
|
619 |
new_clip = clip.fl(process_func)
|
620 |
output_path = "rembg_output.mp4"
|
621 |
new_clip.write_videofile(output_path, audio=False, logger=None)
|
|
|
622 |
return output_path
|
623 |
|
624 |
# =========================
|
625 |
-
# Self-test harness
|
626 |
# =========================
|
627 |
def _ok(flag): # visual mark in reports
|
628 |
return "✅" if flag else "❌"
|
@@ -649,10 +746,9 @@ def self_test_ffmpeg_moviepy() -> (bool, str):
|
|
649 |
clip = ImageSequenceClip(frames, fps=4)
|
650 |
with tempfile.TemporaryDirectory() as td:
|
651 |
vp = os.path.join(td, "tiny.mp4")
|
652 |
-
clip.write_videofile(vp, audio=False, logger=None)
|
653 |
clip_r = VideoFileClip(vp)
|
654 |
-
_ = clip_r.get_frame(0.1)
|
655 |
-
clip_r.close()
|
656 |
return True, "FFmpeg/MoviePy encode/decode ok"
|
657 |
except Exception as e:
|
658 |
return False, f"FFmpeg/MoviePy test failed: {e}"
|
@@ -661,10 +757,9 @@ def self_test_rembg() -> (bool, str):
|
|
661 |
try:
|
662 |
if not REMBG_AVAILABLE:
|
663 |
return False, "rembg not importable"
|
664 |
-
img = np.zeros((64, 64, 3), dtype=np.uint8)
|
665 |
-
img[:,:] = (0, 255, 0) # green
|
666 |
pil = Image.fromarray(img)
|
667 |
-
out = remove(pil)
|
668 |
ok = isinstance(out, Image.Image) and out.size == (64, 64)
|
669 |
return ok, "rembg ok" if ok else "rembg returned unexpected output"
|
670 |
except Exception as e:
|
@@ -693,53 +788,32 @@ def self_test_matanyone() -> (bool, str):
|
|
693 |
return False, f"MatAnyone init failed: {getattr(matanyone_processor, 'last_error', 'no details')}"
|
694 |
if not matanyone_processor.verified:
|
695 |
return False, "MatAnyone missing process_video API"
|
696 |
-
# Create a tiny real video + mask, then run process_video
|
697 |
with tempfile.TemporaryDirectory() as td:
|
|
|
698 |
frames = []
|
699 |
-
for t in range(
|
700 |
frame = np.zeros((64, 64, 3), dtype=np.uint8)
|
701 |
-
x = 8 + t*
|
702 |
-
cv2.rectangle(frame, (x, 20), (
|
703 |
frames.append(frame)
|
704 |
-
|
705 |
vid_path = os.path.join(td, "tiny_input.mp4")
|
706 |
clip = ImageSequenceClip(frames, fps=8)
|
707 |
-
clip.write_videofile(vid_path, audio=False, logger=None)
|
708 |
-
visible_test_clip = CHECKPOINTS_DIR / "selftest_clip.mp4"
|
709 |
-
clip.write_videofile(str(visible_test_clip), audio=False, logger=None)
|
710 |
-
clip.close()
|
711 |
-
print(f"📹 Self-test clip saved to {visible_test_clip}")
|
712 |
|
713 |
-
#
|
714 |
mask = np.zeros((64, 64), dtype=np.uint8)
|
715 |
cv2.rectangle(mask, (24, 24), (40, 40), 255, -1)
|
716 |
mask_path = os.path.join(td, "mask.png")
|
717 |
cv2.imwrite(mask_path, mask)
|
718 |
-
print("Self-test mask uniques:", np.unique(mask))
|
719 |
|
720 |
-
#
|
721 |
-
|
722 |
-
|
723 |
-
|
724 |
-
|
725 |
-
|
726 |
-
|
727 |
-
|
728 |
-
fg_path, alpha_path = matanyone_processor.processor.process_video(
|
729 |
-
input_path=vid_path,
|
730 |
-
mask_path=mask_path,
|
731 |
-
output_path=td,
|
732 |
-
max_size=256,
|
733 |
-
)
|
734 |
-
ok = os.path.exists(alpha_path)
|
735 |
-
if ok:
|
736 |
-
try:
|
737 |
-
_alpha_clip = VideoFileClip(alpha_path)
|
738 |
-
_ = _alpha_clip.get_frame(0.1)
|
739 |
-
_alpha_clip.close()
|
740 |
-
except Exception as _e:
|
741 |
-
return False, f"MatAnyone alpha video unreadable: {_e}"
|
742 |
-
return ok, "MatAnyone process_video ok" if ok else "MatAnyone did not produce alpha video"
|
743 |
except Exception as e:
|
744 |
return False, f"MatAnyone test failed: {e}"
|
745 |
|
@@ -750,7 +824,6 @@ def run_self_test() -> str:
|
|
750 |
lines.append(f"Torch: {torch.__version__ if TORCH_AVAILABLE else 'N/A'} | CUDA: {CUDA_AVAILABLE} | Device: {DEVICE} | GPU: {GPU_NAME}")
|
751 |
lines.append(f"FFmpeg on PATH: {bool(shutil.which('ffmpeg'))}")
|
752 |
lines.append("")
|
753 |
-
|
754 |
tests = [
|
755 |
("CUDA", self_test_cuda),
|
756 |
("FFmpeg/MoviePy", self_test_ffmpeg_moviepy),
|
@@ -768,13 +841,20 @@ def run_self_test() -> str:
|
|
768 |
# =========================
|
769 |
# Gradio callback
|
770 |
# =========================
|
771 |
-
def gradio_interface_optimized(video_file, bg_image, use_matanyone=True, bg_preset="Office (Soft Gray)"):
|
772 |
"""Main entry: choose MatAnyone (if verified) or rembg fallback; show real metrics."""
|
773 |
if video_file is None:
|
774 |
return None, None, "Please upload a video."
|
775 |
video_path = video_file.name if hasattr(video_file, "name") else video_file
|
776 |
bg_path = bg_image.name if (bg_image is not None and hasattr(bg_image, "name")) else (bg_image if bg_image else None)
|
777 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
778 |
start_time = time.time()
|
779 |
try:
|
780 |
if use_matanyone and MATANYONE_IMPORTED:
|
@@ -837,7 +917,7 @@ def show_matanyone_diag():
|
|
837 |
# =========================
|
838 |
with gr.Blocks(title="Video Background Replacer - GPU Optimized", theme=gr.themes.Soft()) as demo:
|
839 |
gr.Markdown("# 🎬 Video Background Replacer (GPU Optimized)")
|
840 |
-
gr.Markdown("All green checks are earned by real tests. No guesses.
|
841 |
|
842 |
gpu_status = f"✅ {GPU_NAME}" if CUDA_AVAILABLE else "❌ CPU Only"
|
843 |
matany_status = "✅ Module Imported" if MATANYONE_IMPORTED else "❌ Not Importable"
|
@@ -854,8 +934,7 @@ def show_matanyone_diag():
|
|
854 |
<strong>MatAnyone ready:</strong> {"✅ Yes" if getattr(matanyone_processor, "verified", False) else "❌ No"}<br>
|
855 |
<strong>SAM2:</strong> {sam2_status}<br>
|
856 |
<strong>rembg:</strong> {rembg_status}<br>
|
857 |
-
<strong>PyTorch:</strong> {torch_status}
|
858 |
-
<strong>K-Governor:</strong> ✅ Active (torch.topk / torch.kthvalue)
|
859 |
</div>
|
860 |
"""
|
861 |
gr.HTML(status_html)
|
@@ -880,6 +959,15 @@ def show_matanyone_diag():
|
|
880 |
value=MATANYONE_IMPORTED,
|
881 |
interactive=MATANYONE_IMPORTED,
|
882 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
883 |
process_btn = gr.Button("🚀 Process Video", variant="primary", size="lg")
|
884 |
|
885 |
gr.Markdown("### 🔎 Self-Verification")
|
@@ -897,7 +985,7 @@ def show_matanyone_diag():
|
|
897 |
|
898 |
process_btn.click(
|
899 |
fn=gradio_interface_optimized,
|
900 |
-
inputs=[video_input, bg_input, use_matanyone, bg_preset],
|
901 |
outputs=[output_video, download_file, status_text],
|
902 |
)
|
903 |
|
@@ -916,10 +1004,9 @@ def show_matanyone_diag():
|
|
916 |
gr.Markdown("---")
|
917 |
gr.Markdown("""
|
918 |
**Notes**
|
|
|
|
|
919 |
- SAM2 shows ✅ only after a real micro-inference passes.
|
920 |
-
- K-Governor clamps unsafe K at runtime and logs the clamp (shape/dim and old→new K).
|
921 |
-
- MatAnyone tries HF load first (needs `safetensors`), else falls back to local checkpoint.
|
922 |
-
- The self-test saves a visible clip to `checkpoints/selftest_clip.mp4` for inspection.
|
923 |
- FFmpeg/MoviePy, CUDA, and rembg are validated by actually running them.
|
924 |
""")
|
925 |
|
@@ -930,7 +1017,6 @@ def show_matanyone_diag():
|
|
930 |
if "--self-test" in sys.argv:
|
931 |
report = run_self_test()
|
932 |
print(report)
|
933 |
-
# Exit non-zero if any test failed
|
934 |
exit_code = 0
|
935 |
for line in report.splitlines():
|
936 |
if line.startswith("❌"):
|
|
|
1 |
#!/usr/bin/env python3
|
2 |
"""
|
3 |
HuggingFace Space app: Video background replacement with MatAnyone/SAM2/rembg.
|
4 |
+
GPU-optimized with real verification, K-Governor (safe_topk), and short-clip stabilizer.
|
5 |
+
|
6 |
+
What’s new vs. your last version:
|
7 |
+
- K-Governor: rewrites MatAnyone .topk/.kthvalue call-sites to use safe guards (no more "selected index k out of range").
|
8 |
+
- Pre-roll stabilizer: duplicates first real frame to grow the candidate bank for very short clips; trimmed out before compositing.
|
9 |
+
- Diagnostic prints show exactly what was patched and when clamping happens.
|
10 |
+
|
11 |
+
Environment knobs:
|
12 |
+
- MATANYONE_PREROLL_FRAMES (default 12) – how many frames to prepend as pre-roll.
|
13 |
+
- MATANYONE_STABILIZE (default "true") – enable/disable pre-roll stabilizer globally.
|
14 |
+
- MAX_MODEL_SIZE, MATANYONE_* knobs unchanged from before.
|
|
|
|
|
|
|
15 |
"""
|
16 |
|
17 |
import os
|
|
|
27 |
# =========================
|
28 |
# Environment configuration
|
29 |
# =========================
|
30 |
+
# Every value below is a default only: setdefault() leaves anything already
# present in the process environment untouched.
os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True,max_split_size_mb:512")
os.environ.setdefault("CUDA_MODULE_LOADING", "LAZY")
os.environ.setdefault("OMP_NUM_THREADS", "8")
os.environ.setdefault("MKL_NUM_THREADS", "8")
os.environ.setdefault("PYTHONUNBUFFERED", "1")

# MatAnyone GPU prefs
os.environ.setdefault("MATANYONE_MAX_EDGE", "1024")
os.environ.setdefault("MATANYONE_TARGET_PIXELS", "1000000")
os.environ.setdefault("MATANYONE_WINDOWED", "1")
os.environ.setdefault("MATANYONE_WINDOW", "16")
os.environ.setdefault("MAX_MODEL_SIZE", "1920")

# CUDA + cuDNN
os.environ.setdefault("CUDA_LAUNCH_BLOCKING", "0")
os.environ.setdefault("TORCH_CUDNN_V8_API_ENABLED", "1")
os.environ.setdefault("CUDNN_BENCHMARK", "1")

# HF cache prefs
os.environ.setdefault("HF_HOME", "./checkpoints/hf")
os.environ.setdefault("TRANSFORMERS_CACHE", "./checkpoints/hf")
os.environ.setdefault("HF_DATASETS_CACHE", "./checkpoints/hf")
os.environ.setdefault("HF_HUB_DISABLE_SYMLINKS", "1")
# huggingface_hub refuses to download when HF_HUB_ENABLE_HF_TRANSFER=1 but the
# optional `hf_transfer` package is missing, so opt in only when it is importable.
import importlib.util
if importlib.util.find_spec("hf_transfer") is not None:
    os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "1")

# Gradio binding
os.environ.setdefault("GRADIO_SERVER_NAME", "0.0.0.0")
os.environ.setdefault("GRADIO_SERVER_PORT", "7860")

# Feature flags
os.environ.setdefault("USE_MATANYONE", "true")
os.environ.setdefault("USE_SAM2", "true")
os.environ.setdefault("SELF_CHECK_MODE", "false")

# Short-clip stabilizer
os.environ.setdefault("MATANYONE_STABILIZE", "true")
os.environ.setdefault("MATANYONE_PREROLL_FRAMES", "12")  # duplicated first frame count
|
67 |
|
68 |
# =========================
|
69 |
# Imports
|
|
|
72 |
import numpy as np
|
73 |
from PIL import Image
|
74 |
import gradio as gr
|
75 |
+
from moviepy.editor import VideoFileClip, ImageSequenceClip, concatenate_videoclips
|
76 |
|
77 |
print("=" * 50)
|
78 |
print("Application Startup at", os.popen('date').read().strip())
|
|
|
126 |
print(f"Added to path: {p}")
|
127 |
|
128 |
# =========================
|
129 |
+
# K-Governor: patch MatAnyone topk/kthvalue call-sites
|
130 |
+
# =========================
|
131 |
+
import re
|
132 |
+
def _write_safe_ops_file(pkg_root: Path):
|
133 |
+
utils_dir = pkg_root / "matanyone" / "utils"
|
134 |
+
# handle clones that place package directly at root
|
135 |
+
if not utils_dir.exists():
|
136 |
+
utils_dir = pkg_root / "utils"
|
137 |
+
utils_dir.mkdir(parents=True, exist_ok=True)
|
138 |
+
(utils_dir / "safe_ops.py").write_text(
|
139 |
+
"""
|
140 |
+
import torch
|
141 |
+
|
142 |
+
_VERBOSE = bool(int(os.environ.get("SAFE_TOPK_VERBOSE", "1")))
|
143 |
+
|
144 |
+
def _log(msg):
|
145 |
+
if _VERBOSE:
|
146 |
+
print(f"[K-Governor] {msg}")
|
147 |
+
|
148 |
+
def safe_topk(x, k, dim=None, largest=True, sorted=True):
|
149 |
+
if not isinstance(k, int):
|
150 |
+
k = int(k)
|
151 |
+
if dim is None:
|
152 |
+
dim = -1
|
153 |
+
n = x.size(dim)
|
154 |
+
k_eff = max(1, min(k, int(n)))
|
155 |
+
if k_eff != k:
|
156 |
+
_log(f"torch.topk: clamp k {k} -> {k_eff} for dim={dim} shape={tuple(x.shape)}")
|
157 |
+
values, indices = torch.topk.__wrapped__(x, k_eff, dim=dim, largest=largest, sorted=sorted)
|
158 |
+
if k_eff < k:
|
159 |
+
pad = k - k_eff
|
160 |
+
pad_shape = list(values.shape); pad_shape[dim] = pad
|
161 |
+
pad_vals = values.new_full(pad_shape, float('-inf'))
|
162 |
+
pad_idx = indices.new_zeros(pad_shape, dtype=indices.dtype)
|
163 |
+
values = torch.cat([values, pad_vals], dim=dim)
|
164 |
+
indices = torch.cat([indices, pad_idx], dim=dim)
|
165 |
+
return values, indices
|
166 |
+
|
167 |
+
def safe_kthvalue(x, k, dim=None, keepdim=False):
|
168 |
+
if not isinstance(k, int):
|
169 |
+
k = int(k)
|
170 |
+
if dim is None:
|
171 |
+
dim = -1
|
172 |
+
n = x.size(dim)
|
173 |
+
k_eff = max(1, min(k, int(n)))
|
174 |
+
if k_eff != k:
|
175 |
+
_log(f"torch.kthvalue: clamp k {k} -> {k_eff} for dim={dim} shape={tuple(x.shape)}")
|
176 |
+
return torch.kthvalue.__wrapped__(x, k_eff, dim=dim, keepdim=keepdim)
|
177 |
+
""".lstrip()
|
178 |
+
)
|
179 |
+
|
180 |
+
def _patch_matanyone_sources(repo_dir: Path) -> int:
|
181 |
+
root = repo_dir / "matanyone"
|
182 |
+
if not root.exists(): # some layouts have files directly
|
183 |
+
root = repo_dir
|
184 |
+
changed = 0
|
185 |
+
header_import = "from matanyone.utils.safe_ops import safe_topk, safe_kthvalue\n"
|
186 |
+
|
187 |
+
pat_torch_topk = re.compile(r"\btorch\.topk\s*\(")
|
188 |
+
pat_method_topk = re.compile(r"(\b[\w\.]+)\.topk\s*\(")
|
189 |
+
pat_torch_kth = re.compile(r"\btorch\.kthvalue\s*\(")
|
190 |
+
pat_method_kth = re.compile(r"(\b[\w\.]+)\.kthvalue\s*\(")
|
191 |
+
|
192 |
+
for py in root.rglob("*.py"):
|
193 |
+
txt = py.read_text()
|
194 |
+
orig = txt
|
195 |
+
|
196 |
+
if "safe_topk" not in txt and py.name != "__init__.py":
|
197 |
+
lines = txt.splitlines(keepends=True)
|
198 |
+
insert_at = 0
|
199 |
+
for i, L in enumerate(lines[:80]):
|
200 |
+
if L.startswith("import ") or L.startswith("from "):
|
201 |
+
insert_at = i + 1
|
202 |
+
lines.insert(insert_at, header_import)
|
203 |
+
txt = "".join(lines)
|
204 |
+
|
205 |
+
txt = pat_torch_topk.sub("safe_topk(", txt)
|
206 |
+
txt = pat_torch_kth.sub("safe_kthvalue(", txt)
|
207 |
+
|
208 |
+
def _meth_topk(m):
|
209 |
+
obj = m.group(1)
|
210 |
+
return f"safe_topk({obj}, "
|
211 |
+
def _meth_kth(m):
|
212 |
+
obj = m.group(1)
|
213 |
+
return f"safe_kthvalue({obj}, "
|
214 |
+
|
215 |
+
txt = pat_method_topk.sub(_meth_topk, txt)
|
216 |
+
txt = pat_method_kth.sub(_meth_kth, txt)
|
217 |
+
|
218 |
+
if txt != orig:
|
219 |
+
py.write_text(txt)
|
220 |
+
changed += 1
|
221 |
+
return changed
|
222 |
+
|
223 |
+
# Stash original torch ops for our wrappers
try:
    import torch as _torch_guard
    if not hasattr(_torch_guard.topk, "__wrapped__"):
        _torch_guard.topk.__wrapped__ = _torch_guard.topk
    if not hasattr(_torch_guard.kthvalue, "__wrapped__"):
        _torch_guard.kthvalue.__wrapped__ = _torch_guard.kthvalue
except Exception as e:
    # Best-effort: torch may be missing, or these callables may reject attribute
    # assignment. Report it instead of hiding it, because the patched MatAnyone
    # sources rely on the safe wrappers being able to reach the original ops.
    print(f"[K-Governor] Could not stash original torch ops: {type(e).__name__}: {e}")

# Write safe ops & patch sources
MATANY_REPO_DIR = TP_DIR / "matanyone"
try:
    _write_safe_ops_file(MATANY_REPO_DIR)
    patched_files = _patch_matanyone_sources(MATANY_REPO_DIR)
    print(f"[K-Governor] Patched MatAnyone sources: {patched_files} files updated.")
except Exception as e:
    # Patching is best-effort; the app still starts without it.
    print(f"[K-Governor] Patch failed: {e}")
|
241 |
+
|
242 |
+
# =========================
|
243 |
+
# Torch & device
|
244 |
# =========================
|
245 |
TORCH_AVAILABLE = False
|
246 |
CUDA_AVAILABLE = False
|
|
|
273 |
except Exception as e:
|
274 |
print(f"Torch not available: {e}")
|
275 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
276 |
# =========================
|
277 |
# Light GPU monitor
|
278 |
# =========================
|
|
|
356 |
config_dir = str(TP_DIR / "sam2" / "sam2" / "configs")
|
357 |
config_file = "sam2.1/sam2.1_hiera_t.yaml"
|
358 |
initialize_config_dir(config_dir=config_dir, version_base=None)
|
359 |
+
_ = compose(config_name=config_file)
|
360 |
|
361 |
model = build_sam2(config_file, str(ckpt), device="cuda" if CUDA_AVAILABLE else "cpu")
|
362 |
|
363 |
+
# Optional torch.compile
|
364 |
if CUDA_AVAILABLE and hasattr(torch, "compile"):
|
365 |
try:
|
366 |
model = torch.compile(model, mode="max-autotune")
|
|
|
369 |
|
370 |
SAM2_PREDICTOR = SAM2ImagePredictor(model)
|
371 |
|
372 |
+
# Verify micro inference
|
373 |
try:
|
374 |
dummy = np.zeros((64, 64, 3), dtype=np.uint8)
|
375 |
SAM2_PREDICTOR.set_image(dummy)
|
|
|
409 |
print(f"rembg not available: {e}")
|
410 |
|
411 |
# =========================
|
412 |
+
# Background helpers
|
413 |
# =========================
|
414 |
def make_solid(w, h, rgb):
    """Build an ``h`` x ``w`` 3-channel uint8 image filled with ``rgb``."""
    canvas = np.empty((h, w, 3), dtype=np.uint8)
    # Broadcast the fill value over every pixel with an unchecked cast.
    np.copyto(canvas, rgb, casting="unsafe")
    return canvas
|
|
|
435 |
return make_solid(w, h, (240, 240, 240))
|
436 |
|
437 |
# =========================
|
438 |
+
# MatAnyone wrapper (with pre-roll stabilizer)
|
439 |
# =========================
|
440 |
class OptimizedMatAnyoneProcessor:
|
441 |
"""
|
442 |
+
Wrapper around MatAnyone's InferenceCore with:
|
443 |
+
- HF repo-id path first (may require safetensors); fallback to local checkpoint via get_matanyone_model.
|
444 |
+
- K-Governor already patches library calls; no API changes needed here.
|
445 |
+
- Optional pre-roll: duplicate the first real frame to build a stable memory bank; trim it from alpha before compositing.
|
|
|
446 |
"""
|
447 |
def __init__(self):
|
448 |
self.processor = None
|
|
|
451 |
self.verified = False
|
452 |
self.last_error = None
|
453 |
|
454 |
+
self.stabilize = os.getenv("MATANYONE_STABILIZE", "true").lower() == "true"
|
455 |
try:
|
456 |
+
self.preroll_frames = max(0, int(os.getenv("MATANYONE_PREROLL_FRAMES", "12")))
|
|
|
|
|
|
|
|
|
457 |
except Exception:
|
458 |
+
self.preroll_frames = 12
|
459 |
|
460 |
def initialize(self) -> bool:
|
461 |
if not MATANYONE_IMPORTED:
|
|
|
469 |
# 1) Preferred: HF repo-id
|
470 |
try:
|
471 |
print(f"Initializing MatAnyone (HF repo-id) on {self.device}…")
|
472 |
+
self.processor = MatAnyInferenceCore("PeiqingYang/MatAnyone")
|
473 |
self.verified = hasattr(self.processor, "process_video")
|
474 |
if self.device == "cuda":
|
475 |
torch.cuda.empty_cache()
|
|
|
477 |
self.initialized = True
|
478 |
print("✅ MatAnyone initialized via HF repo-id.")
|
479 |
return True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
480 |
except Exception as e:
|
481 |
self.last_error = f"HF init failed: {type(e).__name__}: {e}"
|
482 |
print(self.last_error)
|
483 |
|
484 |
+
# 2) Fallback: local checkpoint → network → InferenceCore(network)
|
485 |
try:
|
486 |
print("Falling back to local checkpoint init for MatAnyone…")
|
487 |
+
from hydra.core.global_hydra import GlobalHydra
|
488 |
+
if hasattr(GlobalHydra, "instance") and GlobalHydra().is_initialized():
|
489 |
+
GlobalHydra.instance().clear()
|
490 |
|
491 |
import requests
|
492 |
from matanyone.utils.get_default_model import get_matanyone_model
|
493 |
|
494 |
+
ckpt_dir = Path("./pretrained_models"); ckpt_dir.mkdir(parents=True, exist_ok=True)
|
|
|
495 |
ckpt_path = ckpt_dir / "matanyone.pth"
|
|
|
496 |
if not ckpt_path.exists():
|
497 |
url = "https://github.com/pq-yang/MatAnyone/releases/download/v1.0.0/matanyone.pth"
|
498 |
print(f"Downloading MatAnyone checkpoint from: {url}")
|
|
|
504 |
f.write(chunk)
|
505 |
print(f"Checkpoint saved to {ckpt_path}")
|
506 |
|
507 |
+
network = get_matanyone_model(str(ckpt_path), device=("cuda" if CUDA_AVAILABLE else "cpu"))
|
|
|
|
|
|
|
508 |
self.processor = MatAnyInferenceCore(network)
|
509 |
self.verified = hasattr(self.processor, "process_video")
|
|
|
510 |
if self.device == "cuda":
|
511 |
+
torch.cuda.empty_cache(); _ = torch.rand(1, device="cuda") * 0.0
|
|
|
|
|
512 |
self.initialized = True
|
513 |
print("✅ MatAnyone initialized via local checkpoint.")
|
|
|
514 |
return True
|
|
|
515 |
except Exception as e:
|
516 |
self.last_error = f"Local init failed: {type(e).__name__}: {e}"
|
517 |
print(f"MatAnyone initialization failed: {self.last_error}")
|
518 |
import traceback; traceback.print_exc()
|
519 |
return False
|
520 |
|
521 |
+
# ---- Pre-roll helpers
|
522 |
+
@staticmethod
def _build_preroll_concat(input_path: str, frames: int) -> tuple[str, float]:
    """Return (concat_path, preroll_seconds) where concat_path = [pre-roll + original].

    The pre-roll is the first frame of ``input_path`` repeated ``frames`` times.
    With ``frames <= 0`` the input path is returned unchanged with 0.0 seconds
    and the video is not opened. Otherwise the result is written to a new
    temporary ``*_concat.mp4`` file that the caller is responsible for deleting.
    """
    preroll_frames = max(0, frames)
    if preroll_frames == 0:
        return input_path, 0.0

    clip = VideoFileClip(input_path)
    pre = concat = None
    try:
        # Integer fps (24 when the container reports none) for the pre-roll clip.
        fps = max(1, int(round(clip.fps or 24)))
        first = clip.get_frame(0)
        pre = ImageSequenceClip([first] * preroll_frames, fps=fps)
        concat = concatenate_videoclips([pre, clip])

        # Only the path is needed: close our handle before the encoder writes to
        # it so no descriptor is leaked.
        with tempfile.NamedTemporaryFile(delete=False, suffix="_concat.mp4") as tmp:
            out_path = tmp.name
        try:
            concat.write_videofile(out_path, audio=False, logger=None)
        except Exception:
            # Do not leave a partial file behind when encoding fails.
            try:
                os.remove(out_path)
            except OSError:
                pass
            raise
        return out_path, preroll_frames / fps
    finally:
        # Release readers even when encoding raised.
        for opened in (pre, concat, clip):
            if opened is not None:
                opened.close()
|
540 |
+
|
541 |
+
@staticmethod
def _trim_head(video_path: str, seconds: float) -> str:
    """Return a copy of the video with the first ``seconds`` removed.

    Args:
        video_path: Path of the video to trim.
        seconds: Amount of leading footage to drop. Values less than or
            equal to zero mean "nothing to trim".

    Returns:
        ``video_path`` itself when ``seconds <= 0``; otherwise the path of a
        new temporary ``*_trim.mp4`` file (audio stripped) that the caller
        owns.

    Raises:
        Exception: Whatever MoviePy/ffmpeg raises while reading or encoding.
            The clip is closed and the partial temp file is removed before
            the error propagates.
    """
    if seconds <= 0:
        return video_path

    clip = VideoFileClip(video_path)
    trimmed = None
    try:
        dur = clip.duration or 0
        # Clamp the cut point just inside the clip so that a pre-roll longer
        # than the video still yields a non-empty subclip.
        start = min(seconds, max(0.0, dur - 0.001))

        # Only the unique name is needed; close the handle immediately so it
        # is not leaked and does not lock the file while ffmpeg writes.
        tmp = tempfile.NamedTemporaryFile(delete=False, suffix="_trim.mp4")
        tmp.close()
        trimmed = tmp.name

        clip.subclip(start, None).write_videofile(trimmed, audio=False, logger=None)
    except Exception:
        # Do not leave a truncated/empty temp file behind on failure.
        if trimmed is not None:
            try:
                os.remove(trimmed)
            except OSError:
                pass
        raise
    finally:
        clip.close()
    return trimmed
|
552 |
+
|
553 |
def create_mask_optimized(self, video_path: str, output_path: str) -> str:
|
554 |
cap = cv2.VideoCapture(video_path)
|
555 |
ret, frame = cap.read()
|
|
|
571 |
best = masks[np.argmax(scores)]
|
572 |
mask = (best.astype(np.uint8) * 255)
|
573 |
cv2.imwrite(output_path, mask)
|
574 |
+
print(f"Self-test mask uniques: {np.unique(mask//255)}")
|
575 |
return output_path
|
576 |
except Exception as e:
|
577 |
print(f"SAM2 mask creation failed; fallback rectangle. Error: {e}")
|
578 |
|
579 |
+
# Fallback: centered box
|
580 |
h, w = frame.shape[:2]
|
581 |
mask = np.zeros((h, w), dtype=np.uint8)
|
582 |
mx, my = int(w * 0.15), int(h * 0.10)
|
|
|
592 |
if CUDA_AVAILABLE:
|
593 |
torch.cuda.empty_cache(); gc.collect()
|
594 |
|
595 |
+
# Optional pre-roll stabilizer (always trimmed out later)
|
596 |
+
concat_path = input_path
|
597 |
+
preroll_sec = 0.0
|
598 |
+
if self.stabilize and self.preroll_frames > 0:
|
599 |
+
concat_path, preroll_sec = self._build_preroll_concat(input_path, self.preroll_frames)
|
600 |
+
print(f"[Stabilizer] Pre-rolled {self.preroll_frames} frames ({preroll_sec:.3f}s).")
|
601 |
|
602 |
mask_path = os.path.join(output_dir, "mask.png")
|
603 |
+
# Create mask from the ORIGINAL first frame (not the pre-roll)
|
604 |
self.create_mask_optimized(input_path, mask_path)
|
605 |
|
606 |
+
# Call MatAnyone (signature introspected earlier; pass only known-safe kwargs)
|
|
|
607 |
fg_path, alpha_path = self.processor.process_video(
|
608 |
+
input_path=concat_path,
|
609 |
mask_path=mask_path,
|
610 |
output_path=output_dir,
|
611 |
+
max_size=int(os.getenv("MAX_MODEL_SIZE", "1920"))
|
612 |
)
|
613 |
+
|
614 |
+
# If we had a pre-roll, trim it off the alpha
|
615 |
+
if preroll_sec > 0.0:
|
616 |
+
alpha_path_trimmed = self._trim_head(alpha_path, preroll_sec)
|
617 |
+
print(f"[Stabilizer] Trimmed {preroll_sec:.3f}s from alpha.")
|
618 |
+
return alpha_path_trimmed
|
619 |
return alpha_path
|
620 |
except Exception as e:
|
621 |
print(f"❌ MatAnyone processing failed: {e}")
|
|
|
645 |
return cv2.cvtColor(frame_bgr_u8, cv2.COLOR_BGR2RGB)
|
646 |
|
647 |
# =========================
|
648 |
+
# Compositing
|
649 |
# =========================
|
650 |
def composite_with_background(original_path, alpha_path, bg_path=None, bg_preset=None):
|
651 |
"""Composite original video with alpha matte and a background (image or preset)."""
|
|
|
653 |
orig_clip = VideoFileClip(original_path)
|
654 |
alpha_clip = VideoFileClip(alpha_path)
|
655 |
|
656 |
+
w, h = orig_clip.size
|
|
|
657 |
if bg_path:
|
658 |
bg_img = cv2.imread(bg_path)
|
659 |
if bg_img is None:
|
|
|
664 |
bg_img = build_professional_bg(w, h, bg_preset)
|
665 |
|
666 |
def process_func(get_frame, t):
|
667 |
+
frame = get_frame(t) # float [0,1]
|
668 |
+
a = alpha_clip.get_frame(t)
|
|
|
669 |
if a.ndim == 2:
|
670 |
a = a[..., None]
|
671 |
elif a.shape[2] > 1:
|
672 |
a = a[..., :1]
|
673 |
a = np.clip(a, 0.0, 1.0).astype(np.float32)
|
|
|
674 |
bg_f32 = (bg_img.astype(np.float32) / 255.0)
|
675 |
comp = a * frame.astype(np.float32) + (1.0 - a) * bg_f32
|
676 |
return comp.astype(np.float32)
|
|
|
679 |
output_path = "final_output.mp4"
|
680 |
new_clip.write_videofile(output_path, audio=False, logger=None)
|
681 |
|
682 |
+
alpha_clip.close(); orig_clip.close(); new_clip.close()
|
|
|
683 |
return output_path
|
684 |
|
685 |
# =========================
|
686 |
+
# Fallback whole-video path (rembg)
|
687 |
# =========================
|
688 |
def process_video_rembg_fallback(video_path, bg_image_path=None, bg_preset=None):
|
689 |
print("🔄 Processing with rembg fallback…")
|
|
|
715 |
new_clip = clip.fl(process_func)
|
716 |
output_path = "rembg_output.mp4"
|
717 |
new_clip.write_videofile(output_path, audio=False, logger=None)
|
718 |
+
clip.close(); new_clip.close()
|
719 |
return output_path
|
720 |
|
721 |
# =========================
|
722 |
+
# Self-test harness
|
723 |
# =========================
|
724 |
def _ok(flag): # visual mark in reports
|
725 |
return "✅" if flag else "❌"
|
|
|
746 |
clip = ImageSequenceClip(frames, fps=4)
|
747 |
with tempfile.TemporaryDirectory() as td:
|
748 |
vp = os.path.join(td, "tiny.mp4")
|
749 |
+
clip.write_videofile(vp, audio=False, logger=None); clip.close()
|
750 |
clip_r = VideoFileClip(vp)
|
751 |
+
_ = clip_r.get_frame(0.1); clip_r.close()
|
|
|
752 |
return True, "FFmpeg/MoviePy encode/decode ok"
|
753 |
except Exception as e:
|
754 |
return False, f"FFmpeg/MoviePy test failed: {e}"
|
|
|
757 |
try:
|
758 |
if not REMBG_AVAILABLE:
|
759 |
return False, "rembg not importable"
|
760 |
+
img = np.zeros((64, 64, 3), dtype=np.uint8); img[:,:] = (0, 255, 0)
|
|
|
761 |
pil = Image.fromarray(img)
|
762 |
+
out = remove(pil)
|
763 |
ok = isinstance(out, Image.Image) and out.size == (64, 64)
|
764 |
return ok, "rembg ok" if ok else "rembg returned unexpected output"
|
765 |
except Exception as e:
|
|
|
788 |
return False, f"MatAnyone init failed: {getattr(matanyone_processor, 'last_error', 'no details')}"
|
789 |
if not matanyone_processor.verified:
|
790 |
return False, "MatAnyone missing process_video API"
|
|
|
791 |
with tempfile.TemporaryDirectory() as td:
|
792 |
+
# tiny moving square video
|
793 |
frames = []
|
794 |
+
for t in range(8):
|
795 |
frame = np.zeros((64, 64, 3), dtype=np.uint8)
|
796 |
+
x = 8 + t*4
|
797 |
+
cv2.rectangle(frame, (x, 20), (x+12, 44), (200, 200, 200), -1)
|
798 |
frames.append(frame)
|
|
|
799 |
vid_path = os.path.join(td, "tiny_input.mp4")
|
800 |
clip = ImageSequenceClip(frames, fps=8)
|
801 |
+
clip.write_videofile(vid_path, audio=False, logger=None); clip.close()
|
|
|
|
|
|
|
|
|
802 |
|
803 |
+
# central seed mask
|
804 |
mask = np.zeros((64, 64), dtype=np.uint8)
|
805 |
cv2.rectangle(mask, (24, 24), (40, 40), 255, -1)
|
806 |
mask_path = os.path.join(td, "mask.png")
|
807 |
cv2.imwrite(mask_path, mask)
|
|
|
808 |
|
809 |
+
# run through our stabilized path
|
810 |
+
alpha = matanyone_processor.process_video_optimized(vid_path, td)
|
811 |
+
if alpha is None or not os.path.exists(alpha):
|
812 |
+
return False, "MatAnyone did not produce alpha video"
|
813 |
+
# try open alpha
|
814 |
+
_alpha_clip = VideoFileClip(alpha)
|
815 |
+
_ = _alpha_clip.get_frame(0.1); _alpha_clip.close()
|
816 |
+
return True, "MatAnyone process_video ok"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
817 |
except Exception as e:
|
818 |
return False, f"MatAnyone test failed: {e}"
|
819 |
|
|
|
824 |
lines.append(f"Torch: {torch.__version__ if TORCH_AVAILABLE else 'N/A'} | CUDA: {CUDA_AVAILABLE} | Device: {DEVICE} | GPU: {GPU_NAME}")
|
825 |
lines.append(f"FFmpeg on PATH: {bool(shutil.which('ffmpeg'))}")
|
826 |
lines.append("")
|
|
|
827 |
tests = [
|
828 |
("CUDA", self_test_cuda),
|
829 |
("FFmpeg/MoviePy", self_test_ffmpeg_moviepy),
|
|
|
841 |
# =========================
|
842 |
# Gradio callback
|
843 |
# =========================
|
844 |
+
def gradio_interface_optimized(video_file, bg_image, use_matanyone=True, bg_preset="Office (Soft Gray)", stabilize=True, preroll_frames=12):
|
845 |
"""Main entry: choose MatAnyone (if verified) or rembg fallback; show real metrics."""
|
846 |
if video_file is None:
|
847 |
return None, None, "Please upload a video."
|
848 |
video_path = video_file.name if hasattr(video_file, "name") else video_file
|
849 |
bg_path = bg_image.name if (bg_image is not None and hasattr(bg_image, "name")) else (bg_image if bg_image else None)
|
850 |
|
851 |
+
# reflect UI choices into processor for this run
|
852 |
+
matanyone_processor.stabilize = bool(stabilize)
|
853 |
+
try:
|
854 |
+
matanyone_processor.preroll_frames = max(0, int(preroll_frames))
|
855 |
+
except Exception:
|
856 |
+
pass
|
857 |
+
|
858 |
start_time = time.time()
|
859 |
try:
|
860 |
if use_matanyone and MATANYONE_IMPORTED:
|
|
|
917 |
# =========================
|
918 |
with gr.Blocks(title="Video Background Replacer - GPU Optimized", theme=gr.themes.Soft()) as demo:
|
919 |
gr.Markdown("# 🎬 Video Background Replacer (GPU Optimized)")
|
920 |
+
gr.Markdown("All green checks are earned by real tests. No guesses.")
|
921 |
|
922 |
gpu_status = f"✅ {GPU_NAME}" if CUDA_AVAILABLE else "❌ CPU Only"
|
923 |
matany_status = "✅ Module Imported" if MATANYONE_IMPORTED else "❌ Not Importable"
|
|
|
934 |
<strong>MatAnyone ready:</strong> {"✅ Yes" if getattr(matanyone_processor, "verified", False) else "❌ No"}<br>
|
935 |
<strong>SAM2:</strong> {sam2_status}<br>
|
936 |
<strong>rembg:</strong> {rembg_status}<br>
|
937 |
+
<strong>PyTorch:</strong> {torch_status}
|
|
|
938 |
</div>
|
939 |
"""
|
940 |
gr.HTML(status_html)
|
|
|
959 |
value=MATANYONE_IMPORTED,
|
960 |
interactive=MATANYONE_IMPORTED,
|
961 |
)
|
962 |
+
stabilize = gr.Checkbox(
|
963 |
+
label="🧱 Stabilize short clips (pre-roll first frame)",
|
964 |
+
value=os.getenv("MATANYONE_STABILIZE", "true").lower() == "true",
|
965 |
+
)
|
966 |
+
preroll_frames = gr.Slider(
|
967 |
+
label="Pre-roll frames",
|
968 |
+
minimum=0, maximum=24, step=1,
|
969 |
+
value=int(os.getenv("MATANYONE_PREROLL_FRAMES", "12"))
|
970 |
+
)
|
971 |
process_btn = gr.Button("🚀 Process Video", variant="primary", size="lg")
|
972 |
|
973 |
gr.Markdown("### 🔎 Self-Verification")
|
|
|
985 |
|
986 |
process_btn.click(
|
987 |
fn=gradio_interface_optimized,
|
988 |
+
inputs=[video_input, bg_input, use_matanyone, bg_preset, stabilize, preroll_frames],
|
989 |
outputs=[output_video, download_file, status_text],
|
990 |
)
|
991 |
|
|
|
1004 |
gr.Markdown("---")
|
1005 |
gr.Markdown("""
|
1006 |
**Notes**
|
1007 |
+
- K-Governor clamps and pads Top-K inside MatAnyone so it cannot crash on small pools.
|
1008 |
+
- Short-clip stabilizer (pre-roll of the first real frame) is trimmed out of the alpha automatically.
|
1009 |
- SAM2 shows ✅ only after a real micro-inference passes.
|
|
|
|
|
|
|
1010 |
- FFmpeg/MoviePy, CUDA, and rembg are validated by actually running them.
|
1011 |
""")
|
1012 |
|
|
|
1017 |
if "--self-test" in sys.argv:
|
1018 |
report = run_self_test()
|
1019 |
print(report)
|
|
|
1020 |
exit_code = 0
|
1021 |
for line in report.splitlines():
|
1022 |
if line.startswith("❌"):
|