MogensR committed
Commit b3a57d5 · 1 Parent(s): 0f94e43

Update utils/cv_processing.py

Files changed (1):
  1. utils/cv_processing.py +251 -209
utils/cv_processing.py CHANGED
@@ -1,14 +1,6 @@
  #!/usr/bin/env python3
  """
- cv_processing.py · slim orchestrator layer (self-contained, backward-compatible)
- ──────────────────────────────────────────────────────────────────────────────
- Public API (unchanged):
-   - segment_person_hq(frame, predictor=None, fallback_enabled=True, **compat)
-   - segment_person_hq_original(...)
-   - refine_mask_hq(frame, mask, matanyone=None, fallback_enabled=True, **compat)
-   - replace_background_hq(frame, mask, background, fallback_enabled=True)
-   - create_professional_background(key_or_cfg, width, height)
-   - validate_video_file(video_path) -> (bool, reason)
  """

  from __future__ import annotations
@@ -23,7 +15,7 @@
  logger = logging.getLogger(__name__)

  # ----------------------------------------------------------------------------
- # Background presets (local copy; safe defaults)
  # ----------------------------------------------------------------------------
  PROFESSIONAL_BACKGROUNDS_LOCAL: Dict[str, Dict[str, Any]] = {
      "office": {"color": (240, 248, 255), "gradient": True},
@@ -33,7 +25,7 @@
      "white": {"color": (255, 255, 255), "gradient": False},
      "black": {"color": (0, 0, 0), "gradient": False},
  }
- PROFESSIONAL_BACKGROUNDS = PROFESSIONAL_BACKGROUNDS_LOCAL  # alias for callers

  # ----------------------------------------------------------------------------
  # Helpers
@@ -42,7 +34,6 @@ def _ensure_rgb(img: np.ndarray) -> np.ndarray:
      if img is None:
          return img
      if img.ndim == 3 and img.shape[2] == 3:
-         # Assume OpenCV BGR → convert to RGB
          return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
      return img

@@ -73,16 +64,8 @@ def _vertical_gradient(top: Tuple[int,int,int], bottom: Tuple[int,int,int], widt
          bg[y, :] = (r, g, b)
      return bg

- def _looks_like_mask(x: Any) -> bool:
-     return (
-         isinstance(x, np.ndarray)
-         and x.ndim in (2, 3)
-         and (x.ndim == 2 or (x.ndim == 3 and x.shape[2] in (1, 3)))
-         and x.dtype != object
-     )
-
  # ----------------------------------------------------------------------------
- # Background creation (RGB)
  # ----------------------------------------------------------------------------
  def create_professional_background(key_or_cfg: Any, width: int, height: int) -> np.ndarray:
      if isinstance(key_or_cfg, str):
@@ -102,213 +85,262 @@ def create_professional_background(key_or_cfg: Any, width: int, height: int) ->
      return _vertical_gradient(dark, color, width, height)

  # ----------------------------------------------------------------------------
- # Segmentation
  # ----------------------------------------------------------------------------
  def _simple_person_segmentation(frame_bgr: np.ndarray) -> np.ndarray:
      hsv = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2HSV)
-
      lower_green = np.array([40, 40, 40], dtype=np.uint8)
      upper_green = np.array([80, 255, 255], dtype=np.uint8)
      green_mask = cv2.inRange(hsv, lower_green, upper_green)
-
-     lower_white = np.array([0, 0, 200], dtype=np.uint8)
-     upper_white = np.array([180, 30, 255], dtype=np.uint8)
-     white_mask = cv2.inRange(hsv, lower_white, upper_white)
-
-     bg_mask = cv2.bitwise_or(green_mask, white_mask)
-     person_mask = cv2.bitwise_not(bg_mask)
-
-     kernel = np.ones((5, 5), np.uint8)
-     person_mask = cv2.morphologyEx(person_mask, cv2.MORPH_CLOSE, kernel)
-     person_mask = cv2.morphologyEx(person_mask, cv2.MORPH_OPEN, kernel)
-
      return (person_mask.astype(np.float32) / 255.0)

  def segment_person_hq(
      frame: np.ndarray,
      predictor: Optional[Any] = None,
      fallback_enabled: bool = True,
-     # backward-compat shim:
      use_sam2: Optional[bool] = None,
      **_compat_kwargs,
  ) -> np.ndarray:
-     try:
-         if use_sam2 is False:
-             return _simple_person_segmentation(frame)
-
-         if predictor is not None and hasattr(predictor, "set_image") and hasattr(predictor, "predict"):
-             rgb = _ensure_rgb(frame)
-             predictor.set_image(rgb)
-             h, w = rgb.shape[:2]
-             center = np.array([[w // 2, h // 2]])
-             labels = np.array([1])
-             masks, scores, _ = predictor.predict(
-                 point_coords=center,
-                 point_labels=labels,
-                 multimask_output=True
-             )
-             m = np.array(masks)
-             if m.ndim == 3:
-                 idx = int(np.argmax(scores)) if scores is not None else 0
-                 m = m[idx]
-             elif m.ndim != 2:
-                 raise RuntimeError(f"Unexpected SAM2 mask shape: {m.shape}")
-             return _to_mask01(m)
-
-     except Exception as e:
-         logger.warning("SAM2 segmentation failed: %s", e)
-
-     return _simple_person_segmentation(frame) if fallback_enabled else np.ones(frame.shape[:2], dtype=np.float32)
-
- segment_person_hq_original = segment_person_hq  # back-compat alias
-
- # ----------------------------------------------------------------------------
- # MatAnyOne helpers
- # ----------------------------------------------------------------------------
- def _to_tensor_chw(img_uint8_bgr: np.ndarray) -> "torch.Tensor":
-     import torch
-     rgb = cv2.cvtColor(img_uint8_bgr, cv2.COLOR_BGR2RGB)
-     return torch.from_numpy(rgb).permute(2, 0, 1).contiguous().float() / 255.0  # (3,H,W)
-
- def _mask_to_tensor01(mask01: np.ndarray) -> "torch.Tensor":
-     import torch
-     return torch.from_numpy(mask01.astype(np.float32)).unsqueeze(0).unsqueeze(0)  # (1,1,H,W)
-
- def _tensor_to_mask01(t: "torch.Tensor") -> np.ndarray:
-     import torch
-     if t.ndim == 4:
-         t = t[0, 0]
-     elif t.ndim == 3:
-         t = t[0]
-     return np.clip(t.detach().float().cpu().numpy(), 0.0, 1.0)
-
- def _remap_harden(mask01: np.ndarray, inside: float = 0.70, outside: float = 0.35) -> np.ndarray:
      """
-     Pull the mask toward {0,1} to avoid 'ghost' translucency.
-     Values <= outside -> 0; >= inside -> 1; linear in between.
      """
-     m = mask01.astype(np.float32)
-     if inside <= outside:
-         return m
-     m = (m - outside) / max(1e-6, (inside - outside))
-     return np.clip(m, 0.0, 1.0)
-
- def _pad_and_smooth_edges(mask01: np.ndarray, dilate_px: int = 6, edge_blur_px: int = 2) -> np.ndarray:
-     m = (mask01 * 255.0).astype(np.uint8)
-     if dilate_px > 0:
-         k = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (dilate_px, dilate_px))
-         m = cv2.dilate(m, k, iterations=1)
-     if edge_blur_px > 0:
-         ksize = edge_blur_px * 2 + 1
-         m = cv2.GaussianBlur(m, (ksize, ksize), 0)
-     return (m.astype(np.float32) / 255.0)
-
- def _try_matanyone_refine(
-     matanyone: Any,
-     frame_bgr: np.ndarray,
-     mask01: np.ndarray
- ) -> Optional[np.ndarray]:
-     """
-     Try several MatAnyOne interfaces:
-       1) InferenceCore.infer(PIL_image, PIL_mask)
-       2) .step(image_tensor=NCHW, mask_tensor=NCHW)
-       3) .process(image_np, mask_np)
-       4) callable(image_tensor, mask_tensor) → tensor
-     Returns refined mask01 (np.ndarray) or None if not usable.
-     """
-     try:
-         # --- (1) PIL infer path ------------------------------------------------
-         if hasattr(matanyone, "infer"):
-             try:
-                 from PIL import Image
-                 img_pil = Image.fromarray(cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB))
-                 m_pil = Image.fromarray((mask01 * 255.0).astype(np.uint8))
-                 out_pil = matanyone.infer(img_pil, m_pil)
-                 out_np = np.asarray(out_pil).astype(np.float32)
-                 return _to_mask01(out_np)
-             except Exception as e:
-                 logger.debug("MatAnyOne.infer path failed: %s", e)
-
-         # --- (2) tensor .step path --------------------------------------------
-         if hasattr(matanyone, "step"):
-             import torch
-             device = "cuda" if torch.cuda.is_available() else "cpu"
-             img_t = _to_tensor_chw(frame_bgr).unsqueeze(0).to(device)  # (1,3,H,W)
-             mask_t = _mask_to_tensor01(mask01).to(device)              # (1,1,H,W)
-             with torch.inference_mode():
-                 out = matanyone.step(
-                     image_tensor=img_t,
-                     mask_tensor=mask_t,
-                     objects=None,
-                     first_frame_pred=True
                  )
-             if hasattr(matanyone, "output_prob_to_mask"):
-                 out = matanyone.output_prob_to_mask(out)
-             return _tensor_to_mask01(out)
-
-         # --- (3) numpy .process path ------------------------------------------
-         if hasattr(matanyone, "process"):
-             out = matanyone.process(frame_bgr, mask01)
-             return _to_mask01(np.asarray(out))
-
-         # --- (4) callable / nn.Module path ------------------------------------
-         if callable(matanyone):
-             import torch
-             device = "cuda" if torch.cuda.is_available() else "cpu"
-             img_t = _to_tensor_chw(frame_bgr).unsqueeze(0).to(device)
-             mask_t = _mask_to_tensor01(mask01).to(device)
-             with torch.inference_mode():
-                 out = matanyone(img_t, mask_t)
-             return _tensor_to_mask01(out)
-
-     except Exception as e:
-         logger.warning("MatAnyOne refine error: %s", e)

-     return None

  # ----------------------------------------------------------------------------
- # Refinement (MatAnyOne)
  # ----------------------------------------------------------------------------
  def refine_mask_hq(
      frame: np.ndarray,
      mask: np.ndarray,
      matanyone: Optional[Any] = None,
      fallback_enabled: bool = True,
-     # backward-compat shims:
      use_matanyone: Optional[bool] = None,
      **_compat_kwargs,
  ) -> np.ndarray:
      """
-     Refine single-channel mask with MatAnyOne if available.
-     Backward-compat:
-       - accepts use_matanyone (False → skip model)
-       - tolerates legacy arg order refine_mask_hq(mask, frame, ...)
      """
-     # tolerate legacy order: refine_mask_hq(mask, frame, ...)
-     if _looks_like_mask(frame) and _looks_like_mask(mask) and mask.ndim == 3 and mask.shape[2] == 3:
-         frame, mask = mask, frame  # swap
-
      mask01 = _to_mask01(mask)
-
-     # Use MatAnyOne when possible
-     if use_matanyone is not False and matanyone is not None:
-         refined = _try_matanyone_refine(matanyone, frame, mask01)
-         if refined is not None:
-             # Hardening + edge handling to avoid translucent body/halo
-             refined = _remap_harden(refined, inside=0.70, outside=0.35)
-             refined = _pad_and_smooth_edges(refined, dilate_px=4, edge_blur_px=1)
-             return refined
-         else:
-             logger.warning("MatAnyOne provided but no usable interface found; falling back.")
-
-     # Simple refinement fallback
-     m = (mask01 * 255.0).astype(np.uint8)
-     m = cv2.GaussianBlur(m, (5, 5), 0)
-     m = cv2.bilateralFilter(m, 9, 75, 75)
-     m = (m.astype(np.float32) / 255.0)
-     m = _remap_harden(m, inside=0.68, outside=0.40)
-     m = _pad_and_smooth_edges(m, dilate_px=3, edge_blur_px=1)
-     return m if fallback_enabled else mask01

  # ----------------------------------------------------------------------------
  # Compositing
@@ -320,21 +352,31 @@ def replace_background_hq(
      fallback_enabled: bool = True,
      **_compat,
  ) -> np.ndarray:
      try:
          H, W = frame.shape[:2]
          if background.shape[:2] != (H, W):
              background = cv2.resize(background, (W, H), interpolation=cv2.INTER_LANCZOS4)
-
          m = _to_mask01(mask01)
-         # Very light feather to hide stair-steps; most shaping already done
          m = _feather(m, k=1)
          m3 = np.repeat(m[:, :, None], 3, axis=2)
-
          comp = frame.astype(np.float32) * m3 + background.astype(np.float32) * (1.0 - m3)
          return np.clip(comp, 0, 255).astype(np.uint8)
      except Exception as e:
          if fallback_enabled:
-             logger.warning("Compositing failed (%s) – returning original frame", e)
              return frame
          raise

@@ -350,30 +392,30 @@ def validate_video_file(video_path: str) -> Tuple[bool, str]:
          if size == 0:
              return False, "File is empty"
          if size > 2 * 1024 * 1024 * 1024:
-             return False, "File > 2 GB — too large for the Space quota"

          cap = cv2.VideoCapture(video_path)
          if not cap.isOpened():
-             return False, "OpenCV cannot read the file"

          n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-         fps = cap.get(cv2.CAP_PROP_FPS)
-         w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
-         h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
          cap.release()

          if n_frames == 0:
              return False, "No frames detected"
          if fps <= 0 or fps > 120:
-             return False, f"Suspicious FPS: {fps}"
          if w <= 0 or h <= 0:
-             return False, "Zero resolution"
          if w > 4096 or h > 4096:
-             return False, f"Resolution {w}×{h} too high (max 4096²)"
          if (n_frames / fps) > 300:
              return False, "Video longer than 5 minutes"

-         return True, f"OK → {w}×{h}, {fps:.1f} fps, {n_frames/fps:.1f} s"

      except Exception as e:
          logger.error(f"validate_video_file: {e}")
@@ -390,4 +432,4 @@ def validate_video_file(video_path: str) -> Tuple[bool, str]:
      "create_professional_background",
      "validate_video_file",
      "PROFESSIONAL_BACKGROUNDS",
- ]
 
  #!/usr/bin/env python3
  """
+ cv_processing.py · FIXED VERSION with proper SAM2 handling
  """

  from __future__ import annotations

  logger = logging.getLogger(__name__)

  # ----------------------------------------------------------------------------
+ # Background presets
  # ----------------------------------------------------------------------------
  PROFESSIONAL_BACKGROUNDS_LOCAL: Dict[str, Dict[str, Any]] = {
      "office": {"color": (240, 248, 255), "gradient": True},

      "white": {"color": (255, 255, 255), "gradient": False},
      "black": {"color": (0, 0, 0), "gradient": False},
  }
+ PROFESSIONAL_BACKGROUNDS = PROFESSIONAL_BACKGROUNDS_LOCAL

  # ----------------------------------------------------------------------------
  # Helpers
  # ----------------------------------------------------------------------------
  def _ensure_rgb(img: np.ndarray) -> np.ndarray:
      if img is None:
          return img
      if img.ndim == 3 and img.shape[2] == 3:
          return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
      return img

          bg[y, :] = (r, g, b)
      return bg

  # ----------------------------------------------------------------------------
+ # Background creation
  # ----------------------------------------------------------------------------
  def create_professional_background(key_or_cfg: Any, width: int, height: int) -> np.ndarray:
      if isinstance(key_or_cfg, str):

      return _vertical_gradient(dark, color, width, height)

  # ----------------------------------------------------------------------------
+ # Improved Segmentation
  # ----------------------------------------------------------------------------
  def _simple_person_segmentation(frame_bgr: np.ndarray) -> np.ndarray:
+     """Basic fallback segmentation using color detection"""
+     h, w = frame_bgr.shape[:2]
+
+     # Convert to HSV for better color detection
      hsv = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2HSV)
+
+     # Detect skin tones (basic person detection)
+     lower_skin = np.array([0, 20, 70], dtype=np.uint8)
+     upper_skin = np.array([20, 255, 255], dtype=np.uint8)
+     skin_mask = cv2.inRange(hsv, lower_skin, upper_skin)
+
+     # Also treat non-green areas as potential person
      lower_green = np.array([40, 40, 40], dtype=np.uint8)
      upper_green = np.array([80, 255, 255], dtype=np.uint8)
      green_mask = cv2.inRange(hsv, lower_green, upper_green)
+
+     # Assume person is NOT green screen
+     person_mask = cv2.bitwise_not(green_mask)
+
+     # Combine with skin detection
+     person_mask = cv2.bitwise_or(person_mask, skin_mask)
+
+     # Clean up the mask
+     kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
+     person_mask = cv2.morphologyEx(person_mask, cv2.MORPH_CLOSE, kernel, iterations=2)
+     person_mask = cv2.morphologyEx(person_mask, cv2.MORPH_OPEN, kernel, iterations=1)
+
+     # Find largest contour (assume it's the person)
+     contours, _ = cv2.findContours(person_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+     if contours:
+         largest_contour = max(contours, key=cv2.contourArea)
+         person_mask = np.zeros_like(person_mask)
+         cv2.drawContours(person_mask, [largest_contour], -1, 255, -1)
+
      return (person_mask.astype(np.float32) / 255.0)

  def segment_person_hq(
      frame: np.ndarray,
      predictor: Optional[Any] = None,
      fallback_enabled: bool = True,
      use_sam2: Optional[bool] = None,
      **_compat_kwargs,
  ) -> np.ndarray:
      """
+     High-quality person segmentation with proper SAM2 handling
      """
+     h, w = frame.shape[:2]
+
+     # Skip SAM2 if explicitly disabled
+     if use_sam2 is False:
+         return _simple_person_segmentation(frame)
+
+     # Try SAM2 if available
+     if predictor is not None:
+         try:
+             # Ensure we have the right methods
+             if hasattr(predictor, "set_image") and hasattr(predictor, "predict"):
+                 # Convert to RGB for SAM2
+                 rgb = _ensure_rgb(frame)
+
+                 # Set the image
+                 predictor.set_image(rgb)
+
+                 # Generate multiple prompt points for better coverage
+                 points = []
+                 labels = []
+
+                 # Add the center point
+                 points.append([w // 2, h // 2])
+                 labels.append(1)  # Foreground
+
+                 # Add a point for the head area (upper center)
+                 points.append([w // 2, h // 4])
+                 labels.append(1)
+
+                 # Add a body point
+                 points.append([w // 2, h // 2 + h // 8])
+                 labels.append(1)
+
+                 # Convert to numpy arrays
+                 point_coords = np.array(points, dtype=np.float32)
+                 point_labels = np.array(labels, dtype=np.int32)
+
+                 # Predict with multiple masks
+                 result = predictor.predict(
+                     point_coords=point_coords,
+                     point_labels=point_labels,
+                     multimask_output=True
                  )
+
+                 # Extract masks and scores
+                 if isinstance(result, dict):
+                     masks = result.get("masks", None)
+                     scores = result.get("scores", None)
+                 elif isinstance(result, tuple) and len(result) >= 2:
+                     masks, scores = result[0], result[1]
+                 else:
+                     masks = result
+                     scores = None
+
+                 # Validate and process masks
+                 if masks is not None:
+                     masks = np.array(masks)
+
+                     if masks.size > 0:  # Check if not empty
+                         # Handle different mask shapes
+                         if masks.ndim == 3 and masks.shape[0] > 0:
+                             # Multiple masks - choose best one
+                             if scores is not None and len(scores) > 0:
+                                 best_idx = np.argmax(scores)
+                                 mask = masks[best_idx]
+                             else:
+                                 # Use first mask if no scores
+                                 mask = masks[0]
+                         elif masks.ndim == 2:
+                             # Single mask
+                             mask = masks
+                         else:
+                             logger.warning(f"Unexpected mask shape from SAM2: {masks.shape}")
+                             mask = None
+
+                         if mask is not None:
+                             # Convert to proper format
+                             mask = _to_mask01(mask)
+
+                             # Validate mask has actual content
+                             if mask.max() > 0.1:  # At least 10% confidence somewhere
+                                 return mask
+                             else:
+                                 logger.warning("SAM2 mask too weak, using fallback")
+                 else:
+                     logger.warning("SAM2 returned no masks")
+
+         except Exception as e:
+             logger.warning(f"SAM2 segmentation error: {e}")
+
+     # Fallback to simple segmentation
+     if fallback_enabled:
+         logger.debug("Using fallback segmentation")
+         return _simple_person_segmentation(frame)
+     else:
+         # Return full mask if no fallback
+         return np.ones((h, w), dtype=np.float32)

+ segment_person_hq_original = segment_person_hq

  # ----------------------------------------------------------------------------
+ # MatAnyone Refinement (Fixed)
  # ----------------------------------------------------------------------------
  def refine_mask_hq(
      frame: np.ndarray,
      mask: np.ndarray,
      matanyone: Optional[Any] = None,
      fallback_enabled: bool = True,
      use_matanyone: Optional[bool] = None,
      **_compat_kwargs,
  ) -> np.ndarray:
      """
+     Refine the mask with MatAnyone, with proper interface handling
      """
+     # Convert mask to proper format
      mask01 = _to_mask01(mask)
+
254
+ # Skip MatAnyone if explicitly disabled
255
+ if use_matanyone is False:
256
+ return mask01
257
+
258
+ # Try MatAnyone if available
259
+ if matanyone is not None:
260
+ try:
261
+ # Try different MatAnyone interfaces
262
+ refined = None
263
+
264
+ # Method 1: Direct callable
265
+ if callable(matanyone):
266
+ try:
267
+ refined = matanyone(frame, mask01)
268
+ if refined is not None:
269
+ refined = _to_mask01(np.array(refined))
270
+ except Exception as e:
271
+ logger.debug(f"MatAnyone callable failed: {e}")
272
+
273
+ # Method 2: step method
274
+ if refined is None and hasattr(matanyone, 'step'):
275
+ try:
276
+ refined = matanyone.step(frame, mask01)
277
+ if refined is not None:
278
+ refined = _to_mask01(np.array(refined))
279
+ except Exception as e:
280
+ logger.debug(f"MatAnyone step failed: {e}")
281
+
282
+ # Method 3: process method
283
+ if refined is None and hasattr(matanyone, 'process'):
284
+ try:
285
+ refined = matanyone.process(frame, mask01)
286
+ if refined is not None:
287
+ refined = _to_mask01(np.array(refined))
288
+ except Exception as e:
289
+ logger.debug(f"MatAnyone process failed: {e}")
290
+
291
+ # Use refined mask if successful
292
+ if refined is not None and refined.max() > 0.1:
293
+ # Apply post-processing
294
+ refined = _postprocess_mask(refined)
295
+ return refined
296
+ else:
297
+ logger.warning("MatAnyone refinement failed or produced empty mask")
298
+
299
+ except Exception as e:
300
+ logger.warning(f"MatAnyone error: {e}")
301
+
302
+ # Fallback refinement
303
+ if fallback_enabled:
304
+ return _fallback_refine(mask01)
305
+ else:
306
+ return mask01
307
+
+ def _postprocess_mask(mask01: np.ndarray) -> np.ndarray:
+     """Post-process mask to clean edges and remove artifacts"""
+     # Convert to uint8
+     mask_uint8 = (mask01 * 255).astype(np.uint8)
+
+     # Remove small holes
+     kernel_close = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
+     mask_uint8 = cv2.morphologyEx(mask_uint8, cv2.MORPH_CLOSE, kernel_close)
+
+     # Smooth edges
+     mask_uint8 = cv2.GaussianBlur(mask_uint8, (3, 3), 0)
+
+     # Threshold to clean up
+     _, mask_uint8 = cv2.threshold(mask_uint8, 127, 255, cv2.THRESH_BINARY)
+
+     # Final smooth
+     mask_uint8 = cv2.GaussianBlur(mask_uint8, (5, 5), 1)
+
+     return mask_uint8.astype(np.float32) / 255.0
+
+ def _fallback_refine(mask01: np.ndarray) -> np.ndarray:
+     """Simple fallback refinement"""
+     mask_uint8 = (mask01 * 255).astype(np.uint8)
+
+     # Bilateral filter for edge-preserving smoothing
+     mask_uint8 = cv2.bilateralFilter(mask_uint8, 9, 75, 75)
+
+     # Morphological operations
+     kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
+     mask_uint8 = cv2.morphologyEx(mask_uint8, cv2.MORPH_CLOSE, kernel)
+     mask_uint8 = cv2.morphologyEx(mask_uint8, cv2.MORPH_OPEN, kernel)
+
+     # Edge feathering
+     mask_uint8 = cv2.GaussianBlur(mask_uint8, (5, 5), 1)
+
+     return mask_uint8.astype(np.float32) / 255.0

  # ----------------------------------------------------------------------------
  # Compositing
  # ----------------------------------------------------------------------------
  def replace_background_hq(

      fallback_enabled: bool = True,
      **_compat,
  ) -> np.ndarray:
+     """High-quality background replacement with alpha blending"""
      try:
          H, W = frame.shape[:2]
+
+         # Resize background if needed
          if background.shape[:2] != (H, W):
              background = cv2.resize(background, (W, H), interpolation=cv2.INTER_LANCZOS4)
+
+         # Ensure mask is properly formatted
          m = _to_mask01(mask01)
+
+         # Apply slight feather for smooth edges
          m = _feather(m, k=1)
+
+         # Convert to 3-channel for multiplication
          m3 = np.repeat(m[:, :, None], 3, axis=2)
+
+         # Alpha blending
          comp = frame.astype(np.float32) * m3 + background.astype(np.float32) * (1.0 - m3)
+
          return np.clip(comp, 0, 255).astype(np.uint8)
+
      except Exception as e:
          if fallback_enabled:
+             logger.warning(f"Compositing failed ({e}) – returning original frame")
              return frame
          raise

 
          if size == 0:
              return False, "File is empty"
          if size > 2 * 1024 * 1024 * 1024:
+             return False, "File > 2 GB"

          cap = cv2.VideoCapture(video_path)
          if not cap.isOpened():
+             return False, "Cannot read file"

          n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+         fps = cap.get(cv2.CAP_PROP_FPS)
+         w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+         h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
          cap.release()

          if n_frames == 0:
              return False, "No frames detected"
          if fps <= 0 or fps > 120:
+             return False, f"Invalid FPS: {fps}"
          if w <= 0 or h <= 0:
+             return False, "Invalid resolution"
          if w > 4096 or h > 4096:
+             return False, f"Resolution {w}×{h} too high"
          if (n_frames / fps) > 300:
              return False, "Video longer than 5 minutes"

+         return True, f"OK → {w}×{h}, {fps:.1f} fps, {n_frames/fps:.1f}s"

      except Exception as e:
          logger.error(f"validate_video_file: {e}")

      "create_professional_background",
      "validate_video_file",
      "PROFESSIONAL_BACKGROUNDS",
+ ]
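
For reference, a minimal driver sketch for the public API this commit touches. The file names and the predictor=None / matanyone=None arguments are illustrative assumptions: passing None exercises the built-in fallbacks, while real SAM2 / MatAnyone model objects would be passed in their place.

    # Hypothetical end-to-end use of utils/cv_processing.py on a single frame.
    import cv2
    from utils.cv_processing import (
        create_professional_background,
        refine_mask_hq,
        replace_background_hq,
        segment_person_hq,
        validate_video_file,
    )

    ok, reason = validate_video_file("input.mp4")  # "input.mp4" is a placeholder
    if not ok:
        raise SystemExit(f"Rejected: {reason}")

    cap = cv2.VideoCapture("input.mp4")
    ret, frame = cap.read()  # BGR uint8, shape (H, W, 3)
    cap.release()
    if not ret:
        raise SystemExit("Could not read a frame")

    h, w = frame.shape[:2]
    background = create_professional_background("office", w, h)

    # predictor=None -> color-based fallback; a SAM2 image predictor could be passed instead
    mask = segment_person_hq(frame, predictor=None, fallback_enabled=True)
    # matanyone=None -> blur/morphology fallback; a MatAnyone model could be passed instead
    mask = refine_mask_hq(frame, mask, matanyone=None, fallback_enabled=True)

    result = replace_background_hq(frame, mask, background)
    cv2.imwrite("composited.png", result)

With models attached, the same three calls run unchanged; the commit's changes only affect how the predictor's output is unpacked and how refinement failures fall back.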