MogensR committed on
Commit
990992c
·
1 Parent(s): e5a23a5

Update utils/cv_processing.py

Browse files
Files changed (1) hide show
  1. utils/cv_processing.py +234 -278
utils/cv_processing.py CHANGED
@@ -31,7 +31,7 @@
31
  PROFESSIONAL_BACKGROUNDS = {
32
  "office_modern": {
33
  "name": "Modern Office",
34
- "type": "gradient",
35
  "colors": ["#f8f9fa", "#e9ecef", "#dee2e6"],
36
  "direction": "diagonal",
37
  "description": "Clean, contemporary office environment",
@@ -102,12 +102,13 @@ class BackgroundReplacementError(Exception):
102
  pass
103
 
104
  # ============================================================================
105
- # BACKGROUND HELPERS (NEW)
106
  # ============================================================================
107
 
108
  def _fit_image_letterbox(img_rgb: np.ndarray, dst_w: int, dst_h: int, fill=(32, 32, 32)) -> np.ndarray:
109
  """
110
  Fit an RGB image into (dst_h, dst_w) with letterboxing (no stretch), borders filled with `fill`.
 
111
  """
112
  h, w = img_rgb.shape[:2]
113
  if h == 0 or w == 0:
@@ -138,100 +139,86 @@ def segment_person_hq(image: np.ndarray, predictor: Any, fallback_enabled: bool
138
  """High-quality person segmentation with intelligent automation"""
139
  if not USE_ENHANCED_SEGMENTATION:
140
  return segment_person_hq_original(image, predictor, fallback_enabled)
141
-
142
  logger.debug("Using ENHANCED segmentation with intelligent automation")
143
-
144
  if image is None or image.size == 0:
145
  raise SegmentationError("Invalid input image")
146
-
147
  try:
148
- # SAFE PREDICTOR CHECK - Added comprehensive validation
149
  if predictor is None:
150
  if fallback_enabled:
151
  logger.warning("SAM2 predictor not available, using fallback")
152
  return _fallback_segmentation(image)
153
- else:
154
- raise SegmentationError("SAM2 predictor not available")
155
-
156
- # Check if predictor has required methods
157
  if not hasattr(predictor, 'set_image') or not hasattr(predictor, 'predict'):
158
  logger.warning("Predictor missing required methods, using fallback")
159
  if fallback_enabled:
160
  return _fallback_segmentation(image)
161
- else:
162
- raise SegmentationError("Invalid predictor object")
163
-
164
- # Safe set_image call
165
  try:
166
  predictor.set_image(image)
167
  except Exception as e:
168
  logger.error(f"Failed to set image in predictor: {e}")
169
  if fallback_enabled:
170
  return _fallback_segmentation(image)
171
- else:
172
- raise SegmentationError(f"Predictor setup failed: {e}")
173
-
174
  if USE_INTELLIGENT_PROMPTING:
175
  mask = _segment_with_intelligent_prompts(image, predictor, fallback_enabled)
176
  else:
177
  mask = _segment_with_basic_prompts(image, predictor, fallback_enabled)
178
-
179
  if USE_ITERATIVE_REFINEMENT and mask is not None:
180
  mask = _auto_refine_mask_iteratively(image, mask, predictor)
181
-
182
  if not _validate_mask_quality(mask, image.shape[:2]):
183
  logger.warning("Mask quality validation failed")
184
  if fallback_enabled:
185
  return _fallback_segmentation(image)
186
- else:
187
- raise SegmentationError("Poor mask quality")
188
-
189
  logger.debug(f"Enhanced segmentation successful - mask range: {mask.min()}-{mask.max()}")
190
  return mask
191
-
192
  except SegmentationError:
193
  raise
194
  except Exception as e:
195
  logger.error(f"Unexpected segmentation error: {e}")
196
  if fallback_enabled:
197
  return _fallback_segmentation(image)
198
- else:
199
- raise SegmentationError(f"Unexpected error: {e}")
200
 
201
  def segment_person_hq_original(image: np.ndarray, predictor: Any, fallback_enabled: bool = True) -> np.ndarray:
202
  """Original version of person segmentation for rollback"""
203
  if image is None or image.size == 0:
204
  raise SegmentationError("Invalid input image")
205
-
206
  try:
207
- # SAFE PREDICTOR CHECK - Added comprehensive validation
208
  if predictor is None:
209
  if fallback_enabled:
210
  logger.warning("SAM2 predictor not available, using fallback")
211
  return _fallback_segmentation(image)
212
- else:
213
- raise SegmentationError("SAM2 predictor not available")
214
-
215
- # Check if predictor has required methods
216
  if not hasattr(predictor, 'set_image') or not hasattr(predictor, 'predict'):
217
  logger.warning("Predictor missing required methods, using fallback")
218
  if fallback_enabled:
219
  return _fallback_segmentation(image)
220
- else:
221
- raise SegmentationError("Invalid predictor object")
222
-
223
- # Safe set_image call
224
  try:
225
  predictor.set_image(image)
226
  except Exception as e:
227
  logger.error(f"Failed to set image in predictor: {e}")
228
  if fallback_enabled:
229
  return _fallback_segmentation(image)
230
- else:
231
- raise SegmentationError(f"Predictor setup failed: {e}")
232
-
233
  h, w = image.shape[:2]
234
-
235
  points = np.array([
236
  [w//2, h//4],
237
  [w//2, h//2],
@@ -242,10 +229,9 @@ def segment_person_hq_original(image: np.ndarray, predictor: Any, fallback_enabl
242
  [w//4, 2*h//3],
243
  [3*w//4, 2*h//3],
244
  ], dtype=np.float32)
245
-
246
  labels = np.ones(len(points), dtype=np.int32)
247
-
248
- # Safe prediction with error handling
249
  try:
250
  with torch.no_grad():
251
  masks, scores, _ = predictor.predict(
@@ -257,16 +243,14 @@ def segment_person_hq_original(image: np.ndarray, predictor: Any, fallback_enabl
257
  logger.error(f"SAM2 prediction failed: {e}")
258
  if fallback_enabled:
259
  return _fallback_segmentation(image)
260
- else:
261
- raise SegmentationError(f"Prediction failed: {e}")
262
-
263
  if masks is None or len(masks) == 0:
264
  logger.warning("SAM2 returned no masks")
265
  if fallback_enabled:
266
  return _fallback_segmentation(image)
267
- else:
268
- raise SegmentationError("No masks generated")
269
-
270
  if scores is None or len(scores) == 0:
271
  logger.warning("SAM2 returned no scores")
272
  best_mask = masks[0]
@@ -274,122 +258,117 @@ def segment_person_hq_original(image: np.ndarray, predictor: Any, fallback_enabl
274
  best_idx = np.argmax(scores)
275
  best_mask = masks[best_idx]
276
  logger.debug(f"Selected mask {best_idx} with score {scores[best_idx]:.3f}")
277
-
278
  mask = _process_mask(best_mask)
279
-
280
  if not _validate_mask_quality(mask, image.shape[:2]):
281
  logger.warning("Mask quality validation failed")
282
  if fallback_enabled:
283
  return _fallback_segmentation(image)
284
- else:
285
- raise SegmentationError("Poor mask quality")
286
-
287
  logger.debug(f"Segmentation successful - mask range: {mask.min()}-{mask.max()}")
288
  return mask
289
-
290
  except SegmentationError:
291
  raise
292
  except Exception as e:
293
  logger.error(f"Unexpected segmentation error: {e}")
294
  if fallback_enabled:
295
  return _fallback_segmentation(image)
296
- else:
297
- raise SegmentationError(f"Unexpected error: {e}")
298
 
299
  # ============================================================================
300
- # MASK REFINEMENT FUNCTIONS
301
  # ============================================================================
302
 
303
- def refine_mask_hq(image: np.ndarray, mask: np.ndarray, matanyone_processor: Any,
304
  fallback_enabled: bool = True) -> np.ndarray:
305
  """Enhanced mask refinement with MatAnyone and robust fallbacks"""
306
  if image is None or mask is None:
307
  raise MaskRefinementError("Invalid input image or mask")
308
-
309
  try:
310
  mask = _process_mask(mask)
311
-
312
  if matanyone_processor is not None:
313
  try:
314
  logger.debug("Attempting MatAnyone refinement")
315
  refined_mask = _matanyone_refine(image, mask, matanyone_processor)
316
-
317
  if refined_mask is not None and _validate_mask_quality(refined_mask, image.shape[:2]):
318
  logger.debug("MatAnyone refinement successful")
319
  return refined_mask
320
  else:
321
  logger.warning("MatAnyone produced poor quality mask")
322
-
323
  except Exception as e:
324
  logger.warning(f"MatAnyone refinement failed: {e}")
325
-
326
  if fallback_enabled:
327
  logger.debug("Using enhanced OpenCV refinement")
328
  return enhance_mask_opencv_advanced(image, mask)
329
- else:
330
- raise MaskRefinementError("MatAnyone failed and fallback disabled")
331
-
332
  except MaskRefinementError:
333
  raise
334
  except Exception as e:
335
  logger.error(f"Unexpected mask refinement error: {e}")
336
  if fallback_enabled:
337
  return enhance_mask_opencv_advanced(image, mask)
338
- else:
339
- raise MaskRefinementError(f"Unexpected error: {e}")
340
 
def enhance_mask_opencv_advanced(image: np.ndarray, mask: np.ndarray) -> np.ndarray:
    """Refine a segmentation mask with a chain of OpenCV filters.

    Steps, in order: bilateral smoothing, a guided-filter approximation that
    uses ``image`` as the guide, morphological close (5x5 ellipse) then open
    (3x3 ellipse), a 3x3 Gaussian blur, and a hard threshold at 127.

    Args:
        image: Frame the mask belongs to; passed to ``_guided_filter_approx``
            as the guide image.
        mask: Single-channel mask, or a 3-channel mask that is reduced to
            grey first. Values may be normalised (max <= 1) or in 0-255.

    Returns:
        On success, a uint8 mask holding only 0 and 255. If any step raises,
        a 5x5 Gaussian-blurred copy of the (possibly partly converted) mask
        is returned instead as a best-effort result; it is not thresholded.
    """
    try:
        if mask.ndim == 3:
            mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)

        if mask.max() <= 1.0:
            # Normalised (or boolean / all-zero) mask: scale up to 0-255.
            mask = (mask * 255).astype(np.uint8)
        elif mask.dtype != np.uint8:
            # bilateralFilter only accepts 8-bit or float32 input. Without
            # this, a float64/int mask in 0-255 would raise there and the
            # whole pipeline would silently degrade to the blur fallback.
            mask = np.clip(mask, 0, 255).astype(np.uint8)

        refined_mask = cv2.bilateralFilter(mask, 9, 75, 75)
        refined_mask = _guided_filter_approx(image, refined_mask, radius=8, eps=0.2)

        # Close first to fill small holes, then open to drop small specks.
        kernel_close = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
        refined_mask = cv2.morphologyEx(refined_mask, cv2.MORPH_CLOSE, kernel_close)

        kernel_open = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
        refined_mask = cv2.morphologyEx(refined_mask, cv2.MORPH_OPEN, kernel_open)

        # Slight blur before thresholding so the binary edge is less jagged.
        refined_mask = cv2.GaussianBlur(refined_mask, (3, 3), 0.8)
        _, refined_mask = cv2.threshold(refined_mask, 127, 255, cv2.THRESH_BINARY)

        return refined_mask

    except Exception as e:
        logger.warning(f"Enhanced OpenCV refinement failed: {e}")
        return cv2.GaussianBlur(mask, (5, 5), 1.0)
368
 
369
  # ============================================================================
370
- # MATANYONE REFINEMENT (NEW LOGIC)
371
  # ============================================================================
372
 
373
  def _matanyone_refine(image: np.ndarray, mask: np.ndarray, matanyone_processor: Any) -> Optional[np.ndarray]:
374
  """Safe MatAnyOne refinement for a single frame with correct interface."""
375
  try:
376
- # Check for correct MatAnyOne interface
377
  if not hasattr(matanyone_processor, 'step') or not hasattr(matanyone_processor, 'output_prob_to_mask'):
378
  logger.warning("MatAnyOne processor missing required methods (step, output_prob_to_mask)")
379
  return None
380
 
381
- # Preprocess image: ensure float32, RGB, (C, H, W)
382
  if isinstance(image, np.ndarray):
383
  img = image.astype(np.float32)
384
  if img.max() > 1.0:
385
  img /= 255.0
386
  if img.shape[2] == 3:
387
- img = np.transpose(img, (2, 0, 1)) # (H, W, C) → (C, H, W)
388
  img_tensor = torch.from_numpy(img)
389
  else:
390
- img_tensor = image # assume already tensor
391
 
392
- # Preprocess mask: ensure float32, (H, W)
393
  if isinstance(mask, np.ndarray):
394
  mask_tensor = mask.astype(np.float32)
395
  if mask_tensor.max() > 1.0:
@@ -400,15 +379,12 @@ def _matanyone_refine(image: np.ndarray, mask: np.ndarray, matanyone_processor:
400
  else:
401
  mask_tensor = mask
402
 
403
- # Move tensors to processor's device if available
404
  device = getattr(matanyone_processor, 'device', 'cpu')
405
  img_tensor = img_tensor.to(device)
406
  mask_tensor = mask_tensor.to(device)
407
 
408
- # Step: encode mask on this frame
409
- objects = [1] # single object id
410
  with torch.no_grad():
411
- output_prob = matanyone_processor.step(img_tensor, mask_tensor, objects=objects)
412
  refined_mask_tensor = matanyone_processor.output_prob_to_mask(output_prob)
413
 
414
  refined_mask = refined_mask_tensor.squeeze().detach().cpu().numpy()
@@ -425,7 +401,7 @@ def _matanyone_refine(image: np.ndarray, mask: np.ndarray, matanyone_processor:
425
  return None
426
 
427
  # ============================================================================
428
- # BACKGROUND REPLACEMENT FUNCTIONS
429
  # ============================================================================
430
 
431
  def replace_background_hq(frame: np.ndarray, mask: np.ndarray, background: np.ndarray,
@@ -433,138 +409,138 @@ def replace_background_hq(frame: np.ndarray, mask: np.ndarray, background: np.nd
433
  """Enhanced background replacement with comprehensive error handling"""
434
  if frame is None or mask is None or background is None:
435
  raise BackgroundReplacementError("Invalid input frame, mask, or background")
436
-
437
  try:
438
- background = cv2.resize(background, (frame.shape[1], frame.shape[0]),
439
- interpolation=cv2.INTER_LANCZOS4)
440
-
441
  if len(mask.shape) == 3:
442
  mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
443
-
444
  if mask.dtype != np.uint8:
445
  mask = mask.astype(np.uint8)
446
-
447
  if mask.max() <= 1.0:
448
  logger.debug("Converting normalized mask to 0-255 range")
449
  mask = (mask * 255).astype(np.uint8)
450
-
451
  try:
452
  result = _advanced_compositing(frame, mask, background)
453
  logger.debug("Advanced compositing successful")
454
  return result
455
-
456
  except Exception as e:
457
  logger.warning(f"Advanced compositing failed: {e}")
458
  if fallback_enabled:
459
  return _simple_compositing(frame, mask, background)
460
- else:
461
- raise BackgroundReplacementError(f"Advanced compositing failed: {e}")
462
-
463
  except BackgroundReplacementError:
464
  raise
465
  except Exception as e:
466
  logger.error(f"Unexpected background replacement error: {e}")
467
  if fallback_enabled:
468
  return _simple_compositing(frame, mask, background)
469
- else:
470
- raise BackgroundReplacementError(f"Unexpected error: {e}")
471
 
472
  def create_professional_background(bg_config: Dict[str, Any] | str, width: int, height: int) -> np.ndarray:
473
  """
474
  Central background builder.
475
  - Accepts a style string OR a dict like:
476
  {'background_choice': 'minimalist', 'custom_path': '/path/to/image.jpg'}
477
- (also backwards compatible with older dicts that contained 'type'/'colors')
478
- - If 'custom_path' exists, we load that image and letterbox-fit it.
479
- - Returns RGB np.ndarray of shape (height, width, 3).
480
  """
481
- # Normalize inputs
482
  choice = "minimalist"
483
  custom_path = None
484
 
485
  if isinstance(bg_config, dict):
486
- # new form
487
  choice = bg_config.get("background_choice", bg_config.get("name", "minimalist"))
488
  custom_path = bg_config.get("custom_path")
489
- # Custom image takes precedence
 
490
  if custom_path and os.path.exists(custom_path):
491
  img_bgr = cv2.imread(custom_path, cv2.IMREAD_COLOR)
492
  if img_bgr is not None:
 
493
  img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
494
- return _fit_image_letterbox(img_rgb, width, height, fill=(32, 32, 32))
495
- else:
496
- logger.warning(f"Failed to read custom background at {custom_path}. Falling back to style.")
497
- # old form (has type/colors) build from spec if present
 
 
498
  if "type" in bg_config and "colors" in bg_config:
499
  if bg_config["type"] == "color":
500
- background = _create_solid_background(bg_config, width, height)
501
  else:
502
- background = _create_gradient_background_enhanced(bg_config, width, height)
503
  return _apply_background_adjustments(background, bg_config)
504
 
505
  elif isinstance(bg_config, str):
506
  choice = bg_config
507
 
508
- # No custom path → use our lightweight styles
509
  choice = (choice or "minimalist").lower()
510
  if choice not in PROFESSIONAL_BACKGROUNDS:
511
  choice = "minimalist"
512
  cfg = PROFESSIONAL_BACKGROUNDS[choice]
513
 
514
  if cfg.get("type") == "color":
515
- background = _create_solid_background(cfg, width, height)
516
  else:
517
- background = _create_gradient_background_enhanced(cfg, width, height)
518
 
519
  background = _apply_background_adjustments(background, cfg)
520
  return background
521
 
522
  # ============================================================================
523
- # VALIDATION FUNCTION
524
  # ============================================================================
525
 
def validate_video_file(video_path: str) -> Tuple[bool, str]:
    """Check whether a path points at a video within the accepted limits.

    The file must exist, be non-empty and at most 2 GiB, be openable by
    ``cv2.VideoCapture``, report at least one frame, a frame rate in
    (0, 120], a positive resolution of at most 4096x4096, and a duration
    (frame count / fps) of at most 300 seconds.

    Args:
        video_path: Filesystem path of the video to inspect. An empty or
            ``None`` value is reported as "not found".

    Returns:
        ``(True, summary)`` when every check passes, otherwise
        ``(False, reason)``. No exception escapes: any unexpected error is
        reported as ``(False, "Error validating video: ...")``.
    """
    if not video_path or not os.path.exists(video_path):
        return False, "Video file not found"

    try:
        file_size = os.path.getsize(video_path)
        if file_size == 0:
            return False, "Video file is empty"
        if file_size > 2 * 1024 * 1024 * 1024:
            return False, "Video file too large (>2GB)"

        cap = cv2.VideoCapture(video_path)
        try:
            if not cap.isOpened():
                return False, "Cannot open video file"

            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            fps = cap.get(cv2.CAP_PROP_FPS)
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        finally:
            # Release on every path, including a failed open or a property
            # read that raises, so the capture handle is never leaked.
            cap.release()

        # Treat a negative count like zero: no usable frames either way.
        if frame_count <= 0:
            return False, "Video appears to be empty (0 frames)"

        # Written as a positive range test so that a NaN fps is rejected
        # too (every comparison with NaN is False).
        if not 0 < fps <= 120:
            return False, f"Invalid frame rate: {fps}"

        if width <= 0 or height <= 0:
            return False, f"Invalid resolution: {width}x{height}"

        if width > 4096 or height > 4096:
            return False, f"Resolution too high: {width}x{height} (max 4096x4096)"

        duration = frame_count / fps
        if duration > 300:
            return False, f"Video too long: {duration:.1f}s (max 300s)"

        return True, f"Valid video: {width}x{height}, {fps:.1f}fps, {duration:.1f}s"

    except Exception as e:
        return False, f"Error validating video: {str(e)}"
570
 
@@ -575,117 +551,111 @@ def validate_video_file(video_path: str) -> Tuple[bool, str]:
575
  def _segment_with_intelligent_prompts(image: np.ndarray, predictor: Any, fallback_enabled: bool = True) -> np.ndarray:
576
  """Intelligent automatic prompt generation for segmentation with safe predictor access"""
577
  try:
578
- # Double-check predictor validity
579
  if predictor is None or not hasattr(predictor, 'predict'):
580
  if fallback_enabled:
581
  return _fallback_segmentation(image)
582
- else:
583
- raise SegmentationError("Invalid predictor in intelligent prompts")
584
-
585
  h, w = image.shape[:2]
586
  pos_points, neg_points = _generate_smart_prompts(image)
587
-
588
  if len(pos_points) == 0:
589
  pos_points = np.array([[w//2, h//2]], dtype=np.float32)
590
-
591
  points = np.vstack([pos_points, neg_points])
592
  labels = np.hstack([
593
  np.ones(len(pos_points), dtype=np.int32),
594
  np.zeros(len(neg_points), dtype=np.int32)
595
  ])
596
-
597
  logger.debug(f"Using {len(pos_points)} positive, {len(neg_points)} negative points")
598
-
599
  with torch.no_grad():
600
  masks, scores, _ = predictor.predict(
601
  point_coords=points,
602
  point_labels=labels,
603
  multimask_output=True
604
  )
605
-
606
  if masks is None or len(masks) == 0:
607
  raise SegmentationError("No masks generated")
608
-
609
  if scores is not None and len(scores) > 0:
610
  best_idx = np.argmax(scores)
611
  best_mask = masks[best_idx]
612
  logger.debug(f"Selected mask {best_idx} with score {scores[best_idx]:.3f}")
613
  else:
614
  best_mask = masks[0]
615
-
616
  return _process_mask(best_mask)
617
-
618
  except Exception as e:
619
  logger.error(f"Intelligent prompting failed: {e}")
620
  if fallback_enabled:
621
  return _fallback_segmentation(image)
622
- else:
623
- raise
624
 
625
  def _segment_with_basic_prompts(image: np.ndarray, predictor: Any, fallback_enabled: bool = True) -> np.ndarray:
626
  """Basic prompting method for segmentation with safe predictor access"""
627
  try:
628
- # Double-check predictor validity
629
  if predictor is None or not hasattr(predictor, 'predict'):
630
  if fallback_enabled:
631
  return _fallback_segmentation(image)
632
- else:
633
- raise SegmentationError("Invalid predictor in basic prompts")
634
-
635
  h, w = image.shape[:2]
636
-
637
  positive_points = np.array([
638
  [w//2, h//3],
639
  [w//2, h//2],
640
  [w//2, 2*h//3],
641
  ], dtype=np.float32)
642
-
643
  negative_points = np.array([
644
  [w//10, h//10],
645
  [9*w//10, h//10],
646
  [w//10, 9*h//10],
647
  [9*w//10, 9*h//10],
648
  ], dtype=np.float32)
649
-
650
  points = np.vstack([positive_points, negative_points])
651
  labels = np.array([1, 1, 1, 0, 0, 0, 0], dtype=np.int32)
652
-
653
  with torch.no_grad():
654
  masks, scores, _ = predictor.predict(
655
  point_coords=points,
656
  point_labels=labels,
657
  multimask_output=True
658
  )
659
-
660
  if masks is None or len(masks) == 0:
661
  raise SegmentationError("No masks generated")
662
-
663
  best_idx = np.argmax(scores) if scores is not None and len(scores) > 0 else 0
664
  best_mask = masks[best_idx]
665
-
666
  return _process_mask(best_mask)
667
-
668
  except Exception as e:
669
  logger.error(f"Basic prompting failed: {e}")
670
  if fallback_enabled:
671
  return _fallback_segmentation(image)
672
- else:
673
- raise
674
 
675
  def _generate_smart_prompts(image: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
676
  """Generate optimal positive/negative points automatically"""
677
  try:
678
  h, w = image.shape[:2]
679
-
680
  try:
681
  saliency = cv2.saliency.StaticSaliencySpectralResidual_create()
682
  success, saliency_map = saliency.computeSaliency(image)
683
-
684
  if success:
685
  saliency_thresh = cv2.threshold(saliency_map, 0.7, 1, cv2.THRESH_BINARY)[1]
686
- contours, _ = cv2.findContours((saliency_thresh * 255).astype(np.uint8),
687
  cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
688
-
689
  positive_points = []
690
  if contours:
691
  for contour in sorted(contours, key=cv2.contourArea, reverse=True)[:3]:
@@ -695,13 +665,13 @@ def _generate_smart_prompts(image: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
695
  cy = int(M["m01"] / M["m00"])
696
  if 0 < cx < w and 0 < cy < h:
697
  positive_points.append([cx, cy])
698
-
699
  if positive_points:
700
  logger.debug(f"Generated {len(positive_points)} saliency-based points")
701
  positive_points = np.array(positive_points, dtype=np.float32)
702
  else:
703
  raise Exception("No valid saliency points found")
704
-
705
  except Exception as e:
706
  logger.debug(f"Saliency method failed: {e}, using fallback")
707
  positive_points = np.array([
@@ -709,7 +679,7 @@ def _generate_smart_prompts(image: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
709
  [w//2, h//2],
710
  [w//2, 2*h//3],
711
  ], dtype=np.float32)
712
-
713
  negative_points = np.array([
714
  [10, 10],
715
  [w-10, 10],
@@ -718,9 +688,9 @@ def _generate_smart_prompts(image: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
718
  [w//2, 5],
719
  [w//2, h-5],
720
  ], dtype=np.float32)
721
-
722
  return positive_points, negative_points
723
-
724
  except Exception as e:
725
  logger.warning(f"Smart prompt generation failed: {e}")
726
  h, w = image.shape[:2]
@@ -732,32 +702,31 @@ def _generate_smart_prompts(image: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
732
  # HELPER FUNCTIONS - REFINEMENT
733
  # ============================================================================
734
 
735
- def _auto_refine_mask_iteratively(image: np.ndarray, initial_mask: np.ndarray,
736
- predictor: Any, max_iterations: int = 2) -> np.ndarray:
737
  """Automatically refine mask based on quality assessment with safe predictor access"""
738
  try:
739
- # Check predictor validity before iterative refinement
740
  if predictor is None or not hasattr(predictor, 'predict'):
741
  logger.warning("Predictor invalid for iterative refinement, returning initial mask")
742
  return initial_mask
743
-
744
  current_mask = initial_mask.copy()
745
-
746
  for iteration in range(max_iterations):
747
  quality_score = _assess_mask_quality(current_mask, image)
748
  logger.debug(f"Iteration {iteration}: quality score = {quality_score:.3f}")
749
-
750
  if quality_score > 0.85:
751
  logger.debug(f"Quality sufficient after {iteration} iterations")
752
  break
753
-
754
  problem_areas = _find_mask_errors(current_mask, image)
755
-
756
  if np.any(problem_areas):
757
  corrective_points, corrective_labels = _generate_corrective_prompts(
758
  image, current_mask, problem_areas
759
  )
760
-
761
  if len(corrective_points) > 0:
762
  try:
763
  with torch.no_grad():
@@ -767,26 +736,26 @@ def _auto_refine_mask_iteratively(image: np.ndarray, initial_mask: np.ndarray,
767
  mask_input=current_mask[None, :, :],
768
  multimask_output=False
769
  )
770
-
771
  if masks is not None and len(masks) > 0:
772
  refined_mask = _process_mask(masks[0])
773
-
774
  if _assess_mask_quality(refined_mask, image) > quality_score:
775
  current_mask = refined_mask
776
  logger.debug(f"Improved mask in iteration {iteration}")
777
  else:
778
  logger.debug(f"Refinement didn't improve quality in iteration {iteration}")
779
  break
780
-
781
  except Exception as e:
782
  logger.debug(f"Refinement iteration {iteration} failed: {e}")
783
  break
784
  else:
785
  logger.debug("No problem areas detected")
786
  break
787
-
788
  return current_mask
789
-
790
  except Exception as e:
791
  logger.warning(f"Iterative refinement failed: {e}")
792
  return initial_mask
@@ -796,11 +765,11 @@ def _assess_mask_quality(mask: np.ndarray, image: np.ndarray) -> float:
796
  try:
797
  h, w = image.shape[:2]
798
  scores = []
799
-
800
  mask_area = np.sum(mask > 127)
801
  total_area = h * w
802
  area_ratio = mask_area / total_area
803
-
804
  if 0.05 <= area_ratio <= 0.8:
805
  area_score = 1.0
806
  elif area_ratio < 0.05:
@@ -808,32 +777,32 @@ def _assess_mask_quality(mask: np.ndarray, image: np.ndarray) -> float:
808
  else:
809
  area_score = max(0, 1.0 - (area_ratio - 0.8) / 0.2)
810
  scores.append(area_score)
811
-
812
  mask_binary = mask > 127
813
  if np.any(mask_binary):
814
  mask_center_y, mask_center_x = np.where(mask_binary)
815
  center_y = np.mean(mask_center_y) / h
816
  center_x = np.mean(mask_center_x) / w
817
-
818
  center_score = 1.0 - min(abs(center_x - 0.5), abs(center_y - 0.5))
819
  scores.append(center_score)
820
  else:
821
  scores.append(0.0)
822
-
823
  edges = cv2.Canny(mask, 50, 150)
824
  edge_density = np.sum(edges > 0) / total_area
825
  smoothness_score = max(0, 1.0 - edge_density * 10)
826
  scores.append(smoothness_score)
827
-
828
  num_labels, _ = cv2.connectedComponents(mask)
829
  connectivity_score = max(0, 1.0 - (num_labels - 2) * 0.2)
830
  scores.append(connectivity_score)
831
-
832
  weights = [0.3, 0.2, 0.3, 0.2]
833
  overall_score = np.average(scores, weights=weights)
834
-
835
  return overall_score
836
-
837
  except Exception as e:
838
  logger.warning(f"Quality assessment failed: {e}")
839
  return 0.5
@@ -852,35 +821,35 @@ def _find_mask_errors(mask: np.ndarray, image: np.ndarray) -> np.ndarray:
852
  logger.warning(f"Error detection failed: {e}")
853
  return np.zeros_like(mask, dtype=bool)
854
 
855
- def _generate_corrective_prompts(image: np.ndarray, mask: np.ndarray,
856
- problem_areas: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
857
  """Generate corrective prompts based on problem areas"""
858
  try:
859
- contours, _ = cv2.findContours(problem_areas.astype(np.uint8),
860
  cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
861
-
862
  corrective_points = []
863
  corrective_labels = []
864
-
865
  for contour in contours:
866
  if cv2.contourArea(contour) > 100:
867
  M = cv2.moments(contour)
868
  if M["m00"] != 0:
869
  cx = int(M["m10"] / M["m00"])
870
  cy = int(M["m01"] / M["m00"])
871
-
872
  current_mask_value = mask[cy, cx]
873
-
874
  if current_mask_value < 127:
875
  corrective_points.append([cx, cy])
876
  corrective_labels.append(1)
877
  else:
878
  corrective_points.append([cx, cy])
879
  corrective_labels.append(0)
880
-
881
  return (np.array(corrective_points, dtype=np.float32) if corrective_points else np.array([]).reshape(0, 2),
882
  np.array(corrective_labels, dtype=np.int32) if corrective_labels else np.array([], dtype=np.int32))
883
-
884
  except Exception as e:
885
  logger.warning(f"Corrective prompt generation failed: {e}")
886
  return np.array([]).reshape(0, 2), np.array([], dtype=np.int32)
@@ -894,10 +863,10 @@ def _process_mask(mask: np.ndarray) -> np.ndarray:
894
  try:
895
  if len(mask.shape) > 2:
896
  mask = mask.squeeze()
897
-
898
  if len(mask.shape) > 2:
899
  mask = mask[:, :, 0] if mask.shape[2] > 0 else mask.sum(axis=2)
900
-
901
  if mask.dtype == bool:
902
  mask = mask.astype(np.uint8) * 255
903
  elif mask.dtype == np.float32 or mask.dtype == np.float64:
@@ -907,15 +876,15 @@ def _process_mask(mask: np.ndarray) -> np.ndarray:
907
  mask = np.clip(mask, 0, 255).astype(np.uint8)
908
  else:
909
  mask = mask.astype(np.uint8)
910
-
911
  kernel = np.ones((3, 3), np.uint8)
912
  mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)
913
  mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)
914
-
915
  _, mask = cv2.threshold(mask, 127, 255, cv2.THRESH_BINARY)
916
-
917
  return mask
918
-
919
  except Exception as e:
920
  logger.error(f"Mask processing failed: {e}")
921
  h, w = mask.shape[:2] if len(mask.shape) >= 2 else (256, 256)
@@ -935,7 +904,6 @@ def _validate_mask_quality(mask: np.ndarray, image_shape: Tuple[int, int]) -> bo
935
  mask_area = int(np.sum(mask > 127))
936
  area_ratio = mask_area / total_area
937
 
938
- # Only reject extreme cases
939
  if area_ratio < 0.02 or area_ratio > 0.95:
940
  logger.warning(f"Suspicious mask area ratio (hard reject): {area_ratio:.3f}")
941
  return False
@@ -960,41 +928,41 @@ def _fallback_segmentation(image: np.ndarray) -> np.ndarray:
960
  try:
961
  logger.info("Using fallback segmentation strategy")
962
  h, w = image.shape[:2]
963
-
964
  try:
965
  gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
966
-
967
  edge_pixels = np.concatenate([
968
  gray[0, :], gray[-1, :], gray[:, 0], gray[:, -1]
969
  ])
970
  bg_color = np.median(edge_pixels)
971
-
972
  diff = np.abs(gray.astype(float) - bg_color)
973
  mask = (diff > 30).astype(np.uint8) * 255
974
-
975
  kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7, 7))
976
  mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)
977
  mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)
978
-
979
  if _validate_mask_quality(mask, image.shape[:2]):
980
  logger.info("Background subtraction fallback successful")
981
  return mask
982
-
983
  except Exception as e:
984
  logger.warning(f"Background subtraction fallback failed: {e}")
985
-
986
  mask = np.zeros((h, w), dtype=np.uint8)
987
-
988
  center_x, center_y = w // 2, h // 2
989
  radius_x, radius_y = w // 3, h // 2.5
990
-
991
  y, x = np.ogrid[:h, :w]
992
  mask_ellipse = ((x - center_x) / radius_x) ** 2 + ((y - center_y) / radius_y) ** 2 <= 1
993
  mask[mask_ellipse] = 255
994
-
995
  logger.info("Using geometric fallback mask")
996
  return mask
997
-
998
  except Exception as e:
999
  logger.error(f"All fallback strategies failed: {e}")
1000
  h, w = image.shape[:2]
@@ -1008,28 +976,28 @@ def _guided_filter_approx(guide: np.ndarray, mask: np.ndarray, radius: int = 8,
1008
  guide_gray = cv2.cvtColor(guide, cv2.COLOR_BGR2GRAY) if len(guide.shape) == 3 else guide
1009
  guide_gray = guide_gray.astype(np.float32) / 255.0
1010
  mask_float = mask.astype(np.float32) / 255.0
1011
-
1012
  kernel_size = 2 * radius + 1
1013
-
1014
  mean_guide = cv2.boxFilter(guide_gray, -1, (kernel_size, kernel_size))
1015
  mean_mask = cv2.boxFilter(mask_float, -1, (kernel_size, kernel_size))
1016
  corr_guide_mask = cv2.boxFilter(guide_gray * mask_float, -1, (kernel_size, kernel_size))
1017
-
1018
  cov_guide_mask = corr_guide_mask - mean_guide * mean_mask
1019
  mean_guide_sq = cv2.boxFilter(guide_gray * guide_gray, -1, (kernel_size, kernel_size))
1020
  var_guide = mean_guide_sq - mean_guide * mean_guide
1021
-
1022
  a = cov_guide_mask / (var_guide + eps)
1023
  b = mean_mask - a * mean_guide
1024
-
1025
  mean_a = cv2.boxFilter(a, -1, (kernel_size, kernel_size))
1026
  mean_b = cv2.boxFilter(b, -1, (kernel_size, kernel_size))
1027
-
1028
  output = mean_a * guide_gray + mean_b
1029
  output = np.clip(output * 255, 0, 255).astype(np.uint8)
1030
-
1031
  return output
1032
-
1033
  except Exception as e:
1034
  logger.warning(f"Guided filter approximation failed: {e}")
1035
  return mask
@@ -1043,32 +1011,31 @@ def _advanced_compositing(frame: np.ndarray, mask: np.ndarray, background: np.nd
1043
  try:
1044
  threshold = 100
1045
  _, mask_binary = cv2.threshold(mask, threshold, 255, cv2.THRESH_BINARY)
1046
-
1047
  kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
1048
  mask_binary = cv2.morphologyEx(mask_binary, cv2.MORPH_CLOSE, kernel)
1049
  mask_binary = cv2.morphologyEx(mask_binary, cv2.MORPH_OPEN, kernel)
1050
-
1051
  mask_smooth = cv2.GaussianBlur(mask_binary.astype(np.float32), (5, 5), 1.0)
1052
  mask_smooth = mask_smooth / 255.0
1053
-
1054
  mask_smooth = np.power(mask_smooth, 0.8)
1055
-
1056
- mask_smooth = np.where(mask_smooth > 0.5,
1057
- np.minimum(mask_smooth * 1.1, 1.0),
1058
- mask_smooth * 0.9)
1059
-
1060
  frame_adjusted = _color_match_edges(frame, background, mask_smooth)
1061
-
1062
  alpha_3ch = np.stack([mask_smooth] * 3, axis=2)
1063
-
1064
  frame_float = frame_adjusted.astype(np.float32)
1065
  background_float = background.astype(np.float32)
1066
-
1067
  result = frame_float * alpha_3ch + background_float * (1 - alpha_3ch)
1068
  result = np.clip(result, 0, 255).astype(np.uint8)
1069
-
1070
  return result
1071
-
1072
  except Exception as e:
1073
  logger.error(f"Advanced compositing error: {e}")
1074
  raise
@@ -1079,25 +1046,25 @@ def _color_match_edges(frame: np.ndarray, background: np.ndarray, alpha: np.ndar
1079
  edge_mask = cv2.Sobel(alpha, cv2.CV_64F, 1, 1, ksize=3)
1080
  edge_mask = np.abs(edge_mask)
1081
  edge_mask = (edge_mask > 0.1).astype(np.float32)
1082
-
1083
  edge_areas = edge_mask > 0
1084
  if not np.any(edge_areas):
1085
  return frame
1086
-
1087
  frame_adjusted = frame.copy().astype(np.float32)
1088
  background_float = background.astype(np.float32)
1089
-
1090
  adjustment_strength = 0.1
1091
  for c in range(3):
1092
  frame_adjusted[:, :, c] = np.where(
1093
  edge_areas,
1094
- frame_adjusted[:, :, c] * (1 - adjustment_strength) +
1095
  background_float[:, :, c] * adjustment_strength,
1096
  frame_adjusted[:, :, c]
1097
  )
1098
-
1099
  return np.clip(frame_adjusted, 0, 255).astype(np.uint8)
1100
-
1101
  except Exception as e:
1102
  logger.warning(f"Color matching failed: {e}")
1103
  return frame
@@ -1106,22 +1073,22 @@ def _simple_compositing(frame: np.ndarray, mask: np.ndarray, background: np.ndar
1106
  """Simple fallback compositing method"""
1107
  try:
1108
  logger.info("Using simple compositing fallback")
1109
-
1110
  background = cv2.resize(background, (frame.shape[1], frame.shape[0]))
1111
-
1112
  if len(mask.shape) == 3:
1113
  mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
1114
  if mask.max() <= 1.0:
1115
  mask = (mask * 255).astype(np.uint8)
1116
-
1117
  _, mask_binary = cv2.threshold(mask, 127, 255, cv2.THRESH_BINARY)
1118
-
1119
  mask_norm = mask_binary.astype(np.float32) / 255.0
1120
  mask_3ch = np.stack([mask_norm] * 3, axis=2)
1121
-
1122
  result = frame * mask_3ch + background * (1 - mask_3ch)
1123
  return result.astype(np.uint8)
1124
-
1125
  except Exception as e:
1126
  logger.error(f"Simple compositing failed: {e}")
1127
  return frame
@@ -1131,27 +1098,27 @@ def _simple_compositing(frame: np.ndarray, mask: np.ndarray, background: np.ndar
1131
  # ============================================================================
1132
 
1133
  def _create_solid_background(bg_config: Dict[str, Any], width: int, height: int) -> np.ndarray:
1134
- """Create solid color background"""
1135
  color_hex = bg_config["colors"][0].lstrip('#')
1136
  color_rgb = tuple(int(color_hex[i:i+2], 16) for i in (0, 2, 4))
1137
  color_bgr = color_rgb[::-1]
1138
  return np.full((height, width, 3), color_bgr, dtype=np.uint8)
1139
 
1140
  def _create_gradient_background_enhanced(bg_config: Dict[str, Any], width: int, height: int) -> np.ndarray:
1141
- """Create enhanced gradient background with better quality"""
1142
  try:
1143
  colors = bg_config["colors"]
1144
  direction = bg_config.get("direction", "vertical")
1145
-
1146
  rgb_colors = []
1147
  for color_hex in colors:
1148
  color_hex = color_hex.lstrip('#')
1149
  rgb = tuple(int(color_hex[i:i+2], 16) for i in (0, 2, 4))
1150
  rgb_colors.append(rgb)
1151
-
1152
  if not rgb_colors:
1153
  rgb_colors = [(128, 128, 128)]
1154
-
1155
  if direction == "vertical":
1156
  background = _create_vertical_gradient(rgb_colors, width, height)
1157
  elif direction == "horizontal":
@@ -1162,26 +1129,22 @@ def _create_gradient_background_enhanced(bg_config: Dict[str, Any], width: int,
1162
  background = _create_radial_gradient(rgb_colors, width, height, direction == "soft_radial")
1163
  else:
1164
  background = _create_vertical_gradient(rgb_colors, width, height)
1165
-
1166
  return cv2.cvtColor(background, cv2.COLOR_RGB2BGR)
1167
-
1168
  except Exception as e:
1169
  logger.error(f"Gradient creation error: {e}")
1170
  return np.full((height, width, 3), (128, 128, 128), dtype=np.uint8)
1171
 
1172
  def _create_vertical_gradient(colors: list, width: int, height: int) -> np.ndarray:
1173
- """Create vertical gradient using NumPy for performance"""
1174
  gradient = np.zeros((height, width, 3), dtype=np.uint8)
1175
-
1176
  for y in range(height):
1177
  progress = y / height if height > 0 else 0
1178
  color = _interpolate_color(colors, progress)
1179
  gradient[y, :] = color
1180
-
1181
  return gradient
1182
 
1183
  def _create_horizontal_gradient(colors: list, width: int, height: int) -> np.ndarray:
1184
- """Create horizontal gradient using NumPy for performance"""
1185
  gradient = np.zeros((height, width, 3), dtype=np.uint8)
1186
  for x in range(width):
1187
  progress = x / width if width > 0 else 0
@@ -1190,59 +1153,53 @@ def _create_horizontal_gradient(colors: list, width: int, height: int) -> np.nda
1190
  return gradient
1191
 
1192
  def _create_diagonal_gradient(colors: list, width: int, height: int) -> np.ndarray:
1193
- """Create diagonal gradient using vectorized operations"""
1194
  y_coords, x_coords = np.mgrid[0:height, 0:width]
1195
  max_distance = width + height
1196
  progress = (x_coords + y_coords) / max_distance
1197
  progress = np.clip(progress, 0, 1)
1198
-
1199
  gradient = np.zeros((height, width, 3), dtype=np.uint8)
1200
  for c in range(3):
1201
  gradient[:, :, c] = _vectorized_color_interpolation(colors, progress, c)
1202
-
1203
  return gradient
1204
 
1205
  def _create_radial_gradient(colors: list, width: int, height: int, soft: bool = False) -> np.ndarray:
1206
- """Create radial gradient using vectorized operations"""
1207
  center_x, center_y = width // 2, height // 2
1208
  max_distance = np.sqrt(center_x**2 + center_y**2)
1209
-
1210
- y_coords, x_coords = np.mgrid[0:height, 0:width]
1211
  distances = np.sqrt((x - center_x)**2 + (y - center_y)**2)
1212
  progress = distances / max_distance
1213
  progress = np.clip(progress, 0, 1)
1214
-
1215
  if soft:
1216
  progress = np.power(progress, 0.7)
1217
-
1218
  gradient = np.zeros((height, width, 3), dtype=np.uint8)
1219
  for c in range(3):
1220
  gradient[:, :, c] = _vectorized_color_interpolation(colors, progress, c)
1221
-
1222
  return gradient
1223
 
1224
  def _vectorized_color_interpolation(colors: list, progress: np.ndarray, channel: int) -> np.ndarray:
1225
- """Vectorized color interpolation for performance"""
1226
  if len(colors) == 1:
1227
  return np.full_like(progress, colors[0][channel], dtype=np.uint8)
1228
-
1229
  num_segments = len(colors) - 1
1230
  segment_progress = progress * num_segments
1231
  segment_indices = np.floor(segment_progress).astype(int)
1232
  segment_indices = np.clip(segment_indices, 0, num_segments - 1)
1233
  local_progress = segment_progress - segment_indices
1234
-
1235
  start_colors = np.array([colors[i][channel] for i in range(len(colors))])
1236
  end_colors = np.array([colors[min(i + 1, len(colors) - 1)][channel] for i in range(len(colors))])
1237
-
1238
  start_vals = start_colors[segment_indices]
1239
  end_vals = end_colors[segment_indices]
1240
-
1241
  result = start_vals + (end_vals - start_vals) * local_progress
1242
  return np.clip(result, 0, 255).astype(np.uint8)
1243
 
1244
  def _interpolate_color(colors: list, progress: float) -> tuple:
1245
- """Interpolate between multiple colors"""
1246
  if len(colors) == 1:
1247
  return colors[0]
1248
  elif len(colors) == 2:
@@ -1263,18 +1220,17 @@ def _interpolate_color(colors: list, progress: float) -> tuple:
1263
  return (r, g, b)
1264
 
1265
  def _apply_background_adjustments(background: np.ndarray, bg_config: Dict[str, Any]) -> np.ndarray:
1266
- """Apply brightness and contrast adjustments to background"""
1267
  try:
1268
  brightness = bg_config.get("brightness", 1.0)
1269
  contrast = bg_config.get("contrast", 1.0)
1270
-
1271
  if brightness != 1.0 or contrast != 1.0:
1272
  background = background.astype(np.float32)
1273
  background = background * contrast * brightness
1274
  background = np.clip(background, 0, 255).astype(np.uint8)
1275
-
1276
  return background
1277
-
1278
  except Exception as e:
1279
  logger.warning(f"Background adjustment failed: {e}")
1280
  return background
 
31
  PROFESSIONAL_BACKGROUNDS = {
32
  "office_modern": {
33
  "name": "Modern Office",
34
+ "type": "gradient",
35
  "colors": ["#f8f9fa", "#e9ecef", "#dee2e6"],
36
  "direction": "diagonal",
37
  "description": "Clean, contemporary office environment",
 
102
  pass
103
 
104
  # ============================================================================
105
+ # BACKGROUND HELPERS (LETTERBOX)
106
  # ============================================================================
107
 
108
  def _fit_image_letterbox(img_rgb: np.ndarray, dst_w: int, dst_h: int, fill=(32, 32, 32)) -> np.ndarray:
109
  """
110
  Fit an RGB image into (dst_h, dst_w) with letterboxing (no stretch), borders filled with `fill`.
111
+ Returns an RGB image.
112
  """
113
  h, w = img_rgb.shape[:2]
114
  if h == 0 or w == 0:
 
139
  """High-quality person segmentation with intelligent automation"""
140
  if not USE_ENHANCED_SEGMENTATION:
141
  return segment_person_hq_original(image, predictor, fallback_enabled)
142
+
143
  logger.debug("Using ENHANCED segmentation with intelligent automation")
144
+
145
  if image is None or image.size == 0:
146
  raise SegmentationError("Invalid input image")
147
+
148
  try:
 
149
  if predictor is None:
150
  if fallback_enabled:
151
  logger.warning("SAM2 predictor not available, using fallback")
152
  return _fallback_segmentation(image)
153
+ raise SegmentationError("SAM2 predictor not available")
154
+
 
 
155
  if not hasattr(predictor, 'set_image') or not hasattr(predictor, 'predict'):
156
  logger.warning("Predictor missing required methods, using fallback")
157
  if fallback_enabled:
158
  return _fallback_segmentation(image)
159
+ raise SegmentationError("Invalid predictor object")
160
+
 
 
161
  try:
162
  predictor.set_image(image)
163
  except Exception as e:
164
  logger.error(f"Failed to set image in predictor: {e}")
165
  if fallback_enabled:
166
  return _fallback_segmentation(image)
167
+ raise SegmentationError(f"Predictor setup failed: {e}")
168
+
 
169
  if USE_INTELLIGENT_PROMPTING:
170
  mask = _segment_with_intelligent_prompts(image, predictor, fallback_enabled)
171
  else:
172
  mask = _segment_with_basic_prompts(image, predictor, fallback_enabled)
173
+
174
  if USE_ITERATIVE_REFINEMENT and mask is not None:
175
  mask = _auto_refine_mask_iteratively(image, mask, predictor)
176
+
177
  if not _validate_mask_quality(mask, image.shape[:2]):
178
  logger.warning("Mask quality validation failed")
179
  if fallback_enabled:
180
  return _fallback_segmentation(image)
181
+ raise SegmentationError("Poor mask quality")
182
+
 
183
  logger.debug(f"Enhanced segmentation successful - mask range: {mask.min()}-{mask.max()}")
184
  return mask
185
+
186
  except SegmentationError:
187
  raise
188
  except Exception as e:
189
  logger.error(f"Unexpected segmentation error: {e}")
190
  if fallback_enabled:
191
  return _fallback_segmentation(image)
192
+ raise SegmentationError(f"Unexpected error: {e}")
 
193
 
194
  def segment_person_hq_original(image: np.ndarray, predictor: Any, fallback_enabled: bool = True) -> np.ndarray:
195
  """Original version of person segmentation for rollback"""
196
  if image is None or image.size == 0:
197
  raise SegmentationError("Invalid input image")
198
+
199
  try:
 
200
  if predictor is None:
201
  if fallback_enabled:
202
  logger.warning("SAM2 predictor not available, using fallback")
203
  return _fallback_segmentation(image)
204
+ raise SegmentationError("SAM2 predictor not available")
205
+
 
 
206
  if not hasattr(predictor, 'set_image') or not hasattr(predictor, 'predict'):
207
  logger.warning("Predictor missing required methods, using fallback")
208
  if fallback_enabled:
209
  return _fallback_segmentation(image)
210
+ raise SegmentationError("Invalid predictor object")
211
+
 
 
212
  try:
213
  predictor.set_image(image)
214
  except Exception as e:
215
  logger.error(f"Failed to set image in predictor: {e}")
216
  if fallback_enabled:
217
  return _fallback_segmentation(image)
218
+ raise SegmentationError(f"Predictor setup failed: {e}")
219
+
 
220
  h, w = image.shape[:2]
221
+
222
  points = np.array([
223
  [w//2, h//4],
224
  [w//2, h//2],
 
229
  [w//4, 2*h//3],
230
  [3*w//4, 2*h//3],
231
  ], dtype=np.float32)
232
+
233
  labels = np.ones(len(points), dtype=np.int32)
234
+
 
235
  try:
236
  with torch.no_grad():
237
  masks, scores, _ = predictor.predict(
 
243
  logger.error(f"SAM2 prediction failed: {e}")
244
  if fallback_enabled:
245
  return _fallback_segmentation(image)
246
+ raise SegmentationError(f"Prediction failed: {e}")
247
+
 
248
  if masks is None or len(masks) == 0:
249
  logger.warning("SAM2 returned no masks")
250
  if fallback_enabled:
251
  return _fallback_segmentation(image)
252
+ raise SegmentationError("No masks generated")
253
+
 
254
  if scores is None or len(scores) == 0:
255
  logger.warning("SAM2 returned no scores")
256
  best_mask = masks[0]
 
258
  best_idx = np.argmax(scores)
259
  best_mask = masks[best_idx]
260
  logger.debug(f"Selected mask {best_idx} with score {scores[best_idx]:.3f}")
261
+
262
  mask = _process_mask(best_mask)
263
+
264
  if not _validate_mask_quality(mask, image.shape[:2]):
265
  logger.warning("Mask quality validation failed")
266
  if fallback_enabled:
267
  return _fallback_segmentation(image)
268
+ raise SegmentationError("Poor mask quality")
269
+
 
270
  logger.debug(f"Segmentation successful - mask range: {mask.min()}-{mask.max()}")
271
  return mask
272
+
273
  except SegmentationError:
274
  raise
275
  except Exception as e:
276
  logger.error(f"Unexpected segmentation error: {e}")
277
  if fallback_enabled:
278
  return _fallback_segmentation(image)
279
+ raise SegmentationError(f"Unexpected error: {e}")
 
280
 
281
  # ============================================================================
282
+ # MASK REFINEMENT
283
  # ============================================================================
284
 
285
def refine_mask_hq(image: np.ndarray, mask: np.ndarray, matanyone_processor: Any,
                   fallback_enabled: bool = True) -> np.ndarray:
    """Refine a mask with MatAnyone, falling back to OpenCV enhancement.

    The mask is first normalised by ``_process_mask``. If a processor is given,
    ``_matanyone_refine`` is tried and its result returned when it is not None
    and passes ``_validate_mask_quality``. Otherwise, and on any MatAnyone
    error, ``enhance_mask_opencv_advanced`` is used when ``fallback_enabled``.

    Raises:
        MaskRefinementError: if ``image`` or ``mask`` is None, or if refinement
            fails while ``fallback_enabled`` is False.
    """
    if image is None or mask is None:
        raise MaskRefinementError("Invalid input image or mask")

    try:
        mask = _process_mask(mask)

        if matanyone_processor is not None:
            try:
                logger.debug("Attempting MatAnyone refinement")
                candidate = _matanyone_refine(image, mask, matanyone_processor)

                if candidate is not None and _validate_mask_quality(candidate, image.shape[:2]):
                    logger.debug("MatAnyone refinement successful")
                    return candidate
                logger.warning("MatAnyone produced poor quality mask")

            except Exception as e:
                # A MatAnyone failure is not fatal; the OpenCV path below may still run.
                logger.warning(f"MatAnyone refinement failed: {e}")

        if not fallback_enabled:
            raise MaskRefinementError("MatAnyone failed and fallback disabled")

        logger.debug("Using enhanced OpenCV refinement")
        return enhance_mask_opencv_advanced(image, mask)

    except MaskRefinementError:
        raise
    except Exception as e:
        logger.error(f"Unexpected mask refinement error: {e}")
        if not fallback_enabled:
            raise MaskRefinementError(f"Unexpected error: {e}")
        return enhance_mask_opencv_advanced(image, mask)
 
321
 
322
  def enhance_mask_opencv_advanced(image: np.ndarray, mask: np.ndarray) -> np.ndarray:
323
  """Advanced OpenCV-based mask enhancement with multiple techniques"""
324
  try:
325
  if len(mask.shape) == 3:
326
  mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
327
+
328
  if mask.max() <= 1.0:
329
  mask = (mask * 255).astype(np.uint8)
330
+
331
  refined_mask = cv2.bilateralFilter(mask, 9, 75, 75)
332
  refined_mask = _guided_filter_approx(image, refined_mask, radius=8, eps=0.2)
333
+
334
  kernel_close = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
335
  refined_mask = cv2.morphologyEx(refined_mask, cv2.MORPH_CLOSE, kernel_close)
336
+
337
  kernel_open = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
338
  refined_mask = cv2.morphologyEx(refined_mask, cv2.MORPH_OPEN, kernel_open)
339
+
340
  refined_mask = cv2.GaussianBlur(refined_mask, (3, 3), 0.8)
 
341
  _, refined_mask = cv2.threshold(refined_mask, 127, 255, cv2.THRESH_BINARY)
342
+
343
  return refined_mask
344
+
345
  except Exception as e:
346
  logger.warning(f"Enhanced OpenCV refinement failed: {e}")
347
  return cv2.GaussianBlur(mask, (5, 5), 1.0)
348
 
349
  # ============================================================================
350
+ # MATANYONE REFINEMENT (SAFE)
351
  # ============================================================================
352
 
353
  def _matanyone_refine(image: np.ndarray, mask: np.ndarray, matanyone_processor: Any) -> Optional[np.ndarray]:
354
  """Safe MatAnyOne refinement for a single frame with correct interface."""
355
  try:
 
356
  if not hasattr(matanyone_processor, 'step') or not hasattr(matanyone_processor, 'output_prob_to_mask'):
357
  logger.warning("MatAnyOne processor missing required methods (step, output_prob_to_mask)")
358
  return None
359
 
360
+ # image float32 RGB CHW
361
  if isinstance(image, np.ndarray):
362
  img = image.astype(np.float32)
363
  if img.max() > 1.0:
364
  img /= 255.0
365
  if img.shape[2] == 3:
366
+ img = np.transpose(img, (2, 0, 1))
367
  img_tensor = torch.from_numpy(img)
368
  else:
369
+ img_tensor = image
370
 
371
+ # mask float32 HW
372
  if isinstance(mask, np.ndarray):
373
  mask_tensor = mask.astype(np.float32)
374
  if mask_tensor.max() > 1.0:
 
379
  else:
380
  mask_tensor = mask
381
 
 
382
  device = getattr(matanyone_processor, 'device', 'cpu')
383
  img_tensor = img_tensor.to(device)
384
  mask_tensor = mask_tensor.to(device)
385
 
 
 
386
  with torch.no_grad():
387
+ output_prob = matanyone_processor.step(img_tensor, mask_tensor, objects=[1])
388
  refined_mask_tensor = matanyone_processor.output_prob_to_mask(output_prob)
389
 
390
  refined_mask = refined_mask_tensor.squeeze().detach().cpu().numpy()
 
401
  return None
402
 
403
  # ============================================================================
404
+ # BACKGROUND REPLACEMENT
405
  # ============================================================================
406
 
407
  def replace_background_hq(frame: np.ndarray, mask: np.ndarray, background: np.ndarray,
 
409
  """Enhanced background replacement with comprehensive error handling"""
410
  if frame is None or mask is None or background is None:
411
  raise BackgroundReplacementError("Invalid input frame, mask, or background")
412
+
413
  try:
414
+ background = cv2.resize(background, (frame.shape[1], frame.shape[0]),
415
+ interpolation=cv2.INTER_LANCZOS4)
416
+
417
  if len(mask.shape) == 3:
418
  mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
419
+
420
  if mask.dtype != np.uint8:
421
  mask = mask.astype(np.uint8)
422
+
423
  if mask.max() <= 1.0:
424
  logger.debug("Converting normalized mask to 0-255 range")
425
  mask = (mask * 255).astype(np.uint8)
426
+
427
  try:
428
  result = _advanced_compositing(frame, mask, background)
429
  logger.debug("Advanced compositing successful")
430
  return result
431
+
432
  except Exception as e:
433
  logger.warning(f"Advanced compositing failed: {e}")
434
  if fallback_enabled:
435
  return _simple_compositing(frame, mask, background)
436
+ raise BackgroundReplacementError(f"Advanced compositing failed: {e}")
437
+
 
438
  except BackgroundReplacementError:
439
  raise
440
  except Exception as e:
441
  logger.error(f"Unexpected background replacement error: {e}")
442
  if fallback_enabled:
443
  return _simple_compositing(frame, mask, background)
444
+ raise BackgroundReplacementError(f"Unexpected error: {e}")
 
445
 
446
  def create_professional_background(bg_config: Dict[str, Any] | str, width: int, height: int) -> np.ndarray:
447
  """
448
  Central background builder.
449
  - Accepts a style string OR a dict like:
450
  {'background_choice': 'minimalist', 'custom_path': '/path/to/image.jpg'}
451
+ (backwards compatible with older dicts that contained 'type'/'colors')
452
+ - If 'custom_path' exists, load and letterbox-fit it.
453
+ - Returns **BGR** (consistent with OpenCV).
454
  """
 
455
  choice = "minimalist"
456
  custom_path = None
457
 
458
  if isinstance(bg_config, dict):
 
459
  choice = bg_config.get("background_choice", bg_config.get("name", "minimalist"))
460
  custom_path = bg_config.get("custom_path")
461
+
462
+ # 1) Custom image takes precedence
463
  if custom_path and os.path.exists(custom_path):
464
  img_bgr = cv2.imread(custom_path, cv2.IMREAD_COLOR)
465
  if img_bgr is not None:
466
+ # Fit in RGB, convert back to BGR for consistency
467
  img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
468
+ fitted_rgb = _fit_image_letterbox(img_rgb, width, height, fill=(32, 32, 32))
469
+ fitted_bgr = cv2.cvtColor(fitted_rgb, cv2.COLOR_RGB2BGR)
470
+ return fitted_bgr
471
+ logger.warning(f"Failed to read custom background at {custom_path}. Falling back to style.")
472
+
473
+ # 2) Old dict form with 'type'/'colors'
474
  if "type" in bg_config and "colors" in bg_config:
475
  if bg_config["type"] == "color":
476
+ background = _create_solid_background(bg_config, width, height) # already BGR
477
  else:
478
+ background = _create_gradient_background_enhanced(bg_config, width, height) # returns BGR
479
  return _apply_background_adjustments(background, bg_config)
480
 
481
  elif isinstance(bg_config, str):
482
  choice = bg_config
483
 
484
+ # 3) Built-in styles
485
  choice = (choice or "minimalist").lower()
486
  if choice not in PROFESSIONAL_BACKGROUNDS:
487
  choice = "minimalist"
488
  cfg = PROFESSIONAL_BACKGROUNDS[choice]
489
 
490
  if cfg.get("type") == "color":
491
+ background = _create_solid_background(cfg, width, height) # BGR
492
  else:
493
+ background = _create_gradient_background_enhanced(cfg, width, height) # BGR
494
 
495
  background = _apply_background_adjustments(background, cfg)
496
  return background
497
 
498
  # ============================================================================
499
+ # VALIDATION
500
  # ============================================================================
501
 
502
  def validate_video_file(video_path: str) -> Tuple[bool, str]:
503
  """Enhanced video file validation with detailed checks"""
504
  if not video_path or not os.path.exists(video_path):
505
  return False, "Video file not found"
506
+
507
  try:
508
  file_size = os.path.getsize(video_path)
509
  if file_size == 0:
510
  return False, "Video file is empty"
511
+
512
  if file_size > 2 * 1024 * 1024 * 1024:
513
  return False, "Video file too large (>2GB)"
514
+
515
  cap = cv2.VideoCapture(video_path)
516
  if not cap.isOpened():
517
  return False, "Cannot open video file"
518
+
519
  frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
520
  fps = cap.get(cv2.CAP_PROP_FPS)
521
  width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
522
  height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
523
+
524
  cap.release()
525
+
526
  if frame_count == 0:
527
  return False, "Video appears to be empty (0 frames)"
528
+
529
  if fps <= 0 or fps > 120:
530
  return False, f"Invalid frame rate: {fps}"
531
+
532
  if width <= 0 or height <= 0:
533
  return False, f"Invalid resolution: {width}x{height}"
534
+
535
  if width > 4096 or height > 4096:
536
  return False, f"Resolution too high: {width}x{height} (max 4096x4096)"
537
+
538
  duration = frame_count / fps
539
  if duration > 300:
540
  return False, f"Video too long: {duration:.1f}s (max 300s)"
541
+
542
  return True, f"Valid video: {width}x{height}, {fps:.1f}fps, {duration:.1f}s"
543
+
544
  except Exception as e:
545
  return False, f"Error validating video: {str(e)}"
546
 
 
551
  def _segment_with_intelligent_prompts(image: np.ndarray, predictor: Any, fallback_enabled: bool = True) -> np.ndarray:
552
  """Intelligent automatic prompt generation for segmentation with safe predictor access"""
553
  try:
 
554
  if predictor is None or not hasattr(predictor, 'predict'):
555
  if fallback_enabled:
556
  return _fallback_segmentation(image)
557
+ raise SegmentationError("Invalid predictor in intelligent prompts")
558
+
 
559
  h, w = image.shape[:2]
560
  pos_points, neg_points = _generate_smart_prompts(image)
561
+
562
  if len(pos_points) == 0:
563
  pos_points = np.array([[w//2, h//2]], dtype=np.float32)
564
+
565
  points = np.vstack([pos_points, neg_points])
566
  labels = np.hstack([
567
  np.ones(len(pos_points), dtype=np.int32),
568
  np.zeros(len(neg_points), dtype=np.int32)
569
  ])
570
+
571
  logger.debug(f"Using {len(pos_points)} positive, {len(neg_points)} negative points")
572
+
573
  with torch.no_grad():
574
  masks, scores, _ = predictor.predict(
575
  point_coords=points,
576
  point_labels=labels,
577
  multimask_output=True
578
  )
579
+
580
  if masks is None or len(masks) == 0:
581
  raise SegmentationError("No masks generated")
582
+
583
  if scores is not None and len(scores) > 0:
584
  best_idx = np.argmax(scores)
585
  best_mask = masks[best_idx]
586
  logger.debug(f"Selected mask {best_idx} with score {scores[best_idx]:.3f}")
587
  else:
588
  best_mask = masks[0]
589
+
590
  return _process_mask(best_mask)
591
+
592
  except Exception as e:
593
  logger.error(f"Intelligent prompting failed: {e}")
594
  if fallback_enabled:
595
  return _fallback_segmentation(image)
596
+ raise
 
597
 
598
  def _segment_with_basic_prompts(image: np.ndarray, predictor: Any, fallback_enabled: bool = True) -> np.ndarray:
599
  """Basic prompting method for segmentation with safe predictor access"""
600
  try:
 
601
  if predictor is None or not hasattr(predictor, 'predict'):
602
  if fallback_enabled:
603
  return _fallback_segmentation(image)
604
+ raise SegmentationError("Invalid predictor in basic prompts")
605
+
 
606
  h, w = image.shape[:2]
607
+
608
  positive_points = np.array([
609
  [w//2, h//3],
610
  [w//2, h//2],
611
  [w//2, 2*h//3],
612
  ], dtype=np.float32)
613
+
614
  negative_points = np.array([
615
  [w//10, h//10],
616
  [9*w//10, h//10],
617
  [w//10, 9*h//10],
618
  [9*w//10, 9*h//10],
619
  ], dtype=np.float32)
620
+
621
  points = np.vstack([positive_points, negative_points])
622
  labels = np.array([1, 1, 1, 0, 0, 0, 0], dtype=np.int32)
623
+
624
  with torch.no_grad():
625
  masks, scores, _ = predictor.predict(
626
  point_coords=points,
627
  point_labels=labels,
628
  multimask_output=True
629
  )
630
+
631
  if masks is None or len(masks) == 0:
632
  raise SegmentationError("No masks generated")
633
+
634
  best_idx = np.argmax(scores) if scores is not None and len(scores) > 0 else 0
635
  best_mask = masks[best_idx]
636
+
637
  return _process_mask(best_mask)
638
+
639
  except Exception as e:
640
  logger.error(f"Basic prompting failed: {e}")
641
  if fallback_enabled:
642
  return _fallback_segmentation(image)
643
+ raise
 
644
 
645
  def _generate_smart_prompts(image: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
646
  """Generate optimal positive/negative points automatically"""
647
  try:
648
  h, w = image.shape[:2]
649
+
650
  try:
651
  saliency = cv2.saliency.StaticSaliencySpectralResidual_create()
652
  success, saliency_map = saliency.computeSaliency(image)
653
+
654
  if success:
655
  saliency_thresh = cv2.threshold(saliency_map, 0.7, 1, cv2.THRESH_BINARY)[1]
656
+ contours, _ = cv2.findContours((saliency_thresh * 255).astype(np.uint8),
657
  cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
658
+
659
  positive_points = []
660
  if contours:
661
  for contour in sorted(contours, key=cv2.contourArea, reverse=True)[:3]:
 
665
  cy = int(M["m01"] / M["m00"])
666
  if 0 < cx < w and 0 < cy < h:
667
  positive_points.append([cx, cy])
668
+
669
  if positive_points:
670
  logger.debug(f"Generated {len(positive_points)} saliency-based points")
671
  positive_points = np.array(positive_points, dtype=np.float32)
672
  else:
673
  raise Exception("No valid saliency points found")
674
+
675
  except Exception as e:
676
  logger.debug(f"Saliency method failed: {e}, using fallback")
677
  positive_points = np.array([
 
679
  [w//2, h//2],
680
  [w//2, 2*h//3],
681
  ], dtype=np.float32)
682
+
683
  negative_points = np.array([
684
  [10, 10],
685
  [w-10, 10],
 
688
  [w//2, 5],
689
  [w//2, h-5],
690
  ], dtype=np.float32)
691
+
692
  return positive_points, negative_points
693
+
694
  except Exception as e:
695
  logger.warning(f"Smart prompt generation failed: {e}")
696
  h, w = image.shape[:2]
 
702
  # HELPER FUNCTIONS - REFINEMENT
703
  # ============================================================================
704
 
705
+ def _auto_refine_mask_iteratively(image: np.ndarray, initial_mask: np.ndarray,
706
+ predictor: Any, max_iterations: int = 2) -> np.ndarray:
707
  """Automatically refine mask based on quality assessment with safe predictor access"""
708
  try:
 
709
  if predictor is None or not hasattr(predictor, 'predict'):
710
  logger.warning("Predictor invalid for iterative refinement, returning initial mask")
711
  return initial_mask
712
+
713
  current_mask = initial_mask.copy()
714
+
715
  for iteration in range(max_iterations):
716
  quality_score = _assess_mask_quality(current_mask, image)
717
  logger.debug(f"Iteration {iteration}: quality score = {quality_score:.3f}")
718
+
719
  if quality_score > 0.85:
720
  logger.debug(f"Quality sufficient after {iteration} iterations")
721
  break
722
+
723
  problem_areas = _find_mask_errors(current_mask, image)
724
+
725
  if np.any(problem_areas):
726
  corrective_points, corrective_labels = _generate_corrective_prompts(
727
  image, current_mask, problem_areas
728
  )
729
+
730
  if len(corrective_points) > 0:
731
  try:
732
  with torch.no_grad():
 
736
  mask_input=current_mask[None, :, :],
737
  multimask_output=False
738
  )
739
+
740
  if masks is not None and len(masks) > 0:
741
  refined_mask = _process_mask(masks[0])
742
+
743
  if _assess_mask_quality(refined_mask, image) > quality_score:
744
  current_mask = refined_mask
745
  logger.debug(f"Improved mask in iteration {iteration}")
746
  else:
747
  logger.debug(f"Refinement didn't improve quality in iteration {iteration}")
748
  break
749
+
750
  except Exception as e:
751
  logger.debug(f"Refinement iteration {iteration} failed: {e}")
752
  break
753
  else:
754
  logger.debug("No problem areas detected")
755
  break
756
+
757
  return current_mask
758
+
759
  except Exception as e:
760
  logger.warning(f"Iterative refinement failed: {e}")
761
  return initial_mask
 
765
  try:
766
  h, w = image.shape[:2]
767
  scores = []
768
+
769
  mask_area = np.sum(mask > 127)
770
  total_area = h * w
771
  area_ratio = mask_area / total_area
772
+
773
  if 0.05 <= area_ratio <= 0.8:
774
  area_score = 1.0
775
  elif area_ratio < 0.05:
 
777
  else:
778
  area_score = max(0, 1.0 - (area_ratio - 0.8) / 0.2)
779
  scores.append(area_score)
780
+
781
  mask_binary = mask > 127
782
  if np.any(mask_binary):
783
  mask_center_y, mask_center_x = np.where(mask_binary)
784
  center_y = np.mean(mask_center_y) / h
785
  center_x = np.mean(mask_center_x) / w
786
+
787
  center_score = 1.0 - min(abs(center_x - 0.5), abs(center_y - 0.5))
788
  scores.append(center_score)
789
  else:
790
  scores.append(0.0)
791
+
792
  edges = cv2.Canny(mask, 50, 150)
793
  edge_density = np.sum(edges > 0) / total_area
794
  smoothness_score = max(0, 1.0 - edge_density * 10)
795
  scores.append(smoothness_score)
796
+
797
  num_labels, _ = cv2.connectedComponents(mask)
798
  connectivity_score = max(0, 1.0 - (num_labels - 2) * 0.2)
799
  scores.append(connectivity_score)
800
+
801
  weights = [0.3, 0.2, 0.3, 0.2]
802
  overall_score = np.average(scores, weights=weights)
803
+
804
  return overall_score
805
+
806
  except Exception as e:
807
  logger.warning(f"Quality assessment failed: {e}")
808
  return 0.5
 
821
  logger.warning(f"Error detection failed: {e}")
822
  return np.zeros_like(mask, dtype=bool)
823
 
824
def _generate_corrective_prompts(image: np.ndarray, mask: np.ndarray,
                                 problem_areas: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """Generate corrective point prompts based on problem areas.

    Every external contour of ``problem_areas`` whose area exceeds 100 px
    contributes one point at its centroid. The point is labelled 1 when the
    current ``mask`` value at the centroid is below 127, otherwise 0
    (presumably positive / negative point prompts for the predictor -
    confirm against the caller).

    Args:
        image: Source frame. Not used by this function; kept for interface
            compatibility.
        mask: Current mask, indexed as ``mask[y, x]`` and compared with 127.
        problem_areas: Array marking suspected mask errors; it is cast to
            uint8 before contour extraction.

    Returns:
        ``(points, labels)`` where ``points`` is a float32 array of shape
        (N, 2) holding ``[x, y]`` pairs and ``labels`` is an int32 array of
        shape (N,). Both are empty (N == 0) when nothing qualifies or when
        an error occurs.
    """
    # Empty results share the dtypes of the populated ones so that callers
    # always receive float32 points and int32 labels.
    no_points = np.array([], dtype=np.float32).reshape(0, 2)
    no_labels = np.array([], dtype=np.int32)

    try:
        contours, _ = cv2.findContours(problem_areas.astype(np.uint8),
                                       cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        corrective_points = []
        corrective_labels = []

        for contour in contours:
            # Ignore tiny regions.
            if cv2.contourArea(contour) <= 100:
                continue

            M = cv2.moments(contour)
            if M["m00"] == 0:
                continue

            cx = int(M["m10"] / M["m00"])
            cy = int(M["m01"] / M["m00"])

            # Label 1 where the mask is currently "off" at the centroid,
            # label 0 where it is currently "on".
            corrective_points.append([cx, cy])
            corrective_labels.append(1 if mask[cy, cx] < 127 else 0)

        if not corrective_points:
            return no_points, no_labels

        return (np.array(corrective_points, dtype=np.float32),
                np.array(corrective_labels, dtype=np.int32))

    except Exception as e:
        # Best effort: a failure here only disables the refinement step.
        logger.warning(f"Corrective prompt generation failed: {e}")
        return no_points, no_labels
 
863
  try:
864
  if len(mask.shape) > 2:
865
  mask = mask.squeeze()
866
+
867
  if len(mask.shape) > 2:
868
  mask = mask[:, :, 0] if mask.shape[2] > 0 else mask.sum(axis=2)
869
+
870
  if mask.dtype == bool:
871
  mask = mask.astype(np.uint8) * 255
872
  elif mask.dtype == np.float32 or mask.dtype == np.float64:
 
876
  mask = np.clip(mask, 0, 255).astype(np.uint8)
877
  else:
878
  mask = mask.astype(np.uint8)
879
+
880
  kernel = np.ones((3, 3), np.uint8)
881
  mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)
882
  mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)
883
+
884
  _, mask = cv2.threshold(mask, 127, 255, cv2.THRESH_BINARY)
885
+
886
  return mask
887
+
888
  except Exception as e:
889
  logger.error(f"Mask processing failed: {e}")
890
  h, w = mask.shape[:2] if len(mask.shape) >= 2 else (256, 256)
 
904
  mask_area = int(np.sum(mask > 127))
905
  area_ratio = mask_area / total_area
906
 
 
907
  if area_ratio < 0.02 or area_ratio > 0.95:
908
  logger.warning(f"Suspicious mask area ratio (hard reject): {area_ratio:.3f}")
909
  return False
 
928
  try:
929
  logger.info("Using fallback segmentation strategy")
930
  h, w = image.shape[:2]
931
+
932
  try:
933
  gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
934
+
935
  edge_pixels = np.concatenate([
936
  gray[0, :], gray[-1, :], gray[:, 0], gray[:, -1]
937
  ])
938
  bg_color = np.median(edge_pixels)
939
+
940
  diff = np.abs(gray.astype(float) - bg_color)
941
  mask = (diff > 30).astype(np.uint8) * 255
942
+
943
  kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7, 7))
944
  mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)
945
  mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)
946
+
947
  if _validate_mask_quality(mask, image.shape[:2]):
948
  logger.info("Background subtraction fallback successful")
949
  return mask
950
+
951
  except Exception as e:
952
  logger.warning(f"Background subtraction fallback failed: {e}")
953
+
954
  mask = np.zeros((h, w), dtype=np.uint8)
955
+
956
  center_x, center_y = w // 2, h // 2
957
  radius_x, radius_y = w // 3, h // 2.5
958
+
959
  y, x = np.ogrid[:h, :w]
960
  mask_ellipse = ((x - center_x) / radius_x) ** 2 + ((y - center_y) / radius_y) ** 2 <= 1
961
  mask[mask_ellipse] = 255
962
+
963
  logger.info("Using geometric fallback mask")
964
  return mask
965
+
966
  except Exception as e:
967
  logger.error(f"All fallback strategies failed: {e}")
968
  h, w = image.shape[:2]
 
976
  guide_gray = cv2.cvtColor(guide, cv2.COLOR_BGR2GRAY) if len(guide.shape) == 3 else guide
977
  guide_gray = guide_gray.astype(np.float32) / 255.0
978
  mask_float = mask.astype(np.float32) / 255.0
979
+
980
  kernel_size = 2 * radius + 1
981
+
982
  mean_guide = cv2.boxFilter(guide_gray, -1, (kernel_size, kernel_size))
983
  mean_mask = cv2.boxFilter(mask_float, -1, (kernel_size, kernel_size))
984
  corr_guide_mask = cv2.boxFilter(guide_gray * mask_float, -1, (kernel_size, kernel_size))
985
+
986
  cov_guide_mask = corr_guide_mask - mean_guide * mean_mask
987
  mean_guide_sq = cv2.boxFilter(guide_gray * guide_gray, -1, (kernel_size, kernel_size))
988
  var_guide = mean_guide_sq - mean_guide * mean_guide
989
+
990
  a = cov_guide_mask / (var_guide + eps)
991
  b = mean_mask - a * mean_guide
992
+
993
  mean_a = cv2.boxFilter(a, -1, (kernel_size, kernel_size))
994
  mean_b = cv2.boxFilter(b, -1, (kernel_size, kernel_size))
995
+
996
  output = mean_a * guide_gray + mean_b
997
  output = np.clip(output * 255, 0, 255).astype(np.uint8)
998
+
999
  return output
1000
+
1001
  except Exception as e:
1002
  logger.warning(f"Guided filter approximation failed: {e}")
1003
  return mask
 
1011
  try:
1012
  threshold = 100
1013
  _, mask_binary = cv2.threshold(mask, threshold, 255, cv2.THRESH_BINARY)
1014
+
1015
  kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
1016
  mask_binary = cv2.morphologyEx(mask_binary, cv2.MORPH_CLOSE, kernel)
1017
  mask_binary = cv2.morphologyEx(mask_binary, cv2.MORPH_OPEN, kernel)
1018
+
1019
  mask_smooth = cv2.GaussianBlur(mask_binary.astype(np.float32), (5, 5), 1.0)
1020
  mask_smooth = mask_smooth / 255.0
1021
+
1022
  mask_smooth = np.power(mask_smooth, 0.8)
1023
+ mask_smooth = np.where(mask_smooth > 0.5,
1024
+ np.minimum(mask_smooth * 1.1, 1.0),
1025
+ mask_smooth * 0.9)
1026
+
 
1027
  frame_adjusted = _color_match_edges(frame, background, mask_smooth)
1028
+
1029
  alpha_3ch = np.stack([mask_smooth] * 3, axis=2)
1030
+
1031
  frame_float = frame_adjusted.astype(np.float32)
1032
  background_float = background.astype(np.float32)
1033
+
1034
  result = frame_float * alpha_3ch + background_float * (1 - alpha_3ch)
1035
  result = np.clip(result, 0, 255).astype(np.uint8)
1036
+
1037
  return result
1038
+
1039
  except Exception as e:
1040
  logger.error(f"Advanced compositing error: {e}")
1041
  raise
 
1046
  edge_mask = cv2.Sobel(alpha, cv2.CV_64F, 1, 1, ksize=3)
1047
  edge_mask = np.abs(edge_mask)
1048
  edge_mask = (edge_mask > 0.1).astype(np.float32)
1049
+
1050
  edge_areas = edge_mask > 0
1051
  if not np.any(edge_areas):
1052
  return frame
1053
+
1054
  frame_adjusted = frame.copy().astype(np.float32)
1055
  background_float = background.astype(np.float32)
1056
+
1057
  adjustment_strength = 0.1
1058
  for c in range(3):
1059
  frame_adjusted[:, :, c] = np.where(
1060
  edge_areas,
1061
+ frame_adjusted[:, :, c] * (1 - adjustment_strength) +
1062
  background_float[:, :, c] * adjustment_strength,
1063
  frame_adjusted[:, :, c]
1064
  )
1065
+
1066
  return np.clip(frame_adjusted, 0, 255).astype(np.uint8)
1067
+
1068
  except Exception as e:
1069
  logger.warning(f"Color matching failed: {e}")
1070
  return frame
 
1073
  """Simple fallback compositing method"""
1074
  try:
1075
  logger.info("Using simple compositing fallback")
1076
+
1077
  background = cv2.resize(background, (frame.shape[1], frame.shape[0]))
1078
+
1079
  if len(mask.shape) == 3:
1080
  mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
1081
  if mask.max() <= 1.0:
1082
  mask = (mask * 255).astype(np.uint8)
1083
+
1084
  _, mask_binary = cv2.threshold(mask, 127, 255, cv2.THRESH_BINARY)
1085
+
1086
  mask_norm = mask_binary.astype(np.float32) / 255.0
1087
  mask_3ch = np.stack([mask_norm] * 3, axis=2)
1088
+
1089
  result = frame * mask_3ch + background * (1 - mask_3ch)
1090
  return result.astype(np.uint8)
1091
+
1092
  except Exception as e:
1093
  logger.error(f"Simple compositing failed: {e}")
1094
  return frame
 
1098
  # ============================================================================
1099
 
1100
  def _create_solid_background(bg_config: Dict[str, Any], width: int, height: int) -> np.ndarray:
1101
+ """Create solid color background (returns BGR)"""
1102
  color_hex = bg_config["colors"][0].lstrip('#')
1103
  color_rgb = tuple(int(color_hex[i:i+2], 16) for i in (0, 2, 4))
1104
  color_bgr = color_rgb[::-1]
1105
  return np.full((height, width, 3), color_bgr, dtype=np.uint8)
1106
 
1107
  def _create_gradient_background_enhanced(bg_config: Dict[str, Any], width: int, height: int) -> np.ndarray:
1108
+ """Create enhanced gradient background with better quality (returns BGR)"""
1109
  try:
1110
  colors = bg_config["colors"]
1111
  direction = bg_config.get("direction", "vertical")
1112
+
1113
  rgb_colors = []
1114
  for color_hex in colors:
1115
  color_hex = color_hex.lstrip('#')
1116
  rgb = tuple(int(color_hex[i:i+2], 16) for i in (0, 2, 4))
1117
  rgb_colors.append(rgb)
1118
+
1119
  if not rgb_colors:
1120
  rgb_colors = [(128, 128, 128)]
1121
+
1122
  if direction == "vertical":
1123
  background = _create_vertical_gradient(rgb_colors, width, height)
1124
  elif direction == "horizontal":
 
1129
  background = _create_radial_gradient(rgb_colors, width, height, direction == "soft_radial")
1130
  else:
1131
  background = _create_vertical_gradient(rgb_colors, width, height)
1132
+
1133
  return cv2.cvtColor(background, cv2.COLOR_RGB2BGR)
1134
+
1135
  except Exception as e:
1136
  logger.error(f"Gradient creation error: {e}")
1137
  return np.full((height, width, 3), (128, 128, 128), dtype=np.uint8)
1138
 
1139
  def _create_vertical_gradient(colors: list, width: int, height: int) -> np.ndarray:
 
1140
  gradient = np.zeros((height, width, 3), dtype=np.uint8)
 
1141
  for y in range(height):
1142
  progress = y / height if height > 0 else 0
1143
  color = _interpolate_color(colors, progress)
1144
  gradient[y, :] = color
 
1145
  return gradient
1146
 
1147
  def _create_horizontal_gradient(colors: list, width: int, height: int) -> np.ndarray:
 
1148
  gradient = np.zeros((height, width, 3), dtype=np.uint8)
1149
  for x in range(width):
1150
  progress = x / width if width > 0 else 0
 
1153
  return gradient
1154
 
1155
  def _create_diagonal_gradient(colors: list, width: int, height: int) -> np.ndarray:
 
1156
  y_coords, x_coords = np.mgrid[0:height, 0:width]
1157
  max_distance = width + height
1158
  progress = (x_coords + y_coords) / max_distance
1159
  progress = np.clip(progress, 0, 1)
1160
+
1161
  gradient = np.zeros((height, width, 3), dtype=np.uint8)
1162
  for c in range(3):
1163
  gradient[:, :, c] = _vectorized_color_interpolation(colors, progress, c)
 
1164
  return gradient
1165
 
1166
  def _create_radial_gradient(colors: list, width: int, height: int, soft: bool = False) -> np.ndarray:
 
1167
  center_x, center_y = width // 2, height // 2
1168
  max_distance = np.sqrt(center_x**2 + center_y**2)
1169
+
1170
+ y, x = np.mgrid[0:height, 0:width]
1171
  distances = np.sqrt((x - center_x)**2 + (y - center_y)**2)
1172
  progress = distances / max_distance
1173
  progress = np.clip(progress, 0, 1)
1174
+
1175
  if soft:
1176
  progress = np.power(progress, 0.7)
1177
+
1178
  gradient = np.zeros((height, width, 3), dtype=np.uint8)
1179
  for c in range(3):
1180
  gradient[:, :, c] = _vectorized_color_interpolation(colors, progress, c)
 
1181
  return gradient
1182
 
1183
  def _vectorized_color_interpolation(colors: list, progress: np.ndarray, channel: int) -> np.ndarray:
 
1184
  if len(colors) == 1:
1185
  return np.full_like(progress, colors[0][channel], dtype=np.uint8)
1186
+
1187
  num_segments = len(colors) - 1
1188
  segment_progress = progress * num_segments
1189
  segment_indices = np.floor(segment_progress).astype(int)
1190
  segment_indices = np.clip(segment_indices, 0, num_segments - 1)
1191
  local_progress = segment_progress - segment_indices
1192
+
1193
  start_colors = np.array([colors[i][channel] for i in range(len(colors))])
1194
  end_colors = np.array([colors[min(i + 1, len(colors) - 1)][channel] for i in range(len(colors))])
1195
+
1196
  start_vals = start_colors[segment_indices]
1197
  end_vals = end_colors[segment_indices]
1198
+
1199
  result = start_vals + (end_vals - start_vals) * local_progress
1200
  return np.clip(result, 0, 255).astype(np.uint8)
1201
 
1202
  def _interpolate_color(colors: list, progress: float) -> tuple:
 
1203
  if len(colors) == 1:
1204
  return colors[0]
1205
  elif len(colors) == 2:
 
1220
  return (r, g, b)
1221
 
1222
  def _apply_background_adjustments(background: np.ndarray, bg_config: Dict[str, Any]) -> np.ndarray:
 
1223
  try:
1224
  brightness = bg_config.get("brightness", 1.0)
1225
  contrast = bg_config.get("contrast", 1.0)
1226
+
1227
  if brightness != 1.0 or contrast != 1.0:
1228
  background = background.astype(np.float32)
1229
  background = background * contrast * brightness
1230
  background = np.clip(background, 0, 255).astype(np.uint8)
1231
+
1232
  return background
1233
+
1234
  except Exception as e:
1235
  logger.warning(f"Background adjustment failed: {e}")
1236
  return background