MogensR committed on
Commit
49ce186
·
verified ·
1 Parent(s): 923ec96

Update processing/two_stage/two_stage_processor.py

Browse files
processing/two_stage/two_stage_processor.py CHANGED
@@ -1,28 +1,13 @@
1
  #!/usr/bin/env python3
2
  """
3
- ENHANCED Two-Stage Alpha Channel Processing System
4
- (Full file, ready to drop in)
5
- Date: 2025-09-09
6
-
7
- VIDEO-TO-VIDEO PIPELINE with proper audio support via AudioProcessor
8
-
9
- Key fixes in this build:
10
- - Uses AudioProcessor for ffmpeg-based audio extraction & muxing
11
- - Ensures full video duration processing (no 5-second truncation unless you pass trim_seconds explicitly)
12
- - Direct alpha compositing (no green screen intermediate), supports image or video backgrounds
13
- - Chunked processing for long videos, with seamless reassembly (video first, audio added after)
14
- - Robust MatAnyone integration (supports both known signature and positional fallback)
15
- - Defensive handling for SAM2 mask creation; combines first N frames for a stable trimap
16
- - Careful memory cleanup between stages (GPU/system) to avoid leaks in long runs
17
- - Extra logging and debug artifacts to help diagnose issues quickly
18
  """
19
 
20
- # ==============================================================================
21
- # IMPORTS
22
- # ==============================================================================
23
-
24
- from __future__ import annotations
25
-
26
  import os
27
  import sys
28
  import cv2
@@ -30,470 +15,47 @@
30
  import tempfile
31
  import shutil
32
  import logging
33
- import gc
34
  import time
35
  import traceback
36
  from pathlib import Path
37
  from typing import Optional, Tuple, Dict, Any, List
38
 
39
- # MoviePy (only used for probing durations, optional trimming, concat *video only*, and fallback audio mux)
40
- from moviepy.editor import (
41
- VideoFileClip, AudioFileClip, concatenate_videoclips
42
- )
43
- import moviepy.video.fx.all as vfx # noqa: F401 (kept for future effects)
44
 
45
- # Import the AudioProcessor for proper audio handling
46
- # Search both "processing.audio" and local "audio"
47
  try:
48
  from processing.audio import AudioProcessor
49
  except Exception:
50
  try:
51
- from audio import AudioProcessor # Fallback if in different location
52
  except Exception:
53
  AudioProcessor = None
54
 
55
- # Setup logging (do not override global logging level outside this module)
56
  logger = logging.getLogger(__name__)
57
- if not logger.handlers:
58
- logging.basicConfig(
59
- level=logging.INFO,
60
- format='%(asctime)s - %(levelname)s - %(message)s',
61
- datefmt='%Y-%m-%d %H:%M:%S'
62
- )
63
-
64
- # PyTorch memory management (optional, if available on the host)
65
- try:
66
- import torch
67
- TORCH_AVAILABLE = True
68
- except Exception:
69
- TORCH_AVAILABLE = False
70
- logger.warning("PyTorch not available, GPU memory management disabled")
71
-
72
- # ==============================================================================
73
- # CONFIGURATION
74
- # ==============================================================================
75
-
76
class ProcessingConfig:
    """Configuration settings for two-stage processing with audio support.

    All settings are plain class attributes; the other classes in this file
    read them through an instance (``self.config.<NAME>``).
    """

    # Reference mask creation
    REFERENCE_FRAMES = 3  # Number of leading frames used for the reference mask

    # Chunked processing settings
    MAX_CHUNK_DURATION = 300  # seconds (5 minutes); longer videos are chunked
    CHUNK_OVERLAP_FRAMES = 5  # frames shared between consecutive chunks
    MAX_PROCESSING_RESOLUTION: Optional[int] = None  # e.g. 1920 to downscale

    # Quality settings (for writes we control)
    VIDEO_CODEC = 'libx264'
    VIDEO_BITRATE = '8000k'
    AUDIO_CODEC = 'aac'
    AUDIO_BITRATE = '192k'

    # Alpha refinement
    ALPHA_SMOOTHING = 3  # median-blur kernel size (0 disables)
    ALPHA_EDGE_BLUR = 1  # Gaussian feather radius (0 disables)
    ALPHA_CONTRAST = 1.2  # exponent applied to the matte (1.0 disables)
    ENABLE_DEFRINGING = True
    DEFRINGE_RADIUS = 1

    # Processing limits
    MAX_FRAMES_FOR_REFERENCE = 10
    MIN_FINAL_CHUNK_DURATION = 10  # seconds; shorter tails join the previous chunk

    # Memory management
    CLEAR_CACHE_AFTER_STAGE = True
    FORCE_GARBAGE_COLLECTION = True
    MEMORY_USAGE_THRESHOLD = 70  # percent of GPU memory allocated
    MEMORY_CHECK_INTERVAL = 30  # every N frames during compositing

    # Debug settings
    SAVE_DEBUG_FILES = True
    DEBUG_FRAME_INTERVAL = 30

    # Audio settings
    PRESERVE_ORIGINAL_AUDIO = True
    AUDIO_QUALITY = 'high'
    USE_FFMPEG_FOR_AUDIO = True  # Prefer ffmpeg over MoviePy
118
-
119
-
120
- # ==============================================================================
121
- # MEMORY MANAGER
122
- # ==============================================================================
123
-
124
class MemoryManager:
    """Utilities for managing GPU and system memory.

    Every GPU query is guarded by ``TORCH_AVAILABLE`` and
    ``torch.cuda.is_available()``; without CUDA all figures are reported as 0.
    """

    def __init__(self, config: "ProcessingConfig"):
        self.config = config
        self.initial_memory = self.get_current_memory()
        self.peak_memory = self.initial_memory
        # stage name -> {'before_alloc', 'after_alloc', 'freed_total'} in GB
        self.stage_memories: Dict[str, Dict[str, float]] = {}

    @staticmethod
    def clear_gpu_cache() -> float:
        """Release PyTorch's cached CUDA blocks.

        Returns:
            Gigabytes of *reserved* memory handed back to the driver
            (0.0 when CUDA is unavailable).
        """
        if not (TORCH_AVAILABLE and torch.cuda.is_available()):
            return 0.0

        # empty_cache() only returns unused cached blocks; it never changes
        # memory_allocated(), so the effect has to be measured on the
        # reserved pool.
        before = torch.cuda.memory_reserved()
        torch.cuda.empty_cache()
        try:
            torch.cuda.synchronize()
        except Exception as e:
            logger.debug(f"torch.cuda.synchronize() failed: {e}")
        after = torch.cuda.memory_reserved()

        freed = (before - after) / 1024**3
        logger.info(f"GPU cache cleared: {freed:.2f}GB freed")
        return freed

    @staticmethod
    def force_garbage_collection() -> Tuple[int, float]:
        """Run ``gc.collect()``.

        Returns:
            ``(objects_collected, gpu_gb_recovered)`` where the second value
            is the drop in allocated CUDA memory (0.0 without CUDA).
        """
        cuda_ok = TORCH_AVAILABLE and torch.cuda.is_available()

        def allocated() -> int:
            if not cuda_ok:
                return 0
            try:
                return torch.cuda.memory_allocated()
            except Exception:
                return 0

        before = allocated()
        collected = gc.collect()
        after = allocated()

        freed = (before - after) / 1024**3 if before > 0 else 0.0
        logger.info(f"Garbage collection: {collected} objects freed, {freed:.2f}GB GPU memory recovered")
        return collected, freed

    def get_current_memory(self) -> Dict[str, float]:
        """Return a snapshot of GPU memory figures in GB (all 0.0 without CUDA)."""
        memory = {
            'gpu_allocated': 0.0,
            'gpu_reserved': 0.0,
            'gpu_free': 0.0,
            'gpu_total': 0.0,
            'gpu_usage_percent': 0.0
        }

        if TORCH_AVAILABLE and torch.cuda.is_available():
            try:
                memory['gpu_allocated'] = torch.cuda.memory_allocated() / 1024**3
                memory['gpu_reserved'] = torch.cuda.memory_reserved() / 1024**3
                # Query the device the allocation counters above refer to
                # rather than assuming device 0.
                props = torch.cuda.get_device_properties(torch.cuda.current_device())
                memory['gpu_total'] = props.total_memory / 1024**3
                memory['gpu_free'] = memory['gpu_total'] - memory['gpu_allocated']
                if memory['gpu_total'] > 0:
                    memory['gpu_usage_percent'] = (memory['gpu_allocated'] / memory['gpu_total']) * 100
            except Exception as e:
                # Best effort: keep whatever was filled in so far.
                logger.debug(f"Could not query GPU memory: {e}")
        return memory

    def get_gpu_memory_info(self) -> str:
        """Return a one-line, human-readable GPU memory summary."""
        memory = self.get_current_memory()
        if memory['gpu_total'] > 0:
            return (f"GPU Memory: {memory['gpu_allocated']:.1f}GB allocated, "
                    f"{memory['gpu_free']:.1f}GB free, "
                    f"{memory['gpu_usage_percent']:.0f}% usage")
        return "GPU memory info unavailable"

    def should_clear_memory(self) -> bool:
        """Return True when allocated GPU memory exceeds the configured threshold."""
        memory = self.get_current_memory()
        return memory['gpu_usage_percent'] > self.config.MEMORY_USAGE_THRESHOLD

    def cleanup_stage(self, stage_name: str, force: bool = False):
        """Free memory after a pipeline stage and record before/after figures.

        Args:
            stage_name: Key under which the figures are stored in
                ``stage_memories``.
            force: Clean up even when usage is below the threshold.
        """
        logger.info(f"Cleaning up after {stage_name}…")

        before = self.get_current_memory()

        gpu_freed = 0.0
        gc_memory = 0.0

        if force or self.should_clear_memory():
            # Collect first so tensors released by the GC land in the cache,
            # then hand the cache back to the driver.
            _, gc_memory = self.force_garbage_collection()
            gpu_freed = self.clear_gpu_cache()

        after = self.get_current_memory()

        # 'freed_total' adds two different pools: allocations reclaimed by the
        # GC and cached blocks returned by empty_cache().
        self.stage_memories[stage_name] = {
            'before_alloc': before['gpu_allocated'],
            'after_alloc': after['gpu_allocated'],
            'freed_total': gpu_freed + gc_memory
        }

        # The high-water mark is whichever snapshot is larger; normally that
        # is the one taken before cleanup.
        highest = before if before['gpu_allocated'] >= after['gpu_allocated'] else after
        if highest['gpu_allocated'] > self.peak_memory.get('gpu_allocated', 0):
            self.peak_memory = highest

        logger.info(f"Memory cleanup completed for {stage_name}: {gpu_freed + gc_memory:.2f}GB total freed")

    def cleanup_model(self, model_handler: Any, handler_name: str, deep_cleanup: bool = False):
        """Call a handler's ``cleanup()``/``release()`` hooks if it has them.

        With ``deep_cleanup`` the ``model`` and ``predictor`` attributes are
        deleted as well. Failures are logged, never raised.
        """
        try:
            for hook in ('cleanup', 'release'):
                if hasattr(model_handler, hook):
                    getattr(model_handler, hook)()
                    logger.info(f"{handler_name}: {hook}() called")

            if deep_cleanup:
                for attr in ('model', 'predictor'):
                    if hasattr(model_handler, attr):
                        try:
                            delattr(model_handler, attr)
                            logger.info(f"{handler_name}: {attr} deleted")
                        except Exception as e:
                            logger.debug(f"{handler_name}: could not delete {attr}: {e}")
        except Exception as e:
            logger.warning(f"Failed to cleanup {handler_name}: {e}")

    def get_memory_report(self) -> str:
        """Return a multi-line report of current, peak and per-stage GPU memory."""
        report = ["="*60, "MEMORY USAGE REPORT", "="*60]
        current = self.get_current_memory()
        report.append(f"Current GPU Memory: {current['gpu_allocated']:.2f}GB / {current['gpu_total']:.2f}GB ({current['gpu_usage_percent']:.0f}%)")
        peak = self.peak_memory.get('gpu_allocated', 0.0)
        report.append(f"Peak GPU Memory: {peak:.2f}GB")
        if self.stage_memories:
            report.append("\nStage Memory Usage:")
            for stage, mem in self.stage_memories.items():
                report.append(f"  {stage}: before={mem['before_alloc']:.2f}GB after={mem['after_alloc']:.2f}GB freed={mem['freed_total']:.2f}GB")
        report.append("="*60)
        return "\n".join(report)
267
-
268
-
269
- # ==============================================================================
270
- # QUALITY MANAGER
271
- # ==============================================================================
272
-
273
class QualityManager:
    """Manages processing quality profiles."""

    # Encoder settings per quality level; 'max_size' of None means no limit
    # is set for that profile.
    PROFILES: Dict[str, Dict[str, Any]] = {
        'high': {
            'max_size': None,
            'bitrate': '8000k',
            'preset': 'medium',
            'crf': 18
        },
        'medium': {
            'max_size': None,
            'bitrate': '4000k',
            'preset': 'fast',
            'crf': 23
        },
        'fast': {
            'max_size': 1920,
            'bitrate': '2000k',
            'preset': 'faster',
            'crf': 28
        }
    }

    @classmethod
    def get_profile(cls, quality: str = 'medium') -> Dict[str, Any]:
        """Return the settings for ``quality``, or the 'medium' profile if unknown.

        A copy is returned so callers can adjust the result without altering
        the shared class-level table.
        """
        return dict(cls.PROFILES.get(quality, cls.PROFILES['medium']))
300
-
301
-
302
- # ==============================================================================
303
- # ALPHA REFINEMENT
304
- # ==============================================================================
305
-
306
class AlphaRefiner:
    """Utilities for refining alpha mattes."""

    @staticmethod
    def _as_single_channel_unit(alpha: np.ndarray) -> np.ndarray:
        """Return ``alpha`` as a 2-D array, scaling uint8 input to 0-1."""
        if alpha.ndim == 3:
            if alpha.shape[2] == 1:
                # (H, W, 1) is already single channel; cvtColor would reject it.
                alpha = alpha[:, :, 0]
            else:
                alpha = cv2.cvtColor(alpha, cv2.COLOR_BGR2GRAY)
        if alpha.dtype == np.uint8:
            alpha = alpha.astype(np.float32) / 255.0
        return alpha

    @staticmethod
    def refine_alpha_matte(alpha: np.ndarray, config: "ProcessingConfig") -> np.ndarray:
        """Smooth, feather and contrast-adjust an alpha matte.

        Args:
            alpha: Matte as uint8 (0-255) or float (0-1); 2-D, ``(H, W, 1)``
                or a multi-channel image that is converted to grey.
            config: Supplies ``ALPHA_SMOOTHING``, ``ALPHA_EDGE_BLUR`` and
                ``ALPHA_CONTRAST``.

        Returns:
            2-D matte clipped to the range 0-1.
        """
        alpha = AlphaRefiner._as_single_channel_unit(alpha)

        # Spatial smoothing (median filter; kernel size must be odd)
        if config.ALPHA_SMOOTHING > 0:
            k = config.ALPHA_SMOOTHING
            if k % 2 == 0:
                k += 1
            # Clip before quantising so out-of-range floats cannot wrap around.
            quantised = np.clip(alpha * 255, 0, 255).astype(np.uint8)
            alpha = cv2.medianBlur(quantised, k).astype(np.float32) / 255.0

        # Edge feathering
        if config.ALPHA_EDGE_BLUR > 0:
            ks = config.ALPHA_EDGE_BLUR * 2 + 1
            alpha = cv2.GaussianBlur(alpha, (ks, ks), 0)

        # Contrast adjustment
        if abs(config.ALPHA_CONTRAST - 1.0) > 1e-6:
            # A negative base with a fractional exponent yields NaN, which the
            # final clip would not remove, so clip first.
            alpha = np.power(np.clip(alpha, 0, 1), config.ALPHA_CONTRAST)

        return np.clip(alpha, 0, 1)

    @staticmethod
    def defringe_edges(composite: np.ndarray, alpha: np.ndarray, radius: int = 1) -> np.ndarray:
        """Blend a blurred copy of ``composite`` into the matte's edge band.

        Args:
            composite: Composited image (values 0-255).
            alpha: Matte used to locate the edges (uint8 or float 0-1).
            radius: Half-width of the edge band; the kernel is
                ``2 * radius + 1`` pixels wide.

        Returns:
            uint8 image with the same shape as ``composite``.
        """
        alpha = AlphaRefiner._as_single_channel_unit(alpha)

        size = max(0, int(radius)) * 2 + 1
        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (size, size))
        # Dilation minus erosion is non-zero only around the matte boundary.
        edge_mask = np.clip(cv2.dilate(alpha, kernel) - cv2.erode(alpha, kernel), 0, 1)

        blurred = cv2.GaussianBlur(composite, (3, 3), 0)

        edge_mask_3d = np.stack([edge_mask] * 3, axis=2)
        result = composite * (1 - edge_mask_3d) + blurred * edge_mask_3d

        return np.clip(result, 0, 255).astype(np.uint8)
356
-
357
-
358
- # ==============================================================================
359
- # CHUNKED VIDEO PROCESSOR
360
- # ==============================================================================
361
-
362
class ChunkedVideoProcessor:
    """Handles splitting and reassembling videos (video-only; audio added later)."""

    def __init__(self, temp_dir: str, config: "ProcessingConfig"):
        self.temp_dir = temp_dir
        self.config = config
        self.chunks_dir = os.path.join(temp_dir, "chunks")
        os.makedirs(self.chunks_dir, exist_ok=True)
        # Seconds at the head of each chunk that repeat the tail of the
        # previous one; filled by split_video_into_chunks and consumed by
        # reassemble_chunks.
        self._lead_overlaps: List[float] = []

    def should_chunk_video(self, video_path: str) -> bool:
        """Return True when the video is longer than ``MAX_CHUNK_DURATION``.

        Returns False if the duration cannot be read.
        """
        try:
            with VideoFileClip(video_path) as clip:
                duration = clip.duration or 0
                should_chunk = duration > self.config.MAX_CHUNK_DURATION
                logger.info(f"Video duration: {duration:.1f}s, chunking: {should_chunk}")
                return should_chunk
        except Exception as e:
            logger.warning(f"Could not determine video duration: {e}")
            return False

    def split_video_into_chunks(self, video_path: str) -> List[str]:
        """Write the video as silent chunk files and return their paths in order.

        Consecutive chunks overlap by ``CHUNK_OVERLAP_FRAMES`` frames. A tail
        shorter than ``MIN_FINAL_CHUNK_DURATION`` is merged into the previous
        chunk.

        Raises:
            Exception: whatever the underlying read/write raised (logged first).
        """
        try:
            with VideoFileClip(video_path) as clip:
                duration = clip.duration or 0.0
                chunk_duration = self.config.MAX_CHUNK_DURATION
                overlap_seconds = self.config.CHUNK_OVERLAP_FRAMES / (clip.fps or 30.0)

                chunk_paths: List[str] = []
                lead_overlaps: List[float] = []
                current_time = 0.0
                lead = 0.0  # overlap carried at the head of the current chunk
                chunk_index = 0

                while current_time < duration - 1e-6:
                    end_time = min(current_time + chunk_duration, duration)

                    remaining_after = duration - end_time
                    if 0 < remaining_after < self.config.MIN_FINAL_CHUNK_DURATION:
                        end_time = duration
                        logger.info(f"Including final {remaining_after:.1f}s in chunk {chunk_index}")

                    chunk_clip = clip.subclip(current_time, end_time)
                    chunk_path = os.path.join(self.chunks_dir, f"chunk_{chunk_index:03d}.mp4")

                    # Write WITHOUT audio (we'll add it back at the end)
                    chunk_clip.write_videofile(
                        chunk_path,
                        codec=self.config.VIDEO_CODEC,
                        audio=False,  # No audio in chunks
                        verbose=False,
                        logger=None
                    )
                    chunk_clip.close()

                    chunk_paths.append(chunk_path)
                    lead_overlaps.append(lead)
                    logger.info(f"Created chunk {chunk_index}: {current_time:.2f}s - {end_time:.2f}s")

                    if end_time < duration:
                        next_start = max(0.0, end_time - overlap_seconds)
                        if next_start <= current_time:
                            # Overlap is at least as long as the chunk; drop
                            # it so the loop is guaranteed to advance.
                            next_start = end_time
                        lead = end_time - next_start
                        current_time = next_start
                    else:
                        current_time = duration

                    chunk_index += 1

                self._lead_overlaps = lead_overlaps
                logger.info(f"Split video into {len(chunk_paths)} chunks")
                return chunk_paths

        except Exception as e:
            logger.error(f"Video chunking failed: {e}")
            raise

    def reassemble_chunks(self, processed_chunk_paths: List[str], output_path: str) -> str:
        """Reassemble chunks (audio will be added separately).

        The overlapping head of every chunk after the first is trimmed before
        concatenation, so frames are not duplicated at the seams and the
        result keeps the source duration. Trimming is applied only when the
        number of paths matches the last split performed by this instance.

        Raises:
            Exception: whatever the underlying copy/read/write raised
                (logged first).
        """
        chunk_clips = []
        try:
            if len(processed_chunk_paths) == 1:
                shutil.copy2(processed_chunk_paths[0], output_path)
                logger.info("Single chunk, copied directly")
                return output_path

            overlaps = self._lead_overlaps
            if len(overlaps) != len(processed_chunk_paths):
                # Paths did not come from our last split; do not guess.
                overlaps = [0.0] * len(processed_chunk_paths)

            # Compose video-only
            segments = []
            for path, lead in zip(processed_chunk_paths, overlaps):
                source = VideoFileClip(path)
                chunk_clips.append(source)
                if 0.0 < lead < (source.duration or 0.0):
                    segments.append(source.subclip(lead))
                else:
                    segments.append(source)

            logger.info(f"Concatenating {len(chunk_clips)} chunks…")
            final_clip = concatenate_videoclips(segments, method="compose")
            try:
                final_clip.write_videofile(
                    output_path,
                    codec=self.config.VIDEO_CODEC,
                    audio=False,
                    verbose=False,
                    logger=None
                )
            finally:
                final_clip.close()

            logger.info(f"Successfully reassembled video: {output_path}")
            return output_path

        except Exception as e:
            logger.error(f"Chunk reassembly failed: {e}")
            raise
        finally:
            # Release the readers even when concatenation or encoding failed.
            for c in chunk_clips:
                try:
                    c.close()
                except Exception as e:
                    logger.debug(f"Could not close chunk clip: {e}")
460
-
461
-
462
- # ==============================================================================
463
- # MAIN TWOSTAGEPROCESSOR CLASS
464
- # ==============================================================================
465
 
466
  class TwoStageProcessor:
467
  """
468
- Enhanced two-stage alpha channel processor with proper audio support.
469
- Uses AudioProcessor for ffmpeg-based audio handling.
470
  """
471
 
472
  def __init__(self, sam2_handler, matanyone_handler, temp_dir: Optional[str] = None):
473
  self.sam2_handler = sam2_handler
474
  self.matanyone_handler = matanyone_handler
475
- self.temp_dir = temp_dir or tempfile.mkdtemp(prefix='twostage_video_')
476
- self.config = ProcessingConfig()
477
- self.memory_manager = MemoryManager(self.config)
478
- self.chunked_processor = ChunkedVideoProcessor(self.temp_dir, self.config)
479
- self.alpha_refiner = AlphaRefiner()
480
 
481
- # Initialize AudioProcessor for proper audio handling
482
- if AudioProcessor and self.config.USE_FFMPEG_FOR_AUDIO:
483
  try:
484
  self.audio_processor = AudioProcessor(temp_dir=self.temp_dir)
485
  logger.info("AudioProcessor initialized for ffmpeg-based audio handling")
486
  except Exception as e:
487
  self.audio_processor = None
488
- logger.warning(f"AudioProcessor failed to initialize ({e}). Using MoviePy fallback for audio.")
489
  else:
490
  self.audio_processor = None
491
- logger.warning("AudioProcessor not available - using MoviePy fallback for audio")
492
 
493
  os.makedirs(self.temp_dir, exist_ok=True)
494
- logger.info(f"TwoStageProcessor initialized with temp dir: {self.temp_dir}")
495
- logger.info(f"Audio handling: {'ffmpeg via AudioProcessor' if self.audio_processor else 'MoviePy fallback'}")
496
- logger.info(self.memory_manager.get_gpu_memory_info())
497
 
498
  def process_video(self,
499
  video_path: str,
@@ -504,643 +66,362 @@ def process_video(self,
504
  callback: Optional[callable] = None,
505
  **kwargs) -> Tuple[Optional[str], str]:
506
  """
507
- Main processing pipeline with proper audio handling.
508
-
509
- Returns:
510
- (final_output_path, status_message)
511
  """
512
  try:
513
- logger.info(f"🎬 Enhanced Two-Stage Alpha Pipeline: {video_path}")
514
- logger.info(f"🎯 Background: {background_path}")
515
- logger.info(f"📁 Temp: {self.temp_dir}")
516
- logger.info(f"🎤 Audio: {'Enabled (ffmpeg)' if self.audio_processor else 'Enabled (MoviePy)'}")
517
- if trim_seconds is not None:
518
- logger.info(f"✂️ Trim requested: first {trim_seconds}s will be processed")
 
 
 
 
 
519
  else:
520
- logger.info("⏱️ No trimming requested; full duration will be processed")
521
 
522
- # Extract original audio (if any)
523
- original_audio_path = None
524
- if self.config.PRESERVE_ORIGINAL_AUDIO:
525
- original_audio_path = self._extract_audio(video_path)
526
- if original_audio_path:
527
- logger.info(f"✅ Audio extracted: {original_audio_path}")
528
- else:
529
- logger.info("ℹ️ No audio found in source video")
530
 
531
- # Determine if chunking is needed
532
- needs_chunking = self.chunked_processor.should_chunk_video(video_path)
 
 
533
 
534
- if needs_chunking:
535
- logger.info("Using chunked processing for long video…")
536
- result = self._process_chunked_video(
537
- video_path, background_path, output_path,
538
- quality, original_audio_path, callback
539
- )
540
- else:
541
- logger.info("Processing full video in single pass…")
542
- result = self._process_single_video(
543
- video_path, background_path, output_path,
544
- quality, trim_seconds, original_audio_path, callback
545
- )
546
 
547
- # Verify final output
548
- if result[0] and os.path.exists(result[0]):
549
- try:
550
- with VideoFileClip(result[0]) as clip:
551
- final_duration = clip.duration or 0.0
552
- has_audio = clip.audio is not None
553
- logger.info(f"✅ Final output: {final_duration:.1f}s, Audio: {has_audio}")
554
- except Exception:
555
- pass
556
 
557
- logger.info(self.memory_manager.get_memory_report())
558
- return result
 
 
 
 
 
 
 
 
 
 
 
559
 
560
  except Exception as e:
561
  error_msg = f"Processing failed: {str(e)}"
562
- logger.error(error_msg)
563
- logger.error(f"Traceback: {traceback.format_exc()}")
564
  return None, error_msg
565
 
566
- # ------------------------------------------------------------------
567
- # Audio helpers
568
- # ------------------------------------------------------------------
569
- def _extract_audio(self, video_path: str) -> Optional[str]:
570
- """Extract audio using AudioProcessor (ffmpeg) or MoviePy fallback."""
571
  try:
 
 
572
  if self.audio_processor:
573
- # Use AudioProcessor with ffmpeg
574
- logger.info("Extracting audio using ffmpeg")
575
- audio_path = self.audio_processor.extract_audio(
576
  video_path=video_path,
577
- output_path=os.path.join(self.temp_dir, "original_audio.aac"),
578
  audio_format='aac',
579
- quality=self.config.AUDIO_QUALITY
580
  )
581
- return audio_path
582
  else:
583
  # Fallback to MoviePy
584
- logger.info("Extracting audio using MoviePy")
585
- audio_path = os.path.join(self.temp_dir, "original_audio.m4a")
586
  with VideoFileClip(video_path) as clip:
587
  if clip.audio is not None:
588
  clip.audio.write_audiofile(
589
- audio_path,
590
- codec=self.config.AUDIO_CODEC,
591
- bitrate=self.config.AUDIO_BITRATE,
592
  verbose=False,
593
  logger=None
594
  )
595
- return audio_path
596
  return None
597
-
598
- except Exception as e:
599
- logger.warning(f"Could not extract audio: {e}")
600
- return None
601
-
602
- def _add_audio_to_video(self, video_path: str, audio_path: Optional[str], output_path: str) -> str:
603
- """Add audio to video using AudioProcessor (ffmpeg) or MoviePy fallback."""
604
- try:
605
- if self.audio_processor and audio_path:
606
- # Using ffmpeg via AudioProcessor.
607
- # Note: AudioProcessor.add_audio_to_video accepts an input for "original_video".
608
- # It's OK to pass an audio-only file here; the function maps [1:a:0] as audio.
609
- logger.info("Adding audio using ffmpeg (lossless where possible)…")
610
- return self.audio_processor.add_audio_to_video(
611
- original_video=audio_path, # Source of audio (audio-only or original video)
612
- processed_video=video_path, # Video without audio
613
- output_path=output_path,
614
- audio_quality=self.config.AUDIO_QUALITY
615
- )
616
- elif audio_path:
617
- # Fallback to MoviePy
618
- logger.info("Adding audio using MoviePy…")
619
- with VideoFileClip(video_path) as video:
620
- with AudioFileClip(audio_path) as audio:
621
- final = video.set_audio(audio)
622
- final.write_videofile(
623
- output_path,
624
- codec=self.config.VIDEO_CODEC,
625
- audio_codec=self.config.AUDIO_CODEC,
626
- audio_bitrate=self.config.AUDIO_BITRATE,
627
- temp_audiofile=os.path.join(self.temp_dir, "temp_audio.m4a"),
628
- remove_temp=True,
629
- verbose=False,
630
- logger=None
631
- )
632
- return output_path
633
- else:
634
- # No audio to add; just copy
635
- shutil.copy2(video_path, output_path)
636
- return output_path
637
-
638
- except Exception as e:
639
- logger.error(f"Failed to add audio: {e}")
640
- # Return video without audio rather than failing completely
641
- try:
642
- shutil.copy2(video_path, output_path)
643
- except Exception:
644
- pass
645
- return output_path
646
-
647
- # ------------------------------------------------------------------
648
- # Chunked processing
649
- # ------------------------------------------------------------------
650
- def _process_chunked_video(self, video_path: str, background_path: str,
651
- output_path: str, quality: str,
652
- original_audio_path: Optional[str],
653
- callback: Optional[callable]) -> Tuple[Optional[str], str]:
654
- """Process long video in chunks."""
655
- try:
656
- if callback:
657
- callback("Splitting video into chunks…", 5)
658
-
659
- chunk_paths = self.chunked_processor.split_video_into_chunks(video_path)
660
- if not chunk_paths:
661
- raise RuntimeError("No chunks were created")
662
-
663
- processed_chunks: List[str] = []
664
- total_chunks = len(chunk_paths)
665
-
666
- for i, chunk_path in enumerate(chunk_paths):
667
- if callback:
668
- progress = 10 + (i * 70 // max(1, total_chunks))
669
- callback(f"Processing chunk {i+1}/{total_chunks}…", progress)
670
-
671
- logger.info(f"Processing chunk {i+1}/{total_chunks}")
672
-
673
- chunk_output = os.path.join(self.temp_dir, f"processed_chunk_{i:03d}.mp4")
674
-
675
- # Process chunk WITHOUT audio
676
- result_path, status = self._process_single_video(
677
- chunk_path, background_path, chunk_output, quality,
678
- trim_seconds=None, original_audio_path=None, callback=None
679
- )
680
-
681
- if result_path and os.path.exists(result_path):
682
- processed_chunks.append(result_path)
683
- else:
684
- raise RuntimeError(f"Chunk {i+1} processing failed: {status}")
685
 
686
- self.memory_manager.cleanup_stage(f"Chunk_{i+1}", force=True)
687
-
688
- if callback:
689
- callback("Reassembling video…", 85)
690
-
691
- # Reassemble chunks (video only)
692
- temp_output = os.path.join(self.temp_dir, "reassembled_no_audio.mp4")
693
- final_video_no_audio = self.chunked_processor.reassemble_chunks(processed_chunks, temp_output)
694
-
695
- # Add original audio back
696
- if original_audio_path:
697
- if callback:
698
- callback("Adding audio track…", 95)
699
- final_path = self._add_audio_to_video(final_video_no_audio, original_audio_path, output_path)
700
- else:
701
- shutil.move(final_video_no_audio, output_path)
702
- final_path = output_path
703
-
704
- if callback:
705
- callback("Processing completed!", 100)
706
-
707
- return final_path, f"Success - Processed {total_chunks} chunks"
708
-
709
  except Exception as e:
710
- logger.error(f"Chunked processing failed: {e}")
711
- return None, f"Chunked processing failed: {e}"
712
 
713
- # ------------------------------------------------------------------
714
- # Single-pass processing
715
- # ------------------------------------------------------------------
716
- def _process_single_video(self, video_path: str, background_path: str,
717
- output_path: str, quality: str,
718
- trim_seconds: Optional[float],
719
- original_audio_path: Optional[str],
720
- callback: Optional[callable]) -> Tuple[Optional[str], str]:
721
- """Process single video/chunk (video-only here; audio added at the end)."""
722
- try:
723
- # Optional trim (explicit only)
724
- input_for_masks = video_path
725
- if trim_seconds is not None and trim_seconds > 0:
726
- # If you pass trim_seconds, we trim for *this* run.
727
- trimmed_path = os.path.join(self.temp_dir, "trimmed_input.mp4")
728
- self._trim_video(video_path, trimmed_path, trim_seconds)
729
- input_for_masks = trimmed_path
730
- logger.info(f"Trim applied: {trim_seconds}s (only because trim_seconds was provided)")
731
- else:
732
- logger.info("No trimming applied in this run")
733
-
734
- # Stage 1: Reference mask
735
- if callback:
736
- callback("Creating reference mask…", 10)
737
- reference_mask_path = self._stage1_create_reference_mask(input_for_masks)
738
- self.memory_manager.cleanup_stage("SAM2", force=True)
739
-
740
- # Stage 2: MatAnyone
741
- if callback:
742
- callback("Processing with MatAnyone…", 30)
743
- alpha_video_path = self._stage2_matanyone_processing(
744
- video_path=input_for_masks, reference_mask_path=reference_mask_path
745
- )
746
- self.memory_manager.cleanup_stage("MatAnyone", force=True)
747
-
748
- # Stage 3: Alpha compositing (video-only)
749
- if callback:
750
- callback("Compositing with background…", 70)
751
- temp_output = os.path.join(self.temp_dir, "composite_no_audio.mp4")
752
- composite_path = self._stage3_alpha_composite(
753
- original_video_path=input_for_masks,
754
- alpha_video_path=alpha_video_path,
755
- background_path=background_path,
756
- output_path=temp_output,
757
- quality=quality
758
  )
759
-
760
- # Add audio if we have it
761
- if original_audio_path:
762
- if callback:
763
- callback("Adding audio track…", 90)
764
- final_path = self._add_audio_to_video(composite_path, original_audio_path, output_path)
765
- else:
766
- shutil.move(composite_path, output_path)
767
- final_path = output_path
768
-
769
- if callback:
770
- callback("Processing completed!", 100)
771
-
772
- return final_path, "Success"
773
-
774
- except Exception as e:
775
- logger.error(f"Single video processing failed: {e}")
776
- return None, f"Single video processing failed: {e}"
777
 
778
- # ------------------------------------------------------------------
779
- # Stage 1: Reference mask creation
780
- # ------------------------------------------------------------------
781
- def _stage1_create_reference_mask(self, video_path: str) -> str:
782
- """Create reference mask from first frames using SAM2."""
783
- cap = None
784
  try:
785
  cap = cv2.VideoCapture(video_path)
786
- if not cap.isOpened():
787
- raise RuntimeError(f"Cannot open video: {video_path}")
788
-
789
- frames: List[np.ndarray] = []
790
- frame_count = 0
791
-
792
- while frame_count < self.config.REFERENCE_FRAMES:
793
- ret, frame = cap.read()
794
- if not ret:
795
- break
796
- frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
797
- frames.append(frame_rgb)
798
- frame_count += 1
799
 
800
- if not frames:
801
- raise RuntimeError("No frames extracted for reference mask")
802
 
803
- logger.info(f"Extracted {len(frames)} reference frames")
 
804
 
805
- masks: List[np.ndarray] = []
806
- for i, frame in enumerate(frames):
807
- try:
808
- logger.info(f"Creating mask for frame {i+1}/{len(frames)}…")
809
- mask = self._create_sam2_mask(frame)
810
- if mask is not None:
811
- if mask.dtype == np.uint8:
812
- mask = mask.astype(np.float32) / 255.0
813
- masks.append(mask)
814
- if self.config.SAVE_DEBUG_FILES:
815
- debug_path = os.path.join(self.temp_dir, f"debug_mask_{i}.png")
816
- cv2.imwrite(debug_path, np.clip(mask * 255, 0, 255).astype(np.uint8))
817
- except Exception as e:
818
- logger.warning(f"Failed to create mask for frame {i}: {e}")
819
- continue
820
 
821
- if not masks:
822
- raise RuntimeError("No valid reference masks created")
 
823
 
824
- combined_mask = self._combine_reference_masks(masks)
825
- reference_mask_path = os.path.join(self.temp_dir, "reference_mask.png")
826
- cv2.imwrite(reference_mask_path, np.clip(combined_mask * 255, 0, 255).astype(np.uint8))
827
- logger.info(f"✅ Reference mask created: {reference_mask_path}")
828
- return reference_mask_path
829
 
830
  except Exception as e:
831
- logger.error(f"Stage 1 failed: {e}")
832
  raise
833
- finally:
834
- try:
835
- if cap is not None:
836
- cap.release()
837
- except Exception:
838
- pass
839
-
840
- def _create_sam2_mask(self, frame_rgb: np.ndarray) -> Optional[np.ndarray]:
841
- """Create mask using SAM2Handler; try a few signatures."""
842
- try:
843
- methods_to_try = [
844
- lambda: self.sam2_handler.create_mask(frame_rgb),
845
- lambda: self.sam2_handler.create_mask(frame_rgb, None, None, None),
846
- lambda: self.sam2_handler.predict(frame_rgb) if hasattr(self.sam2_handler, 'predict') else None
847
- ]
848
- for method in methods_to_try:
849
- try:
850
- mask = method()
851
- if mask is not None:
852
- if isinstance(mask, dict) and 'mask' in mask:
853
- mask = mask['mask']
854
- if mask.dtype == np.uint8:
855
- mask = mask.astype(np.float32) / 255.0
856
- return np.clip(mask, 0.0, 1.0)
857
- except (TypeError, AttributeError):
858
- continue
859
- logger.warning("All SAM2 methods failed")
860
- return None
861
- except Exception as e:
862
- logger.error(f"SAM2 mask creation failed: {e}")
863
- return None
864
 
865
def _combine_reference_masks(self, masks: List[np.ndarray], threshold: float = 0.3) -> np.ndarray:
    """Merge several masks into one by averaging and thresholding.

    Args:
        masks: Non-empty list of equally shaped masks.
        threshold: Averaged values at or below this are zeroed; values
            above it keep their averaged magnitude. Defaults to 0.3.

    Returns:
        The single input mask unchanged when only one is given (no
        thresholding or dtype cast is applied in that case); otherwise a
        float32 array of the thresholded per-pixel mean.

    Raises:
        ValueError: If ``masks`` is empty.
    """
    if not masks:
        raise ValueError("No masks to combine")
    if len(masks) == 1:
        return masks[0]

    avg_mask = np.mean(np.stack(masks, axis=0), axis=0)
    result = np.where(avg_mask > threshold, avg_mask, 0.0).astype(np.float32)
    logger.info(f"Combined {len(masks)} masks with threshold {threshold}")
    return result
875
-
876
- # ------------------------------------------------------------------
877
- # Stage 2: MatAnyone
878
- # ------------------------------------------------------------------
879
def _stage2_matanyone_processing(self, video_path: str, reference_mask_path: str) -> str:
    """Run MatAnyone over the whole video and return the alpha video path.

    The handler is first called with the explicit keyword signature; if
    that raises, a three-argument positional call is tried. The output
    directory is then searched (top level first, then sub-directories) for
    an alpha video, which must contain at least one frame.

    Args:
        video_path: Video to matte. No trimming happens here.
        reference_mask_path: Path of the reference mask image.

    Returns:
        Path of the alpha video found in the MatAnyone output directory.

    Raises:
        RuntimeError: If neither call form works, no alpha video is found,
            or the alpha video has zero frames.
    """
    def _is_alpha_video(name: str) -> bool:
        # Single definition of what counts as MatAnyone's alpha output.
        low = name.lower()
        return (low.endswith('_pha.mp4')
                or low == 'pha.mp4'
                or ('alpha' in low and low.endswith('.mp4')))

    cap = None
    try:
        matanyone_dir = os.path.join(self.temp_dir, "matanyone_out")
        os.makedirs(matanyone_dir, exist_ok=True)

        profile = QualityManager.get_profile('high')
        max_size = profile.get('max_size', 1920)

        try:
            self.matanyone_handler.process_video(
                input_path=video_path,
                mask_path=reference_mask_path,
                output_path=matanyone_dir,
                n_warmup=0,  # do NOT limit to 5 seconds
                r_erode=0,
                r_dilate=15,
                suffix='pha',
                save_image=False,
                max_size=max_size
            )
        except Exception as kwargs_error:
            logger.warning(f"Explicit kwargs failed: {kwargs_error}")
            # Positional fallback for handlers exposing a simpler signature.
            try:
                self.matanyone_handler.process_video(video_path, reference_mask_path, matanyone_dir)
            except Exception as positional_error:
                logger.error(f"Positional call to MatAnyone failed: {positional_error}")
                raise RuntimeError(
                    "MatAnyone.process_video could not be invoked with known signatures"
                ) from positional_error

        # os.walk is top-down, so the output directory itself is inspected
        # before any nested folder; sorting makes the pick deterministic.
        alpha_output_path = None
        for root, dirs, files in os.walk(matanyone_dir):
            dirs.sort()
            match = next((f for f in sorted(files) if _is_alpha_video(f)), None)
            if match is not None:
                alpha_output_path = os.path.join(root, match)
                break

        if not alpha_output_path or not os.path.exists(alpha_output_path):
            raise RuntimeError("MatAnyone did not create an alpha video")

        # Verify basic properties
        cap = cv2.VideoCapture(alpha_output_path)
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT) or 0)
        fps = cap.get(cv2.CAP_PROP_FPS) or 0.0
        duration = frame_count / fps if fps > 0 else 0

        if frame_count == 0:
            raise RuntimeError("Alpha video has no frames")

        logger.info(f"✅ MatAnyone created alpha video: {frame_count} frames, {duration:.1f}s")
        return alpha_output_path

    except Exception as e:
        logger.error(f"Stage 2 failed: {e}")
        raise
    finally:
        # Release the probe capture on every path, including errors.
        if cap is not None:
            cap.release()
956
 
957
- # ------------------------------------------------------------------
958
- # Stage 3: Alpha compositing (video-only)
959
- # ------------------------------------------------------------------
960
def _stage3_alpha_composite(self, original_video_path: str,
                            alpha_video_path: str,
                            background_path: str,
                            output_path: str,
                            quality: str) -> str:
    """Composite the original frames over a background using an alpha video.

    Frames are read in lockstep from the original and alpha videos until
    either runs out. The background is a still image, or a video that is
    rewound when it ends. Output is video-only ('mp4v'); no audio is written.

    Args:
        original_video_path: Source of the foreground frames.
        alpha_video_path: Matte video; converted to one channel per frame.
        background_path: Image, or a video with one of the extensions
            .mp4/.avi/.mov/.mkv/.webm.
        output_path: Destination video file.
        quality: Accepted for interface compatibility; not read here.

    Returns:
        ``output_path``.

    Raises:
        RuntimeError: If an input cannot be opened or read, the frame size
            is unknown, or the writer cannot be created.
    """
    original_cap = None
    alpha_cap = None
    bg_cap = None
    out = None
    try:
        original_cap = cv2.VideoCapture(original_video_path)
        alpha_cap = cv2.VideoCapture(alpha_video_path)

        if not original_cap.isOpened() or not alpha_cap.isOpened():
            raise RuntimeError("Cannot open videos for compositing")

        fps = float(original_cap.get(cv2.CAP_PROP_FPS) or 30.0)
        width = int(original_cap.get(cv2.CAP_PROP_FRAME_WIDTH) or 0)
        height = int(original_cap.get(cv2.CAP_PROP_FRAME_HEIGHT) or 0)
        total_frames = int(original_cap.get(cv2.CAP_PROP_FRAME_COUNT) or 0)
        if width <= 0 or height <= 0:
            # A zero-sized writer/resize would fail later with a vaguer error.
            raise RuntimeError("Cannot determine frame size of original video")

        # Load background (image or looping video)
        bg_image = None
        bg_is_video = background_path.lower().endswith(('.mp4', '.avi', '.mov', '.mkv', '.webm'))
        if bg_is_video:
            bg_cap = cv2.VideoCapture(background_path)
            if not bg_cap.isOpened():
                raise RuntimeError(f"Cannot open background video: {background_path}")
        else:
            bg_image = cv2.imread(background_path)
            if bg_image is None:
                raise RuntimeError(f"Cannot load background image: {background_path}")
            bg_image = cv2.resize(bg_image, (width, height))

        # Setup video writer (NO AUDIO)
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        if not out.isOpened():
            raise RuntimeError("Failed to open VideoWriter for composite")

        log_every = max(1, self.config.MEMORY_CHECK_INTERVAL)
        frame_count = 0
        while True:
            ret_orig, orig_frame = original_cap.read()
            ret_alpha, alpha_frame = alpha_cap.read()
            if not ret_orig or not ret_alpha:
                break

            # Background frame
            if bg_is_video:
                ret_bg, bg_frame = bg_cap.read()
                if not ret_bg:
                    bg_cap.set(cv2.CAP_PROP_POS_FRAMES, 0)  # loop
                    ret_bg, bg_frame = bg_cap.read()
                if not ret_bg or bg_frame is None:
                    # The rewind can fail too (e.g. an unseekable or empty file).
                    raise RuntimeError(f"Cannot read frame from background video: {background_path}")
                bg_frame = cv2.resize(bg_frame, (width, height))
            else:
                bg_frame = bg_image.copy()

            # Alpha extraction (single channel)
            if alpha_frame.ndim == 3:
                alpha_mask = cv2.cvtColor(alpha_frame, cv2.COLOR_BGR2GRAY)
            else:
                alpha_mask = alpha_frame
            if alpha_mask.shape[:2] != orig_frame.shape[:2]:
                alpha_mask = cv2.resize(alpha_mask, (width, height), interpolation=cv2.INTER_LINEAR)

            # Refine & normalize
            # NOTE(review): presumably returns a float matte in [0, 1] -- confirm
            # against the alpha refiner implementation.
            alpha_refined = self.alpha_refiner.refine_alpha_matte(alpha_mask, self.config)

            # Composite (use float math, then cast)
            a3 = np.dstack([alpha_refined] * 3).astype(np.float32)
            fg = orig_frame.astype(np.float32)
            bg = bg_frame.astype(np.float32)
            mixed = a3 * fg + (1.0 - a3) * bg

            # Optional defringing
            if self.config.ENABLE_DEFRINGING:
                mixed = self.alpha_refiner.defringe_edges(
                    mixed.astype(np.uint8),
                    alpha_refined,
                    self.config.DEFRINGE_RADIUS
                )

            out.write(mixed.astype(np.uint8))
            frame_count += 1

            if frame_count % log_every == 0:
                logger.info(f"Compositing: {frame_count}/{total_frames}")

        # Finalise the file before reporting success.
        out.release()
        out = None
        logger.info(f"✅ Compositing completed: {frame_count} frames → {output_path}")
        return output_path

    except Exception as e:
        logger.error(f"Stage 3 compositing failed: {e}")
        raise
    finally:
        # Best-effort release of every handle, including the writer, which
        # previously stayed open whenever the loop raised.
        for handle in (out, original_cap, alpha_cap, bg_cap):
            if handle is None:
                continue
            try:
                handle.release()
            except Exception as release_error:
                logger.debug(f"Failed to release video handle: {release_error}")
1073
 
1074
- # ------------------------------------------------------------------
1075
- # Small helpers
1076
- # ------------------------------------------------------------------
1077
def _trim_video(self, input_path: str, output_path: str, seconds: float):
    """Write the leading part of a video to ``output_path`` without audio.

    Args:
        input_path: Video to read.
        output_path: Destination file, encoded with ``config.VIDEO_CODEC``.
        seconds: Requested length; capped at the clip duration when known.

    Raises:
        ValueError: If ``seconds`` is not a positive number.
        Exception: Any error from MoviePy is logged and re-raised.
    """
    try:
        if seconds is None or seconds <= 0:
            raise ValueError(f"Trim duration must be positive, got {seconds!r}")

        with VideoFileClip(input_path) as clip:
            # clip.duration may be None/0; fall back to the requested length.
            end = min(seconds, clip.duration or seconds)
            trimmed = clip.subclip(0, end)
            trimmed.write_videofile(
                output_path,
                codec=self.config.VIDEO_CODEC,
                audio=False,  # Don't process audio in trim
                verbose=False,
                logger=None
            )
        # Report the length actually written, not just the one requested.
        logger.info(f"Video trimmed to {end}s → {output_path}")
    except Exception as e:
        logger.error(f"Video trimming failed: {e}")
        raise
 
 
 
 
 
1093
 
1094
def cleanup(self):
    """Release model resources and delete the temp directory (best effort).

    Each step runs independently: a failure is logged as a warning and the
    remaining steps still execute, so a failing model cleanup no longer
    leaves the temp directory behind. Nothing is raised to the caller.
    """
    def _remove_temp_dir():
        if os.path.exists(self.temp_dir):
            shutil.rmtree(self.temp_dir)
            logger.info(f"Cleaned up temp directory: {self.temp_dir}")

    steps = (
        lambda: self.memory_manager.cleanup_model(self.sam2_handler, "SAM2", deep_cleanup=True),
        lambda: self.memory_manager.cleanup_model(self.matanyone_handler, "MatAnyone", deep_cleanup=True),
        lambda: self.memory_manager.cleanup_stage("Cleanup", force=True),
        _remove_temp_dir,
    )
    for step in steps:
        try:
            step()
        except Exception as e:
            logger.warning(f"Cleanup failed: {e}")
1107
-
1108
def get_processing_stats(self) -> Dict[str, Any]:
    """Collect a snapshot of processor state for diagnostics.

    Returns:
        A dict with the temp directory, the memory manager's GPU memory
        info, which audio backend is active ('ffmpeg' when an audio
        processor is set, otherwise 'MoviePy'), and selected config values.
        'audio_stats' is added only when the audio processor reports
        statistics successfully.
    """
    stats: Dict[str, Any] = {
        'temp_dir': self.temp_dir,
        'memory_info': self.memory_manager.get_gpu_memory_info(),
        'audio_processor': 'ffmpeg' if self.audio_processor else 'MoviePy',
        'config': {
            'audio_enabled': self.config.PRESERVE_ORIGINAL_AUDIO,
            'audio_quality': self.config.AUDIO_QUALITY,
            'chunk_duration': self.config.MAX_CHUNK_DURATION
        }
    }
    if self.audio_processor:
        try:
            stats['audio_stats'] = self.audio_processor.get_stats()
        except Exception as e:
            # Audio stats are optional; record why they are missing instead
            # of dropping the failure silently.
            logger.debug(f"Audio stats unavailable: {e}")
    return stats
1126
-
1127
-
1128
- # ==============================================================================
1129
- # EXPORT INTERFACE
1130
- # ==============================================================================
1131
-
1132
def create_two_stage_processor(sam2_handler, matanyone_handler, **kwargs) -> TwoStageProcessor:
    """Build a TwoStageProcessor from the two model handlers.

    Any extra keyword arguments are forwarded unchanged to the
    TwoStageProcessor constructor.
    """
    processor = TwoStageProcessor(sam2_handler, matanyone_handler, **kwargs)
    return processor
1135
 
1136
 
1137
if __name__ == "__main__":
    # Running the module directly only logs a short feature summary.
    _banner_lines = (
        "Enhanced TwoStageProcessor with Audio Support",
        "Features:",
        " ✅ ffmpeg-based audio via AudioProcessor",
        " ✅ Full video duration processing (no 5-second limit unless you pass trim_seconds)",
        " ✅ Direct alpha compositing (no green screen)",
        " ✅ Memory-efficient chunked processing",
        " ✅ Robust MatAnyone integration with signature fallbacks",
        "",
        "Use create_two_stage_processor(sam2_handler, matanyone_handler) to initialize",
    )
    for _line in _banner_lines:
        logger.info(_line)
 
1
  #!/usr/bin/env python3
2
  """
3
+ FIXED Two-Stage Alpha Channel Processing System
4
+ Key fixes:
5
+ - Removed all hardcoded duration limits
6
+ - Fixed MatAnyone parameter handling
7
+ - Proper audio preservation throughout pipeline
8
+ - Simplified and more reliable processing flow
 
 
 
 
 
 
 
 
 
9
  """
10
 
 
 
 
 
 
 
11
  import os
12
  import sys
13
  import cv2
 
15
  import tempfile
16
  import shutil
17
  import logging
 
18
  import time
19
  import traceback
20
  from pathlib import Path
21
  from typing import Optional, Tuple, Dict, Any, List
22
 
23
+ from moviepy.editor import VideoFileClip, AudioFileClip, concatenate_videoclips
 
 
 
 
24
 
25
+ # Try to import AudioProcessor
 
26
  try:
27
  from processing.audio import AudioProcessor
28
  except Exception:
29
  try:
30
+ from audio import AudioProcessor
31
  except Exception:
32
  AudioProcessor = None
33
 
 
34
  logger = logging.getLogger(__name__)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
  class TwoStageProcessor:
37
  """
38
+ Fixed two-stage processor that handles full video length and preserves audio.
 
39
  """
40
 
41
def __init__(self, sam2_handler, matanyone_handler, temp_dir: Optional[str] = None):
    """Store the model handlers and prepare the working directory.

    Args:
        sam2_handler: Object used to create the reference mask.
        matanyone_handler: Object used to produce the alpha video.
        temp_dir: Working directory; a fresh 'twostage_' temp directory is
            created when omitted.

    The working directory is created before AudioProcessor is constructed,
    so the audio helper never receives a path that does not exist yet.
    When AudioProcessor is unavailable or fails to initialise,
    ``self.audio_processor`` is None and MoviePy is used for audio instead.
    """
    self.sam2_handler = sam2_handler
    self.matanyone_handler = matanyone_handler
    self.temp_dir = temp_dir or tempfile.mkdtemp(prefix='twostage_')
    os.makedirs(self.temp_dir, exist_ok=True)

    # Initialize AudioProcessor if available
    self.audio_processor = None
    if AudioProcessor:
        try:
            self.audio_processor = AudioProcessor(temp_dir=self.temp_dir)
            logger.info("AudioProcessor initialized for ffmpeg-based audio handling")
        except Exception as e:
            logger.warning(f"AudioProcessor init failed: {e}. Using MoviePy fallback.")
    else:
        logger.info("AudioProcessor not available - using MoviePy for audio")
 
 
 
59
 
60
  def process_video(self,
61
  video_path: str,
 
66
  callback: Optional[callable] = None,
67
  **kwargs) -> Tuple[Optional[str], str]:
68
  """
69
+ FIXED: Main processing pipeline with proper full-length handling.
 
 
 
70
  """
71
  try:
72
+ logger.info(f"Processing video: {video_path}")
73
+ logger.info(f"Trim requested: {trim_seconds}s" if trim_seconds else "Full video processing")
74
+
75
+ # Extract original audio FIRST (before any processing)
76
+ original_audio_path = self._extract_original_audio(video_path)
77
+
78
+ # Prepare input video (trim only if explicitly requested)
79
+ processing_video = video_path
80
+ if trim_seconds and trim_seconds > 0:
81
+ processing_video = self._create_trimmed_video(video_path, trim_seconds)
82
+ logger.info(f"Created trimmed version for processing: {trim_seconds}s")
83
  else:
84
+ logger.info("Processing full video - no trimming applied")
85
 
86
+ # Stage 1: Create reference mask from first frame(s)
87
+ if callback:
88
+ callback("Creating reference mask...", 10)
89
+ reference_mask = self._create_reference_mask(processing_video)
 
 
 
 
90
 
91
+ # Stage 2: MatAnyone processing (FIXED to handle full video)
92
+ if callback:
93
+ callback("Processing with MatAnyone...", 30)
94
+ alpha_video = self._process_with_matanyone(processing_video, reference_mask)
95
 
96
+ # Stage 3: Composite with background
97
+ if callback:
98
+ callback("Compositing with background...", 70)
99
+ composite_video = self._composite_with_background(
100
+ processing_video, alpha_video, background_path
101
+ )
 
 
 
 
 
 
102
 
103
+ # Stage 4: Add original audio back
104
+ if callback:
105
+ callback("Adding audio...", 90)
106
+ final_output = self._add_audio_to_final(
107
+ composite_video, original_audio_path, output_path
108
+ )
 
 
 
109
 
110
+ if callback:
111
+ callback("Complete!", 100)
112
+
113
+ # Verify output
114
+ if final_output and os.path.exists(final_output):
115
+ with VideoFileClip(final_output) as clip:
116
+ duration = clip.duration or 0
117
+ has_audio = clip.audio is not None
118
+ logger.info(f"Final output: {duration:.1f}s, Audio: {has_audio}")
119
+
120
+ return final_output, f"Success: {duration:.1f}s video with audio"
121
+ else:
122
+ return None, "Processing completed but output file not found"
123
 
124
  except Exception as e:
125
  error_msg = f"Processing failed: {str(e)}"
126
+ logger.error(f"{error_msg}\n{traceback.format_exc()}")
 
127
  return None, error_msg
128
 
129
def _extract_original_audio(self, video_path: str) -> Optional[str]:
    """Save the audio track of ``video_path`` into the temp directory.

    Uses the AudioProcessor when one is configured and MoviePy otherwise.

    Returns:
        Whatever the AudioProcessor's ``extract_audio`` returns, or the
        path of the AAC file written by MoviePy. None when the clip has no
        audio track or when extraction raises (the error is logged as a
        warning).
    """
    try:
        target = os.path.join(self.temp_dir, "original_audio.aac")

        if not self.audio_processor:
            # Fallback to MoviePy
            logger.info("Extracting audio with MoviePy...")
            with VideoFileClip(video_path) as clip:
                if clip.audio is None:
                    return None
                clip.audio.write_audiofile(
                    target,
                    codec='aac',
                    bitrate='192k',
                    verbose=False,
                    logger=None
                )
                return target

        # AudioProcessor (ffmpeg) path
        logger.info("Extracting audio with ffmpeg...")
        return self.audio_processor.extract_audio(
            video_path=video_path,
            output_path=target,
            audio_format='aac',
            quality='high'
        )

    except Exception as e:
        logger.warning(f"Audio extraction failed: {e}")
        return None
162
 
163
def _create_trimmed_video(self, video_path: str, trim_seconds: float) -> str:
    """Write a video-only copy of the first ``trim_seconds`` of the input.

    Args:
        video_path: Video to read.
        trim_seconds: Requested length; capped at the clip duration when
            that is known.

    Returns:
        Path of the trimmed file inside the temp directory.

    Raises:
        ValueError: If ``trim_seconds`` is not a positive number.
    """
    if trim_seconds is None or trim_seconds <= 0:
        raise ValueError(f"trim_seconds must be positive, got {trim_seconds!r}")

    trimmed_path = os.path.join(self.temp_dir, "trimmed_for_processing.mp4")

    with VideoFileClip(video_path) as clip:
        duration = min(trim_seconds, clip.duration or trim_seconds)
        trimmed = clip.subclip(0, duration)
        try:
            trimmed.write_videofile(
                trimmed_path,
                codec='libx264',
                audio=False,  # We handle audio separately
                verbose=False,
                logger=None
            )
        finally:
            # Close the subclip even when encoding fails.
            trimmed.close()

    return trimmed_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
 
181
def _create_reference_mask(self, video_path: str) -> str:
    """Create the reference mask PNG from the first frame of the video.

    The first frame is converted to RGB, passed to the SAM2 handler's
    ``create_mask``, and the result is written as an 8-bit image. Masks
    whose maximum is at most 1 (bool, 0/1 integers, floats in [0, 1]) are
    scaled to 0-255 first; writing them unscaled gave a near-black PNG.

    Returns:
        Path of ``reference_mask.png`` inside the temp directory.

    Raises:
        RuntimeError: If no frame can be read, SAM2 returns no mask, or the
            mask image cannot be written.
    """
    try:
        cap = cv2.VideoCapture(video_path)
        try:
            ret, frame = cap.read()
        finally:
            # Release the capture even if read() raises.
            cap.release()

        if not ret or frame is None:
            raise RuntimeError("Could not read frame from video")

        # Convert to RGB for SAM2
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Create mask using SAM2
        mask = self.sam2_handler.create_mask(frame_rgb)
        if isinstance(mask, dict):
            mask = mask.get('mask')
        if mask is None:
            raise RuntimeError("SAM2 failed to create mask")

        mask = np.asarray(mask)
        # Drop leading singleton axes such as (1, H, W).
        while mask.ndim > 2 and mask.shape[0] == 1:
            mask = mask[0]
        if mask.dtype != np.uint8 or mask.max(initial=0) <= 1:
            scaled = mask.astype(np.float32)
            if scaled.max(initial=0.0) <= 1.0:
                scaled = scaled * 255.0
            mask = np.clip(scaled, 0, 255).astype(np.uint8)

        # Save mask
        mask_path = os.path.join(self.temp_dir, "reference_mask.png")
        if not cv2.imwrite(mask_path, mask):
            raise RuntimeError(f"Failed to write reference mask: {mask_path}")

        logger.info(f"Reference mask created: {mask_path}")
        return mask_path

    except Exception as e:
        logger.error(f"Reference mask creation failed: {e}")
        raise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
 
210
def _process_with_matanyone(self, video_path: str, mask_path: str) -> str:
    """Run MatAnyone on the video and return the path of its alpha output.

    The handler is called with named parameters first (without any
    warm-up or duration argument) and, if that raises, with just the three
    positional paths. The output directory is then searched, top level
    first, for an ``.mp4`` whose name contains 'pha' (which also matches
    'alpha').

    Returns:
        Path of the alpha video.

    Raises:
        RuntimeError: If both call forms fail or no alpha video is found.
    """
    def _looks_like_alpha(name: str) -> bool:
        low = name.lower()
        return low.endswith('.mp4') and 'pha' in low

    try:
        output_dir = os.path.join(self.temp_dir, "matanyone_output")
        os.makedirs(output_dir, exist_ok=True)

        try:
            self.matanyone_handler.process_video(
                input_path=video_path,
                mask_path=mask_path,
                output_path=output_dir,
                r_erode=0,
                r_dilate=15,
                suffix='pha',
                save_image=False,
                max_size=1920  # Only limit resolution, not duration
            )
            logger.info("MatAnyone processing completed with named parameters")
        except Exception as e:
            logger.warning(f"Named parameters failed: {e}")
            # Fallback to minimal parameters
            try:
                self.matanyone_handler.process_video(video_path, mask_path, output_dir)
                logger.info("MatAnyone processing completed with minimal parameters")
            except Exception as e2:
                raise RuntimeError(f"MatAnyone failed with both signatures: {e}, {e2}") from e2

        # Find the alpha video output. os.walk is top-down, so files directly
        # in output_dir win over nested ones; sorting keeps the pick stable.
        alpha_video = None
        for root, dirs, files in os.walk(output_dir):
            dirs.sort()
            match = next((f for f in sorted(files) if _looks_like_alpha(f)), None)
            if match is not None:
                alpha_video = os.path.join(root, match)
                break

        if not alpha_video or not os.path.exists(alpha_video):
            raise RuntimeError("MatAnyone did not produce alpha video output")

        # Compare durations so a truncated alpha video is reported.
        with VideoFileClip(alpha_video) as clip:
            alpha_duration = clip.duration or 0
        logger.info(f"Alpha video duration: {alpha_duration:.1f}s")

        with VideoFileClip(video_path) as orig_clip:
            orig_duration = orig_clip.duration or 0
        # Tolerate small container/rounding differences (1s or 5%).
        tolerance = max(1.0, 0.05 * orig_duration)
        if orig_duration > 0 and alpha_duration < orig_duration - tolerance:
            logger.warning(f"Alpha video ({alpha_duration:.1f}s) much shorter than original ({orig_duration:.1f}s) - possible duration limit bug")

        return alpha_video

    except Exception as e:
        logger.error(f"MatAnyone processing failed: {e}")
        raise
282
 
283
def _composite_with_background(self, original_video: str, alpha_video: str, background_path: str) -> str:
    """Blend the original frames over a background using the alpha video.

    Every frame of the original video is written. When the alpha video is
    shorter, its last successfully read frame is reused. The background is
    a still image, or a video that is rewound when it ends. The result has
    no audio.

    Returns:
        Path of ``composite_no_audio.mp4`` inside the temp directory.

    Raises:
        RuntimeError: If an input cannot be opened or read, or the writer
            cannot be created.
    """
    orig_cap = None
    alpha_cap = None
    bg_cap = None
    out = None
    try:
        output_path = os.path.join(self.temp_dir, "composite_no_audio.mp4")

        # Open video captures
        orig_cap = cv2.VideoCapture(original_video)
        alpha_cap = cv2.VideoCapture(alpha_video)
        if not orig_cap.isOpened() or not alpha_cap.isOpened():
            raise RuntimeError("Cannot open videos for compositing")

        # Get video properties; fall back to 30 fps when the container
        # reports none, since a writer with fps 0 produces a broken file.
        fps = float(orig_cap.get(cv2.CAP_PROP_FPS) or 30.0)
        width = int(orig_cap.get(cv2.CAP_PROP_FRAME_WIDTH) or 0)
        height = int(orig_cap.get(cv2.CAP_PROP_FRAME_HEIGHT) or 0)
        if width <= 0 or height <= 0:
            raise RuntimeError("Cannot determine frame size of original video")

        # Load background
        bg_image = None
        bg_is_video = background_path.lower().endswith(('.mp4', '.avi', '.mov', '.mkv'))
        if bg_is_video:
            bg_cap = cv2.VideoCapture(background_path)
            if not bg_cap.isOpened():
                raise RuntimeError(f"Cannot open background video: {background_path}")
        else:
            bg_image = cv2.imread(background_path)
            if bg_image is None:
                raise RuntimeError(f"Cannot load background image: {background_path}")
            bg_image = cv2.resize(bg_image, (width, height))

        # Setup video writer
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        if not out.isOpened():
            raise RuntimeError("Failed to open VideoWriter for composite")

        last_alpha = None
        frame_count = 0
        while True:
            ret_orig, orig_frame = orig_cap.read()
            ret_alpha, alpha_frame = alpha_cap.read()

            if not ret_orig:
                break

            # Handle alpha frame: reuse the last good one when the alpha
            # video is shorter, instead of re-seeking on every frame.
            if ret_alpha and alpha_frame is not None:
                last_alpha = alpha_frame
            elif last_alpha is not None:
                alpha_frame = last_alpha
            else:
                raise RuntimeError("Alpha video contains no readable frames")

            # Get background frame
            if bg_is_video:
                ret_bg, bg_frame = bg_cap.read()
                if not ret_bg:
                    bg_cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
                    ret_bg, bg_frame = bg_cap.read()
                if not ret_bg or bg_frame is None:
                    raise RuntimeError(f"Cannot read frame from background video: {background_path}")
                bg_frame = cv2.resize(bg_frame, (width, height))
            else:
                bg_frame = bg_image.copy()

            # Extract alpha channel
            if alpha_frame.ndim == 3:
                alpha = cv2.cvtColor(alpha_frame, cv2.COLOR_BGR2GRAY)
            else:
                alpha = alpha_frame

            alpha = cv2.resize(alpha, (width, height))
            alpha = alpha.astype(np.float32) / 255.0

            # Composite
            alpha_3ch = np.stack([alpha] * 3, axis=2)
            composite = (alpha_3ch * orig_frame.astype(np.float32) +
                         (1 - alpha_3ch) * bg_frame.astype(np.float32))

            out.write(composite.astype(np.uint8))
            frame_count += 1

            if frame_count % 30 == 0:
                logger.info(f"Composited {frame_count} frames")

        # Finalise the file before reporting success.
        out.release()
        out = None

        logger.info(f"Compositing complete: {frame_count} frames")
        return output_path

    except Exception as e:
        logger.error(f"Compositing failed: {e}")
        raise
    finally:
        # Release every handle on all paths; previously an exception left
        # the captures and the writer open.
        for handle in (out, orig_cap, alpha_cap, bg_cap):
            if handle is None:
                continue
            try:
                handle.release()
            except Exception as release_error:
                logger.debug(f"Failed to release video handle: {release_error}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
369
 
370
def _add_audio_to_final(self, video_path: str, audio_path: Optional[str], output_path: str) -> str:
    """Mux the extracted audio into the composited video.

    Args:
        video_path: Video without audio.
        audio_path: Audio file to add; when None or missing, the video is
            copied to ``output_path`` unchanged.
        output_path: Destination of the final file.

    Returns:
        Path of the final file. If muxing fails for any reason, the silent
        video is copied to ``output_path`` and that path is returned.

    Raises:
        Exception: Only if the fallback copy itself fails.
    """
    try:
        if not audio_path or not os.path.exists(audio_path):
            # No audio to add, just copy video
            shutil.copy2(video_path, output_path)
            logger.info("No audio to add - copied video as-is")
            return output_path

        if self.audio_processor:
            # Use AudioProcessor (ffmpeg)
            result = self.audio_processor.add_audio_to_video(
                original_video=audio_path,   # Source of audio
                processed_video=video_path,  # Video without audio
                output_path=output_path,
                audio_quality='high'
            )
            # Only report success when a file was actually produced; a
            # None/False result used to be returned as if it were a path.
            if isinstance(result, (str, os.PathLike)) and os.path.exists(result):
                logger.info("Audio added using ffmpeg")
                return result
            if result and os.path.exists(output_path):
                logger.info("Audio added using ffmpeg")
                return output_path
            raise RuntimeError("AudioProcessor did not produce an output file")

        # Use MoviePy fallback
        with VideoFileClip(video_path) as video, AudioFileClip(audio_path) as audio:
            track = audio
            if video.duration and audio.duration and audio.duration > video.duration:
                # The audio comes from the untrimmed source; cut it so the
                # output does not run longer than the picture.
                track = audio.subclip(0, video.duration)
            final = video.set_audio(track)
            try:
                final.write_videofile(
                    output_path,
                    codec='libx264',
                    audio_codec='aac',
                    verbose=False,
                    logger=None
                )
            finally:
                final.close()

        logger.info("Audio added using MoviePy")
        return output_path

    except Exception as e:
        logger.error(f"Failed to add audio: {e}")
        # Return video without audio rather than failing
        shutil.copy2(video_path, output_path)
        return output_path
414
 
415
def cleanup(self):
    """Delete the temp directory if it exists.

    Failures are logged as warnings and never raised.
    """
    try:
        if not os.path.exists(self.temp_dir):
            return
        shutil.rmtree(self.temp_dir)
        logger.info(f"Cleaned up temp directory: {self.temp_dir}")
    except Exception as e:
        logger.warning(f"Cleanup failed: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
423
 
424
 
425
+ # Factory function for compatibility
426
def create_two_stage_processor(sam2_handler, matanyone_handler, **kwargs):
    """Build a TwoStageProcessor from the two model handlers.

    Extra keyword arguments are forwarded unchanged to the constructor.
    """
    processor = TwoStageProcessor(sam2_handler, matanyone_handler, **kwargs)
    return processor