import datetime
import gc
import os
import subprocess
import sys

import torch


class AudioVideoSync:
    """Lip-sync a video to an audio track by running Wav2Lip's ``inference.py``.

    The inference script is launched as a separate Python process; this class
    only builds the command line, reports GPU memory before and after the run,
    and returns the path of the produced video.

    Attributes:
        wav2lip_dir: Directory that contains ``inference.py``.
        checkpoint_path: ``<wav2lip_dir>/checkpoints/wav2lip_gan.pth``, passed
            to the script as ``--checkpoint_path``.
    """

    def __init__(self, wav2lip_dir="Wav2Lip"):
        """Remember where Wav2Lip lives and derive the checkpoint location.

        Args:
            wav2lip_dir: Path to the Wav2Lip directory. Nothing is checked on
                disk here; a wrong path only surfaces when inference runs.
        """
        self.wav2lip_dir = wav2lip_dir
        self.checkpoint_path = os.path.join(
            wav2lip_dir, "checkpoints", "wav2lip_gan.pth"
        )

    def print_memory_usage(self, stage=""):
        """Prints GPU memory usage at different stages.

        Runs ``nvidia-smi`` (its output goes straight to the console) and then
        prints the CUDA memory torch has allocated and reserved, in MB.

        Args:
            stage: Label placed in square brackets in front of each figure.
        """
        os.system('nvidia-smi')
        # The torch figures are bytes held by *this* process's allocator,
        # converted to MB; memory used by a child process is not included.
        print(f"[{stage}] Allocated Memory: {torch.cuda.memory_allocated() / 1024**2:.2f} MB")
        print(f"[{stage}] Reserved Memory: {torch.cuda.memory_reserved() / 1024**2:.2f} MB")
        print("-" * 50)

    def sync_audio_video(self, video_path, audio_path, output_video=None):
        """Syncs audio and video using Wav2Lip.

        Args:
            video_path: Existing video file, passed to the script as ``--face``.
            audio_path: Existing audio file, passed to the script as ``--audio``.
            output_video: Destination path for the result. When falsy, a name
                of the form ``result_<YYYYmmdd_HHMMSS>.mp4`` is generated.

        Returns:
            The output video path on success, or ``None`` when the inference
            process exits with a non-zero status (its stderr is printed).

        Raises:
            FileNotFoundError: If ``video_path`` or ``audio_path`` does not
                exist. The message names the missing path(s).
        """
        missing = [
            path for path in (video_path, audio_path) if not os.path.exists(path)
        ]
        if missing:
            raise FileNotFoundError(
                "Video or Audio file not found: " + ", ".join(map(str, missing))
            )

        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        result_video = output_video or f"result_{timestamp}.mp4"

        # Release whatever this process can before the child starts.
        gc.collect()
        torch.cuda.empty_cache()
        self.print_memory_usage("Before Wav2Lip Inference")

        print("Running Wav2Lip for better lip movement...")
        command = [
            # Launch the script with the interpreter running this code so the
            # child sees the same environment; a bare "python" may resolve to
            # a different interpreter or be missing from PATH entirely.
            sys.executable or "python",
            os.path.join(self.wav2lip_dir, "inference.py"),
            "--checkpoint_path", self.checkpoint_path,
            "--face", video_path,
            "--audio", audio_path,
            "--outfile", result_video,
            # NOTE(review): batch size 1 and resize factor 2 look chosen to
            # keep GPU memory low - confirm against the Wav2Lip options.
            "--wav2lip_batch_size", "1",
            "--resize_factor", "2",
            "--nosmooth",
        ]
        result = subprocess.run(
            command, stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )

        if result.returncode != 0:
            # errors="replace": the child's stderr is not guaranteed to be
            # valid UTF-8, and a decode failure here would mask the real error.
            print(
                "Error in Wav2Lip inference:",
                result.stderr.decode(errors="replace"),
            )
            return None

        gc.collect()
        torch.cuda.empty_cache()
        self.print_memory_usage("After Wav2Lip Inference")

        print(f" Output saved at: {result_video}")
        return result_video
|
|
|
|
|