# sync_audio_video.py
# (originally distributed via a Hugging Face repository upload, commit 0711651)
import torch
import gc
import os
import subprocess
import datetime
class AudioVideoSync:
    """Lip-syncs a video to an audio track using the Wav2Lip model.

    Shells out to Wav2Lip's ``inference.py`` script via ``subprocess`` rather
    than importing the model, so only the repo path and checkpoint location
    need to be known.
    """

    def __init__(self, wav2lip_dir="Wav2Lip"):
        """
        Args:
            wav2lip_dir: Path to a checkout of the Wav2Lip repository. The GAN
                checkpoint is expected at
                ``<wav2lip_dir>/checkpoints/wav2lip_gan.pth``.
        """
        self.wav2lip_dir = wav2lip_dir
        self.checkpoint_path = os.path.join(wav2lip_dir, "checkpoints", "wav2lip_gan.pth")

    def print_memory_usage(self, stage=""):
        """Print GPU memory usage (nvidia-smi plus torch allocator stats).

        Guarded so it degrades to a short notice on CPU-only machines instead
        of raising from the ``torch.cuda`` calls.
        """
        if torch.cuda.is_available():
            os.system('nvidia-smi')
            print(f"[{stage}] Allocated Memory: {torch.cuda.memory_allocated() / 1024**2:.2f} MB")
            print(f"[{stage}] Reserved Memory: {torch.cuda.memory_reserved() / 1024**2:.2f} MB")
        else:
            # No CUDA device: nothing meaningful to report.
            print(f"[{stage}] CUDA not available; skipping GPU memory report.")
        print("-" * 50)

    def sync_audio_video(self, video_path, audio_path, output_video=None):
        """Run Wav2Lip inference to sync the lips in a video to an audio track.

        Args:
            video_path: Input video containing the face to re-animate.
            audio_path: Driving audio track.
            output_video: Destination path for the result; defaults to a
                timestamped ``result_<YYYYmmdd_HHMMSS>.mp4`` in the current
                working directory.

        Returns:
            The path of the generated video, or ``None`` if the Wav2Lip
            subprocess exited with a non-zero status.

        Raises:
            FileNotFoundError: If either input file does not exist.
        """
        if not os.path.exists(video_path) or not os.path.exists(audio_path):
            raise FileNotFoundError("Video or Audio file not found.")

        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        result_video = output_video or f"result_{timestamp}.mp4"

        # Free cached GPU memory before launching the memory-hungry model.
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        self.print_memory_usage("Before Wav2Lip Inference")

        # Run Wav2Lip inference in a separate process.
        print("Running Wav2Lip for better lip movement...")
        result = subprocess.run([
            "python", os.path.join(self.wav2lip_dir, "inference.py"),
            "--checkpoint_path", self.checkpoint_path,
            "--face", video_path,
            "--audio", audio_path,
            "--outfile", result_video,
            "--wav2lip_batch_size", "1",  # minimal batch to fit small GPUs
            "--resize_factor", "2",       # downscale input: less VRAM, faster
            "--nosmooth"                  # DISABLES face-box smoothing (helps with fast head motion)
        ], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        if result.returncode != 0:
            print("Error in Wav2Lip inference:", result.stderr.decode())
            return None

        # Release memory held after the subprocess returns.
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        self.print_memory_usage("After Wav2Lip Inference")

        print(f" Output saved at: {result_video}")
        return result_video