import datetime
import os
import subprocess

from faster_whisper import WhisperModel

# Load the model once, at import time, so every call reuses the same instance.
model = WhisperModel("guillaumekln/faster-whisper-small", compute_type="int8")


def format_timestamp(seconds: float) -> str:
    """Format a time offset in seconds as an SRT timestamp (HH:MM:SS,mmm).

    The value is rounded to whole milliseconds first and then split into
    fields, so float error cannot drop a millisecond (1.001 -> "00:00:01,001")
    and a value such as 1.9996 carries into the seconds field instead of
    producing an out-of-range millisecond part. Negative input is clamped
    to zero.
    """
    total_millis = max(0, round(seconds * 1000))
    total_secs, millis = divmod(total_millis, 1000)
    total_minutes, secs = divmod(total_secs, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"


def extract_audio(video_path: str) -> str:
    """Extract the audio track of *video_path* into a 16 kHz mono WAV file.

    The WAV file is written next to the video, with the video's extension
    replaced by ".wav". Returns the path of the WAV file.

    Raises:
        subprocess.CalledProcessError: if ffmpeg exits with a non-zero
            status. The captured ffmpeg stderr is available on the
            exception's ``stderr`` attribute.
    """
    # splitext only touches the real extension, so inputs that are not
    # lowercase ".mp4" no longer map onto themselves (which made
    # "ffmpeg -y" overwrite the source file).
    root, _ext = os.path.splitext(video_path)
    audio_path = root + ".wav"
    if os.path.abspath(audio_path) == os.path.abspath(video_path):
        # Input is already a .wav file: never write over the source.
        audio_path = root + "_audio.wav"

    command = [
        "ffmpeg", "-y",
        "-i", video_path,
        "-vn",                   # drop the video stream
        "-acodec", "pcm_s16le",  # 16-bit PCM (WAV)
        "-ar", "16000",          # 16 kHz sample rate
        "-ac", "1",              # mono
        audio_path,
    ]
    subprocess.run(
        command,
        check=True,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.PIPE,  # kept so a failure carries diagnostics
    )
    return audio_path


def generate_subtitles(video_path: str) -> str:
    """Transcribe *video_path* and write the result as an SRT file.

    The audio is extracted with ``extract_audio`` and passed to the
    module-level ``model`` with ``language='zh'`` and ``task='translate'``.
    The SRT file is written next to the video, with the video's extension
    replaced by ".srt". Returns the absolute path of the SRT file.
    """
    # Extract the audio track from the video.
    audio_path = extract_audio(video_path)

    # Run the speech model on the extracted audio.
    segments, _ = model.transcribe(audio_path, language='zh', task='translate')

    srt_path = os.path.abspath(os.path.splitext(video_path)[0] + ".srt")
    with open(srt_path, "w", encoding="utf-8") as f:
        # SRT cue numbers start at 1.
        for index, seg in enumerate(segments, start=1):
            f.write(f"{index}\n")
            f.write(f"{format_timestamp(seg.start)} --> {format_timestamp(seg.end)}\n")
            f.write(f"{seg.text.strip()}\n\n")

    print(f"✅ Subtitle saved at: {srt_path}")
    return srt_path


def _escape_filter_path(path: str) -> str:
    """Escape *path* for use inside a single-quoted ffmpeg filter option."""
    # Backslash and colon are special in filter option values (Windows
    # paths contain both). Backslashes must be escaped first so the ones
    # added for colons are not doubled.
    # NOTE(review): a path containing an apostrophe is still not handled.
    return path.replace("\\", "\\\\").replace(":", "\\:")


def burn_subtitles(video_path: str, srt_path: str) -> str:
    """Render the subtitles in *srt_path* onto *video_path* with ffmpeg.

    The result is written next to the video as "<name>_subtitled.mp4";
    video is re-encoded with libx264 and audio is stream-copied.
    Returns the absolute path of the output video.

    Raises:
        FileNotFoundError: if *srt_path* does not exist.
        RuntimeError: if ffmpeg exits with a non-zero status.
    """
    if not os.path.exists(srt_path):
        raise FileNotFoundError(f"❌ Không tìm thấy phụ đề: {srt_path}")

    video_path = os.path.abspath(video_path)
    srt_path = os.path.abspath(srt_path)
    # splitext keeps the output distinct from the input for any extension.
    output_path = os.path.splitext(video_path)[0] + "_subtitled.mp4"

    filter_arg = f"subtitles='{_escape_filter_path(srt_path)}'"

    command = [
        "ffmpeg", "-y",
        "-i", video_path,
        "-vf", filter_arg,
        "-c:v", "libx264",
        "-c:a", "copy",
        "-preset", "ultrafast",
        # os.cpu_count() may return None; fall back to a single thread.
        "-threads", str(os.cpu_count() or 1),
        output_path,
    ]

    print(f"🚀 Running FFmpeg command:\n{' '.join(command)}")

    result = subprocess.run(command, capture_output=True, text=True)

    if result.returncode != 0:
        print("❌ FFmpeg error output:")
        print(result.stderr)
        raise RuntimeError("⚠️ Lỗi khi chạy FFmpeg để chèn phụ đề!")

    print(f"✅ Video with subtitles saved at: {output_path}")
    return output_path


def process_video(video_path: str) -> str:
    """Generate subtitles for *video_path* and burn them into a new video.

    Returns the path of the subtitled video produced by ``burn_subtitles``.
    """
    srt_path = generate_subtitles(video_path)
    final_video = burn_subtitles(video_path, srt_path)
    return final_video