FOREIGN-WHISPERS / main.py
sc45's picture
Initial Commit
f0ceee4
raw history blame
No virus
2.46 kB
import argparse
import os
from yt_download import download_video
from video_to_text import convert_video_to_text
from opus import translate_file
from diarization import perform_diarization
from tts import main as tts_main
from translated_video import create_translated_video
def get_transcription_filename(video_path):
    """Return the transcript path that corresponds to *video_path*.

    The result is ``./transcribed/<stem>.txt``, where ``<stem>`` is the final
    path component of *video_path* with its last extension removed.
    """
    file_name = os.path.basename(video_path)
    stem, _extension = os.path.splitext(file_name)
    return './transcribed/{}.txt'.format(stem)
def get_audio_filename(video_path):
    """Return the audio path that corresponds to *video_path*.

    The result is ``./audio/<stem>.wav``, where ``<stem>`` is the final path
    component of *video_path* with its last extension removed.
    """
    file_name = os.path.basename(video_path)
    stem, _extension = os.path.splitext(file_name)
    return './audio/{}.wav'.format(stem)
def main(youtube_url):
    """Run the whole pipeline for a single YouTube URL.

    The steps run in order: download the video, transcribe it, translate the
    transcript, perform diarization, synthesize speech for the translated
    text, and assemble the final translated video. The path returned by
    ``create_translated_video`` is printed at the end.

    Args:
        youtube_url: URL of the YouTube video, passed to ``download_video``.
    """
    # Create the working directories up front. ``exist_ok=True`` avoids the
    # check-then-create race of a separate ``os.path.exists`` test.
    for directory in ('./downloads', './audio', './transcribed', './translated'):
        os.makedirs(directory, exist_ok=True)

    # Step 1: Download the video
    downloaded_video_path = download_video(youtube_url)

    # Step 2: Transcribe the video's audio
    transcribed_text_path = get_transcription_filename(downloaded_video_path)
    model_type = 'base'  # You can specify the Whisper model type
    # NOTE(review): the return value is ignored; Step 3 presumes the transcript
    # ends up at ``transcribed_text_path`` -- confirm against video_to_text.
    convert_video_to_text(downloaded_video_path, model_type)

    # Step 3: Translate the transcribed text to Spanish
    translated_text_path = './translated/translated_text.txt'
    translate_file(transcribed_text_path, translated_text_path)

    # Step 4: Perform diarization
    # NOTE(review): presumes an earlier step produced the .wav at
    # ``audio_path`` -- nothing in this function writes it directly.
    audio_path = get_audio_filename(downloaded_video_path)
    perform_diarization(audio_path, translated_text_path)

    # Step 5: Generate speech for translated text
    # NOTE(review): presumably perform_diarization writes its output under this
    # directory; it is not created here -- verify against diarization.py.
    diarization_directory = './audio/diarization'
    aligned_text_file = './audio/diarization/aligned_text.txt'  # Ensure this is the correct path
    output_audio_file = './translated/final_audio.wav'
    tts_main(diarization_directory, aligned_text_file, output_audio_file)

    # Step 6: Create the final translated video
    final_video_path = create_translated_video(
        downloaded_video_path, output_audio_file, translated_text_path
    )
    print(f"Final translated video created at {final_video_path}")
# Script entry point: read the single positional YouTube URL from the command
# line and hand it to main().
if __name__ == "__main__":
    cli = argparse.ArgumentParser(
        description="Process a YouTube video with multiple steps."
    )
    cli.add_argument("youtube_url", help="YouTube video URL")
    main(cli.parse_args().youtube_url)