import openai from pytube import YouTube import argparse import os from tqdm import tqdm from SRT import SRT_script import stable_whisper import whisper from srt2ass import srt2ass import logging from datetime import datetime import torch import subprocess import time def parse_args(): parser = argparse.ArgumentParser() parser.add_argument("--link", help="youtube video link here", default=None, type=str, required=False) parser.add_argument("--video_file", help="local video path here", default=None, type=str, required=False) parser.add_argument("--audio_file", help="local audio path here", default=None, type=str, required=False) parser.add_argument("--srt_file", help="srt file input path here", default=None, type=str, required=False) # New argument parser.add_argument("--download", help="download path", default='./downloads', type=str, required=False) parser.add_argument("--output_dir", help="translate result path", default='./results', type=str, required=False) parser.add_argument("--video_name", help="video name, if use video link as input, the name will auto-filled by youtube video name", default='placeholder', type=str, required=False) parser.add_argument("--model_name", help="model name only support gpt-4 and gpt-3.5-turbo", type=str, required=False, default="gpt-4") # default change to gpt-4 parser.add_argument("--log_dir", help="log path", default='./logs', type=str, required=False) parser.add_argument("-only_srt", help="set script output to only .srt file", action='store_true') parser.add_argument("-v", help="auto encode script with video", action='store_true') args = parser.parse_args() return args def get_sources(args, download_path, result_path, video_name): # get source audio audio_path = None audio_file = None video_path = None if args.link is not None and args.video_file is None: # Download audio from YouTube video_link = args.link video = None audio = None try: yt = YouTube(video_link) video = yt.streams.filter(progressive=True, file_extension='mp4').order_by('resolution').desc().first() if video: video.download(f'{download_path}/video') print('Video download completed!') else: print("Error: Video stream not found") audio = yt.streams.filter(only_audio=True, file_extension='mp4').first() if audio: audio.download(f'{download_path}/audio') print('Audio download completed!') else: print("Error: Audio stream not found") except Exception as e: print("Connection Error") print(e) exit() video_path = f'{download_path}/video/{video.default_filename}' audio_path = '{}/audio/{}'.format(download_path, audio.default_filename) audio_file = open(audio_path, "rb") if video_name == 'placeholder': video_name = audio.default_filename.split('.')[0] elif args.video_file is not None: # Read from local video_path = args.video_file if args.audio_file is not None: audio_file= open(args.audio_file, "rb") audio_path = args.audio_file else: output_audio_path = f'{download_path}/audio/{video_name}.mp3' subprocess.run(['ffmpeg', '-i', video_path, '-f', 'mp3', '-ab', '192000', '-vn', output_audio_path]) audio_file = open(output_audio_path, "rb") audio_path = output_audio_path if not os.path.exists(f'{result_path}/{video_name}'): os.mkdir(f'{result_path}/{video_name}') if args.audio_file is not None: audio_file= open(args.audio_file, "rb") audio_path = args.audio_file pass return audio_path, audio_file, video_path, video_name def get_srt_class(srt_file_en, result_path, video_name, audio_path, audio_file = None, whisper_model = 'large', method = "stable"): # Instead of using the script_en variable directly, we'll use script_input if srt_file_en is not None: srt = SRT_script.parse_from_srt_file(srt_file_en) else: # using whisper to perform speech-to-text and save it in