import argparse
import os
import subprocess
import time

import openai
import stable_whisper
from pytube import YouTube
from tqdm import tqdm

from SRT import SRT_script

# Command-line interface of the pipeline. Every option is optional here; the
# check that at least one input source was supplied happens right after
# parsing.
parser = argparse.ArgumentParser()

# String-valued options as (flag, help text, default). They are registered in
# this order so the generated --help listing keeps the same layout.
_STRING_OPTIONS = (
    ("--link", "youtube video link here", None),
    ("--video_file", "local video path here", None),
    ("--audio_file", "local audio path here", None),
    ("--srt_file", "srt file input path here", None),  # New argument
    ("--download", "download path", './downloads'),
    ("--output_dir", "translate result path", './results'),
    (
        "--video_name",
        "video name, if use video link as input, the name will auto-filled by youtube video name",
        'placeholder',
    ),
    ("--model_name", "model name only support gpt-4 and gpt-3.5-turbo", "gpt-3.5-turbo"),
)
for _flag, _help, _default in _STRING_OPTIONS:
    parser.add_argument(_flag, help=_help, default=_default, type=str, required=False)

# Boolean switches; the single-dash spelling is part of the existing CLI.
_SWITCHES = (
    ("-only_srt", "set script output to only .srt file"),
    ("-v", "auto encode script with video"),
)
for _flag, _help in _SWITCHES:
    parser.add_argument(_flag, help=_help, action='store_true')

args = parser.parse_args()

# input should be either video file or youtube video link.
if args.link is None and args.video_file is None and args.srt_file is None and args.audio_file is None: print("need video source or srt file") exit() # set up openai.api_key = os.getenv("OPENAI_API_KEY") DOWNLOAD_PATH = args.download if not os.path.exists(DOWNLOAD_PATH): os.mkdir(DOWNLOAD_PATH) os.mkdir(f'{DOWNLOAD_PATH}/audio') os.mkdir(f'{DOWNLOAD_PATH}/video') RESULT_PATH = args.output_dir if not os.path.exists(RESULT_PATH): os.mkdir(RESULT_PATH) # set video name as the input file name if not specified if args.video_name == 'placeholder' : # set video name to upload file name if args.video_file is not None: VIDEO_NAME = args.video_file.split('/')[-1].split('.')[0] elif args.audio_file is not None: VIDEO_NAME = args.audio_file.split('/')[-1].split('.')[0] elif args.srt_file is not None: VIDEO_NAME = args.srt_file.split('/')[-1].split('.')[0] else: VIDEO_NAME = args.video_name else: VIDEO_NAME = args.video_name model_name = args.model_name threshold = 30 # get source audio if args.link is not None and args.video_file is None: # Download audio from YouTube video_link = args.link video = None audio = None try: yt = YouTube(video_link) video = yt.streams.filter(progressive=True, file_extension='mp4').order_by('resolution').desc().first() if video: video.download(f'{DOWNLOAD_PATH}/video') print('Video download completed!') else: print("Error: Video stream not found") audio = yt.streams.filter(only_audio=True, file_extension='mp4').first() if audio: audio.download(f'{DOWNLOAD_PATH}/audio') print('Audio download completed!') else: print("Error: Audio stream not found") except Exception as e: print("Connection Error") print(e) exit() video_path = f'{DOWNLOAD_PATH}/video/{video.default_filename}' audio_path = '{}/audio/{}'.format(DOWNLOAD_PATH, audio.default_filename) audio_file = open(audio_path, "rb") if VIDEO_NAME == 'placeholder': VIDEO_NAME = audio.default_filename.split('.')[0] elif args.video_file is not None: # Read from local video_path = args.video_file # 
audio_path = "{DOWNLOAD_PATH}/audio/{VIDEO_NAME}.mp3".format(DOWNLOAD_PATH, VIDEO_NAME) if args.audio_file is not None: audio_file= open(args.audio_file, "rb") audio_path = args.audio_file else: # escaped_video_path = args.video_file.replace('(', '\(').replace(')', '\)').replace(' ', '\ ') # print(escaped_video_path) # os.system(f'ffmpeg -i {escaped_video_path} -f mp3 -ab 192000 -vn {DOWNLOAD_PATH}/audio/{VIDEO_NAME}.mp3') # audio_file= open(f'{DOWNLOAD_PATH}/audio/{VIDEO_NAME}.mp3', "rb") # audio_path = f'{DOWNLOAD_PATH}/audio/{VIDEO_NAME}.mp3' output_audio_path = f'{DOWNLOAD_PATH}/audio/{VIDEO_NAME}.mp3' # print(video_path) # print(output_audio_path) subprocess.run(['ffmpeg', '-i', video_path, '-f', 'mp3', '-ab', '192000', '-vn', output_audio_path]) audio_file = open(output_audio_path, "rb") audio_path = output_audio_path if not os.path.exists(f'{RESULT_PATH}/{VIDEO_NAME}'): os.mkdir(f'{RESULT_PATH}/{VIDEO_NAME}') if args.audio_file is not None: audio_file= open(args.audio_file, "rb") audio_path = args.audio_file # Instead of using the script_en variable directly, we'll use script_input srt_file_en = args.srt_file if srt_file_en is not None: srt = SRT_script.parse_from_srt_file(srt_file_en) else: # using whisper to perform speech-to-text and save it in