# "Spaces: Sleeping" — Hugging Face Spaces page-status header captured by the
# scrape; not part of the program.
import argparse
import io
import os
import sys

import openai
from pytube import YouTube
# ---- command-line interface ----
parser = argparse.ArgumentParser()
parser.add_argument("--link", help="youtube video link here", default=None, type=str, required=False)
parser.add_argument("--local_path", help="local video path here", default=None, type=str, required=False)
parser.add_argument("--download", help="download path", default='./downloads', type=str, required=False)
parser.add_argument("--result", help="translate result path", default='./results', type=str, required=False)
parser.add_argument("--video_name", help="video name", default='placeholder', type=str, required=False)
args = parser.parse_args()

# Exactly one audio source is required. Use parser.error() instead of a bare
# print + exit(): it prints the usage line to stderr and exits with status 2,
# so callers/scripts can detect the failure.
if args.link is None and args.local_path is None:
    parser.error("need video source: pass --link or --local_path")
# ---- configuration ----
# Read the API key from the environment. Never commit a literal key to source
# control — the key previously hard-coded here is exposed and must be revoked.
openai.api_key = os.getenv("OPENAI_API_KEY")

DOWNLOAD_PATH = args.download   # where the YouTube audio stream is saved
RESULT_PATH = args.result       # where the _en / _zh transcripts are written
VIDEO_NAME = args.video_name    # overridden by the YouTube title when --link is used

n_threshold = 5000  # rough character budget per OpenAI prompt (re-tuned below)
model_name = "text-davinci-003"  # TODO: replace with our own fine-tuned model
# ---- acquire the source audio ----
if args.link is not None:
    # Download the audio-only stream from YouTube.
    try:
        video = YouTube(args.link)
        audio = video.streams.filter(only_audio=True, file_extension='mp4').first()
        audio.download(DOWNLOAD_PATH)
        print('Download Completed!')
    except Exception as e:
        # The original fell through after a failed download and then crashed
        # with a NameError on `audio` below; abort explicitly instead.
        print("Connection Error")
        print(e)
        sys.exit(1)
    audio_file = open('{}/{}'.format(DOWNLOAD_PATH, audio.default_filename), "rb")
    # Derive the output basename from the downloaded file's name.
    VIDEO_NAME = audio.default_filename.split('.')[0]
else:
    # Read the audio from a local file instead.
    audio_file = open(args.local_path, "rb")
# ---- speech-to-text ----
# Transcribe once and cache the result as <video name>_en.txt under
# RESULT_PATH; subsequent runs reuse the cached transcript.
os.makedirs(RESULT_PATH, exist_ok=True)  # default ./results may not exist yet
transcript_path = "{}/{}_en.txt".format(RESULT_PATH, VIDEO_NAME)
if not os.path.exists(transcript_path):
    transcript = openai.Audio.transcribe("whisper-1", audio_file)
    with open(transcript_path, 'w') as f:
        f.write(transcript['text'])
audio_file.close()  # the raw audio handle is no longer needed (was leaked before)
with open(transcript_path, 'r') as f:
    script_en = f.read()
# ---- split the transcript into prompt-sized chunks ----
# The original computed an unused length (N), split the script twice, and
# assigned n_threshold twice; this does the same chunking in one pass.
n_threshold = 4096  # character budget per request (proxy for the model's token limit)
script_arr = []
chunk = ""
for sentence in script_en.split('.'):
    # +1 accounts for the '.' re-appended to each sentence.
    if len(chunk) + len(sentence) + 1 <= n_threshold:
        chunk += sentence + '.'
    else:
        chunk_stripped = chunk.strip()
        script_arr.append(chunk_stripped)
        chunk = sentence + '.'
if chunk.strip():
    script_arr.append(chunk.strip())
# ---- translate each chunk and append to <video name>_zh.txt ----
# Open the output file once instead of reopening it for every chunk
# ('a+' preserves the original append semantics across runs).
with open(f"{RESULT_PATH}/{VIDEO_NAME}_zh.txt", 'a+') as out_f:
    for s in script_arr:
        prompt = f"Please help me translate this into Chinese:\n\n{s}\n\n"
        response = openai.Completion.create(
            model=model_name,
            prompt=prompt,
            temperature=0.1,       # near-deterministic output for translation
            max_tokens=2000,
            top_p=1.0,
            frequency_penalty=0.0,
            presence_penalty=0.0
        )
        out_f.write(response['choices'][0]['text'].strip())
        out_f.write('\n')