Spaces:

StarPigeon
/

ViDove

Running

App Files Files Community

JiaenLiu committed on Mar 17, 2023

Commit

ae0ed1b

1 Parent(s): 87a205e

add gpt3.5 and text input

Browse files

Former-commit-id: 0a72cc1d3f69a74fc35bb0981462370dbe566518

Files changed (2) hide show

README.md +2 -0
pipeline.py +61 -36

README.md CHANGED Viewed

@@ -16,6 +16,8 @@ example online: python3 pipeline.py --link https://www.youtube.com/watch?v=XbgFI
 example offline: python3 pipeline.py --local_path test_translation.m4a --result ./results --video_name test_translation
 options:
   -h, --help            show this help message and exit
   --link LINK           youtube video link here

 example offline: python3 pipeline.py --local_path test_translation.m4a --result ./results --video_name test_translation
+example text input: python pipeline.py --text_file "/home/jiaenliu/project-t/results/huanghe_translation_en.txt" --result "/home/jiaenliu/project-t/results" --video_name "huanghe_test"
 options:
   -h, --help            show this help message and exit
   --link LINK           youtube video link here

pipeline.py CHANGED Viewed

@@ -4,32 +4,33 @@ import argparse
 import os
 import io
 parser = argparse.ArgumentParser()
 parser.add_argument("--link", help="youtube video link here", default=None, type=str, required=False)
 parser.add_argument("--local_path", help="local video path here", default=None, type=str, required=False)
 parser.add_argument("--download", help="download path", default='./downloads', type=str, required=False)
 parser.add_argument("--result", help="translate result path", default='./results', type=str, required=False)
 parser.add_argument("--video_name", help="video name", default='placeholder', type=str, required=False)
 args = parser.parse_args()
-if args.link is None and args.local_path is None:
-    print("need video source")
     exit()
-# openai.api_key = "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"
-openai.api_key = "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"
-# openai.api_key = os.getenv("OPENAI_API_KEY")
 DOWNLOAD_PATH = args.download
 RESULT_PATH = args.result
 VIDEO_NAME = args.video_name
-n_threshold = 5000
 # model_name = "text-davinci-003" # replace this with our own fine-tuned model
-model_name = "gpt-3.5-turbo"
 # get source audio
-if args.link is not None:
     # Download audio from YouTube
     video_link = args.link
     try:
@@ -42,25 +43,33 @@ if args.link is not None:
         print(e)
     audio_file = open('{}/{}'.format(DOWNLOAD_PATH, audio.default_filename), "rb")
     VIDEO_NAME = audio.default_filename.split('.')[0]
-else:
     # Read from local
     audio_file= open(args.local_path, "rb")
-# perform speech-to-text and save it in <video name>_en.txt under RESULT PATH.
-if not os.path.exists("{}/{}_en.txt".format(RESULT_PATH, VIDEO_NAME)):
-    transcript = openai.Audio.transcribe("whisper-1", audio_file)
-    with open("{}/{}_en.txt".format(RESULT_PATH, VIDEO_NAME), 'w') as f:
-        f.write(transcript['text'])
-# split the video script(open ai prompt limit: about 5000)
-with open("{}/{}_en.txt".format(RESULT_PATH, VIDEO_NAME), 'r') as f:
-    script_en = f.read()
-    N = len(script_en)
-    script_split = script_en.split('.')
 # Split the video script by sentences and create chunks within the token limit
-n_threshold = 2048  # Token limit for the GPT-3 model
-script_split = script_en.split('.')
 script_arr = []
 script = ""
@@ -75,17 +84,33 @@ if script.strip():
 # Translate and save
 for s in script_arr:
-    prompt = f"Please help me translate this into Chinese:\n\n{s}\n\n"
-    response = openai.Completion.create(
-        model=model_name,
-        prompt=prompt,
-        temperature=0.1,
-        max_tokens=2048,
-        top_p=1.0,
-        frequency_penalty=0.0,
-        presence_penalty=0.0
-    )
-    with open(f"{RESULT_PATH}/{VIDEO_NAME}_zh.txt", 'a+') as f:
-        f.write(response['choices'][0]['text'].strip())
-        f.write('\n')

 import os
 import io
 parser = argparse.ArgumentParser()
 parser.add_argument("--link", help="youtube video link here", default=None, type=str, required=False)
 parser.add_argument("--local_path", help="local video path here", default=None, type=str, required=False)
+parser.add_argument("--text_file", help="text file path here", default=None, type=str, required=False)  # New argument
 parser.add_argument("--download", help="download path", default='./downloads', type=str, required=False)
 parser.add_argument("--result", help="translate result path", default='./results', type=str, required=False)
 parser.add_argument("--video_name", help="video name", default='placeholder', type=str, required=False)
+parser.add_argument("--model_name", help="model name only support text-davinci-003 and gpt-3.5-turbo", default='placeholder', type=str, required=False, default="gpt-3.5-turbo")
 args = parser.parse_args()
+if args.link is None and args.local_path is None and args.text_file is None:
+    print("need video source or text file")
     exit()
+openai.api_key = os.getenv("OPENAI_API_KEY")
 DOWNLOAD_PATH = args.download
 RESULT_PATH = args.result
 VIDEO_NAME = args.video_name
+n_threshold = 1000  # Token limit for the GPT-3.5 model
 # model_name = "text-davinci-003" # replace this with our own fine-tuned model
+model_name = args.model_name
+# model_name = "davinci"
 # get source audio
+if args.link is not None and args.local_path is None:
     # Download audio from YouTube
     video_link = args.link
     try:
         print(e)
     audio_file = open('{}/{}'.format(DOWNLOAD_PATH, audio.default_filename), "rb")
     VIDEO_NAME = audio.default_filename.split('.')[0]
+elif args.local_path is not None:
     # Read from local
     audio_file= open(args.local_path, "rb")
+# Instead of using the script_en variable directly, we'll use script_input
+if args.text_file is not None:
+    with open(args.text_file, 'r') as f:
+        script_input = f.read()
+else:
+    # perform speech-to-text and save it in <video name>_en.txt under RESULT PATH.
+    if not os.path.exists("{}/{}_en.txt".format(RESULT_PATH, VIDEO_NAME)):
+        transcript = openai.Audio.transcribe("whisper-1", audio_file)
+        with open("{}/{}_en.txt".format(RESULT_PATH, VIDEO_NAME), 'w') as f:
+            f.write(transcript['text'])
+    # split the video script(open ai prompt limit: about 5000)
+    with open("{}/{}_en.txt".format(RESULT_PATH, VIDEO_NAME), 'r') as f:
+        script_en = f.read()
+        # N = len(script_en)
+        # script_split = script_en.split('.')
+        script_input = script_en
 # Split the video script by sentences and create chunks within the token limit
+n_threshold = 4096  # Token limit for the GPT-3 model
+script_split = script_input.split('.')
 script_arr = []
 script = ""
 # Translate and save
 for s in script_arr:
+    # using chatgpt model
+    if model_name == "gpt-3.5-turbo":
+        print(s + "\n")
+        response = openai.ChatCompletion.create(
+            model=model_name,
+            messages = [
+                {"role": "system", "content": "You are a helpful assistant that translates English to Chinese and have decent background in starcraft2."},
+                {"role": "user", "content": 'Translate the following English text to Chinese: "{}"'.format(s)}
+            ],
+            temperature=0.1
+        )
+        with open(f"{RESULT_PATH}/{VIDEO_NAME}_zh.txt", 'a+') as f:
+            f.write(response['choices'][0]['message']['content'].strip())
+            f.write('\n')
+    if model_name == "text-davinci-003":
+        prompt = f"Please help me translate this into Chinese:\n\n{s}\n\n"
+        print(prompt)
+        response = openai.Completion.create(
+            model=model_name,
+            prompt=prompt,
+            temperature=0.1,
+            max_tokens=2000,
+            top_p=1.0,
+            frequency_penalty=0.0,
+            presence_penalty=0.0
+        )
+        with open(f"{RESULT_PATH}/{VIDEO_NAME}_zh.txt", 'a+') as f:
+            f.write(response['choices'][0]['text'].strip())
+            f.write('\n')