Spaces:

StarPigeon
/

ViDove

Sleeping

App Files Files Community

Yuhan-Lu committed on Apr 16, 2023

Commit

3e39830

1 Parent(s): f76455f

merge sentence issue, rerun 5 times

Browse files

Former-commit-id: b807248fae583ddbecee84cd9a5b8ffdc2888cc8

Files changed (1) hide show

pipeline.py +42 -1

pipeline.py CHANGED Viewed

@@ -24,7 +24,7 @@ def parse_args():
     parser.add_argument("--download", help="download path", default='./downloads', type=str, required=False)
     parser.add_argument("--output_dir", help="translate result path", default='./results', type=str, required=False)
     parser.add_argument("--video_name", help="video name, if use video link as input, the name will auto-filled by youtube video name", default='placeholder', type=str, required=False)
-    parser.add_argument("--model_name", help="model name only support gpt-4 and gpt-3.5-turbo", type=str, required=False, default="gpt-4") # default change to gpt-4
     parser.add_argument("--log_dir", help="log path", default='./logs', type=str, required=False)
     parser.add_argument("-only_srt", help="set script output to only .srt file", action='store_true')
     parser.add_argument("-v", help="auto encode script with video", action='store_true')
@@ -186,8 +186,33 @@ def script_split(script_in, chunk_size = 1000):
 #     print('temp_contents')
 #     print(srt.get_source_only())
 def get_response(model_name, sentence):
     if model_name == "gpt-3.5-turbo" or model_name == "gpt-4":
         response = openai.ChatCompletion.create(
             model=model_name,
@@ -223,12 +248,28 @@ def translate(srt, script_arr, range_arr, model_name, video_name, video_link):
             flag = False
             try:
                 translate = get_response(model_name, sentence)
             except Exception as e:
                 logging.debug("An error has occurred during translation:",e)
                 print("An error has occurred during translation:",e)
                 print("Retrying... the script will continue after 30 seconds.")
                 time.sleep(30)
                 flag = True
         srt.set_translation(translate, range, model_name, video_name, video_link)

     parser.add_argument("--download", help="download path", default='./downloads', type=str, required=False)
     parser.add_argument("--output_dir", help="translate result path", default='./results', type=str, required=False)
     parser.add_argument("--video_name", help="video name, if use video link as input, the name will auto-filled by youtube video name", default='placeholder', type=str, required=False)
+    parser.add_argument("--model_name", help="model name only support gpt-4 and gpt-3.5-turbo", type=str, required=False, default="gpt-3.5-turbo") # default change to gpt-4
     parser.add_argument("--log_dir", help="log path", default='./logs', type=str, required=False)
     parser.add_argument("-only_srt", help="set script output to only .srt file", action='store_true')
     parser.add_argument("-v", help="auto encode script with video", action='store_true')
 #     print('temp_contents')
 #     print(srt.get_source_only())
def check_translation(sentence, translation):
    """
    Check whether a translation kept the same number of segments as its source.

    Segments are the pieces of text separated by a blank line (two
    consecutive newline characters). When the counts differ, the model has
    merged or split sentences and the translation cannot be aligned with
    the source segment by segment.

    Args:
        sentence (str): The source text sent for translation.
        translation (str): The text returned by the translation model.

    Returns:
        bool: True if both texts contain the same number of segments,
        False otherwise.
    """
    separator = '\n\n'
    # N separators delimit N + 1 segments; an empty string counts as one.
    sentence_count = sentence.count(separator) + 1
    translation_count = translation.count(separator) + 1
    # Diagnostic output: character length followed by segment count.
    print("sentence length: ", len(sentence), sentence_count)
    print("translation length: ",  len(translation), translation_count)
    return sentence_count == translation_count
 def get_response(model_name, sentence):
+    """
+    Generates a translated response for a given sentence using a specified OpenAI model.
+    Args:
+    model_name (str): The name of the OpenAI model to be used for translation, either "gpt-3.5-turbo" or "gpt-4".
+    sentence (str): The English sentence related to StarCraft 2 videos that needs to be translated into Chinese.
+    Returns:
+    str: The translated Chinese sentence, maintaining the original format, meaning, and number of lines.
+    """
     if model_name == "gpt-3.5-turbo" or model_name == "gpt-4":
         response = openai.ChatCompletion.create(
             model=model_name,
             flag = False
             try:
                 translate = get_response(model_name, sentence)
+                # detect the merged-sentence issue and retry up to five times:
+                attempt_left = 5
+                while not check_translation(sentence, translate) and attempt_left > 0:
+                    translate = get_response(model_name, sentence)
+                    attempt_left -= 1
+                # if the failure still happens, split into single sentences and translate each one separately
+                if attempt_left == 0:
+                    single_sentences = sentence.split("\n\n")
+                    print("merge sentence issue found: ", len(single_sentences), len(translate), single_sentences, translate)
+                    translate = ""
+                    for single_sentence in single_sentences:
+                        translate += get_response(model_name, single_sentence) + "\n\n"
+                    print("after correction: ", "chinese length: ", len(translate), translate)
+                # print(type(translate))
             except Exception as e:
                 logging.debug("An error has occurred during translation:",e)
                 print("An error has occurred during translation:",e)
                 print("Retrying... the script will continue after 30 seconds.")
                 time.sleep(30)
                 flag = True
         srt.set_translation(translate, range, model_name, video_name, video_link)