Yuhan-Lu committed on
Commit
3e39830
1 Parent(s): f76455f

Detect the merged-sentence issue and rerun the translation up to 5 times

Browse files

Former-commit-id: b807248fae583ddbecee84cd9a5b8ffdc2888cc8

Files changed (1) hide show
  1. pipeline.py +42 -1
pipeline.py CHANGED
@@ -24,7 +24,7 @@ def parse_args():
24
  parser.add_argument("--download", help="download path", default='./downloads', type=str, required=False)
25
  parser.add_argument("--output_dir", help="translate result path", default='./results', type=str, required=False)
26
  parser.add_argument("--video_name", help="video name, if use video link as input, the name will auto-filled by youtube video name", default='placeholder', type=str, required=False)
27
- parser.add_argument("--model_name", help="model name only support gpt-4 and gpt-3.5-turbo", type=str, required=False, default="gpt-4") # default change to gpt-4
28
  parser.add_argument("--log_dir", help="log path", default='./logs', type=str, required=False)
29
  parser.add_argument("-only_srt", help="set script output to only .srt file", action='store_true')
30
  parser.add_argument("-v", help="auto encode script with video", action='store_true')
@@ -186,8 +186,33 @@ def script_split(script_in, chunk_size = 1000):
186
  # print('temp_contents')
187
  # print(srt.get_source_only())
188
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
 
190
  def get_response(model_name, sentence):
 
 
 
 
 
 
 
 
 
 
 
191
  if model_name == "gpt-3.5-turbo" or model_name == "gpt-4":
192
  response = openai.ChatCompletion.create(
193
  model=model_name,
@@ -223,12 +248,28 @@ def translate(srt, script_arr, range_arr, model_name, video_name, video_link):
223
  flag = False
224
  try:
225
  translate = get_response(model_name, sentence)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
226
  except Exception as e:
227
  logging.debug("An error has occurred during translation:",e)
228
  print("An error has occurred during translation:",e)
229
  print("Retrying... the script will continue after 30 seconds.")
230
  time.sleep(30)
231
  flag = True
 
232
  srt.set_translation(translate, range, model_name, video_name, video_link)
233
 
234
 
 
24
  parser.add_argument("--download", help="download path", default='./downloads', type=str, required=False)
25
  parser.add_argument("--output_dir", help="translate result path", default='./results', type=str, required=False)
26
  parser.add_argument("--video_name", help="video name, if use video link as input, the name will auto-filled by youtube video name", default='placeholder', type=str, required=False)
27
+ parser.add_argument("--model_name", help="model name only support gpt-4 and gpt-3.5-turbo", type=str, required=False, default="gpt-3.5-turbo") # default change to gpt-4
28
  parser.add_argument("--log_dir", help="log path", default='./logs', type=str, required=False)
29
  parser.add_argument("-only_srt", help="set script output to only .srt file", action='store_true')
30
  parser.add_argument("-v", help="auto encode script with video", action='store_true')
 
186
  # print('temp_contents')
187
  # print(srt.get_source_only())
188
 
189
def check_translation(sentence, translation):
    """
    Check for the merge sentence issue in an OpenAI translation.

    Both strings are treated as segments separated by a blank line
    (``'\\n\\n'``). The translation is accepted only when it has the same
    number of segments as the source text.

    Args:
        sentence (str): Source text sent for translation.
        translation (str): Text returned by the model.

    Returns:
        bool: True when the segment counts match, False otherwise.
    """
    # N separators delimit N + 1 segments.
    sentence_count = sentence.count('\n\n') + 1
    translation_count = translation.count('\n\n') + 1

    # Diagnostic output: character length followed by segment count.
    print("sentence length: ", len(sentence), sentence_count)
    print("translation length: ", len(translation), translation_count)

    return sentence_count == translation_count
202
+
203
 
204
  def get_response(model_name, sentence):
205
+ """
206
+ Generates a translated response for a given sentence using a specified OpenAI model.
207
+
208
+ Args:
209
+ model_name (str): The name of the OpenAI model to be used for translation, either "gpt-3.5-turbo" or "gpt-4".
210
+ sentence (str): The English sentence related to StarCraft 2 videos that needs to be translated into Chinese.
211
+
212
+ Returns:
213
+ str: The translated Chinese sentence, maintaining the original format, meaning, and number of lines.
214
+ """
215
+
216
  if model_name == "gpt-3.5-turbo" or model_name == "gpt-4":
217
  response = openai.ChatCompletion.create(
218
  model=model_name,
 
248
  flag = False
249
  try:
250
  translate = get_response(model_name, sentence)
251
+ # detect merge sentence issue and try to solve for five times:
252
+ attempt_left = 5
253
+ while not check_translation(sentence, translate) and attempt_left > 0:
254
+ translate = get_response(model_name, sentence)
255
+ attempt_left -= 1
256
+
257
+ # if failure still happen, split into smaller tokens
258
+ if attempt_left == 0:
259
+ single_sentences = sentence.split("\n\n")
260
+ print("merge sentence issue found: ", len(single_sentences), len(translate), single_sentences, translate)
261
+ translate = ""
262
+ for single_sentence in single_sentences:
263
+ translate += get_response(model_name, single_sentence) + "\n\n"
264
+ print("after correction: ", "chinese length: ", len(translate), translate)
265
+ # print(type(translate))
266
  except Exception as e:
267
  logging.debug("An error has occurred during translation:",e)
268
  print("An error has occurred during translation:",e)
269
  print("Retrying... the script will continue after 30 seconds.")
270
  time.sleep(30)
271
  flag = True
272
+
273
  srt.set_translation(translate, range, model_name, video_name, video_link)
274
 
275