Yuhan-Lu committed on
Commit
090e123
1 Parent(s): ac6e110

update docString of translate, set default value for 'attempts_count' of translate

Browse files
Files changed (1) hide show
  1. pipeline.py +23 -7
pipeline.py CHANGED
@@ -233,7 +233,24 @@ def get_response(model_name, sentence):
233
 
234
 
235
  # Translate and save
236
- def translate(srt, script_arr, range_arr, model_name, video_name, video_link):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237
  logging.info("start translating...")
238
  previous_length = 0
239
  for sentence, range in tqdm(zip(script_arr, range_arr)):
@@ -249,15 +266,14 @@ def translate(srt, script_arr, range_arr, model_name, video_name, video_link):
249
  try:
250
  translate = get_response(model_name, sentence)
251
  # detect merge sentence issue and try to solve for five times:
252
- attempt_left = 5
253
- while not check_translation(sentence, translate) and attempt_left > 0:
254
  translate = get_response(model_name, sentence)
255
- attempt_left -= 1
256
 
257
  # if failure still happen, split into smaller tokens
258
- if attempt_left == 0:
259
  single_sentences = sentence.split("\n\n")
260
- print("merge sentence issue found for range", range)
261
  translate = ""
262
  for i, single_sentence in enumerate(single_sentences):
263
  if i == len(single_sentences) - 1:
@@ -265,7 +281,7 @@ def translate(srt, script_arr, range_arr, model_name, video_name, video_link):
265
  else:
266
  translate += get_response(model_name, single_sentence) + "\n\n"
267
  # print(single_sentence, translate.split("\n\n")[-2])
268
- print("solved by individually translation!")
269
 
270
  except Exception as e:
271
  logging.debug("An error has occurred during translation:",e)
 
233
 
234
 
235
  # Translate and save
236
+ def translate(srt, script_arr, range_arr, model_name, video_name, video_link, attempts_count = 5):
237
+ """
238
+ Translates the given script array into another language using ChatGPT and writes to the SRT file.
239
+
240
+ This function takes a script array, a range array, a model name, a video name, and a video link as input. It iterates
241
+ through the sentences and ranges in the script and range arrays. If the translation check fails five times, the function
242
+ will attempt to resolve merge sentence issues and split the sentence into smaller tokens for a better translation.
243
+
244
+ Args:
245
+ srt (Subtitle): An instance of the Subtitle class representing the SRT file.
246
+ script_arr (list): A list of strings representing the original script sentences to be translated.
247
+ range_arr (list): A list of tuples representing the start and end positions of sentences in the script.
248
+ model_name (str): The name of the translation model to be used.
249
+ video_name (str): The name of the video.
250
+ video_link (str): The link to the video.
251
+ attempts_count (int): Number of retry attempts allowed when the translation check fails for a sentence. Defaults to 5.
252
+ """
253
+
254
  logging.info("start translating...")
255
  previous_length = 0
256
  for sentence, range in tqdm(zip(script_arr, range_arr)):
 
266
  try:
267
  translate = get_response(model_name, sentence)
268
  # detect merge sentence issue and try to solve for five times:
269
+ while not check_translation(sentence, translate) and attempts_count > 0:
 
270
  translate = get_response(model_name, sentence)
271
+ attempts_count -= 1
272
 
273
  # if failure still happen, split into smaller tokens
274
+ if attempts_count == 0:
275
  single_sentences = sentence.split("\n\n")
276
+ logging.info("merge sentence issue found for range", range)
277
  translate = ""
278
  for i, single_sentence in enumerate(single_sentences):
279
  if i == len(single_sentences) - 1:
 
281
  else:
282
  translate += get_response(model_name, single_sentence) + "\n\n"
283
  # print(single_sentence, translate.split("\n\n")[-2])
284
+ logging.info("solved by individually translation!")
285
 
286
  except Exception as e:
287
  logging.debug("An error has occurred during translation:",e)