worldqwq committed on
Commit
259f806
1 Parent(s): 491821e

Update the prompt and stop passing sentence numbers along with it

Browse files
Files changed (2) hide show
  1. SRT.py +11 -10
  2. pipeline.py +9 -6
SRT.py CHANGED
@@ -417,19 +417,20 @@ class SRT_script():
417
  [real_word, pos] = self.get_real_word(word)
418
  if not dict.check(word[:pos]):
419
  suggest = term_spellDict.suggest(real_word)
420
-
421
  if suggest and enchant.utils.levenshtein(word, suggest[0]) < (len(word)+len(suggest[0]))/4: # relax spell check
422
 
423
- #with open("dislog.log","a") as log:
424
- # if not os.path.exists("dislog.log"):
425
- # log.write("word \t suggest \t levenshtein \n")
426
- # log.write(word + "\t" + suggest[0] + "\t" + str(enchant.utils.levenshtein(word, suggest[0]))+'\n')
427
- print(word + ":" + suggest[0] + ":---:levenshtein:" + str(enchant.utils.levenshtein(word, suggest[0])))
428
-
429
-
430
  new_word = word.replace(word[:pos],suggest[0])
431
  else:
432
  new_word = word
 
 
 
 
433
  else:
434
  new_word = word
435
  ready_words[i] = new_word
@@ -489,8 +490,8 @@ class SRT_script():
489
  # return a string with pure source text
490
  result = ""
491
  for i, seg in enumerate(self.segments):
492
- result += f'SENTENCE {i + 1}: {seg.source_text}\n\n\n'
493
-
494
  return result
495
 
496
  def reform_src_str(self):
 
417
  [real_word, pos] = self.get_real_word(word)
418
  if not dict.check(word[:pos]):
419
  suggest = term_spellDict.suggest(real_word)
 
420
  if suggest and enchant.utils.levenshtein(word, suggest[0]) < (len(word)+len(suggest[0]))/4: # relax spell check
421
 
422
+ with open("dislog.log","a") as log:
423
+ if not os.path.exists("dislog.log"):
424
+ log.write("word \t suggest \t levenshtein \n")
425
+ log.write(word + "\t" + suggest[0] + "\t" + str(enchant.utils.levenshtein(word, suggest[0]))+'\n')
426
+ #print(word + ":" + suggest[0] + ":---:levenshtein:" + str(enchant.utils.levenshtein(word, suggest[0])))
 
 
427
  new_word = word.replace(word[:pos],suggest[0])
428
  else:
429
  new_word = word
430
+ else:
431
+ new_word = word
432
+ else:
433
+ new_word = word
434
  else:
435
  new_word = word
436
  ready_words[i] = new_word
 
490
  # return a string with pure source text
491
  result = ""
492
  for i, seg in enumerate(self.segments):
493
+ result+=f'{seg.source_text}\n\n\n'#f'SENTENCE {i+1}: {seg.source_text}\n\n\n'
494
+
495
  return result
496
 
497
  def reform_src_str(self):
pipeline.py CHANGED
@@ -82,7 +82,7 @@ def get_sources(args, download_path, result_path, video_name):
82
 
83
  return audio_path, audio_file, video_path, video_name
84
 
85
- def get_srt_class(srt_file_en, result_path, video_name, audio_path, audio_file = None, whisper_model = 'base', method = "stable"):
86
  # Instead of using the script_en variable directly, we'll use script_input
87
  if srt_file_en is not None:
88
  srt = SRT_script.parse_from_srt_file(srt_file_en)
@@ -181,11 +181,14 @@ def get_response(model_name, sentence):
181
  response = openai.ChatCompletion.create(
182
  model=model_name,
183
  messages = [
184
- {"role": "system", "content": "You are a helpful assistant that translates English to Chinese and have decent background in starcraft2."},
185
- {"role": "system", "content": "Your translation has to keep the orginal format and be as accurate as possible."},
186
- {"role": "system", "content": "Your translation needs to be consistent with the number of sentences in the original."},
187
- {"role": "system", "content": "There is no need for you to add any comments or notes."},
188
- {"role": "user", "content": 'Translate the following English text to Chinese: "{}"'.format(sentence)}
 
 
 
189
  ],
190
  temperature=0.15
191
  )
 
82
 
83
  return audio_path, audio_file, video_path, video_name
84
 
85
+ def get_srt_class(srt_file_en, result_path, video_name, audio_path, audio_file = None, whisper_model = 'large', method = "stable"):
86
  # Instead of using the script_en variable directly, we'll use script_input
87
  if srt_file_en is not None:
88
  srt = SRT_script.parse_from_srt_file(srt_file_en)
 
181
  response = openai.ChatCompletion.create(
182
  model=model_name,
183
  messages = [
184
+ #{"role": "system", "content": "You are a helpful assistant that translates English to Chinese and have decent background in starcraft2."},
185
+ #{"role": "system", "content": "Your translation has to keep the orginal format and be as accurate as possible."},
186
+ #{"role": "system", "content": "Your translation needs to be consistent with the number of sentences in the original."},
187
+ #{"role": "system", "content": "There is no need for you to add any comments or notes."},
188
+ #{"role": "user", "content": 'Translate the following English text to Chinese: "{}"'.format(sentence)}
189
+
190
+ {"role": "system", "content": "你是一个翻译助理,你的任务是翻译星际争霸视频,你会被提供一个按行分割的英文段落,你需要在保证句意和行数的情况下输出翻译后的文本。"},
191
+ {"role": "user", "content": sentence}
192
  ],
193
  temperature=0.15
194
  )