Spaces:

StarPigeon
/

ViDove

Sleeping

worldqwq committed on Apr 10, 2023

Commit

259f806

•

1 Parent(s): 491821e

Update the prompt and stop passing sentence numbers with the prompt

Files changed (2) hide show

SRT.py CHANGED Viewed

@@ -417,19 +417,20 @@ class SRT_script():
                 [real_word, pos] = self.get_real_word(word)
                 if not dict.check(word[:pos]):
                     suggest = term_spellDict.suggest(real_word)
                     if suggest and enchant.utils.levenshtein(word, suggest[0]) < (len(word)+len(suggest[0]))/4:  # relax spell check
-                        #with open("dislog.log","a") as log:
-                        #    if not os.path.exists("dislog.log"):
-                        #        log.write("word \t suggest \t levenshtein \n")
-                        #    log.write(word + "\t" + suggest[0] + "\t" + str(enchant.utils.levenshtein(word, suggest[0]))+'\n')
-                        print(word + ":" + suggest[0] + ":---:levenshtein:" + str(enchant.utils.levenshtein(word, suggest[0])))
                         new_word = word.replace(word[:pos],suggest[0])
                     else:
                         new_word = word
                 else:
                     new_word = word
                 ready_words[i] = new_word
@@ -489,8 +490,8 @@ class SRT_script():
         # return a string with pure source text
         result = ""
         for i, seg in enumerate(self.segments):
-            result += f'SENTENCE {i + 1}: {seg.source_text}\n\n\n'
         return result
     def reform_src_str(self):

                 [real_word, pos] = self.get_real_word(word)
                 if not dict.check(word[:pos]):
                     suggest = term_spellDict.suggest(real_word)
                     if suggest and enchant.utils.levenshtein(word, suggest[0]) < (len(word)+len(suggest[0]))/4:  # relax spell check
+                        with open("dislog.log","a") as log:
+                            if not os.path.exists("dislog.log"):
+                                log.write("word \t suggest \t levenshtein \n")
+                            log.write(word + "\t" + suggest[0] + "\t" + str(enchant.utils.levenshtein(word, suggest[0]))+'\n')
+                        #print(word + ":" + suggest[0] + ":---:levenshtein:" + str(enchant.utils.levenshtein(word, suggest[0])))
                         new_word = word.replace(word[:pos],suggest[0])
                     else:
                         new_word = word
+                else:
+                    new_word = word
+                    else:
+                        new_word = word
                 else:
                     new_word = word
                 ready_words[i] = new_word
         # return a string with pure source text
         result = ""
         for i, seg in enumerate(self.segments):
+            result+=f'{seg.source_text}\n\n\n'#f'SENTENCE {i+1}: {seg.source_text}\n\n\n'
         return result
     def reform_src_str(self):

pipeline.py CHANGED Viewed

@@ -82,7 +82,7 @@ def get_sources(args, download_path, result_path, video_name):
     return audio_path, audio_file, video_path, video_name
-def get_srt_class(srt_file_en, result_path, video_name, audio_path, audio_file = None, whisper_model = 'base', method = "stable"):
     # Instead of using the script_en variable directly, we'll use script_input
     if srt_file_en is not None:
         srt = SRT_script.parse_from_srt_file(srt_file_en)
@@ -181,11 +181,14 @@ def get_response(model_name, sentence):
         response = openai.ChatCompletion.create(
             model=model_name,
             messages = [
-                {"role": "system", "content": "You are a helpful assistant that translates English to Chinese and have decent background in starcraft2."},
-                {"role": "system", "content": "Your translation has to keep the orginal format and be as accurate as possible."},
-                {"role": "system", "content": "Your translation needs to be consistent with the number of sentences in the original."},
-                {"role": "system", "content": "There is no need for you to add any comments or notes."},
-                {"role": "user", "content": 'Translate the following English text to Chinese: "{}"'.format(sentence)}
             ],
             temperature=0.15
         )

     return audio_path, audio_file, video_path, video_name
+def get_srt_class(srt_file_en, result_path, video_name, audio_path, audio_file = None, whisper_model = 'large', method = "stable"):
     # Instead of using the script_en variable directly, we'll use script_input
     if srt_file_en is not None:
         srt = SRT_script.parse_from_srt_file(srt_file_en)
         response = openai.ChatCompletion.create(
             model=model_name,
             messages = [
+                #{"role": "system", "content": "You are a helpful assistant that translates English to Chinese and have decent background in starcraft2."},
+                #{"role": "system", "content": "Your translation has to keep the orginal format and be as accurate as possible."},
+                #{"role": "system", "content": "Your translation needs to be consistent with the number of sentences in the original."},
+                #{"role": "system", "content": "There is no need for you to add any comments or notes."},
+                #{"role": "user", "content": 'Translate the following English text to Chinese: "{}"'.format(sentence)}
+                {"role": "system", "content": "你是一个翻译助理，你的任务是翻译星际争霸视频，你会被提供一个按行分割的英文段落，你需要在保证句意和行数的情况下输出翻译后的文本。"},
+                {"role": "user", "content": sentence}
             ],
             temperature=0.15
         )