Spaces:

StarPigeon
/

ViDove

Sleeping

Eason Lu committed on Mar 28, 2023

Commit

e75254e

1 Parent(s): e3825f8

solve: maximum recursion

Former-commit-id: e8ac2862b0a0745b3093a2129dda8abf2672f05e

Files changed (2) hide show

SRT.py CHANGED Viewed

@@ -62,6 +62,16 @@ class SRT_segment(object):
     def get_bilingual_str(self) -> str:
         """Render this segment as one bilingual subtitle entry.

         The result is the duration, the source text and the translation,
         each on its own line, terminated by a blank line.
         """
         parts = (self.duration, self.source_text, self.translation)
         return '\n'.join(f'{part}' for part in parts) + '\n\n'
 class SRT_script():
     def __init__(self, segments) -> None:
         self.segments = []
@@ -181,8 +191,12 @@ class SRT_script():
                     #print(lines[i])
         pass
-    def split_seg(self, seg, threshold=500):
-        # TODO: evenly split seg to 2 parts and add new seg into self.segments
         source_text = seg.source_text
         translation = seg.translation
         src_commas = [m.start() for m in re.finditer(',', source_text)]
@@ -333,7 +347,6 @@ class SRT_script():
     def spell_check_term(self):
         ## known bug: I've will be replaced because i've is not in the dict
         import enchant
         dict = enchant.Dict('en_US')

     def get_bilingual_str(self) -> str:
         """Render this segment as one bilingual subtitle entry.

         The result is the duration, the source text and the translation,
         each on its own line, terminated by a blank line.
         """
         parts = (self.duration, self.source_text, self.translation)
         return '\n'.join(f'{part}' for part in parts) + '\n\n'
+    # def set_translation(self, trans):
+    #     if trans[0] == '，':
+    #         trans = trans[1:]
+    #     self.translation = trans
+    # def set_src_text(self, src_text):
+    #     if src_text[0] == ',':
+    #         src_text = src_text[1:]
+    #     self.source_text = src_text
 class SRT_script():
     def __init__(self, segments) -> None:
         self.segments = []
                     #print(lines[i])
         pass
+    def split_seg(self, seg, threshold):
+        # evenly split seg to 2 parts and add new seg into self.segments
+        if seg.source_text[0] == ',':
+            seg.source_text = seg.source_text[1:]
+        if seg.translation[0] == '，':
+            seg.translation = seg.translation[1:]
         source_text = seg.source_text
         translation = seg.translation
         src_commas = [m.start() for m in re.finditer(',', source_text)]
     def spell_check_term(self):
         ## known bug: I've will be replaced because i've is not in the dict
         import enchant
         dict = enchant.Dict('en_US')

pipeline.py CHANGED Viewed

@@ -144,7 +144,7 @@ else:
 # srt class preprocess
 srt.form_whole_sentence()
-srt.spell_check_term()
 srt.correct_with_force_term()
 srt.write_srt_file_src(srt_file_en)
 script_input = srt.get_source_only()
@@ -259,14 +259,13 @@ for sentence, range in tqdm(zip(script_arr, range_arr)):
             time.sleep(30)
             flag = True
     # add real-time output back and modify the post-processing by using one batch as a unit.
-    print(translate)
     srt.set_translation(translate, range, model_name)
-    add_length = srt.check_len_and_split_range(range)
-    srt.realtime_write_srt(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_zh.srt",range, add_length,segidx)
-    srt.realtime_bilingual_write_srt(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_bi.srt",range, add_length,segidx)
-# srt.check_len_and_split()
-# srt.write_srt_file_translate(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_zh.srt")
 # srt.write_srt_file_bilingual(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_bi.srt")
 if not args.only_srt:

 # srt class preprocess
 srt.form_whole_sentence()
+# srt.spell_check_term()
 srt.correct_with_force_term()
 srt.write_srt_file_src(srt_file_en)
 script_input = srt.get_source_only()
             time.sleep(30)
             flag = True
     # add real-time output back and modify the post-processing by using one batch as a unit.
     srt.set_translation(translate, range, model_name)
+    # add_length = srt.check_len_and_split_range(range)
+    # srt.realtime_write_srt(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_zh.srt",range, add_length,segidx)
+    # srt.realtime_bilingual_write_srt(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_bi.srt",range, add_length,segidx)
+srt.check_len_and_split()
+srt.write_srt_file_translate(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_zh.srt")
 # srt.write_srt_file_bilingual(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_bi.srt")
 if not args.only_srt: