Eason Lu committed on
Commit
e75254e
1 Parent(s): e3825f8

solve: maximum recursion

Browse files

Former-commit-id: e8ac2862b0a0745b3093a2129dda8abf2672f05e

Files changed (2) hide show
  1. SRT.py +16 -3
  2. pipeline.py +6 -7
SRT.py CHANGED
@@ -62,6 +62,16 @@ class SRT_segment(object):
62
  def get_bilingual_str(self) -> str:
63
  return f'{self.duration}\n{self.source_text}\n{self.translation}\n\n'
64
 
 
 
 
 
 
 
 
 
 
 
65
  class SRT_script():
66
  def __init__(self, segments) -> None:
67
  self.segments = []
@@ -181,8 +191,12 @@ class SRT_script():
181
  #print(lines[i])
182
  pass
183
 
184
- def split_seg(self, seg, threshold=500):
185
- # TODO: evenly split seg to 2 parts and add new seg into self.segments
 
 
 
 
186
  source_text = seg.source_text
187
  translation = seg.translation
188
  src_commas = [m.start() for m in re.finditer(',', source_text)]
@@ -333,7 +347,6 @@ class SRT_script():
333
 
334
  def spell_check_term(self):
335
  ## known bug: I've will be replaced because i've is not in the dict
336
-
337
 
338
  import enchant
339
  dict = enchant.Dict('en_US')
 
62
  def get_bilingual_str(self) -> str:
63
  return f'{self.duration}\n{self.source_text}\n{self.translation}\n\n'
64
 
65
+ # def set_translation(self, trans):
66
+ # if trans[0] == ',':
67
+ # trans = trans[1:]
68
+ # self.translation = trans
69
+
70
+ # def set_src_text(self, src_text):
71
+ # if src_text[0] == ',':
72
+ # src_text = src_text[1:]
73
+ # self.source_text = src_text
74
+
75
  class SRT_script():
76
  def __init__(self, segments) -> None:
77
  self.segments = []
 
191
  #print(lines[i])
192
  pass
193
 
194
+ def split_seg(self, seg, threshold):
195
+ # evenly split seg to 2 parts and add new seg into self.segments
196
+ if seg.source_text[0] == ',':
197
+ seg.source_text = seg.source_text[1:]
198
+ if seg.translation[0] == ',':
199
+ seg.translation = seg.translation[1:]
200
  source_text = seg.source_text
201
  translation = seg.translation
202
  src_commas = [m.start() for m in re.finditer(',', source_text)]
 
347
 
348
  def spell_check_term(self):
349
  ## known bug: I've will be replaced because i've is not in the dict
 
350
 
351
  import enchant
352
  dict = enchant.Dict('en_US')
pipeline.py CHANGED
@@ -144,7 +144,7 @@ else:
144
 
145
  # srt class preprocess
146
  srt.form_whole_sentence()
147
- srt.spell_check_term()
148
  srt.correct_with_force_term()
149
  srt.write_srt_file_src(srt_file_en)
150
  script_input = srt.get_source_only()
@@ -259,14 +259,13 @@ for sentence, range in tqdm(zip(script_arr, range_arr)):
259
  time.sleep(30)
260
  flag = True
261
  # add read-time output back and modify the post-processing by using one batch as an unit.
262
- print(translate)
263
  srt.set_translation(translate, range, model_name)
264
- add_length = srt.check_len_and_split_range(range)
265
- srt.realtime_write_srt(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_zh.srt",range, add_length,segidx)
266
- srt.realtime_bilingual_write_srt(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_bi.srt",range, add_length,segidx)
267
 
268
- # srt.check_len_and_split()
269
- # srt.write_srt_file_translate(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_zh.srt")
270
  # srt.write_srt_file_bilingual(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_bi.srt")
271
 
272
  if not args.only_srt:
 
144
 
145
  # srt class preprocess
146
  srt.form_whole_sentence()
147
+ # srt.spell_check_term()
148
  srt.correct_with_force_term()
149
  srt.write_srt_file_src(srt_file_en)
150
  script_input = srt.get_source_only()
 
259
  time.sleep(30)
260
  flag = True
261
  # add read-time output back and modify the post-processing by using one batch as an unit.
 
262
  srt.set_translation(translate, range, model_name)
263
+ # add_length = srt.check_len_and_split_range(range)
264
+ # srt.realtime_write_srt(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_zh.srt",range, add_length,segidx)
265
+ # srt.realtime_bilingual_write_srt(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_bi.srt",range, add_length,segidx)
266
 
267
+ srt.check_len_and_split()
268
+ srt.write_srt_file_translate(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_zh.srt")
269
  # srt.write_srt_file_bilingual(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_bi.srt")
270
 
271
  if not args.only_srt: