JiaenLiu committed on
Commit
0b15a1d
1 Parent(s): 9ad8b62

fix real-time output

Browse files

Former-commit-id: 1fea3d98f33d848dc57260e285ecf9f434c14690

Files changed (2) hide show
  1. SRT.py +6 -4
  2. pipeline.py +3 -2
SRT.py CHANGED
@@ -247,21 +247,23 @@ class SRT_script():
247
 
248
  pass
249
 
250
- def check_len_and_split_range(self, range, threshold=30000):
251
  # TODO: if sentence length >= threshold, split this segments to two
252
  start_seg_id = range[0]
253
  end_seg_id = range[1]
 
254
  segments = []
255
  for i, seg in enumerate(self.segments[start_seg_id-1:end_seg_id]):
256
  if len(seg.translation) > threshold:
257
  seg_list = self.split_seg(seg, threshold)
258
  segments += seg_list
 
259
  else:
260
  segments.append(seg)
261
 
262
  self.segments[start_seg_id-1:end_seg_id] = segments
263
 
264
- return len(segments)
265
 
266
  def get_source_only(self):
267
  # return a string with pure source text
@@ -411,7 +413,7 @@ class SRT_script():
411
  # f.write(seg.get_trans_str())
412
  for i, seg in enumerate(self.segments):
413
  if i<range[0]-1: continue
414
- if i>=range[1]:break
415
  f.write(f'{i+idx}\n')
416
  f.write(seg.get_trans_str())
417
  pass
@@ -422,7 +424,7 @@ class SRT_script():
422
  with open(path, "a", encoding='utf-8') as f:
423
  for i, seg in enumerate(self.segments):
424
  if i<range[0]-1: continue
425
- if i>=range[1]:break
426
  f.write(f'{i+idx}\n')
427
  f.write(seg.get_bilingual_str())
428
  pass
 
247
 
248
  pass
249
 
250
+ def check_len_and_split_range(self, range, threshold=30):
251
  # TODO: if sentence length >= threshold, split this segments to two
252
  start_seg_id = range[0]
253
  end_seg_id = range[1]
254
+ extra_len = 0
255
  segments = []
256
  for i, seg in enumerate(self.segments[start_seg_id-1:end_seg_id]):
257
  if len(seg.translation) > threshold:
258
  seg_list = self.split_seg(seg, threshold)
259
  segments += seg_list
260
+ extra_len += len(seg_list) - 1
261
  else:
262
  segments.append(seg)
263
 
264
  self.segments[start_seg_id-1:end_seg_id] = segments
265
 
266
+ return extra_len
267
 
268
  def get_source_only(self):
269
  # return a string with pure source text
 
413
  # f.write(seg.get_trans_str())
414
  for i, seg in enumerate(self.segments):
415
  if i<range[0]-1: continue
416
+ if i>=range[1] + length:break
417
  f.write(f'{i+idx}\n')
418
  f.write(seg.get_trans_str())
419
  pass
 
424
  with open(path, "a", encoding='utf-8') as f:
425
  for i, seg in enumerate(self.segments):
426
  if i<range[0]-1: continue
427
+ if i>=range[1] + length:break
428
  f.write(f'{i+idx}\n')
429
  f.write(seg.get_bilingual_str())
430
  pass
pipeline.py CHANGED
@@ -47,8 +47,8 @@ if args.video_name == 'placeholder' :
47
  VIDEO_NAME = args.audio_file.split('/')[-1].split('.')[0]
48
  elif args.srt_file is not None:
49
  VIDEO_NAME = args.srt_file.split('/')[-1].split('.')[0]
50
- else:
51
- VIDEO_NAME = args.video_name
52
 
53
  model_name = args.model_name
54
 
@@ -262,6 +262,7 @@ for sentence, range in tqdm(zip(script_arr, range_arr)):
262
  print(translate)
263
  srt.set_translation(translate, range, model_name)
264
  add_length = srt.check_len_and_split_range(range)
 
265
  srt.realtime_write_srt(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_zh.srt",range, add_length,segidx)
266
  srt.realtime_bilingual_write_srt(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_bi.srt",range, add_length,segidx)
267
 
 
47
  VIDEO_NAME = args.audio_file.split('/')[-1].split('.')[0]
48
  elif args.srt_file is not None:
49
  VIDEO_NAME = args.srt_file.split('/')[-1].split('.')[0]
50
+ else:
51
+ VIDEO_NAME = args.video_name
52
 
53
  model_name = args.model_name
54
 
 
262
  print(translate)
263
  srt.set_translation(translate, range, model_name)
264
  add_length = srt.check_len_and_split_range(range)
265
+ print(add_length)
266
  srt.realtime_write_srt(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_zh.srt",range, add_length,segidx)
267
  srt.realtime_bilingual_write_srt(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_bi.srt",range, add_length,segidx)
268