JiaenLiu committed on
Commit
915e02d
·
2 Parent(s): e3825f8 0b15a1d

Merge pull request #18 from project-kxkg/jiaen/batch_output

Browse files

Jiaen/batch output

Former-commit-id: 2c8615467fa22cb75d9bf1bf74b1714fa73d1faa

Files changed (2) hide show
  1. SRT.py +13 -4
  2. pipeline.py +3 -2
SRT.py CHANGED
@@ -233,7 +233,7 @@ class SRT_script():
233
  return result_list
234
 
235
 
236
- def check_len_and_split(self, threshold=30):
237
  # TODO: if sentence length >= threshold, split this segments to two
238
  segments = []
239
  for seg in self.segments:
@@ -251,17 +251,19 @@ class SRT_script():
251
  # TODO: if sentence length >= threshold, split this segments to two
252
  start_seg_id = range[0]
253
  end_seg_id = range[1]
 
254
  segments = []
255
  for i, seg in enumerate(self.segments[start_seg_id-1:end_seg_id]):
256
  if len(seg.translation) > threshold:
257
  seg_list = self.split_seg(seg, threshold)
258
  segments += seg_list
 
259
  else:
260
  segments.append(seg)
261
 
262
  self.segments[start_seg_id-1:end_seg_id] = segments
263
 
264
- return len(segments)
265
 
266
  def get_source_only(self):
267
  # return a string with pure source text
@@ -406,7 +408,12 @@ class SRT_script():
406
  start_seg_id = range[0]
407
  end_seg_id = range[1]
408
  with open(path, "a", encoding='utf-8') as f:
409
- for i, seg in enumerate(self.segments[start_seg_id-1:end_seg_id+length]):
 
 
 
 
 
410
  f.write(f'{i+idx}\n')
411
  f.write(seg.get_trans_str())
412
  pass
@@ -415,7 +422,9 @@ class SRT_script():
415
  start_seg_id = range[0]
416
  end_seg_id = range[1]
417
  with open(path, "a", encoding='utf-8') as f:
418
- for i, seg in enumerate(self.segments[start_seg_id-1:end_seg_id+length]):
 
 
419
  f.write(f'{i+idx}\n')
420
  f.write(seg.get_bilingual_str())
421
  pass
 
233
  return result_list
234
 
235
 
236
+ def check_len_and_split(self, threshold=30000):
237
  # TODO: if sentence length >= threshold, split this segments to two
238
  segments = []
239
  for seg in self.segments:
 
251
  # TODO: if sentence length >= threshold, split this segments to two
252
  start_seg_id = range[0]
253
  end_seg_id = range[1]
254
+ extra_len = 0
255
  segments = []
256
  for i, seg in enumerate(self.segments[start_seg_id-1:end_seg_id]):
257
  if len(seg.translation) > threshold:
258
  seg_list = self.split_seg(seg, threshold)
259
  segments += seg_list
260
+ extra_len += len(seg_list) - 1
261
  else:
262
  segments.append(seg)
263
 
264
  self.segments[start_seg_id-1:end_seg_id] = segments
265
 
266
+ return extra_len
267
 
268
  def get_source_only(self):
269
  # return a string with pure source text
 
408
  start_seg_id = range[0]
409
  end_seg_id = range[1]
410
  with open(path, "a", encoding='utf-8') as f:
411
+ # for i, seg in enumerate(self.segments[start_seg_id-1:end_seg_id+length]):
412
+ # f.write(f'{i+idx}\n')
413
+ # f.write(seg.get_trans_str())
414
+ for i, seg in enumerate(self.segments):
415
+ if i<range[0]-1: continue
416
+ if i>=range[1] + length:break
417
  f.write(f'{i+idx}\n')
418
  f.write(seg.get_trans_str())
419
  pass
 
422
  start_seg_id = range[0]
423
  end_seg_id = range[1]
424
  with open(path, "a", encoding='utf-8') as f:
425
+ for i, seg in enumerate(self.segments):
426
+ if i<range[0]-1: continue
427
+ if i>=range[1] + length:break
428
  f.write(f'{i+idx}\n')
429
  f.write(seg.get_bilingual_str())
430
  pass
pipeline.py CHANGED
@@ -47,8 +47,8 @@ if args.video_name == 'placeholder' :
47
  VIDEO_NAME = args.audio_file.split('/')[-1].split('.')[0]
48
  elif args.srt_file is not None:
49
  VIDEO_NAME = args.srt_file.split('/')[-1].split('.')[0]
50
- else:
51
- VIDEO_NAME = args.video_name
52
 
53
  model_name = args.model_name
54
 
@@ -262,6 +262,7 @@ for sentence, range in tqdm(zip(script_arr, range_arr)):
262
  print(translate)
263
  srt.set_translation(translate, range, model_name)
264
  add_length = srt.check_len_and_split_range(range)
 
265
  srt.realtime_write_srt(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_zh.srt",range, add_length,segidx)
266
  srt.realtime_bilingual_write_srt(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_bi.srt",range, add_length,segidx)
267
 
 
47
  VIDEO_NAME = args.audio_file.split('/')[-1].split('.')[0]
48
  elif args.srt_file is not None:
49
  VIDEO_NAME = args.srt_file.split('/')[-1].split('.')[0]
50
+ else:
51
+ VIDEO_NAME = args.video_name
52
 
53
  model_name = args.model_name
54
 
 
262
  print(translate)
263
  srt.set_translation(translate, range, model_name)
264
  add_length = srt.check_len_and_split_range(range)
265
+ print(add_length)
266
  srt.realtime_write_srt(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_zh.srt",range, add_length,segidx)
267
  srt.realtime_bilingual_write_srt(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_bi.srt",range, add_length,segidx)
268