Spaces:
Sleeping
Sleeping
JiaenLiu
committed on
Commit
•
0b15a1d
1
Parent(s):
9ad8b62
fix real-time output
Browse files
Former-commit-id: 1fea3d98f33d848dc57260e285ecf9f434c14690
- SRT.py +6 -4
- pipeline.py +3 -2
SRT.py
CHANGED
@@ -247,21 +247,23 @@ class SRT_script():
|
|
247 |
|
248 |
pass
|
249 |
|
250 |
-
def check_len_and_split_range(self, range, threshold=
|
251 |
# TODO: if sentence length >= threshold, split this segments to two
|
252 |
start_seg_id = range[0]
|
253 |
end_seg_id = range[1]
|
|
|
254 |
segments = []
|
255 |
for i, seg in enumerate(self.segments[start_seg_id-1:end_seg_id]):
|
256 |
if len(seg.translation) > threshold:
|
257 |
seg_list = self.split_seg(seg, threshold)
|
258 |
segments += seg_list
|
|
|
259 |
else:
|
260 |
segments.append(seg)
|
261 |
|
262 |
self.segments[start_seg_id-1:end_seg_id] = segments
|
263 |
|
264 |
-
return
|
265 |
|
266 |
def get_source_only(self):
|
267 |
# return a string with pure source text
|
@@ -411,7 +413,7 @@ class SRT_script():
|
|
411 |
# f.write(seg.get_trans_str())
|
412 |
for i, seg in enumerate(self.segments):
|
413 |
if i<range[0]-1: continue
|
414 |
-
if i>=range[1]:break
|
415 |
f.write(f'{i+idx}\n')
|
416 |
f.write(seg.get_trans_str())
|
417 |
pass
|
@@ -422,7 +424,7 @@ class SRT_script():
|
|
422 |
with open(path, "a", encoding='utf-8') as f:
|
423 |
for i, seg in enumerate(self.segments):
|
424 |
if i<range[0]-1: continue
|
425 |
-
if i>=range[1]:break
|
426 |
f.write(f'{i+idx}\n')
|
427 |
f.write(seg.get_bilingual_str())
|
428 |
pass
|
|
|
247 |
|
248 |
pass
|
249 |
|
250 |
+
def check_len_and_split_range(self, range, threshold=30):
|
251 |
# TODO: if sentence length >= threshold, split this segments to two
|
252 |
start_seg_id = range[0]
|
253 |
end_seg_id = range[1]
|
254 |
+
extra_len = 0
|
255 |
segments = []
|
256 |
for i, seg in enumerate(self.segments[start_seg_id-1:end_seg_id]):
|
257 |
if len(seg.translation) > threshold:
|
258 |
seg_list = self.split_seg(seg, threshold)
|
259 |
segments += seg_list
|
260 |
+
extra_len += len(seg_list) - 1
|
261 |
else:
|
262 |
segments.append(seg)
|
263 |
|
264 |
self.segments[start_seg_id-1:end_seg_id] = segments
|
265 |
|
266 |
+
return extra_len
|
267 |
|
268 |
def get_source_only(self):
|
269 |
# return a string with pure source text
|
|
|
413 |
# f.write(seg.get_trans_str())
|
414 |
for i, seg in enumerate(self.segments):
|
415 |
if i<range[0]-1: continue
|
416 |
+
if i>=range[1] + length:break
|
417 |
f.write(f'{i+idx}\n')
|
418 |
f.write(seg.get_trans_str())
|
419 |
pass
|
|
|
424 |
with open(path, "a", encoding='utf-8') as f:
|
425 |
for i, seg in enumerate(self.segments):
|
426 |
if i<range[0]-1: continue
|
427 |
+
if i>=range[1] + length:break
|
428 |
f.write(f'{i+idx}\n')
|
429 |
f.write(seg.get_bilingual_str())
|
430 |
pass
|
pipeline.py
CHANGED
@@ -47,8 +47,8 @@ if args.video_name == 'placeholder' :
|
|
47 |
VIDEO_NAME = args.audio_file.split('/')[-1].split('.')[0]
|
48 |
elif args.srt_file is not None:
|
49 |
VIDEO_NAME = args.srt_file.split('/')[-1].split('.')[0]
|
50 |
-
else:
|
51 |
-
|
52 |
|
53 |
model_name = args.model_name
|
54 |
|
@@ -262,6 +262,7 @@ for sentence, range in tqdm(zip(script_arr, range_arr)):
|
|
262 |
print(translate)
|
263 |
srt.set_translation(translate, range, model_name)
|
264 |
add_length = srt.check_len_and_split_range(range)
|
|
|
265 |
srt.realtime_write_srt(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_zh.srt",range, add_length,segidx)
|
266 |
srt.realtime_bilingual_write_srt(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_bi.srt",range, add_length,segidx)
|
267 |
|
|
|
47 |
VIDEO_NAME = args.audio_file.split('/')[-1].split('.')[0]
|
48 |
elif args.srt_file is not None:
|
49 |
VIDEO_NAME = args.srt_file.split('/')[-1].split('.')[0]
|
50 |
+
else:
|
51 |
+
VIDEO_NAME = args.video_name
|
52 |
|
53 |
model_name = args.model_name
|
54 |
|
|
|
262 |
print(translate)
|
263 |
srt.set_translation(translate, range, model_name)
|
264 |
add_length = srt.check_len_and_split_range(range)
|
265 |
+
print(add_length)
|
266 |
srt.realtime_write_srt(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_zh.srt",range, add_length,segidx)
|
267 |
srt.realtime_bilingual_write_srt(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_bi.srt",range, add_length,segidx)
|
268 |
|