Spaces:
Sleeping
Sleeping
Merge pull request #18 from project-kxkg/jiaen/batch_output
Browse files
Jiaen/batch output
Former-commit-id: 2c8615467fa22cb75d9bf1bf74b1714fa73d1faa
- SRT.py +13 -4
- pipeline.py +3 -2
SRT.py
CHANGED
@@ -233,7 +233,7 @@ class SRT_script():
|
|
233 |
return result_list
|
234 |
|
235 |
|
236 |
-
def check_len_and_split(self, threshold=
|
237 |
# TODO: if sentence length >= threshold, split this segments to two
|
238 |
segments = []
|
239 |
for seg in self.segments:
|
@@ -251,17 +251,19 @@ class SRT_script():
|
|
251 |
# TODO: if sentence length >= threshold, split this segments to two
|
252 |
start_seg_id = range[0]
|
253 |
end_seg_id = range[1]
|
|
|
254 |
segments = []
|
255 |
for i, seg in enumerate(self.segments[start_seg_id-1:end_seg_id]):
|
256 |
if len(seg.translation) > threshold:
|
257 |
seg_list = self.split_seg(seg, threshold)
|
258 |
segments += seg_list
|
|
|
259 |
else:
|
260 |
segments.append(seg)
|
261 |
|
262 |
self.segments[start_seg_id-1:end_seg_id] = segments
|
263 |
|
264 |
-
return
|
265 |
|
266 |
def get_source_only(self):
|
267 |
# return a string with pure source text
|
@@ -406,7 +408,12 @@ class SRT_script():
|
|
406 |
start_seg_id = range[0]
|
407 |
end_seg_id = range[1]
|
408 |
with open(path, "a", encoding='utf-8') as f:
|
409 |
-
for i, seg in enumerate(self.segments[start_seg_id-1:end_seg_id+length]):
|
|
|
|
|
|
|
|
|
|
|
410 |
f.write(f'{i+idx}\n')
|
411 |
f.write(seg.get_trans_str())
|
412 |
pass
|
@@ -415,7 +422,9 @@ class SRT_script():
|
|
415 |
start_seg_id = range[0]
|
416 |
end_seg_id = range[1]
|
417 |
with open(path, "a", encoding='utf-8') as f:
|
418 |
-
for i, seg in enumerate(self.segments
|
|
|
|
|
419 |
f.write(f'{i+idx}\n')
|
420 |
f.write(seg.get_bilingual_str())
|
421 |
pass
|
|
|
233 |
return result_list
|
234 |
|
235 |
|
236 |
+
def check_len_and_split(self, threshold=30000):
|
237 |
# TODO: if sentence length >= threshold, split this segments to two
|
238 |
segments = []
|
239 |
for seg in self.segments:
|
|
|
251 |
# TODO: if sentence length >= threshold, split this segments to two
|
252 |
start_seg_id = range[0]
|
253 |
end_seg_id = range[1]
|
254 |
+
extra_len = 0
|
255 |
segments = []
|
256 |
for i, seg in enumerate(self.segments[start_seg_id-1:end_seg_id]):
|
257 |
if len(seg.translation) > threshold:
|
258 |
seg_list = self.split_seg(seg, threshold)
|
259 |
segments += seg_list
|
260 |
+
extra_len += len(seg_list) - 1
|
261 |
else:
|
262 |
segments.append(seg)
|
263 |
|
264 |
self.segments[start_seg_id-1:end_seg_id] = segments
|
265 |
|
266 |
+
return extra_len
|
267 |
|
268 |
def get_source_only(self):
|
269 |
# return a string with pure source text
|
|
|
408 |
start_seg_id = range[0]
|
409 |
end_seg_id = range[1]
|
410 |
with open(path, "a", encoding='utf-8') as f:
|
411 |
+
# for i, seg in enumerate(self.segments[start_seg_id-1:end_seg_id+length]):
|
412 |
+
# f.write(f'{i+idx}\n')
|
413 |
+
# f.write(seg.get_trans_str())
|
414 |
+
for i, seg in enumerate(self.segments):
|
415 |
+
if i<range[0]-1: continue
|
416 |
+
if i>=range[1] + length:break
|
417 |
f.write(f'{i+idx}\n')
|
418 |
f.write(seg.get_trans_str())
|
419 |
pass
|
|
|
422 |
start_seg_id = range[0]
|
423 |
end_seg_id = range[1]
|
424 |
with open(path, "a", encoding='utf-8') as f:
|
425 |
+
for i, seg in enumerate(self.segments):
|
426 |
+
if i<range[0]-1: continue
|
427 |
+
if i>=range[1] + length:break
|
428 |
f.write(f'{i+idx}\n')
|
429 |
f.write(seg.get_bilingual_str())
|
430 |
pass
|
pipeline.py
CHANGED
@@ -47,8 +47,8 @@ if args.video_name == 'placeholder' :
|
|
47 |
VIDEO_NAME = args.audio_file.split('/')[-1].split('.')[0]
|
48 |
elif args.srt_file is not None:
|
49 |
VIDEO_NAME = args.srt_file.split('/')[-1].split('.')[0]
|
50 |
-
else:
|
51 |
-
|
52 |
|
53 |
model_name = args.model_name
|
54 |
|
@@ -262,6 +262,7 @@ for sentence, range in tqdm(zip(script_arr, range_arr)):
|
|
262 |
print(translate)
|
263 |
srt.set_translation(translate, range, model_name)
|
264 |
add_length = srt.check_len_and_split_range(range)
|
|
|
265 |
srt.realtime_write_srt(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_zh.srt",range, add_length,segidx)
|
266 |
srt.realtime_bilingual_write_srt(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_bi.srt",range, add_length,segidx)
|
267 |
|
|
|
47 |
VIDEO_NAME = args.audio_file.split('/')[-1].split('.')[0]
|
48 |
elif args.srt_file is not None:
|
49 |
VIDEO_NAME = args.srt_file.split('/')[-1].split('.')[0]
|
50 |
+
else:
|
51 |
+
VIDEO_NAME = args.video_name
|
52 |
|
53 |
model_name = args.model_name
|
54 |
|
|
|
262 |
print(translate)
|
263 |
srt.set_translation(translate, range, model_name)
|
264 |
add_length = srt.check_len_and_split_range(range)
|
265 |
+
print(add_length)
|
266 |
srt.realtime_write_srt(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_zh.srt",range, add_length,segidx)
|
267 |
srt.realtime_bilingual_write_srt(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_bi.srt",range, add_length,segidx)
|
268 |
|