Spaces:
Sleeping
Sleeping
Eason Lu
committed on
Commit
•
e75254e
1
Parent(s):
e3825f8
solve: maximum recursion
Browse files
Former-commit-id: e8ac2862b0a0745b3093a2129dda8abf2672f05e
- SRT.py +16 -3
- pipeline.py +6 -7
SRT.py
CHANGED
@@ -62,6 +62,16 @@ class SRT_segment(object):
|
|
62 |
def get_bilingual_str(self) -> str:
|
63 |
return f'{self.duration}\n{self.source_text}\n{self.translation}\n\n'
|
64 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
class SRT_script():
|
66 |
def __init__(self, segments) -> None:
|
67 |
self.segments = []
|
@@ -181,8 +191,12 @@ class SRT_script():
|
|
181 |
#print(lines[i])
|
182 |
pass
|
183 |
|
184 |
-
def split_seg(self, seg, threshold
|
185 |
-
#
|
|
|
|
|
|
|
|
|
186 |
source_text = seg.source_text
|
187 |
translation = seg.translation
|
188 |
src_commas = [m.start() for m in re.finditer(',', source_text)]
|
@@ -333,7 +347,6 @@ class SRT_script():
|
|
333 |
|
334 |
def spell_check_term(self):
|
335 |
## known bug: I've will be replaced because i've is not in the dict
|
336 |
-
|
337 |
|
338 |
import enchant
|
339 |
dict = enchant.Dict('en_US')
|
|
|
62 |
def get_bilingual_str(self) -> str:
|
63 |
return f'{self.duration}\n{self.source_text}\n{self.translation}\n\n'
|
64 |
|
65 |
+
# def set_translation(self, trans):
|
66 |
+
# if trans[0] == ',':
|
67 |
+
# trans = trans[1:]
|
68 |
+
# self.translation = trans
|
69 |
+
|
70 |
+
# def set_src_text(self, src_text):
|
71 |
+
# if src_text[0] == ',':
|
72 |
+
# src_text = src_text[1:]
|
73 |
+
# self.source_text = src_text
|
74 |
+
|
75 |
class SRT_script():
|
76 |
def __init__(self, segments) -> None:
|
77 |
self.segments = []
|
|
|
191 |
#print(lines[i])
|
192 |
pass
|
193 |
|
194 |
+
def split_seg(self, seg, threshold):
|
195 |
+
# evenly split seg to 2 parts and add new seg into self.segments
|
196 |
+
if seg.source_text[0] == ',':
|
197 |
+
seg.source_text = seg.source_text[1:]
|
198 |
+
if seg.translation[0] == ',':
|
199 |
+
seg.translation = seg.translation[1:]
|
200 |
source_text = seg.source_text
|
201 |
translation = seg.translation
|
202 |
src_commas = [m.start() for m in re.finditer(',', source_text)]
|
|
|
347 |
|
348 |
def spell_check_term(self):
|
349 |
## known bug: I've will be replaced because i've is not in the dict
|
|
|
350 |
|
351 |
import enchant
|
352 |
dict = enchant.Dict('en_US')
|
pipeline.py
CHANGED
@@ -144,7 +144,7 @@ else:
|
|
144 |
|
145 |
# srt class preprocess
|
146 |
srt.form_whole_sentence()
|
147 |
-
srt.spell_check_term()
|
148 |
srt.correct_with_force_term()
|
149 |
srt.write_srt_file_src(srt_file_en)
|
150 |
script_input = srt.get_source_only()
|
@@ -259,14 +259,13 @@ for sentence, range in tqdm(zip(script_arr, range_arr)):
|
|
259 |
time.sleep(30)
|
260 |
flag = True
|
261 |
# add read-time output back and modify the post-processing by using one batch as an unit.
|
262 |
-
print(translate)
|
263 |
srt.set_translation(translate, range, model_name)
|
264 |
-
add_length = srt.check_len_and_split_range(range)
|
265 |
-
srt.realtime_write_srt(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_zh.srt",range, add_length,segidx)
|
266 |
-
srt.realtime_bilingual_write_srt(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_bi.srt",range, add_length,segidx)
|
267 |
|
268 |
-
|
269 |
-
|
270 |
# srt.write_srt_file_bilingual(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_bi.srt")
|
271 |
|
272 |
if not args.only_srt:
|
|
|
144 |
|
145 |
# srt class preprocess
|
146 |
srt.form_whole_sentence()
|
147 |
+
# srt.spell_check_term()
|
148 |
srt.correct_with_force_term()
|
149 |
srt.write_srt_file_src(srt_file_en)
|
150 |
script_input = srt.get_source_only()
|
|
|
259 |
time.sleep(30)
|
260 |
flag = True
|
261 |
# add read-time output back and modify the post-processing by using one batch as an unit.
|
|
|
262 |
srt.set_translation(translate, range, model_name)
|
263 |
+
# add_length = srt.check_len_and_split_range(range)
|
264 |
+
# srt.realtime_write_srt(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_zh.srt",range, add_length,segidx)
|
265 |
+
# srt.realtime_bilingual_write_srt(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_bi.srt",range, add_length,segidx)
|
266 |
|
267 |
+
srt.check_len_and_split()
|
268 |
+
srt.write_srt_file_translate(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_zh.srt")
|
269 |
# srt.write_srt_file_bilingual(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_bi.srt")
|
270 |
|
271 |
if not args.only_srt:
|