Spaces:
Sleeping
Sleeping
CanYing0913
committed on
Commit
·
7d74f8e
1
Parent(s):
cba75b6
Update srt.py and file hierarchy
Browse files
Former-commit-id: d36b43736cb3447da3e26e3caef1e351bf431dc3
- doc/Installation.md +7 -0
- doc/struct.md +7 -0
- pipeline.py +5 -5
- srt_util/__init__.py +0 -0
- SRT.py → srt_util/srt.py +16 -23
- srt2ass.py → srt_util/srt2ass.py +0 -0
doc/Installation.md
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
### **Recommended:**
|
2 |
+
We recommend you to configure your environment using [mamba](https://pypi.org/project/mamba/). The following packages are required:
|
3 |
+
```
|
4 |
+
openai
|
5 |
+
openai-whisper
|
6 |
+
|
7 |
+
```
|
doc/struct.md
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Structure of Repository
|
2 |
+
```
|
3 |
+
├── doc # Baseline implementation of SpMM algorithm.
|
4 |
+
├────── struct.md # Document of repository structure.
|
5 |
+
├── finetune_data #
|
6 |
+
└── README.md
|
7 |
+
```
|
pipeline.py
CHANGED
@@ -3,10 +3,10 @@ from pytube import YouTube
|
|
3 |
import argparse
|
4 |
import os
|
5 |
from tqdm import tqdm
|
6 |
-
from
|
7 |
import stable_whisper
|
8 |
import whisper
|
9 |
-
from srt2ass import srt2ass
|
10 |
|
11 |
import subprocess
|
12 |
|
@@ -85,7 +85,7 @@ def get_sources(args, download_path, result_path, video_name):
|
|
85 |
def get_srt_class(srt_file_en, result_path, video_name, audio_path, audio_file = None, whisper_model = 'large', method = "stable"):
|
86 |
# Instead of using the script_en variable directly, we'll use script_input
|
87 |
if srt_file_en is not None:
|
88 |
-
srt =
|
89 |
else:
|
90 |
# using whisper to perform speech-to-text and save it in <video name>_en.txt under RESULT PATH.
|
91 |
srt_file_en = "{}/{}/{}_en.srt".format(result_path, video_name, video_name)
|
@@ -115,10 +115,10 @@ def get_srt_class(srt_file_en, result_path, video_name, audio_path, audio_file =
|
|
115 |
else:
|
116 |
raise ValueError("invalid speech to text method")
|
117 |
|
118 |
-
srt =
|
119 |
|
120 |
else:
|
121 |
-
srt =
|
122 |
return srt_file_en, srt
|
123 |
|
124 |
# Split the video script by sentences and create chunks within the token limit
|
|
|
3 |
import argparse
|
4 |
import os
|
5 |
from tqdm import tqdm
|
6 |
+
from srt_util.srt import SrtScript
|
7 |
import stable_whisper
|
8 |
import whisper
|
9 |
+
from srt_util.srt2ass import srt2ass
|
10 |
|
11 |
import subprocess
|
12 |
|
|
|
85 |
def get_srt_class(srt_file_en, result_path, video_name, audio_path, audio_file = None, whisper_model = 'large', method = "stable"):
|
86 |
# Instead of using the script_en variable directly, we'll use script_input
|
87 |
if srt_file_en is not None:
|
88 |
+
srt = SrtScript.parse_from_srt_file(srt_file_en)
|
89 |
else:
|
90 |
# using whisper to perform speech-to-text and save it in <video name>_en.txt under RESULT PATH.
|
91 |
srt_file_en = "{}/{}/{}_en.srt".format(result_path, video_name, video_name)
|
|
|
115 |
else:
|
116 |
raise ValueError("invalid speech to text method")
|
117 |
|
118 |
+
srt = SrtScript(transcript['segments']) # read segments to SRT class
|
119 |
|
120 |
else:
|
121 |
+
srt = SrtScript.parse_from_srt_file(srt_file_en)
|
122 |
return srt_file_en, srt
|
123 |
|
124 |
# Split the video script by sentences and create chunks within the token limit
|
srt_util/__init__.py
ADDED
File without changes
|
SRT.py → srt_util/srt.py
RENAMED
@@ -7,7 +7,7 @@ from datetime import timedelta
|
|
7 |
import openai
|
8 |
|
9 |
|
10 |
-
class
|
11 |
def __init__(self, *args) -> None:
|
12 |
if isinstance(args[0], dict):
|
13 |
segment = args[0]
|
@@ -63,28 +63,23 @@ class SRT_segment(object):
|
|
63 |
self.end = seg.end
|
64 |
self.end_ms = seg.end_ms
|
65 |
self.duration = f"{self.start_time_str} --> {self.end_time_str}"
|
66 |
-
pass
|
67 |
|
68 |
def __add__(self, other):
|
69 |
"""
|
70 |
Merge the segment seg with the current segment, and return the new constructed segment.
|
71 |
No in-place modification.
|
|
|
72 |
:param other: Another segment that is strictly next to added segment.
|
73 |
:return: new segment of the two sub-segments
|
74 |
"""
|
75 |
# assert other.start_ms == self.end_ms, f"cannot merge discontinuous segments."
|
76 |
result = deepcopy(self)
|
77 |
-
result.
|
78 |
-
result.translation += f' {other.translation}'
|
79 |
-
result.end_time_str = other.end_time_str
|
80 |
-
result.end = other.end
|
81 |
-
result.end_ms = other.end_ms
|
82 |
-
result.duration = f"{self.start_time_str} --> {self.end_time_str}"
|
83 |
return result
|
84 |
|
85 |
-
def remove_trans_punc(self):
|
86 |
"""
|
87 |
-
remove punctuations in translation text
|
88 |
:return: None
|
89 |
"""
|
90 |
punc_cn = "，。！？"
|
@@ -101,12 +96,9 @@ class SRT_segment(object):
|
|
101 |
return f'{self.duration}\n{self.source_text}\n{self.translation}\n\n'
|
102 |
|
103 |
|
104 |
-
class
|
105 |
def __init__(self, segments) -> None:
|
106 |
-
self.segments = []
|
107 |
-
for seg in segments:
|
108 |
-
srt_seg = SRT_segment(seg)
|
109 |
-
self.segments.append(srt_seg)
|
110 |
|
111 |
@classmethod
|
112 |
def parse_from_srt_file(cls, path: str):
|
@@ -114,13 +106,12 @@ class SRT_script():
|
|
114 |
script_lines = [line.rstrip() for line in f.readlines()]
|
115 |
|
116 |
segments = []
|
117 |
-
for i in range(len(script_lines)):
|
118 |
-
|
119 |
-
segments.append(list(script_lines[i:i + 4]))
|
120 |
|
121 |
return cls(segments)
|
122 |
|
123 |
-
def merge_segs(self, idx_list) ->
|
124 |
"""
|
125 |
Merge entire segment list to a single segment
|
126 |
:param idx_list: List of index to merge
|
@@ -145,6 +136,7 @@ class SRT_script():
|
|
145 |
"""
|
146 |
merge_list = [] # a list of indices that should be merged e.g. [[0], [1, 2, 3, 4], [5, 6], [7]]
|
147 |
sentence = []
|
|
|
148 |
for i, seg in enumerate(self.segments):
|
149 |
if seg.source_text[-1] in ['.', '!', '?'] and len(seg.source_text) > 10 and 'vs.' not in seg.source_text:
|
150 |
sentence.append(i)
|
@@ -153,6 +145,7 @@ class SRT_script():
|
|
153 |
else:
|
154 |
sentence.append(i)
|
155 |
|
|
|
156 |
segments = []
|
157 |
for idx_list in merge_list:
|
158 |
segments.append(self.merge_segs(idx_list))
|
@@ -327,14 +320,14 @@ class SRT_script():
|
|
327 |
seg1_dict['text'] = src_seg1
|
328 |
seg1_dict['start'] = start_seg1
|
329 |
seg1_dict['end'] = end_seg1
|
330 |
-
seg1 =
|
331 |
seg1.translation = trans_seg1
|
332 |
|
333 |
seg2_dict = {}
|
334 |
seg2_dict['text'] = src_seg2
|
335 |
seg2_dict['start'] = start_seg2
|
336 |
seg2_dict['end'] = end_seg2
|
337 |
-
seg2 =
|
338 |
seg2.translation = trans_seg2
|
339 |
|
340 |
result_list = []
|
@@ -386,7 +379,7 @@ class SRT_script():
|
|
386 |
## force term correction
|
387 |
|
388 |
# load term dictionary
|
389 |
-
with open("
|
390 |
term_enzh_dict = {rows[0]: rows[1] for rows in reader(f)}
|
391 |
|
392 |
# change term
|
@@ -455,7 +448,7 @@ class SRT_script():
|
|
455 |
pos = uncover(word)[1]
|
456 |
new_word = word
|
457 |
if arg == 0: # term translate mode
|
458 |
-
with open("finetune_data/dict_enzh.csv", 'r', encoding='utf-8') as f:
|
459 |
term_enzh_dict = {rows[0]: rows[1] for rows in reader(f)}
|
460 |
if real_word in term_enzh_dict:
|
461 |
new_word = word.replace(word[:pos], term_enzh_dict.get(real_word))
|
|
|
7 |
import openai
|
8 |
|
9 |
|
10 |
+
class SrtSegment(object):
|
11 |
def __init__(self, *args) -> None:
|
12 |
if isinstance(args[0], dict):
|
13 |
segment = args[0]
|
|
|
63 |
self.end = seg.end
|
64 |
self.end_ms = seg.end_ms
|
65 |
self.duration = f"{self.start_time_str} --> {self.end_time_str}"
|
|
|
66 |
|
67 |
def __add__(self, other):
|
68 |
"""
|
69 |
Merge the segment seg with the current segment, and return the new constructed segment.
|
70 |
No in-place modification.
|
71 |
+
This is used for '+' operator.
|
72 |
:param other: Another segment that is strictly next to added segment.
|
73 |
:return: new segment of the two sub-segments
|
74 |
"""
|
75 |
# assert other.start_ms == self.end_ms, f"cannot merge discontinuous segments."
|
76 |
result = deepcopy(self)
|
77 |
+
result.merge_seg(other)
|
|
|
|
|
|
|
|
|
|
|
78 |
return result
|
79 |
|
80 |
+
def remove_trans_punc(self) -> None:
|
81 |
"""
|
82 |
+
remove CN punctuations in translation text
|
83 |
:return: None
|
84 |
"""
|
85 |
punc_cn = "，。！？"
|
|
|
96 |
return f'{self.duration}\n{self.source_text}\n{self.translation}\n\n'
|
97 |
|
98 |
|
99 |
+
class SrtScript(object):
|
100 |
def __init__(self, segments) -> None:
|
101 |
+
self.segments = [SrtSegment(seg) for seg in segments]
|
|
|
|
|
|
|
102 |
|
103 |
@classmethod
|
104 |
def parse_from_srt_file(cls, path: str):
|
|
|
106 |
script_lines = [line.rstrip() for line in f.readlines()]
|
107 |
|
108 |
segments = []
|
109 |
+
for i in range(0, len(script_lines), 4):
|
110 |
+
segments.append(list(script_lines[i:i + 4]))
|
|
|
111 |
|
112 |
return cls(segments)
|
113 |
|
114 |
+
def merge_segs(self, idx_list) -> SrtSegment:
|
115 |
"""
|
116 |
Merge entire segment list to a single segment
|
117 |
:param idx_list: List of index to merge
|
|
|
136 |
"""
|
137 |
merge_list = [] # a list of indices that should be merged e.g. [[0], [1, 2, 3, 4], [5, 6], [7]]
|
138 |
sentence = []
|
139 |
+
# Get each entire sentence of distinct segments, fill indices to merge_list
|
140 |
for i, seg in enumerate(self.segments):
|
141 |
if seg.source_text[-1] in ['.', '!', '?'] and len(seg.source_text) > 10 and 'vs.' not in seg.source_text:
|
142 |
sentence.append(i)
|
|
|
145 |
else:
|
146 |
sentence.append(i)
|
147 |
|
148 |
+
# Reconstruct segments, each with an entire sentence
|
149 |
segments = []
|
150 |
for idx_list in merge_list:
|
151 |
segments.append(self.merge_segs(idx_list))
|
|
|
320 |
seg1_dict['text'] = src_seg1
|
321 |
seg1_dict['start'] = start_seg1
|
322 |
seg1_dict['end'] = end_seg1
|
323 |
+
seg1 = SrtSegment(seg1_dict)
|
324 |
seg1.translation = trans_seg1
|
325 |
|
326 |
seg2_dict = {}
|
327 |
seg2_dict['text'] = src_seg2
|
328 |
seg2_dict['start'] = start_seg2
|
329 |
seg2_dict['end'] = end_seg2
|
330 |
+
seg2 = SrtSegment(seg2_dict)
|
331 |
seg2.translation = trans_seg2
|
332 |
|
333 |
result_list = []
|
|
|
379 |
## force term correction
|
380 |
|
381 |
# load term dictionary
|
382 |
+
with open("../finetune_data/dict_enzh.csv", 'r', encoding='utf-8') as f:
|
383 |
term_enzh_dict = {rows[0]: rows[1] for rows in reader(f)}
|
384 |
|
385 |
# change term
|
|
|
448 |
pos = uncover(word)[1]
|
449 |
new_word = word
|
450 |
if arg == 0: # term translate mode
|
451 |
+
with open("../finetune_data/dict_enzh.csv", 'r', encoding='utf-8') as f:
|
452 |
term_enzh_dict = {rows[0]: rows[1] for rows in reader(f)}
|
453 |
if real_word in term_enzh_dict:
|
454 |
new_word = word.replace(word[:pos], term_enzh_dict.get(real_word))
|
srt2ass.py → srt_util/srt2ass.py
RENAMED
File without changes
|