Eason Lu committed on
Commit
f1a218d
·
1 Parent(s): 007de42

solve empty time stamp;working on split

Browse files

Former-commit-id: 5d21ec7ff41e2e8fb5bfa670c885d5b5e0afb352

Files changed (1) hide show
  1. SRT.py +50 -16
SRT.py CHANGED
@@ -6,28 +6,30 @@ class SRT_segment(object):
6
  def __init__(self, *args) -> None:
7
  if isinstance(args[0], dict):
8
  segment = args[0]
9
- start_ms = int((segment['start']*100)%100*10)
10
- end_ms = int((segment['end']*100)%100*10)
11
-
12
- if start_ms == end_ms and int(segment['start']) == int(segment['end']): # avoid empty time stamp
13
- end_ms+=500
14
-
15
- start_time = str(timedelta(seconds=int(segment['start']), milliseconds=start_ms))
16
- end_time = str(timedelta(seconds=int(segment['end']), milliseconds=end_ms))
17
- if start_ms == 0:
18
- self.start_time_str = str(0)+start_time.split('.')[0]+',000'
 
 
19
  else:
20
- self.start_time_str = str(0)+start_time.split('.')[0]+','+start_time.split('.')[1][:3]
21
- if end_ms == 0:
22
- self.end_time_str = str(0)+end_time.split('.')[0]+',000'
23
  else:
24
- self.end_time_str = str(0)+end_time.split('.')[0]+','+end_time.split('.')[1][:3]
25
  self.source_text = segment['text'][1:]
26
  self.duration = f"{self.start_time_str} --> {self.end_time_str}"
27
  self.translation = ""
28
 
29
  elif isinstance(args[0], list):
30
- self.source_text = args[0][2][:-1]
31
  self.duration = args[0][1]
32
  self.start_time_str = self.duration.split(" --> ")[0]
33
  self.end_time_str = self.duration.split(" --> ")[1]
@@ -122,12 +124,44 @@ class SRT_script():
122
  #print(lines[i])
123
  pass
124
 
125
- def split_seg(self, seg_id):
126
  # TODO: evenly split seg to 2 parts and add new seg into self.segments
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  pass
128
 
129
  def check_len_and_split(self, threshold):
130
  # TODO: if sentence length >= threshold, split this segments to two
 
131
  pass
132
 
133
  def get_source_only(self):
 
6
  def __init__(self, *args) -> None:
7
  if isinstance(args[0], dict):
8
  segment = args[0]
9
+ self.start = segment['start']
10
+ self.end = segment['end']
11
+ self.start_ms = int((segment['start']*100)%100*10)
12
+ self.end_ms = int((segment['end']*100)%100*10)
13
+
14
+ if self.start_ms == self.end_ms and int(segment['start']) == int(segment['end']): # avoid empty time stamp
15
+ self.end_ms+=500
16
+
17
+ self.start_time = timedelta(seconds=int(segment['start']), milliseconds=self.start_ms)
18
+ self.end_time = timedelta(seconds=int(segment['end']), milliseconds=self.end_ms)
19
+ if self.start_ms == 0:
20
+ self.start_time_str = str(0)+str(self.start_time).split('.')[0]+',000'
21
  else:
22
+ self.start_time_str = str(0)+str(self.start_time).split('.')[0]+','+self.start_time.split('.')[1][:3]
23
+ if self.end_ms == 0:
24
+ self.end_time_str = str(0)+str(self.end_time).split('.')[0]+',000'
25
  else:
26
+ self.end_time_str = str(0)+str(self.end_time).split('.')[0]+','+self.end_time.split('.')[1][:3]
27
  self.source_text = segment['text'][1:]
28
  self.duration = f"{self.start_time_str} --> {self.end_time_str}"
29
  self.translation = ""
30
 
31
  elif isinstance(args[0], list):
32
+ self.source_text = args[0][2]
33
  self.duration = args[0][1]
34
  self.start_time_str = self.duration.split(" --> ")[0]
35
  self.end_time_str = self.duration.split(" --> ")[1]
 
124
  #print(lines[i])
125
  pass
126
 
127
def split_seg(self, seg_idx):
    """Split the segment at ``seg_idx`` into two halves at its middle comma.

    The source text is cut at its median ',' (the comma itself is dropped),
    the translation is cut the same way using its own commas, and the time
    range is divided at its midpoint.  The two resulting segments replace
    the original one in ``self.segments``.

    Args:
        seg_idx: index into ``self.segments`` of the segment to split.

    Returns:
        None.  If the source text contains no comma there is no split
        point and ``self.segments`` is left untouched.

    Notes:
        * Reads ``seg.start`` / ``seg.end``; in the visible code only the
          dict form of the ``SRT_segment`` constructor sets them, so
          segments built another way presumably raise AttributeError here
          -- TODO confirm.
        * Assumes ``self.segments`` is a list (slice assignment is used).
        * NOTE(review): the dict form of ``SRT_segment.__init__`` must be
          able to format non-zero millisecond values for this method to
          work on arbitrary midpoints -- verify before relying on it.
    """
    seg = self.segments[seg_idx]
    source_text = seg.source_text
    translation = seg.translation

    def middle_comma(text):
        # Position of the median ',' in text, or None when there is none.
        # len // 2 is always a valid index: the exact middle for an odd
        # count, the upper-middle for an even count.
        commas = [m.start() for m in re.finditer(',', text)]
        return commas[len(commas) // 2] if commas else None

    src_split_idx = middle_comma(source_text)
    if src_split_idx is None:
        # No comma to split on: leave the segment as it is.
        return

    src_seg1 = source_text[:src_split_idx]
    src_seg2 = source_text[src_split_idx + 1:].lstrip()

    # The translation is split on its *own* commas; when it has none
    # (e.g. not translated yet) it stays whole on the first half.
    trans_split_idx = middle_comma(translation)
    if trans_split_idx is None:
        trans_seg1, trans_seg2 = translation, ""
    else:
        trans_seg1 = translation[:trans_split_idx]
        trans_seg2 = translation[trans_split_idx + 1:].lstrip()

    midpoint = seg.start + (seg.end - seg.start) / 2

    # The dict form of SRT_segment drops the first character of 'text'
    # (it does segment['text'][1:]), so prepend a sacrificial space to
    # keep the first letter of each half.
    seg1 = SRT_segment({'text': ' ' + src_seg1, 'start': seg.start, 'end': midpoint})
    seg1.translation = trans_seg1

    seg2 = SRT_segment({'text': ' ' + src_seg2, 'start': midpoint, 'end': seg.end})
    seg2.translation = trans_seg2

    # Replace the original segment with its two halves, preserving order.
    self.segments[seg_idx:seg_idx + 1] = [seg1, seg2]
161
 
162
  def check_len_and_split(self, threshold):
163
  # TODO: if sentence length >= threshold, split this segments to two
164
+
165
  pass
166
 
167
  def get_source_only(self):