Macrodove committed on
Commit
0362dd5
1 Parent(s): bd1cc4e

Added comment, changed output to arrays of SRTsegment

Browse files

Former-commit-id: 97fea915a8272bdfd5a632e672dfe8e9709c4188

Files changed (1) hide show
  1. evaluation/alignment.py +39 -21
evaluation/alignment.py CHANGED
@@ -3,69 +3,87 @@ import numpy as np
3
  sys.path.append('../src')
4
  from srt_util.srt import SrtScript
5
 
 
 
 
 
 
6
  def procedure(anchor, subsec, S_arr, subidx):
7
  cache_idx = 0
8
- while subidx != cache_idx:
9
  cache_idx = subidx
10
- if subidx >= len(subsec):
 
11
  break
12
  sub = subsec[subidx]
13
  if anchor.end < sub.start:
14
  continue
 
15
  if (anchor.start <= sub.start) and (sub.end <= anchor.end) or anchor.end - sub.start > sub.end - anchor.start:
16
- S_arr[-1] += sub.source_text
17
  subidx += 1
18
- return subidx - 1
19
 
 
 
 
 
 
 
20
  def alignment(pred_path, gt_path):
21
  pred = SrtScript.parse_from_srt_file(pred_path).segments
22
  gt = SrtScript.parse_from_srt_file(gt_path).segments
23
  pred_arr, gt_arr = [], []
24
- idx_p, idx_t = 0, 0
25
 
26
  while idx_p < len(pred) or idx_t < len(gt):
 
27
  ps = pred[idx_p] if idx_p < len(pred) else None
28
  gs = gt[idx_t] if idx_t < len(gt) else None
29
-
30
  if not ps:
31
- gt_arr.append(gs.source_text)
 
32
  pred_arr.append('')
33
  idx_t += 1
34
  continue
35
 
36
  if not gs:
37
- pred_arr.append(ps.source_text)
 
38
  gt_arr.append('')
39
  idx_p += 1
40
  continue
41
 
42
  ps_dur = ps.end - ps.start
43
  gs_dur = gs.end - gs.start
44
-
 
45
  if ps_dur <= gs_dur:
 
46
  if ps.end < gs.start:
47
- pred_arr.append(ps.source_text)
48
- gt_arr.append('')
49
- idx_t -= 1
50
  else:
51
- gt_arr.append(gs.source_text)
52
  if gs.end >= ps.start:
53
- pred_arr.append(ps.source_text)
54
  idx_p = procedure(gs, pred, pred_arr, idx_p + 1)
55
- else:
56
  pred_arr.append('')
57
  idx_p -= 1
58
  else:
 
59
  if gs.end < ps.start:
60
- gt_arr.append(gs.source_text)
61
- pred_arr.append('')
62
- idx_p -= 1
63
  else:
64
- pred_arr.append(ps.source_text)
65
  if ps.end >= gs.start:
66
- gt_arr.append(gs.source_text)
67
  idx_t = procedure(ps, gt, gt_arr, idx_t + 1)
68
- else:
69
  gt_arr.append('')
70
  idx_t -= 1
71
 
 
3
  sys.path.append('../src')
4
  from srt_util.srt import SrtScript
5
 
6
+
7
def procedure(anchor, subsec, S_arr, subidx):
    """Greedily fold consecutive sub segments into the current alignment slot.

    Helper for the alignment routine. Starting at ``subidx``, each segment of
    ``subsec`` is added onto ``S_arr[-1]`` (with ``+=``) for as long as it
    either

    * lies entirely inside ``anchor``
      (``anchor.start <= sub.start`` and ``sub.end <= anchor.end``), or
    * satisfies ``anchor.end - sub.start > sub.end - anchor.start``
      (equivalently ``sub.start + sub.end < anchor.start + anchor.end``).

    Scanning stops at the first segment that starts after the anchor ends,
    at the first segment that fails both tests, or when ``subsec`` runs out.

    Args:
        anchor: Segment exposing ``start`` and ``end``.
        subsec: Sequence of segments exposing ``start`` and ``end``.
        S_arr: Output list, mutated in place. It must be non-empty and its
            last element must support ``+=`` with an element of ``subsec``.
        subidx: Index in ``subsec`` of the first candidate segment.

    Returns:
        Index of the last segment that was merged, which is ``subidx - 1``
        when nothing was merged.
    """
    # A plain bounds-checked loop replaces the former "loop until the index
    # stops changing" scheme. That scheme seeded its cache with 0 and so
    # silently skipped all work when called with subidx == 0.
    while subidx < len(subsec):
        sub = subsec[subidx]

        # Sub starts after the anchor has ended: nothing left to merge.
        if anchor.end < sub.start:
            break

        contained = anchor.start <= sub.start and sub.end <= anchor.end
        leans_on_anchor = anchor.end - sub.start > sub.end - anchor.start
        if not (contained or leans_on_anchor):
            # Alignment has stabilized; this sub belongs to a later slot.
            break

        # Add sub.source_text here instead to build string-only output.
        S_arr[-1] += sub
        subidx += 1

    # Step back to the last segment that was actually merged.
    return subidx - 1
27
+
28
+
29
+ # Input: path1, path2
30
+ # Output: aligned array of SRTsegment corresponding to path1 path2
31
+ # Note: Uncomment the trailing .source_text on each append to get output arrays of plain strings instead
32
  def alignment(pred_path, gt_path):
33
  pred = SrtScript.parse_from_srt_file(pred_path).segments
34
  gt = SrtScript.parse_from_srt_file(gt_path).segments
35
  pred_arr, gt_arr = [], []
36
+ idx_p, idx_t = 0, 0 # idx_p: current index of pred segment, idx_t for ground truth
37
 
38
  while idx_p < len(pred) or idx_t < len(gt):
39
+ # Check if one srt file runs out while reading
40
  ps = pred[idx_p] if idx_p < len(pred) else None
41
  gs = gt[idx_t] if idx_t < len(gt) else None
42
+
43
  if not ps:
44
+ # If ps runs out, align gs segment with filler one by one
45
+ gt_arr.append(gs)#.source_text
46
  pred_arr.append('')
47
  idx_t += 1
48
  continue
49
 
50
  if not gs:
51
+ # If gs runs out, align ps segment with filler one by one
52
+ pred_arr.append(ps)#.source_text
53
  gt_arr.append('')
54
  idx_p += 1
55
  continue
56
 
57
  ps_dur = ps.end - ps.start
58
  gs_dur = gs.end - gs.start
59
+
60
+ # Check for duration to decide anchor and sub
61
  if ps_dur <= gs_dur:
62
+ # Detect segment with no overlap
63
  if ps.end < gs.start:
64
+ pred_arr.append(ps)#.source_text
65
+ gt_arr.append('') # append filler
66
+ idx_t -= 1 # reset ground truth index
67
  else:
68
+ gt_arr.append(gs)#.source_text
69
  if gs.end >= ps.start:
70
+ pred_arr.append(ps)#.source_text
71
  idx_p = procedure(gs, pred, pred_arr, idx_p + 1)
72
+ else: # filler pairing
73
  pred_arr.append('')
74
  idx_p -= 1
75
  else:
76
+ # same overlap checking procedure
77
  if gs.end < ps.start:
78
+ gt_arr.append(gs)#.source_text
79
+ pred_arr.append('') # filler
80
+ idx_p -= 1 # reset
81
  else:
82
+ pred_arr.append(ps)#.source_text
83
  if ps.end >= gs.start:
84
+ gt_arr.append(gs)#.source_text
85
  idx_t = procedure(ps, gt, gt_arr, idx_t + 1)
86
+ else: # filler pairing
87
  gt_arr.append('')
88
  idx_t -= 1
89