Spaces:

StarPigeon
/

ViDove

Sleeping

App Files Files Community

Macrodove committed on Sep 22, 2023

Commit

1cbd56e

•

1 Parent(s): 3f9cb68

Draft version; one bug still needs to be fixed

Browse files

Former-commit-id: ebeb78f42b55688a10d5b48df701b27a074da087

Files changed (1) hide show

evaluation/alignment.py +89 -0

evaluation/alignment.py CHANGED Viewed

	@@ -0,0 +1,89 @@

+import sys
+import numpy as np
+sys.path.append('../src')
+from srt_util.srt import SrtScript
+def procedure(anchor, subsec, S_arr, subidx):
+    """Merge the segments following ``subsec[subidx - 1]`` into ``S_arr[-1]``.
+
+    Starting at ``subsec[subidx]``, each segment's ``source_text`` is
+    concatenated onto the last entry of ``S_arr`` for as long as the segment
+    is matched to ``anchor``.  A segment is matched when it lies strictly
+    inside the anchor interval, or when
+    ``anchor.end - sub.start > sub.end - anchor.start`` (algebraically:
+    ``anchor.start + anchor.end > sub.start + sub.end``).  Scanning stops at
+    the first segment that starts after ``anchor`` ends, at the first
+    unmatched segment, or at the end of ``subsec``.
+
+    Args:
+        anchor: Segment exposing ``start`` and ``end``.
+        subsec: Indexable sequence of segments exposing ``start``, ``end``
+            and ``source_text``.
+        S_arr: Non-empty list of strings; its last entry is extended in place.
+        subidx: Index of the first candidate segment in ``subsec``.
+
+    Returns:
+        Index of the last segment of ``subsec`` that has been consumed, i.e.
+        ``subidx - 1`` when nothing was merged.  The original version only
+        decremented its local copy, so callers could never see this value.
+    """
+    # Debug trace kept from the draft implementation.
+    print('------------------------------')
+    print(anchor)
+    if 0 < subidx <= len(subsec):
+        print(subsec[subidx - 1])
+    sub = None  # stays None when there is no candidate left to examine
+    while subidx < len(subsec):
+        sub = subsec[subidx]
+        if anchor.end < sub.start:
+            break  # candidate starts after the anchor has ended
+        inside = anchor.start < sub.start and sub.end < anchor.end
+        leaning_in = anchor.end - sub.start > sub.end - anchor.start
+        if not (inside or leaning_in):
+            break  # alignment is stable: this candidate is not absorbed
+        # NOTE(review): texts are joined without a separator - confirm that
+        # source_text already carries the whitespace it needs.
+        S_arr[-1] += sub.source_text
+        subidx += 1
+    print(sub)
+    print(S_arr[-1])
+    print('------------------------------')
+    return subidx - 1
+def _align_segments(pred, gt):
+    """Walk two segment sequences in parallel; return ``(pred_arr, gt_arr)``."""
+    pred_arr = []
+    gt_arr = []
+    idx_p = idx_t = 0
+    while idx_p < len(pred) and idx_t < len(gt):
+        ps = pred[idx_p]
+        gs = gt[idx_t]
+        if ps.end - ps.start <= gs.end - gs.start:
+            # The ground-truth segment is at least as long: it is the anchor.
+            gt_arr.append(gs.source_text)
+            idx_t += 1
+            if gs.end < ps.start:
+                # No overlap: retry the same prediction with the next gt.
+                # NOTE(review): nothing is appended to pred_arr here, so the
+                # two lists stop lining up by position - confirm intent.
+                continue
+            pred_arr.append(ps.source_text)
+            # Resume right after the last prediction absorbed by the anchor.
+            idx_p = procedure(gs, pred, pred_arr, idx_p + 1) + 1
+        else:
+            # The predicted segment is longer: it is the anchor.
+            pred_arr.append(ps.source_text)
+            idx_p += 1
+            if ps.end < gs.start:
+                # No overlap: retry the same gt with the next prediction.
+                # NOTE(review): same positional drift as above for gt_arr.
+                continue
+            gt_arr.append(gs.source_text)
+            idx_t = procedure(ps, gt, gt_arr, idx_t + 1) + 1
+    # One side is exhausted: copy what is left of the other side exactly once
+    # (the draft re-appended a stale segment when an index ran off the end).
+    pred_arr.extend(pred[k].source_text for k in range(idx_p, len(pred)))
+    gt_arr.extend(gt[k].source_text for k in range(idx_t, len(gt)))
+    return pred_arr, gt_arr
+def alignment(pred_path, gt_path, threshold=0.3):
+    """Align the segments of a predicted SRT file with a ground-truth one.
+
+    Both files are parsed with ``SrtScript.parse_from_srt_file`` and their
+    ``segments`` are walked in parallel.  At each step the segment with the
+    longer duration acts as anchor and ``procedure`` merges the following
+    segments of the other file into a single text.
+
+    Args:
+        pred_path: Path of the predicted ``.srt`` file.
+        gt_path: Path of the ground-truth ``.srt`` file.
+        threshold: Not used by the current implementation; kept so existing
+            callers keep working.
+
+    Returns:
+        A ``zip`` iterator over ``(pred_text, gt_text)`` pairs.  ``zip`` stops
+        at the shorter of the two lists, so surplus entries are dropped.
+    """
+    pred = SrtScript.parse_from_srt_file(pred_path).segments
+    gt = SrtScript.parse_from_srt_file(gt_path).segments
+    pred_arr, gt_arr = _align_segments(pred, gt)
+    return zip(pred_arr, gt_arr)
+if __name__ == '__main__':
+    # Ad-hoc run on the sample outputs; guarded so that importing this
+    # module does not parse files or print as a side effect.
+    alignment('../results/OVB/OVB_en.srt', '../results/OVM/OVM_en.srt')