naveed92 committed on
Commit
a382f9d
1 Parent(s): 078f51e

Upload utils.py

Browse files
Files changed (1) hide show
  1. utils.py +80 -0
utils.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from itertools import islice
2
+ import numpy as np
3
+
4
+ # Sliding window function
5
def window(seq, n=3):
    """Yield overlapping tuples of n consecutive items taken from seq.

    Adapted from
    https://stackoverflow.com/questions/6822725/rolling-or-sliding-window-iterator

    Args:
        seq: Any iterable.
        n: Width of the sliding window.

    Yields:
        Tuples of n consecutive items, advancing by one item each time.
        Nothing is yielded when seq holds fewer than n items.
    """
    source = iter(seq)

    # Prime the first window with up to n items.
    current = tuple(islice(source, n))
    if len(current) == n:
        yield current

    # Slide forward: drop the oldest item and append the newest one.
    for item in source:
        current = (*current[1:], item)
        yield current
16
+
17
+ # Compute depth scores
18
def get_depths(scores):
    """Compute a depth score for every position of a coherence-score sequence.

    For position i the depth is
    ``0.5 * (left_peak + right_peak - 2 * scores[i])``, where each peak is the
    value reached by walking away from i in that direction for as long as the
    values keep strictly increasing.

    Args:
        scores: Indexable sequence of numeric scores (e.g. a list or a 1-D
            NumPy array).

    Returns:
        A NumPy array holding one depth per input score (empty when scores
        is empty).
    """
    n = len(scores)

    # left_peaks[i] is the value reached by climbing leftwards from i. When
    # the left neighbour is strictly higher, the climb from i continues
    # exactly like the climb from i - 1, so that result is reused. This keeps
    # the computation to a single pass instead of re-walking the slope for
    # every index.
    left_peaks = [None] * n
    for i in range(n):
        if i > 0 and scores[i - 1] > scores[i]:
            left_peaks[i] = left_peaks[i - 1]
        else:
            left_peaks[i] = scores[i]

    # Mirror image of the pass above, sweeping from the right end.
    right_peaks = [None] * n
    for i in range(n - 1, -1, -1):
        if i < n - 1 and scores[i + 1] > scores[i]:
            right_peaks[i] = right_peaks[i + 1]
        else:
            right_peaks[i] = scores[i]

    depths = [
        0.5 * (l_peak + r_peak - (2 * score))
        for l_peak, r_peak, score in zip(left_peaks, right_peaks, scores)
    ]
    return np.array(depths)
52
+
53
+
54
+ from scipy.signal import argrelmax
55
+
56
+ # Keep only the local maxima (every other score is zeroed out)
57
def get_local_maxima(depth_scores, order=1):
    """Zero out every depth score that is not a local maximum.

    Args:
        depth_scores: 1-D sequence of depth scores (list or NumPy array).
        order: Number of neighbours on each side that a value must exceed to
            count as a maximum; forwarded to ``scipy.signal.argrelmax``.

    Returns:
        A float array with the same length as depth_scores that holds the
        original value at each local maximum and 0 everywhere else. The
        comparison is strict, so flat (plateau) peaks are not selected.
    """
    # Coerce first: argrelmax and the fancy indexing below both need an
    # ndarray, so a plain Python list would otherwise fail.
    depth_scores = np.asarray(depth_scores)

    maxima_ids = argrelmax(depth_scores, order=order)[0]
    filtered_scores = np.zeros(len(depth_scores))
    filtered_scores[maxima_ids] = depth_scores[maxima_ids]
    return filtered_scores
65
+
66
+ # Automatic threshold computation
67
def compute_threshold(scores):
    """Automatically derive a segmentation threshold from depth scores.

    From TextTiling: https://aclanthology.org/J97-1003.pdf

    The threshold is ``mean - std / 2`` computed over the nonzero entries
    only, so scores that were zeroed out beforehand do not drag it down.

    Args:
        scores: Sequence of depth scores (list or NumPy array).

    Returns:
        The threshold as a NumPy float. When scores has no nonzero entry the
        result is NaN (NumPy emits a RuntimeWarning for the empty mean).
    """
    # Coerce so that plain lists support the index-array selection below.
    scores = np.asarray(scores)

    nonzero_scores = scores[np.nonzero(scores)]
    threshold = np.mean(nonzero_scores) - (np.std(nonzero_scores) / 2)
    return threshold
76
+
77
def get_threshold_segments(scores, threshold=0.1):
    """Return the indexes of the scores that reach the threshold.

    Args:
        scores: 1-D sequence of depth scores (list or NumPy array).
        threshold: Minimum value for an index to be selected; the comparison
            is inclusive (``>=``).

    Returns:
        A NumPy integer array with the indexes whose score is greater than or
        equal to threshold, in ascending order.
    """
    # Coerce so that a plain list can be compared element-wise.
    scores = np.asarray(scores)

    segment_ids = np.where(scores >= threshold)[0]
    return segment_ids