awacke1 committed on
Commit
aa1d6fa
·
1 Parent(s): 8d7c44f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -1
app.py CHANGED
@@ -2,17 +2,79 @@ import streamlit as st
2
  import spacy
3
  import numpy as np
4
  from gensim import corpora, models
5
- from utils import window, get_depths, get_local_maxima, compute_threshold, get_threshold_segments
6
  from itertools import chain
7
  from sklearn.preprocessing import MultiLabelBinarizer
8
  from sklearn.metrics.pairwise import cosine_similarity
 
 
9
 
10
  nlp = spacy.load('en_core_web_sm')
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  def print_list(lst):
13
  for e in lst:
14
  st.markdown("- " + e)
15
 
 
16
  st.subheader("Topic Modeling with Segmentation")
17
  uploaded_file = st.file_uploader("choose a text file", type=["txt"])
18
  if uploaded_file is not None:
 
2
  import spacy
3
  import numpy as np
4
  from gensim import corpora, models
5
+ # from utils import window, get_depths, get_local_maxima, compute_threshold, get_threshold_segments
6
  from itertools import chain
7
  from sklearn.preprocessing import MultiLabelBinarizer
8
  from sklearn.metrics.pairwise import cosine_similarity
9
+ from itertools import islice
10
+ from scipy.signal import argrelmax
11
 
12
  nlp = spacy.load('en_core_web_sm')
13
 
14
+
15
def window(seq, n=3):
    """Yield overlapping ``n``-tuples that slide over ``seq`` one item at a time.

    Args:
        seq: Any iterable.
        n: Width of each window.

    Yields:
        Tuples of ``n`` consecutive items.  Nothing is yielded when ``seq``
        holds fewer than ``n`` items.
    """
    source = iter(seq)
    frame = tuple(islice(source, n))
    if len(frame) == n:
        yield frame
    # Each further item pushes the oldest one out of the frame.
    for item in source:
        frame = (*frame[1:], item)
        yield frame
23
+
24
def get_depths(scores):
    """Compute a depth score for every position in ``scores``.

    For each index, the peak on either side is found by walking outwards for
    as long as the values keep strictly increasing.  The depth is then
    ``0.5 * (left_peak + right_peak - 2 * score)``.

    Args:
        scores: Indexable sequence of numbers (e.g. a list or 1-D array).

    Returns:
        ``np.ndarray`` holding one depth per entry of ``scores``.
    """
    count = len(scores)

    def peak_from(start, step):
        # Walk in direction `step` (-1 = left, +1 = right) until the sequence
        # ends or the next value is no longer strictly greater.
        pos = start
        height = scores[pos]
        while 0 <= pos + step < count:
            candidate = scores[pos + step]
            if not candidate > height:
                break
            pos += step
            height = candidate
        return height

    return np.array([
        0.5 * (peak_from(idx, -1) + peak_from(idx, 1) - (2 * value))
        for idx, value in enumerate(scores)
    ])
55
+
56
+
57
def get_local_maxima(depth_scores, order=1):
    """Keep only the local maxima of ``depth_scores`` and zero everything else.

    Args:
        depth_scores: 1-D array-like of numbers (list, tuple or ndarray).
        order: How many neighbours on each side a point is compared against;
            forwarded unchanged to ``scipy.signal.argrelmax``.

    Returns:
        Float ``np.ndarray`` of the same length as ``depth_scores`` with the
        original value at each local maximum and ``0`` elsewhere.
    """
    # argrelmax reads ``.shape`` and the fancy indexing below needs an
    # ndarray, so coerce plain sequences up front instead of failing on them.
    depth_scores = np.asarray(depth_scores)
    maxima_ids = argrelmax(depth_scores, order=order)[0]
    filtered_scores = np.zeros(len(depth_scores))
    filtered_scores[maxima_ids] = depth_scores[maxima_ids]
    return filtered_scores
62
+
63
def compute_threshold(scores):
    """Return the mean of the non-zero scores minus half their standard deviation.

    Args:
        scores: NumPy array of scores; zero entries are ignored.

    Returns:
        The threshold ``mean - std / 2`` computed over the non-zero entries.
    """
    active = scores[np.nonzero(scores)]
    half_spread = np.std(active) / 2
    return np.mean(active) - half_spread
67
+
68
def get_threshold_segments(scores, threshold=0.1):
    """Return the indices whose score is greater than or equal to ``threshold``.

    Args:
        scores: NumPy array of scores.
        threshold: Inclusive lower bound a score must reach to be selected.

    Returns:
        Integer ``np.ndarray`` of the selected indices along the first axis.
    """
    reaches_threshold = scores >= threshold
    return np.nonzero(reaches_threshold)[0]
71
+
72
+
73
  def print_list(lst):
74
  for e in lst:
75
  st.markdown("- " + e)
76
 
77
+
78
  st.subheader("Topic Modeling with Segmentation")
79
  uploaded_file = st.file_uploader("choose a text file", type=["txt"])
80
  if uploaded_file is not None: