Spaces:

AI-Dashboards
/

Topic-Modeling-Clusters-Free-Text

Sleeping

App Files Community

awacke1 committed on Mar 20, 2023

Commit

aa1d6fa

1 Parent(s): 8d7c44f

Update app.py

Browse files

Files changed (1) hide show

app.py +63 -1

app.py CHANGED Viewed

@@ -2,17 +2,79 @@ import streamlit as st
 import spacy
 import numpy as np
 from gensim import corpora, models
-from utils import window, get_depths, get_local_maxima, compute_threshold, get_threshold_segments
 from itertools import chain
 from sklearn.preprocessing import MultiLabelBinarizer
 from sklearn.metrics.pairwise import cosine_similarity
 # Load the spaCy 'en_core_web_sm' pipeline once, at module import, and keep it in the global `nlp`.
 nlp = spacy.load('en_core_web_sm')
 def print_list(lst):
     """Write each element of ``lst`` to the Streamlit page as a Markdown bullet.

     Every element is concatenated onto the ``"- "`` prefix, so elements are
     expected to be strings.
     """
     bullets = ("- " + entry for entry in lst)
     for bullet in bullets:
         st.markdown(bullet)
 # Section heading shown on the page.
 st.subheader("Topic Modeling with Segmentation")
 # Upload widget limited to .txt files; the check that follows treats None as "no file provided".
 uploaded_file = st.file_uploader("choose a text file", type=["txt"])
 if uploaded_file is not None:

 import spacy
 import numpy as np
 from gensim import corpora, models
+# from utils import window, get_depths, get_local_maxima, compute_threshold, get_threshold_segments
 from itertools import chain
 from sklearn.preprocessing import MultiLabelBinarizer
 from sklearn.metrics.pairwise import cosine_similarity
+from itertools import islice
+from scipy.signal import argrelmax
 # Load the spaCy 'en_core_web_sm' pipeline once, at module import, and keep it in the global `nlp`.
 nlp = spacy.load('en_core_web_sm')
def window(seq, n=3):
    """Yield successive overlapping windows of ``n`` items from ``seq``.

    Each window is a tuple; consecutive windows are shifted by one item.
    Nothing is yielded when ``seq`` holds fewer than ``n`` items.

    Args:
        seq: Any iterable.
        n: Window width; must be at least 1.

    Yields:
        Tuples of exactly ``n`` consecutive items.

    Raises:
        ValueError: If ``n`` is smaller than 1. Because this is a generator,
            the error surfaces when iteration starts.
    """
    # A zero-width window used to yield () followed by 1-tuples; reject it
    # up front. Negative widths already failed with ValueError inside islice.
    if n < 1:
        raise ValueError("window size n must be at least 1")
    it = iter(seq)
    current = tuple(islice(it, n))
    if len(current) < n:
        # Input exhausted before a single full window could be formed.
        return
    yield current
    for item in it:
        # Slide by one: drop the oldest item, append the newest.
        current = current[1:] + (item,)
        yield current
def get_depths(scores):
    """Compute a depth score for every position in ``scores``.

    From each position the search walks left and then right for as long as
    the neighbouring value is strictly greater than the current one; the
    last value reached on each side is that side's peak. The depth is the
    average of the two drops from those peaks down to the position's score:
    ``0.5 * (left_peak + right_peak - 2 * score)``.

    Args:
        scores: Indexable sequence of numbers (list or 1-D array).

    Returns:
        ``np.ndarray`` with one depth per input position (empty for empty
        input).
    """
    last = len(scores) - 1

    def climb(start, step):
        # Walk from `start` in direction `step` (-1 = left, +1 = right)
        # while values keep strictly increasing; return the peak value.
        pos = start
        peak = scores[pos]
        while 0 <= pos + step <= last and scores[pos + step] > peak:
            pos += step
            peak = scores[pos]
        return peak

    depths = [
        0.5 * (climb(i, -1) + climb(i, 1) - (2 * score))
        for i, score in enumerate(scores)
    ]
    return np.array(depths)
def get_local_maxima(depth_scores, order=1):
    """Keep only the strict local maxima of ``depth_scores``; zero the rest.

    Args:
        depth_scores: 1-D sequence of numbers (list or array).
        order: Number of neighbours on each side that a point must exceed to
            count as a maximum; passed straight to ``scipy.signal.argrelmax``.

    Returns:
        Float ``np.ndarray`` of the same length as the input, holding the
        original value at each local maximum and 0 everywhere else.
    """
    # argrelmax and the fancy indexing below need an ndarray; coercing here
    # lets callers pass plain Python lists as well.
    depth_scores = np.asarray(depth_scores)
    maxima_ids = argrelmax(depth_scores, order=order)[0]
    filtered_scores = np.zeros(len(depth_scores))
    filtered_scores[maxima_ids] = depth_scores[maxima_ids]
    return filtered_scores
def compute_threshold(scores):
    """Return ``mean - std / 2`` computed over the non-zero entries of ``scores``.

    Args:
        scores: Sequence of numbers (list or array); zero entries are ignored.

    Returns:
        The threshold as a float. When there are no non-zero entries the
        result is NaN, so any ``>=`` comparison against it is False.
    """
    # Coerce so that plain lists can be indexed with the nonzero mask.
    scores = np.asarray(scores)
    nonzero = scores[np.nonzero(scores)]
    if nonzero.size == 0:
        # mean/std of an empty array would also give NaN, but only after
        # emitting RuntimeWarnings; return the same value quietly.
        return np.nan
    return np.mean(nonzero) - (np.std(nonzero) / 2)
def get_threshold_segments(scores, threshold=0.1):
    """Return the indices whose score is greater than or equal to ``threshold``.

    Args:
        scores: 1-D sequence of numbers (list or array).
        threshold: Minimum score for an index to be selected.

    Returns:
        ``np.ndarray`` of integer indices in ascending order; empty when no
        score reaches the threshold (including when ``threshold`` is NaN).
    """
    # Coerce first: comparing a plain list with a float raises TypeError.
    scores = np.asarray(scores)
    return np.where(scores >= threshold)[0]
 def print_list(lst):
     """Write each element of ``lst`` to the Streamlit page as a Markdown bullet.

     Every element is concatenated onto the ``"- "`` prefix, so elements are
     expected to be strings.
     """
     bullets = ("- " + entry for entry in lst)
     for bullet in bullets:
         st.markdown(bullet)
 # Section heading shown on the page.
 st.subheader("Topic Modeling with Segmentation")
 # Upload widget limited to .txt files; the check that follows treats None as "no file provided".
 uploaded_file = st.file_uploader("choose a text file", type=["txt"])
 if uploaded_file is not None: