awacke1 committed on
Commit
4dae6a4
1 Parent(s): e05e42b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -9
app.py CHANGED
@@ -189,33 +189,33 @@ if file_text:
189
  display_context_table(context_words)
190
 
191
  with st.expander("📝 Sentence Clustering", expanded=True):
192
- sentences = [line.strip() for line in text_without_timestamps.split('\n') if len(line.strip()) > 10]
193
-
194
  num_sentences = len(sentences)
195
  st.write(f"Total Sentences: {num_sentences}")
196
-
197
  num_clusters = st.slider("Number of Clusters", min_value=2, max_value=10, value=5)
198
  clustered_sentences = cluster_sentences(sentences, num_clusters)
199
-
200
  col1, col2 = st.columns(2)
201
-
202
  with col1:
203
  st.subheader("Original Text")
204
  original_text = "\n".join(sentences)
205
  st.text_area("Original Sentences", value=original_text, height=400)
206
-
207
  with col2:
208
  st.subheader("Clustered Text")
209
  clustered_text = ""
210
  cluster_high_info_words = get_high_info_words_per_cluster(clustered_sentences)
211
-
212
  for i, cluster in enumerate(clustered_sentences):
213
  cluster_text = "\n".join(cluster)
214
  high_info_words = ", ".join(cluster_high_info_words[i])
215
  clustered_text += f"Cluster {i+1} (High Info Words: {high_info_words}):\n{cluster_text}\n\n"
216
-
217
  st.text_area("Clustered Sentences", value=clustered_text, height=400)
218
-
219
  # Verify that all sentences are accounted for in the clustered output
220
  clustered_sentences_flat = [sentence for cluster in clustered_sentences for sentence in cluster]
221
  if set(sentences) == set(clustered_sentences_flat):
 
189
  display_context_table(context_words)
190
 
191
  with st.expander("📝 Sentence Clustering", expanded=True):
192
+ sentences = [line.strip() for line in file_text.split('\n') if len(line.strip()) > 10]
193
+
194
  num_sentences = len(sentences)
195
  st.write(f"Total Sentences: {num_sentences}")
196
+
197
  num_clusters = st.slider("Number of Clusters", min_value=2, max_value=10, value=5)
198
  clustered_sentences = cluster_sentences(sentences, num_clusters)
199
+
200
  col1, col2 = st.columns(2)
201
+
202
  with col1:
203
  st.subheader("Original Text")
204
  original_text = "\n".join(sentences)
205
  st.text_area("Original Sentences", value=original_text, height=400)
206
+
207
  with col2:
208
  st.subheader("Clustered Text")
209
  clustered_text = ""
210
  cluster_high_info_words = get_high_info_words_per_cluster(clustered_sentences)
211
+
212
  for i, cluster in enumerate(clustered_sentences):
213
  cluster_text = "\n".join(cluster)
214
  high_info_words = ", ".join(cluster_high_info_words[i])
215
  clustered_text += f"Cluster {i+1} (High Info Words: {high_info_words}):\n{cluster_text}\n\n"
216
+
217
  st.text_area("Clustered Sentences", value=clustered_text, height=400)
218
+
219
  # Verify that all sentences are accounted for in the clustered output
220
  clustered_sentences_flat = [sentence for cluster in clustered_sentences for sentence in cluster]
221
  if set(sentences) == set(clustered_sentences_flat):