Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -189,33 +189,33 @@ if file_text:
|
|
189 |
display_context_table(context_words)
|
190 |
|
191 |
with st.expander("📝 Sentence Clustering", expanded=True):
|
192 |
-
sentences = [line.strip() for line in
|
193 |
-
|
194 |
num_sentences = len(sentences)
|
195 |
st.write(f"Total Sentences: {num_sentences}")
|
196 |
-
|
197 |
num_clusters = st.slider("Number of Clusters", min_value=2, max_value=10, value=5)
|
198 |
clustered_sentences = cluster_sentences(sentences, num_clusters)
|
199 |
-
|
200 |
col1, col2 = st.columns(2)
|
201 |
-
|
202 |
with col1:
|
203 |
st.subheader("Original Text")
|
204 |
original_text = "\n".join(sentences)
|
205 |
st.text_area("Original Sentences", value=original_text, height=400)
|
206 |
-
|
207 |
with col2:
|
208 |
st.subheader("Clustered Text")
|
209 |
clustered_text = ""
|
210 |
cluster_high_info_words = get_high_info_words_per_cluster(clustered_sentences)
|
211 |
-
|
212 |
for i, cluster in enumerate(clustered_sentences):
|
213 |
cluster_text = "\n".join(cluster)
|
214 |
high_info_words = ", ".join(cluster_high_info_words[i])
|
215 |
clustered_text += f"Cluster {i+1} (High Info Words: {high_info_words}):\n{cluster_text}\n\n"
|
216 |
-
|
217 |
st.text_area("Clustered Sentences", value=clustered_text, height=400)
|
218 |
-
|
219 |
# Verify that all sentences are accounted for in the clustered output
|
220 |
clustered_sentences_flat = [sentence for cluster in clustered_sentences for sentence in cluster]
|
221 |
if set(sentences) == set(clustered_sentences_flat):
|
|
|
189 |
display_context_table(context_words)
|
190 |
|
191 |
with st.expander("📝 Sentence Clustering", expanded=True):
|
192 |
+
sentences = [line.strip() for line in file_text.split('\n') if len(line.strip()) > 10]
|
193 |
+
|
194 |
num_sentences = len(sentences)
|
195 |
st.write(f"Total Sentences: {num_sentences}")
|
196 |
+
|
197 |
num_clusters = st.slider("Number of Clusters", min_value=2, max_value=10, value=5)
|
198 |
clustered_sentences = cluster_sentences(sentences, num_clusters)
|
199 |
+
|
200 |
col1, col2 = st.columns(2)
|
201 |
+
|
202 |
with col1:
|
203 |
st.subheader("Original Text")
|
204 |
original_text = "\n".join(sentences)
|
205 |
st.text_area("Original Sentences", value=original_text, height=400)
|
206 |
+
|
207 |
with col2:
|
208 |
st.subheader("Clustered Text")
|
209 |
clustered_text = ""
|
210 |
cluster_high_info_words = get_high_info_words_per_cluster(clustered_sentences)
|
211 |
+
|
212 |
for i, cluster in enumerate(clustered_sentences):
|
213 |
cluster_text = "\n".join(cluster)
|
214 |
high_info_words = ", ".join(cluster_high_info_words[i])
|
215 |
clustered_text += f"Cluster {i+1} (High Info Words: {high_info_words}):\n{cluster_text}\n\n"
|
216 |
+
|
217 |
st.text_area("Clustered Sentences", value=clustered_text, height=400)
|
218 |
+
|
219 |
# Verify that all sentences are accounted for in the clustered output
|
220 |
clustered_sentences_flat = [sentence for cluster in clustered_sentences for sentence in cluster]
|
221 |
if set(sentences) == set(clustered_sentences_flat):
|