Spaces:

awacke1
/

Transcript-EDA-NLTK

Sleeping

App Files Community

awacke1 committed on Mar 14

Commit

4dae6a4

•

1 Parent(s): e05e42b

Update app.py

Browse files

Files changed (1) hide show

app.py +9 -9

app.py CHANGED Viewed

@@ -189,33 +189,33 @@ if file_text:
         display_context_table(context_words)
     with st.expander("📝 Sentence Clustering", expanded=True):
-        sentences = [line.strip() for line in text_without_timestamps.split('\n') if len(line.strip()) > 10]
         num_sentences = len(sentences)
         st.write(f"Total Sentences: {num_sentences}")
         num_clusters = st.slider("Number of Clusters", min_value=2, max_value=10, value=5)
         clustered_sentences = cluster_sentences(sentences, num_clusters)
         col1, col2 = st.columns(2)
         with col1:
             st.subheader("Original Text")
             original_text = "\n".join(sentences)
             st.text_area("Original Sentences", value=original_text, height=400)
         with col2:
             st.subheader("Clustered Text")
             clustered_text = ""
             cluster_high_info_words = get_high_info_words_per_cluster(clustered_sentences)
             for i, cluster in enumerate(clustered_sentences):
                 cluster_text = "\n".join(cluster)
                 high_info_words = ", ".join(cluster_high_info_words[i])
                 clustered_text += f"Cluster {i+1} (High Info Words: {high_info_words}):\n{cluster_text}\n\n"
             st.text_area("Clustered Sentences", value=clustered_text, height=400)
             # Verify that all sentences are accounted for in the clustered output
             clustered_sentences_flat = [sentence for cluster in clustered_sentences for sentence in cluster]
             if set(sentences) == set(clustered_sentences_flat):

         display_context_table(context_words)
     with st.expander("📝 Sentence Clustering", expanded=True):
+        sentences = [line.strip() for line in file_text.split('\n') if len(line.strip()) > 10]
         num_sentences = len(sentences)
         st.write(f"Total Sentences: {num_sentences}")
         num_clusters = st.slider("Number of Clusters", min_value=2, max_value=10, value=5)
         clustered_sentences = cluster_sentences(sentences, num_clusters)
         col1, col2 = st.columns(2)
         with col1:
             st.subheader("Original Text")
             original_text = "\n".join(sentences)
             st.text_area("Original Sentences", value=original_text, height=400)
         with col2:
             st.subheader("Clustered Text")
             clustered_text = ""
             cluster_high_info_words = get_high_info_words_per_cluster(clustered_sentences)
             for i, cluster in enumerate(clustered_sentences):
                 cluster_text = "\n".join(cluster)
                 high_info_words = ", ".join(cluster_high_info_words[i])
                 clustered_text += f"Cluster {i+1} (High Info Words: {high_info_words}):\n{cluster_text}\n\n"
             st.text_area("Clustered Sentences", value=clustered_text, height=400)
             # Verify that all sentences are accounted for in the clustered output
             clustered_sentences_flat = [sentence for cluster in clustered_sentences for sentence in cluster]
             if set(sentences) == set(clustered_sentences_flat):