awacke1 committed on
Commit
22a036b
1 Parent(s): 0718e3a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -14
app.py CHANGED
@@ -108,7 +108,6 @@ def load_example_files():
108
 
109
  return None
110
 
111
-
112
  def cluster_sentences(sentences, num_clusters):
113
  # Filter sentences with length over 10 characters
114
  sentences = [sentence for sentence in sentences if len(sentence) > 10]
@@ -142,7 +141,6 @@ def cluster_sentences(sentences, num_clusters):
142
  # Return the ordered clustered sentences without similarity scores for display
143
  return [[sentence for _, sentence in cluster] for cluster in clustered_sentences]
144
 
145
-
146
  # Function to convert text to a downloadable file
147
  def get_text_file_download_link(text_to_download, filename='Output.txt', button_label="💾 Save"):
148
  buffer = BytesIO()
@@ -152,6 +150,14 @@ def get_text_file_download_link(text_to_download, filename='Output.txt', button_
152
  href = f'<a href="data:file/txt;base64,{b64}" download="{filename}" style="margin-top:20px;">{button_label}</a>'
153
  return href
154
 
 
 
 
 
 
 
 
 
155
  # Main code for UI
156
  uploaded_file = st.file_uploader("📁 Choose a .txt file", type=['txt'])
157
 
@@ -188,17 +194,23 @@ if file_text:
188
  num_clusters = st.slider("Number of Clusters", min_value=2, max_value=10, value=5)
189
  clustered_sentences = cluster_sentences(sentences, num_clusters)
190
 
191
- for i, cluster in enumerate(clustered_sentences):
192
- st.subheader(f"Cluster {i+1}")
193
- cluster_text = "\n".join(cluster)
194
- st.text_area(f"Cluster {i+1} Sentences", value=cluster_text, height=200)
195
-
196
- # Input for custom filename
197
- default_filename = f"Cluster_{i+1}_Output.txt"
198
- filename = st.text_input("Enter filename for download:", value=default_filename, key=f"filename_{i}")
199
-
200
- # Download button
201
- download_link = get_text_file_download_link(cluster_text, filename, f"💾 Save Cluster {i+1}")
202
- st.markdown(download_link, unsafe_allow_html=True)
 
 
 
 
 
 
203
 
204
  st.markdown("For more information and updates, visit our [help page](https://huggingface.co/awacke1).")
 
108
 
109
  return None
110
 
 
111
  def cluster_sentences(sentences, num_clusters):
112
  # Filter sentences with length over 10 characters
113
  sentences = [sentence for sentence in sentences if len(sentence) > 10]
 
141
  # Return the ordered clustered sentences without similarity scores for display
142
  return [[sentence for _, sentence in cluster] for cluster in clustered_sentences]
143
 
 
144
  # Function to convert text to a downloadable file
145
  def get_text_file_download_link(text_to_download, filename='Output.txt', button_label="💾 Save"):
146
  buffer = BytesIO()
 
150
  href = f'<a href="data:file/txt;base64,{b64}" download="{filename}" style="margin-top:20px;">{button_label}</a>'
151
  return href
152
 
153
def get_high_info_words_per_cluster(cluster_sentences, num_words=5):
    """Return the high-information words for each cluster, in cluster order.

    Each cluster's sentences are joined with single spaces into one text,
    which is passed to ``extract_high_information_words`` together with
    ``num_words``.

    Args:
        cluster_sentences: Iterable of clusters, where each cluster is an
            iterable of sentence strings.
        num_words: Value forwarded as the second argument of
            ``extract_high_information_words`` (presumably the number of
            words to keep per cluster -- confirm against that helper).

    Returns:
        A list with one entry per cluster, each entry being whatever
        ``extract_high_information_words`` returned for that cluster's text.
    """
    # One helper call per cluster; results stay aligned with the input order
    # so callers can index them by cluster position.
    return [
        extract_high_information_words(" ".join(members), num_words)
        for members in cluster_sentences
    ]
160
+
161
  # Main code for UI
162
  uploaded_file = st.file_uploader("📁 Choose a .txt file", type=['txt'])
163
 
 
194
  num_clusters = st.slider("Number of Clusters", min_value=2, max_value=10, value=5)
195
  clustered_sentences = cluster_sentences(sentences, num_clusters)
196
 
197
+ col1, col2 = st.columns(2)
198
+
199
+ with col1:
200
+ st.subheader("Original Text")
201
+ original_text = "\n".join(sentences)
202
+ st.text_area("Original Sentences", value=original_text, height=400)
203
+
204
+ with col2:
205
+ st.subheader("Clustered Text")
206
+ clustered_text = ""
207
+ cluster_high_info_words = get_high_info_words_per_cluster(clustered_sentences)
208
+
209
+ for i, cluster in enumerate(clustered_sentences):
210
+ cluster_text = "\n".join(cluster)
211
+ high_info_words = ", ".join(cluster_high_info_words[i])
212
+ clustered_text += f"Cluster {i+1} (High Info Words: {high_info_words}):\n{cluster_text}\n\n"
213
+
214
+ st.text_area("Clustered Sentences", value=clustered_text, height=400)
215
 
216
  st.markdown("For more information and updates, visit our [help page](https://huggingface.co/awacke1).")