Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -108,7 +108,6 @@ def load_example_files():
|
|
108 |
|
109 |
return None
|
110 |
|
111 |
-
|
112 |
def cluster_sentences(sentences, num_clusters):
|
113 |
# Filter sentences with length over 10 characters
|
114 |
sentences = [sentence for sentence in sentences if len(sentence) > 10]
|
@@ -142,7 +141,6 @@ def cluster_sentences(sentences, num_clusters):
|
|
142 |
# Return the ordered clustered sentences without similarity scores for display
|
143 |
return [[sentence for _, sentence in cluster] for cluster in clustered_sentences]
|
144 |
|
145 |
-
|
146 |
# Function to convert text to a downloadable file
|
147 |
def get_text_file_download_link(text_to_download, filename='Output.txt', button_label="💾 Save"):
|
148 |
buffer = BytesIO()
|
@@ -152,6 +150,14 @@ def get_text_file_download_link(text_to_download, filename='Output.txt', button_
|
|
152 |
href = f'<a href="data:file/txt;base64,{b64}" download="{filename}" style="margin-top:20px;">{button_label}</a>'
|
153 |
return href
|
154 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
155 |
# Main code for UI
|
156 |
uploaded_file = st.file_uploader("📁 Choose a .txt file", type=['txt'])
|
157 |
|
@@ -188,17 +194,23 @@ if file_text:
|
|
188 |
num_clusters = st.slider("Number of Clusters", min_value=2, max_value=10, value=5)
|
189 |
clustered_sentences = cluster_sentences(sentences, num_clusters)
|
190 |
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
st.
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
203 |
|
204 |
st.markdown("For more information and updates, visit our [help page](https://huggingface.co/awacke1).")
|
|
|
108 |
|
109 |
return None
|
110 |
|
|
|
111 |
def cluster_sentences(sentences, num_clusters):
|
112 |
# Filter sentences with length over 10 characters
|
113 |
sentences = [sentence for sentence in sentences if len(sentence) > 10]
|
|
|
141 |
# Return the ordered clustered sentences without similarity scores for display
|
142 |
return [[sentence for _, sentence in cluster] for cluster in clustered_sentences]
|
143 |
|
|
|
144 |
# Function to convert text to a downloadable file
|
145 |
def get_text_file_download_link(text_to_download, filename='Output.txt', button_label="💾 Save"):
|
146 |
buffer = BytesIO()
|
|
|
150 |
href = f'<a href="data:file/txt;base64,{b64}" download="{filename}" style="margin-top:20px;">{button_label}</a>'
|
151 |
return href
|
152 |
|
153 |
+
def get_high_info_words_per_cluster(cluster_sentences, num_words=5):
    """Collect the high-information words for every cluster.

    The sentences of each cluster are joined with single spaces into one
    string, which is handed to ``extract_high_information_words`` together
    with ``num_words``.

    Args:
        cluster_sentences: Iterable of clusters, each an iterable of
            sentence strings.
        num_words: Forwarded unchanged as the second argument of
            ``extract_high_information_words`` (presumably the number of
            words to keep per cluster -- confirm against that helper).

    Returns:
        A list with one entry per cluster, in input order, holding whatever
        ``extract_high_information_words`` returned for that cluster.
    """
    return [
        extract_high_information_words(" ".join(sentences), num_words)
        for sentences in cluster_sentences
    ]
|
160 |
+
|
161 |
# Main code for UI
|
162 |
uploaded_file = st.file_uploader("📁 Choose a .txt file", type=['txt'])
|
163 |
|
|
|
194 |
num_clusters = st.slider("Number of Clusters", min_value=2, max_value=10, value=5)
|
195 |
clustered_sentences = cluster_sentences(sentences, num_clusters)
|
196 |
|
197 |
+
col1, col2 = st.columns(2)
|
198 |
+
|
199 |
+
with col1:
|
200 |
+
st.subheader("Original Text")
|
201 |
+
original_text = "\n".join(sentences)
|
202 |
+
st.text_area("Original Sentences", value=original_text, height=400)
|
203 |
+
|
204 |
+
with col2:
|
205 |
+
st.subheader("Clustered Text")
|
206 |
+
clustered_text = ""
|
207 |
+
cluster_high_info_words = get_high_info_words_per_cluster(clustered_sentences)
|
208 |
+
|
209 |
+
for i, cluster in enumerate(clustered_sentences):
|
210 |
+
cluster_text = "\n".join(cluster)
|
211 |
+
high_info_words = ", ".join(cluster_high_info_words[i])
|
212 |
+
clustered_text += f"Cluster {i+1} (High Info Words: {high_info_words}):\n{cluster_text}\n\n"
|
213 |
+
|
214 |
+
st.text_area("Clustered Sentences", value=clustered_text, height=400)
|
215 |
|
216 |
st.markdown("For more information and updates, visit our [help page](https://huggingface.co/awacke1).")
|