ShadowDominator committed on
Commit
6e81c07
1 Parent(s): bb679fd

Upload 2 files

Files changed (2)
  1. app.py +61 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,61 @@
+import gradio as gr
+import pandas as pd
+from sentence_transformers import SentenceTransformer
+from sklearn.cluster import KMeans
+
+# Sentence-embedding model used to encode the input sentences.
+embedder = SentenceTransformer('all-MiniLM-L6-v2')
+
+example = {'sentence': [
+    "Today is a beautiful day, with clear blue skies and a gentle breeze.",
+    "I love to read books and explore new ideas and concepts.",
+    "My favorite hobby is hiking in the mountains and enjoying the stunning views.",
+    "I am grateful for my family and friends, who always support and encourage me.",
+    "Life is full of challenges and opportunities, and it's up to us to make the most of them.",
+    "The sound of the waves crashing on the shore is incredibly soothing to me.",
+    "I believe that laughter is the best medicine for any problem or difficulty in life.",
+    "Learning a new language is a challenging but rewarding experience.",
+    "The beauty of nature always fills me with a sense of awe and wonder.",
+    "I am constantly amazed by the resilience and strength of the human spirit in the face of adversity."
+]}
+df_example = pd.DataFrame(example)
+
+
+def sentence(k_value, all_sentence):
+    """Embed the sentences, cluster them with k-means, and return a labelled dataframe."""
+    corpus = list(all_sentence['sentence'])
+    corpus_embeddings = embedder.encode(corpus)
+
+    # Perform k-means clustering on the sentence embeddings.
+    num_clusters = int(k_value)
+    clustering_model = KMeans(n_clusters=num_clusters)
+    clustering_model.fit(corpus_embeddings)
+    cluster_assignment = clustering_model.labels_
+
+    # Group the sentences by their assigned cluster.
+    clustered_sentences = [[] for _ in range(num_clusters)]
+    for sentence_id, cluster_id in enumerate(cluster_assignment):
+        clustered_sentences[cluster_id].append(corpus[sentence_id])
+
+    # Label the clusters A, B, C, ... and collect the rows into a dataframe.
+    rows = []
+    for i, cluster in enumerate(clustered_sentences):
+        for sent in cluster:
+            rows.append({'class': chr(65 + i), 'sentence': sent})
+    return pd.DataFrame(rows, columns=['class', 'sentence'])
+
+
+with gr.Blocks(title="Sentence Clustering") as demo:
+    with gr.Row():
+        with gr.Column():
+            with gr.Row():
+                with gr.Column(min_width=20):
+                    num = gr.Number(label="Number of clusters", value=4)
+                with gr.Column():
+                    pass
+
+            inputs = [
+                num,
+                gr.Dataframe(
+                    value=df_example,
+                    datatype=["str"],
+                    col_count=(1, False)
+                ),
+            ]
+        with gr.Column():
+            outputs = gr.Dataframe(
+                headers=["class", "sentence"],
+                datatype=["str", "str"],
+            )
+
+    greet_btn = gr.Button("RUN")
+    greet_btn.click(fn=sentence, inputs=inputs, outputs=outputs, api_name="sentence_clustering")
+
+demo.launch()
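For quick verification outside the Gradio UI, the embed-then-cluster pipeline used by sentence() can be exercised on its own. The following is a minimal sketch (not part of the commit), assuming the packages pinned in requirements.txt are installed; the model weights are downloaded on first use, and the example sentences below are made up for illustration:

from sentence_transformers import SentenceTransformer
from sklearn.cluster import KMeans

# Two obviously distinct topics, so two clusters should separate them.
sentences = [
    "The hike up the ridge was steep, but the view was worth it.",
    "Mountain trails are my favourite place to spend a weekend.",
    "The central bank raised interest rates again this quarter.",
    "Inflation figures surprised analysts for the third month in a row.",
]

embedder = SentenceTransformer('all-MiniLM-L6-v2')  # same model as app.py
embeddings = embedder.encode(sentences)

# n_init is set explicitly to avoid the scikit-learn 1.2 FutureWarning.
labels = KMeans(n_clusters=2, n_init=10).fit_predict(embeddings)
for label, text in zip(labels, sentences):
    print(label, text)
# Typically the two hiking sentences share one label and the two finance sentences the other.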
requirements.txt ADDED
@@ -0,0 +1,4 @@
+gradio==3.32.0
+pandas==2.0.0
+scikit_learn==1.2.2
+sentence_transformers==2.2.2
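Locally, the demo can typically be started with:

pip install -r requirements.txt
python app.py

Gradio then serves the interface on http://127.0.0.1:7860 by default.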