ShadowDominator committed on
Commit
6e81c07
1 Parent(s): bb679fd

Upload 2 files

Files changed (2)
  1. app.py +61 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,61 @@
+import gradio as gr
+import pandas as pd
+from sentence_transformers import SentenceTransformer
+from sklearn.cluster import KMeans
+
+# Sentence-embedding model used to encode the input sentences.
+embedder = SentenceTransformer('all-MiniLM-L6-v2')
+
+example = {'sentence': [
+    "Today is a beautiful day, with clear blue skies and a gentle breeze.",
+    "I love to read books and explore new ideas and concepts.",
+    "My favorite hobby is hiking in the mountains and enjoying the stunning views.",
+    "I am grateful for my family and friends, who always support and encourage me.",
+    "Life is full of challenges and opportunities, and it's up to us to make the most of them.",
+    "The sound of the waves crashing on the shore is incredibly soothing to me.",
+    "I believe that laughter is the best medicine for any problem or difficulty in life.",
+    "Learning a new language is a challenging but rewarding experience.",
+    "The beauty of nature always fills me with a sense of awe and wonder.",
+    "I am constantly amazed by the resilience and strength of the human spirit in the face of adversity."
+]}
+df_example = pd.DataFrame(example)
+
+
+def sentence(k_value, all_sentence):
+    """Embed the sentences, cluster them with k-means, and return a labelled dataframe."""
+    corpus = list(all_sentence['sentence'])
+    corpus_embeddings = embedder.encode(corpus)
+
+    # Perform k-means clustering on the sentence embeddings.
+    num_clusters = int(k_value)
+    clustering_model = KMeans(n_clusters=num_clusters)
+    clustering_model.fit(corpus_embeddings)
+    cluster_assignment = clustering_model.labels_
+
+    # Group the sentences by their assigned cluster.
+    clustered_sentences = [[] for _ in range(num_clusters)]
+    for sentence_id, cluster_id in enumerate(cluster_assignment):
+        clustered_sentences[cluster_id].append(corpus[sentence_id])
+
+    # Label the clusters A, B, C, ... and collect the rows into a dataframe.
+    rows = []
+    for i, cluster in enumerate(clustered_sentences):
+        for sent in cluster:
+            rows.append({'class': chr(65 + i), 'sentence': sent})
+    return pd.DataFrame(rows, columns=['class', 'sentence'])
+
+
+with gr.Blocks(title="Sentence Clustering") as demo:
+    with gr.Row():
+        with gr.Column():
+            with gr.Row():
+                with gr.Column(min_width=20):
+                    num = gr.Number(label="Number of clusters", value=4)
+                with gr.Column():
+                    pass
+
+            inputs = [
+                num,
+                gr.Dataframe(
+                    value=df_example,
+                    datatype=["str"],
+                    col_count=(1, False)
+                ),
+            ]
+        with gr.Column():
+            outputs = gr.Dataframe(
+                headers=["class", "sentence"],
+                datatype=["str", "str"],
+            )
+
+    greet_btn = gr.Button("RUN")
+    greet_btn.click(fn=sentence, inputs=inputs, outputs=outputs, api_name="sentence_clustering")
+
+demo.launch()
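For quick verification outside the Gradio UI, the embed-then-cluster pipeline used by sentence() can be exercised on its own. The following is a minimal sketch (not part of the commit), assuming the packages pinned in requirements.txt are installed; the model weights are downloaded on first use, and the example sentences below are made up for illustration:

from sentence_transformers import SentenceTransformer
from sklearn.cluster import KMeans

# Two obviously distinct topics, so two clusters should separate them.
sentences = [
    "The hike up the ridge was steep, but the view was worth it.",
    "Mountain trails are my favourite place to spend a weekend.",
    "The central bank raised interest rates again this quarter.",
    "Inflation figures surprised analysts for the third month in a row.",
]

embedder = SentenceTransformer('all-MiniLM-L6-v2')  # same model as app.py
embeddings = embedder.encode(sentences)

# n_init is set explicitly to avoid the scikit-learn 1.2 FutureWarning.
labels = KMeans(n_clusters=2, n_init=10).fit_predict(embeddings)
for label, text in zip(labels, sentences):
    print(label, text)
# Typically the two hiking sentences share one label and the two finance sentences the other.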
requirements.txt ADDED
@@ -0,0 +1,4 @@
+gradio==3.32.0
+pandas==2.0.0
+scikit_learn==1.2.2
+sentence_transformers==2.2.2
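Locally, the demo can typically be started with:

pip install -r requirements.txt
python app.py

Gradio then serves the interface on http://127.0.0.1:7860 by default.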