"""Gradio demo that groups sentences by K-Means over sentence embeddings."""

import gradio as gr
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.cluster import KMeans

# Created once at import time so every button click reuses the same model.
embedder = SentenceTransformer('all-MiniLM-L6-v2')

# Default sentences shown in the input table.
example = {'sentence': [
    "Today is a beautiful day, with clear blue skies and a gentle breeze.",
    "I love to read books and explore new ideas and concepts.",
    "My favorite hobby is hiking in the mountains and enjoying the stunning views.",
    "I am grateful for my family and friends, who always support and encourage me.",
    "Life is full of challenges and opportunities, and it's up to us to make the most of them.",
    "The sound of the waves crashing on the shore is incredibly soothing to me.",
    "I believe that laughter is the best medicine for any problem or difficulty in life.",
    "Learning a new language is a challenging but rewarding experience.",
    "The beauty of nature always fills me with a sense of awe and wonder.",
    "I am constantly amazed by the resilience and strength of the human spirit in the face of adversity.",
]}
df_example = pd.DataFrame(example)


def sentence(k_value, all_sentence):
    """Cluster the sentences of a table and label each with a class letter.

    Args:
        k_value: Requested number of clusters; anything ``int()`` accepts.
            It is clamped to the range ``[1, number of usable sentences]``
            because K-Means cannot fit more clusters than samples.
        all_sentence: DataFrame with a ``'sentence'`` column. It is not
            modified. Missing and whitespace-only cells are ignored.

    Returns:
        A new DataFrame with columns ``'class'`` and ``'sentence'``. Rows are
        grouped by cluster; cluster ``i`` is labelled ``chr(65 + i)`` (so the
        first clusters are ``'A'``, ``'B'``, ...). An empty frame with the
        same two columns is returned when there is nothing to cluster.

    Raises:
        gr.Error: If ``k_value`` cannot be converted to an integer.
    """
    result_columns = ['class', 'sentence']

    # Skip missing / blank rows: they carry no text to embed, and a NaN cell
    # would otherwise break the encoder.
    texts = all_sentence['sentence'].dropna().astype(str)
    corpus = [text for text in texts if text.strip()]
    if not corpus:
        return pd.DataFrame(columns=result_columns)

    try:
        requested = int(k_value)
    except (TypeError, ValueError, OverflowError) as err:
        raise gr.Error("The number of clusters must be a whole number.") from err
    # Keep the cluster count valid for K-Means (1 <= k <= n_samples).
    num_clusters = max(1, min(requested, len(corpus)))

    corpus_embeddings = embedder.encode(corpus)

    # Perform k-means clustering. A fixed seed keeps the class letters stable
    # when the same table is submitted again.
    clustering_model = KMeans(n_clusters=num_clusters, random_state=0)
    clustering_model.fit(corpus_embeddings)

    # Bucket the sentences by cluster id, preserving input order in a bucket.
    clustered_sentences = [[] for _ in range(num_clusters)]
    for text, cluster_id in zip(corpus, clustering_model.labels_):
        clustered_sentences[cluster_id].append(text)

    # Collect all rows first and build the frame once, instead of growing a
    # DataFrame row by row.
    rows = []
    for index, cluster in enumerate(clustered_sentences):
        label = chr(65 + index)
        rows.extend((label, text) for text in cluster)
    return pd.DataFrame(rows, columns=result_columns)


with gr.Blocks(title="Sentence Clustering") as demo:
    with gr.Row():
        # Left side: cluster count and the editable sentence table.
        with gr.Column():
            with gr.Row():
                with gr.Column(min_width=20):
                    # NOTE(review): the label text contains a typo
                    # ("clutering"); it is runtime text and is left as is.
                    num = gr.Number(label="Number of clutering", value=4)
                with gr.Column():
                    # Intentionally empty column next to the number box.
                    pass
            inputs = [
                num,
                gr.Dataframe(
                    value=df_example,
                    datatype=["str"],
                    col_count=(1, False),
                ),
            ]
        # Right side: the clustered result.
        with gr.Column():
            outputs = gr.Dataframe(
                headers=["class", "sentence"],
                datatype=["str", "str"],
            )
    greet_btn = gr.Button("RUN")
    greet_btn.click(fn=sentence, inputs=inputs, outputs=outputs, api_name="Sentence Clustering")

demo.launch()