Spaces:

Sudhansu
/

06SL-NLP-SentenceSimilarity-Heatmap-Cluster

Runtime error

App Files Files Community

06SL-NLP-SentenceSimilarity-Heatmap-Cluster / app.py

Sudhansu

Upload 2 files

7ac0fa7 over 1 year ago

raw history blame contribute delete

No virus

2.91 kB

	import streamlit as st
	import nltk
	from transformers import pipeline
	from sentence_transformers import SentenceTransformer
	from scipy.spatial.distance import cosine
	import numpy as np
	import seaborn as sns
	import matplotlib.pyplot as plt
	from sklearn.cluster import KMeans
	import tensorflow as tf
	import tensorflow_hub as hub


	def cluster_examples(messages, embed, nc=3):
	km = KMeans(
	n_clusters=nc, init='random',
	n_init=10, max_iter=300,
	tol=1e-04, random_state=0
	)
	km = km.fit_predict(embed)
	for n in range(nc):
	idxs = [i for i in range(len(km)) if km[i] == n]
	ms = [messages[i] for i in idxs]
	st.markdown ("CLUSTER : %d"%n)
	for m in ms:
	st.markdown (m)


	def plot_heatmap(labels, heatmap, rotation=90):
	sns.set(font_scale=1.2)
	fig, ax = plt.subplots()
	g = sns.heatmap(
	heatmap,
	xticklabels=labels,
	yticklabels=labels,
	vmin=-1,
	vmax=1,
	cmap="coolwarm")
	g.set_xticklabels(labels, rotation=rotation)
	g.set_title("Textual Similarity")

	st.pyplot(fig)
	#plt.show()

	#st.header("Sentence Similarity Demo")

	# Streamlit text boxes
	text = st.text_area('Enter sentences:', value="Self confidence in outcomes helps us win and to make us successful.\nShe has a seriously impressive intellect and mind.\nStimulating and deep conversation helps us develop and grow.\nFrom basic quantum particles we get aerodynamics, friction, surface tension, weather, electromagnetism.\nIf she actively engages and comments positively, her anger disappears adapting into win-win's favor.\nI love interesting topics of conversation and the understanding and exploration of thoughts.\nThere is the ability to manipulate things the way you want in your mind to go how you want when you are self confident, that we don’t understand yet.")

	nc = st.slider('Select a number of clusters:', min_value=1, max_value=15, value=3)

	model_type = st.radio("Choose model:", ('Sentence Transformer', 'Universal Sentence Encoder'), index=0)

	# Model setup
	if model_type == "Sentence Transformer":
	model = SentenceTransformer('paraphrase-distilroberta-base-v1')
	elif model_type == "Universal Sentence Encoder":
	model_url = "https://tfhub.dev/google/universal-sentence-encoder-large/5"
	model = hub.load(model_url)

	nltk.download('punkt')

	# Run model
	if text:
	sentences = nltk.tokenize.sent_tokenize(text)
	if model_type == "Sentence Transformer":
	embed = model.encode(sentences)
	elif model_type == "Universal Sentence Encoder":
	embed = model(sentences).numpy()
	sim = np.zeros([len(embed), len(embed)])
	for i,em in enumerate(embed):
	for j,ea in enumerate(embed):
	sim[i][j] = 1.0-cosine(em,ea)
	st.subheader("Similarity Heatmap")
	plot_heatmap(sentences, sim)
	st.subheader("Results from K-Means Clustering")
	cluster_examples(sentences, embed, nc)