Preetesh committed on
Commit
faae2cd
1 Parent(s): 777082b

Create new file

Browse files
Files changed (1) hide show
  1. app.py +73 -0
app.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import nltk
3
+ from transformers import pipeline
4
+ from sentence_transformers import SentenceTransformer
5
+ from scipy.spatial.distance import cosine
6
+ import numpy as np
7
+ import seaborn as sns
8
+ import matplotlib.pyplot as plt
9
+ from sklearn.cluster import KMeans
10
+ import tensorflow as tf
11
+ import tensorflow_hub as hub
12
+
13
+
14
def cluster_examples(messages, embed, nc=3):
    """Cluster sentences with K-Means and render each cluster in Streamlit.

    Args:
        messages: Sequence of sentences, in the same order as the rows of
            ``embed``.
        embed: Embedding vectors handed to ``KMeans.fit_predict``; one row
            per entry in ``messages``.
        nc: Requested number of clusters. It is capped at the number of
            available sentences, so fewer clusters may be shown.
    """
    # KMeans raises ValueError when n_clusters exceeds the number of samples,
    # so never ask for more clusters than there are sentences.
    nc = min(nc, len(messages), len(embed))
    if nc < 1:
        # Nothing to cluster.
        return

    km = KMeans(
        n_clusters=nc, init='random',
        n_init=10, max_iter=300,
        tol=1e-04, random_state=0
    )
    labels = km.fit_predict(embed)

    for n in range(nc):
        st.markdown("CLUSTER : %d" % n)
        # Keep the original sentence order inside each cluster.
        for message, label in zip(messages, labels):
            if label == n:
                st.markdown(message)
27
+
28
+
29
def plot_heatmap(labels, heatmap, rotation=90):
    """Draw a labelled similarity heatmap and render it in Streamlit.

    Args:
        labels: Tick labels used for both the x and y axes.
        heatmap: 2-D array of values to plot; the colour scale is fixed to
            the range [-1, 1].
        rotation: Rotation, in degrees, applied to the x tick labels.
    """
    sns.set(font_scale=1.2)
    fig, ax = plt.subplots()
    g = sns.heatmap(
        heatmap,
        xticklabels=labels,
        yticklabels=labels,
        vmin=-1,
        vmax=1,
        cmap="coolwarm")
    g.set_xticklabels(labels, rotation=rotation)
    g.set_title("Textual Similarity")
    st.pyplot(fig)
    # pyplot keeps every figure it creates alive until it is closed; close it
    # here so repeated calls do not accumulate open figures.
    plt.close(fig)
42
+
43
# Streamlit text boxes
text = st.text_area('Enter sentences:', value="Behavior right this is a kind of Heisenberg uncertainty principle situation if I told you, then you behave differently. What would be the impressive thing is you have talked about winning a nobel prize in a system winning a nobel prize. Adjusting it and then making your own. That is when I fell in love with computers. I realized that they were a very magical device. Can go to sleep come back the next day and it is solved. You know that feels magical to me.")

nc = st.slider('Select a number of clusters:', min_value=1, max_value=15, value=3)

model_type = st.radio("Choose model:", ('Sentence Transformer', 'Universal Sentence Encoder'), index=0)

# Model setup
# NOTE(review): the model is loaded at module level with no caching, so it is
# presumably reloaded whenever the script re-runs -- consider caching the load.
if model_type == "Sentence Transformer":
    model = SentenceTransformer('paraphrase-distilroberta-base-v1')
elif model_type == "Universal Sentence Encoder":
    model_url = "https://tfhub.dev/google/universal-sentence-encoder-large/5"
    model = hub.load(model_url)

nltk.download('punkt')

# Run model
# Skip empty or whitespace-only input: it would tokenize to zero sentences and
# leave nothing to embed, plot or cluster.
if text and text.strip():
    sentences = nltk.tokenize.sent_tokenize(text)
    if model_type == "Sentence Transformer":
        embed = model.encode(sentences)
    elif model_type == "Universal Sentence Encoder":
        embed = model(sentences).numpy()

    # Pairwise cosine similarity: normalise every embedding to unit length,
    # then a single matrix product yields all dot products at once.
    vectors = np.asarray(embed, dtype=np.float64)
    unit_vectors = vectors / np.linalg.norm(vectors, axis=1, keepdims=True)
    sim = unit_vectors @ unit_vectors.T

    st.subheader("Similarity Heatmap")
    plot_heatmap(sentences, sim)
    st.subheader("Results from K-Means Clustering")
    # The slider can exceed the sentence count, and KMeans rejects
    # n_clusters > n_samples, so cap the request here.
    cluster_examples(sentences, embed, min(nc, len(sentences)))