mmazuecos committed on
Commit
9c52068
1 Parent(s): a882e73

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -0
app.py CHANGED
@@ -1,6 +1,14 @@
1
  import streamlit as st
 
 
 
2
  from sentence_transformers.util import cos_sim
3
  from sentence_transformers import SentenceTransformer
 
 
 
 
 
4
 
5
  @st.cache
6
  def load_model():
@@ -8,6 +16,12 @@ def load_model():
8
  model.eval()
9
  return model
10
 
 
 
 
 
 
 
11
  st.title("Sentence Embedding for Spanish with Bertin")
12
  st.write("Sentence embedding for spanish trained on NLI. Used for Sentence Textual Similarity. Based on the model hackathon-pln-es/bertin-roberta-base-finetuning-esnli.")
13
 
@@ -20,6 +34,41 @@ if st.button('Compute similarity'):
20
  encodings = model.encode([sent1, sent2])
21
  sim = cos_sim(encodings[0], encodings[1]).numpy().tolist()[0][0]
22
  st.text('Cosine Similarity: {0:.4f}'.format(sim))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  else:
24
  st.write('Missing a sentences')
25
  else:
 
1
  import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+
5
  from sentence_transformers.util import cos_sim
6
  from sentence_transformers import SentenceTransformer
7
+ from bokeh.plotting import figure, output_notebook, show, save
8
+ from bokeh.io import output_file, show
9
+ from bokeh.models import ColumnDataSource, HoverTool
10
+ from sklearn.manifold import TSNE
11
+
12
 
13
  @st.cache
14
  def load_model():
 
16
  model.eval()
17
  return model
18
 
19
@st.cache
def load_plot_data():
    """Read the precomputed plot inputs from disk.

    Both files are opened by relative path, so they are resolved against
    the process's current working directory.

    Returns:
        A ``(embs, data)`` tuple: the array loaded from
        ``semeval2015-embs.npy`` and the DataFrame parsed from
        ``semeval2015-data.csv``.
    """
    # Tuple elements are evaluated left to right, so the .npy file is still
    # read before the CSV.
    return np.load('semeval2015-embs.npy'), pd.read_csv('semeval2015-data.csv')
24
+
25
  st.title("Sentence Embedding for Spanish with Bertin")
26
  st.write("Sentence embedding for spanish trained on NLI. Used for Sentence Textual Similarity. Based on the model hackathon-pln-es/bertin-roberta-base-finetuning-esnli.")
27
 
 
34
  encodings = model.encode([sent1, sent2])
35
  sim = cos_sim(encodings[0], encodings[1]).numpy().tolist()[0][0]
36
  st.text('Cosine Similarity: {0:.4f}'.format(sim))
37
+
38
# Project the stored reference embeddings together with the two sentences the
# user entered down to 2-D with t-SNE, then draw them in an interactive Bokeh
# scatter plot whose hover tooltip shows each point's sentence.
print('Generating visualization...')
sentembs, data = load_plot_data()

# Stack along axis 0 so the two query embeddings become two extra *rows*
# (samples). Joining along axis 1 would require both arrays to have the same
# number of rows and would widen the feature dimension instead.
all_embs = np.concatenate([sentembs, encodings], axis=0)
X_embedded = TSNE(n_components=2, learning_rate='auto',
                  init='random').fit_transform(all_embs)

# DataFrame.append never modified the frame in place and was removed in
# pandas 2.0, so build the two extra rows explicitly and concatenate. This
# also yields a new frame, leaving the object returned by the cached loader
# unmodified.
# NOTE(review): assumes the CSV already has 'sent' and 'color' columns, as
# the appended keys suggest -- confirm against semeval2015-data.csv.
query_rows = pd.DataFrame([
    {'sent': sent1, 'color': '#F0E442'},  # sentence 1
    {'sent': sent2, 'color': '#D55E00'},  # sentence 2
])
plot_data = pd.concat([data, query_rows], ignore_index=True)

# Row order matches all_embs: reference sentences first, then the two queries.
plot_data['x'] = X_embedded[:, 0]
plot_data['y'] = X_embedded[:, 1]

source = ColumnDataSource(plot_data)

p = figure(title="Embeddings in space")
p.circle(
    x='x',
    y='y',
    legend_label="Objects",
    color='color',
    fill_alpha=0.5,
    line_color="blue",
    size=14,
    source=source
)
p.add_tools(HoverTool(
    tooltips=[
        ('sent', '@sent')
    ],
    formatters={
        '@sent': 'printf'
    },
    mode='mouse'
))
st.bokeh_chart(p, use_container_width=True)
72
  else:
73
  st.write('Missing a sentences')
74
  else: