Lautaro committed on
Commit
6a92e9f
1 Parent(s): a29db1f

Adding App

Browse files
Files changed (2) hide show
  1. app.py +79 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+
5
+ from sentence_transformers.util import cos_sim
6
+ from sentence_transformers import SentenceTransformer
7
+ from bokeh.plotting import figure, output_notebook, show, save
8
+ from bokeh.io import output_file, show
9
+ from bokeh.models import ColumnDataSource, HoverTool
10
+ from sklearn.manifold import TSNE
11
+
12
+
13
# allow_output_mutation=True: without it Streamlit hashes the cached return
# value on every call to detect mutation, which is slow (and can fail) for a
# large model object. The model is only read after loading, so skipping the
# check is safe.
@st.cache(allow_output_mutation=True)
def load_model():
    """Load the sentence-embedding model once and reuse it across reruns.

    Returns:
        The ``SentenceTransformer`` instance for
        ``hackathon-pln-es/bertin-roberta-base-finetuning-esnli``, switched
        to evaluation mode via ``model.eval()``.
    """
    model = SentenceTransformer('hackathon-pln-es/bertin-roberta-base-finetuning-esnli')
    model.eval()
    return model
18
+
19
@st.cache
def load_plot_data():
    """Read the precomputed embeddings and their accompanying table from disk.

    Returns:
        A ``(embs, data)`` tuple: the array loaded from
        ``semeval2015-embs.npy`` and the DataFrame parsed from
        ``semeval2015-data.csv``, in that order.
    """
    # The tuple elements are evaluated left to right, so the .npy file is
    # still read before the CSV.
    return np.load('semeval2015-embs.npy'), pd.read_csv('semeval2015-data.csv')
24
+
25
# ---------------------------------------------------------------------------
# Page header and description.
# ---------------------------------------------------------------------------
st.title("Sentence Embedding for Spanish with Bertin")
st.write("Sentence embedding for spanish trained on NLI. Used for Sentence Textual Similarity. Based on the model hackathon-pln-es/bertin-roberta-base-finetuning-esnli.")
st.write("Introduce two sentence to see their cosine similarity and a graph showing them in the embedding space.")
st.write("Authors: Anibal Pérez, Emilio Tomás Ariza, Lautaro Gesuelli y Mauricio Mazuecos.")

# Free-text inputs for the two sentences to compare.
sent1 = st.text_area('Enter sentence 1')
sent2 = st.text_area('Enter sentence 2')

if st.button('Compute similarity'):
    if sent1 and sent2:
        # Encode both sentences and report their cosine similarity.
        model = load_model()
        encodings = model.encode([sent1, sent2])
        # cos_sim returns a 1x1 tensor here; unwrap it to a plain float.
        sim = cos_sim(encodings[0], encodings[1]).numpy().tolist()[0][0]
        st.text('Cosine Similarity: {0:.4f}'.format(sim))

        print('Generating visualization...')
        sentembs, data = load_plot_data()
        # Project the reference embeddings together with the two user
        # sentences (appended last) down to 2-D for plotting.
        X_embedded = TSNE(
            n_components=2,
            learning_rate='auto',
            init='random',
        ).fit_transform(np.concatenate([sentembs, encodings], axis=0))

        # Append the user sentences in the same order as their embeddings so
        # table rows stay aligned with the t-SNE output rows.
        # DataFrame.append was removed in pandas 2.0; pd.concat works on both
        # old and new pandas. concat also returns a new frame, so the cached
        # DataFrame from load_plot_data() is not mutated.
        # NOTE(review): assumes the CSV already provides 'sent' and 'color'
        # columns for the reference rows -- confirm against the data file.
        user_rows = pd.DataFrame([
            {'sent': sent1, 'color': '#F0E442'},  # sentence 1
            {'sent': sent2, 'color': '#D55E00'},  # sentence 2
        ])
        data = pd.concat([data, user_rows], ignore_index=True)
        data['x'] = X_embedded[:, 0]
        data['y'] = X_embedded[:, 1]

        source = ColumnDataSource(data)

        p = figure(title="Embeddings in space")
        p.circle(
            x='x',
            y='y',
            legend_label="Objects",
            color='color',
            fill_alpha=0.5,
            line_color="blue",
            size=14,
            source=source
        )
        # Show the sentence text when hovering over a point.
        p.add_tools(HoverTool(
            tooltips=[
                ('sent', '@sent')
            ],
            formatters={
                '@sent': 'printf'
            },
            mode='mouse'
        ))
        st.bokeh_chart(p, use_container_width=True)
    else:
        st.write('Missing a sentences')
78
+
79
+
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
1
+ sentence-transformers==2.2.0
2
+ transformers==4.17.0
3
+ torch==1.10.2
4
+ sklearn==0.0
5
+ bokeh==2.4.1