Narrativa committed on
Commit
2fa9b6a
•
1 Parent(s): 36c37c1

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -0
app.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit app: semantic search over a corpus of financial sentences.

The corpus is read from ``data/financial-sentences.csv`` (column
``sentences``), embedded with the ``all-MiniLM-L6-v2`` sentence-transformer
model, and the embeddings are cached in ``data/embeddings.npy``. A query typed
by the user is embedded with the same model and the corpus sentences with the
highest cosine similarity are displayed.
"""
import streamlit as st
import pandas as pd
from sentence_transformers import SentenceTransformer, util
import torch
import numpy as np
from os.path import exists

SENTENCES_PATH = 'data/financial-sentences.csv'
EMBEDDINGS_PATH = 'data/embeddings.npy'

st.sidebar.image("./NarrativaLogoBlanco.png")
topK = st.sidebar.slider("Number of results: ", 1, 20, 5, 1)

st.write("# Semantic News Search 🔍📰")

# NOTE(review): Streamlit re-executes this script on every interaction, so the
# model and CSV are reloaded each time; consider Streamlit's caching decorator
# appropriate for the installed version.
model = SentenceTransformer('all-MiniLM-L6-v2', device='cpu')

df = pd.read_csv(SENTENCES_PATH)
sentences = df['sentences'].to_list()

# Reuse cached embeddings only when they line up one-to-one with the current
# corpus; a stale cache would make result indices point at the wrong sentence
# (or past the end of the list).
corpus_embeddings = None
if exists(EMBEDDINGS_PATH):
    cached_embeddings = np.load(EMBEDDINGS_PATH)
    if len(cached_embeddings) == len(sentences):
        corpus_embeddings = cached_embeddings

if corpus_embeddings is None:
    corpus_embeddings = model.encode(
        sentences,
        batch_size=23,
        show_progress_bar=False,
        convert_to_tensor=True,
    )
    np.save(EMBEDDINGS_PATH, corpus_embeddings.cpu().numpy())


sentence = st.text_input('Enter a sentence:')

if sentence:
    embedding = model.encode(sentences=[sentence], convert_to_tensor=True)
    # cos_sim returns a (1, corpus_size) matrix; take the single query row.
    cosine_scores = util.cos_sim(embedding, corpus_embeddings)[0]

    # torch.topk raises if k exceeds the number of candidates, so never ask
    # for more results than there are sentences in the corpus.
    k = min(topK, len(sentences))
    top_results = torch.topk(cosine_scores, k=k)

    st.write()
    st.write(" **Query:**", sentence)
    st.write(f"\n **Top {k} most similar sentences in corpus:**\n")

    # Convert to plain Python floats/ints before formatting and list indexing.
    for score, idx in zip(top_results.values.tolist(),
                          top_results.indices.tolist()):
        st.write(sentences[idx])
        st.write(f"*Score:* {score:.4f}")
        st.write()
    st.write()