File size: 1,321 Bytes
2fa9b6a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8a1e56b
2fa9b6a
 
 
 
8a1e56b
 
2fa9b6a
 
8a1e56b
2fa9b6a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import streamlit as st
import pandas as pd
from sentence_transformers import SentenceTransformer,util
import torch
import numpy as np
from os.path import exists


# --- Page chrome and shared resources ---------------------------------------
# Sidebar: logo image plus a slider choosing how many matches to display
# (min 1, max 20, default 5, step 1).
st.sidebar.image("./NarrativaLogoBlanco.png")
topK = st.sidebar.slider("Number of results: ", 1, 20, 5, 1)

# Page title. The trailing emoji (magnifying glass + newspaper) are spelled as
# Unicode escapes: the literal previously contained mojibake produced by
# decoding their UTF-8 bytes with the wrong codec, and escapes cannot be
# re-mangled by a mis-configured editor or transfer.
st.write("# Semantic News Search \U0001F50D\U0001F4F0")

# Sentence-embedding model used for both the corpus and the user query;
# explicitly placed on the CPU.
model = SentenceTransformer('all-MiniLM-L6-v2', device='cpu')

# Corpus: the 'sentences' column of the CSV, as a plain Python list.
df = pd.read_csv('financial-sentences.csv')
sentences = df['sentences'].to_list()

# Reuse corpus embeddings cached on disk when they exist AND still match the
# corpus; otherwise encode every sentence and refresh the cache.
corpus_embeddings = None
if exists('embeddings.npy'):
    corpus_embeddings = np.load('embeddings.npy')
    # A cache built from a different version of the CSV has a different number
    # of rows; using it would attach scores to the wrong sentences or index
    # past the end of `sentences`, so discard it and rebuild.
    if len(corpus_embeddings) != len(sentences):
        corpus_embeddings = None

if corpus_embeddings is None:
    corpus_embeddings = model.encode(
        sentences,
        batch_size=23,
        show_progress_bar=False,
        convert_to_tensor=True,
    )
    # Persist as a NumPy array so later runs can skip the encoding step.
    np.save('embeddings.npy', corpus_embeddings.cpu().numpy())


# Free-text query box; the search below runs only once something was typed.
sentence = st.text_input('Enter a sentence:')

if sentence:
    # Embed the query and score it against every corpus embedding.
    # cos_sim returns a (1, corpus_size) matrix; row 0 is the query's scores.
    embedding = model.encode(sentences=[sentence], convert_to_tensor=True)
    cosine_scores = util.cos_sim(embedding, corpus_embeddings)[0]

    # torch.topk raises if k exceeds the number of scores, so never ask for
    # more results than the corpus holds.
    k = min(topK, cosine_scores.shape[0])
    top_results = torch.topk(cosine_scores, k=k)

    st.write(" **Query:**", sentence)
    st.write(f"\n **Top {k} most similar sentences in corpus:**\n")

    # Convert the tensors to plain Python floats/ints before formatting and
    # list indexing. The argument-less st.write() calls that used to sit
    # around the results were dropped because they appeared to render nothing.
    scores = top_results.values.tolist()
    indices = top_results.indices.tolist()
    for score, idx in zip(scores, indices):
        st.write(sentences[idx])
        st.write(f"*Score:* {score:.4f}")