Spaces:
Runtime error
Runtime error
File size: 1,321 Bytes
2fa9b6a 8a1e56b 2fa9b6a 8a1e56b 2fa9b6a 8a1e56b 2fa9b6a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
import streamlit as st
import pandas as pd
from sentence_transformers import SentenceTransformer,util
import torch
import numpy as np
from os.path import exists
# --- Sidebar: logo and the control for how many matches to display ---
st.sidebar.image("./NarrativaLogoBlanco.png")
topK = st.sidebar.slider(
    "Number of results: ",
    min_value=1,
    max_value=20,
    value=5,
    step=1,
)

# --- Main page title ---
# NOTE(review): the characters after "Search" look like a mis-decoded emoji;
# the literal is kept exactly as found - confirm the intended glyphs.
st.write("# Semantic News Search ππ°")

# --- Sentence encoder (pinned to CPU) and the corpus it will search ---
# NOTE(review): Streamlit re-executes the script on every interaction, so the
# model and CSV are loaded again each time - consider Streamlit's caching.
model = SentenceTransformer('all-MiniLM-L6-v2', device='cpu')
df = pd.read_csv('financial-sentences.csv')
# The 'sentences' column is the searchable corpus, kept as a plain Python list.
sentences = df['sentences'].to_list()
# Reuse the corpus embeddings cached on disk when they still match the corpus;
# otherwise encode the corpus once and refresh the cache file.
corpus_embeddings = None
if exists('embeddings.npy'):
    cached_embeddings = np.load('embeddings.npy')
    # A cache built from a different version of the CSV would pair scores with
    # the wrong sentences (or index past the end of `sentences`), so accept it
    # only when it is a 2-D matrix with exactly one row per sentence.
    if cached_embeddings.ndim == 2 and cached_embeddings.shape[0] == len(sentences):
        # Convert once so both branches yield a torch tensor and the array is
        # not re-converted on every query.
        corpus_embeddings = torch.from_numpy(cached_embeddings)
if corpus_embeddings is None:
    corpus_embeddings = model.encode(
        sentences,
        batch_size=23,
        show_progress_bar=False,
        convert_to_tensor=True,
    )
    # Persist as a NumPy array so later runs can skip the encoding step.
    np.save('embeddings.npy', np.array(corpus_embeddings.cpu()))
# Read the user's query; nothing below runs while the input is empty.
sentence = st.text_input('Enter a sentence:')
if sentence:
    # Embed the query with the same model that produced the corpus embeddings.
    query_embedding = model.encode(sentences=[sentence], convert_to_tensor=True)
    # Only one query is encoded, so row 0 holds its similarity to every corpus entry.
    cosine_scores = util.cos_sim(query_embedding, corpus_embeddings)[0]
    # torch.topk raises when k exceeds the number of scores, so clamp the
    # slider value for corpora smaller than the requested result count.
    k = min(topK, cosine_scores.shape[0])
    top_scores, top_indices = torch.topk(cosine_scores, k=k)
    # NOTE(review): the argument-less st.write() calls are kept from the
    # original; they appear to render nothing - confirm before removing.
    st.write()
    st.write(" **Query:**", sentence)
    st.write(f"\n **Top {k} most similar sentences in corpus:**\n")
    # tolist() yields plain Python floats/ints, so each index can be used
    # directly on the `sentences` list and each score formats as a float.
    for score, idx in zip(top_scores.tolist(), top_indices.tolist()):
        st.write(sentences[idx])
        st.write(f"*Score:* {score:.4f}")
        st.write()
        st.write()
|