# Hugging Face Space page status captured with this file: "Runtime error".
import streamlit as st
import pandas as pd
from sentence_transformers import SentenceTransformer,util
import torch
import numpy as np
from os.path import exists
# Semantic search demo: embeds a CSV of sentences once, caches the embeddings
# on disk, and ranks the corpus against a user query by cosine similarity.

# --- Sidebar and page header ---
st.sidebar.image("./NarrativaLogoBlanco.png")
topK = st.sidebar.slider("Number of results: ", 1, 20, 5, 1)
# The emoji in this title had been mis-decoded into "ππ°"; restored here.
st.write("# Semantic News Search 🔍📰")

# --- Model and corpus ---
# NOTE(review): this runs at module level, so the model and CSV are loaded on
# every execution of the script; consider Streamlit's resource caching if the
# deployed Streamlit version supports it.
model = SentenceTransformer('all-MiniLM-L6-v2', device='cpu')
df = pd.read_csv('data/financial-sentences.csv')
sentences = df['sentences'].to_list()

# --- Corpus embeddings (disk-cached) ---
# Reuse the cached matrix only if it has one row per sentence; a cache built
# from a different CSV would map scores to the wrong (or missing) sentences.
corpus_embeddings = None
if exists('data/embeddings.npy'):
    cached_embeddings = np.load('data/embeddings.npy')
    if cached_embeddings.ndim == 2 and cached_embeddings.shape[0] == len(sentences):
        # Keep the same type (torch tensor) as the freshly-computed branch.
        corpus_embeddings = torch.from_numpy(cached_embeddings)

if corpus_embeddings is None:
    corpus_embeddings = model.encode(
        sentences,
        batch_size=23,
        show_progress_bar=False,
        convert_to_tensor=True,
    )
    np.save('data/embeddings.npy', corpus_embeddings.detach().cpu().numpy())

# --- Query handling ---
sentence = st.text_input('Enter a sentence:')
if sentence:
    embedding = model.encode(sentences=[sentence], convert_to_tensor=True)
    # cos_sim returns a (1, corpus_size) matrix for the single query; take row 0.
    cosine_scores = util.cos_sim(embedding, corpus_embeddings)[0]

    # torch.topk raises if k exceeds the number of scores, so never ask for
    # more results than there are sentences in the corpus.
    top_k = min(topK, len(sentences))
    top_results = torch.topk(cosine_scores, k=top_k)

    st.write()
    st.write(" **Query:**", sentence)
    st.write(f"\n **Top {top_k} most similar sentences in corpus:**\n")
    # Convert to plain Python floats/ints for formatting and list indexing.
    for score, idx in zip(top_results.values.tolist(), top_results.indices.tolist()):
        st.write(sentences[idx])
        st.write(f"*Score:* {score:.4f}")
        st.write()
        st.write()