# mrmft's picture
# adding project source
# 4da642e
"""
This is a simple application of sentence embeddings: semantic keyword ranking.
We have a text and a list of candidate keywords. Both are embedded, and each
keyword is scored by its cosine similarity to the text.
This module returns every keyword paired with its score, ordered from most to least similar.
"""
from sentence_transformers import util
import torch
def get_sorted_keywords(embedder, text, keywords):
    """Rank ``keywords`` by their cosine similarity to ``text``.

    Args:
        embedder: Object exposing ``encode(inputs, convert_to_tensor=True)``.
            Presumably a sentence-transformers model -- confirm against callers.
        text: The text the keywords are compared against. Assumed to be a
            single string (the scores are squeezed along dim 1) -- TODO confirm.
        keywords: Sequence of keywords. Must support ``len()`` and integer
            indexing.

    Returns:
        A list of ``(keyword, score)`` tuples, one per keyword, ordered from
        the highest to the lowest cosine similarity. Each ``score`` is a plain
        Python number obtained via ``Tensor.item()``. An empty ``keywords``
        sequence yields an empty list.
    """
    top_k = len(keywords)
    if top_k == 0:
        # Nothing to rank: return early instead of pushing an empty batch
        # through the encoder, the similarity computation and topk.
        return []

    keywords_embedding = embedder.encode(keywords, convert_to_tensor=True)
    text_embedding = embedder.encode(text, convert_to_tensor=True)

    # Pairwise keyword-vs-text similarities; squeezing dim 1 drops the text
    # axis when there is exactly one text (assumed -- see the docstring).
    cos_scores = util.cos_sim(keywords_embedding, text_embedding).squeeze(dim=1)

    # Asking topk for every element amounts to a full descending sort that
    # also hands back the original positions.
    scores, indices = torch.topk(cos_scores, k=top_k)

    return [
        (keywords[int(idx)], score.item())
        for score, idx in zip(scores, indices)
    ]