""" This is a simple application for sentence embeddings: semantic search We have a corpus with various sentences. Then, for a given query sentence, we want to find the most similar sentence in this corpus. This script outputs for various queries the top 5 most similar sentences in the corpus. """ from sentence_transformers import util import torch def get_sorted_keywords(embedder, text, keywords): top_k = len(keywords) keywords_embedding = embedder.encode(keywords, convert_to_tensor=True) text_embedding = embedder.encode(text, convert_to_tensor=True) cos_scores = util.cos_sim(keywords_embedding, text_embedding).squeeze(dim=1) # print(cos_scores.size()) top_results = torch.topk(cos_scores, k=top_k) return [(keywords[idx], top_results[0][index].item()) for index, idx in enumerate(top_results[1])] # return [keywords[idx] for idx in top_results[1]] # for score, idx in zip(top_results[0], top_results[1]): # print(keywords[idx], "(Score: {:.4f})".format(score))