File size: 1,025 Bytes
4da642e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
"""
This is a simple application for sentence embeddings: semantic search

We have a corpus with various sentences. Then, for a given query sentence,
we want to find the most similar sentence in this corpus.

This script outputs for various queries the top 5 most similar sentences in the corpus.
"""
from sentence_transformers import util
import torch


def get_sorted_keywords(embedder, text, keywords):
    """Rank ``keywords`` by cosine similarity to ``text``.

    Both inputs are embedded with ``embedder.encode(..., convert_to_tensor=True)``
    and every keyword is scored against the text with ``util.cos_sim``.

    Args:
        embedder: Object exposing ``encode(inputs, convert_to_tensor=True)``
            (presumably a ``SentenceTransformer`` model -- confirm with caller).
        text: The text to compare against. Assumed to be a single string so
            that the similarity matrix has exactly one column -- TODO confirm.
        keywords: Sequence of candidate keywords; must support ``len()`` and
            integer indexing.

    Returns:
        A list of ``(keyword, score)`` tuples covering every keyword, ordered
        from highest to lowest score. ``score`` is a Python float. An empty
        ``keywords`` sequence yields an empty list.
    """
    top_k = len(keywords)
    if top_k == 0:
        # Nothing to rank; skip encoding so empty input is not passed on to
        # the embedder and the similarity computation.
        return []

    keywords_embedding = embedder.encode(keywords, convert_to_tensor=True)
    text_embedding = embedder.encode(text, convert_to_tensor=True)

    # Drop the single text column to get one score per keyword.
    cos_scores = util.cos_sim(keywords_embedding, text_embedding).squeeze(dim=1)

    # Asking topk for all entries gives a full descending sort of the scores
    # together with the original keyword positions.
    scores, indices = torch.topk(cos_scores, k=top_k)
    return [
        (keywords[idx], score)
        for score, idx in zip(scores.tolist(), indices.tolist())
    ]