File size: 732 Bytes
b30ed6a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
from sentence_transformers import SentenceTransformer, util
import json
import numpy as np
# Sentence-embedding model, instantiated once at module import and reused by
# every similarity() call.
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

def similarity(strQuery, chunks_path='chunks.json'):
    """Return the (start, end) of the chunk most similar to ``strQuery``.

    The chunk file is a JSON array of objects; each object must provide a
    ``'text'`` entry (the content that gets embedded) plus ``'start'`` and
    ``'end'`` entries, which are returned unchanged for the best match.

    Args:
        strQuery: Query text to compare against every chunk.
        chunks_path: Path of the JSON chunk file. Defaults to
            ``'chunks.json'`` in the current working directory.

    Returns:
        A ``(start, end)`` tuple taken from the chunk whose text has the
        highest cosine similarity to the query. On ties the first such
        chunk wins.

    Raises:
        ValueError: If the chunk file contains no chunks.
    """
    # Binary mode lets the json module detect the UTF-8/16/32 encoding itself
    # instead of depending on the platform's default text encoding; the
    # context manager guarantees the handle is closed.
    with open(chunks_path, 'rb') as chunk_file:
        inputs = json.load(chunk_file)

    if not inputs:
        raise ValueError(f"no chunks found in {chunks_path!r}")

    lstCorpus = [dct['text'] for dct in inputs]

    # Embed the caller's query (previously it was overwritten by a fixed,
    # hard-coded question, so the argument was ignored).
    qryEmbedding = model.encode(strQuery, convert_to_tensor=True)
    corpusEmbedding = model.encode(lstCorpus, convert_to_tensor=True)

    # The similarity matrix has one row for the single query; row 0 holds the
    # score of the query against each chunk, in corpus order.
    sim_mat = util.pytorch_cos_sim(qryEmbedding, corpusEmbedding)
    indexMax = int(np.argmax(sim_mat[0].tolist()))

    best = inputs[indexMax]
    return (best['start'], best['end'])