ai_seeker / similarity.py
srikanth-nm's picture
Upload 19 files
b30ed6a
raw
history blame contribute delete
732 Bytes
from sentence_transformers import SentenceTransformer, util
import json
import numpy as np
# Sentence-embedding model, instantiated once at import time and reused by
# every call to similarity() below.
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
def similarity(strQuery):
    """Return the ``(start, end)`` span of the chunk most similar to a query.

    Reads the chunk records from ``chunks.json`` (path relative to the
    current working directory), embeds the query and every record's
    ``'text'`` with the module-level ``model``, and selects the record
    whose cosine similarity to the query is highest.

    Args:
        strQuery: Query text to compare against the chunk texts.

    Returns:
        A tuple ``(start, end)`` holding the ``'start'`` and ``'end'``
        values of the best-matching record.

    Raises:
        ValueError: If ``chunks.json`` contains no records.
    """
    # Context manager so the file handle is closed even if parsing fails.
    with open('chunks.json', 'r', encoding='utf-8') as chunk_file:
        inputs = json.load(chunk_file)

    if not inputs:
        raise ValueError("chunks.json contains no chunks to compare against")

    lstCorpus = [dct['text'] for dct in inputs]

    # Embed the caller's query; it must not be replaced by a fixed string,
    # otherwise every call would return the same chunk.
    qryEmbedding = model.encode(strQuery, convert_to_tensor=True)
    corpusEmbedding = model.encode(lstCorpus, convert_to_tensor=True)

    # Row 0 of the similarity matrix holds the query-vs-chunk scores.
    sim_mat = util.pytorch_cos_sim(qryEmbedding, corpusEmbedding)
    lstSim = sim_mat[0].tolist()

    # np.argmax returns the first index on ties; int() yields a plain
    # Python index for the list lookup.
    indexMax = int(np.argmax(lstSim))
    best = inputs[indexMax]
    return (best['start'], best['end'])