import json

import numpy
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# Load the Thirukkural dataset (a JSON file with the couplets and their
# English meanings) into a dictionary.
with open('thirukural_git.json') as f:
    data = json.load(f)

# Collect the lower-cased English translations and the original couplets.
en_translations = []
kurals = []
for kural in data['kurals']:
    en_translations.append(kural['meaning']['en'].lower())
    kurals.append(kural['kural'])

# Encode every translation once with a sentence-transformer model.
model = SentenceTransformer('all-MiniLM-L6-v2')
# model.tokenizer.add_special_tokens({'pad_token': '[thiyaga]'})
sen_embeddings = model.encode(en_translations)
# sen_embeddings = numpy.memmap('trainedmodel', mode="r", dtype=numpy.float32, shape=(1330, 768))
# sen_embeddings.tofile('trainedmodel')


def find_similarities(query: str) -> str:
    """Return the three kurals whose English meanings are most similar to the query."""
    query_embedding = model.encode([query.lower()])

    # Cosine similarity between the query and every kural embedding.
    similarity_matrix = cosine_similarity(query_embedding, sen_embeddings)

    # Indices of the three highest-scoring kurals (order not guaranteed).
    top_indices = numpy.argpartition(similarity_matrix[0], -3)[-3:]

    response = ''
    for index in top_indices:
        print(similarity_matrix[0][index])
        print(en_translations[index])
        print("\n".join(kurals[index]))
        response += en_translations[index] + "\n" + "\n".join(kurals[index]) + "\n"
    return response


# while True:
#     text = input('Ask valluvar: ')
#     if text == 'exit':
#         break
#     find_similarities(text)
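
# The commented-out memmap/tofile lines above hint at caching the embeddings on
# disk so the model does not have to re-encode all the translations on every run.
# Below is a minimal sketch of that idea using numpy.save/numpy.load instead of a
# raw memmap; the cache filename 'kural_embeddings.npy' is an assumption and not
# part of the original script. It is left commented out, like the other
# alternatives in this file, so the default behaviour is unchanged.
#
# import os
#
# CACHE_PATH = 'kural_embeddings.npy'  # hypothetical cache file
#
# if os.path.exists(CACHE_PATH):
#     # Reuse the previously computed embeddings.
#     sen_embeddings = numpy.load(CACHE_PATH)
# else:
#     # Encode once and persist the result for later runs.
#     sen_embeddings = model.encode(en_translations)
#     numpy.save(CACHE_PATH, sen_embeddings)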