Spaces:

thiyagab
/

Thamizh

Build error

App Files Files Community

thiyagab committed on Jan 9, 2023

Commit

9376927

•

1 Parent(s): 843f624

Sort the response based on similarity

Browse files

Files changed (1) hide show

semanticsearch.py +33 -29

semanticsearch.py CHANGED Viewed

@@ -10,66 +10,70 @@ f = open('thirukural_git.json')
 # a dictionary
 data = json.load(f)
-en_translations=[]
-kurals=[]
 # Iterating through the json
 # list
 for kural in data['kurals']:
     en_translations.append((kural['meaning']['en'].lower()))
     kurals.append(kural['kural'])
 # Closing file
 f.close()
 from sentence_transformers import SentenceTransformer
 model = SentenceTransformer('all-MiniLM-L6-v2')
 sen_embeddings = model.encode(en_translations)
 # sen_embeddings= numpy.memmap('trainedmodel',mode="r",dtype=numpy.float32,shape=(1330,768))
 # sen_embeddings.tofile('trainedmodel')
-def preprocess(input:str):
     if input.startswith('/'):
-        #TODO
         return False
     values = [int(s) for s in re.findall(r'-?\d+\.?\d*', input)]
     if values:
-        index=values[0]
-        return kural_definition(index)
     else:
-       return False
-def find_similarities(input:str):
     response = preprocess(input)
     if response:
         return response
     input_embeddings = model.encode([input.lower()])
     from sklearn.metrics.pairwise import cosine_similarity
-    #let's calculate cosine similarity for sentence 0:
-    similarity_matrix=cosine_similarity(
         [input_embeddings[0]],
         sen_embeddings[1:]
     )
-    indices=[numpy.argpartition(similarity_matrix[0],-3)[-3:]]
-    indices.sort(reverse=True)
-    response=''
-    for index in indices[0]:
-       response+=kural_definition(index)
     return response
-def kural_definition(index:int):
-    response=''
-    print(en_translations[index + 1])
-    response += "\n".join(kurals[index + 1]) + "\n"
-    response += en_translations[index + 1] + "\n\n"
-    print("\n".join(kurals[index + 1]))
     return response
-# while True:
-#     text=input('Ask valluvar: ')
-#     if( text == 'exit'):
-#         break
-#     find_similarities(text)

 # a dictionary
 data = json.load(f)
+en_translations = []
+kurals = []
 # Iterating through the json
 # list
 for kural in data['kurals']:
     en_translations.append((kural['meaning']['en'].lower()))
     kurals.append(kural['kural'])
 # Closing file
 f.close()
 from sentence_transformers import SentenceTransformer
 model = SentenceTransformer('all-MiniLM-L6-v2')
 sen_embeddings = model.encode(en_translations)
 # sen_embeddings= numpy.memmap('trainedmodel',mode="r",dtype=numpy.float32,shape=(1330,768))
 # sen_embeddings.tofile('trainedmodel')
+def preprocess(input: str):
     if input.startswith('/'):
+        # TODO
         return False
     values = [int(s) for s in re.findall(r'-?\d+\.?\d*', input)]
     if values:
+        index = values[0]
+        if index > 0:
+            return kural_definition(index - 1)
     else:
+        return False
+def find_similarities(input: str):
     response = preprocess(input)
     if response:
         return response
     input_embeddings = model.encode([input.lower()])
     from sklearn.metrics.pairwise import cosine_similarity
+    # let's calculate cosine similarity for sentence 0:
+    similarity_matrix = cosine_similarity(
         [input_embeddings[0]],
         sen_embeddings[1:]
     )
+    indices = [numpy.argpartition(similarity_matrix[0], -3)[-3:]]
+    indices=sorted(indices[0],key=lambda x:-similarity_matrix[0][x])
+    response = ''
+    for index in indices:
+        print(similarity_matrix[0][index])
+        response += kural_definition(index + 1)
     return response
+def kural_definition(index: int):
+    response = ''
+    print(en_translations[index])
+    response += "\n".join(kurals[index]) + "\n"
+    response += en_translations[index] + "\n\n"
+    print("\n".join(kurals[index]))
     return response
+while True:
+    text = input('Ask valluvar: ')
+    if (text == 'exit'):
+        break
+    find_similarities(text)