thiyagab committed on
Commit
9376927
1 Parent(s): 843f624

Sort the response based on similarity

Browse files
Files changed (1) hide show
  1. semanticsearch.py +33 -29
semanticsearch.py CHANGED
@@ -10,66 +10,70 @@ f = open('thirukural_git.json')
10
  # a dictionary
11
  data = json.load(f)
12
 
13
- en_translations=[]
14
- kurals=[]
15
  # Iterating through the json
16
  # list
17
  for kural in data['kurals']:
18
  en_translations.append((kural['meaning']['en'].lower()))
19
  kurals.append(kural['kural'])
20
 
21
-
22
-
23
  # Closing file
24
  f.close()
25
  from sentence_transformers import SentenceTransformer
 
26
  model = SentenceTransformer('all-MiniLM-L6-v2')
27
  sen_embeddings = model.encode(en_translations)
28
 
 
29
  # sen_embeddings= numpy.memmap('trainedmodel',mode="r",dtype=numpy.float32,shape=(1330,768))
30
  # sen_embeddings.tofile('trainedmodel')
31
 
32
 
33
- def preprocess(input:str):
34
  if input.startswith('/'):
35
- #TODO
36
  return False
37
  values = [int(s) for s in re.findall(r'-?\d+\.?\d*', input)]
38
  if values:
39
- index=values[0]
40
- return kural_definition(index)
 
41
  else:
42
- return False
43
- def find_similarities(input:str):
 
 
44
  response = preprocess(input)
45
  if response:
46
  return response
47
  input_embeddings = model.encode([input.lower()])
48
  from sklearn.metrics.pairwise import cosine_similarity
49
- #let's calculate cosine similarity for sentence 0:
50
- similarity_matrix=cosine_similarity(
51
  [input_embeddings[0]],
52
  sen_embeddings[1:]
53
  )
54
-
55
- indices=[numpy.argpartition(similarity_matrix[0],-3)[-3:]]
56
- indices.sort(reverse=True)
57
- response=''
58
- for index in indices[0]:
59
- response+=kural_definition(index)
60
  return response
61
 
62
 
63
- def kural_definition(index:int):
64
- response=''
65
- print(en_translations[index + 1])
66
- response += "\n".join(kurals[index + 1]) + "\n"
67
- response += en_translations[index + 1] + "\n\n"
68
- print("\n".join(kurals[index + 1]))
69
  return response
70
 
71
- # while True:
72
- # text=input('Ask valluvar: ')
73
- # if( text == 'exit'):
74
- # break
75
- # find_similarities(text)
 
 
10
  # a dictionary
11
  data = json.load(f)
12
 
13
+ en_translations = []
14
+ kurals = []
15
  # Iterating through the json
16
  # list
17
  for kural in data['kurals']:
18
  en_translations.append((kural['meaning']['en'].lower()))
19
  kurals.append(kural['kural'])
20
 
 
 
21
  # Closing file
22
  f.close()
23
  from sentence_transformers import SentenceTransformer
24
+
25
  model = SentenceTransformer('all-MiniLM-L6-v2')
26
  sen_embeddings = model.encode(en_translations)
27
 
28
+
29
  # sen_embeddings= numpy.memmap('trainedmodel',mode="r",dtype=numpy.float32,shape=(1330,768))
30
  # sen_embeddings.tofile('trainedmodel')
31
 
32
 
33
def preprocess(input: str):
    """Handle queries that can be answered without a semantic search.

    A query starting with ``/`` is reserved for commands, which are not
    implemented yet. Otherwise the first integer found in the text is treated
    as a 1-based kural number and that kural's definition is returned.

    Args:
        input: Raw query text.

    Returns:
        The string built by ``kural_definition`` when the query contains a
        kural number within the loaded range; otherwise ``False`` so the
        caller can fall back to the similarity search.
    """
    if input.startswith('/'):
        # TODO: slash commands are not implemented yet.
        return False
    # Match whole integers only. A pattern that also accepts "3." or "2.5"
    # hands int() a token it rejects with ValueError, which is easy to hit
    # when a sentence ends with a number followed by a full stop.
    match = re.search(r'-?\d+', input)
    if match is None:
        return False
    number = int(match.group())
    # Kural numbers are 1-based; zero, negatives and numbers past the end of
    # the data are ignored rather than raising IndexError downstream.
    if 1 <= number <= len(kurals):
        return kural_definition(number - 1)
    return False
44
+
45
+
46
def find_similarities(input: str):
    """Return the kurals whose English meaning best matches the query.

    The query is first offered to ``preprocess``; if that yields a response
    (for example a direct kural-number lookup) it is returned unchanged.
    Otherwise the query is embedded with the module-level ``model`` and
    compared to ``sen_embeddings`` by cosine similarity.

    Args:
        input: Raw query text.

    Returns:
        A string made of up to three ``kural_definition`` results, ordered
        from the most to the least similar.
    """
    response = preprocess(input)
    if response:
        return response
    input_embeddings = model.encode([input.lower()])
    from sklearn.metrics.pairwise import cosine_similarity

    # Score the query against every stored embedding so that position i of
    # the result lines up with kurals[i]. Dropping the first row would make
    # the first kural impossible to return.
    scores = cosine_similarity([input_embeddings[0]], sen_embeddings)[0]
    # argpartition requires the requested count to fit inside the array.
    top_n = min(3, len(scores))
    best = numpy.argpartition(scores, -top_n)[-top_n:]
    # argpartition leaves the selected entries unordered, so rank them here.
    ranked = sorted(best, key=lambda i: -scores[i])
    response = ''
    for index in ranked:
        print(scores[index])
        response += kural_definition(index)
    return response
64
 
65
 
66
def kural_definition(index: int):
    """Build the display text for the kural stored at position ``index``.

    The English meaning and the kural lines are also echoed to stdout.

    Args:
        index: Zero-based position in ``kurals`` / ``en_translations``.

    Returns:
        The kural lines joined by newlines, then the English meaning, then a
        blank line.
    """
    meaning = en_translations[index]
    print(meaning)
    verse = "\n".join(kurals[index])
    text = verse + "\n" + meaning + "\n\n"
    print(verse)
    return text
73
 
74
+
75
if __name__ == "__main__":
    # Interactive prompt for local use. The guard keeps the loop from running
    # when this module is imported for its search functions, which would
    # otherwise block the importer waiting on stdin.
    while True:
        try:
            text = input('Ask valluvar: ')
        except EOFError:
            # stdin was closed (Ctrl-D or end of piped input): stop cleanly.
            break
        if text == 'exit':
            break
        # The return value is not used here; kural_definition prints the
        # matching kurals as it builds the response.
        find_similarities(text)