has12zen committed on
Commit
1ed207d
1 Parent(s): 6118d1d
Files changed (1) hide show
  1. utils.py +6 -4
utils.py CHANGED
@@ -57,8 +57,8 @@ def process_tokens(tokens,st_global_words):
57
 
58
  def main(input1,input2):
59
  processed_files = [ read_file(file) for file in files ]
60
- processed_files.append(input1)
61
- processed_files.append(input2)
62
  processed_strings = [ process_string(file) for file in processed_files ]
63
  st_global_words = set()
64
  for tokens in processed_strings:
@@ -108,6 +108,8 @@ def euclidean(A,B):
108
  def final_main(input1,input2):
109
  tf_idf_vals = main(input1,input2)
110
  outputString = ""
111
- outputString+= f"Cosine sim: {cosine_diff(tf_idf_vals[1],tf_idf_vals[2])}\n"
112
- outputString+= f"Euclidean difference: {euclidean(tf_idf_vals[1],tf_idf_vals[2])}\n"
 
 
113
  return outputString
 
57
 
58
  def main(input1,input2):
59
  processed_files = [ read_file(file) for file in files ]
60
+ processed_files.insert(0,input2)
61
+ processed_files.insert(0,input1)
62
  processed_strings = [ process_string(file) for file in processed_files ]
63
  st_global_words = set()
64
  for tokens in processed_strings:
 
108
  def final_main(input1,input2):
109
  tf_idf_vals = main(input1,input2)
110
  outputString = ""
111
+ similarity = cosine_diff(tf_idf_vals[0],tf_idf_vals[1])
112
+ outputString+=f"Cosine similarity:{round(similarity*100,2)}%"
113
+ diff = euclidean(tf_idf_vals[0],tf_idf_vals[1])
114
+ outputString += f"Euclidean Distance(difference): {round(math.sqrt(diff)*100,2)}%"
115
  return outputString