beweinreich committed on
Commit
1c5cc00
1 Parent(s): e3071d6

improved the scoring

Browse files
Files changed (1) hide show
  1. similarity_fast.py +9 -4
similarity_fast.py CHANGED
@@ -66,16 +66,21 @@ class SimilarityFast:
66
  input_embedding = generate_embedding(self.model, input_word_clean)
67
  similarities = []
68
 
69
- contains_cooked = 'cooked' in input_word_clean.lower()
 
 
 
 
 
70
 
71
  for key, val in self.dictionary_embeddings.items():
72
  similarity_score = cosine_similarity(input_embedding, val['v'])
73
- if contains_cooked and 'cooked' in key.lower():
74
- similarity_score *= 1.05
 
75
  similarities.append((key, val['d'], similarity_score))
76
 
77
  most_similar_word, dictionary_word, highest_score = max(similarities, key=lambda x: x[2])
78
-
79
  confidence_score, similar_words_str = calculate_confidence_and_similar_words_str(similarities, highest_score)
80
 
81
  return most_similar_word, dictionary_word, highest_score, confidence_score, similar_words_str
 
66
  input_embedding = generate_embedding(self.model, input_word_clean)
67
  similarities = []
68
 
69
+ adjustment_factors = {
70
+ 'cooked': 'cooked' in input_word_clean.lower(),
71
+ 'frozen': 'frozen' in input_word_clean.lower(),
72
+ 'canned': 'canned' in input_word_clean.lower() or 'can' in input_word_clean.lower(),
73
+ 'can': 'canned' in input_word_clean.lower() or 'can' in input_word_clean.lower()
74
+ }
75
 
76
  for key, val in self.dictionary_embeddings.items():
77
  similarity_score = cosine_similarity(input_embedding, val['v'])
78
+ for term, is_present in adjustment_factors.items():
79
+ if is_present and term in key.lower():
80
+ similarity_score *= 1.07
81
  similarities.append((key, val['d'], similarity_score))
82
 
83
  most_similar_word, dictionary_word, highest_score = max(similarities, key=lambda x: x[2])
 
84
  confidence_score, similar_words_str = calculate_confidence_and_similar_words_str(similarities, highest_score)
85
 
86
  return most_similar_word, dictionary_word, highest_score, confidence_score, similar_words_str