beweinreich committed on
Commit
68d1a5f
1 Parent(s): 7450395

updates for heterogeneous mixture

Browse files
Files changed (1) hide show
  1. algo.py +34 -26
algo.py CHANGED
@@ -43,19 +43,18 @@ class Algo:
43
  # if slow_mapping['similarity_score'] > mapping['similarity_score']:
44
  # mapping = slow_mapping
45
 
46
- if mapping['similarity_score'] < similarity_threshold and len(input_word.split(' ')) > 1:
47
- print(" - Attempting reverse mapping")
48
- reversed_input_word = ' '.join(input_word.split(' ')[::-1])
49
- reversed_mapping = self.similarity_fast.find_most_similar_word(reversed_input_word)
50
- if reversed_mapping['similarity_score'] > mapping['similarity_score']:
51
- reversed_mapping.update(
52
- {
53
- 'input_word': input_word,
54
- 'cleaned_word': mapping['cleaned_word']
55
- }
56
- )
57
- mapping = reversed_mapping
58
-
59
 
60
  # check if the cleaned_word is a substring of the matching_word
61
  is_substring = mapping['cleaned_word'] in mapping['matching_word']
@@ -147,27 +146,36 @@ class Algo:
147
  'food_nonfood_score': None
148
  }
149
 
150
- self.db_cursor.execute(
151
- f"SELECT DISTINCT wweia_category FROM dictionary WHERE description IN ({','.join(['%s']*len(dictionary_words))})",
152
- dictionary_words
153
- )
154
- wweia_categories = self.db_cursor.fetchall()
155
- wweia_categories = [x[0] for x in wweia_categories]
156
- print("categories -> ", wweia_categories)
157
- mixture = "Heterogeneous Mixture"
158
- if len(set(wweia_categories)) == 1:
159
- mixture = "Homogenous Mixture"
160
-
 
 
 
 
 
161
  return {
162
  'input_word': input_word,
163
  'cleaned_word': None,
164
- 'matching_word': mixture,
165
- 'dictionary_word': mixture,
166
  'similarity_score': None,
167
  'confidence_score': None,
168
  'similar_words': None,
169
  'is_food': True,
170
  'food_nonfood_score': 1.0,
 
 
 
 
171
  }
172
 
173
  def handle_single_item(self, input_word):
 
43
  # if slow_mapping['similarity_score'] > mapping['similarity_score']:
44
  # mapping = slow_mapping
45
 
46
+ # if mapping['similarity_score'] < similarity_threshold and len(input_word.split(' ')) > 1:
47
+ # print(" - Attempting reverse mapping")
48
+ # reversed_input_word = ' '.join(input_word.split(' ')[::-1])
49
+ # reversed_mapping = self.similarity_fast.find_most_similar_word(reversed_input_word)
50
+ # if reversed_mapping['similarity_score'] > mapping['similarity_score']:
51
+ # reversed_mapping.update(
52
+ # {
53
+ # 'input_word': input_word,
54
+ # 'cleaned_word': mapping['cleaned_word']
55
+ # }
56
+ # )
57
+ # mapping = reversed_mapping
 
58
 
59
  # check if the cleaned_word is a substring of the matching_word
60
  is_substring = mapping['cleaned_word'] in mapping['matching_word']
 
146
  'food_nonfood_score': None
147
  }
148
 
149
+ # check if "Heterogenous" is in the wweia category of any of the mappings
150
+ # otherwise we find the mapping with the lowest DMC value, and return that as the dictionary word, dmc, wc, and leakage values
151
+ heterogenous_exists = False
152
+ most_conservative_mapping = None
153
+ for mapping in mappings:
154
+ if mapping['wweia_category'] == "Heterogenous":
155
+ heterogenous_exists = True
156
+ break
157
+ else:
158
+ if most_conservative_mapping is None or mapping['dry_matter_content'] < most_conservative_mapping['dry_matter_content']:
159
+ most_conservative_mapping = mapping
160
+
161
+ dictionary_word = 'Hetereogenous Mixture'
162
+ if most_conservative_mapping is not None:
163
+ dictionary_word = f"{most_conservative_mapping['dictionary_word']} (Lowest DMC)"
164
+
165
  return {
166
  'input_word': input_word,
167
  'cleaned_word': None,
168
+ 'matching_word': dictionary_word,
169
+ 'dictionary_word': dictionary_word,
170
  'similarity_score': None,
171
  'confidence_score': None,
172
  'similar_words': None,
173
  'is_food': True,
174
  'food_nonfood_score': 1.0,
175
+ 'wweia_category': most_conservative_mapping['wweia_category'],
176
+ 'water_content': most_conservative_mapping['water_content'],
177
+ 'dry_matter_content': most_conservative_mapping['dry_matter_content'],
178
+ 'leakage': most_conservative_mapping['leakage']
179
  }
180
 
181
  def handle_single_item(self, input_word):