Spaces:

madebybread
/

brightly-ai

Paused

App Files Files Community

beweinreich committed on Jun 14

Commit

ecfb899

•

1 Parent(s): c92bac3

return all items from list no matter if it's a noun

Browse files

Files changed (3) hide show

algo.py +57 -8
multi_food_item_detector.py +23 -2
similarity_fast.py +0 -2

algo.py CHANGED Viewed

@@ -7,7 +7,7 @@ from food_nonfood import classify_as_food_nonfood, pessimistic_food_nonfood_scor
 from utils import clean_word
 from db.db_utils import store_mapping_to_db, get_mapping_from_db
 from ask_gpt import query_gpt
-from multi_food_item_detector import extract_food_phrases
 similarity_threshold = 0.75
@@ -106,15 +106,68 @@ class Algo:
         # If it has more slashes, it's slash-delimited
         # If it has equal number of commas and slashes, we'll go with slashes
-        input_word_parts = extract_food_phrases(input_word)
         mappings = []
         for part in input_word_parts:
             mapping = self.handle_single_item(part)
             mappings.append(mapping)
-        # TODO categorize the whole mapping list as homogenous, heterogenous, or non-food item
-        return None
     def handle_single_item(self, input_word):
         input_word_clean = clean_word(input_word)
@@ -173,10 +226,6 @@ class Algo:
             print()
             print(f"Processing: {input_word}")
-            if "&" in input_word or "and" in input_word:
-                print(" - Skipping multi-item word")
-                continue
             # if the word has a "," or "/" in it, handle it as a multi-item input
             if ',' in input_word or '/' in input_word:
                 mapping = self.handle_multi_item(input_word)

 from utils import clean_word
 from db.db_utils import store_mapping_to_db, get_mapping_from_db
 from ask_gpt import query_gpt
+from multi_food_item_detector import extract_items
 similarity_threshold = 0.75
         # If it has more slashes, it's slash-delimited
         # If it has equal number of commas and slashes, we'll go with slashes
+        input_word_parts = extract_items(input_word)
         mappings = []
         for part in input_word_parts:
             mapping = self.handle_single_item(part)
             mappings.append(mapping)
+        # look up the dictionary values for each mapping
+        # find the wweia category
+        # if all mappings have the same wweia category, return "Homogenous Mixture", else "Heterogeneous Mixture"
+        # if is_food is False for any mapping, return "Non-Food Item" as dictionary word
+        for mapping in mappings:
+            if mapping['is_food'] == False:
+                return {
+                    'input_word': input_word,
+                    'cleaned_word': mapping['cleaned_word'],
+                    'matching_word': 'Non-Food Item',
+                    'dictionary_word': 'Non-Food Item',
+                    'similarity_score': None,
+                    'confidence_score': None,
+                    'similar_words': None,
+                    'is_food': False,
+                    'food_nonfood_score': 1.0
+                }
+                break
+        dictionary_words = [mapping['dictionary_word'] for mapping in mappings]
+        if len(set(dictionary_words)) == 0:
+            return {
+                'input_word': input_word,
+                'cleaned_word': None,
+                'matching_word': None,
+                'dictionary_word': None,
+                'similarity_score': None,
+                'confidence_score': None,
+                'similar_words': None,
+                'is_food': None,
+                'food_nonfood_score': None
+            }
+        self.db_cursor.execute(
+            f"SELECT DISTINCT wweia_category FROM dictionary WHERE description IN ({','.join(['%s']*len(dictionary_words))})",
+            dictionary_words
+        )
+        wweia_categories = self.db_cursor.fetchall()
+        wweia_categories = [x[0] for x in wweia_categories]
+        print("categories -> ", wweia_categories)
+        mixture = "Heterogeneous Mixture"
+        if len(set(wweia_categories)) == 1:
+            mixture = "Homogenous Mixture"
+        return {
+            'input_word': input_word,
+            'cleaned_word': None,
+            'matching_word': mixture,
+            'dictionary_word': mixture,
+            'similarity_score': None,
+            'confidence_score': None,
+            'similar_words': None,
+            'is_food': True,
+            'food_nonfood_score': 1.0,
+        }
     def handle_single_item(self, input_word):
         input_word_clean = clean_word(input_word)
             print()
             print(f"Processing: {input_word}")
             # if the word has a "," or "/" in it, handle it as a multi-item input
             if ',' in input_word or '/' in input_word:
                 mapping = self.handle_multi_item(input_word)

multi_food_item_detector.py CHANGED Viewed

@@ -16,8 +16,8 @@ def extract_food_phrases(text):
     elif ',' in text:
         delimiter = ','
     else:
-      # if it's not comma or slash delimited, return the text as is
-      # this will be an edge-case and we'll handle it later
         return [text]
     # Split the text using the identified delimiter
@@ -36,3 +36,24 @@ def extract_food_phrases(text):
     return food_items

     elif ',' in text:
         delimiter = ','
     else:
+        # If it's not comma or slash delimited, return the text as is
+        # this will be an edge-case and we'll handle it later
         return [text]
     # Split the text using the identified delimiter
     return food_items
+def extract_items(text):
+    # Determine the delimiter
+    if '/' in text:
+        delimiter = '/'
+    elif ',' in text:
+        delimiter = ','
+    else:
+        # If it's not comma or slash delimited, return the text as is
+        return [text]
+    # Split the text using the identified delimiter
+    items = [item.strip() for item in text.split(delimiter)]
+    # Get the food items
+    food_items = extract_food_phrases(text)
+    # Find the items that were not matched as food items
+    non_food_items = [item for item in items if item not in food_items]
+    # Combine the food items and non_food_items
+    return food_items + non_food_items

similarity_fast.py CHANGED Viewed

@@ -64,8 +64,6 @@ class SimilarityFast:
         return most_similar_word, dictionary_word, highest_score, confidence_score, similar_words_str
     def find_most_similar_word(self, input_word):
         if not isinstance(input_word, str) or not input_word:
             return None

         return most_similar_word, dictionary_word, highest_score, confidence_score, similar_words_str
     def find_most_similar_word(self, input_word):
         if not isinstance(input_word, str) or not input_word:
             return None