import spacy
import re

# Load the transformer-based spaCy English pipeline once at import time:
# loading is expensive, so the pipeline is shared by every call below.
nlp = spacy.load("en_core_web_trf")


def _detect_delimiter(text):
    """Return the delimiter used in *text*: '/' takes precedence over ','.

    Returns None when neither delimiter is present, signalling the callers'
    "no delimiter" edge case.
    """
    if '/' in text:
        return '/'
    if ',' in text:
        return ','
    return None


def get_nouns(text):
    """Return the surface text of every token tagged NOUN in *text*."""
    doc = nlp(text)
    return [token.text for token in doc if token.pos_ == "NOUN"]


def extract_food_phrases(text):
    """Split *text* on '/' or ',' and keep only items containing a noun.

    If *text* has neither delimiter, it is returned unchanged as a
    single-element list (edge case handled by the caller).
    """
    delimiter = _detect_delimiter(text)
    if delimiter is None:
        # Not comma or slash delimited: return the text as is.
        return [text]

    items = [item.strip() for item in text.split(delimiter)]

    # An item qualifies as a food phrase when it contains at least one
    # NOUN token according to the spaCy POS tagger.
    food_items = []
    for item in items:
        doc = nlp(item)
        if any(token.pos_ == "NOUN" for token in doc):
            food_items.append(item)
    return food_items


def extract_items(text):
    """Return the noun-bearing items from *text*, or all items as a fallback.

    When no delimited item contains a noun, every item is returned so that
    nothing is silently dropped.
    """
    delimiter = _detect_delimiter(text)
    if delimiter is None:
        # Not comma or slash delimited: return the text as is.
        return [text]

    food_items = extract_food_phrases(text)
    if food_items:
        return food_items

    # Fallback: no item was tagged as containing a noun. In the original
    # logic `food_items` is empty here, so the result is simply every
    # stripped item from the split.
    return [item.strip() for item in text.split(delimiter)]