# Food-phrase extraction utilities built on spaCy part-of-speech tagging.
import spacy
import re

# Load the transformer-based English pipeline once at import time; model
# loading is expensive, so it must not happen per call.
nlp = spacy.load("en_core_web_trf")
def get_nouns(text):
    """Return the text of every token in *text* that spaCy tags as a NOUN.

    Args:
        text: The input string to analyze.

    Returns:
        A list of noun token strings, in document order (may be empty).
    """
    doc = nlp(text)
    return [token.text for token in doc if token.pos_ == "NOUN"]
def extract_food_phrases(text):
    """Split *text* on '/' or ',' and keep only pieces containing a noun.

    The delimiter is chosen by preference: '/' first, then ','. When neither
    delimiter is present the whole string is returned unchanged as a
    single-element list (edge case handled by the caller).

    Args:
        text: A delimited string of candidate food phrases.

    Returns:
        A list of stripped items that contain at least one NOUN-tagged token.
    """
    # Determine the delimiter, preferring '/' over ','.
    if '/' in text:
        delimiter = '/'
    elif ',' in text:
        delimiter = ','
    else:
        # Not delimited; treat the whole string as one item.
        return [text]
    items = [item.strip() for item in text.split(delimiter)]
    # Keep an item when spaCy tags at least one of its tokens as a noun.
    return [
        item
        for item in items
        if any(token.pos_ == "NOUN" for token in nlp(item))
    ]
def extract_items(text):
    """Return the food phrases in *text*, or every split item as a fallback.

    Mirrors the delimiter logic of ``extract_food_phrases``: split on '/'
    (preferred) or ','; with no delimiter, return ``[text]`` as-is.

    Args:
        text: A delimited string of candidate items.

    Returns:
        The noun-containing (food) items when any exist; otherwise all of
        the stripped split items.
    """
    # Determine the delimiter, preferring '/' over ','.
    if '/' in text:
        delimiter = '/'
    elif ',' in text:
        delimiter = ','
    else:
        # If it's not comma or slash delimited, return the text as is.
        return [text]
    items = [item.strip() for item in text.split(delimiter)]
    food_items = extract_food_phrases(text)
    if food_items:
        return food_items
    # No item contained a noun: fall back to every split item.
    # (Equivalent to the original food_items + non_food_items combine,
    # which only ran when food_items was empty.)
    return items