brightly-ai / multi_food_item_detector.py
beweinreich's picture
switched which model we're loading
313433a
raw
history blame
No virus
1.03 kB
import spacy
import re
# Load the spaCy pipeline once, at import time, and keep it in a module-level
# name so every function in this module reuses the same loaded pipeline
# instead of reloading it on each call.
nlp = spacy.load("en_core_web_trf")
def get_nouns(text):
    """Return the text of every token in *text* that spaCy tags as a noun.

    Args:
        text: The string to run through the module-level ``nlp`` pipeline.

    Returns:
        A list of token strings, in document order, whose coarse
        part-of-speech tag is ``"NOUN"``. Empty if no token qualifies.
    """
    return [tok.text for tok in nlp(text) if tok.pos_ == "NOUN"]
def extract_food_phrases(text):
    """Split delimited text into phrases and keep those containing a noun.

    The delimiter is chosen by inspecting *text*: a slash takes priority
    over a comma. Text containing neither is returned unchanged as a
    one-element list, without being run through the spaCy pipeline.

    Args:
        text: The raw string to split, e.g. ``"apples, bananas"``.

    Returns:
        A list of the whitespace-stripped fragments of *text*, in their
        original order, for which the pipeline tags at least one token as
        ``"NOUN"``. If *text* contains neither ``/`` nor ``,`` the result
        is ``[text]`` exactly as given (not stripped, not filtered).
    """
    # Determine the delimiter; '/' wins when both characters are present.
    if '/' in text:
        delimiter = '/'
    elif ',' in text:
        delimiter = ','
    else:
        # Not comma- or slash-delimited: return the text as is.
        # This is an edge case that is meant to be handled later.
        return [text]

    food_items = []
    for raw_item in text.split(delimiter):
        item = raw_item.strip()
        if not item:
            # An empty fragment (e.g. from "a,,b" or a trailing delimiter)
            # has no tokens and therefore no noun, so skip the pipeline call.
            continue
        # Keep the fragment as soon as any of its tokens is tagged as a noun.
        if any(token.pos_ == "NOUN" for token in nlp(item)):
            food_items.append(item)
    return food_items