Spaces:

madebybread
/

brightly-ai

Paused

brightly-ai / multi_food_item_detector.py

prefer food items, but if there are none, then let's skip it

4a1be51 about 1 month ago

No virus

1.75 kB

	import spacy
	import re

	# Load the spaCy pipeline once at import time; get_nouns and
	# extract_food_phrases both call this shared module-level `nlp` object.
	nlp = spacy.load("en_core_web_trf")

	def get_nouns(text):
	    """Return the text of every token in *text* that spaCy tags as "NOUN".

	    Args:
	        text: The string to run through the module-level ``nlp`` pipeline.

	    Returns:
	        A list with the ``text`` of each token whose ``pos_`` is "NOUN",
	        in the order the tokens appear in the parsed document.
	    """
	    noun_texts = []
	    for tok in nlp(text):
	        # Only tokens tagged exactly "NOUN" are collected.
	        if tok.pos_ == "NOUN":
	            noun_texts.append(tok.text)
	    return noun_texts

	def extract_food_phrases(text):
	    """Split delimited text and keep the pieces that contain a noun.

	    The delimiter is '/' when the text contains one, otherwise ',' when the
	    text contains one. Text with neither delimiter is returned unchanged as
	    a single-element list without being run through the spaCy pipeline.

	    Args:
	        text: The raw string to split.

	    Returns:
	        A list of the whitespace-stripped pieces, in their original order,
	        for which spaCy tags at least one token as "NOUN"; or ``[text]``
	        when the text contains no delimiter.
	    """
	    # '/' takes precedence: text containing both delimiters is split on '/'.
	    if '/' in text:
	        delimiter = '/'
	    elif ',' in text:
	        delimiter = ','
	    else:
	        # Undelimited input is an edge case that is passed through as is.
	        return [text]

	    pieces = [piece.strip() for piece in text.split(delimiter)]

	    # A piece qualifies as soon as one of its tokens is tagged as a noun;
	    # any() stops scanning the piece at the first match.
	    return [
	        piece
	        for piece in pieces
	        if any(token.pos_ == "NOUN" for token in nlp(piece))
	    ]

	def extract_items(text):
	    """Split delimited text into items, preferring the detected food items.

	    Args:
	        text: The raw string to split; '/' is used as the delimiter when
	            present, otherwise ','.

	    Returns:
	        ``[text]`` when the text contains neither delimiter. Otherwise the
	        list returned by ``extract_food_phrases(text)`` when it is
	        non-empty, falling back to every whitespace-stripped piece of the
	        split when that list is empty.
	    """
	    # '/' takes precedence: text containing both delimiters is split on '/'.
	    if '/' in text:
	        delimiter = '/'
	    elif ',' in text:
	        delimiter = ','
	    else:
	        # If it's not comma or slash delimited, return the text as is
	        return [text]

	    food_items = extract_food_phrases(text)
	    if food_items:
	        return food_items

	    # No piece was reported as a food item, so return all split pieces.
	    return [item.strip() for item in text.split(delimiter)]