import spacy
import re

# Load the transformer-based spaCy English pipeline once at import time:
# loading is expensive, so the pipeline is shared by every call below.
nlp = spacy.load("en_core_web_trf")


def _detect_delimiter(text):
    """Return the delimiter used in *text*: '/' takes precedence over ','.

    Returns None when neither delimiter is present, signalling the callers'
    "no delimiter" edge case.
    """
    if '/' in text:
        return '/'
    if ',' in text:
        return ','
    return None


def get_nouns(text):
    """Return the surface text of every token tagged NOUN in *text*."""
    doc = nlp(text)
    return [token.text for token in doc if token.pos_ == "NOUN"]


def extract_food_phrases(text):
    """Split *text* on '/' or ',' and keep only items containing a noun.

    If *text* has neither delimiter, it is returned unchanged as a
    single-element list (edge case handled by the caller).
    """
    delimiter = _detect_delimiter(text)
    if delimiter is None:
        # Not comma or slash delimited: return the text as is.
        return [text]

    items = [item.strip() for item in text.split(delimiter)]

    # An item qualifies as a food phrase when it contains at least one
    # NOUN token according to the spaCy POS tagger.
    food_items = []
    for item in items:
        doc = nlp(item)
        if any(token.pos_ == "NOUN" for token in doc):
            food_items.append(item)
    return food_items


def extract_items(text):
    """Return the noun-bearing items from *text*, or all items as a fallback.

    When no delimited item contains a noun, every item is returned so that
    nothing is silently dropped.
    """
    delimiter = _detect_delimiter(text)
    if delimiter is None:
        # Not comma or slash delimited: return the text as is.
        return [text]

    food_items = extract_food_phrases(text)
    if food_items:
        return food_items

    # Fallback: no item was tagged as containing a noun. In the original
    # logic `food_items` is empty here, so the result is simply every
    # stripped item from the split.
    return [item.strip() for item in text.split(delimiter)]