import re
from functools import lru_cache


@lru_cache(maxsize=1)
def _get_nlp():
    """Load and cache the spaCy transformer pipeline on first use.

    Loading is deferred so that importing this module has no heavy side
    effects and does not require the model to be installed up front.
    """
    import spacy  # local import: heavy, optional dependency

    return spacy.load("en_core_web_trf")


def analyze_text(text, *, verbose=False):
    """Analyze *text* and segment it into noun-phrase items.

    Delimiters ('/' and ',') are normalized to commas for parsing, while the
    original positions of slashes are remembered so slash-joined items can be
    recombined afterwards (e.g. "a/b" stays one item "a/b").

    Parameters:
        text: the raw input string.
        verbose: keyword-only; when True, print each token's POS/dep/head
            for debugging (the original always printed; now opt-in).

    Returns:
        (is_single_noun_phrase, delimiter, combined_items) where
        is_single_noun_phrase is True when exactly one non-delimiter item was
        found, delimiter comes from determine_delimiter(text), and
        combined_items is the list of extracted item strings.
    """
    # Positions of '/' in the original text.  The normalization below replaces
    # each '/' with ',' (same length), so these indices are equally valid in
    # normalized_text.
    original_slash_positions = [m.start() for m in re.finditer(r'\/', text)]

    # Replace different delimiters with a uniform delimiter (comma).
    normalized_text = re.sub(r'[\/,]', ',', text)
    doc = _get_nlp()(normalized_text)

    if verbose:
        # Debug dump of the parse; opt-in rather than unconditional.
        for token in doc:
            print(f"Text: {token.text}, POS: {token.pos_}, Dep: {token.dep_}, Head: {token.head.text}")

    items = []
    current_item = []
    current_position = 0
    root_noun_found = False

    for token in doc:
        # BUG FIX: search normalized_text, not the original text.  Tokens come
        # from normalized_text; a ',' that used to be '/' does not exist at
        # that position in the original, so text.find() would return -1 or a
        # later, wrong index and the slash-position check below would fail.
        token_start = normalized_text.find(token.text, current_position)
        if token_start == -1:
            # Defensive: should not happen for tokenizer output, but do not
            # let a mismatch corrupt subsequent position tracking.
            token_start = current_position
        token_end = token_start + len(token.text)

        if token.pos_ == 'PUNCT' and token.text == ',':
            # A comma finalizes the current item once its head noun was seen.
            if root_noun_found:
                items.append(" ".join(current_item))
                current_item = []
                root_noun_found = False
                # Record whether the comma was originally a slash.
                if token_start in original_slash_positions:
                    items.append('/')
                else:
                    items.append(',')
        else:
            # Compound nouns and adjectives extend the current item.
            if token.dep_ in ('compound', 'amod'):
                current_item.append(token.text)
            elif token.dep_ == 'ROOT' and token.pos_ == 'NOUN':
                current_item.append(token.text)
                root_noun_found = True
            elif token.dep_ == 'appos':
                if current_item:
                    current_item.append(token.text)
                else:
                    current_item = [token.text]
                root_noun_found = True
            else:
                current_item.append(token.text)
        current_position = token_end

    # Add the last item if it exists.
    if current_item:
        items.append(" ".join(current_item))

    # Merge word runs between delimiter markers back into single strings.
    final_items = []
    temp_item = []
    for item in items:
        if item in (',', '/'):
            if temp_item:
                final_items.append("".join(temp_item).strip())
                temp_item = []
            if item == '/':
                final_items.append('/')
        else:
            temp_item.append(item + " ")
    if temp_item:
        final_items.append("".join(temp_item).strip())

    # Combine items separated by slashes into single items.
    combined_items = []
    i = 0
    while i < len(final_items):
        if final_items[i] == '/':
            # BUG FIX: the original did combined_items[-1] += ... blindly,
            # which raises IndexError on a leading '/' (empty combined_items)
            # or a trailing '/' (no final_items[i + 1]).  Guard both.
            if combined_items and i + 1 < len(final_items):
                combined_items[-1] += '/' + final_items[i + 1]
                i += 2
            else:
                i += 1
        else:
            combined_items.append(final_items[i])
            i += 1

    # Determine if the text is a single noun phrase or multiple items.
    non_delimiter_items = [item for item in combined_items if item not in (',', '/')]
    is_single_noun_phrase = len(non_delimiter_items) == 1
    delimiter = determine_delimiter(text)

    return is_single_noun_phrase, delimiter, combined_items


def determine_delimiter(text):
    """Return the delimiter character that best splits *text*.

    Preference order: '/' when slashes are present and at least as numerous
    as commas (slash is rarer, so its presence is more meaningful), then ','
    when any comma is present, else ' '.
    """
    number_of_slashes = text.count('/')
    number_of_commas = text.count(',')

    if number_of_slashes > 0 and number_of_slashes >= number_of_commas:
        # Prefer slash over comma, since its rarer.
        return '/'
    elif number_of_commas > 0:
        return ','
    else:
        return ' '


def extract_items(text):
    """Split *text* into items using the delimiter chosen by analyze_text.

    Returns [text] unchanged when the whole input parses as a single noun
    phrase; otherwise a naive split on the detected delimiter.
    """
    is_single_noun_phrase, delimiter, _ = analyze_text(text)

    if is_single_noun_phrase:
        return [text]
    return text.split(delimiter)