Spaces:

madebybread
/

brightly-ai

Paused

App Files Files Community

brightly-ai / playground.py

beweinreich

updated chatgpt audit, and added flagged col

73fda7b about 1 month ago

raw

history blame

No virus

2.58 kB

	import spacy
	import re

	# Load the spaCy "en_core_web_trf" pipeline once, at import time.
	# analyze_text() below reuses this module-level `nlp` object on every call.
	nlp = spacy.load("en_core_web_trf")

	def analyze_text(text):
	# Parse `text` with the module-level spaCy pipeline `nlp` and group its
	# tokens into items.
	#
	# Returns a 3-tuple (is_single_noun_phrase, delimiter, items):
	#   is_single_noun_phrase - True when exactly one item was collected
	#   delimiter             - determine_delimiter() applied to the ORIGINAL text
	#   items                 - list of strings, each a space-joined token group
	#
	# Side effect: prints one debug line per token to stdout.
	#
	# NOTE(review): this copy of the file has lost its indentation, so block
	# nesting cannot be read off the page. In particular it is unclear whether
	# the `if token.head.dep_ == 'ROOT':` check further down sits beside the
	# preceding `if current_item: ... else: ...` or inside its `else` branch.
	# Confirm against the original file before relying on the comments below.

	# Normalize delimiters: every '/' becomes ',' (existing commas are unchanged)
	# so the grouping loop below only has to look for commas.
	normalized_text = re.sub(r'[\/,]', ',', text)

	doc = nlp(normalized_text)

	# Debug output: dump each token with its POS tag, dependency label and head.
	for token in doc:
	print(f"Text: {token.text}, POS: {token.pos_}, Dep: {token.dep_}, Head: {token.head.text}")

	# `items` collects finished groups; `current_item` holds the token texts of
	# the group currently being built.
	items = []
	current_item = []

	for token in doc:
	# A comma tagged as punctuation closes the group in progress (if any).
	# The comma itself is never added to a group.
	if token.pos_ == 'PUNCT' and token.text == ',':
	if current_item:
	items.append(" ".join(current_item))
	current_item = []
	else:
	# Modifiers (compound-noun parts and adjectival modifiers) extend the
	# group in progress.
	if token.dep_ in ('compound', 'amod'):
	current_item.append(token.text)
	# A ROOT or appositive token is added to the group as well; both branches
	# of the inner if/else leave `current_item` ending with this token.
	elif token.dep_ in ('ROOT', 'appos'):
	if current_item:
	current_item.append(token.text)
	else:
	current_item = [token.text]
	# When this token's head is the ROOT of the parse, the group is closed
	# immediately (see the NOTE(review) above about this check's nesting).
	if token.head.dep_ == 'ROOT':
	items.append(" ".join(current_item))
	current_item = []
	# Every other token (any remaining dependency label) is simply appended.
	else:
	current_item.append(token.text)

	# Flush a group left open when the tokens ran out.
	if current_item:
	items.append(" ".join(current_item))

	# Exactly one collected group is treated as a single noun phrase; zero
	# or several groups are not.
	is_single_noun_phrase = len(items) == 1

	# The delimiter is chosen from the original text, not the normalized one,
	# so a '/' in the input can still be reported.
	delimiter = determine_delimiter(text)

	return is_single_noun_phrase, delimiter, items

	def determine_delimiter(text):
	    """Pick the character that most likely separates items in *text*.

	    A slash is chosen when at least one is present and slashes are at least
	    as numerous as commas. Otherwise a comma is chosen when at least one is
	    present. A single space is the fallback, returned even when *text*
	    contains no space at all.

	    Args:
	        text: The raw string to inspect.

	    Returns:
	        One of ``'/'``, ``','`` or ``' '``.
	    """
	    slash_count = text.count('/')
	    comma_count = text.count(',')

	    # Ties go to the slash: it is the rarer character in ordinary text, so
	    # its presence is the stronger signal.
	    if slash_count > 0 and slash_count >= comma_count:
	        return '/'
	    if comma_count > 0:
	        return ','
	    return ' '

	def extract_items(text):
	    """Split *text* into its individual items.

	    The text is first run through analyze_text(). When that reports a single
	    noun phrase, the text is returned unchanged as a one-element list.
	    Otherwise the text is split on the delimiter analyze_text() reported.

	    Args:
	        text: The raw string to split.

	    Returns:
	        A list of strings. In the multi-item case each piece has its
	        surrounding whitespace removed and empty pieces are omitted.
	    """
	    is_single_noun_phrase, delimiter, _ = analyze_text(text)

	    if is_single_noun_phrase:
	        return [text]

	    # A bare split keeps the space that follows each delimiter
	    # ("a, b" -> ["a", " b"]) and produces "" for doubled or trailing
	    # delimiters; trim each piece and discard the empty ones.
	    trimmed = (piece.strip() for piece in text.split(delimiter))
	    return [piece for piece in trimmed if piece]

	# Quick manual check: run the analyzer over the sample phrases below and
	# print what it reports for each one.
	texts = ["salads, sandwiches & sushi"]

	for text in texts:
	    print(f"Analyzing: {text}")
	    is_single_noun_phrase, delimiter, items = analyze_text(text)
	    # One call emits the three result lines followed by a blank separator line.
	    print(
	        f"Is single noun phrase: {is_single_noun_phrase}",
	        f"Delimiter: {delimiter}",
	        f"Items: {items}",
	        sep="\n",
	        end="\n\n",
	    )