fffiloni's picture
Upload 6 files
3bbe5bd
raw
history blame
1.77 kB
import random
import nltk
nltk.data.path.append('/mnt/data/nltk_data')
import numpy as np
from utils.constants import IMAGENET_DEFAULT_TEMPLATES
def get_tag(tokenized, tags):
    """Return the words in ``tokenized`` whose POS tag is one of ``tags``.

    Args:
        tokenized: list of word tokens (e.g. output of ``nltk.word_tokenize``).
        tags: a single POS-tag string (e.g. ``'NN'``) or a list/tuple of them.

    Returns:
        List of matching words in original order; duplicates are kept.
    """
    if not isinstance(tags, (list, tuple)):
        tags = [tags]
    # Set membership replaces the original O(len(tags)) inner scan per token.
    wanted = set(tags)
    return [word for word, pos in nltk.pos_tag(tokenized) if pos in wanted]
def get_noun_phrase(tokenized):
    """Extract noun phrases from a tokenized sentence via chunk parsing.

    The chunk grammar is taken from the Su Nam Kim paper: an NBAR is a run
    of nouns/adjectives terminated by a noun; an NP is an NBAR, optionally
    two NBARs joined by a preposition (in/of/...).

    Args:
        tokenized: list of word tokens.

    Returns:
        List of unique noun-phrase strings, in order of first appearance.
    """
    # Taken from Su Nam Kim Paper...
    grammar = r"""
        NBAR:
            {<NN.*|JJ>*<NN.*>} # Nouns and Adjectives, terminated with Nouns
        NP:
            {<NBAR>}
            {<NBAR><IN><NBAR>} # Above, connected with in/of/etc...
    """
    chunker = nltk.RegexpParser(grammar)
    chunked = chunker.parse(nltk.pos_tag(tokenized))
    continuous_chunk = []
    current_chunk = []

    for subtree in chunked:
        if isinstance(subtree, nltk.Tree):
            # Inside a chunk: collect its words; consecutive chunks merge.
            current_chunk.append(' '.join(token for token, pos in subtree.leaves()))
        elif current_chunk:
            # A non-chunk token ends the current run: emit it (deduplicated).
            named_entity = ' '.join(current_chunk)
            if named_entity not in continuous_chunk:
                continuous_chunk.append(named_entity)
            current_chunk = []

    # BUG FIX: flush a trailing run. The original only emitted a chunk when a
    # non-Tree token followed it, so a noun phrase ending the sentence
    # (the common case, e.g. "a photo of a dog") was silently dropped.
    if current_chunk:
        named_entity = ' '.join(current_chunk)
        if named_entity not in continuous_chunk:
            continuous_chunk.append(named_entity)

    return continuous_chunk
def text_noun_with_prompt_all(text, phrase_prob=0.0, append_text=True):
    """Build prompt sentences from the nouns (or noun phrases) of ``text``.

    With probability ``phrase_prob`` full noun phrases are extracted;
    otherwise single-word nouns (NN/NNS/NNP) are used. Each noun is then
    formatted into a randomly chosen ImageNet prompt template.

    Args:
        text: input sentence.
        phrase_prob: probability of using noun phrases instead of single nouns.
        append_text: when True, also append the raw ``text`` to both outputs.

    Returns:
        Tuple ``(prompt_texts, nouns)`` — parallel lists of prompts and the
        nouns/phrases they were built from.
    """
    tokens = nltk.word_tokenize(text)
    use_phrases = random.random() < phrase_prob
    nouns = get_noun_phrase(tokens) if use_phrases else get_tag(tokens, ['NN', 'NNS', 'NNP'])
    prompt_texts = [
        np.random.choice(IMAGENET_DEFAULT_TEMPLATES).format(noun)
        for noun in nouns
    ]
    if append_text:
        prompt_texts.append(text)
        nouns.append(text)
    return prompt_texts, nouns