# OMG/src/prompt_attention/seq_aligner.py

import torch
import numpy as np


def get_word_inds(text: str, word_place, tokenizer):
    """Return the token indices in the encoded ``text`` that cover a given word.

    ``word_place`` may be a word (str), a single word position (int), or a
    list of word positions; it is normalized to a list of positions below.
    """
    split_text = text.split(" ")
    if type(word_place) is str:
        # Find every position where the word occurs.
        word_place = [i for i, word in enumerate(split_text) if word_place == word]
    elif type(word_place) is int:
        word_place = [word_place]
    out = []
    if len(word_place) > 0:
        # Decode each token id back to text, dropping the BOS/EOS tokens
        # ([1:-1]) and "#" subword markers so pieces can be matched to words.
        words_encode = [tokenizer.decode([item]).strip("#") for item in tokenizer.encode(text)][1:-1]
        cur_len, ptr = 0, 0
        for i in range(len(words_encode)):
            cur_len += len(words_encode[i])
            if ptr in word_place:
                out.append(i + 1)  # +1 accounts for the BOS token
            if cur_len >= len(split_text[ptr]):
                ptr += 1
                cur_len = 0
    return np.array(out)
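

# Hedged usage sketch (not part of the original file): any tokenizer exposing
# encode()/decode() with BOS/EOS special tokens should work; the model name
# below is an assumption for illustration only.
#
#   from transformers import CLIPTokenizer
#   tok = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
#   get_word_inds("a photo of a cat", "cat", tok)
#   # -> array([5]) if every word encodes to one token (BOS shifts indices by 1)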


def get_replacement_mapper_(x: str, y: str, tokenizer, max_len=77):
    words_x = x.split(' ')
    words_y = y.split(' ')
    if len(words_x) != len(words_y):
        raise ValueError(f"attention replacement edit can only be applied on prompts with the same length"
                         f" but prompt A has {len(words_x)} words and prompt B has {len(words_y)} words.")
    # Word positions where the two prompts differ, and the token indices
    # covering those words in each prompt.
    inds_replace = [i for i in range(len(words_y)) if words_y[i] != words_x[i]]
    inds_source = [get_word_inds(x, i, tokenizer) for i in inds_replace]
    inds_target = [get_word_inds(y, i, tokenizer) for i in inds_replace]
    mapper = np.zeros((max_len, max_len))
    i = j = 0
    cur_inds = 0
    while i < max_len and j < max_len:
        if cur_inds < len(inds_source) and inds_source[cur_inds][0] == i:
            # A replaced word starts at token i: map its source tokens to its
            # target tokens, spreading weight evenly if the token counts differ.
            inds_source_, inds_target_ = inds_source[cur_inds], inds_target[cur_inds]
            if len(inds_source_) == len(inds_target_):
                mapper[inds_source_, inds_target_] = 1
            else:
                ratio = 1 / len(inds_target_)
                for i_t in inds_target_:
                    mapper[inds_source_, i_t] = ratio
            cur_inds += 1
            i += len(inds_source_)
            j += len(inds_target_)
        elif cur_inds < len(inds_source):
            # Unchanged token before the last replacement: identity mapping.
            mapper[i, j] = 1
            i += 1
            j += 1
        else:
            # Past the last replacement: identity on the remaining positions.
            mapper[j, j] = 1
            i += 1
            j += 1
    return torch.from_numpy(mapper).float()
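

# Sketch of how the mapper is typically consumed (an assumption, mirroring
# prompt-to-prompt attention replacement rather than code from this file):
# given source-prompt cross-attention maps whose last axis is the token axis
# of length max_len, the target prompt's maps come from a matrix product.
#
#   attn_target = attn_source @ mapper  # (..., max_len) x (max_len, max_len)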


def get_replacement_mapper(prompts, tokenizer, max_len=77):
    # Build one token-alignment mapper from the first prompt to each other prompt.
    x_seq = prompts[0]
    mappers = []
    for i in range(1, len(prompts)):
        mapper = get_replacement_mapper_(x_seq, prompts[i], tokenizer, max_len)
        mappers.append(mapper)
    return torch.stack(mappers)
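

# Minimal end-to-end sketch, assuming a transformers tokenizer is available;
# the prompts and model name here are illustrative only.
#
#   from transformers import CLIPTokenizer
#   tok = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
#   prompts = ["a photo of a cat", "a photo of a dog"]
#   mappers = get_replacement_mapper(prompts, tok)  # shape: (1, 77, 77)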