from pathlib import Path
import time
import os
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from optimum.onnxruntime import ORTModelForCausalLM
import gradio as gr
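# Configuration comes from environment variables, so the Space can be pointed
# at a different dart checkpoint or backend without code changes.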
MODEL_NAME = os.environ.get("MODEL_NAME", "p1atdev/dart-v1-sft")
HF_READ_TOKEN = os.environ.get("HF_READ_TOKEN")
MODEL_BACKEND = os.environ.get("MODEL_BACKEND", "ONNX (quantized)")
tokenizer = AutoTokenizer.from_pretrained(
MODEL_NAME,
trust_remote_code=True,
token=HF_READ_TOKEN,
)
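# Load all three backends up front: the plain PyTorch model plus the ONNX
# Runtime exports (normal and quantized) expected to be present in the model
# repo. Generation dispatches between them by name via MODEL_BACKEND_MAP.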
model = {
"default": AutoModelForCausalLM.from_pretrained(
MODEL_NAME,
token=HF_READ_TOKEN,
),
"ort": ORTModelForCausalLM.from_pretrained(
MODEL_NAME,
),
"ort_qantized": ORTModelForCausalLM.from_pretrained(
MODEL_NAME,
file_name="model_quantized.onnx",
),
}
MODEL_BACKEND_MAP = {
"Default": "default",
"ONNX (normal)": "ort",
"ONNX (quantized)": "ort_qantized",
}
try:
    model["default"].to("cuda")
except Exception:
    print("No GPU available; keeping the PyTorch model on CPU")
try:
    model["default"] = torch.compile(model["default"])
except Exception:
    print("torch.compile is not supported in this environment")
BOS = "<|bos|>"
EOS = "<|eos|>"
RATING_BOS = "<rating>"
RATING_EOS = "</rating>"
COPYRIGHT_BOS = "<copyright>"
COPYRIGHT_EOS = "</copyright>"
CHARACTER_BOS = "<character>"
CHARACTER_EOS = "</character>"
GENERAL_BOS = "<general>"
GENERAL_EOS = "</general>"
INPUT_END = "<|input_end|>"
LENGTH_VERY_SHORT = "<|very_short|>"
LENGTH_SHORT = "<|short|>"
LENGTH_LONG = "<|long|>"
LENGTH_VERY_LONG = "<|very_long|>"
RATING_BOS_ID = tokenizer.convert_tokens_to_ids(RATING_BOS)
RATING_EOS_ID = tokenizer.convert_tokens_to_ids(RATING_EOS)
COPYRIGHT_BOS_ID = tokenizer.convert_tokens_to_ids(COPYRIGHT_BOS)
COPYRIGHT_EOS_ID = tokenizer.convert_tokens_to_ids(COPYRIGHT_EOS)
CHARACTER_BOS_ID = tokenizer.convert_tokens_to_ids(CHARACTER_BOS)
CHARACTER_EOS_ID = tokenizer.convert_tokens_to_ids(CHARACTER_EOS)
GENERAL_BOS_ID = tokenizer.convert_tokens_to_ids(GENERAL_BOS)
GENERAL_EOS_ID = tokenizer.convert_tokens_to_ids(GENERAL_EOS)
assert isinstance(RATING_BOS_ID, int)
assert isinstance(RATING_EOS_ID, int)
assert isinstance(COPYRIGHT_BOS_ID, int)
assert isinstance(COPYRIGHT_EOS_ID, int)
assert isinstance(CHARACTER_BOS_ID, int)
assert isinstance(CHARACTER_EOS_ID, int)
assert isinstance(GENERAL_BOS_ID, int)
assert isinstance(GENERAL_EOS_ID, int)
SPECIAL_TAGS = [
BOS,
EOS,
RATING_BOS,
RATING_EOS,
COPYRIGHT_BOS,
COPYRIGHT_EOS,
CHARACTER_BOS,
CHARACTER_EOS,
GENERAL_BOS,
GENERAL_EOS,
INPUT_END,
LENGTH_VERY_SHORT,
LENGTH_SHORT,
LENGTH_LONG,
LENGTH_VERY_LONG,
]
SPECIAL_TAG_IDS = tokenizer.convert_tokens_to_ids(SPECIAL_TAGS)
assert isinstance(SPECIAL_TAG_IDS, list)
assert all(token_id != tokenizer.unk_token_id for token_id in SPECIAL_TAG_IDS)
RATING_TAGS = {
"sfw": "rating:sfw",
"nsfw": "rating:nsfw",
"general": "rating:general",
"sensitive": "rating:sensitive",
"questionable": "rating:questionable",
"explicit": "rating:explicit",
}
RATING_TAG_IDS = {k: tokenizer.convert_tokens_to_ids(v) for k, v in RATING_TAGS.items()}
LENGTH_TAGS = {
"very short": LENGTH_VERY_SHORT,
"short": LENGTH_SHORT,
"long": LENGTH_LONG,
"very long": LENGTH_VERY_LONG,
}
def load_tags(path: str | Path):
    """Load a newline-delimited tag file, skipping blank lines."""
    if isinstance(path, str):
        path = Path(path)
    with open(path, "r", encoding="utf-8") as file:
        lines = [line.strip() for line in file if line.strip() != ""]
    return lines
COPYRIGHT_TAGS_LIST: list[str] = load_tags("./tags/copyright.txt")
CHARACTER_TAGS_LIST: list[str] = load_tags("./tags/character.txt")
PEOPLE_TAGS_LIST: list[str] = load_tags("./tags/people.txt")
PEOPLE_TAG_IDS_LIST = tokenizer.convert_tokens_to_ids(PEOPLE_TAGS_LIST)
assert isinstance(PEOPLE_TAG_IDS_LIST, list)
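# Core generation routine. When negative_input_text is given, classifier-free
# guidance is applied through transformers' negative_prompt_ids and
# guidance_scale arguments; no_repeat_ngram_size=1 keeps any token (and hence
# any tag) from appearing twice.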
@torch.no_grad()
def generate(
input_text: str,
model_backend: str,
max_new_tokens: int = 128,
min_new_tokens: int = 0,
do_sample: bool = True,
temperature: float = 1.0,
    top_p: float = 1.0,
top_k: int = 20,
num_beams: int = 1,
bad_words_ids: list[int] | None = None,
cfg_scale: float = 1.5,
negative_input_text: str | None = None,
) -> list[int]:
    target_model = model[MODEL_BACKEND_MAP[model_backend]]
    inputs = tokenizer(
        input_text,
        return_tensors="pt",
    ).input_ids.to(target_model.device)
    negative_inputs = (
        tokenizer(
            negative_input_text,
            return_tensors="pt",
        ).input_ids.to(target_model.device)
        if negative_input_text is not None
        else None
    )
    generated = target_model.generate(
inputs,
max_new_tokens=max_new_tokens,
min_new_tokens=min_new_tokens,
do_sample=do_sample,
temperature=temperature,
top_p=top_p,
top_k=top_k,
num_beams=num_beams,
bad_words_ids=(
[[token] for token in bad_words_ids] if bad_words_ids is not None else None
),
negative_prompt_ids=negative_inputs,
guidance_scale=cfg_scale,
no_repeat_ngram_size=1,
)[0]
return generated.tolist()
def decode_normal(token_ids: list[int], skip_special_tokens: bool = True):
return tokenizer.decode(token_ids, skip_special_tokens=skip_special_tokens)
def decode_general_only(token_ids: list[int]):
    # Keep only the general-tag section, then sort its tags alphabetically.
    token_ids = token_ids[token_ids.index(GENERAL_BOS_ID) :]
    decoded = tokenizer.decode(token_ids, skip_special_tokens=True)
    return ", ".join(sorted(decoded.split(", ")))
def split_people_tokens_part(token_ids: list[int]):
people_tokens = []
other_tokens = []
for token in token_ids:
if token in PEOPLE_TAG_IDS_LIST:
people_tokens.append(token)
else:
other_tokens.append(token)
return people_tokens, other_tokens
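# Reorder the generated tags into Animagine XL v3 prompt order: people,
# characters, copyrights, remaining general tags, then rating.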
def decode_animagine(token_ids: list[int]):
    def get_part(eos_token_id: int, remains_part: list[int]):
        # Collect tokens up to the EOS marker. The returned remainder still
        # begins with that marker, but all special tokens are filtered out
        # before decoding below, so this is harmless.
        part = []
        for i, token_id in enumerate(remains_part):
            if token_id == eos_token_id:
                return part, remains_part[i:]
            part.append(token_id)
        raise Exception("The provided EOS token was not found in the token_ids.")
# get each part
rating_part, remains = get_part(RATING_EOS_ID, token_ids)
copyright_part, remains = get_part(COPYRIGHT_EOS_ID, remains)
character_part, remains = get_part(CHARACTER_EOS_ID, remains)
general_part, _ = get_part(GENERAL_EOS_ID, remains)
    # separate people tags (1girl, 1boy, no humans...)
people_part, other_general_part = split_people_tokens_part(general_part)
# remove "rating:sfw"
rating_part = [token for token in rating_part if token != RATING_TAG_IDS["sfw"]]
# AnimagineXL v3 style order
rearranged_tokens = (
people_part + character_part + copyright_part + other_general_part + rating_part
)
rearranged_tokens = [
token for token in rearranged_tokens if token not in SPECIAL_TAG_IDS
]
decoded = tokenizer.decode(rearranged_tokens, skip_special_tokens=True)
# fix "nsfw" tag
decoded = decoded.replace("rating:nsfw", "nsfw")
return decoded
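# Pair the specific rating tag with its parent category
# (rating:sfw or rating:nsfw), as the prompt format expects both.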
def prepare_rating_tags(rating: str):
tag = RATING_TAGS[rating]
if tag in [RATING_TAGS["general"], RATING_TAGS["sensitive"]]:
parent = RATING_TAGS["sfw"]
else:
parent = RATING_TAGS["nsfw"]
return f"{parent}, {tag}"
def handle_inputs(
rating_tags: str,
copyright_tags_list: list[str],
character_tags_list: list[str],
general_tags: str,
ban_tags: str,
do_cfg: bool = False,
cfg_scale: float = 1.5,
negative_tags: str = "",
total_token_length: str = "long",
max_new_tokens: int = 128,
min_new_tokens: int = 0,
temperature: float = 1.0,
top_p: float = 1.0,
top_k: int = 20,
num_beams: int = 1,
# model_backend: str = "Default",
):
"""
Returns:
[
output_tags_natural,
output_tags_general_only,
output_tags_animagine,
input_prompt_raw,
output_tags_raw,
elapsed_time,
output_tags_natural_copy_btn,
output_tags_general_only_copy_btn,
output_tags_animagine_copy_btn
]
"""
start_time = time.time()
copyright_tags = ", ".join(copyright_tags_list)
character_tags = ", ".join(character_tags_list)
token_length_tag = LENGTH_TAGS[total_token_length]
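    # This relies on the custom chat template bundled with the dart tokenizer,
    # which renders this field mapping into the <rating>...<general>... prompt
    # string (hence the plain dict argument instead of a message list).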
prompt: str = tokenizer.apply_chat_template(
{ # type: ignore
"rating": prepare_rating_tags(rating_tags),
"copyright": copyright_tags,
"character": character_tags,
"general": general_tags,
"length": token_length_tag,
},
tokenize=False,
)
negative_prompt: str = tokenizer.apply_chat_template(
{ # type: ignore
"rating": prepare_rating_tags(rating_tags),
"copyright": "",
"character": "",
"general": negative_tags,
"length": token_length_tag,
},
tokenize=False,
)
    # Negative tags, when present, are appended to the banned words as well.
    ban_words = ", ".join(
        part for part in [ban_tags, negative_tags] if part.strip() != ""
    )
    bad_words_ids = tokenizer.encode_plus(ban_words).input_ids
generated_ids = generate(
prompt,
model_backend=MODEL_BACKEND,
max_new_tokens=max_new_tokens,
min_new_tokens=min_new_tokens,
do_sample=True,
temperature=temperature,
top_p=top_p,
top_k=top_k,
num_beams=num_beams,
bad_words_ids=bad_words_ids if len(bad_words_ids) > 0 else None,
cfg_scale=cfg_scale,
negative_input_text=negative_prompt if do_cfg else None,
)
decoded_normal = decode_normal(generated_ids, skip_special_tokens=True)
decoded_general_only = decode_general_only(generated_ids)
decoded_animagine = decode_animagine(generated_ids)
decoded_raw = decode_normal(generated_ids, skip_special_tokens=False)
end_time = time.time()
elapsed_time = f"Elapsed: {(end_time - start_time) * 1000:.2f} ms"
# update visibility of buttons
set_visible = gr.update(visible=True)
return [
decoded_normal,
decoded_general_only,
decoded_animagine,
prompt,
decoded_raw,
elapsed_time,
set_visible,
set_visible,
set_visible,
]
# ref: https://qiita.com/tregu148/items/fccccbbc47d966dd2fc2
def copy_text(_text: str):
    gr.Info("Copied!")
def get_model_backend():
return MODEL_BACKEND
def get_length_tags():
return LENGTH_TAGS
def get_copyright_tags_list():
return COPYRIGHT_TAGS_LIST
def get_character_tags_list():
return CHARACTER_TAGS_LIST
def add_tags(
    prompt, rating_tags, copyright_tags_list, character_tags_list, general_tags,
    ban_tags, do_cfg, cfg_scale, negative_tags, total_token_length,
    max_new_tokens, min_new_tokens, temperature, top_p, top_k, num_beams,
):
    # Regenerate and return only the Animagine-style output (index 2 of the
    # handle_inputs result); the incoming prompt value is discarded.
    prompt = handle_inputs(
        rating_tags, copyright_tags_list, character_tags_list, general_tags,
        ban_tags, do_cfg, cfg_scale, negative_tags, total_token_length,
        max_new_tokens, min_new_tokens, temperature, top_p, top_k, num_beams,
    )[2]
    return prompt
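# Illustrative only: how handle_inputs can be called outside the UI. The tag
# values below are hypothetical examples, not taken from the bundled tag lists.
#
#   outputs = handle_inputs(
#       rating_tags="general",
#       copyright_tags_list=[],
#       character_tags_list=[],
#       general_tags="1girl, solo",
#       ban_tags="",
#   )
#   print(outputs[2])  # Animagine XL v3 style prompt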