danbooru-tags-transformer

Runtime error

App Files Files Community

danbooru-tags-transformer / app.py

Plat

initial commit

85549bf 5 months ago

raw

history blame

No virus

15.4 kB

	from pathlib import Path
	import time

	import torch
	from transformers import AutoTokenizer, AutoModelForCausalLM

	import gradio as gr

	# Hugging Face Hub ID of the checkpoint; the tokenizer and the model are both loaded from it.
	MODEL_NAME = "p1atdev/dart-test-1"

	# Both objects are loaded once at import time and shared by every function below.
	# NOTE(review): trust_remote_code=True allows Python code shipped in the Hub repository
	# to run locally during loading -- confirm the repository is trusted (consider pinning a revision).
	tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
	model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, trust_remote_code=True)


	# Marker tokens that delimit the sections of a prompt (used by compose_prompt
	# and looked up in the tokenizer vocabulary below).
	# The sequence markers contain literal pipe characters; a backslash in front of
	# the pipes would become part of the string and no longer match the token.
	BOS = "<|bos|>"
	EOS = "<|eos|>"
	RATING_BOS = "<rating>"
	RATING_EOS = "</rating>"
	COPYRIGHT_BOS = "<copyright>"
	COPYRIGHT_EOS = "</copyright>"
	CHARACTER_BOS = "<character>"
	CHARACTER_EOS = "</character>"
	GENERAL_BOS = "<general>"
	GENERAL_EOS = "</general>"

	# Vocabulary IDs of the section markers, resolved once at import time.
	RATING_BOS_ID = tokenizer.convert_tokens_to_ids(RATING_BOS)
	RATING_EOS_ID = tokenizer.convert_tokens_to_ids(RATING_EOS)
	COPYRIGHT_BOS_ID = tokenizer.convert_tokens_to_ids(COPYRIGHT_BOS)
	COPYRIGHT_EOS_ID = tokenizer.convert_tokens_to_ids(COPYRIGHT_EOS)
	CHARACTER_BOS_ID = tokenizer.convert_tokens_to_ids(CHARACTER_BOS)
	CHARACTER_EOS_ID = tokenizer.convert_tokens_to_ids(CHARACTER_EOS)
	GENERAL_BOS_ID = tokenizer.convert_tokens_to_ids(GENERAL_BOS)
	GENERAL_EOS_ID = tokenizer.convert_tokens_to_ids(GENERAL_EOS)

	# Each lookup passes a single token string, so a single int is expected back.
	# Presumably these asserts exist to narrow the type for static checkers.
	# NOTE(review): asserts are removed under `python -O`, and they do not prove that
	# the marker is a known token -- confirm how the tokenizer maps unknown tokens.
	assert isinstance(RATING_BOS_ID, int)
	assert isinstance(RATING_EOS_ID, int)
	assert isinstance(COPYRIGHT_BOS_ID, int)
	assert isinstance(COPYRIGHT_EOS_ID, int)
	assert isinstance(CHARACTER_BOS_ID, int)
	assert isinstance(CHARACTER_EOS_ID, int)
	assert isinstance(GENERAL_BOS_ID, int)
	assert isinstance(GENERAL_EOS_ID, int)

	# Every structural marker token; decode_animagine filters these IDs out of its
	# rearranged token list before decoding.
	SPECIAL_TAGS = [
	BOS,
	EOS,
	RATING_BOS,
	RATING_EOS,
	COPYRIGHT_BOS,
	COPYRIGHT_EOS,
	CHARACTER_BOS,
	CHARACTER_EOS,
	GENERAL_BOS,
	GENERAL_EOS,
	]

	# A list of tokens is passed in, so a list of IDs is expected back.
	SPECIAL_TAG_IDS = tokenizer.convert_tokens_to_ids(SPECIAL_TAGS)
	assert isinstance(SPECIAL_TAG_IDS, list)


	# Maps a short rating key to the rating tag used in prompts.
	# "sfw" and "nsfw" are the parent tags that prepare_rating_tags pairs with one of
	# the four specific ratings.
	RATING_TAGS = {
	"sfw": "rating:sfw",
	"nsfw": "rating:nsfw",
	"general": "rating:general",
	"sensitive": "rating:sensitive",
	"questionable": "rating:questionable",
	"explicit": "rating:explicit",
	}
	# Same keys, mapped to the tokenizer ID of each rating tag.
	RATING_TAG_IDS = {k: tokenizer.convert_tokens_to_ids(v) for k, v in RATING_TAGS.items()}


	def load_tags(path: str \| Path):
	if isinstance(path, str):
	path = Path(path)

	with open(path, "r", encoding="utf-8") as file:
	lines = [line.strip() for line in file.readlines() if line.strip() != ""]

	return lines


	# Tag vocabularies read from text files at import time (paths are relative to
	# the working directory). The copyright and character lists are the choices
	# offered by the dropdowns in demo().
	COPYRIGHT_TAGS_LIST: list[str] = load_tags("./tags/copyright.txt")
	CHARACTER_TAGS_LIST: list[str] = load_tags("./tags/character.txt")
	PEOPLE_TAGS_LIST: list[str] = load_tags("./tags/people.txt")

	# Token IDs of the people tags; split_people_tokens_part tests membership against this list.
	PEOPLE_TAG_IDS_LIST = tokenizer.convert_tokens_to_ids(PEOPLE_TAGS_LIST)

	# A list of tokens is passed in, so a list of IDs is expected back.
	assert isinstance(PEOPLE_TAG_IDS_LIST, list)


	def compose_prompt(
	    rating: str = "rating:sfw, rating:general",
	    copyright: str = "",
	    character: str = "",
	    general: str = "",
	):
	    """Assemble the model prompt from the four tag sections.

	    The rating, copyright and character sections are each wrapped in their
	    opening and closing markers. The general section is only opened: no
	    closing marker and no end-of-sequence token are appended, so the prompt
	    ends with whatever general tags were passed in.

	    Args:
	        rating: Text placed inside the rating section.
	        copyright: Text placed inside the copyright section.
	        character: Text placed inside the character section.
	        general: Text placed after the opening general marker.

	    Returns:
	        The concatenated prompt string.
	    """
	    rating_section = RATING_BOS + rating + RATING_EOS
	    copyright_section = COPYRIGHT_BOS + copyright + COPYRIGHT_EOS
	    character_section = CHARACTER_BOS + character + CHARACTER_EOS
	    # Deliberately left unclosed.
	    general_section = GENERAL_BOS + general

	    return BOS + rating_section + copyright_section + character_section + general_section


	@torch.no_grad()
	def generate(
	    input_text,
	    max_new_tokens=128,
	    do_sample: bool = True,
	    temperature: float = 1.0,
	    top_p: float = 1,
	    top_k: int = 20,
	    num_beams: int = 1,
	    bad_words_ids: list[int] | None = None,
	) -> list[int]:
	    """Tokenize ``input_text`` and generate a continuation with the model.

	    Args:
	        input_text: Prompt text passed to the tokenizer.
	        max_new_tokens: Forwarded to ``model.generate``.
	        do_sample: Forwarded to ``model.generate``.
	        temperature: Forwarded to ``model.generate``.
	        top_p: Forwarded to ``model.generate``.
	        top_k: Forwarded to ``model.generate``.
	        num_beams: Forwarded to ``model.generate``.
	        bad_words_ids: Flat list of token IDs to ban. ``None`` or an empty
	            list disables banning.

	    Returns:
	        The token IDs of the first returned sequence as a plain list.
	    """
	    input_ids = tokenizer(
	        input_text,
	        return_tensors="pt",
	    ).input_ids

	    # model.generate takes a list of token sequences, so every banned ID is
	    # wrapped in its own one-element list. An empty ban list is mapped to None
	    # rather than forwarded as an empty list.
	    banned_sequences = (
	        [[token] for token in bad_words_ids] if bad_words_ids else None
	    )

	    sequences = model.generate(
	        input_ids,
	        max_new_tokens=max_new_tokens,
	        do_sample=do_sample,
	        temperature=temperature,
	        top_p=top_p,
	        top_k=top_k,
	        num_beams=num_beams,
	        bad_words_ids=banned_sequences,
	        # n-gram size 1: presumably to stop any tag from being emitted twice.
	        no_repeat_ngram_size=1,
	    )

	    # Only one prompt is submitted, so only the first sequence is used.
	    return sequences[0].tolist()


	def decode_normal(token_ids: list[int], skip_special_tokens: bool = True):
	    """Decode ``token_ids`` to text with the module tokenizer.

	    Args:
	        token_ids: Token IDs to decode.
	        skip_special_tokens: Passed straight through to ``tokenizer.decode``.

	    Returns:
	        The decoded string.
	    """
	    decoded_text = tokenizer.decode(
	        token_ids,
	        skip_special_tokens=skip_special_tokens,
	    )
	    return decoded_text


	def decode_general_only(token_ids: list[int]):
	    """Decode only the part of ``token_ids`` from the general marker onwards.

	    Args:
	        token_ids: Token IDs containing the opening general marker.

	    Returns:
	        The decoded text of that tail, with special tokens skipped.

	    Raises:
	        ValueError: If the opening general marker ID is not in ``token_ids``.
	    """
	    general_start = token_ids.index(GENERAL_BOS_ID)
	    general_section = token_ids[general_start:]
	    return tokenizer.decode(general_section, skip_special_tokens=True)


	def split_people_tokens_part(token_ids: list[int]) -> tuple[list[int], list[int]]:
	    """Partition ``token_ids`` into people-tag tokens and all other tokens.

	    Args:
	        token_ids: Token IDs to partition.

	    Returns:
	        A ``(people_tokens, other_tokens)`` pair. Each list keeps the relative
	        order the tokens had in ``token_ids``.
	    """
	    # Build the lookup set once so each membership test is O(1) instead of a
	    # scan over the whole people-tag list.
	    people_ids = set(PEOPLE_TAG_IDS_LIST)

	    people_tokens = [token for token in token_ids if token in people_ids]
	    other_tokens = [token for token in token_ids if token not in people_ids]

	    return people_tokens, other_tokens


	def decode_animagine(token_ids: list[int]):
	    """Decode ``token_ids`` with the tags rearranged into AnimagineXL v3 order.

	    The sequence is cut at the closing marker of each section, the sections
	    are reordered as people tags, character, copyright, remaining general
	    tags, rating, and the marker tokens are removed before decoding. The
	    ``rating:sfw`` token is dropped and ``rating:nsfw`` is rewritten to
	    ``nsfw`` in the decoded text.

	    Args:
	        token_ids: Full token sequence (prompt plus generated tokens).

	    Returns:
	        The decoded, rearranged tag string.

	    Raises:
	        ValueError: If the closing rating, copyright or character marker is
	            missing. A missing closing general marker is tolerated, because
	            generation can stop at ``max_new_tokens`` before emitting it.
	    """

	    def get_part(eos_token_id: int, remains_part: list[int], required: bool = True):
	        """Split ``remains_part`` just before the first ``eos_token_id``.

	        The second element of the result starts at the EOS token itself; the
	        leftover marker tokens are filtered out further below.
	        """
	        try:
	            eos_index = remains_part.index(eos_token_id)
	        except ValueError:
	            if required:
	                raise ValueError(
	                    "The provided EOS token was not found in the token_ids."
	                ) from None
	            # Truncated output: treat everything that is left as the section.
	            return list(remains_part), []

	        return remains_part[:eos_index], remains_part[eos_index:]

	    # get each part
	    rating_part, remains = get_part(RATING_EOS_ID, token_ids)
	    copyright_part, remains = get_part(COPYRIGHT_EOS_ID, remains)
	    character_part, remains = get_part(CHARACTER_EOS_ID, remains)
	    general_part, _ = get_part(GENERAL_EOS_ID, remains, required=False)

	    # separate people tags (1girl, 1boy, no humans...)
	    people_part, other_general_part = split_people_tokens_part(general_part)

	    # remove "rating:sfw"
	    sfw_id = RATING_TAG_IDS["sfw"]
	    rating_part = [token for token in rating_part if token != sfw_id]

	    # AnimagineXL v3 style order
	    rearranged_tokens = (
	        people_part + character_part + copyright_part + other_general_part + rating_part
	    )
	    # Each part still carries the marker tokens that preceded it; drop them.
	    rearranged_tokens = [
	        token for token in rearranged_tokens if token not in SPECIAL_TAG_IDS
	    ]

	    decoded = tokenizer.decode(rearranged_tokens, skip_special_tokens=True)

	    # fix "nsfw" tag
	    decoded = decoded.replace("rating:nsfw", "nsfw")

	    return decoded


	def prepare_rating_tags(rating: str):
	    """Build the rating text for a prompt: the parent tag plus the specific tag.

	    The "general" and "sensitive" tags get the "sfw" parent; every other
	    rating key gets the "nsfw" parent.

	    Args:
	        rating: A key of ``RATING_TAGS``.

	    Returns:
	        ``"<parent tag>, <rating tag>"``.

	    Raises:
	        KeyError: If ``rating`` is not a key of ``RATING_TAGS``.
	    """
	    rating_tag = RATING_TAGS[rating]
	    sfw_children = (RATING_TAGS["general"], RATING_TAGS["sensitive"])

	    parent_key = "sfw" if rating_tag in sfw_children else "nsfw"

	    return ", ".join((RATING_TAGS[parent_key], rating_tag))


	def handle_inputs(
	    rating_tags: str,
	    copyright_tags_list: list[str],
	    character_tags_list: list[str],
	    general_tags: str,
	    ban_tags: str,
	    max_new_tokens: int = 128,
	    temperature: float = 1.0,
	    top_p: float = 1.0,
	    top_k: int = 20,
	    num_beams: int = 1,
	):
	    """Build a prompt from the UI inputs, generate tags and decode them.

	    Args:
	        rating_tags: Rating key understood by ``prepare_rating_tags``.
	        copyright_tags_list: Selected copyright tags.
	        character_tags_list: Selected character tags.
	        general_tags: Free-text general tags that seed the general section.
	        ban_tags: Free text; every token ID it tokenizes to is banned.
	        max_new_tokens: Forwarded to ``generate``.
	        temperature: Forwarded to ``generate``.
	        top_p: Forwarded to ``generate``.
	        top_k: Forwarded to ``generate``.
	        num_beams: Forwarded to ``generate``.

	    Returns:
	        [
	            output_tags_natural,
	            output_tags_general_only,
	            output_tags_animagine,
	            input_prompt_raw,
	            output_tags_raw,
	            elapsed_time,
	        ]
	    """
	    # perf_counter is monotonic, so the measured duration cannot be skewed by
	    # system clock adjustments.
	    start_time = time.perf_counter()

	    prompt = compose_prompt(
	        rating=prepare_rating_tags(rating_tags),
	        copyright=", ".join(copyright_tags_list),
	        character=", ".join(character_tags_list),
	        general=general_tags,
	    )

	    # NOTE(review): every ID the tokenizer produces for the ban text is banned,
	    # which may include separator or special tokens -- confirm against the tokenizer.
	    bad_words_ids = tokenizer.encode_plus(ban_tags).input_ids

	    generated_ids = generate(
	        prompt,
	        max_new_tokens=max_new_tokens,
	        do_sample=True,
	        temperature=temperature,
	        top_p=top_p,
	        top_k=top_k,
	        num_beams=num_beams,
	        bad_words_ids=bad_words_ids if bad_words_ids else None,
	    )

	    decoded_normal = decode_normal(generated_ids, skip_special_tokens=True)
	    decoded_general_only = decode_general_only(generated_ids)
	    decoded_animagine = decode_animagine(generated_ids)
	    decoded_raw = decode_normal(generated_ids, skip_special_tokens=False)

	    elapsed_ms = (time.perf_counter() - start_time) * 1000
	    elapsed_time = f"Elapsed: {elapsed_ms:.2f} ms"

	    return [
	        decoded_normal,
	        decoded_general_only,
	        decoded_animagine,
	        prompt,
	        decoded_raw,
	        elapsed_time,
	    ]


	def demo():
	    """Build the Gradio interface for the tag generator and launch it.

	    The left column holds the inputs (rating, copyright and character tag
	    selection, general and ban tags, generation settings) and the Generate
	    button. The right column shows the decoded outputs, the elapsed time and
	    the raw prompt and output. Clicking Generate calls ``handle_inputs``.
	    """
	    # NOTE(review): the nesting of the layout containers below is a
	    # reconstruction -- confirm it against the intended layout.
	    with gr.Blocks() as ui:
	        with gr.Row():
	            with gr.Column():
	                with gr.Group():
	                    rating_dropdown = gr.Dropdown(
	                        label="Rating",
	                        choices=[
	                            "general",
	                            "sensitive",
	                            "questionable",
	                            "explicit",
	                        ],
	                        value="general",
	                    )

	                with gr.Group():
	                    copyright_tags_mode_dropdown = gr.Dropdown(
	                        label="Copyright tags mode",
	                        choices=[
	                            "None",
	                            "Original",
	                            # "Auto", # TODO: implement these modes
	                            # "Random",
	                            "Custom",
	                        ],
	                        value="None",
	                        interactive=True,
	                    )
	                    copyright_tags_dropdown = gr.Dropdown(
	                        label="Copyright tags",
	                        choices=COPYRIGHT_TAGS_LIST,
	                        value=[],
	                        multiselect=True,
	                        visible=False,
	                    )

	                    def on_change_copyright_tags_dropdown(mode: str):
	                        # The tag picker is shown only in "Custom" mode.
	                        # "Original" and "None" overwrite the selection;
	                        # "Custom" keeps whatever is currently selected.
	                        kwargs: dict = {"visible": mode == "Custom"}
	                        if mode == "Original":
	                            kwargs["value"] = ["original"]
	                        elif mode == "None":
	                            kwargs["value"] = []

	                        return gr.update(**kwargs)

	                with gr.Group():
	                    character_tags_mode_dropdown = gr.Dropdown(
	                        label="Character tags mode",
	                        choices=[
	                            "None",
	                            # "Auto", # TODO: implement these modes
	                            # "Random",
	                            "Custom",
	                        ],
	                        value="None",
	                        interactive=True,
	                    )
	                    character_tags_dropdown = gr.Dropdown(
	                        label="Character tags",
	                        choices=CHARACTER_TAGS_LIST,
	                        value=[],
	                        multiselect=True,
	                        visible=False,
	                    )

	                    def on_change_character_tags_dropdown(mode: str):
	                        # Only visibility changes; the selection is left as is.
	                        kwargs: dict = {"visible": mode == "Custom"}

	                        return gr.update(**kwargs)

	                with gr.Group():
	                    general_tags_textbox = gr.Textbox(
	                        label="General tags",
	                        placeholder="1girl, ...",
	                        lines=4,
	                    )

	                    ban_tags_textbox = gr.Textbox(
	                        label="Ban tags",
	                        placeholder="",
	                        value="",
	                        lines=2,
	                    )

	                with gr.Accordion(label="Generation config", open=False):
	                    with gr.Group():
	                        max_new_tokens_slider = gr.Slider(
	                            label="Max new tokens",
	                            maximum=256,
	                            minimum=1,
	                            step=1,
	                            value=128,
	                        )
	                        # The minimum is 0.1, not 0.0: handle_inputs always
	                        # samples, and sampling needs a strictly positive
	                        # temperature.
	                        temperature_slider = gr.Slider(
	                            label="Temperature (larger is more random)",
	                            maximum=1.0,
	                            minimum=0.1,
	                            step=0.1,
	                            value=1.0,
	                        )
	                        top_p_slider = gr.Slider(
	                            label="Top p (larger is more random)",
	                            maximum=1.0,
	                            minimum=0.0,
	                            step=0.1,
	                            value=1.0,
	                        )
	                        top_k_slider = gr.Slider(
	                            label="Top k (larger is more random)",
	                            maximum=500,
	                            minimum=1,
	                            step=1,
	                            value=100,
	                        )
	                        num_beams_slider = gr.Slider(
	                            label="Number of beams (smaller is more random)",
	                            maximum=10,
	                            minimum=1,
	                            step=1,
	                            value=1,
	                        )

	                generate_btn = gr.Button("Generate", variant="primary")

	            with gr.Column():
	                output_tags_natural = gr.Textbox(
	                    label="Generation result",
	                    # placeholder="tags will be here",
	                    interactive=False,
	                )

	                output_tags_general_only = gr.Textbox(
	                    label="General tags only",
	                    interactive=False,
	                )

	                output_tags_animagine = gr.Textbox(
	                    label="Output tags (AnimagineXL v3 style order)",
	                    # placeholder="tags will be here in Animagine v3 style order",
	                    interactive=False,
	                )

	                elapsed_time_md = gr.Markdown(value="Waiting to generate...")

	                with gr.Accordion(label="Metadata", open=False):
	                    input_prompt_raw = gr.Textbox(
	                        label="Input prompt (raw)",
	                        interactive=False,
	                        lines=4,
	                    )

	                    output_tags_raw = gr.Textbox(
	                        label="Output tags (raw)",
	                        interactive=False,
	                        lines=4,
	                    )

	        copyright_tags_mode_dropdown.change(
	            on_change_copyright_tags_dropdown,
	            inputs=[copyright_tags_mode_dropdown],
	            outputs=[copyright_tags_dropdown],
	        )
	        character_tags_mode_dropdown.change(
	            on_change_character_tags_dropdown,
	            inputs=[character_tags_mode_dropdown],
	            outputs=[character_tags_dropdown],
	        )

	        # The order of inputs matches the positional parameters of
	        # handle_inputs, and the order of outputs matches its returned list.
	        generate_btn.click(
	            handle_inputs,
	            inputs=[
	                rating_dropdown,
	                copyright_tags_dropdown,
	                character_tags_dropdown,
	                general_tags_textbox,
	                ban_tags_textbox,
	                max_new_tokens_slider,
	                temperature_slider,
	                top_p_slider,
	                top_k_slider,
	                num_beams_slider,
	            ],
	            outputs=[
	                output_tags_natural,
	                output_tags_general_only,
	                output_tags_animagine,
	                input_prompt_raw,
	                output_tags_raw,
	                elapsed_time_md,
	            ],
	        )

	    ui.launch()


	# Build and launch the UI only when this file is run as a script, not when it is imported.
	if __name__ == "__main__":
	demo()