import json
import logging
import os

import gradio as gr
import torch
from diffusers import StableDiffusionXLImg2ImgPipeline, StableDiffusionXLPipeline
from diffusers.models import AutoencoderKL

import config
import tags
import utils
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
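# Runtime configuration; most of the values below can be overridden via environment variables.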
DESCRIPTION = "Animagine XL 3.1"
if not torch.cuda.is_available():
    DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
IS_COLAB = utils.is_google_colab() or os.getenv("IS_COLAB") == "1"
HF_TOKEN = os.getenv("HF_TOKEN")
CACHE_EXAMPLES = torch.cuda.is_available() and os.getenv("CACHE_EXAMPLES") == "1"
MIN_IMAGE_SIZE = int(os.getenv("MIN_IMAGE_SIZE", "512"))
MAX_IMAGE_SIZE = int(os.getenv("MAX_IMAGE_SIZE", "2048"))
USE_TORCH_COMPILE = False  # os.getenv("USE_TORCH_COMPILE") == "1"
ENABLE_CPU_OFFLOAD = os.getenv("ENABLE_CPU_OFFLOAD") == "1"
OUTPUT_DIR = os.getenv("OUTPUT_DIR", "./outputs")
MODEL = os.getenv(
"MODEL",
"https://huggingface.co/cagliostrolab/animagine-xl-3.1/blob/main/animagine-xl-3.1.safetensors",
)
VAE_MODEL = os.getenv(
"VAE_MODEL",
"madebyollin/sdxl-vae-fp16-fix",
)
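# Prefer reproducibility over speed: deterministic cuDNN kernels, no autotuner.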
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
def load_pipeline(model_name, vae_model):
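    """Load the SDXL text-to-image pipeline with the fp16-fixed VAE.

    Single-file checkpoints (*.safetensors) are loaded with from_single_file;
    anything else is treated as a Hugging Face Hub repo id and loaded with
    from_pretrained.
    """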
vae = AutoencoderKL.from_pretrained(
vae_model,
torch_dtype=torch.float16,
)
    pipeline = (
        StableDiffusionXLPipeline.from_single_file
        if model_name.endswith(".safetensors")
        else StableDiffusionXLPipeline.from_pretrained
    )
pipe = pipeline(
model_name,
vae=vae,
torch_dtype=torch.float16,
custom_pipeline="lpw_stable_diffusion_xl",
use_safetensors=True,
add_watermarker=False,
use_auth_token=HF_TOKEN,
)
pipe.to(device)
return pipe
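# Usage sketch for load_pipeline (values are the defaults defined above):
#   pipe = load_pipeline(MODEL, VAE_MODEL)
#   image = pipe(prompt="1girl, solo", width=896, height=1152).images[0]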
def generate(
prompt: str,
negative_prompt: str = "",
seed: int = 0,
custom_width: int = 1024,
custom_height: int = 1024,
guidance_scale: float = 7.0,
num_inference_steps: int = 28,
sampler: str = "Euler a",
aspect_ratio_selector: str = "896 x 1152",
style_selector: str = "(None)",
quality_selector: str = "Standard v3.1",
use_upscaler: bool = False,
upscaler_strength: float = 0.55,
upscale_by: float = 1.5,
add_quality_tags: bool = True,
add_danbooru_tags: bool = False,
rating_tags: str = "general",
copyright_tags_list: list[str] = [],
character_tags_list: list[str] = [],
general_tags: str = "",
ban_tags: str = "",
do_cfg: bool = False,
cfg_scale: float = 1.5,
negative_tags: str = "",
total_token_length: str = "long",
max_new_tokens: int = 128,
min_new_tokens: int = 0,
temperature: float = 1.0,
top_p: float = 1.0,
top_k: int = 20,
num_beams: int = 1,
# model_backend: str = "Default",
progress=gr.Progress(track_tqdm=True),
):
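    """Run a txt2img generation and return (image_paths, metadata, generated_tags).

    If add_danbooru_tags is set, the prompt is first replaced by the output of
    the tag upsampler (tags.add_tags). If use_upscaler is set, the base image
    is generated as latents, upscaled, and refined with an img2img pass.
    """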
generator = utils.seed_everything(seed)
width, height = utils.aspect_ratio_handler(
aspect_ratio_selector,
custom_width,
custom_height,
)
prompt = utils.add_wildcard(prompt, wildcard_files)
generated_tags_animagine = ""
if add_danbooru_tags:
        generated_tags_animagine = tags.add_tags(
            prompt,
            rating_tags,
            copyright_tags_list,
            character_tags_list,
            general_tags,
            ban_tags,
            do_cfg,
            cfg_scale,
            negative_tags,
            total_token_length,
            max_new_tokens,
            min_new_tokens,
            temperature,
            top_p,
            top_k,
            num_beams,
        )
prompt = generated_tags_animagine.strip()
prompt, negative_prompt = utils.preprocess_prompt(
quality_prompt, quality_selector, prompt, negative_prompt, add_quality_tags
)
prompt, negative_prompt = utils.preprocess_prompt(
styles, style_selector, prompt, negative_prompt
)
width, height = utils.preprocess_image_dimensions(width, height)
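    # Swap in the requested sampler; the original scheduler is restored in the
    # finally block below.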
backup_scheduler = pipe.scheduler
pipe.scheduler = utils.get_scheduler(pipe.scheduler.config, sampler)
if use_upscaler:
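        # Reuse the txt2img pipeline's components (UNet, VAE, text encoders)
        # for the img2img upscale pass instead of loading a second model.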
upscaler_pipe = StableDiffusionXLImg2ImgPipeline(**pipe.components)
metadata = {
"prompt": prompt,
"negative_prompt": negative_prompt,
"resolution": f"{width} x {height}",
"guidance_scale": guidance_scale,
"num_inference_steps": num_inference_steps,
"seed": seed,
"sampler": sampler,
"sdxl_style": style_selector,
"add_quality_tags": add_quality_tags,
"quality_tags": quality_selector,
}
if use_upscaler:
new_width = int(width * upscale_by)
new_height = int(height * upscale_by)
metadata["use_upscaler"] = {
"upscale_method": "nearest-exact",
"upscaler_strength": upscaler_strength,
"upscale_by": upscale_by,
"new_resolution": f"{new_width} x {new_height}",
}
else:
metadata["use_upscaler"] = None
metadata["Model"] = {
"Model": DESCRIPTION,
"Model hash": "e3c47aedb0",
}
logger.info(json.dumps(metadata, indent=4))
try:
if use_upscaler:
latents = pipe(
prompt=prompt,
negative_prompt=negative_prompt,
width=width,
height=height,
guidance_scale=guidance_scale,
num_inference_steps=num_inference_steps,
generator=generator,
output_type="latent",
).images
upscaled_latents = utils.upscale(latents, "nearest-exact", upscale_by)
images = upscaler_pipe(
prompt=prompt,
negative_prompt=negative_prompt,
image=upscaled_latents,
guidance_scale=guidance_scale,
num_inference_steps=num_inference_steps,
strength=upscaler_strength,
generator=generator,
output_type="pil",
).images
else:
images = pipe(
prompt=prompt,
negative_prompt=negative_prompt,
width=width,
height=height,
guidance_scale=guidance_scale,
num_inference_steps=num_inference_steps,
generator=generator,
output_type="pil",
).images
        image_paths = [
            utils.save_image(image, metadata, OUTPUT_DIR, IS_COLAB)
            for image in images
        ]
        for image_path in image_paths:
            logger.info(f"Image saved as {image_path} with metadata")
        return image_paths, metadata, generated_tags_animagine
except Exception as e:
logger.exception(f"An error occurred: {e}")
raise
finally:
if use_upscaler:
del upscaler_pipe
pipe.scheduler = backup_scheduler
utils.free_memory()
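# Programmatic call sketch (the Gradio UI normally supplies these arguments;
# the values here are illustrative only):
#   paths, metadata, _tags = generate("1girl, solo", seed=42, sampler="Euler a")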
if torch.cuda.is_available():
pipe = load_pipeline(MODEL, VAE_MODEL)
logger.info("Loaded on Device!")
else:
pipe = None
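# Prompt presets from config.py: preset name -> (prompt, negative_prompt).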
styles = {k["name"]: (k["prompt"], k["negative_prompt"]) for k in config.style_list}
quality_prompt = {
k["name"]: (k["prompt"], k["negative_prompt"]) for k in config.quality_prompt_list
}
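# Wildcard files for utils.add_wildcard; each file supplies candidate lines
# that can be substituted into the prompt (the exact placeholder syntax is
# defined in utils, not here).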
wildcard_files = utils.load_wildcard_files("wildcard")
COPY_ACTION_JS = """\
(inputs, _outputs) => {
// inputs is the string value of the input_text
if (inputs.trim() !== "") {
navigator.clipboard.writeText(inputs);
}
}"""
with gr.Blocks(css="style.css", theme="NoCrypt/miku@1.2.1") as demo:
title = gr.HTML(
f"""<h1><span>{DESCRIPTION}</span></h1>""",
elem_id="title",
)
    gr.Markdown(
        """Gradio demo for [cagliostrolab/animagine-xl-3.1](https://huggingface.co/cagliostrolab/animagine-xl-3.1)""",
        elem_id="subtitle",
    )
gr.DuplicateButton(
value="Duplicate Space for private use",
elem_id="duplicate-button",
visible=os.getenv("SHOW_DUPLICATE_BUTTON") == "1",
)
with gr.Row():
with gr.Column(scale=2):
with gr.Tab("Txt2img"):
with gr.Group():
prompt = gr.Text(
label="Prompt",
max_lines=5,
placeholder="Enter your prompt",
)
negative_prompt = gr.Text(
label="Negative Prompt",
max_lines=5,
placeholder="Enter a negative prompt",
)
with gr.Accordion(label="Quality Tags", open=True):
add_quality_tags = gr.Checkbox(
label="Add Quality Tags", value=True
)
quality_selector = gr.Dropdown(
label="Quality Tags Presets",
interactive=True,
choices=list(quality_prompt.keys()),
value="Standard v3.1",
)
add_danbooru_tags = gr.Checkbox(
label="Add Generated Tags", value=False
)
with gr.Tab("Advanced Settings"):
with gr.Group():
style_selector = gr.Radio(
label="Style Preset",
container=True,
interactive=True,
choices=list(styles.keys()),
value="(None)",
)
with gr.Group():
aspect_ratio_selector = gr.Radio(
label="Aspect Ratio",
choices=config.aspect_ratios,
value="896 x 1152",
container=True,
)
with gr.Group(visible=False) as custom_resolution:
with gr.Row():
custom_width = gr.Slider(
label="Width",
minimum=MIN_IMAGE_SIZE,
maximum=MAX_IMAGE_SIZE,
step=8,
value=1024,
)
custom_height = gr.Slider(
label="Height",
minimum=MIN_IMAGE_SIZE,
maximum=MAX_IMAGE_SIZE,
step=8,
value=1024,
)
with gr.Group():
use_upscaler = gr.Checkbox(label="Use Upscaler", value=False)
with gr.Row() as upscaler_row:
upscaler_strength = gr.Slider(
label="Strength",
minimum=0,
maximum=1,
step=0.05,
value=0.55,
visible=False,
)
upscale_by = gr.Slider(
label="Upscale by",
minimum=1,
maximum=1.5,
step=0.1,
value=1.5,
visible=False,
)
with gr.Group():
sampler = gr.Dropdown(
label="Sampler",
choices=config.sampler_list,
interactive=True,
value="Euler a",
)
with gr.Group():
seed = gr.Slider(
label="Seed", minimum=0, maximum=utils.MAX_SEED, step=1, value=0
)
randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
with gr.Group():
with gr.Row():
guidance_scale = gr.Slider(
label="Guidance scale",
minimum=1,
maximum=12,
step=0.1,
value=7.0,
)
num_inference_steps = gr.Slider(
label="Number of inference steps",
minimum=1,
maximum=50,
step=1,
value=28,
)
# danbooru-tags-upsampler
with gr.Tab("tags"):
with gr.Row():
with gr.Column():
# with gr.Group(
# visible=False,
# ):
# model_backend_radio = gr.Radio(
# label="Model backend",
# choices=list(MODEL_BACKEND_MAP.keys()),
# value="Default",
# interactive=True,
# )
with gr.Group():
rating_dropdown = gr.Dropdown(
label="Rating",
choices=[
"general",
"sensitive",
"questionable",
"explicit",
],
value="general",
)
with gr.Group():
copyright_tags_mode_dropdown = gr.Dropdown(
label="Copyright tags mode",
choices=[
"None",
"Original",
# "Auto", # TODO: implement these modes
# "Random",
"Custom",
],
value="None",
interactive=True,
)
copyright_tags_dropdown = gr.Dropdown(
label="Copyright tags",
choices=tags.get_copyright_tags_list(), # type: ignore
value=[],
multiselect=True,
visible=False,
)
                        def on_change_copyright_tags_dropdown(mode: str):
kwargs: dict = {"visible": mode == "Custom"}
if mode == "Original":
kwargs["value"] = ["original"]
elif mode == "None":
kwargs["value"] = []
return gr.update(**kwargs)
with gr.Group():
character_tags_mode_dropdown = gr.Dropdown(
label="Character tags mode",
choices=[
"None",
# "Auto", # TODO: implement these modes
# "Random",
"Custom",
],
value="None",
interactive=True,
)
character_tags_dropdown = gr.Dropdown(
label="Character tags",
choices=tags.get_character_tags_list(), # type: ignore
value=[],
multiselect=True,
visible=False,
)
                        def on_change_character_tags_dropdown(mode: str):
kwargs: dict = {"visible": mode == "Custom"}
if mode == "None":
kwargs["value"] = []
return gr.update(**kwargs)
with gr.Group():
general_tags_textbox = gr.Textbox(
label="General tags (the condition to generate tags)",
value="",
placeholder="1girl, ...",
lines=4,
)
ban_tags_textbox = gr.Textbox(
label="Ban tags (tags in this field never appear in generation)",
value="",
placeholder="official alternate cosutme, english text,...",
lines=2,
)
generate_btn = gr.Button("Generate", variant="primary")
with gr.Accordion(label="Generation config (advanced)", open=False):
with gr.Group():
do_cfg_check = gr.Checkbox(
label="Do CFG (Classifier Free Guidance)",
value=False,
)
cfg_scale_slider = gr.Slider(
label="CFG scale",
maximum=3.0,
minimum=0.1,
step=0.1,
value=1.5,
visible=False,
)
negative_tags_textbox = gr.Textbox(
label="Negative prompt",
placeholder="simple background, ...",
value="",
lines=2,
visible=False,
)
def on_change_do_cfg_check(do_cfg: bool):
kwargs: dict = {"visible": do_cfg}
return gr.update(**kwargs), gr.update(**kwargs)
do_cfg_check.change(
on_change_do_cfg_check,
inputs=[do_cfg_check],
outputs=[cfg_scale_slider, negative_tags_textbox],
)
with gr.Group():
total_token_length_radio = gr.Radio(
label="Total token length",
choices=list(tags.get_length_tags().keys()),
value="long",
)
with gr.Group():
max_new_tokens_slider = gr.Slider(
label="Max new tokens",
maximum=256,
minimum=1,
step=1,
value=128,
)
min_new_tokens_slider = gr.Slider(
label="Min new tokens",
maximum=255,
minimum=0,
step=1,
value=0,
)
temperature_slider = gr.Slider(
label="Temperature (larger is more random)",
maximum=1.0,
minimum=0.0,
step=0.1,
value=1.0,
)
top_p_slider = gr.Slider(
label="Top p (larger is more random)",
maximum=1.0,
minimum=0.0,
step=0.1,
value=1.0,
)
top_k_slider = gr.Slider(
label="Top k (larger is more random)",
maximum=500,
minimum=1,
step=1,
value=100,
)
num_beams_slider = gr.Slider(
label="Number of beams (smaller is more random)",
maximum=10,
minimum=1,
step=1,
value=1,
)
with gr.Column():
with gr.Group():
output_tags_natural = gr.Textbox(
label="Generation result",
# placeholder="tags will be here",
interactive=False,
)
output_tags_natural_copy_btn = gr.Button("Copy", visible=False)
output_tags_natural_copy_btn.click(
fn=tags.copy_text,
inputs=[output_tags_natural],
js=COPY_ACTION_JS,
)
with gr.Group():
output_tags_general_only = gr.Textbox(
label="General tags only (sorted)",
interactive=False,
)
output_tags_general_only_copy_btn = gr.Button("Copy", visible=False)
output_tags_general_only_copy_btn.click(
fn=tags.copy_text,
inputs=[output_tags_general_only],
js=COPY_ACTION_JS,
)
with gr.Group():
output_tags_animagine = gr.Textbox(
label="Output tags (AnimagineXL v3 style order)",
# placeholder="tags will be here in Animagine v3 style order",
interactive=False,
)
output_tags_animagine_copy_btn = gr.Button("Copy", visible=False)
output_tags_animagine_copy_btn.click(
fn=tags.copy_text,
inputs=[output_tags_animagine],
js=COPY_ACTION_JS,
)
with gr.Accordion(label="Metadata", open=False):
_model_backend_md = gr.Markdown(
f"Model backend: {tags.get_model_backend()}",
)
input_prompt_raw = gr.Textbox(
label="Input prompt (raw)",
interactive=False,
lines=4,
)
output_tags_raw = gr.Textbox(
label="Output tags (raw)",
interactive=False,
lines=4,
)
elapsed_time_md = gr.Markdown(value="Waiting to generate...")
copyright_tags_mode_dropdown.change(
                    on_change_copyright_tags_dropdown,
inputs=[copyright_tags_mode_dropdown],
outputs=[copyright_tags_dropdown],
)
character_tags_mode_dropdown.change(
                    on_change_character_tags_dropdown,
inputs=[character_tags_mode_dropdown],
outputs=[character_tags_dropdown],
)
generate_btn.click(
tags.handle_inputs,
inputs=[
rating_dropdown,
copyright_tags_dropdown,
character_tags_dropdown,
general_tags_textbox,
ban_tags_textbox,
do_cfg_check,
cfg_scale_slider,
negative_tags_textbox,
total_token_length_radio,
max_new_tokens_slider,
min_new_tokens_slider,
temperature_slider,
top_p_slider,
top_k_slider,
num_beams_slider,
# model_backend_radio,
],
outputs=[
output_tags_natural,
output_tags_general_only,
output_tags_animagine,
input_prompt_raw,
output_tags_raw,
elapsed_time_md,
output_tags_natural_copy_btn,
output_tags_general_only_copy_btn,
output_tags_animagine_copy_btn,
],
)
gr.Examples(
examples=[
["1girl, solo, from side", ""],
["1girl, solo, abstract, from above", ""],
["2girls, yuri", "1boy"],
["no humans, scenery, summer, day", ""],
],
inputs=[
general_tags_textbox,
ban_tags_textbox,
],
)
with gr.Column(scale=3):
with gr.Blocks():
run_button = gr.Button("Generate", variant="primary")
                result = gr.Gallery(
                    label="Result",
                    columns=1,
                    height="100%",
                    preview=True,
                    show_label=False,
                )
generated_tags_animagine = gr.Textbox(
label="Generated tags (AnimagineXL v3 style order)",
# placeholder="tags will be here in Animagine v3 style order",
interactive=False,
)
with gr.Accordion(label="Generation Parameters", open=False):
gr_metadata = gr.JSON(label="metadata", show_label=False)
            gr.Examples(
                examples=config.examples,
                inputs=prompt,
                outputs=[result, gr_metadata, generated_tags_animagine],
                fn=lambda *args, **kwargs: generate(*args, use_upscaler=True, **kwargs),
                cache_examples=CACHE_EXAMPLES,
            )
use_upscaler.change(
fn=lambda x: [gr.update(visible=x), gr.update(visible=x)],
inputs=use_upscaler,
outputs=[upscaler_strength, upscale_by],
queue=False,
api_name=False,
)
aspect_ratio_selector.change(
fn=lambda x: gr.update(visible=x == "Custom"),
inputs=aspect_ratio_selector,
outputs=custom_resolution,
queue=False,
api_name=False,
)
gr.on(
triggers=[
prompt.submit,
negative_prompt.submit,
run_button.click,
],
fn=utils.randomize_seed_fn,
inputs=[seed, randomize_seed],
outputs=seed,
queue=False,
api_name=False,
).then(
fn=generate,
inputs=[
prompt,
negative_prompt,
seed,
custom_width,
custom_height,
guidance_scale,
num_inference_steps,
sampler,
aspect_ratio_selector,
style_selector,
quality_selector,
use_upscaler,
upscaler_strength,
upscale_by,
add_quality_tags,
add_danbooru_tags,
rating_dropdown,
copyright_tags_dropdown,
character_tags_dropdown,
general_tags_textbox,
ban_tags_textbox,
do_cfg_check,
cfg_scale_slider,
negative_tags_textbox,
total_token_length_radio,
max_new_tokens_slider,
min_new_tokens_slider,
temperature_slider,
top_p_slider,
top_k_slider,
num_beams_slider,
# model_backend_radio,
],
outputs=[result, gr_metadata, generated_tags_animagine],
api_name="run",
)
if __name__ == "__main__":
demo.queue(max_size=20).launch(debug=IS_COLAB, share=IS_COLAB)