# SDXL-Lightning / app.py
import cv2
import torch
import random
import tempfile
import numpy as np
from PIL import Image
from pathlib import Path
from diffusers import (
ControlNetModel,
StableDiffusionXLControlNetPipeline,
UNet2DConditionModel,
EulerDiscreteScheduler,
)
import spaces
import gradio as gr
from huggingface_hub import hf_hub_download, snapshot_download
from ip_adapter import IPAdapterXL
from safetensors.torch import load_file
snapshot_download(
repo_id="h94/IP-Adapter", allow_patterns="sdxl_models/*", local_dir="."
)
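# Downloads the IP-Adapter SDXL weights and image encoder into ./sdxl_models,
# which image_encoder_path and ip_ckpt below point at.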
# CPU fallback & pipeline definition
MAX_SEED = np.iinfo(np.int32).max
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if "cuda" in device else torch.float32
# load models & scheduler (Euler) & ControlNet (Canny; test what works better!)
base_model_path = "stabilityai/stable-diffusion-xl-base-1.0"
image_encoder_path = "sdxl_models/image_encoder"
ip_ckpt = "sdxl_models/ip-adapter_sdxl.bin"
controlnet_path = "diffusers/controlnet-canny-sdxl-1.0"
controlnet = ControlNetModel.from_pretrained(
    controlnet_path, use_safetensors=False, torch_dtype=dtype
).to(device)
# load SDXL lightning >> put Turbo here if fallback to Comfy @Litto
pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    base_model_path,
    controlnet=controlnet,
    torch_dtype=dtype,
    variant="fp16",
    add_watermarker=False,
).to(device)
pipe.set_progress_bar_config(disable=True)
pipe.scheduler = EulerDiscreteScheduler.from_config(
pipe.scheduler.config, timestep_spacing="trailing", prediction_type="epsilon"
)
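# The Euler scheduler with trailing timestep spacing above follows the
# SDXL-Lightning model card recommendation for few-step sampling.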
pipe.unet.load_state_dict(
load_file(
hf_hub_download(
"ByteDance/SDXL-Lightning", "sdxl_lightning_2step_unet.safetensors"
),
        device=device,
)
)
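# Only the UNet is swapped for the 2-step Lightning checkpoint; the VAE and
# text encoders still come from the SDXL base model.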
# load ip-adapter with specific target blocks for style transfer and layout preservation. Should be better than Comfy! Test this!
# target_blocks=["block"] for original IP-Adapter
# target_blocks=["up_blocks.0.attentions.1"] for style blocks only
# target_blocks = ["up_blocks.0.attentions.1", "down_blocks.2.attentions.1"] # for style+layout blocks
ip_model = IPAdapterXL(
pipe,
image_encoder_path,
ip_ckpt,
device,
target_blocks=["up_blocks.0.attentions.1"]
)
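# Default adapter instance (style blocks only); create_image() re-creates
# ip_model when a different target mode is selected in the UI.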
# Resizing the input image
# OpenCV goes here!!!
# Test this with a smaller side length for faster inference
def resize_img(
input_image,
max_side=1280,
min_side=1024,
size=None,
pad_to_max_side=False,
mode=Image.BILINEAR,
base_pixel_number=64,
):
    w, h = input_image.size
    if size is not None:
        w_resize_new, h_resize_new = size
    else:
        ratio = min_side / min(h, w)
        w, h = round(ratio * w), round(ratio * h)
        ratio = max_side / max(h, w)
        input_image = input_image.resize([round(ratio * w), round(ratio * h)], mode)
        w_resize_new = (round(ratio * w) // base_pixel_number) * base_pixel_number
        h_resize_new = (round(ratio * h) // base_pixel_number) * base_pixel_number
    input_image = input_image.resize([w_resize_new, h_resize_new], mode)

    if pad_to_max_side:
        res = np.ones([max_side, max_side, 3], dtype=np.uint8) * 255
        offset_x = (max_side - w_resize_new) // 2
        offset_y = (max_side - h_resize_new) // 2
        res[offset_y : offset_y + h_resize_new, offset_x : offset_x + w_resize_new] = (
            np.array(input_image)
        )
        input_image = Image.fromarray(res)
    return input_image
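# Minimal usage sketch (hypothetical local file, not part of the app flow):
#   from PIL import Image
#   img = resize_img(Image.open("input.jpg"), max_side=1024)
#   print(img.size)  # both sides land on multiples of base_pixel_number (64)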
# expand example images for endpoints --> info for Johannes/Jascha on what to expect
examples = [
[
"./assets/zeichnung1.jpg",
None,
"3D model, cute monster, test prompt",
1.0,
0.0,
],
[
"./assets/zeichnung2.jpg",
"./assets/guidance-target.jpg",
"3D model, cute, kawai, monster, another test prompt",
1.0,
0.6,
],
]
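# run_for_examples pins the fast defaults (2 inference steps, guidance_scale 0.0,
# seed 42, style blocks only) so the cached example outputs stay deterministic.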
def run_for_examples(style_image, source_image, prompt, scale, control_scale):
return create_image(
image_pil=style_image,
input_image=source_image,
prompt=prompt,
n_prompt="text, watermark, low res, low quality, worst quality, deformed, blurry",
scale=scale,
control_scale=control_scale,
guidance_scale=0.0,
num_inference_steps=2,
seed=42,
target="Load only style blocks",
neg_content_prompt="",
neg_content_scale=0,
)
# Main function for image synthesis (input -> run_for_examples)
@spaces.GPU(enable_queue=True)
def create_image(
    image_pil,
    input_image,
    prompt,
    n_prompt,
    scale,
    control_scale,
    guidance_scale,
    num_inference_steps,
    seed,
    target="Load only style blocks",
    neg_content_prompt=None,
    neg_content_scale=0,
):
seed = random.randint(0, MAX_SEED) if seed == -1 else seed
if target == "Load original IP-Adapter":
# target_blocks=["blocks"] for original IP-Adapter
ip_model = IPAdapterXL(
pipe, image_encoder_path, ip_ckpt, device, target_blocks=["blocks"]
)
elif target == "Load only style blocks":
# target_blocks=["up_blocks.0.attentions.1"] for style blocks only
ip_model = IPAdapterXL(
pipe, image_encoder_path, ip_ckpt, device, target_blocks=["up_blocks.0.attentions.1"],
)
elif target == "Load style+layout block":
# target_blocks = ["up_blocks.0.attentions.1", "down_blocks.2.attentions.1"] # for style+layout blocks
ip_model = IPAdapterXL(
pipe, image_encoder_path, ip_ckpt, device, target_blocks=["up_blocks.0.attentions.1", "down_blocks.2.attentions.1"],
)
if input_image is not None:
input_image = resize_img(input_image, max_side=1024)
cv_input_image = pil_to_cv2(input_image)
detected_map = cv2.Canny(cv_input_image, 50, 200)
        canny_map = Image.fromarray(cv2.cvtColor(detected_map, cv2.COLOR_GRAY2RGB))
else:
canny_map = Image.new("RGB", (1024, 1024), color=(255,255,255))
control_scale = 0
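    # A conditioning scale of 0 still needs a correctly sized control image,
    # hence the 1024x1024 resize below.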
if float(control_scale) == 0:
canny_map = canny_map.resize((1024, 1024))
    # Pass the negative-content conditioning only when it is actually set
    # (assumes the InstantStyle-style IPAdapterXL.generate accepts these kwargs).
    neg_content_kwargs = {}
    if neg_content_prompt and neg_content_scale != 0:
        neg_content_kwargs = {
            "neg_content_prompt": neg_content_prompt,
            "neg_content_scale": neg_content_scale,
        }
    images = ip_model.generate(
        pil_image=image_pil,
        prompt=prompt,
        negative_prompt=n_prompt,
        scale=scale,
        guidance_scale=guidance_scale,
        num_samples=1,
        num_inference_steps=num_inference_steps,
        seed=seed,
        image=canny_map,
        controlnet_conditioning_scale=float(control_scale),
        **neg_content_kwargs,
    )
image = images[0]
with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmpfile:
image.save(tmpfile, "JPEG", quality=80, optimize=True, progressive=True) # check what happens to imgs when this changes!!!
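        # Gradio's Image output accepts a file path, so returning the temp JPEG path
        # keeps the response payload small.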
return Path(tmpfile.name)
def pil_to_cv2(image_pil):
image_np = np.array(image_pil)
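    # PIL delivers RGB; OpenCV expects BGR channel order, hence the conversion below.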
image_cv2 = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)
return image_cv2
# Gradio Description & Frontend Stuff for Space (remove this for Endpoint)
title = r"""
<h1 align="center">MewMewMew: Simsalabim!</h1>
"""
description = r"""
<b>Let's test this! ARM <3 GoldExtra</b><br>
<b>SDXL-Lightning && IP-Adapter</b>
"""
article = r"""
Ask Hidéo if something breaks: <a href="mailto:hideo@artificialmuseum.com">Hidéo's Mail</a>
"""
block = gr.Blocks()
with block:
#description
gr.Markdown(title)
    gr.Markdown(description)
with gr.Tabs():
with gr.Row():
with gr.Column():
                with gr.Row():
with gr.Column():
image_pil = gr.Image(label="Style Image", type="pil")
with gr.Column():
prompt = gr.Textbox(
label="Prompt",
value="mewmewmew, kitty cats, unicorns, uWu",
)
scale = gr.Slider(
                            minimum=0, maximum=2.0, step=0.01, value=1.0, label="Scale // scale"
)
                with gr.Accordion(open=False, label="Expand for details!"):
target = gr.Radio(
[
"Load only style blocks",
"Load style+layout block",
"Load original IP-Adapter",
],
value="Load only style blocks",
label="Modus für IP-Adapter auswählen"
)
with gr.Column():
src_image_pil = gr.Image(
label="Guidance Image (optional)", type="pil"
)
control_scale = gr.Slider(
minimum=0, maximum=1.0, step=0.1, value=0.5,
label="ControlNet-Stärke // control_scale",
)
n_prompt = gr.Textbox(
label="Negative Prompts",
value=""text, watermark, lowres, low quality, worst quality, deformed, glitch, low contrast, noisy, saturation, blurry",
)
neg_content_prompt = gr.Textbox(
label="Negative Content Prompt (optional)", value=""
)
neg_content_scale = gr.Slider(
minimum=0,
maximum=1.0,
step=0.1,
value=0.5,
label="Negative Content Stärke // neg_content_scale"
)
guidance_scale = gr.Slider(
minimum=0,
maximum=10.0,
step=0.01,
value=0.0,
label="guidance-scale"
)
num_inference_steps = gr.Slider(
minimum=2,
maximum=50.0,
step=1.0,
value=2,
label="Anzahl der Inference Steps (optional) // num_inference_steps"
)
seed = gr.Slider(
minimum=-1,
maximum=MAX_SEED,
value=-1,
step=1,
label="Seed Value // -1 = random // Seed-Proof=True"
)
generate_button = gr.Button("Simsalabim")
with gr.Column():
generated_image = gr.Image(label="MewMewMagix uWu")
inputs = [
image_pil,
src_image_pil,
prompt,
n_prompt,
scale,
control_scale,
guidance_scale,
num_inference_steps,
seed,
target,
neg_content_prompt,
neg_content_scale,
]
outputs = [generated_image]
gr.on(
triggers=[
prompt.input,
generate_button.click,
guidance_scale.input,
scale.input,
control_scale.input,
seed.input,
],
fn=create_image,
inputs=inputs,
outputs=outputs,
show_progress="minimal",
show_api=False,
trigger_mode="always_last",
)
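    # Each listed trigger re-runs create_image; trigger_mode="always_last" only keeps
    # the most recent pending event while a slider is being dragged.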
gr.Examples(
examples=examples,
inputs=[image_pil, src_image_pil, prompt, scale, control_scale],
fn=run_for_examples,
outputs=[generated_image],
cache_examples=True,
)
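    # cache_examples=True renders the example outputs once via run_for_examples at
    # startup, so clicking an example returns the cached image instantly.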
gr.Markdown(article)
block.queue(api_open=False)
block.launch(show_api=False)