Spaces:

lint
/

sdpipe_webui

Runtime error

App Files Files Community

sdpipe_webui / app.py

1lint

add textual inversion functionality

7833185 over 1 year ago

raw

history blame

No virus

14.5 kB


	import argparse
	import gc
	import importlib
	import importlib.util
	import json
	import os
	import random
	import shutil
	from multiprocessing import cpu_count

	import diffusers.schedulers
	import gradio as gr
	import torch
	from diffusers import (
	    StableDiffusionPipeline,
	    StableDiffusionImg2ImgPipeline,
	)
	from PIL import Image

	# inpaint pipeline with fix to avoid noise added to latents during final iteration of denoising loop
	from inpaint_pipeline import SDInpaintPipeline as StableDiffusionInpaintPipelineLegacy
	from textual_inversion import main as run_textual_inversion

	def pad_image(image):
	    """Return *image* centred on a black square canvas.

	    The canvas side equals the longer edge of the input, so the picture is
	    letterboxed (never cropped or scaled). An image that is already square
	    is returned unchanged (same object).

	    Args:
	        image: a PIL image.

	    Returns:
	        A PIL image in the same mode whose width equals its height.
	    """
	    w, h = image.size
	    if w == h:
	        return image

	    side = max(w, h)

	    # An RGB-style 3-tuple is only a valid fill for modes with three or more
	    # bands (and for palette images, where Pillow maps it to a palette
	    # entry). Single/dual band modes such as "L" or "LA" reject it with a
	    # TypeError, so fall back to the scalar 0, which is black as well.
	    if image.mode == "P" or len(image.getbands()) >= 3:
	        fill = (0, 0, 0)
	    else:
	        fill = 0

	    canvas = Image.new(image.mode, (side, side), fill)
	    # One of the two offsets is always zero: the image already spans the
	    # canvas along its longer edge.
	    canvas.paste(image, ((side - w) // 2, (side - h) // 2))
	    return canvas

	# Runtime capabilities: optional xformers attention kernels and the torch
	# device every pipeline is placed on.
	_xformers_available = importlib.util.find_spec("xformers") is not None
	device = "cuda" if torch.cuda.is_available() else "cpu"
	low_vram_mode = False

	# scheduler dict includes superclass SchedulerMixin (it still generates reasonable images)
	scheduler_dict = {
	    k: v
	    for k, v in diffusers.schedulers.__dict__.items()
	    if "Scheduler" in k and "Flax" not in k
	}
	# VQDiffusionScheduler requires unique parameter, unlike other schedulers.
	# Pop with a default so a diffusers release that does not export it cannot
	# abort start-up with a KeyError.
	scheduler_dict.pop("VQDiffusionScheduler", None)
	scheduler_names = list(scheduler_dict.keys())

	# Pick the DPM multistep solver by name; relying on its position in the
	# module dict silently selects a different scheduler whenever diffusers
	# adds or reorders classes. The positional choice is kept only as fallback.
	_preferred_scheduler = "DPMSolverMultistepScheduler"
	if _preferred_scheduler in scheduler_dict:
	    default_scheduler = _preferred_scheduler
	else:
	    default_scheduler = scheduler_names[min(3, len(scheduler_names) - 1)]

	# Hub repositories offered in the "Model" dropdown; the first one is loaded
	# at start-up.
	model_ids = [
	    "andite/anything-v4.0",
	    "hakurei/waifu-diffusion",
	    "prompthero/openjourney-v2",
	    "runwayml/stable-diffusion-v1-5",
	    "johnslegers/epic-diffusion",
	    "stabilityai/stable-diffusion-2-1",
	]

	# Id of the model whose weights are currently held by the global `pipe`;
	# empty until the first load.
	loaded_model_id = ""


	def load_pipe(
	    model_id, scheduler_name, pipe_class=StableDiffusionPipeline, pipe_kwargs="{}"
	):
	    """Return the shared pipeline configured for the requested model/scheduler.

	    The pipeline is cached in the module-level ``pipe``. Weights are read
	    from disk only when ``model_id`` differs from ``loaded_model_id``;
	    otherwise the existing components are re-wrapped when the pipeline class
	    or the scheduler class changed.

	    Args:
	        model_id: model repository id passed to ``from_pretrained``.
	        scheduler_name: key into ``scheduler_dict``.
	        pipe_class: pipeline class to instantiate.
	        pipe_kwargs: JSON object (as a string) with extra keyword arguments
	            for ``from_pretrained``. Only used when weights are (re)loaded.

	    Returns:
	        The ready-to-use pipeline (also stored in the global ``pipe``).

	    Raises:
	        KeyError: if ``scheduler_name`` is not in ``scheduler_dict``.
	        json.JSONDecodeError: if ``pipe_kwargs`` is not valid JSON.
	    """
	    global pipe, loaded_model_id

	    scheduler = scheduler_dict[scheduler_name]

	    # load new weights from disk only when changing model_id
	    if model_id != loaded_model_id:
	        pipe = pipe_class.from_pretrained(
	            model_id,
	            # half precision is only requested on GPU; many CPU kernels do
	            # not support float16
	            torch_dtype=torch.float16 if device == "cuda" else torch.float32,
	            safety_checker=None,
	            requires_safety_checker=False,
	            scheduler=scheduler.from_pretrained(model_id, subfolder="scheduler"),
	            **json.loads(pipe_kwargs),
	        )
	        loaded_model_id = model_id

	    # if same model_id, instantiate new pipeline with same underlying pytorch objects to avoid reloading weights from disk
	    elif pipe_class != pipe.__class__ or not isinstance(pipe.scheduler, scheduler):
	        # `pipe.components` builds a new dict on every access, so the
	        # replacement scheduler must be written into a local copy; assigning
	        # through the property would be silently discarded.
	        components = dict(pipe.components)
	        components["scheduler"] = scheduler.from_pretrained(
	            model_id, subfolder="scheduler"
	        )
	        pipe = pipe_class(**components)

	    if device == "cuda":
	        pipe = pipe.to(device)
	        if _xformers_available:
	            pipe.enable_xformers_memory_efficient_attention()
	            print("using xformers")
	        if low_vram_mode:
	            pipe.enable_attention_slicing()
	            print("using attention slicing to lower VRAM")

	    return pipe


	# Shared pipeline instance. It is bound to None first so the global exists
	# before load_pipe() rebinds it, then the first entry of `model_ids` is
	# loaded eagerly with the default scheduler.
	pipe = None
	pipe = load_pipe(model_id=model_ids[0], scheduler_name=default_scheduler)

	@torch.autocast(device)
	@torch.no_grad()
	def generate(
	    model_name,
	    scheduler_name,
	    prompt,
	    guidance,
	    steps,
	    n_images=1,
	    width=512,
	    height=512,
	    seed=0,
	    image=None,
	    strength=0.5,
	    inpaint_image=None,
	    inpaint_strength=0.5,
	    inpaint_radio='',
	    neg_prompt="",
	    pipe_class=StableDiffusionPipeline,
	    pipe_kwargs="{}",
	):
	    """Run text-to-image, image-to-image or inpainting for one request.

	    The mode is selected by ``pipe_class``.

	    Args:
	        model_name: model id forwarded to ``load_pipe``.
	        scheduler_name: scheduler key forwarded to ``load_pipe``.
	        prompt: positive text prompt.
	        guidance: classifier-free guidance scale.
	        steps: number of inference steps (converted to ``int``).
	        n_images: images to generate per prompt.
	        width, height: output size; input images are resized to it.
	        seed: RNG seed; ``-1`` draws a random seed.
	        image: PIL image used by the image-to-image mode.
	        strength: denoising strength for image-to-image.
	        inpaint_image: dict with ``"image"`` and ``"mask"`` PIL images used
	            by the inpainting mode.
	        inpaint_strength: denoising strength for inpainting.
	        inpaint_radio: selected inpaint option; the first entry of
	            ``inpaint_options`` asks to preserve the unmasked image.
	        neg_prompt: negative text prompt.
	        pipe_class: pipeline class that decides the mode.
	        pipe_kwargs: JSON string forwarded to ``load_pipe``.

	    Returns:
	        ``(images, status_message, seed)``; for an unsupported
	        ``pipe_class`` the tuple is ``(None, message, -1)``.

	    Raises:
	        gr.Error: if the selected mode needs an input image that is missing.
	    """
	    # Fail with a readable UI message instead of an AttributeError/TypeError
	    # from deep inside the branch when the required picture was not supplied.
	    if pipe_class == StableDiffusionImg2ImgPipeline and image is None:
	        raise gr.Error("Please upload an image for Image to Image generation")
	    if pipe_class == StableDiffusionInpaintPipelineLegacy and inpaint_image is None:
	        raise gr.Error("Please upload an image and draw a mask for Inpainting")

	    if seed == -1:
	        seed = random.randint(0, 2147483647)
	    seed = int(seed)  # manual_seed() only accepts integers

	    # Create the generator on the active device; a hard-coded "cuda"
	    # generator cannot be constructed on CPU-only hosts.
	    generator = torch.Generator(device).manual_seed(seed)

	    pipe = load_pipe(
	        model_id=model_name,
	        scheduler_name=scheduler_name,
	        pipe_class=pipe_class,
	        pipe_kwargs=pipe_kwargs,
	    )

	    # "\\|" emits a literal backslash + pipe, exactly what the former "\|"
	    # produced, without relying on an invalid escape sequence.
	    status_message = (
	        f"Prompt: '{prompt}' \\| Seed: {seed} \\| Guidance: {guidance} \\| Scheduler: {scheduler_name} \\| Steps: {steps}"
	    )

	    # Arguments common to all three pipeline flavours.
	    shared_kwargs = dict(
	        negative_prompt=neg_prompt,
	        num_images_per_prompt=int(n_images),
	        num_inference_steps=int(steps),
	        guidance_scale=guidance,
	        generator=generator,
	    )

	    if pipe_class == StableDiffusionPipeline:
	        status_message = "Text to Image " + status_message
	        result = pipe(prompt, width=width, height=height, **shared_kwargs)

	    elif pipe_class == StableDiffusionImg2ImgPipeline:
	        status_message = "Image to Image " + status_message
	        result = pipe(
	            prompt,
	            image=image.resize((width, height)),
	            strength=strength,
	            **shared_kwargs,
	        )

	    elif pipe_class == StableDiffusionInpaintPipelineLegacy:
	        status_message = "Inpainting " + status_message
	        init_image = inpaint_image["image"].resize((width, height))
	        mask = inpaint_image["mask"].resize((width, height))
	        result = pipe(
	            prompt,
	            image=init_image,
	            mask_image=mask,
	            strength=inpaint_strength,
	            preserve_unmasked_image=(inpaint_radio == inpaint_options[0]),
	            **shared_kwargs,
	        )

	    else:
	        return None, f"Unhandled pipeline class: {pipe_class}", -1

	    return result.images, status_message, seed


	# based on lvkaokao/textual-inversion-training
	def train_textual_inversion(model_name, scheduler_name, type_of_thing, files, concept_word, init_word, text_train_steps, text_train_bsz, text_learning_rate, progress=gr.Progress(track_tqdm=True)):
	    """Train a textual-inversion embedding from the uploaded images.

	    The uploads are padded to a square, resized to 512x512, converted to RGB
	    and written to ``concept_images``; both that directory and
	    ``output_model`` are recreated from scratch on every call.

	    Args:
	        model_name: model id forwarded to ``load_pipe``.
	        scheduler_name: scheduler key forwarded to ``load_pipe``.
	        type_of_thing: value passed on as ``learnable_property``.
	        files: uploaded file objects; each must expose a ``name`` path.
	        concept_word: placeholder token for the new concept.
	        init_word: token used to initialise the concept embedding.
	        text_train_steps: number of training steps (converted to ``int``).
	        text_train_bsz: training batch size (converted to ``int``).
	        text_learning_rate: learning rate.
	        progress: Gradio progress tracker; not referenced in the body, it is
	            declared so Gradio can mirror tqdm progress in the UI.

	    Returns:
	        A status string naming the directory with the saved weights.

	    Raises:
	        gr.Error: if the concept word or the images are missing, or if the
	            training script raises.
	    """
	    # Validate the actual arguments before touching disk or GPU. (The former
	    # check compared the global `prompt` widget, so it could never trigger.)
	    if concept_word is None or not concept_word.strip():
	        raise gr.Error("You forgot to define your concept prompt")
	    if not files:
	        raise gr.Error("Please upload at least one image for your concept")

	    pipe = load_pipe(
	        model_id=model_name,
	        scheduler_name=scheduler_name,
	        pipe_class=StableDiffusionPipeline,
	    )

	    pipe.disable_xformers_memory_efficient_attention()  # xformers handled by textual inversion script

	    concept_dir = 'concept_images'
	    output_dir = 'output_model'
	    training_resolution = 512

	    # Start from empty directories so files of a previous run cannot leak in.
	    for directory in (output_dir, concept_dir):
	        if os.path.exists(directory):
	            shutil.rmtree(directory)
	    os.makedirs(concept_dir, exist_ok=True)
	    os.makedirs(output_dir, exist_ok=True)

	    gc.collect()
	    torch.cuda.empty_cache()

	    for index, upload in enumerate(files, start=1):
	        # splitext() only looks at the final suffix of the file name, unlike
	        # split(".")[1], which breaks on any extra dot in the path.
	        extension = os.path.splitext(upload.name)[1].lstrip(".") or "png"
	        with Image.open(upload.name) as source_image:
	            prepared = pad_image(source_image)
	            prepared = prepared.resize((training_resolution, training_resolution))
	            prepared = prepared.convert('RGB')
	        prepared.save(f'{concept_dir}/{index}.{extension}', quality=100)

	    args_general = argparse.Namespace(
	        train_data_dir=concept_dir,
	        learnable_property=type_of_thing,
	        placeholder_token=concept_word,
	        initializer_token=init_word,
	        resolution=training_resolution,
	        train_batch_size=int(text_train_bsz),
	        gradient_accumulation_steps=1,
	        gradient_checkpointing=True,
	        mixed_precision='fp16',
	        use_bf16=False,
	        max_train_steps=int(text_train_steps),
	        learning_rate=text_learning_rate,
	        scale_lr=True,
	        lr_scheduler="constant",
	        lr_warmup_steps=0,
	        output_dir=output_dir,
	    )

	    try:
	        run_textual_inversion(pipe, args_general)
	    except Exception as e:
	        # UI boundary: surface any training failure as a readable message.
	        raise gr.Error(str(e)) from e
	    finally:
	        # Release training memory whether or not the run succeeded.
	        gc.collect()
	        torch.cuda.empty_cache()

	    return f'Finished training! Check the {output_dir} directory for saved model weights'


	# Default edge length (pixels) for the size sliders and image widgets.
	default_img_size = 512

	# Static HTML fragments rendered above and below the interface. They are
	# decoded explicitly as UTF-8 so the result does not depend on the host's
	# locale encoding (assumes the files are stored as UTF-8 — TODO confirm).
	with open("header.html", encoding="utf-8") as fp:
	    header = fp.read()

	with open("footer.html", encoding="utf-8") as fp:
	    footer = fp.read()

	with gr.Blocks(css="style.css") as demo:
	    # Holds the pipeline class matching the currently selected tab; it is
	    # passed to generate() as `pipe_class`.
	    pipe_state = gr.State(lambda: StableDiffusionPipeline)

	    gr.HTML(header)

	    # --- Top row: prompts on the left, model / scheduler on the right ------
	    with gr.Row():
	        with gr.Column(scale=70):
	            prompt = gr.Textbox(
	                label="Prompt",
	                placeholder="<Shift+Enter> to generate",
	                lines=2,
	            )
	            neg_prompt = gr.Textbox(
	                label="Negative Prompt",
	                placeholder="",
	                lines=2,
	            )

	        with gr.Column(scale=30):
	            model_name = gr.Dropdown(
	                label="Model",
	                choices=model_ids,
	                value=loaded_model_id,
	            )
	            scheduler_name = gr.Dropdown(
	                label="Scheduler",
	                choices=scheduler_names,
	                value=default_scheduler,
	            )
	            generate_button = gr.Button(value="Generate", elem_id="generate-button")

	    # --- Main row: mode tabs + settings on the left, results on the right --
	    with gr.Row():
	        with gr.Column():
	            # Both denoising sliders use the same configuration.
	            _denoise_slider_kwargs = dict(
	                label="Denoising strength",
	                minimum=0,
	                maximum=1,
	                step=0.02,
	                value=0.8,
	            )

	            with gr.Tab("Text to Image") as tab:
	                tab.select(lambda: StableDiffusionPipeline, [], pipe_state)

	            with gr.Tab("Image to image") as tab:
	                tab.select(lambda: StableDiffusionImg2ImgPipeline, [], pipe_state)

	                image = gr.Image(
	                    label="Image to Image",
	                    source="upload",
	                    tool="editor",
	                    type="pil",
	                    elem_id="image_upload",
	                ).style(height=default_img_size)
	                strength = gr.Slider(**_denoise_slider_kwargs)

	            with gr.Tab("Inpainting") as tab:
	                tab.select(lambda: StableDiffusionInpaintPipelineLegacy, [], pipe_state)

	                inpaint_image = gr.Image(
	                    label="Inpainting",
	                    source="upload",
	                    tool="sketch",
	                    type="pil",
	                    elem_id="image_upload",
	                ).style(height=default_img_size)
	                inpaint_strength = gr.Slider(**_denoise_slider_kwargs)
	                # generate() compares the selection against inpaint_options[0].
	                inpaint_options = [
	                    "preserve non-masked portions of image",
	                    "output entire inpainted image",
	                ]
	                inpaint_radio = gr.Radio(
	                    inpaint_options,
	                    value=inpaint_options[0],
	                    show_label=False,
	                    interactive=True,
	                )

	            with gr.Tab("Textual Inversion") as tab:
	                tab.select(lambda: StableDiffusionPipeline, [], pipe_state)

	                type_of_thing = gr.Dropdown(
	                    label="What would you like to train?",
	                    choices=["object", "person", "style"],
	                    value="object",
	                    interactive=True,
	                )

	                text_train_bsz = gr.Slider(
	                    label="Training Batch Size",
	                    minimum=1,
	                    maximum=8,
	                    step=1,
	                    value=1,
	                )

	                files = gr.File(
	                    label="Upload the images for your concept",
	                    file_count="multiple",
	                    interactive=True,
	                    visible=True,
	                )

	                text_train_steps = gr.Number(label="How many steps", value=1000)

	                text_learning_rate = gr.Number(label="Learning Rate", value=5.e-4)

	                concept_word = gr.Textbox(
	                    label="concept word - use a unique, made up word to avoid collisions"
	                )
	                init_word = gr.Textbox(
	                    label="initial word - to init the concept embedding"
	                )

	                textual_inversion_button = gr.Button(value="Train Textual Inversion")

	                training_status = gr.Text(label="Training Status")

	            # Generation settings shared by every mode.
	            with gr.Row():
	                batch_size = gr.Slider(
	                    label="Batch Size",
	                    value=1,
	                    minimum=1,
	                    maximum=8,
	                    step=1,
	                )
	                seed = gr.Slider(-1, 2147483647, label="Seed", value=-1, step=1)

	            with gr.Row():
	                guidance = gr.Slider(
	                    label="Guidance scale",
	                    value=7.5,
	                    minimum=0,
	                    maximum=20,
	                )
	                steps = gr.Slider(
	                    label="Steps",
	                    value=20,
	                    minimum=1,
	                    maximum=100,
	                    step=1,
	                )

	            with gr.Row():
	                width = gr.Slider(
	                    label="Width",
	                    value=default_img_size,
	                    minimum=64,
	                    maximum=1024,
	                    step=32,
	                )
	                height = gr.Slider(
	                    label="Height",
	                    value=default_img_size,
	                    minimum=64,
	                    maximum=1024,
	                    step=32,
	                )

	        with gr.Column():
	            gallery = gr.Gallery(
	                label="Generated images",
	                show_label=False,
	                elem_id="gallery",
	            ).style(height=default_img_size, grid=2)

	            generation_details = gr.Markdown()

	            # Free-form JSON forwarded to load_pipe() as `pipe_kwargs`.
	            pipe_kwargs = gr.Textbox(label="Pipe kwargs", value="{\n\t\n}")

	            # if torch.cuda.is_available():
	            #     giga = 2**30
	            #     vram_guage = gr.Slider(0, torch.cuda.memory_reserved(0)/giga, label='VRAM Allocated to Reserved (GB)', value=0, step=1)
	            #     demo.load(lambda : torch.cuda.memory_allocated(0)/giga, inputs=[], outputs=vram_guage, every=0.5, show_progress=False)

	    gr.HTML(footer)

	    # Order must match the positional parameters of generate().
	    inputs = [
	        model_name,
	        scheduler_name,
	        prompt,
	        guidance,
	        steps,
	        batch_size,
	        width,
	        height,
	        seed,
	        image,
	        strength,
	        inpaint_image,
	        inpaint_strength,
	        inpaint_radio,
	        neg_prompt,
	        pipe_state,
	        pipe_kwargs,
	    ]
	    # generate() returns (images, status message, seed) in this order.
	    outputs = [gallery, generation_details, seed]

	    # Submitting the prompt box and pressing the button both run generate().
	    for _trigger in (prompt.submit, generate_button.click):
	        _trigger(generate, inputs=inputs, outputs=outputs)

	    # Order must match the positional parameters of train_textual_inversion().
	    textual_inversion_inputs = [
	        model_name,
	        scheduler_name,
	        type_of_thing,
	        files,
	        concept_word,
	        init_word,
	        text_train_steps,
	        text_train_bsz,
	        text_learning_rate,
	    ]

	    textual_inversion_button.click(
	        train_textual_inversion,
	        inputs=textual_inversion_inputs,
	        outputs=[training_status],
	    )


	#demo = gr.TabbedInterface([demo, dreambooth_tab], ["Main", "Dreambooth"])

	# Enable request queuing with one concurrent worker per CPU core, then start
	# the web server.
	_queue_workers = cpu_count()
	demo.queue(concurrency_count=_queue_workers)

	demo.launch()