PowerPaint

Paused

App Files Community

PowerPaint / app.py

sacj

Update app.py (#8)

24b767e verified 5 months ago

raw

history blame

10.2 kB

	import os
	import random

	import gradio as gr
	import numpy as np
	import torch
	from PIL import Image, ImageFilter
	from transformers import CLIPTextModel

	from diffusers import UniPCMultistepScheduler
	from model.BrushNet_CA import BrushNetModel
	from model.diffusers_c.models import UNet2DConditionModel
	from pipeline.pipeline_PowerPaint_Brushnet_CA import StableDiffusionPowerPaintBrushNetPipeline
	from utils.utils import TokenizerWrapper, add_tokens


	# --- One-time setup, executed at import time -------------------------------
	# Fetch the PowerPaint v2 checkpoints with git-lfs, then assemble the
	# BrushNet-based PowerPaint pipeline used by `predict`.
	base_path = "./PowerPaint_v2"
	# `-y` answers apt's confirmation prompt; without it the install aborts when
	# there is no interactive terminal attached.
	os.system("apt install -y git")
	os.system("apt install -y git-lfs")
	os.system(f"git lfs clone https://code.openxlab.org.cn/zhuangjunhao/PowerPaint_v2.git {base_path}")
	# Each os.system call runs in its own shell, so this `cd` only affects the
	# `git lfs pull` chained after it, never this process's working directory.
	os.system(f"cd {base_path} && git lfs pull")

	# Inference only: disable autograd globally.
	torch.set_grad_enabled(False)
	context_prompt = ""
	context_negative_prompt = ""
	base_model_path = f"{base_path}/realisticVisionV60B1_v51VAE/"
	brushnet_path = f"{base_path}/PowerPaint_Brushnet"
	dtype = torch.float16

	# The SD 1.5 UNet is only used as the template from which BrushNet is built;
	# the pipeline's own UNet is loaded from `base_model_path` further down.
	unet = UNet2DConditionModel.from_pretrained(
	    "runwayml/stable-diffusion-v1-5", subfolder="unet", revision=None, torch_dtype=dtype
	)
	text_encoder_brushnet = CLIPTextModel.from_pretrained(
	    "runwayml/stable-diffusion-v1-5", subfolder="text_encoder", revision=None, torch_dtype=dtype
	)
	brushnet = BrushNetModel.from_unet(unet)

	pipe = StableDiffusionPowerPaintBrushNetPipeline.from_pretrained(
	    base_model_path,
	    brushnet=brushnet,
	    text_encoder_brushnet=text_encoder_brushnet,
	    torch_dtype=dtype,
	    low_cpu_mem_usage=False,
	    safety_checker=None,
	)
	pipe.unet = UNet2DConditionModel.from_pretrained(base_model_path, subfolder="unet", revision=None, torch_dtype=dtype)
	pipe.tokenizer = TokenizerWrapper(from_pretrained=base_model_path, subfolder="tokenizer", revision=None)

	# Register the task placeholder tokens returned by `add_task`; this must
	# happen before the fine-tuned text-encoder weights are loaded below.
	add_tokens(
	    tokenizer=pipe.tokenizer,
	    text_encoder=pipe.text_encoder_brushnet,
	    placeholder_tokens=["P_ctxt", "P_shape", "P_obj"],
	    initialize_tokens=["a", "a", "a"],
	    num_vectors_per_token=10,
	)
	from safetensors.torch import load_model


	load_model(pipe.brushnet, f"{brushnet_path}/diffusion_pytorch_model.safetensors")

	# NOTE(security): pytorch_model.bin is a pickle-based checkpoint and
	# torch.load can execute code embedded in such a file. It is only acceptable
	# here because the file comes from the repository cloned above.
	pipe.text_encoder_brushnet.load_state_dict(
	    torch.load(f"{brushnet_path}/pytorch_model.bin"), strict=False
	)

	pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)

	pipe.enable_model_cpu_offload()
	current_control = "canny"
	# controlnet_conditioning_scale = 0.8


	def set_seed(seed):
	    """Seed every random number generator this app relies on.

	    Calls, in order: ``torch.manual_seed``, ``torch.cuda.manual_seed``,
	    ``torch.cuda.manual_seed_all``, ``np.random.seed`` and ``random.seed``,
	    each with the same ``seed``.

	    Args:
	        seed: Seed value handed unchanged to every seeding function.
	    """
	    seeders = (
	        torch.manual_seed,
	        torch.cuda.manual_seed,
	        torch.cuda.manual_seed_all,
	        np.random.seed,
	        random.seed,
	    )
	    for apply_seed in seeders:
	        apply_seed(seed)


	def add_task(control_type):
	    """Return the task placeholder prompts for a given control type.

	    Args:
	        control_type: Task name. Recognised values are ``"object-removal"``,
	            ``"context-aware"``, ``"shape-guided"`` and
	            ``"image-outpainting"``; anything else gets the fallback prompts.

	    Returns:
	        A 4-tuple ``(promptA, promptB, negative_promptA, negative_promptB)``
	        of strings.
	    """
	    # Object removal and outpainting share the same prompt set.
	    context_vs_object = ("P_ctxt", "P_ctxt", "P_obj", "P_obj")
	    prompt_table = (
	        ("object-removal", context_vs_object),
	        ("context-aware", ("P_ctxt", "P_ctxt", "", "")),
	        ("shape-guided", ("P_shape", "P_ctxt", "P_shape", "P_ctxt")),
	        ("image-outpainting", context_vs_object),
	    )
	    for task_name, prompts in prompt_table:
	        if control_type == task_name:
	            return prompts
	    # Fallback for every other task name.
	    return ("P_obj", "P_obj", "P_obj", "P_obj")


	def predict(
	    input_image,
	    prompt,
	    fitting_degree,
	    ddim_steps,
	    scale,
	    seed,
	    negative_prompt,
	    task,
	    left_expansion_ratio,
	    right_expansion_ratio,
	    top_expansion_ratio,
	    bottom_expansion_ratio,
	):
	    """Run one PowerPaint pass and return the result plus a mask preview.

	    The input image is resized to 1024x1024. If all four expansion ratios are
	    given, it is placed on a grey canvas enlarged by those ratios and a mask
	    covering the new border (plus a 10-pixel overlap into the original) is
	    generated. Otherwise the mask from ``input_image`` is used. Pixels where
	    the mask is 255 are blanked before the image is handed to the pipeline.

	    Args:
	        input_image: Dict with a PIL image under ``"image"`` and, unless all
	            four expansion ratios are given, a PIL mask under ``"mask"``.
	            The dict itself is not modified.
	        prompt: User prompt, passed to the pipeline as ``promptU`` after a
	            task-dependent suffix is appended.
	        fitting_degree: Passed to the pipeline as ``tradoff`` and
	            ``tradoff_nag``.
	        ddim_steps: Number of inference steps.
	        scale: Guidance scale.
	        seed: Seed for ``set_seed`` and for the CUDA generator.
	        negative_prompt: Passed to the pipeline as ``negative_promptU``.
	        task: Task name; selects the task prompts through ``add_task``.
	        left_expansion_ratio: Canvas growth on the left, as a fraction of the
	            resized width, or ``None``.
	        right_expansion_ratio: Same, for the right side.
	        top_expansion_ratio: Canvas growth on top, as a fraction of the
	            resized height, or ``None``.
	        bottom_expansion_ratio: Same, for the bottom side.

	    Returns:
	        ``(dict_out, dict_res)``: ``dict_out`` is ``[result]`` (the pipeline
	        output image) and ``dict_res`` is ``[mask, overlay]``, where
	        ``overlay`` is the result tinted red inside the mask.

	    Raises:
	        KeyError: If a mask is needed but ``input_image`` has no ``"mask"``.
	    """
	    # Slider-driven callers may deliver whole numbers as floats.
	    seed = int(seed)
	    ddim_steps = int(ddim_steps)

	    image = input_image["image"].convert("RGB").resize((1024, 1024), Image.LANCZOS)

	    if task in ("image-outpainting", "context-aware"):
	        prompt = prompt + " empty scene"
	    elif task == "object-removal":
	        prompt = prompt + " empty scene blur"

	    ratios = (
	        left_expansion_ratio,
	        right_expansion_ratio,
	        top_expansion_ratio,
	        bottom_expansion_ratio,
	    )
	    if all(ratio is not None for ratio in ratios):
	        src_w, src_h = image.size
	        canvas_w = int((1 + left_expansion_ratio + right_expansion_ratio) * src_w)
	        canvas_h = int((1 + top_expansion_ratio + bottom_expansion_ratio) * src_h)
	        top = int(top_expansion_ratio * src_h)
	        left = int(left_expansion_ratio * src_w)

	        # Mid-grey canvas with the original pasted at its offset.
	        canvas = np.full((canvas_h, canvas_w, 3), 127, dtype=np.uint8)
	        canvas[top:top + src_h, left:left + src_w, :] = np.array(image)

	        # Repaint everything except the original, shrunk by `blurry_gap` so
	        # the generated border overlaps the seam.
	        blurry_gap = 10
	        canvas_mask = np.full((canvas_h, canvas_w, 3), 255, dtype=np.uint8)
	        canvas_mask[
	            top + blurry_gap:top + src_h - blurry_gap,
	            left + blurry_gap:left + src_w - blurry_gap,
	            :
	        ] = 0

	        image = Image.fromarray(canvas)
	        mask = Image.fromarray(canvas_mask)
	    else:
	        mask = input_image["mask"]

	    # Normalise the mask to 8-bit RGB so that RGBA, palette or 1-bit masks
	    # broadcast correctly against the RGB image and scale correctly by 255.
	    mask = mask.convert("RGB")

	    promptA, promptB, negative_promptA, negative_promptB = add_task(task)

	    # Snap both dimensions down to a multiple of 8.
	    width, height = image.size
	    width -= width % 8
	    height -= height % 8
	    image = image.resize((width, height), Image.LANCZOS)
	    mask = mask.resize((width, height), Image.LANCZOS)

	    # Blank out the region to repaint before conditioning on the image.
	    keep = 1 - np.array(mask) / 255.0
	    image = Image.fromarray((np.array(image) * keep).astype(np.uint8)).convert("RGB")

	    set_seed(seed)
	    result = pipe(
	        promptA=promptA,
	        promptB=promptB,
	        promptU=prompt,
	        tradoff=fitting_degree,
	        tradoff_nag=fitting_degree,
	        image=image,
	        mask=mask,
	        num_inference_steps=ddim_steps,
	        generator=torch.Generator("cuda").manual_seed(seed),
	        brushnet_conditioning_scale=1.0,
	        negative_promptA=negative_promptA,
	        negative_promptB=negative_promptB,
	        negative_promptU=negative_prompt,
	        guidance_scale=scale,
	        width=width,
	        height=height,
	    ).images[0]

	    # Preview: blend the result with flat red (180, 0, 0) inside the mask.
	    # Dividing by 512 caps the tint at roughly half strength.
	    result_np = np.array(result).astype("float")
	    red = np.zeros_like(result_np)
	    red[:, :, 0] = 180.0
	    tint = np.array(mask).astype("float") / 512.0
	    result_m = Image.fromarray((result_np * (1 - tint) + tint * red).astype("uint8"))

	    dict_res = [mask, result_m]
	    dict_out = [result]

	    return dict_out, dict_res



	import gradio as gr

	def custom_infer(
	    input_image_path,
	    input_mask_path=None,
	    prompt="",
	    fitting_degree=0.5,
	    ddim_steps=20,
	    scale=5,
	    seed=143,
	    negative_prompt="",
	    task="text-guided",
	    left_expansion_ratio=0.2,
	    right_expansion_ratio=0.2,
	    top_expansion_ratio=0.2,
	    bottom_expansion_ratio=0.2,
	):
	    """Load the image (and mask) from disk and run ``predict`` for one task.

	    Args:
	        input_image_path: Path of the image to edit.
	        input_mask_path: Path of the mask image. Required for
	            ``"text-guided"``; not read for ``"image-outpainting"``.
	        prompt: Text prompt forwarded to ``predict``.
	        fitting_degree: Forwarded to ``predict``.
	        ddim_steps: Forwarded to ``predict``.
	        scale: Forwarded to ``predict``.
	        seed: Forwarded to ``predict``.
	        negative_prompt: Forwarded to ``predict``.
	        task: ``"text-guided"`` or ``"image-outpainting"``.
	        left_expansion_ratio: Used only for ``"image-outpainting"``.
	        right_expansion_ratio: Used only for ``"image-outpainting"``.
	        top_expansion_ratio: Used only for ``"image-outpainting"``.
	        bottom_expansion_ratio: Used only for ``"image-outpainting"``.

	    Returns:
	        The first image of ``predict``'s first return value.

	    Raises:
	        ValueError: If ``task`` is not supported, no input image is given, or
	            ``"text-guided"`` is requested without a mask.
	    """
	    # Validate before touching the filesystem so bad requests fail with a
	    # clear message instead of an unbound-variable error later on.
	    supported_tasks = ("text-guided", "image-outpainting")
	    if task not in supported_tasks:
	        raise ValueError(f"Unsupported task {task!r}; expected one of {supported_tasks}.")
	    if not input_image_path:
	        raise ValueError("An input image is required.")
	    if task == "text-guided" and not input_mask_path:
	        raise ValueError("The 'text-guided' task requires an input mask.")

	    # Context managers close the underlying files once inference is done.
	    with Image.open(input_image_path) as image:
	        if task == "image-outpainting":
	            images, _ = predict(
	                {"image": image},
	                prompt,
	                fitting_degree,
	                ddim_steps,
	                scale,
	                seed,
	                negative_prompt,
	                task,
	                left_expansion_ratio,
	                right_expansion_ratio,
	                top_expansion_ratio,
	                bottom_expansion_ratio,
	            )
	        else:
	            with Image.open(input_mask_path) as mask:
	                # No expansion ratios: predict uses the supplied mask.
	                images, _ = predict(
	                    {"image": image, "mask": mask},
	                    prompt,
	                    fitting_degree,
	                    ddim_steps,
	                    scale,
	                    seed,
	                    negative_prompt,
	                    task,
	                    None,
	                    None,
	                    None,
	                    None,
	                )

	    return images[0]
	import gradio as gr

	# Gradio UI: one input component per `custom_infer` parameter, listed in the
	# function's positional order.
	# NOTE(review): the "Fitting Degree" slider spans 1-20 while `custom_infer`
	# defaults `fitting_degree` to 0.5 -- confirm which range the pipeline expects.
	inputs = [
	    gr.Image(label="Input Image", type="filepath"),
	    gr.Image(label="Input Mask (optional)", type="filepath"),
	    gr.Textbox(label="Prompt", value="A beautiful landscape"),
	    gr.Slider(label="Fitting Degree", minimum=1, maximum=20, value=7, step=1),
	    gr.Slider(label="DDIM Steps", minimum=10, maximum=50, value=20, step=1),
	    gr.Slider(label="Scale", minimum=1, maximum=20, value=7.5, step=0.1),
	    gr.Slider(label="Use Seed", minimum=0, maximum=1300000, value=143, step=1),
	    gr.Textbox(label="Negative Prompt", value="blur, low quality"),
	    gr.Radio(label="Task", choices=["text-guided", "image-outpainting"], value="image-outpainting"),
	] + [
	    # The four expansion sliders differ only in their label.
	    gr.Slider(label=slider_label, minimum=0, maximum=2, value=0.2, step=0.01)
	    for slider_label in (
	        "Left Expansion Ratio",
	        "Right Expansion Ratio",
	        "Top Expansion Ratio",
	        "Bottom Expansion Ratio",
	    )
	]

	outputs = [gr.Image(label="Output Image")]

	# Build the interface, queue requests, and start serving.
	demo = gr.Interface(fn=custom_infer, inputs=inputs, outputs=outputs, title="Inference")

	# NOTE(review): confirm `concurrency_count` is accepted by the installed
	# Gradio version's `queue()`.
	demo.queue(concurrency_count=1, max_size=1, api_open=True)
	demo.launch(show_api=True)