"""Demo: run the PowerPaint Stable-Diffusion inpainting pipeline on one image
for several task types (object removal, shape-guided inpainting, plain
inpainting, outpainting) and save each result to ``<task>_result.png``.
"""
import cv2
import numpy as np
import torch
from PIL import Image, ImageFilter, ImageOps

from diffusers.utils import load_image

from pipeline_PowerPaint import StableDiffusionInpaintPipeline as Pipeline
from power_paint_tokenizer import PowerPaintTokenizer


def add_task_to_prompt(prompt, negative_prompt, task):
    """Append PowerPaint task tokens to the positive/negative prompts.

    Args:
        prompt: user-supplied positive prompt.
        negative_prompt: user-supplied negative prompt.
        task: task *name* — one of "object-removal", "shape-guided",
            "image-outpainting"; anything else falls through to the
            default text-guided inpainting tokens.

    Returns:
        Tuple ``(promptA, promptB, negative_promptA, negative_promptB)``
        with the ``P_ctxt`` / ``P_shape`` / ``P_obj`` tokens appended.
    """
    if task == "object-removal":
        promptA = prompt + " P_ctxt"
        promptB = prompt + " P_ctxt"
        negative_promptA = negative_prompt + " P_obj"
        negative_promptB = negative_prompt + " P_obj"
    elif task == "shape-guided":
        promptA = prompt + " P_shape"
        promptB = prompt + " P_ctxt"
        negative_promptA = negative_prompt
        negative_promptB = negative_prompt
    elif task == "image-outpainting":
        promptA = prompt + " P_ctxt"
        promptB = prompt + " P_ctxt"
        negative_promptA = negative_prompt + " P_obj"
        negative_promptB = negative_prompt + " P_obj"
    else:
        # Default: text-guided inpainting (the "inpaint" task).
        promptA = prompt + " P_obj"
        promptB = prompt + " P_obj"
        negative_promptA = negative_prompt
        negative_promptB = negative_prompt
    return promptA, promptB, negative_promptA, negative_promptB


@torch.inference_mode()
def predict(
    pipe,
    input_image,
    prompt,
    fitting_degree,
    ddim_steps,
    scale,
    negative_prompt,
    task,
):
    """Run one PowerPaint generation pass.

    NOTE: mutates ``input_image`` in place — "image" and "mask" are
    resized (shorter side 640 px, then snapped to multiples of 8).

    Args:
        pipe: loaded PowerPaint ``StableDiffusionInpaintPipeline``.
        input_image: dict with PIL images under "image" and "mask".
        prompt / negative_prompt: text prompts (task tokens added here).
        fitting_degree: PowerPaint trade-off coefficient.
        ddim_steps: number of denoising steps.
        scale: classifier-free guidance scale.
        task: task name string (see ``add_task_to_prompt``).

    Returns:
        ``(dict_out, dict_res)`` where
        ``dict_out = [resized input, result pasted back outside the mask]``
        and ``dict_res = [mask, result with a translucent red mask overlay]``.
    """
    width, height = input_image["image"].convert("RGB").size
    # Resize so the shorter side is 640 px, preserving aspect ratio.
    if width < height:
        input_image["image"] = (
            input_image["image"].convert("RGB").resize((640, int(height / width * 640)))
        )
    else:
        input_image["image"] = (
            input_image["image"].convert("RGB").resize((int(width / height * 640), 640))
        )

    promptA, promptB, negative_promptA, negative_promptB = add_task_to_prompt(
        prompt, negative_prompt, task
    )
    print(promptA, promptB, negative_promptA, negative_promptB)

    # Snap both dimensions down to multiples of 8, as required by the SD VAE.
    # Convention here: W = pixel rows (height), H = pixel columns (width).
    img = np.array(input_image["image"].convert("RGB"))
    W = int(np.shape(img)[0] - np.shape(img)[0] % 8)
    H = int(np.shape(img)[1] - np.shape(img)[1] % 8)
    input_image["image"] = input_image["image"].resize((H, W))
    input_image["mask"] = input_image["mask"].resize((H, W))

    result = pipe(
        promptA=promptA,
        promptB=promptB,
        tradoff=fitting_degree,
        tradoff_nag=fitting_degree,
        negative_promptA=negative_promptA,
        negative_promptB=negative_promptB,
        image=input_image["image"].convert("RGB"),
        mask_image=input_image["mask"].convert("RGB"),
        width=H,
        height=W,
        guidance_scale=scale,
        num_inference_steps=ddim_steps,
    ).images[0]

    # Visualization: blend a solid red layer over the masked region.
    # Dividing the 0-255 mask by 512 gives ~50% opacity where the mask is white.
    mask_np = np.array(input_image["mask"].convert("RGB"))
    red = np.array(result).astype("float")
    red[:, :, 0] = 180.0
    red[:, :, 1] = 0
    red[:, :, 2] = 0
    result_m = Image.fromarray(
        (
            np.array(result).astype("float") * (1 - mask_np.astype("float") / 512.0)
            + mask_np.astype("float") / 512.0 * red
        ).astype("uint8")
    )

    # Paste generated pixels back onto the (resized) input through a
    # Gaussian-feathered mask so unmasked regions keep the original pixels.
    m_img = (
        input_image["mask"].convert("RGB").filter(ImageFilter.GaussianBlur(radius=3))
    )
    m_img = np.asarray(m_img) / 255.0
    img_np = np.asarray(input_image["image"].convert("RGB")) / 255.0
    ours_np = np.asarray(result) / 255.0
    ours_np = ours_np * m_img + (1 - m_img) * img_np
    result_paste = Image.fromarray(np.uint8(ours_np * 255))

    dict_res = [input_image["mask"].convert("RGB"), result_m]
    dict_out = [input_image["image"].convert("RGB"), result_paste]
    return dict_out, dict_res


def main():
    """Load the pipeline, run every demo task, and save the results."""
    pipe = Pipeline.from_pretrained(
        "Sanster/PowerPaint-V1-stable-diffusion-inpainting",
        torch_dtype=torch.float16,
        safety_checker=None,
        variant="fp16",
    )
    pipe.tokenizer = PowerPaintTokenizer(pipe.tokenizer)
    # Apple-Silicon device; switch to "cuda" or "cpu" on other hardware.
    pipe = pipe.to("mps")

    img_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png"
    mask_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo_mask.png"

    image = load_image(img_url).convert("RGB")
    mask = load_image(mask_url).convert("RGB")
    input_image = {"image": image, "mask": mask}

    prompt = "Face of a fox sitting on a bench"
    negative_prompt = "out of frame, lowres, error, cropped, worst quality, low quality, jpeg artifacts, ugly, duplicate, morbid, mutilated, out of frame, mutation, deformed, blurry, dehydrated, bad anatomy, bad proportions, extra limbs, disfigured, gross proportions, malformed limbs, watermark, signature"
    fitting_degree = 1
    ddim_steps = 30

    tasks = [
        {
            "task": "object-removal",
            "guidance_scale": 12,
            "prompt": "",
            "negative_prompt": "",
        },
        {
            "task": "shape-guided",
            "guidance_scale": 7.5,
            "prompt": prompt,
            "negative_prompt": negative_prompt,
        },
        {
            "task": "inpaint",
            "guidance_scale": 7.5,
            "prompt": prompt,
            "negative_prompt": negative_prompt,
        },
        {
            "task": "image-outpainting",
            "guidance_scale": 7.5,
            "prompt": "A dog sitting on a bench",
            "negative_prompt": negative_prompt,
        },
    ]

    for task in tasks:
        if task["task"] == "image-outpainting":
            # Outpainting: pad the image with gray and build a mask that is
            # white only on the new border.  NOTE(review): this mutates
            # `input_image`, which is shared across loop iterations — it only
            # works because outpainting is the last task in the list.
            margin = 128
            input_image["image"] = ImageOps.expand(
                input_image["image"],
                border=(margin, margin, margin, margin),
                fill=(127, 127, 127),
            )
            outpaint_mask = np.zeros_like(np.asarray(input_image["mask"]))
            input_image["mask"] = Image.fromarray(
                cv2.copyMakeBorder(
                    outpaint_mask,
                    margin,
                    margin,
                    margin,
                    margin,
                    cv2.BORDER_CONSTANT,
                    value=(255, 255, 255),
                )
            )

        dict_out, dict_res = predict(
            pipe,
            input_image,
            task["prompt"],
            fitting_degree,
            ddim_steps,
            task["guidance_scale"],
            task["negative_prompt"],
            # Bug fix: pass the task NAME, not the whole dict — the dict never
            # equals any task string, so every task ran as plain "inpaint".
            task["task"],
        )
        result_image = dict_out[1]
        result_image.save(f"{task['task']}_result.png")


if __name__ == "__main__":
    main()