import cv2
import numpy as np
import torch
from PIL import Image, ImageFilter, ImageOps

from pipeline_PowerPaint import StableDiffusionInpaintPipeline as Pipeline
from power_paint_tokenizer import PowerPaintTokenizer
from diffusers.utils import load_image


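# PowerPaint selects a task by appending learned task tokens (P_ctxt, P_obj,
# P_shape) to the positive and negative prompts. Prompts A and B give the
# pipeline two variants to blend via the "tradoff" arguments passed below; any
# task name other than the three handled here falls through to plain
# text-guided object inpainting (P_obj).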
def add_task_to_prompt(prompt, negative_prompt, task):
    if task == "object-removal":
        promptA = prompt + " P_ctxt"
        promptB = prompt + " P_ctxt"
        negative_promptA = negative_prompt + " P_obj"
        negative_promptB = negative_prompt + " P_obj"
    elif task == "shape-guided":
        promptA = prompt + " P_shape"
        promptB = prompt + " P_ctxt"
        negative_promptA = negative_prompt
        negative_promptB = negative_prompt
    elif task == "image-outpainting":
        promptA = prompt + " P_ctxt"
        promptB = prompt + " P_ctxt"
        negative_promptA = negative_prompt + " P_obj"
        negative_promptB = negative_prompt + " P_obj"
    else:
        promptA = prompt + " P_obj"
        promptB = prompt + " P_obj"
        negative_promptA = negative_prompt
        negative_promptB = negative_prompt

    return promptA, promptB, negative_promptA, negative_promptB


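# Stable Diffusion's VAE downsamples by a factor of 8, so the pipeline needs
# dimensions that are multiples of 8. predict() first scales the short side to
# 640 (preserving aspect ratio), then rounds both sides down to a multiple of 8.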
@torch.inference_mode()
def predict(
    pipe,
    input_image,
    prompt,
    fitting_degree,
    ddim_steps,
    scale,
    negative_prompt,
    task,
):
    # Scale the short side to 640, preserving the aspect ratio.
    width, height = input_image["image"].convert("RGB").size
    if width < height:
        input_image["image"] = (
            input_image["image"].convert("RGB").resize((640, int(height / width * 640)))
        )
    else:
        input_image["image"] = (
            input_image["image"].convert("RGB").resize((int(width / height * 640), 640))
        )

    promptA, promptB, negative_promptA, negative_promptB = add_task_to_prompt(
        prompt, negative_prompt, task
    )
    print(promptA, promptB, negative_promptA, negative_promptB)

    # Round both dimensions down to multiples of 8. Note that a numpy array is
    # indexed (height, width, channels), while PIL's resize takes (width, height).
    img = np.array(input_image["image"].convert("RGB"))
    height_8 = img.shape[0] - img.shape[0] % 8
    width_8 = img.shape[1] - img.shape[1] % 8
    input_image["image"] = input_image["image"].resize((width_8, height_8))
    input_image["mask"] = input_image["mask"].resize((width_8, height_8))

    result = pipe(
        promptA=promptA,
        promptB=promptB,
        # "tradoff" (sic) is the PowerPaint pipeline's name for the fitting degree.
        tradoff=fitting_degree,
        tradoff_nag=fitting_degree,
        negative_promptA=negative_promptA,
        negative_promptB=negative_promptB,
        image=input_image["image"].convert("RGB"),
        mask_image=input_image["mask"].convert("RGB"),
        width=width_8,
        height=height_8,
        guidance_scale=scale,
        num_inference_steps=ddim_steps,
    ).images[0]

    # Diagnostic view: tint the masked region red on top of the result.
    mask_np = np.array(input_image["mask"].convert("RGB"))
    red = np.array(result).astype("float")
    red[:, :, 0] = 180.0
    red[:, :, 1] = 0
    red[:, :, 2] = 0
    result_m = np.array(result)
    result_m = Image.fromarray(
        (
            # Dividing the 0-255 mask by 512 caps the blend at ~50%, so the
            # generated content stays visible under the red tint.
            result_m.astype("float") * (1 - mask_np.astype("float") / 512.0)
            + mask_np.astype("float") / 512.0 * red
        ).astype("uint8")
    )

    # Paste the generated pixels back onto the input, feathering the seam with
    # a Gaussian-blurred mask.
    m_img = (
        input_image["mask"].convert("RGB").filter(ImageFilter.GaussianBlur(radius=3))
    )
    m_img = np.asarray(m_img) / 255.0
    img_np = np.asarray(input_image["image"].convert("RGB")) / 255.0
    ours_np = np.asarray(result) / 255.0
    ours_np = ours_np * m_img + (1 - m_img) * img_np
    result_paste = Image.fromarray(np.uint8(ours_np * 255))

    masks_out = [input_image["mask"].convert("RGB"), result_m]
    images_out = [input_image["image"].convert("RGB"), result_paste]

    return images_out, masks_out


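# Load the PowerPaint v1 checkpoint (a Stable Diffusion inpainting model
# fine-tuned with PowerPaint's task-token embeddings) in fp16. The safety
# checker is disabled to keep the example minimal.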
pipe = Pipeline.from_pretrained(
    "Sanster/PowerPaint-V1-stable-diffusion-inpainting",
    torch_dtype=torch.float16,
    safety_checker=None,
    variant="fp16",
)
# Wrap the stock tokenizer so the PowerPaint task tokens are handled.
pipe.tokenizer = PowerPaintTokenizer(pipe.tokenizer)
# "mps" targets Apple Silicon; use "cuda" on NVIDIA GPUs or "cpu" as a fallback.
pipe = pipe.to("mps")


img_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png"
mask_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo_mask.png"
image = load_image(img_url).convert("RGB")
mask = load_image(mask_url).convert("RGB")

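# predict() expects a dict holding the source image and a mask in which white
# pixels mark the region to repaint (the convention of this example mask).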
input_image = {"image": image, "mask": mask}
prompt = "Face of a fox sitting on a bench"
negative_prompt = "out of frame, lowres, error, cropped, worst quality, low quality, jpeg artifacts, ugly, duplicate, morbid, mutilated, mutation, deformed, blurry, dehydrated, bad anatomy, bad proportions, extra limbs, disfigured, gross proportions, malformed limbs, watermark, signature"
fitting_degree = 1  # blend weight between prompt variants A and B ("tradoff")
ddim_steps = 30

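# One configuration per PowerPaint task. Object removal is run here with an
# empty prompt and a stronger guidance scale; the other tasks reuse the text
# prompts defined above.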
tasks = [
    {
        "task": "object-removal",
        "guidance_scale": 12,
        "prompt": "",
        "negative_prompt": "",
    },
    {
        "task": "shape-guided",
        "guidance_scale": 7.5,
        "prompt": prompt,
        "negative_prompt": negative_prompt,
    },
    {
        "task": "inpaint",
        "guidance_scale": 7.5,
        "prompt": prompt,
        "negative_prompt": negative_prompt,
    },
    {
        "task": "image-outpainting",
        "guidance_scale": 7.5,
        "prompt": "A dog sitting on a bench",
        "negative_prompt": negative_prompt,
    },
]

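# Run every task on the same inputs. Note that predict() resizes the entries of
# input_image in place, so later tasks operate on the resized copies; the
# outpainting task additionally pads the image before calling predict().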
for task in tasks:
    if task["task"] == "image-outpainting":
        # Outpainting is inpainting on a padded canvas: grow the image by a
        # gray margin and build a mask that is white only over that margin.
        margin = 128
        input_image["image"] = ImageOps.expand(
            input_image["image"],
            border=(margin, margin, margin, margin),
            fill=(127, 127, 127),
        )
        outpaint_mask = np.zeros_like(np.asarray(input_image["mask"]))
        input_image["mask"] = Image.fromarray(
            cv2.copyMakeBorder(
                outpaint_mask,
                margin,
                margin,
                margin,
                margin,
                cv2.BORDER_CONSTANT,
                value=(255, 255, 255),
            )
        )

    images_out, masks_out = predict(
        pipe,
        input_image,
        task["prompt"],
        fitting_degree,
        ddim_steps,
        task["guidance_scale"],
        task["negative_prompt"],
        task["task"],  # pass the task name, not the whole config dict
    )

    # images_out holds [resized input, composited result].
    result_image = images_out[1]
    result_image.save(f"{task['task']}_result.png")
|