# picpilot-server/scripts/outpainting.py
# (source: Hugging Face, commit ffaa8aa)
import torch
from controlnet_aux import ZoeDetector
from PIL import Image
from diffusers import AutoencoderKL, ControlNetModel, StableDiffusionXLControlNetPipeline, StableDiffusionXLInpaintPipeline
from scripts.api_utils import ImageAugmentation, accelerator
import lightning.pytorch as pl
from rembg import remove
pl.seed_everything(42)
class ControlNetZoeDepthOutpainting:
    """
    A class for processing and outpainting images using Stable Diffusion XL.

    Pipeline stages: load an image and remove its background, centre the
    subject on a white canvas, generate a Zoe depth map, synthesise a
    temporary background with a ControlNet-guided SDXL pass, then refine
    everything outside the subject with an SDXL inpainting pass.
    """
    def __init__(self, target_size: tuple[int, int] = (1024, 1024)):
        """
        Initialize the ControlNetZoeDepthOutpainting processor with necessary models and pipelines.

        Args:
            target_size (tuple[int, int]): The target size for the output image (width, height).
        """
        self.target_size = target_size
        print("Initializing models and pipelines...")
        # fp16-safe VAE shared by both SDXL pipelines below.
        self.vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16).to(accelerator())
        self.zoe = ZoeDetector.from_pretrained("lllyasviel/Annotators")
        # Two ControlNets used together: inpaint-dreamer drafts the new
        # background, zoe-depth keeps its geometry consistent with the subject.
        self.controlnets = [
            ControlNetModel.from_pretrained("destitech/controlnet-inpaint-dreamer-sdxl", torch_dtype=torch.float16, variant="fp16"),
            ControlNetModel.from_pretrained("diffusers/controlnet-zoe-depth-sdxl-1.0", torch_dtype=torch.float16)
        ]
        print("Setting up sdxl pipeline...")
        self.controlnet_pipeline = StableDiffusionXLControlNetPipeline.from_pretrained("SG161222/RealVisXL_V4.0", torch_dtype=torch.float16, variant="fp16", controlnet=self.controlnets, vae=self.vae).to(accelerator())
        print("Setting up inpaint pipeline...")
        self.inpaint_pipeline = StableDiffusionXLInpaintPipeline.from_pretrained("OzzyGT/RealVisXL_V4.0_inpainting", torch_dtype=torch.float16, variant="fp16", vae=self.vae).to(accelerator())

    @staticmethod
    def _center_offset(canvas_size: tuple[int, int], foreground: Image.Image) -> tuple[int, int]:
        """
        Compute the top-left (x, y) offset that centres *foreground* on a
        canvas of *canvas_size* (width, height).

        Shared by scale_and_paste and run_pipeline so the centering math
        lives in exactly one place.
        """
        return ((canvas_size[0] - foreground.width) // 2,
                (canvas_size[1] - foreground.height) // 2)

    def load_and_preprocess_image(self, image_path: str) -> tuple[Image.Image, Image.Image]:
        """
        Load an image from a file path and preprocess it for outpainting.

        The background is stripped with rembg before the subject is centred
        on a white canvas of the target size.

        Args:
            image_path (str): Path of the image to process.
        Returns:
            tuple[Image.Image, Image.Image]: The resized original image and the background image.
        """
        original_image = Image.open(image_path).convert("RGBA")
        original_image = remove(original_image)
        return self.scale_and_paste(original_image, self.target_size)

    def scale_and_paste(self, original_image: Image.Image, target_size: tuple[int, int], scale_factor: float = 0.95) -> tuple[Image.Image, Image.Image]:
        """
        Scale the original image and paste it onto a background of the target size.

        Args:
            original_image (Image.Image): The original image to process.
            target_size (tuple[int, int]): The target size (width, height) for the output image.
            scale_factor (float): Factor to scale down the image to leave some padding (default: 0.95).
        Returns:
            tuple[Image.Image, Image.Image]: The resized original image and the background image.
        """
        target_width, target_height = target_size
        aspect_ratio = original_image.width / original_image.height
        # Fit the longer relative side to the canvas, preserving aspect ratio.
        if (target_width / target_height) < aspect_ratio:
            new_width = int(target_width * scale_factor)
            new_height = int(new_width / aspect_ratio)
        else:
            new_height = int(target_height * scale_factor)
            new_width = int(new_height * aspect_ratio)
        resized_original = original_image.resize((new_width, new_height), Image.LANCZOS)
        background = Image.new("RGBA", target_size, "white")
        x, y = self._center_offset(target_size, resized_original)
        # Use the image itself as the paste mask so transparency is kept.
        background.paste(resized_original, (x, y), resized_original)
        return resized_original, background

    def generate_depth_map(self, image: Image.Image) -> Image.Image:
        """
        Generate a depth map for the given image using the Zoe model.

        Args:
            image (Image.Image): The image to generate a depth map for.
        Returns:
            Image.Image: The generated depth map.
        """
        # NOTE(review): image_resolution uses the target width only, which
        # assumes a square target size — confirm before using non-square sizes.
        return self.zoe(image, detect_resolution=512, image_resolution=self.target_size[0])

    def generate_base_image(self, prompt: str, negative_prompt: str, inpaint_image: Image.Image, zoe_image: Image.Image, guidance_scale: float, controlnet_num_inference_steps: int, controlnet_conditioning_scale: float, control_guidance_end: float) -> Image.Image:
        """
        Generate a temporary background image using the controlnet pipeline.

        Args:
            prompt (str): The prompt for image generation.
            negative_prompt (str): The negative prompt for image generation.
            inpaint_image (Image.Image): The image to inpaint.
            zoe_image (Image.Image): The depth map image.
            guidance_scale (float): Guidance scale for controlnet.
            controlnet_num_inference_steps (int): Number of inference steps for controlnet.
            controlnet_conditioning_scale (float): Conditioning scale for controlnet.
            control_guidance_end (float): Guidance end for controlnet.
        Returns:
            Image.Image: The generated image.
        """
        # The two conditioning images are passed in the same order the
        # ControlNets were registered in __init__ (inpaint-dreamer, zoe-depth).
        return self.controlnet_pipeline(
            prompt,
            negative_prompt=negative_prompt,
            image=[inpaint_image, zoe_image],
            guidance_scale=guidance_scale,
            num_inference_steps=controlnet_num_inference_steps,
            controlnet_conditioning_scale=controlnet_conditioning_scale,
            control_guidance_end=control_guidance_end,
        ).images[0]

    def create_mask(self, image: Image.Image, segmentation_model: str, detection_model: str) -> Image.Image:
        """
        Create an inverted subject mask for the final outpainting process.

        Args:
            image (Image.Image): The original image.
            segmentation_model (str): The segmentation model identifier.
            detection_model (str): The detection model identifier.
        Returns:
            Image.Image: The created mask (inverted, so the background is editable).
        """
        image_augmenter = ImageAugmentation(self.target_size[0], self.target_size[1], roi_scale=0.4)
        mask_image = image_augmenter.generate_mask_from_bbox(image, segmentation_model, detection_model)
        # Invert so the inpainting pass repaints the background, not the subject.
        inverted_mask = image_augmenter.invert_mask(mask_image)
        return inverted_mask

    def generate_outpainting(self, prompt: str, negative_prompt: str, image: Image.Image, mask: Image.Image, guidance_scale: float, strength: float, num_inference_steps: int) -> Image.Image:
        """
        Generate the final outpainted image.

        Args:
            prompt (str): The prompt for image generation.
            negative_prompt (str): The negative prompt for image generation.
            image (Image.Image): The image to outpaint.
            mask (Image.Image): The mask for outpainting.
            guidance_scale (float): Guidance scale for inpainting.
            strength (float): Strength for inpainting.
            num_inference_steps (int): Number of inference steps for inpainting.
        Returns:
            Image.Image: The final outpainted image.
        """
        return self.inpaint_pipeline(
            prompt,
            negative_prompt=negative_prompt,
            image=image,
            mask_image=mask,
            guidance_scale=guidance_scale,
            strength=strength,
            num_inference_steps=num_inference_steps,
        ).images[0]

    def run_pipeline(self, image_path: str, controlnet_prompt: str, controlnet_negative_prompt: str, controlnet_conditioning_scale: float, controlnet_guidance_scale: float, controlnet_num_inference_steps: int, controlnet_guidance_end: float, inpainting_prompt: str, inpainting_negative_prompt: str, inpainting_guidance_scale: float, inpainting_strength: float, inpainting_num_inference_steps: int) -> Image.Image:
        """
        Process an image through the entire outpainting pipeline.

        Args:
            image_path (str): Path of the image to process.
            controlnet_prompt (str): Prompt for the controlnet image generation.
            controlnet_negative_prompt (str): Negative prompt for controlnet image generation.
            controlnet_conditioning_scale (float): Conditioning scale for controlnet.
            controlnet_guidance_scale (float): Guidance scale for controlnet.
            controlnet_num_inference_steps (int): Number of inference steps for controlnet.
            controlnet_guidance_end (float): Guidance end for controlnet.
            inpainting_prompt (str): Prompt for the inpainting image generation.
            inpainting_negative_prompt (str): Negative prompt for inpainting image generation.
            inpainting_guidance_scale (float): Guidance scale for inpainting.
            inpainting_strength (float): Strength for inpainting.
            inpainting_num_inference_steps (int): Number of inference steps for inpainting.
        Returns:
            Image.Image: The final outpainted image.
        """
        print("Loading and preprocessing image")
        resized_img, background_image = self.load_and_preprocess_image(image_path)
        print("Generating depth map")
        image_zoe = self.generate_depth_map(background_image)
        print("Generating initial image")
        temp_image = self.generate_base_image(controlnet_prompt, controlnet_negative_prompt, background_image, image_zoe,
                                              controlnet_guidance_scale, controlnet_num_inference_steps, controlnet_conditioning_scale, controlnet_guidance_end)
        # Re-paste the cut-out subject so it stays pixel-perfect over the
        # generated background.
        x, y = self._center_offset(self.target_size, resized_img)
        temp_image.paste(resized_img, (x, y), resized_img)
        print("Creating mask for outpainting")
        final_mask = self.create_mask(temp_image, "facebook/sam-vit-large", "yolov8l")
        # Feather the mask edge so the inpainting pass blends smoothly.
        mask_blurred = self.inpaint_pipeline.mask_processor.blur(final_mask, blur_factor=20)
        print("Generating final outpainted image")
        final_image = self.generate_outpainting(inpainting_prompt, inpainting_negative_prompt, temp_image, mask_blurred,
                                                inpainting_guidance_scale, inpainting_strength, inpainting_num_inference_steps)
        # Paste the subject one last time to undo any drift from inpainting.
        final_image.paste(resized_img, (x, y), resized_img)
        return final_image
def main():
    """Run the full outpainting pipeline on a sample image and save the result."""
    processor = ControlNetZoeDepthOutpainting(target_size=(1024, 1024))
    # Keyword arguments make the 12-parameter call self-documenting;
    # values are identical to the original positional call.
    result = processor.run_pipeline(
        "/home/PicPilot/sample_data/example1.jpg",
        controlnet_prompt="product in the kitchen",
        controlnet_negative_prompt="low resolution, Bad Resolution",
        controlnet_conditioning_scale=0.9,
        controlnet_guidance_scale=7.5,
        controlnet_num_inference_steps=50,
        controlnet_guidance_end=0.6,
        inpainting_prompt="Editorial Photography of the Pot in the kitchen",
        inpainting_negative_prompt="low Resolution, Bad Resolution",
        inpainting_guidance_scale=8,
        inpainting_strength=0.7,
        inpainting_num_inference_steps=30,
    )
    result.save("outpainted_result.png")
    print("Outpainting complete. Result saved as 'outpainted_result.png'")
if __name__ == "__main__":
    main()