# NOTE(review): removed web-viewer scrape artifacts (Spaces header, commit-hash
# row, line-number gutter) that preceded this file — they were not valid Python.
import torch
from controlnet_aux import ZoeDetector
from PIL import Image
from diffusers import AutoencoderKL, ControlNetModel, StableDiffusionXLControlNetPipeline, StableDiffusionXLInpaintPipeline
from scripts.api_utils import ImageAugmentation, accelerator
import lightning.pytorch as pl
from rembg import remove
pl.seed_everything(42)
class ControlNetZoeDepthOutpainting:
    """
    A class for processing and outpainting images using Stable Diffusion XL.

    Pipeline stages: background removal (rembg), scale-and-center onto a white
    canvas, Zoe depth-map estimation, a multi-ControlNet base generation
    (inpaint-dreamer + zoe-depth), then a final SDXL inpainting pass restricted
    to the background via an inverted segmentation mask.
    """
    def __init__(self, target_size: tuple[int, int] = (1024, 1024)):
        """
        Initialize the ControlNetZoeDepthOutpainting with necessary models and pipelines.

        Downloads/loads all models eagerly, so construction is slow and
        requires the accelerator device returned by ``accelerator()``.

        Args:
            target_size (tuple[int, int]): The target size for the output image (width, height).
        """
        self.target_size = target_size
        print("Initializing models and pipelines...")
        # fp16-safe VAE, shared by both SDXL pipelines below.
        self.vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16).to(accelerator())
        self.zoe = ZoeDetector.from_pretrained("lllyasviel/Annotators")
        # Order matters: generate_base_image passes image=[inpaint_image, zoe_image],
        # which must match [inpaint-dreamer, zoe-depth] here.
        self.controlnets = [
            ControlNetModel.from_pretrained("destitech/controlnet-inpaint-dreamer-sdxl", torch_dtype=torch.float16, variant="fp16"),
            ControlNetModel.from_pretrained("diffusers/controlnet-zoe-depth-sdxl-1.0", torch_dtype=torch.float16)
        ]
        print("Setting up sdxl pipeline...")
        self.controlnet_pipeline = StableDiffusionXLControlNetPipeline.from_pretrained("SG161222/RealVisXL_V4.0", torch_dtype=torch.float16, variant="fp16", controlnet=self.controlnets, vae=self.vae).to(accelerator())
        print("Setting up inpaint pipeline...")
        self.inpaint_pipeline = StableDiffusionXLInpaintPipeline.from_pretrained("OzzyGT/RealVisXL_V4.0_inpainting", torch_dtype=torch.float16, variant="fp16", vae=self.vae).to(accelerator())

    def load_and_preprocess_image(self, image_path: str) -> tuple[Image.Image, Image.Image]:
        """
        Load an image from a file path and preprocess it for outpainting.

        The subject is cut out with rembg (alpha-matted RGBA), then scaled and
        centered onto a white canvas of ``self.target_size``.

        Args:
            image_path (str): Path of the image to process.
        Returns:
            tuple[Image.Image, Image.Image]: A tuple containing the resized original image and the background image.
        """
        original_image = Image.open(image_path).convert("RGBA")
        original_image = remove(original_image)
        return self.scale_and_paste(original_image, self.target_size)

    def scale_and_paste(self, original_image: Image.Image, target_size: tuple[int, int], scale_factor: float = 0.95) -> tuple[Image.Image, Image.Image]:
        """
        Scale the original image and paste it onto a background of the target size.

        Aspect ratio is preserved; the limiting dimension is scaled by
        ``scale_factor`` to leave padding for the outpainted border.

        Args:
            original_image (Image.Image): The original image to process.
            target_size (tuple[int, int]): The target size (width, height) for the output image.
            scale_factor (float): Factor to scale down the image to leave some padding (default: 0.95).
        Returns:
            tuple[Image.Image, Image.Image]: A tuple containing the resized original image and the background image.
        """
        target_width, target_height = target_size
        aspect_ratio = original_image.width / original_image.height
        if (target_width / target_height) < aspect_ratio:
            # Width-limited: fill the width, derive the height.
            # max(1, ...) guards against a zero dimension (Image.resize would
            # raise) for extreme aspect ratios or tiny targets.
            new_width = max(1, int(target_width * scale_factor))
            new_height = max(1, int(new_width / aspect_ratio))
        else:
            # Height-limited: fill the height, derive the width.
            new_height = max(1, int(target_height * scale_factor))
            new_width = max(1, int(new_height * aspect_ratio))
        resized_original = original_image.resize((new_width, new_height), Image.LANCZOS)
        background = Image.new("RGBA", target_size, "white")
        # Center the subject; the RGBA image doubles as its own paste mask.
        x = (target_width - new_width) // 2
        y = (target_height - new_height) // 2
        background.paste(resized_original, (x, y), resized_original)
        return resized_original, background

    def generate_depth_map(self, image: Image.Image) -> Image.Image:
        """
        Generate a depth map for the given image using the Zoe model.

        Args:
            image (Image.Image): The image to generate a depth map for.
        Returns:
            Image.Image: The generated depth map.
        """
        return self.zoe(image, detect_resolution=512, image_resolution=self.target_size[0])

    def generate_base_image(self, prompt: str, negative_prompt: str, inpaint_image: Image.Image, zoe_image: Image.Image, guidance_scale: float, controlnet_num_inference_steps: int, controlnet_conditioning_scale: float, control_guidance_end: float) -> Image.Image:
        """
        Generate an image using the controlnet pipeline.

        Args:
            prompt (str): The prompt for image generation.
            negative_prompt (str): The negative prompt for image generation.
            inpaint_image (Image.Image): The image to inpaint.
            zoe_image (Image.Image): The depth map image.
            guidance_scale (float): Guidance scale for controlnet.
            controlnet_num_inference_steps (int): Number of inference steps for controlnet.
            controlnet_conditioning_scale (float): Conditioning scale for controlnet.
            control_guidance_end (float): Guidance end for controlnet.
        Returns:
            Image.Image: The generated image.
        """
        # The image list order must match self.controlnets (inpaint-dreamer, zoe-depth).
        return self.controlnet_pipeline(
            prompt,
            negative_prompt=negative_prompt,
            image=[inpaint_image, zoe_image],
            guidance_scale=guidance_scale,
            num_inference_steps=controlnet_num_inference_steps,
            controlnet_conditioning_scale=controlnet_conditioning_scale,
            control_guidance_end=control_guidance_end,
        ).images[0]

    def create_mask(self, image: Image.Image, segmentation_model: str, detection_model: str) -> Image.Image:
        """
        Create a mask for the final outpainting process.

        The detected subject mask is inverted so that inpainting edits only
        the background and leaves the product untouched.

        Args:
            image (Image.Image): The original image.
            segmentation_model (str): The segmentation model identifier.
            detection_model (str): The detection model identifier.
        Returns:
            Image.Image: The created (inverted) mask.
        """
        image_augmenter = ImageAugmentation(self.target_size[0], self.target_size[1], roi_scale=0.4)
        mask_image = image_augmenter.generate_mask_from_bbox(image, segmentation_model, detection_model)
        inverted_mask = image_augmenter.invert_mask(mask_image)
        return inverted_mask

    def generate_outpainting(self, prompt: str, negative_prompt: str, image: Image.Image, mask: Image.Image, guidance_scale: float, strength: float, num_inference_steps: int) -> Image.Image:
        """
        Generate the final outpainted image.

        Args:
            prompt (str): The prompt for image generation.
            negative_prompt (str): The negative prompt for image generation.
            image (Image.Image): The image to outpaint.
            mask (Image.Image): The mask for outpainting.
            guidance_scale (float): Guidance scale for inpainting.
            strength (float): Strength for inpainting.
            num_inference_steps (int): Number of inference steps for inpainting.
        Returns:
            Image.Image: The final outpainted image.
        """
        return self.inpaint_pipeline(
            prompt,
            negative_prompt=negative_prompt,
            image=image,
            mask_image=mask,
            guidance_scale=guidance_scale,
            strength=strength,
            num_inference_steps=num_inference_steps,
        ).images[0]

    def run_pipeline(self, image_path: str, controlnet_prompt: str, controlnet_negative_prompt: str, controlnet_conditioning_scale: float, controlnet_guidance_scale: float, controlnet_num_inference_steps: int, controlnet_guidance_end: float, inpainting_prompt: str, inpainting_negative_prompt: str, inpainting_guidance_scale: float, inpainting_strength: float, inpainting_num_inference_steps: int) -> Image.Image:
        """
        Process an image through the entire outpainting pipeline.

        Args:
            image_path (str): Path of the image to process.
            controlnet_prompt (str): Prompt for the controlnet image generation.
            controlnet_negative_prompt (str): Negative prompt for controlnet image generation.
            controlnet_conditioning_scale (float): Conditioning scale for controlnet.
            controlnet_guidance_scale (float): Guidance scale for controlnet.
            controlnet_num_inference_steps (int): Number of inference steps for controlnet.
            controlnet_guidance_end (float): Guidance end for controlnet.
            inpainting_prompt (str): Prompt for the inpainting image generation.
            inpainting_negative_prompt (str): Negative prompt for inpainting image generation.
            inpainting_guidance_scale (float): Guidance scale for inpainting.
            inpainting_strength (float): Strength for inpainting.
            inpainting_num_inference_steps (int): Number of inference steps for inpainting.
        Returns:
            Image.Image: The final outpainted image.
        """
        print("Loading and preprocessing image")
        resized_img, background_image = self.load_and_preprocess_image(image_path)
        print("Generating depth map")
        image_zoe = self.generate_depth_map(background_image)
        print("Generating initial image")
        temp_image = self.generate_base_image(controlnet_prompt, controlnet_negative_prompt, background_image, image_zoe,
                                              controlnet_guidance_scale, controlnet_num_inference_steps, controlnet_conditioning_scale, controlnet_guidance_end)
        # Paste the clean subject back over the generated scene so the product
        # itself is never altered by the base generation.
        x = (self.target_size[0] - resized_img.width) // 2
        y = (self.target_size[1] - resized_img.height) // 2
        temp_image.paste(resized_img, (x, y), resized_img)
        print("Creating mask for outpainting")
        final_mask = self.create_mask(temp_image, "facebook/sam-vit-large", "yolov8l")
        # Blur the mask edge so the inpainted background blends smoothly.
        mask_blurred = self.inpaint_pipeline.mask_processor.blur(final_mask, blur_factor=20)
        print("Generating final outpainted image")
        final_image = self.generate_outpainting(inpainting_prompt, inpainting_negative_prompt, temp_image, mask_blurred,
                                                inpainting_guidance_scale, inpainting_strength, inpainting_num_inference_steps)
        # Re-paste the subject once more to undo any drift from inpainting.
        final_image.paste(resized_img, (x, y), resized_img)
        return final_image
def main():
    """Run the full outpainting pipeline on the bundled sample image and save the result."""
    processor = ControlNetZoeDepthOutpainting(target_size=(1024, 1024))
    result = processor.run_pipeline(
        image_path="/home/PicPilot/sample_data/example1.jpg",
        controlnet_prompt="product in the kitchen",
        controlnet_negative_prompt="low resolution, Bad Resolution",
        controlnet_conditioning_scale=0.9,
        controlnet_guidance_scale=7.5,
        controlnet_num_inference_steps=50,
        controlnet_guidance_end=0.6,
        inpainting_prompt="Editorial Photography of the Pot in the kitchen",
        inpainting_negative_prompt="low Resolution, Bad Resolution",
        inpainting_guidance_scale=8,
        inpainting_strength=0.7,
        inpainting_num_inference_steps=30,
    )
    result.save("outpainted_result.png")
    print("Outpainting complete. Result saved as 'outpainted_result.png'")


if __name__ == "__main__":
    main()