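"""Outpaint product images with Stable Diffusion XL ControlNets and Zoe depth.

The pipeline removes the input image's background, centers the subject on a
padded canvas, generates a depth-conditioned background with two ControlNets,
and refines the composite with an SDXL inpainting pass.
"""
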
import torch
from controlnet_aux import ZoeDetector
from PIL import Image
from diffusers import AutoencoderKL, ControlNetModel, StableDiffusionXLControlNetPipeline, StableDiffusionXLInpaintPipeline
from scripts.api_utils import ImageAugmentation, accelerator
import lightning.pytorch as pl
from rembg import remove

pl.seed_everything(42)


class ControlNetZoeDepthOutpainting:
    """
    A class for processing and outpainting images using Stable Diffusion XL.

    This class encapsulates the entire pipeline for loading an image,
    generating a depth map, creating a temporary background, and performing
    the final outpainting.
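
    A minimal usage sketch (the path and prompt values are illustrative; see
    main() below for a full call):

        processor = ControlNetZoeDepthOutpainting(target_size=(1024, 1024))
        result = processor.run_pipeline(
            "product.jpg", "product in the kitchen", "low resolution",
            0.9, 7.5, 50, 0.6,
            "Editorial photography of the product", "low resolution",
            8, 0.7, 30)
        result.save("outpainted_result.png")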
    """

    def __init__(self, target_size: tuple[int, int] = (1024, 1024)):
        """
        Initialize the outpainting processor with the required models and pipelines.

        Args:
            target_size (tuple[int, int]): The target size for the output image (width, height).
        """
        self.target_size = target_size
        print("Initializing models and pipelines...")
        # fp16-safe SDXL VAE to avoid precision artifacts in half-precision inference
        self.vae = AutoencoderKL.from_pretrained(
            "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16
        ).to(accelerator())
        self.zoe = ZoeDetector.from_pretrained("lllyasviel/Annotators")
        self.controlnets = [
            # Conditions generation on the padded product image (outpainting guidance)
            ControlNetModel.from_pretrained(
                "destitech/controlnet-inpaint-dreamer-sdxl", torch_dtype=torch.float16, variant="fp16"
            ),
            # Conditions generation on the Zoe depth map
            ControlNetModel.from_pretrained(
                "diffusers/controlnet-zoe-depth-sdxl-1.0", torch_dtype=torch.float16
            ),
        ]
        print("Setting up SDXL ControlNet pipeline...")
        self.controlnet_pipeline = StableDiffusionXLControlNetPipeline.from_pretrained(
            "SG161222/RealVisXL_V4.0", torch_dtype=torch.float16, variant="fp16",
            controlnet=self.controlnets, vae=self.vae
        ).to(accelerator())
        print("Setting up inpaint pipeline...")
        self.inpaint_pipeline = StableDiffusionXLInpaintPipeline.from_pretrained(
            "OzzyGT/RealVisXL_V4.0_inpainting", torch_dtype=torch.float16, variant="fp16", vae=self.vae
        ).to(accelerator())

    def load_and_preprocess_image(self, image_path: str) -> tuple[Image.Image, Image.Image]:
        """
        Load an image from a file path and preprocess it for outpainting.

        Args:
            image_path (str): Path of the image to process.

        Returns:
            tuple[Image.Image, Image.Image]: A tuple containing the resized original image and the background image.
        """
        original_image = Image.open(image_path).convert("RGBA")
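        # Strip the background with rembg so only the subject is re-composited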
        original_image = remove(original_image)
        return self.scale_and_paste(original_image, self.target_size)

    def scale_and_paste(self, original_image: Image.Image, target_size: tuple[int, int],
                        scale_factor: float = 0.95) -> tuple[Image.Image, Image.Image]:
        """
        Scale the original image and paste it onto a background of the target size.

        Args:
            original_image (Image.Image): The original image to process.
            target_size (tuple[int, int]): The target size (width, height) for the output image.
            scale_factor (float): Factor to scale down the image to leave some padding (default: 0.95).

        Returns:
            tuple[Image.Image, Image.Image]: A tuple containing the resized original image and the background image.
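
        For example, an 800x400 input with the default scale_factor of 0.95
        fits a 1024x1024 canvas as 972x486, pasted at offset (26, 269).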
        """
        target_width, target_height = target_size
        aspect_ratio = original_image.width / original_image.height

        if (target_width / target_height) < aspect_ratio:
            # Image is proportionally wider than the canvas: fit to width
            new_width = int(target_width * scale_factor)
            new_height = int(new_width / aspect_ratio)
        else:
            # Image is proportionally taller than the canvas: fit to height
            new_height = int(target_height * scale_factor)
            new_width = int(new_height * aspect_ratio)

        resized_original = original_image.resize((new_width, new_height), Image.LANCZOS)
        background = Image.new("RGBA", target_size, "white")
        x = (target_width - new_width) // 2
        y = (target_height - new_height) // 2
        background.paste(resized_original, (x, y), resized_original)
        return resized_original, background

    def generate_depth_map(self, image: Image.Image) -> Image.Image:
        """
        Generate a depth map for the given image using the Zoe model.

        Args:
            image (Image.Image): The image to generate a depth map for.

        Returns:
            Image.Image: The generated depth map.
        """
        return self.zoe(image, detect_resolution=512, image_resolution=self.target_size[0])

    def generate_base_image(self, prompt: str, negative_prompt: str, inpaint_image: Image.Image,
                            zoe_image: Image.Image, guidance_scale: float,
                            controlnet_num_inference_steps: int, controlnet_conditioning_scale: float,
                            control_guidance_end: float) -> Image.Image:
        """
        Generate an image using the controlnet pipeline.

        Args:
            prompt (str): The prompt for image generation.
            negative_prompt (str): The negative prompt for image generation.
            inpaint_image (Image.Image): The image to inpaint.
            zoe_image (Image.Image): The depth map image.
            guidance_scale (float): Guidance scale for controlnet.
            controlnet_num_inference_steps (int): Number of inference steps for controlnet.
            controlnet_conditioning_scale (float): Conditioning scale for controlnet.
            control_guidance_end (float): Fraction of denoising steps after which ControlNet guidance stops.

        Returns:
            Image.Image: The generated image.
        """
        return self.controlnet_pipeline(
            prompt,
            negative_prompt=negative_prompt,
            image=[inpaint_image, zoe_image],
            guidance_scale=guidance_scale,
            num_inference_steps=controlnet_num_inference_steps,
            controlnet_conditioning_scale=controlnet_conditioning_scale,
            control_guidance_end=control_guidance_end,
        ).images[0]

    def create_mask(self, image: Image.Image, segmentation_model: str, detection_model: str) -> Image.Image:
        """
        Create a mask for the final outpainting process.

        Args:
            image (Image.Image): The original image.
            segmentation_model (str): The segmentation model identifier.
            detection_model (str): The detection model identifier.

        Returns:
            Image.Image: The created mask.
        """
        image_augmenter = ImageAugmentation(self.target_size[0], self.target_size[1], roi_scale=0.4)
        mask_image = image_augmenter.generate_mask_from_bbox(image, segmentation_model, detection_model)
        # Invert so the detected subject is preserved and only the surroundings are repainted
        inverted_mask = image_augmenter.invert_mask(mask_image)
        return inverted_mask

    def generate_outpainting(self, prompt: str, negative_prompt: str, image: Image.Image,
                             mask: Image.Image, guidance_scale: float, strength: float,
                             num_inference_steps: int) -> Image.Image:
        """
        Generate the final outpainted image.

        Args:
            prompt (str): The prompt for image generation.
            negative_prompt (str): The negative prompt for image generation.
            image (Image.Image): The image to outpaint.
            mask (Image.Image): The mask for outpainting.
            guidance_scale (float): Guidance scale for inpainting.
            strength (float): Strength for inpainting.
            num_inference_steps (int): Number of inference steps for inpainting.

        Returns:
            Image.Image: The final outpainted image.
        """
        return self.inpaint_pipeline(
            prompt,
            negative_prompt=negative_prompt,
            image=image,
            mask_image=mask,
            guidance_scale=guidance_scale,
            strength=strength,
            num_inference_steps=num_inference_steps,
        ).images[0]

    def run_pipeline(self, image_path: str, controlnet_prompt: str, controlnet_negative_prompt: str,
                     controlnet_conditioning_scale: float, controlnet_guidance_scale: float,
                     controlnet_num_inference_steps: int, controlnet_guidance_end: float,
                     inpainting_prompt: str, inpainting_negative_prompt: str,
                     inpainting_guidance_scale: float, inpainting_strength: float,
                     inpainting_num_inference_steps: int) -> Image.Image:
        """
        Process an image through the entire outpainting pipeline.

        Args:
            image_path (str): Path of the image to process.
            controlnet_prompt (str): Prompt for the controlnet image generation.
            controlnet_negative_prompt (str): Negative prompt for controlnet image generation.
            controlnet_conditioning_scale (float): Conditioning scale for controlnet.
            controlnet_guidance_scale (float): Guidance scale for controlnet.
            controlnet_num_inference_steps (int): Number of inference steps for controlnet.
            controlnet_guidance_end (float): Fraction of denoising steps after which ControlNet guidance stops.
            inpainting_prompt (str): Prompt for the inpainting image generation.
            inpainting_negative_prompt (str): Negative prompt for inpainting image generation.
            inpainting_guidance_scale (float): Guidance scale for inpainting.
            inpainting_strength (float): Strength for inpainting.
            inpainting_num_inference_steps (int): Number of inference steps for inpainting.

        Returns:
            Image.Image: The final outpainted image.
        """
        print("Loading and preprocessing image")
        resized_img, background_image = self.load_and_preprocess_image(image_path)
        print("Generating depth map")
        image_zoe = self.generate_depth_map(background_image)
        print("Generating initial image")
        temp_image = self.generate_base_image(controlnet_prompt, controlnet_negative_prompt, background_image, image_zoe,
                                              controlnet_guidance_scale, controlnet_num_inference_steps, controlnet_conditioning_scale, controlnet_guidance_end)
        # Re-composite the original subject over the generated background
        x = (self.target_size[0] - resized_img.width) // 2
        y = (self.target_size[1] - resized_img.height) // 2
        temp_image.paste(resized_img, (x, y), resized_img)
        print("Creating mask for outpainting")
        final_mask = self.create_mask(temp_image, "facebook/sam-vit-large", "yolov8l")
        # Feather the mask edge so the inpainted background blends at the seam
        mask_blurred = self.inpaint_pipeline.mask_processor.blur(final_mask, blur_factor=20)
        print("Generating final outpainted image")
        final_image = self.generate_outpainting(inpainting_prompt, inpainting_negative_prompt, temp_image, mask_blurred,
                                                inpainting_guidance_scale, inpainting_strength, inpainting_num_inference_steps)
        final_image.paste(resized_img, (x, y), resized_img)
        return final_image


def main():
    processor = ControlNetZoeDepthOutpainting(target_size=(1024, 1024))
    result = processor.run_pipeline(
        image_path="/home/PicPilot/sample_data/example1.jpg",
        controlnet_prompt="product in the kitchen",
        controlnet_negative_prompt="low resolution, Bad Resolution",
        controlnet_conditioning_scale=0.9,
        controlnet_guidance_scale=7.5,
        controlnet_num_inference_steps=50,
        controlnet_guidance_end=0.6,
        inpainting_prompt="Editorial Photography of the Pot in the kitchen",
        inpainting_negative_prompt="low Resolution, Bad Resolution",
        inpainting_guidance_scale=8,
        inpainting_strength=0.7,
        inpainting_num_inference_steps=30,
    )
    result.save("outpainted_result.png")
    print("Outpainting complete. Result saved as 'outpainted_result.png'")


if __name__ == "__main__":
    main()