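"""Outpaint product images with Stable Diffusion XL ControlNets and Zoe depth.

The pipeline removes the input image's background, centers the subject on a
padded canvas, generates a depth-conditioned background with two ControlNets,
and refines the composite with an SDXL inpainting pass.
"""
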
import torch
from controlnet_aux import ZoeDetector
from PIL import Image
from diffusers import AutoencoderKL, ControlNetModel, StableDiffusionXLControlNetPipeline, StableDiffusionXLInpaintPipeline
from scripts.api_utils import ImageAugmentation, accelerator
import lightning.pytorch as pl
from rembg import remove

pl.seed_everything(42)


class ControlNetZoeDepthOutpainting:
    """
    A class for processing and outpainting images using Stable Diffusion XL.

    This class encapsulates the entire pipeline for loading an image,
    generating a depth map, creating a temporary background, and performing
    the final outpainting.
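
    A minimal usage sketch (the path and prompt values are illustrative; see
    main() below for a full call):

        processor = ControlNetZoeDepthOutpainting(target_size=(1024, 1024))
        result = processor.run_pipeline(
            "product.jpg", "product in the kitchen", "low resolution",
            0.9, 7.5, 50, 0.6,
            "Editorial photography of the product", "low resolution",
            8, 0.7, 30)
        result.save("outpainted_result.png")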
    """

    def __init__(self, target_size: tuple[int, int] = (1024, 1024)):
        """
        Initialize the outpainting processor with the required models and pipelines.

        Args:
            target_size (tuple[int, int]): The target size for the output image (width, height).
        """
        self.target_size = target_size
        print("Initializing models and pipelines...")
        # fp16-safe SDXL VAE to avoid precision artifacts in half-precision inference
        self.vae = AutoencoderKL.from_pretrained(
            "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16
        ).to(accelerator())
        self.zoe = ZoeDetector.from_pretrained("lllyasviel/Annotators")
        self.controlnets = [
            # Conditions generation on the padded product image (outpainting guidance)
            ControlNetModel.from_pretrained(
                "destitech/controlnet-inpaint-dreamer-sdxl", torch_dtype=torch.float16, variant="fp16"
            ),
            # Conditions generation on the Zoe depth map
            ControlNetModel.from_pretrained(
                "diffusers/controlnet-zoe-depth-sdxl-1.0", torch_dtype=torch.float16
            ),
        ]
        print("Setting up SDXL ControlNet pipeline...")
        self.controlnet_pipeline = StableDiffusionXLControlNetPipeline.from_pretrained(
            "SG161222/RealVisXL_V4.0", torch_dtype=torch.float16, variant="fp16",
            controlnet=self.controlnets, vae=self.vae
        ).to(accelerator())
        print("Setting up inpaint pipeline...")
        self.inpaint_pipeline = StableDiffusionXLInpaintPipeline.from_pretrained(
            "OzzyGT/RealVisXL_V4.0_inpainting", torch_dtype=torch.float16, variant="fp16", vae=self.vae
        ).to(accelerator())

    def load_and_preprocess_image(self, image_path: str) -> tuple[Image.Image, Image.Image]:
        """
        Load an image from a file path and preprocess it for outpainting.

        Args:
            image_path (str): Path of the image to process.

        Returns:
            tuple[Image.Image, Image.Image]: A tuple containing the resized original image and the background image.
        """
        original_image = Image.open(image_path).convert("RGBA")
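        # Strip the background with rembg so only the subject is re-composited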
        original_image = remove(original_image)
        return self.scale_and_paste(original_image, self.target_size)

    def scale_and_paste(self, original_image: Image.Image, target_size: tuple[int, int],
                        scale_factor: float = 0.95) -> tuple[Image.Image, Image.Image]:
        """
        Scale the original image and paste it onto a background of the target size.

        Args:
            original_image (Image.Image): The original image to process.
            target_size (tuple[int, int]): The target size (width, height) for the output image.
            scale_factor (float): Factor to scale down the image to leave some padding (default: 0.95).

        Returns:
            tuple[Image.Image, Image.Image]: A tuple containing the resized original image and the background image.
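
        For example, an 800x400 input with the default scale_factor of 0.95
        fits a 1024x1024 canvas as 972x486, pasted at offset (26, 269).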
        """
        target_width, target_height = target_size
        aspect_ratio = original_image.width / original_image.height

        if (target_width / target_height) < aspect_ratio:
            # Image is proportionally wider than the canvas: fit to width
            new_width = int(target_width * scale_factor)
            new_height = int(new_width / aspect_ratio)
        else:
            # Image is proportionally taller than the canvas: fit to height
            new_height = int(target_height * scale_factor)
            new_width = int(new_height * aspect_ratio)

        resized_original = original_image.resize((new_width, new_height), Image.LANCZOS)
        background = Image.new("RGBA", target_size, "white")
        x = (target_width - new_width) // 2
        y = (target_height - new_height) // 2
        background.paste(resized_original, (x, y), resized_original)
        return resized_original, background

    def generate_depth_map(self, image: Image.Image) -> Image.Image:
        """
        Generate a depth map for the given image using the Zoe model.

        Args:
            image (Image.Image): The image to generate a depth map for.

        Returns:
            Image.Image: The generated depth map.
        """
        return self.zoe(image, detect_resolution=512, image_resolution=self.target_size[0])

    def generate_base_image(self, prompt: str, negative_prompt: str, inpaint_image: Image.Image,
                            zoe_image: Image.Image, guidance_scale: float,
                            controlnet_num_inference_steps: int, controlnet_conditioning_scale: float,
                            control_guidance_end: float) -> Image.Image:
        """
        Generate an image using the controlnet pipeline.

        Args:
            prompt (str): The prompt for image generation.
            negative_prompt (str): The negative prompt for image generation.
            inpaint_image (Image.Image): The image to inpaint.
            zoe_image (Image.Image): The depth map image.
            guidance_scale (float): Guidance scale for controlnet.
            controlnet_num_inference_steps (int): Number of inference steps for controlnet.
            controlnet_conditioning_scale (float): Conditioning scale for controlnet.
            control_guidance_end (float): Fraction of denoising steps after which ControlNet guidance stops.

        Returns:
            Image.Image: The generated image.
        """
        return self.controlnet_pipeline(
            prompt,
            negative_prompt=negative_prompt,
            image=[inpaint_image, zoe_image],
            guidance_scale=guidance_scale,
            num_inference_steps=controlnet_num_inference_steps,
            controlnet_conditioning_scale=controlnet_conditioning_scale,
            control_guidance_end=control_guidance_end,
        ).images[0]

    def create_mask(self, image: Image.Image, segmentation_model: str, detection_model: str) -> Image.Image:
        """
        Create a mask for the final outpainting process.

        Args:
            image (Image.Image): The original image.
            segmentation_model (str): The segmentation model identifier.
            detection_model (str): The detection model identifier.

        Returns:
            Image.Image: The created mask.
        """
        image_augmenter = ImageAugmentation(self.target_size[0], self.target_size[1], roi_scale=0.4)
        mask_image = image_augmenter.generate_mask_from_bbox(image, segmentation_model, detection_model)
        # Invert so the detected subject is preserved and only the surroundings are repainted
        inverted_mask = image_augmenter.invert_mask(mask_image)
        return inverted_mask

    def generate_outpainting(self, prompt: str, negative_prompt: str, image: Image.Image,
                             mask: Image.Image, guidance_scale: float, strength: float,
                             num_inference_steps: int) -> Image.Image:
        """
        Generate the final outpainted image.

        Args:
            prompt (str): The prompt for image generation.
            negative_prompt (str): The negative prompt for image generation.
            image (Image.Image): The image to outpaint.
            mask (Image.Image): The mask for outpainting.
            guidance_scale (float): Guidance scale for inpainting.
            strength (float): Strength for inpainting.
            num_inference_steps (int): Number of inference steps for inpainting.

        Returns:
            Image.Image: The final outpainted image.
        """
        return self.inpaint_pipeline(
            prompt,
            negative_prompt=negative_prompt,
            image=image,
            mask_image=mask,
            guidance_scale=guidance_scale,
            strength=strength,
            num_inference_steps=num_inference_steps,
        ).images[0]

    def run_pipeline(self, image_path: str, controlnet_prompt: str, controlnet_negative_prompt: str,
                     controlnet_conditioning_scale: float, controlnet_guidance_scale: float,
                     controlnet_num_inference_steps: int, controlnet_guidance_end: float,
                     inpainting_prompt: str, inpainting_negative_prompt: str,
                     inpainting_guidance_scale: float, inpainting_strength: float,
                     inpainting_num_inference_steps: int) -> Image.Image:
        """
        Process an image through the entire outpainting pipeline.

        Args:
            image_path (str): Path of the image to process.
            controlnet_prompt (str): Prompt for the controlnet image generation.
            controlnet_negative_prompt (str): Negative prompt for controlnet image generation.
            controlnet_conditioning_scale (float): Conditioning scale for controlnet.
            controlnet_guidance_scale (float): Guidance scale for controlnet.
            controlnet_num_inference_steps (int): Number of inference steps for controlnet.
            controlnet_guidance_end (float): Fraction of denoising steps after which ControlNet guidance stops.
            inpainting_prompt (str): Prompt for the inpainting image generation.
            inpainting_negative_prompt (str): Negative prompt for inpainting image generation.
            inpainting_guidance_scale (float): Guidance scale for inpainting.
            inpainting_strength (float): Strength for inpainting.
            inpainting_num_inference_steps (int): Number of inference steps for inpainting.

        Returns:
            Image.Image: The final outpainted image.
        """
        print("Loading and preprocessing image")
        resized_img, background_image = self.load_and_preprocess_image(image_path)
        print("Generating depth map")
        image_zoe = self.generate_depth_map(background_image)
        print("Generating initial image")
        temp_image = self.generate_base_image(controlnet_prompt, controlnet_negative_prompt, background_image, image_zoe,
                                              controlnet_guidance_scale, controlnet_num_inference_steps, controlnet_conditioning_scale, controlnet_guidance_end)
        # Re-composite the original subject over the generated background
        x = (self.target_size[0] - resized_img.width) // 2
        y = (self.target_size[1] - resized_img.height) // 2
        temp_image.paste(resized_img, (x, y), resized_img)
        print("Creating mask for outpainting")
        final_mask = self.create_mask(temp_image, "facebook/sam-vit-large", "yolov8l")
        # Feather the mask edge so the inpainted background blends at the seam
        mask_blurred = self.inpaint_pipeline.mask_processor.blur(final_mask, blur_factor=20)
        print("Generating final outpainted image")
        final_image = self.generate_outpainting(inpainting_prompt, inpainting_negative_prompt, temp_image, mask_blurred,
                                                inpainting_guidance_scale, inpainting_strength, inpainting_num_inference_steps)
        final_image.paste(resized_img, (x, y), resized_img)
        return final_image


def main():
    processor = ControlNetZoeDepthOutpainting(target_size=(1024, 1024))
    result = processor.run_pipeline(
        image_path="/home/PicPilot/sample_data/example1.jpg",
        controlnet_prompt="product in the kitchen",
        controlnet_negative_prompt="low resolution, Bad Resolution",
        controlnet_conditioning_scale=0.9,
        controlnet_guidance_scale=7.5,
        controlnet_num_inference_steps=50,
        controlnet_guidance_end=0.6,
        inpainting_prompt="Editorial Photography of the Pot in the kitchen",
        inpainting_negative_prompt="low Resolution, Bad Resolution",
        inpainting_guidance_scale=8,
        inpainting_strength=0.7,
        inpainting_num_inference_steps=30,
    )
    result.save("outpainted_result.png")
    print("Outpainting complete. Result saved as 'outpainted_result.png'")


if __name__ == "__main__":
    main()