VikramSingh178 committed on
Commit ffaa8aa
1 Parent(s): f596b65

Former-commit-id: b753d5f0ba3f3f032ac18f11985cbdfccbd7afe9

api/endpoints.py CHANGED
@@ -7,13 +7,6 @@ import uvicorn
 
 
 logfire.configure(pydantic_plugin=logfire.PydanticPlugin(record='all'))
-
-
-
-
-
-
-
 app = FastAPI(openapi_url='/api/v1/product-diffusion/openapi.json', docs_url='/api/v1/product-diffusion/docs')
 app.add_middleware(
     CORSMiddleware,
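
For context, the /outpaint route added in api/routers/painting.py below is only reachable if its router is mounted on this app; the diff does not show that wiring. A minimal sketch of how it typically looks (the import path and URL prefix here are assumptions, not part of this commit):

# Hypothetical wiring, not shown in this diff: mount the painting router
# on the FastAPI app so /outpaint and /batch_outpaint are served.
from api.routers import painting

app.include_router(painting.router, prefix="/api/v1/product-diffusion")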
api/routers/painting.py CHANGED
@@ -1,64 +1,49 @@
 import os
 import uuid
-from typing import List, Tuple, Any, Dict
+import json
+from typing import List
 from fastapi import APIRouter, File, UploadFile, HTTPException, Form
-from pydantic import BaseModel, Field
-from PIL import Image
+from pydantic import BaseModel, Field, ValidationError
 import lightning.pytorch as pl
-from scripts.api_utils import pil_to_s3_json, pil_to_b64_json, ImageAugmentation, accelerator
-from scripts.inpainting_pipeline import AutoPaintingPipeline, load_pipeline
-from hydra import compose, initialize
+from scripts.api_utils import pil_to_b64_json
+from scripts.outpainting import ControlNetZoeDepthOutpainting
 from async_batcher.batcher import AsyncBatcher
-import json
 from functools import lru_cache
-pl.seed_everything(42)
-router = APIRouter()
 
+pl.seed_everything(42)
 
-with initialize(version_base=None, config_path="../../configs"):
-    cfg = compose(config_name="inpainting")
+router = APIRouter()
 
-# Load the inpainting pipeline
 @lru_cache(maxsize=1)
-def load_pipeline_wrapper():
-    """
-    Load the inpainting pipeline with the specified configuration.
-
-    Returns:
-        pipeline: The loaded inpainting pipeline.
-    """
-    pipeline = load_pipeline(cfg.model, accelerator(), enable_compile=True)
-    return pipeline
-inpainting_pipeline = load_pipeline_wrapper()
+def load_pipeline():
+    outpainting_pipeline = ControlNetZoeDepthOutpainting(target_size=(1024, 1024))
+    return outpainting_pipeline
 
-class InpaintingRequest(BaseModel):
+class OutpaintingRequest(BaseModel):
     """
-    Model representing a request for inpainting inference.
+    Model representing a request for outpainting inference.
     """
-    prompt: str = Field(..., description="Prompt text for inference")
-    negative_prompt: str = Field(..., description="Negative prompt text for inference")
-    num_inference_steps: int = Field(..., description="Number of inference steps")
-    strength: float = Field(..., description="Strength of the inference")
-    guidance_scale: float = Field(..., description="Guidance scale for inference")
-    mode: str = Field(..., description="Mode for output ('b64_json' or 's3_json')")
-    num_images: int = Field(..., description="Number of images to generate")
-    use_augmentation: bool = Field(True, description="Whether to use image augmentation")
-
-class InpaintingBatchRequestModel(BaseModel):
+    controlnet_prompt: str = Field(...)
+    controlnet_negative_prompt: str = Field(...)
+    controlnet_conditioning_scale: float = Field(...)
+    controlnet_guidance_scale: float = Field(...)
+    controlnet_num_inference_steps: int = Field(...)
+    controlnet_guidance_end: float = Field(...)
+    inpainting_prompt: str = Field(...)
+    inpainting_negative_prompt: str = Field(...)
+    inpainting_guidance_scale: float = Field(...)
+    inpainting_strength: float = Field(...)
+    inpainting_num_inference_steps: int = Field(...)
+
+class OutpaintingBatchRequestModel(BaseModel):
     """
-    Model representing a batch request for inpainting inference.
+    Model representing a batch request for outpainting inference.
     """
-    requests: List[InpaintingRequest]
+    requests: List[OutpaintingRequest]
 
 async def save_image(image: UploadFile) -> str:
     """
     Save an uploaded image to a temporary file and return the file path.
-
-    Args:
-        image (UploadFile): The uploaded image file.
-
-    Returns:
-        str: File path where the image is saved.
     """
     file_name = f"{uuid.uuid4()}.png"
     file_path = os.path.join("/tmp", file_name)
@@ -66,149 +51,75 @@ async def save_image(image: UploadFile) -> str:
         f.write(await image.read())
     return file_path
 
-def augment_image(image_path, target_width, target_height, roi_scale, segmentation_model_name, detection_model_name):
-    """
-    Augment an image by extending its dimensions and generating masks.
-
-    Args:
-        image_path (str): Path to the image file.
-        target_width (int): Target width for augmentation.
-        target_height (int): Target height for augmentation.
-        roi_scale (float): Scale factor for region of interest.
-        segmentation_model_name (str): Name of the segmentation model.
-        detection_model_name (str): Name of the detection model.
-
-    Returns:
-        Tuple[Image.Image, Image.Image]: Augmented image and inverted mask.
-    """
-    image = Image.open(image_path)
-    image_augmentation = ImageAugmentation(target_width, target_height, roi_scale)
-    image = image_augmentation.extend_image(image)
-    mask = image_augmentation.generate_mask_from_bbox(image, segmentation_model_name, detection_model_name)
-    inverted_mask = image_augmentation.invert_mask(mask)
-    return image, inverted_mask
-
-def run_inference(cfg, image_path: str, request: InpaintingRequest):
-    """
-    Run inference using an inpainting pipeline on an image.
-
-    Args:
-        cfg (dict): Configuration dictionary.
-        image_path (str): Path to the image file.
-        request (InpaintingRequest): Pydantic model containing inference parameters.
-
-    Returns:
-        dict: Resulting image in the specified mode ('b64_json' or 's3_json').
-
-    Raises:
-        ValueError: If an invalid mode is provided.
-    """
-    if request.use_augmentation:
-        image, mask_image = augment_image(image_path,
-                                          cfg['target_width'],
-                                          cfg['target_height'],
-                                          cfg['roi_scale'],
-                                          cfg['segmentation_model'],
-                                          cfg['detection_model'])
-    else:
-        image = Image.open(image_path)
-        mask_image = None
-
-    painting_pipeline = AutoPaintingPipeline(
-        pipeline=inpainting_pipeline,
-        image=image,
-        mask_image=mask_image,
-        target_height=cfg['target_height'],
-        target_width=cfg['target_width']
+def run_inference(image_path: str, request: OutpaintingRequest):
+    pipeline = load_pipeline()
+    result = pipeline.run_pipeline(
+        image_path,
+        controlnet_prompt=request.controlnet_prompt,
+        controlnet_negative_prompt=request.controlnet_negative_prompt,
+        controlnet_conditioning_scale=request.controlnet_conditioning_scale,
+        controlnet_guidance_scale=request.controlnet_guidance_scale,
+        controlnet_num_inference_steps=request.controlnet_num_inference_steps,
+        controlnet_guidance_end=request.controlnet_guidance_end,
+        inpainting_prompt=request.inpainting_prompt,
+        inpainting_negative_prompt=request.inpainting_negative_prompt,
+        inpainting_guidance_scale=request.inpainting_guidance_scale,
+        inpainting_strength=request.inpainting_strength,
+        inpainting_num_inference_steps=request.inpainting_num_inference_steps
     )
-    output = painting_pipeline.run_inference(prompt=request.prompt,
-                                             negative_prompt=request.negative_prompt,
-                                             num_inference_steps=request.num_inference_steps,
-                                             strength=request.strength,
-                                             guidance_scale=request.guidance_scale,
-                                             num_images=request.num_images)
-    if request.mode == "s3_json":
-        return pil_to_s3_json(output, file_name="output.png")
-    elif request.mode == "b64_json":
-        return pil_to_b64_json(output)
-    else:
-        raise ValueError("Invalid mode. Supported modes are 'b64_json' and 's3_json'.")
+    return result
 
-class InpaintingBatcher(AsyncBatcher):
-    async def process_batch(self, batch: Tuple[List[str], List[InpaintingRequest]]) -> List[Dict[str, Any]]:
-        """
-        Process a batch of images and requests for inpainting inference.
-
-        Args:
-            batch (Tuple[List[str], List[InpaintingRequest]]): Tuple of image paths and corresponding requests.
-
-        Returns:
-            List[Dict[str, Any]]: List of resulting images in the specified mode ('b64_json' or 's3_json').
-        """
-        image_paths, requests = batch
-        results = []
-        for image_path, request in zip(image_paths, requests):
-            result = run_inference(cfg, image_path, request)
-            results.append(result)
-        return results
-
-@router.post("/inpainting")
-async def inpainting_inference(
+@router.post("/outpaint")
+async def outpaint(
     image: UploadFile = File(...),
-    request_data: str = Form(...),
+    request: str = Form(...)
 ):
-    """
-    Handle POST request for inpainting inference.
-
-    Args:
-        image (UploadFile): Uploaded image file.
-        request_data (str): JSON string of the request parameters.
-
-    Returns:
-        dict: Resulting image in the specified mode ('b64_json' or 's3_json').
-
-    Raises:
-        HTTPException: If there is an error during image processing.
-    """
     try:
+        request_dict = json.loads(request)
+        outpainting_request = OutpaintingRequest(**request_dict)
+
         image_path = await save_image(image)
-        request_dict = json.loads(request_data)
-        request = InpaintingRequest(**request_dict)
-        result = run_inference(cfg, image_path, request)
-        return result
+        result = run_inference(image_path, outpainting_request)
+
+        result_json = pil_to_b64_json(result)
+
+        os.remove(image_path)
+
+        return {"result": result_json}
+    except json.JSONDecodeError:
+        raise HTTPException(status_code=400, detail="Invalid JSON in request data")
+    except ValidationError as e:
+        raise HTTPException(status_code=422, detail=str(e))
    except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 
-@router.post("/inpainting/batch")
-async def inpainting_batch_inference(
-    images: List[UploadFile] = File(...),
-    request_data: str = Form(...),
-):
-    """
-    Handle POST request for batch inpainting inference.
-
-    Args:
-        images (List[UploadFile]): List of uploaded image files.
-        request_data (str): JSON string of the request parameters.
-
-    Returns:
-        List[dict]: List of resulting images in the specified mode ('b64_json' or 's3_json').
+class OutpaintingBatcher(AsyncBatcher):
+    async def process_batch(self, batch):
+        results = []
+        for image, request in batch:
+            image_path = await save_image(image)
+            try:
+                result = run_inference(image_path, request)
+                results.append(result)
+            finally:
+                os.remove(image_path)
+        return results
 
-    Raises:
-        HTTPException: If there is an error during image processing.
-    """
+@router.post("/batch_outpaint")
+async def batch_outpaint(images: List[UploadFile] = File(...), batch_request: str = Form(...)):
     try:
-        request_dict = json.loads(request_data)
-        batch_request = InpaintingBatchRequestModel(**request_dict)
-        requests = batch_request.requests
-
-        if len(images) != len(requests):
-            raise HTTPException(status_code=400, detail="The number of images and requests must match.")
-
-        batcher = InpaintingBatcher(max_batch_size=64)
-        image_paths = [await save_image(image) for image in images]
-        results = batcher.process_batch((image_paths, requests))
-
-        return results
+        batch_request_dict = json.loads(batch_request)
+        batch_outpainting_request = OutpaintingBatchRequestModel(**batch_request_dict)
+
+        batcher = OutpaintingBatcher(max_queue_size=64)
+        results = await batcher.process_batch(list(zip(images, batch_outpainting_request.requests)))
+
+        result_jsons = [pil_to_b64_json(result) for result in results]
+
+        return {"results": result_jsons}
+    except json.JSONDecodeError:
+        raise HTTPException(status_code=400, detail="Invalid JSON in batch request data")
+    except ValidationError as e:
+        raise HTTPException(status_code=422, detail=str(e))
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
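
Since the new /outpaint endpoint takes the image as an uploaded file and the parameters as a JSON string in a multipart form field, a client has to serialize the OutpaintingRequest payload itself. A minimal sketch of calling it with the requests library (the base URL and route prefix are assumptions; adjust them to wherever the app is mounted):

import json
import requests

# Hypothetical client call for the new /outpaint route.
payload = {
    "controlnet_prompt": "product in the kitchen",
    "controlnet_negative_prompt": "low resolution, bad resolution",
    "controlnet_conditioning_scale": 0.9,
    "controlnet_guidance_scale": 7.5,
    "controlnet_num_inference_steps": 50,
    "controlnet_guidance_end": 0.6,
    "inpainting_prompt": "editorial photography of the product in the kitchen",
    "inpainting_negative_prompt": "low resolution, bad resolution",
    "inpainting_guidance_scale": 8.0,
    "inpainting_strength": 0.7,
    "inpainting_num_inference_steps": 30,
}

with open("example1.jpg", "rb") as f:
    response = requests.post(
        "http://localhost:8000/api/v1/product-diffusion/outpaint",
        files={"image": ("example1.jpg", f, "image/jpeg")},
        data={"request": json.dumps(payload)},  # JSON string in a form field
    )
response.raise_for_status()
b64_image = response.json()["result"]  # base64-encoded image from pil_to_b64_json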
scripts/controlnet_outpainting.py CHANGED
@@ -1,35 +1,96 @@
-from diffusers import ControlNetModel,StableDiffusionXLControlNetPipeline
+from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline
 import torch
-import requests
 from PIL import Image
-from io import BytesIO
+import lightning.pytorch as pl
+from scripts.api_utils import accelerator
+from typing import Optional
+import matplotlib.pyplot as plt
+pl.seed_everything(42)
 
-controlnet = ControlNetModel.from_pretrained(
-    "destitech/controlnet-inpaint-dreamer-sdxl", torch_dtype=torch.float16, variant="fp16"
-)
+class ImageGenerator:
+    """
+    A class to generate images using ControlNet and Stable Diffusion XL pipelines.
 
-response = requests.get("https://huggingface.co/datasets/OzzyGT/testing-resources/resolve/main/outpainting/313891870-adb6dc80-2e9e-420c-bac3-f93e6de8d06b.png?download=true")
-control_image = Image.open('/home/PicPilot/sample_data/example2.jpg')
+    Attributes:
+        controlnet (ControlNetModel): The ControlNet model.
+        pipeline (StableDiffusionXLControlNetPipeline): The Stable Diffusion XL pipeline with ControlNet.
+    """
 
+    def __init__(self, controlnet_model_name, sd_pipeline_model_name):
+        """
+        Initializes the ImageGenerator with the specified models.
 
-pipeline = StableDiffusionXLControlNetPipeline.from_pretrained(
-    "RunDiffusion/Juggernaut-XL-v9",
-    torch_dtype=torch.float16,
-    variant="fp16",
-    controlnet=controlnet,
-).to("cuda")
+        Args:
+            controlnet_model_name (str): The name of the ControlNet model.
+            sd_pipeline_model_name (str): The name of the Stable Diffusion XL pipeline model.
+        """
+        self.controlnet = ControlNetModel.from_pretrained(
+            controlnet_model_name, torch_dtype=torch.float16, variant="fp16"
+        )
+        self.pipeline = StableDiffusionXLControlNetPipeline.from_pretrained(
+            sd_pipeline_model_name,
+            torch_dtype=torch.float16,
+            variant="fp16",
+            controlnet=self.controlnet,
+        ).to(accelerator())
 
-image = pipeline(
-    prompt='Showcase 4k',
-    negative_prompt='low Resolution , Bad Resolution',
-    height=1024,
-    width=1024,
-    guidance_scale=7.5,
-    num_inference_steps=100,
-    image=control_image,
-    controlnet_conditioning_scale=0.9,
-    control_guidance_end=0.9,
-).images[0]
+    def inference(self, prompt, negative_prompt, height, width, guidance_scale, num_images_per_prompt, num_inference_steps, image_path, controlnet_conditioning_scale, control_guidance_end, output_path: Optional[str]):
+        """
+        Generates images based on the provided parameters.
 
-image.save('output.png')
+        Args:
+            prompt (str): The prompt for image generation.
+            negative_prompt (str): The negative prompt for image generation.
+            height (int): The height of the generated images.
+            width (int): The width of the generated images.
+            guidance_scale (float): The guidance scale for image generation.
+            num_images_per_prompt (int): The number of images to generate per prompt.
+            num_inference_steps (int): The number of inference steps.
+            image_path (str): The path to the image to be used.
+            controlnet_conditioning_scale (float): The conditioning scale for ControlNet.
+            control_guidance_end (float): The end guidance for ControlNet.
+
+        Returns:
+            list: A list of generated images.
+        """
+        images_list = self.pipeline(
+            prompt=prompt,
+            negative_prompt=negative_prompt,
+            height=height,
+            width=width,
+            guidance_scale=guidance_scale,
+            num_images_per_prompt=num_images_per_prompt,
+            num_inference_steps=num_inference_steps,
+            image=Image.open(image_path),
+            controlnet_conditioning_scale=controlnet_conditioning_scale,
+            control_guidance_end=control_guidance_end,
+        ).images
+        if output_path:
+            for i, image in enumerate(images_list):
+                image.save(f'{output_path}/output_{i}.png')
+        else:
+            return images_list
+
+if __name__ == "__main__":
+    generator = ImageGenerator(
+        controlnet_model_name="destitech/controlnet-inpaint-dreamer-sdxl",
+        sd_pipeline_model_name="RunDiffusion/Juggernaut-XL-v9"
+    )
+    generator.inference(
+        prompt='Park',
+        negative_prompt='low Resolution , Bad Resolution',
+        height=1080,
+        width=1920,
+        guidance_scale=7.5,
+        num_images_per_prompt=4,
+        num_inference_steps=100,
+        image_path='/home/PicPilot/sample_data/example1.jpg',
+        controlnet_conditioning_scale=0.9,
+        control_guidance_end=0.9,
+        output_path='/home/PicPilot/output'
+    )
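
This refactor also swaps the hard-coded .to("cuda") for .to(accelerator()) from scripts.api_utils, which is not shown in the diff. A plausible minimal sketch of such a helper, assuming it simply picks the best available torch device:

import torch

def accelerator() -> str:
    # Hypothetical sketch of scripts.api_utils.accelerator (not in this diff):
    # prefer CUDA, then Apple MPS, otherwise fall back to CPU.
    if torch.cuda.is_available():
        return "cuda"
    if torch.backends.mps.is_available():
        return "mps"
    return "cpu"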
scripts/outpainting.py CHANGED
@@ -1,24 +1,15 @@
-import requests
 import torch
 from controlnet_aux import ZoeDetector
 from PIL import Image
-from diffusers import (
-    AutoencoderKL,
-    ControlNetModel,
-    StableDiffusionXLControlNetPipeline,
-    StableDiffusionXLInpaintPipeline
-)
-from typing import Optional
-from api_utils import ImageAugmentation
-import lightning.pytorch as pl
-pl.seed_everything(42)
-
-
-
+from diffusers import AutoencoderKL, ControlNetModel, StableDiffusionXLControlNetPipeline, StableDiffusionXLInpaintPipeline
+from scripts.api_utils import ImageAugmentation, accelerator
+import lightning.pytorch as pl
+from rembg import remove
 
+pl.seed_everything(42)
 
 
-class OutpaintingProcessor:
+class ControlNetZoeDepthOutpainting:
     """
     A class for processing and outpainting images using Stable Diffusion XL.
 
@@ -27,65 +18,56 @@
     the final outpainting.
     """
 
-    def __init__(self, target_size=(1024, 1024)):
+    def __init__(self, target_size: tuple[int, int] = (1024, 1024)):
         """
-        Initialize the OutpaintingProcessor with necessary models and pipelines.
+        Initialize the ControlNetZoeDepthOutpainting processor with necessary models and pipelines.
 
         Args:
-            target_size (tuple): The target size for the output image (width, height).
+            target_size (tuple[int, int]): The target size for the output image (width, height).
         """
         self.target_size = target_size
         print("Initializing models and pipelines...")
-        self.vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16).to(self.device)
+        self.vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16).to(accelerator())
         self.zoe = ZoeDetector.from_pretrained("lllyasviel/Annotators")
         self.controlnets = [
             ControlNetModel.from_pretrained("destitech/controlnet-inpaint-dreamer-sdxl", torch_dtype=torch.float16, variant="fp16"),
-            ControlNetModel.from_pretrained("diffusers/controlnet-zoe-depth-sdxl-1.0", torch_dtype=torch.float16, variant='fp16')
-        ]
-
-        print("Setting up initial pipeline...")
-        self.controlnet_pipeline = StableDiffusionXLControlNetPipeline.from_pretrained(
-            "SG161222/RealVisXL_V4.0", torch_dtype=torch.float16, variant="fp16",
-            controlnet=self.controlnets, vae=self.vae
-        ).to(self.device)
-
+            ControlNetModel.from_pretrained("diffusers/controlnet-zoe-depth-sdxl-1.0", torch_dtype=torch.float16)
+        ]
+        print("Setting up sdxl pipeline...")
+        self.controlnet_pipeline = StableDiffusionXLControlNetPipeline.from_pretrained("SG161222/RealVisXL_V4.0", torch_dtype=torch.float16, variant="fp16", controlnet=self.controlnets, vae=self.vae).to(accelerator())
         print("Setting up inpaint pipeline...")
-        self.inpaint_pipeline = StableDiffusionXLInpaintPipeline.from_pretrained("OzzyGT/RealVisXL_V4.0_inpainting", torch_dtype=torch.float16,
-                                                                                 variant="fp16",
-                                                                                 vae=self.vae,
-                                                                                 ).to(self.device)
+        self.inpaint_pipeline = StableDiffusionXLInpaintPipeline.from_pretrained("OzzyGT/RealVisXL_V4.0_inpainting", torch_dtype=torch.float16, variant="fp16", vae=self.vae).to(accelerator())
 
-        print("Initialization complete.")
-
-    def load_and_preprocess_image(self, image_url):
+    def load_and_preprocess_image(self, image_path: str) -> tuple[Image.Image, Image.Image]:
         """
-        Load an image from a URL and preprocess it for outpainting.
+        Load an image from a file path and preprocess it for outpainting.
 
         Args:
-            image_url (str): URL of the image to process.
+            image_path (str): Path of the image to process.
 
         Returns:
-            tuple: A tuple containing the resized original image and the background image.
+            tuple[Image.Image, Image.Image]: A tuple containing the resized original image and the background image.
         """
-        original_image = Image.open(requests.get(image_url, stream=True).raw).convert("RGBA")
+        original_image = Image.open(image_path).convert("RGBA")
+        original_image = remove(original_image)
         return self.scale_and_paste(original_image, self.target_size)
 
-    def scale_and_paste(self, original_image, target_size, scale_factor=0.95):
+    def scale_and_paste(self, original_image: Image.Image, target_size: tuple[int, int], scale_factor: float = 0.95) -> tuple[Image.Image, Image.Image]:
         """
         Scale the original image and paste it onto a background of the target size.
 
         Args:
-            original_image (PIL.Image): The original image to process.
-            target_size (tuple): The target size (width, height) for the output image.
+            original_image (Image.Image): The original image to process.
+            target_size (tuple[int, int]): The target size (width, height) for the output image.
             scale_factor (float): Factor to scale down the image to leave some padding (default: 0.95).
 
         Returns:
-            tuple: A tuple containing the resized original image and the background image.
+            tuple[Image.Image, Image.Image]: A tuple containing the resized original image and the background image.
         """
         target_width, target_height = target_size
         aspect_ratio = original_image.width / original_image.height
 
-        if (target_width / target_height) < aspect_ratio:
+        if (target_width / target_height) < aspect_ratio:
             new_width = int(target_width * scale_factor)
             new_height = int(new_width / aspect_ratio)
         else:
@@ -97,128 +79,148 @@ class OutpaintingProcessor:
         x = (target_width - new_width) // 2
         y = (target_height - new_height) // 2
         background.paste(resized_original, (x, y), resized_original)
-
         return resized_original, background
 
-    def generate_depth_map(self, image):
+    def generate_depth_map(self, image: Image.Image) -> Image.Image:
         """
         Generate a depth map for the given image using the Zoe model.
 
         Args:
-            image (PIL.Image): The image to generate a depth map for.
+            image (Image.Image): The image to generate a depth map for.
 
         Returns:
-            PIL.Image: The generated depth map.
+            Image.Image: The generated depth map.
         """
         return self.zoe(image, detect_resolution=512, image_resolution=self.target_size[0])
 
-    def generate_image(self, prompt, negative_prompt, inpaint_image, zoe_image, guidance_scale, num_inference_steps):
+    def generate_base_image(self, prompt: str, negative_prompt: str, inpaint_image: Image.Image, zoe_image: Image.Image, guidance_scale: float, controlnet_num_inference_steps: int, controlnet_conditioning_scale: float, control_guidance_end: float) -> Image.Image:
         """
-        Generate an image using the initial pipeline.
+        Generate an image using the controlnet pipeline.
 
         Args:
             prompt (str): The prompt for image generation.
             negative_prompt (str): The negative prompt for image generation.
-            inpaint_image (PIL.Image): The image to inpaint.
-            zoe_image (PIL.Image): The depth map image.
-            seed (int, optional): Seed for random number generation.
+            inpaint_image (Image.Image): The image to inpaint.
+            zoe_image (Image.Image): The depth map image.
+            guidance_scale (float): Guidance scale for controlnet.
+            controlnet_num_inference_steps (int): Number of inference steps for controlnet.
+            controlnet_conditioning_scale (float): Conditioning scale for controlnet.
+            control_guidance_end (float): Guidance end for controlnet.
 
         Returns:
-            PIL.Image: The generated image.
+            Image.Image: The generated image.
         """
-
-        return self.initial_pipeline(
+        return self.controlnet_pipeline(
             prompt,
             negative_prompt=negative_prompt,
             image=[inpaint_image, zoe_image],
             guidance_scale=guidance_scale,
-            num_inference_steps=25,
-            controlnet_conditioning_scale=[0.5, 0.8],
-            control_guidance_end=[0.9, 0.6],
+            num_inference_steps=controlnet_num_inference_steps,
+            controlnet_conditioning_scale=controlnet_conditioning_scale,
+            control_guidance_end=control_guidance_end,
         ).images[0]
 
-    def create_mask(self, image, segmentation_model, detection_model):
+    def create_mask(self, image: Image.Image, segmentation_model: str, detection_model: str) -> Image.Image:
         """
         Create a mask for the final outpainting process.
 
         Args:
-            image (PIL.Image): The original image.
+            image (Image.Image): The original image.
             segmentation_model (str): The segmentation model identifier.
             detection_model (str): The detection model identifier.
 
         Returns:
-            PIL.Image: The created mask.
+            Image.Image: The created mask.
         """
-        image_augmenter = ImageAugmentation(self.target_size[0], self.target_size[1])
-        mask_image = image_augmenter.generate_mask_from_bbox(image, segmentation_model, detection_model)
+        image_augmenter = ImageAugmentation(self.target_size[0], self.target_size[1], roi_scale=0.4)
+        mask_image = image_augmenter.generate_mask_from_bbox(image, segmentation_model, detection_model)
         inverted_mask = image_augmenter.invert_mask(mask_image)
         return inverted_mask
 
-    def generate_outpainting(self, prompt, negative_prompt, image, mask, seed: Optional[int] = 42):
+    def generate_outpainting(self, prompt: str, negative_prompt: str, image: Image.Image, mask: Image.Image, guidance_scale: float, strength: float, num_inference_steps: int) -> Image.Image:
         """
         Generate the final outpainted image.
 
         Args:
             prompt (str): The prompt for image generation.
             negative_prompt (str): The negative prompt for image generation.
-            image (PIL.Image): The image to outpaint.
-            mask (PIL.Image): The mask for outpainting.
-            seed (int, optional): Seed for random number generation.
+            image (Image.Image): The image to outpaint.
+            mask (Image.Image): The mask for outpainting.
+            guidance_scale (float): Guidance scale for inpainting.
+            strength (float): Strength for inpainting.
+            num_inference_steps (int): Number of inference steps for inpainting.
 
         Returns:
-            PIL.Image: The final outpainted image.
+            Image.Image: The final outpainted image.
         """
-
         return self.inpaint_pipeline(
             prompt,
             negative_prompt=negative_prompt,
             image=image,
             mask_image=mask,
-            guidance_scale=10.0,
-            strength=0.8,
-            num_inference_steps=30,
+            guidance_scale=guidance_scale,
+            strength=strength,
+            num_inference_steps=num_inference_steps,
        ).images[0]
 
-    def process(self, image_url, initial_prompt, final_prompt, negative_prompt=""):
+    def run_pipeline(self, image_path: str, controlnet_prompt: str, controlnet_negative_prompt: str, controlnet_conditioning_scale: float, controlnet_guidance_scale: float, controlnet_num_inference_steps: int, controlnet_guidance_end: float, inpainting_prompt: str, inpainting_negative_prompt: str, inpainting_guidance_scale: float, inpainting_strength: float, inpainting_num_inference_steps: int) -> Image.Image:
         """
         Process an image through the entire outpainting pipeline.
 
         Args:
-            image_url (str): URL of the image to process.
-            initial_prompt (str): Prompt for the initial image generation.
-            final_prompt (str): Prompt for the final outpainting.
-            negative_prompt (str, optional): Negative prompt for both stages.
+            image_path (str): Path of the image to process.
+            controlnet_prompt (str): Prompt for the controlnet image generation.
+            controlnet_negative_prompt (str): Negative prompt for controlnet image generation.
+            controlnet_conditioning_scale (float): Conditioning scale for controlnet.
+            controlnet_guidance_scale (float): Guidance scale for controlnet.
+            controlnet_num_inference_steps (int): Number of inference steps for controlnet.
+            controlnet_guidance_end (float): Guidance end for controlnet.
+            inpainting_prompt (str): Prompt for the inpainting image generation.
+            inpainting_negative_prompt (str): Negative prompt for inpainting image generation.
+            inpainting_guidance_scale (float): Guidance scale for inpainting.
+            inpainting_strength (float): Strength for inpainting.
+            inpainting_num_inference_steps (int): Number of inference steps for inpainting.
 
         Returns:
-            PIL.Image: The final outpainted image.
+            Image.Image: The final outpainted image.
         """
-        print("Loading and preprocessing image...")
-        resized_img, background_image = self.load_and_preprocess_image(image_url)
-
-        print("Generating depth map...")
+        print("Loading and preprocessing image")
+        resized_img, background_image = self.load_and_preprocess_image(image_path)
+        print("Generating depth map")
         image_zoe = self.generate_depth_map(background_image)
-
-        print("Generating initial image...")
-        temp_image = self.generate_image(initial_prompt, negative_prompt, background_image, image_zoe)
+        print("Generating initial image")
+        temp_image = self.generate_base_image(controlnet_prompt, controlnet_negative_prompt, background_image, image_zoe,
+                                              controlnet_guidance_scale, controlnet_num_inference_steps, controlnet_conditioning_scale, controlnet_guidance_end)
         x = (self.target_size[0] - resized_img.width) // 2
         y = (self.target_size[1] - resized_img.height) // 2
         temp_image.paste(resized_img, (x, y), resized_img)
-        print("Creating mask for outpainting...")
+        print("Creating mask for outpainting")
         final_mask = self.create_mask(temp_image, "facebook/sam-vit-large", "yolov8l")
         mask_blurred = self.inpaint_pipeline.mask_processor.blur(final_mask, blur_factor=20)
-        print("Generating final outpainted image...")
-        final_image = self.generate_outpainting(final_prompt, negative_prompt, temp_image, mask_blurred)
+        print("Generating final outpainted image")
+        final_image = self.generate_outpainting(inpainting_prompt, inpainting_negative_prompt, temp_image, mask_blurred,
+                                                inpainting_guidance_scale, inpainting_strength, inpainting_num_inference_steps)
         final_image.paste(resized_img, (x, y), resized_img)
         return final_image
 
+
 def main():
-    processor = OutpaintingProcessor(target_size=(1024, 1024))  # Set to 720p resolution
-    result = processor.process(
-        "https://huggingface.co/datasets/OzzyGT/testing-resources/resolve/main/outpainting/BMW_i8_Safety_Car_Front.png?download=true",
-        "a car on the highway",
-        "high quality photo of a car on the highway, shadows, highly detailed")
+    processor = ControlNetZoeDepthOutpainting(target_size=(1024, 1024))
+    result = processor.run_pipeline("/home/PicPilot/sample_data/example1.jpg",
+                                    "product in the kitchen",
+                                    "low resolution, Bad Resolution",
+                                    0.9,
+                                    7.5,
+                                    50,
+                                    0.6,
+                                    "Editorial Photography of the Pot in the kitchen",
+                                    "low Resolution, Bad Resolution",
+                                    8,
+                                    0.7,
+                                    30)
     result.save("outpainted_result.png")
     print("Outpainting complete. Result saved as 'outpainted_result.png'")
 
+
 if __name__ == "__main__":
     main()
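
run_pipeline takes twelve parameters and main() passes them all positionally, which is easy to get wrong. A minimal sketch of the same call with keyword arguments, using the sample values from main() above, makes the controlnet and inpainting parameter groups explicit:

# Same call as main(), but with the parameter names spelled out.
processor = ControlNetZoeDepthOutpainting(target_size=(1024, 1024))
result = processor.run_pipeline(
    "/home/PicPilot/sample_data/example1.jpg",
    controlnet_prompt="product in the kitchen",
    controlnet_negative_prompt="low resolution, Bad Resolution",
    controlnet_conditioning_scale=0.9,
    controlnet_guidance_scale=7.5,
    controlnet_num_inference_steps=50,
    controlnet_guidance_end=0.6,
    inpainting_prompt="Editorial Photography of the Pot in the kitchen",
    inpainting_negative_prompt="low Resolution, Bad Resolution",
    inpainting_guidance_scale=8,
    inpainting_strength=0.7,
    inpainting_num_inference_steps=30,
)
result.save("outpainted_result.png")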