# picpilot-server/scripts/outpainting.py
# (source: Hugging Face, commit ffaa8aa)
import torch
from controlnet_aux import ZoeDetector
from PIL import Image
from diffusers import AutoencoderKL, ControlNetModel, StableDiffusionXLControlNetPipeline, StableDiffusionXLInpaintPipeline
from scripts.api_utils import ImageAugmentation, accelerator
import lightning.pytorch as pl
from rembg import remove
pl.seed_everything(42)
class ControlNetZoeDepthOutpainting:
    """
    A class for processing and outpainting images using Stable Diffusion XL.

    Pipeline stages: load an image and remove its background, centre the
    subject on a white canvas, generate a Zoe depth map, synthesise a
    temporary background with a ControlNet-guided SDXL pass, then refine
    everything outside the subject with an SDXL inpainting pass.
    """
    def __init__(self, target_size: tuple[int, int] = (1024, 1024)):
        """
        Initialize the ControlNetZoeDepthOutpainting processor with necessary models and pipelines.

        Args:
            target_size (tuple[int, int]): The target size for the output image (width, height).
        """
        self.target_size = target_size
        print("Initializing models and pipelines...")
        # fp16-safe VAE shared by both SDXL pipelines below.
        self.vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16).to(accelerator())
        self.zoe = ZoeDetector.from_pretrained("lllyasviel/Annotators")
        # Two ControlNets used together: inpaint-dreamer drafts the new
        # background, zoe-depth keeps its geometry consistent with the subject.
        self.controlnets = [
            ControlNetModel.from_pretrained("destitech/controlnet-inpaint-dreamer-sdxl", torch_dtype=torch.float16, variant="fp16"),
            ControlNetModel.from_pretrained("diffusers/controlnet-zoe-depth-sdxl-1.0", torch_dtype=torch.float16)
        ]
        print("Setting up sdxl pipeline...")
        self.controlnet_pipeline = StableDiffusionXLControlNetPipeline.from_pretrained("SG161222/RealVisXL_V4.0", torch_dtype=torch.float16, variant="fp16", controlnet=self.controlnets, vae=self.vae).to(accelerator())
        print("Setting up inpaint pipeline...")
        self.inpaint_pipeline = StableDiffusionXLInpaintPipeline.from_pretrained("OzzyGT/RealVisXL_V4.0_inpainting", torch_dtype=torch.float16, variant="fp16", vae=self.vae).to(accelerator())

    @staticmethod
    def _center_offset(canvas_size: tuple[int, int], foreground: Image.Image) -> tuple[int, int]:
        """
        Compute the top-left (x, y) offset that centres *foreground* on a
        canvas of *canvas_size* (width, height).

        Shared by scale_and_paste and run_pipeline so the centering math
        lives in exactly one place.
        """
        return ((canvas_size[0] - foreground.width) // 2,
                (canvas_size[1] - foreground.height) // 2)

    def load_and_preprocess_image(self, image_path: str) -> tuple[Image.Image, Image.Image]:
        """
        Load an image from a file path and preprocess it for outpainting.

        The background is stripped with rembg before the subject is centred
        on a white canvas of the target size.

        Args:
            image_path (str): Path of the image to process.
        Returns:
            tuple[Image.Image, Image.Image]: The resized original image and the background image.
        """
        original_image = Image.open(image_path).convert("RGBA")
        original_image = remove(original_image)
        return self.scale_and_paste(original_image, self.target_size)

    def scale_and_paste(self, original_image: Image.Image, target_size: tuple[int, int], scale_factor: float = 0.95) -> tuple[Image.Image, Image.Image]:
        """
        Scale the original image and paste it onto a background of the target size.

        Args:
            original_image (Image.Image): The original image to process.
            target_size (tuple[int, int]): The target size (width, height) for the output image.
            scale_factor (float): Factor to scale down the image to leave some padding (default: 0.95).
        Returns:
            tuple[Image.Image, Image.Image]: The resized original image and the background image.
        """
        target_width, target_height = target_size
        aspect_ratio = original_image.width / original_image.height
        # Fit the longer relative side to the canvas, preserving aspect ratio.
        if (target_width / target_height) < aspect_ratio:
            new_width = int(target_width * scale_factor)
            new_height = int(new_width / aspect_ratio)
        else:
            new_height = int(target_height * scale_factor)
            new_width = int(new_height * aspect_ratio)
        resized_original = original_image.resize((new_width, new_height), Image.LANCZOS)
        background = Image.new("RGBA", target_size, "white")
        x, y = self._center_offset(target_size, resized_original)
        # Use the image itself as the paste mask so transparency is kept.
        background.paste(resized_original, (x, y), resized_original)
        return resized_original, background

    def generate_depth_map(self, image: Image.Image) -> Image.Image:
        """
        Generate a depth map for the given image using the Zoe model.

        Args:
            image (Image.Image): The image to generate a depth map for.
        Returns:
            Image.Image: The generated depth map.
        """
        # NOTE(review): image_resolution uses the target width only, which
        # assumes a square target size — confirm before using non-square sizes.
        return self.zoe(image, detect_resolution=512, image_resolution=self.target_size[0])

    def generate_base_image(self, prompt: str, negative_prompt: str, inpaint_image: Image.Image, zoe_image: Image.Image, guidance_scale: float, controlnet_num_inference_steps: int, controlnet_conditioning_scale: float, control_guidance_end: float) -> Image.Image:
        """
        Generate a temporary background image using the controlnet pipeline.

        Args:
            prompt (str): The prompt for image generation.
            negative_prompt (str): The negative prompt for image generation.
            inpaint_image (Image.Image): The image to inpaint.
            zoe_image (Image.Image): The depth map image.
            guidance_scale (float): Guidance scale for controlnet.
            controlnet_num_inference_steps (int): Number of inference steps for controlnet.
            controlnet_conditioning_scale (float): Conditioning scale for controlnet.
            control_guidance_end (float): Guidance end for controlnet.
        Returns:
            Image.Image: The generated image.
        """
        # The two conditioning images are passed in the same order the
        # ControlNets were registered in __init__ (inpaint-dreamer, zoe-depth).
        return self.controlnet_pipeline(
            prompt,
            negative_prompt=negative_prompt,
            image=[inpaint_image, zoe_image],
            guidance_scale=guidance_scale,
            num_inference_steps=controlnet_num_inference_steps,
            controlnet_conditioning_scale=controlnet_conditioning_scale,
            control_guidance_end=control_guidance_end,
        ).images[0]

    def create_mask(self, image: Image.Image, segmentation_model: str, detection_model: str) -> Image.Image:
        """
        Create an inverted subject mask for the final outpainting process.

        Args:
            image (Image.Image): The original image.
            segmentation_model (str): The segmentation model identifier.
            detection_model (str): The detection model identifier.
        Returns:
            Image.Image: The created mask (inverted, so the background is editable).
        """
        image_augmenter = ImageAugmentation(self.target_size[0], self.target_size[1], roi_scale=0.4)
        mask_image = image_augmenter.generate_mask_from_bbox(image, segmentation_model, detection_model)
        # Invert so the inpainting pass repaints the background, not the subject.
        inverted_mask = image_augmenter.invert_mask(mask_image)
        return inverted_mask

    def generate_outpainting(self, prompt: str, negative_prompt: str, image: Image.Image, mask: Image.Image, guidance_scale: float, strength: float, num_inference_steps: int) -> Image.Image:
        """
        Generate the final outpainted image.

        Args:
            prompt (str): The prompt for image generation.
            negative_prompt (str): The negative prompt for image generation.
            image (Image.Image): The image to outpaint.
            mask (Image.Image): The mask for outpainting.
            guidance_scale (float): Guidance scale for inpainting.
            strength (float): Strength for inpainting.
            num_inference_steps (int): Number of inference steps for inpainting.
        Returns:
            Image.Image: The final outpainted image.
        """
        return self.inpaint_pipeline(
            prompt,
            negative_prompt=negative_prompt,
            image=image,
            mask_image=mask,
            guidance_scale=guidance_scale,
            strength=strength,
            num_inference_steps=num_inference_steps,
        ).images[0]

    def run_pipeline(self, image_path: str, controlnet_prompt: str, controlnet_negative_prompt: str, controlnet_conditioning_scale: float, controlnet_guidance_scale: float, controlnet_num_inference_steps: int, controlnet_guidance_end: float, inpainting_prompt: str, inpainting_negative_prompt: str, inpainting_guidance_scale: float, inpainting_strength: float, inpainting_num_inference_steps: int) -> Image.Image:
        """
        Process an image through the entire outpainting pipeline.

        Args:
            image_path (str): Path of the image to process.
            controlnet_prompt (str): Prompt for the controlnet image generation.
            controlnet_negative_prompt (str): Negative prompt for controlnet image generation.
            controlnet_conditioning_scale (float): Conditioning scale for controlnet.
            controlnet_guidance_scale (float): Guidance scale for controlnet.
            controlnet_num_inference_steps (int): Number of inference steps for controlnet.
            controlnet_guidance_end (float): Guidance end for controlnet.
            inpainting_prompt (str): Prompt for the inpainting image generation.
            inpainting_negative_prompt (str): Negative prompt for inpainting image generation.
            inpainting_guidance_scale (float): Guidance scale for inpainting.
            inpainting_strength (float): Strength for inpainting.
            inpainting_num_inference_steps (int): Number of inference steps for inpainting.
        Returns:
            Image.Image: The final outpainted image.
        """
        print("Loading and preprocessing image")
        resized_img, background_image = self.load_and_preprocess_image(image_path)
        print("Generating depth map")
        image_zoe = self.generate_depth_map(background_image)
        print("Generating initial image")
        temp_image = self.generate_base_image(controlnet_prompt, controlnet_negative_prompt, background_image, image_zoe,
                                              controlnet_guidance_scale, controlnet_num_inference_steps, controlnet_conditioning_scale, controlnet_guidance_end)
        # Re-paste the cut-out subject so it stays pixel-perfect over the
        # generated background.
        x, y = self._center_offset(self.target_size, resized_img)
        temp_image.paste(resized_img, (x, y), resized_img)
        print("Creating mask for outpainting")
        final_mask = self.create_mask(temp_image, "facebook/sam-vit-large", "yolov8l")
        # Feather the mask edge so the inpainting pass blends smoothly.
        mask_blurred = self.inpaint_pipeline.mask_processor.blur(final_mask, blur_factor=20)
        print("Generating final outpainted image")
        final_image = self.generate_outpainting(inpainting_prompt, inpainting_negative_prompt, temp_image, mask_blurred,
                                                inpainting_guidance_scale, inpainting_strength, inpainting_num_inference_steps)
        # Paste the subject one last time to undo any drift from inpainting.
        final_image.paste(resized_img, (x, y), resized_img)
        return final_image
def main():
    """Run the full outpainting pipeline on a sample image and save the result."""
    processor = ControlNetZoeDepthOutpainting(target_size=(1024, 1024))
    # Keyword arguments make the 12-parameter call self-documenting;
    # values are identical to the original positional call.
    result = processor.run_pipeline(
        "/home/PicPilot/sample_data/example1.jpg",
        controlnet_prompt="product in the kitchen",
        controlnet_negative_prompt="low resolution, Bad Resolution",
        controlnet_conditioning_scale=0.9,
        controlnet_guidance_scale=7.5,
        controlnet_num_inference_steps=50,
        controlnet_guidance_end=0.6,
        inpainting_prompt="Editorial Photography of the Pot in the kitchen",
        inpainting_negative_prompt="low Resolution, Bad Resolution",
        inpainting_guidance_scale=8,
        inpainting_strength=0.7,
        inpainting_num_inference_steps=30,
    )
    result.save("outpainted_result.png")
    print("Outpainting complete. Result saved as 'outpainted_result.png'")
if __name__ == "__main__":
    main()