Update handler.py

2a4315a verified 10 months ago

4.75 kB

	from typing import List, Dict, Any
	import base64
	from PIL import Image
	from io import BytesIO
	from diffusers import StableDiffusionControlNetPipeline, ControlNetModel
	import torch
	import controlnet_hinter

	# Select the device and fail fast at import time: this handler only supports CUDA.
	device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
	if device.type != 'cuda':
	    raise ValueError("Need to run on GPU")
	# Mixed-precision dtype: bfloat16 is supported from compute capability 8.0 onwards,
	# so compare with ">=" (an "==" check would push newer GPUs with major version 9+
	# back to float16). Older GPUs use float16.
	dtype = torch.bfloat16 if torch.cuda.get_device_capability()[0] >= 8 else torch.float16

	# Registry of supported control types. Each entry stores the ControlNet checkpoint
	# id ("model_id") and the controlnet_hinter function registered for it ("hinter").
	# Only the depth ControlNet is registered.
	CONTROLNET_MAPPING = dict(
	    depth=dict(
	        model_id="lllyasviel/sd-controlnet-depth",
	        hinter=controlnet_hinter.hint_depth,
	    ),
	)

	class EndpointHandler():
	    """Serve Stable Diffusion v1.5 image generation guided by a depth ControlNet.

	    The models are loaded once in ``__init__``; every call handles one request
	    payload (a dict) and returns the generated image as a base64-encoded PNG.
	    """

	    # Edge length of the blank control image used when ControlNet is disabled and
	    # the payload does not specify "width"/"height".
	    _DEFAULT_SIZE = 512

	    def __init__(self, path=""):
	        """Load the ControlNet and the Stable Diffusion pipeline onto ``device``.

	        Args:
	            path: Unused; the model ids are hard-coded below.
	        """
	        # define default controlnet id and load controlnet
	        self.control_type = "depth"
	        self.controlnet = ControlNetModel.from_pretrained(
	            CONTROLNET_MAPPING[self.control_type]["model_id"],
	            torch_dtype=dtype,
	        ).to(device)

	        # Load StableDiffusionControlNetPipeline
	        self.stable_diffusion_id = "runwayml/stable-diffusion-v1-5"
	        self.pipe = StableDiffusionControlNetPipeline.from_pretrained(
	            self.stable_diffusion_id,
	            controlnet=self.controlnet,
	            torch_dtype=dtype,
	            safety_checker=None,
	        ).to(device)

	        # Default generator, seeded once. It is shared by all requests that do not
	        # send their own "seed", so its state advances from call to call.
	        self.generator = torch.Generator(device="cpu").manual_seed(3)

	    def __call__(self, data: Any) -> Dict[str, str]:
	        """Generate one image for a request payload.

	        Example payload::

	            {
	                "prompt": "a beautiful landscape",
	                "negative_prompt": "blur",
	                "width": 1024,
	                "height": 1024,
	                "steps": 30,
	                "cfg_scale": 7,
	                "seed": 3,
	                "alwayson_scripts": {
	                    "controlnet": {
	                        "args": [
	                            {
	                                "enabled": true,
	                                "input_image": "<image in base64>",
	                                "model": "control_sd15_depth [fef5e48e]",
	                                "control_mode": "Balanced"
	                            }
	                        ]
	                    }
	                }
	            }

	        Only the first entry of ``args`` is read. Its ``model`` and
	        ``control_mode`` fields are ignored: the handler always uses the ControlNet
	        loaded in ``__init__``.

	        Args:
	            data: Request payload (dict). ``steps`` defaults to 30 and ``cfg_scale``
	                to 7; ``seed`` is optional and, when given, makes the request
	                reproducible independently of earlier requests.

	        Returns:
	            ``{"image": <base64-encoded PNG of the generated image>}``.

	        Raises:
	            ValueError: If ControlNet is enabled but ``input_image`` is missing or empty.
	        """
	        # Extract parameters from the payload
	        prompt = data.get("prompt", None)
	        negative_prompt = data.get("negative_prompt", None)
	        width = data.get("width", None)
	        height = data.get("height", None)
	        num_inference_steps = data.get("steps", 30)
	        guidance_scale = data.get("cfg_scale", 7)
	        seed = data.get("seed", None)

	        # Extract the controlnet configuration. The `or` fallbacks tolerate missing
	        # keys, explicit nulls and an empty "args" list (which used to raise IndexError).
	        scripts = data.get("alwayson_scripts") or {}
	        controlnet_args = (scripts.get("controlnet") or {}).get("args") or [{}]
	        controlnet_config = controlnet_args[0] or {}

	        if controlnet_config.get("enabled", False):
	            input_image_base64 = controlnet_config.get("input_image", "")
	            if not input_image_base64:
	                # Validate before any GPU work so the caller gets a clear error.
	                raise ValueError("ControlNet is enabled but no 'input_image' was provided")
	            input_image = self.decode_base64_image(input_image_base64)
	            control_image = self._build_control_image(input_image)
	            conditioning_scale = 1.0
	        else:
	            # The ControlNet pipeline always needs a control image. Pass a blank one
	            # and zero the conditioning scale so it does not influence the result.
	            blank_size = (width or self._DEFAULT_SIZE, height or self._DEFAULT_SIZE)
	            control_image = Image.new("RGB", blank_size)
	            conditioning_scale = 0.0

	        generator = self.generator
	        if seed is not None:
	            generator = torch.Generator(device="cpu").manual_seed(int(seed))

	        # Run stable diffusion process, conditioned on the control image
	        out = self.pipe(
	            prompt=prompt,
	            negative_prompt=negative_prompt,
	            image=control_image,
	            num_inference_steps=num_inference_steps,
	            guidance_scale=guidance_scale,
	            num_images_per_prompt=1,
	            height=height,
	            width=width,
	            controlnet_conditioning_scale=conditioning_scale,
	            generator=generator,
	        )

	        # Return the generated image (not the caller's input image) as base64
	        return {"image": self.encode_base64_image(out.images[0])}

	    def _build_control_image(self, input_image):
	        """Run the hinter registered for ``self.control_type`` on ``input_image``."""
	        hinter = CONTROLNET_MAPPING[self.control_type]["hinter"]
	        return hinter(input_image)

	    def process_with_controlnet(self, generated_image, input_image, model, control_mode):
	        """Legacy placeholder kept for backward compatibility; returns ``input_image`` unchanged.

	        ``__call__`` no longer uses it: the control image is now fed into the
	        pipeline instead of replacing the generated image afterwards.
	        """
	        return input_image

	    def encode_base64_image(self, image):
	        """Serialize ``image`` as PNG and return the bytes as a base64 string."""
	        buffer = BytesIO()
	        image.save(buffer, format="PNG")
	        return base64.b64encode(buffer.getvalue()).decode("utf-8")

	    def decode_base64_image(self, image_string):
	        """Decode a base64 string and open the resulting bytes as a PIL image."""
	        base64_image = base64.b64decode(image_string)
	        buffer = BytesIO(base64_image)
	        image = Image.open(buffer)
	        return image