from typing import Dict, List, Any
import torch
from diffusers import DPMSolverMultistepScheduler, StableDiffusionXLPipeline
from PIL import Image
import base64
from io import BytesIO

# Select the compute device; this handler only supports CUDA because the
# pipeline is loaded in float16.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

if device.type != "cuda":
    raise ValueError("need to run on GPU")


class EndpointHandler:
    """Text-to-image handler wrapping a Stable Diffusion XL pipeline."""

    def __init__(self, path=""):
        """Load the SDXL pipeline from *path* and move it to the GPU.

        :param path: Model directory or identifier passed to
            ``StableDiffusionXLPipeline.from_pretrained``.
        """
        # Load the StableDiffusionXLPipeline with fp16 safetensors weights.
        self.pipe = StableDiffusionXLPipeline.from_pretrained(
            path,
            torch_dtype=torch.float16,
            variant="fp16",
            use_safetensors=True,
        )
        # Replace the default scheduler with DPMSolverMultistepScheduler,
        # reusing the configuration of the scheduler shipped with the model.
        self.pipe.scheduler = DPMSolverMultistepScheduler.from_config(
            self.pipe.scheduler.config
        )
        # Move the whole pipeline to the selected device.
        self.pipe = self.pipe.to(device)

    def __call__(self, data: Dict[str, Any]) -> Image.Image:
        """Generate one image for the prompt contained in *data*.

        :param data: Request payload. Must contain ``inputs`` (the prompt, a
            string or list of strings). Optional keys:
            ``num_inference_steps`` (default 30), ``guidance_scale``
            (default 8), ``negative_prompt``, ``height`` and ``width``
            (default ``None``, i.e. left to the pipeline).
            The payload is read but not modified.
        :return: The first generated PIL image.
        :raises ValueError: If ``inputs`` is missing or is not a string or a
            list.
        """
        prompt = data.get("inputs")
        # Fail early with a clear message instead of handing the whole
        # payload dict to the pipeline as the prompt.
        if not isinstance(prompt, (str, list)):
            raise ValueError(
                "`inputs` must be provided as a string or a list of strings"
            )

        # Generation hyperparameters, all optional.
        num_inference_steps = data.get("num_inference_steps", 30)
        guidance_scale = data.get("guidance_scale", 8)
        negative_prompt = data.get("negative_prompt", None)
        height = data.get("height", None)
        width = data.get("width", None)

        # Run the inference pipeline.
        out = self.pipe(
            prompt,
            num_inference_steps=num_inference_steps,
            guidance_scale=guidance_scale,
            num_images_per_prompt=1,
            negative_prompt=negative_prompt,
            height=height,
            width=width,
        )

        # Return the first generated PIL image.
        return out.images[0]