"""Custom inference handler that serves Stable Diffusion XL text-to-image."""

import base64
from io import BytesIO
from typing import Any, Dict, List

import torch
from torch import autocast

from diffusers import StableDiffusionXLPipeline

# Pick the GPU when one is visible; the pipeline is loaded in float16 below,
# so the module refuses to load at all on a CPU-only host.
device = torch.device("cuda" if torch.cuda.is_available() else 'cpu')

if device.type != "cuda":
    raise ValueError('need to run on gpu')


class EndpointHandler():
    """Callable wrapper around a Stable Diffusion XL pipeline.

    The pipeline is loaded once at construction time and reused for every
    request handled through ``__call__``.
    """

    def __init__(self, path=""):
        """Load the SDXL base pipeline in half precision and move it to the GPU.

        Args:
            path: Accepted for interface compatibility with the caller; it is
                not used here, the model id is hard-coded.
        """
        self.pipe = StableDiffusionXLPipeline.from_pretrained(
            "stabilityai/stable-diffusion-xl-base-1.0",
            torch_dtype=torch.float16,
            variant="fp16",
            use_safetensors=True,
        )
        self.pipe = self.pipe.to(device)

    def __call__(self, data: Any) -> Dict[str, str]:
        """Generate one image for the request and return it base64-encoded.

        Args:
            data: Request payload. When it is a dict, the prompt is taken from
                (and removed from) its ``"inputs"`` key, falling back to the
                dict itself if the key is absent. Any other payload is passed
                to the pipeline unchanged.

        Returns:
            A dict with a single ``"image"`` key holding the JPEG bytes of the
            first generated image, base64-encoded as a ``str``.
        """
        print(data)

        # Only dicts support .pop(); a bare payload (e.g. a plain prompt
        # string) is forwarded as-is instead of raising AttributeError.
        if isinstance(data, dict):
            inputs = data.pop("inputs", data)
        else:
            inputs = data

        print(device)

        with autocast(device.type):
            image = self.pipe(inputs, guidance_scale=7.5).images[0]

        # JPEG has no alpha/palette support; normalise the mode so save()
        # cannot fail on a non-RGB image. No-op for the usual RGB output.
        if image.mode != "RGB":
            image = image.convert("RGB")

        buffered = BytesIO()
        image.save(buffered, format="JPEG")
        img_str = base64.b64encode(buffered.getvalue())

        return {"image": img_str.decode()}