import base64
from io import BytesIO
from typing import Any, Dict

import torch
from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
from safetensors.torch import load_file
from torch import autocast

# https://huggingface.co/philschmid/stable-diffusion-v1-4-endpoints
# https://huggingface.co/docs/inference-endpoints/guides/custom_handler

# Set device; this handler is GPU-only.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

if device.type != "cuda":
    raise ValueError("need to run on GPU")


class EndpointHandler:
    LORA_PATHS = {
        "hairdetailer": "lora/hairdetailer.safetensors",
        "lora_leica": "lora/lora_leica.safetensors",
        "epiNoiseoffset_v2": "lora/epiNoiseoffset_v2.safetensors",
        "MBHU-TT2FRS": "lora/MBHU-TT2FRS.safetensors",
        "ShinyOiledSkin_v20": "lora/ShinyOiledSkin_v20-LoRA.safetensors",
        "polyhedron_new_skin_v1.1": "lora/polyhedron_new_skin_v1.1.safetensors",
        "detailed_eye-10": "lora/detailed_eye-10.safetensors",
        "add_detail": "lora/add_detail.safetensors",
        "MuscleGirl_v1": "lora/MuscleGirl_v1.safetensors",
    }

    TEXTUAL_INVERSION = [
        {
            "weight_name": "embeddings/EasyNegative.safetensors",
            "token": "easynegative",
        },
        {
            "weight_name": "embeddings/EasyNegative.safetensors",
            "token": "EasyNegative",
        },
        {"weight_name": "embeddings/badhandv4.pt", "token": "badhandv4"},
        {
            "weight_name": "embeddings/bad-artist-anime.pt",
            "token": "bad-artist-anime",
        },
        {"weight_name": "embeddings/NegfeetV2.pt", "token": "NegfeetV2"},
        {
            "weight_name": "embeddings/ng_deepnegative_v1_75t.pt",
            "token": "ng_deepnegative_v1_75t",
        },
        {
            "weight_name": "embeddings/ng_deepnegative_v1_75t.pt",
            "token": "NG_DeepNegative_V1_75T",
        },
        {"weight_name": "embeddings/bad-hands-5.pt", "token": "bad-hands-5"},
    ]

    def __init__(self, path="."):
        # Load the optimized model.
        self.pipe = DiffusionPipeline.from_pretrained(
            path,
            custom_pipeline="lpw_stable_diffusion",  # avoid the 77-token limit
            torch_dtype=torch.float16,  # accelerate rendering
        )
        self.pipe = self.pipe.to(device)

        # DPM++ 2M SDE Karras
        # (increase num_inference_steps to ~30 to avoid high contrast)
        self.pipe.scheduler = DPMSolverMultistepScheduler.from_config(
            self.pipe.scheduler.config,
            use_karras_sigmas=True,
            algorithm_type="sde-dpmsolver++",
        )

        # Uncensored mode: disable the safety checker.
        self.pipe.safety_checker = None

        # Load negative embeddings to avoid bad hands, etc.
        self.load_embeddings()

        # Load the default LoRA models.
        self.pipe = self.load_selected_loras(
            [
                ("polyhedron_new_skin_v1.1", 0.35),  # nice skin
                ("detailed_eye-10", 0.3),  # nice eyes
                ("add_detail", 0.4),  # detailed pictures
                ("MuscleGirl_v1", 0.3),  # shaped bodies
            ],
        )

        # Boosts performance by another ~20%.
        self.pipe.enable_xformers_memory_efficient_attention()
        self.pipe.enable_attention_slicing()

    def load_lora(self, pipeline, lora_path, lora_weight=0.5):
        """Merge a LoRA checkpoint into the pipeline's weights in place."""
        state_dict = load_file(lora_path)
        LORA_PREFIX_UNET = "lora_unet"
        LORA_PREFIX_TEXT_ENCODER = "lora_te"
        alpha = lora_weight

        visited = []

        for key in state_dict:
            state_dict[key] = state_dict[key].to(device)

        # Directly update the weights in the diffusers model.
        for key in state_dict:
            # Alpha has already been folded in above, so just skip these entries.
            if ".alpha" in key or key in visited:
                continue

            if "text" in key:
                layer_infos = (
                    key.split(".")[0]
                    .split(LORA_PREFIX_TEXT_ENCODER + "_")[-1]
                    .split("_")
                )
                curr_layer = pipeline.text_encoder
            else:
                layer_infos = (
                    key.split(".")[0].split(LORA_PREFIX_UNET + "_")[-1].split("_")
                )
                curr_layer = pipeline.unet

            # Find the target layer.
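            # State-dict keys typically look like
            # "lora_unet_down_blocks_0_attentions_0_..._to_q.lora_down.weight".
            # Module names may themselves contain underscores (e.g. "down_blocks"),
            # so the loop below greedily re-joins the split tokens while descending
            # the module tree until each attribute lookup succeeds.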
            temp_name = layer_infos.pop(0)
            # Walk the module tree until the full layer name resolves
            # (the loop exits via the break once layer_infos is exhausted).
            while True:
                try:
                    curr_layer = curr_layer.__getattr__(temp_name)
                    if len(layer_infos) > 0:
                        temp_name = layer_infos.pop(0)
                    elif len(layer_infos) == 0:
                        break
                except Exception:
                    if len(temp_name) > 0:
                        temp_name += "_" + layer_infos.pop(0)
                    else:
                        temp_name = layer_infos.pop(0)

            # org_forward(x) + lora_up(lora_down(x)) * multiplier
            pair_keys = []
            if "lora_down" in key:
                pair_keys.append(key.replace("lora_down", "lora_up"))
                pair_keys.append(key)
            else:
                pair_keys.append(key)
                pair_keys.append(key.replace("lora_up", "lora_down"))

            # Update the weight: W += alpha * (up @ down).
            if len(state_dict[pair_keys[0]].shape) == 4:
                weight_up = (
                    state_dict[pair_keys[0]].squeeze(3).squeeze(2).to(torch.float32)
                )
                weight_down = (
                    state_dict[pair_keys[1]].squeeze(3).squeeze(2).to(torch.float32)
                )
                curr_layer.weight.data += alpha * torch.mm(
                    weight_up, weight_down
                ).unsqueeze(2).unsqueeze(3)
            else:
                weight_up = state_dict[pair_keys[0]].to(torch.float32)
                weight_down = state_dict[pair_keys[1]].to(torch.float32)
                curr_layer.weight.data += alpha * torch.mm(weight_up, weight_down)

            # Update the visited list.
            for item in pair_keys:
                visited.append(item)

        return pipeline

    def load_embeddings(self):
        """Load textual inversions to counter bad prompts."""
        for model in EndpointHandler.TEXTUAL_INVERSION:
            self.pipe.load_textual_inversion(
                ".", weight_name=model["weight_name"], token=model["token"]
            )

    def load_selected_loras(self, selections):
        """Load LoRA models; can lead to marvelous creations."""
        for model_name, weight in selections:
            lora_path = EndpointHandler.LORA_PATHS[model_name]
            self.pipe = self.load_lora(
                pipeline=self.pipe, lora_path=lora_path, lora_weight=weight
            )
        return self.pipe

    def __call__(self, data: Any) -> Dict[str, Any]:
        """
        Args:
            data (:obj:`dict`): the input data and the parameters for the
                inference.
        Return:
            A :obj:`dict` containing a base64-encoded image on success, or an
            error message.
        """
        # Which LoRAs do we load? For example:
        # selected_models = [
        #     ("ShinyOiledSkin_v20", 0.3),
        #     ("MBHU-TT2FRS", 0.5),
        #     ("hairdetailer", 0.5),
        #     ("lora_leica", 0.5),
        #     ("epiNoiseoffset_v2", 0.5),
        # ]

        # 1. Verify the input arguments.
        required_fields = [
            "prompt",
            "negative_prompt",
            "width",
            "num_inference_steps",
            "height",
            "seed",
            "guidance_scale",
        ]

        missing_fields = [field for field in required_fields if field not in data]

        if missing_fields:
            return {
                "flag": "error",
                "message": f"Missing fields: {', '.join(missing_fields)}",
            }

        # Now extract the fields.
        prompt = data["prompt"]
        negative_prompt = data["negative_prompt"]
        loras_model = data.pop("loras_model", None)
        seed = data["seed"]
        width = data["width"]
        num_inference_steps = data["num_inference_steps"]
        height = data["height"]
        guidance_scale = data["guidance_scale"]

        # Automatically append the negative-embedding tokens to the
        # user-supplied negative prompt.
        forced_negative = (
            negative_prompt
            + ", easynegative, badhandv4, bad-artist-anime, NegfeetV2,"
            + " ng_deepnegative_v1_75t, bad-hands-5"
        )

        # Set the generator seed if provided (seed 0 is a valid seed).
        generator = (
            torch.Generator(device="cuda").manual_seed(seed)
            if seed is not None
            else None
        )

        # Load the requested LoRA models.
        if loras_model:
            self.pipe = self.load_selected_loras(loras_model)
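        # For reference, a request body that passes the validation above might
        # look like this (values are illustrative only):
        # {
        #     "prompt": "portrait photo of a woman, detailed skin",
        #     "negative_prompt": "lowres, blurry",
        #     "width": 512,
        #     "height": 768,
        #     "num_inference_steps": 30,
        #     "seed": 42,
        #     "guidance_scale": 7.5,
        #     "loras_model": [["lora_leica", 0.5]]  # optional
        # }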
        try:
            # 2. Run the inference.
            with autocast(device.type):
                image = self.pipe.text2img(
                    prompt=prompt,
                    guidance_scale=guidance_scale,
                    num_inference_steps=num_inference_steps,
                    height=height,
                    width=width,
                    negative_prompt=forced_negative,
                    generator=generator,
                    max_embeddings_multiples=5,
                ).images[0]

            # Encode the image as base64.
            buffered = BytesIO()
            image.save(buffered, format="JPEG")
            img_str = base64.b64encode(buffered.getvalue())

            # Return the success response.
            return {"flag": "success", "image": img_str.decode()}

        except Exception as e:
            # Handle any other exception and return an error response.
            return {"flag": "error", "message": str(e)}
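

if __name__ == "__main__":
    # Minimal local smoke test: a sketch, not part of the deployed handler.
    # It assumes the model weights plus the "lora/" and "embeddings/" folders
    # sit in the current directory, and that a CUDA GPU is available.
    handler = EndpointHandler(path=".")
    result = handler(
        {
            "prompt": "portrait photo of a woman, detailed skin, 8k",
            "negative_prompt": "lowres, blurry",
            "width": 512,
            "height": 768,
            "num_inference_steps": 30,
            "seed": 42,
            "guidance_scale": 7.5,
        }
    )
    if result["flag"] == "success":
        with open("out.jpg", "wb") as f:
            f.write(base64.b64decode(result["image"]))
    else:
        print("error:", result["message"])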