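# fofr-sdxl-emoji / handler.py
# (Hugging Face file-viewer header captured together with the source:)
# multimodalart's picture
# Commit message: Native diffusers textual embeddings loading
# e204e85 verified
# raw
# history blame
# 2.08 kB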
from typing import Dict, List, Any
import torch
from torch import autocast
from huggingface_hub import hf_hub_download
from diffusers import DiffusionPipeline
import base64
from io import BytesIO
# Choose the compute device once at import time: CUDA when torch reports it
# as available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("device ~>", device)
class EndpointHandler:
    """Callable handler that generates an emoji-style image with SDXL.

    Construction loads the SDXL base pipeline, fuses the ``SvenN/sdxl-emoji``
    LoRA into it and registers the ``<s0>``/``<s1>`` textual-inversion tokens
    on both text encoders. Calling the instance runs one generation and
    returns the first image.
    """

    def __init__(self, path=""):
        """Build the pipeline and load the LoRA and token embeddings.

        Args:
            path: Value supplied by the caller. It is only printed here; all
                weights are fetched by repository id.
        """
        # `load_file` was used below without ever being imported, which made
        # construction fail with a NameError. It is imported locally so this
        # class stays self-contained.
        from safetensors.torch import load_file

        print("path ~>", path)
        self.pipe = DiffusionPipeline.from_pretrained(
            "stabilityai/stable-diffusion-xl-base-1.0",
            # Half precision only when running on CUDA; otherwise let the
            # library pick its default dtype.
            torch_dtype=torch.float16 if device.type == "cuda" else None,
            variant="fp16",
        ).to(device)

        self.pipe.load_lora_weights(
            "SvenN/sdxl-emoji", weight_name="lora.safetensors"
        )
        self.pipe.fuse_lora()

        embedding_path = hf_hub_download(
            repo_id="SvenN/sdxl-emoji",
            filename="embeddings.pti",
            repo_type="model",
        )
        state_dict = load_file(embedding_path)

        # The embeddings file carries one entry per text encoder. These calls
        # previously referenced an undefined `pipeline` name; the encoders and
        # tokenizers live on `self.pipe`.
        self.pipe.load_textual_inversion(
            state_dict["text_encoders_0"],
            token=["<s0>", "<s1>"],
            text_encoder=self.pipe.text_encoder,
            tokenizer=self.pipe.tokenizer,
        )
        self.pipe.load_textual_inversion(
            state_dict["text_encoders_1"],
            token=["<s0>", "<s1>"],
            text_encoder=self.pipe.text_encoder_2,
            tokenizer=self.pipe.tokenizer_2,
        )

    def __call__(self, data: Any) -> Any:
        """Generate one image for the request.

        Args:
            data: Request payload. The ``"inputs"`` entry is removed from it
                and used as the prompt text; when that key is absent the
                payload itself is used as the prompt text.

        Returns:
            The first element of the pipeline result's ``images`` list,
            returned as-is (no base64 encoding is applied here).
        """
        inputs = data.pop("inputs", data)

        # Automatically add trigger tokens to the beginning of the prompt
        full_prompt = f"A <s0><s1> emoji {inputs}"

        # NOTE(review): the LoRA is fused in __init__; confirm whether this
        # cross-attention scale still has an effect after fusing.
        images = self.pipe(
            full_prompt,
            cross_attention_kwargs={"scale": 0.8},
            num_inference_steps=25,
        ).images
        return images[0]
if __name__ == "__main__":
    # Manual smoke test: construct the handler, then push a single sample
    # request through it and print whatever comes back.
    handler = EndpointHandler()
    print(handler)
    sample_request = {"inputs": "emoji of a tiger face, white background"}
    output = handler(sample_request)
    print(output)