Charles-Elena
/

InstantStyle-SDXL-Lightning

Inference Endpoints

Model card Files Files and versions Community

yamildiego committed on Apr 29

Commit

158f9b4

•

1 Parent(s): 0b11b0c

rollback self.ip_ckpt

Browse files

Files changed (2) hide show

handler.py +220 -12
requirements.txt +16 -2

handler.py CHANGED Viewed

@@ -1,15 +1,223 @@
 class EndpointHandler():
-    def __init__(self, path=""):
-        pass
     def __call__(self, data):
-        """
-       data args:
-            inputs (:obj: `str`)
-            date (:obj: `str`)
-      Return:
-            A :obj:`list` | `dict`: will be serialized and returned
-        """
-        inputs = data.pop("inputs",data)
-        return inputs

+import cv2
+import torch
+import random
+import numpy as np
+from PIL import Image
+from pathlib import Path
+from huggingface_hub import hf_hub_download, snapshot_download
+from ip_adapter.ip_adapter import IPAdapterXL
+from safetensors.torch import load_file
+import os
+from diffusers import (
+    ControlNetModel,
+    StableDiffusionXLControlNetPipeline,
+    UNet2DConditionModel,
+    EulerDiscreteScheduler,
+)
+# Module-wide settings shared by the handler below.
+# Largest value a signed 32-bit integer can hold; upper bound for random seeds.
+MAX_SEED = np.iinfo(np.int32).max
+# Prefer the GPU when torch can see one, otherwise fall back to the CPU.
+device = "cuda" if torch.cuda.is_available() else "cpu"
+# Half precision on CUDA devices, full precision everywhere else.
+dtype = torch.float16 if "cuda" in str(device) else torch.float32
+# Hub repositories the pipeline is assembled from.
+base_model_path = "stabilityai/stable-diffusion-xl-base-1.0"
+controlnet_path = "diffusers/controlnet-canny-sdxl-1.0"
+# The image encoder and IP-Adapter checkpoint paths are not fixed here:
+# EndpointHandler.__init__ derives them from the downloaded h94/IP-Adapter snapshot.
 class EndpointHandler():
+    """Inference handler that runs InstantStyle (IP-Adapter) on SDXL-Lightning.
+
+    The constructor downloads the weights and wires together a canny
+    ControlNet, the SDXL base pipeline with the 2-step SDXL-Lightning UNet, and
+    an IP-Adapter restricted to the style attention block. Calling the instance
+    generates one image.
+    """
+    # Maps each target label to the attention blocks the IP-Adapter is applied to.
+    _TARGET_BLOCKS = {
+        "Load original IP-Adapter": ["blocks"],
+        "Load only style blocks": ["up_blocks.0.attentions.1"],
+        "Load style+layout block": ["up_blocks.0.attentions.1", "down_blocks.2.attentions.1"],
+    }
+    _DEFAULT_TARGET = "Load only style blocks"
+
+    def __init__(self, model_dir):
+        """Download the weights and assemble the generation pipeline.
+
+        Args:
+            model_dir: Accepted but not read here; every weight is fetched
+                from the Hub instead.
+        """
+        # Download the full h94/IP-Adapter snapshot; only the SDXL image
+        # encoder and the SDXL adapter checkpoint inside it are used.
+        local_repo_path = snapshot_download(repo_id="h94/IP-Adapter")
+        self.image_encoder_local_path = os.path.join(local_repo_path, "sdxl_models", "image_encoder")
+        self.ip_ckpt = os.path.join(local_repo_path, "sdxl_models", "ip-adapter_sdxl.bin")
+        self.controlnet = ControlNetModel.from_pretrained(
+            controlnet_path, use_safetensors=False, torch_dtype=torch.float16
+        ).to(device)
+        # Load SDXL base; its UNet weights are swapped for SDXL-Lightning below.
+        self.pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
+            base_model_path,
+            controlnet=self.controlnet,
+            torch_dtype=torch.float16,
+            variant="fp16",
+            add_watermarker=False,
+        ).to(device)
+        self.pipe.set_progress_bar_config(disable=True)
+        self.pipe.scheduler = EulerDiscreteScheduler.from_config(
+            self.pipe.scheduler.config, timestep_spacing="trailing", prediction_type="epsilon"
+        )
+        lightning_ckpt = hf_hub_download(
+            "ByteDance/SDXL-Lightning", "sdxl_lightning_2step_unet.safetensors"
+        )
+        # Load onto the selected device rather than a hard-coded "cuda", so the
+        # CPU fallback chosen at module level does not fail at this step.
+        self.pipe.unet.load_state_dict(load_file(lightning_ckpt, device=device))
+        self.ip_model = self._build_ip_model(self._DEFAULT_TARGET)
+        # Remember which target the cached adapter was built for.
+        self._ip_target = self._DEFAULT_TARGET
+
     def __call__(self, data):
+        """Generate one image with the fixed demo inputs below.
+
+        Args:
+            data: Request payload.
+                NOTE(review): it is currently not read; every input is
+                hard-coded in this method.
+
+        Returns:
+            The first image returned by the IP-Adapter's ``generate`` call.
+        """
+        style_image = "https://huggingface.co/spaces/radames/InstantStyle-SDXL-Lightning/resolve/main/assets/0.jpg"
+        source_image = None
+        prompt = "a cat, masterpiece, best quality, high quality"
+        scale = 1.0
+        control_scale = 0.0
+        return self._create_image(
+            image_pil=style_image,
+            input_image=source_image,
+            prompt=prompt,
+            n_prompt="text, watermark, lowres, low quality, worst quality, deformed, glitch, low contrast, noisy, saturation, blurry",
+            scale=scale,
+            control_scale=control_scale,
+            guidance_scale=0.0,
+            num_inference_steps=2,
+            seed=42,
+            target="Load only style blocks",
+            neg_content_prompt="",
+            neg_content_scale=0,
+        )
+
+    def _build_ip_model(self, target):
+        """Create an IPAdapterXL on the shared pipeline for the given target label.
+
+        Raises:
+            ValueError: If ``target`` is not one of the known labels.
+        """
+        try:
+            target_blocks = self._TARGET_BLOCKS[target]
+        except KeyError:
+            raise ValueError(
+                f"Unknown target {target!r}; expected one of {list(self._TARGET_BLOCKS)}"
+            ) from None
+        return IPAdapterXL(
+            self.pipe,
+            self.image_encoder_local_path,
+            self.ip_ckpt,
+            device,
+            target_blocks=target_blocks,
+        )
+
+    def _get_ip_model(self, target):
+        """Return the cached adapter, rebuilding it only when the target changes."""
+        if target != self._ip_target:
+            self.ip_model = self._build_ip_model(target)
+            self._ip_target = target
+        return self.ip_model
+
+    @staticmethod
+    def _as_pil(image):
+        """Return ``image`` unchanged unless it is a string, which is loaded as an image."""
+        if isinstance(image, str):
+            # Imported locally so the file's import block stays untouched.
+            from diffusers.utils import load_image
+
+            return load_image(image)
+        return image
+
+    def _create_image(
+        self,
+        image_pil,
+        input_image,
+        prompt,
+        n_prompt,
+        scale,
+        control_scale,
+        guidance_scale,
+        num_inference_steps,
+        seed,
+        target="Load only style blocks",
+        neg_content_prompt=None,
+        neg_content_scale=0,
+    ):
+        """Run one generation and return the first resulting image.
+
+        Args:
+            image_pil: Style reference, as a PIL image or a URL/path string.
+            input_image: Optional source image (PIL image or URL/path string)
+                turned into a canny edge map for the ControlNet; ``None``
+                disables the control signal.
+            prompt: Text prompt.
+            n_prompt: Negative text prompt.
+            scale: IP-Adapter scale passed through to ``generate``.
+            control_scale: ControlNet conditioning scale; forced to 0 when
+                ``input_image`` is ``None``.
+            guidance_scale: Guidance scale passed through to ``generate``.
+            num_inference_steps: Number of denoising steps.
+            seed: Seed; ``-1`` draws a random one in ``[0, MAX_SEED]``.
+            target: Label selecting which attention blocks get the adapter.
+            neg_content_prompt: Optional negative content prompt.
+            neg_content_scale: Weight for ``neg_content_prompt``; 0 disables it.
+
+        Raises:
+            ValueError: If ``target`` is not a known label.
+        """
+        seed = random.randint(0, MAX_SEED) if seed == -1 else seed
+        ip_model = self._get_ip_model(target)
+        image_pil = self._as_pil(image_pil)
+        input_image = self._as_pil(input_image)
+        if input_image is not None:
+            input_image = self._resize_img(input_image, max_side=1024)
+            cv_input_image = self._pil_to_cv2(input_image)
+            detected_map = cv2.Canny(cv_input_image, 50, 200)
+            # NOTE(review): the Canny output looks single-channel here; confirm
+            # COLOR_BGR2RGB accepts it (COLOR_GRAY2RGB may be what is intended).
+            canny_map = Image.fromarray(cv2.cvtColor(detected_map, cv2.COLOR_BGR2RGB))
+        else:
+            # No source image: feed a blank white map and switch the control off.
+            canny_map = Image.new("RGB", (1024, 1024), color=(255, 255, 255))
+            control_scale = 0
+        if float(control_scale) == 0:
+            canny_map = canny_map.resize((1024, 1024))
+        generate_kwargs = {
+            "pil_image": image_pil,
+            "prompt": prompt,
+            "negative_prompt": n_prompt,
+            "scale": scale,
+            "guidance_scale": guidance_scale,
+            "num_samples": 1,
+            "num_inference_steps": num_inference_steps,
+            "seed": seed,
+            "image": canny_map,
+            "controlnet_conditioning_scale": float(control_scale),
+        }
+        # Truthiness check so the default None no longer crashes on len().
+        if neg_content_prompt and neg_content_scale != 0:
+            generate_kwargs["neg_content_prompt"] = neg_content_prompt
+            generate_kwargs["neg_content_scale"] = neg_content_scale
+        images = ip_model.generate(**generate_kwargs)
+        return images[0]
+
+    @staticmethod
+    def _pil_to_cv2(image_pil):
+        """Convert a PIL image to a NumPy array with RGB channels swapped to BGR."""
+        image_np = np.array(image_pil)
+        return cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)
+
+    @staticmethod
+    def _resize_img(
+        input_image,
+        max_side=1280,
+        min_side=1024,
+        size=None,
+        pad_to_max_side=False,
+        mode=Image.BILINEAR,
+        base_pixel_number=64,
+    ):
+        """Resize a PIL image, optionally centring it on a white square canvas.
+
+        With ``size`` given, the image is resized to exactly that (width,
+        height). Otherwise it is scaled so the short side reaches ``min_side``,
+        rescaled so the long side equals ``max_side``, and both sides are
+        rounded down to a multiple of ``base_pixel_number``.
+        """
+        w, h = input_image.size
+        if size is not None:
+            w_resize_new, h_resize_new = size
+        else:
+            ratio = min_side / min(h, w)
+            w, h = round(ratio * w), round(ratio * h)
+            ratio = max_side / max(h, w)
+            input_image = input_image.resize([round(ratio * w), round(ratio * h)], mode)
+            w_resize_new = (round(ratio * w) // base_pixel_number) * base_pixel_number
+            h_resize_new = (round(ratio * h) // base_pixel_number) * base_pixel_number
+        input_image = input_image.resize([w_resize_new, h_resize_new], mode)
+        if pad_to_max_side:
+            # Paste the resized image into the centre of a white max_side square.
+            res = np.ones([max_side, max_side, 3], dtype=np.uint8) * 255
+            offset_x = (max_side - w_resize_new) // 2
+            offset_y = (max_side - h_resize_new) // 2
+            res[
+                offset_y : offset_y + h_resize_new, offset_x : offset_x + w_resize_new
+            ] = np.array(input_image)
+            input_image = Image.fromarray(res)
+        return input_image

requirements.txt CHANGED Viewed

@@ -1,2 +1,16 @@
-transformers==4.18.0
-holidays==0.13

+diffusers==0.27.2
+torch>=2.0.0
+torchvision>=0.15.1
+transformers>=4.37.1
+accelerate
+safetensors
+einops
+spaces>=0.19.4
+omegaconf
+peft
+huggingface-hub>=0.20.2
+opencv-python
+gradio
+controlnet_aux
+gdown
+peft