hideosnes committed on
Commit
2091d9d
1 Parent(s): b52d6ea

Create app.py

Files changed (1)
  1. app.py +350 -0
app.py ADDED
@@ -0,0 +1,350 @@
import cv2
import torch
import random
import tempfile
import numpy as np
from pathlib import Path
from PIL import Image
from diffusers import (
    ControlNetModel,
    StableDiffusionXLControlNetPipeline,
    UNet2DConditionModel,
    EulerDiscreteScheduler,
)
import spaces
import gradio as gr
from huggingface_hub import hf_hub_download, snapshot_download
from ip_adapter import IPAdapterXL
from safetensors.torch import load_file

snapshot_download(
    repo_id="h94/IP-Adapter", allow_patterns="sdxl_models/*", local_dir="."
)
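# This pulls only the sdxl_models/ folder (CLIP image encoder + IP-Adapter
# checkpoint) from h94/IP-Adapter into the working directory;
# image_encoder_path and ip_ckpt below resolve inside it.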

# CPU fallback & pipeline definition
MAX_SEED = np.iinfo(np.int32).max
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if "cuda" in device else torch.float32

# Load models & scheduler (==> Euler) & ControlNet (==> Canny; test what's better!!!)
base_model_path = "stabilityai/stable-diffusion-xl-base-1.0"
image_encoder_path = "sdxl_models/image_encoder"
ip_ckpt = "sdxl_models/ip-adapter_sdxl.bin"

controlnet_path = "diffusers/controlnet-canny-sdxl-1.0"
controlnet = ControlNetModel.from_pretrained(
    controlnet_path, use_safetensors=False, torch_dtype=dtype
).to(device)

# Load SDXL-Lightning >> put Turbo here if we fall back to Comfy @Litto

pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    base_model_path,
    controlnet=controlnet,
    torch_dtype=dtype,  # fp16 on GPU, fp32 on the CPU fallback
    variant="fp16",
    add_watermarker=False,
).to(device)
pipe.set_progress_bar_config(disable=True)
pipe.scheduler = EulerDiscreteScheduler.from_config(
    pipe.scheduler.config, timestep_spacing="trailing", prediction_type="epsilon"
)
pipe.unet.load_state_dict(
    load_file(
        hf_hub_download(
            "ByteDance/SDXL-Lightning", "sdxl_lightning_2step_unet.safetensors"
        ),
        device=device,
    )
)
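# The 2-step Lightning UNet is distilled for guidance_scale=0.0 and the
# "trailing" timestep spacing configured above; keep those in sync if the
# checkpoint changes.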

# Load the IP-Adapter with specific target blocks for style transfer and
# layout preservation. Should be better than Comfy! Test this!
# target_blocks=["block"] for the original IP-Adapter
# target_blocks=["up_blocks.0.attentions.1"] for style blocks only
# target_blocks=["up_blocks.0.attentions.1", "down_blocks.2.attentions.1"] for style+layout blocks
ip_model = IPAdapterXL(
    pipe,
    image_encoder_path,
    ip_ckpt,
    device,
    target_blocks=["up_blocks.0.attentions.1"],
)


# Resizing the input image
# OpenCV goes here!!!
# Test this with a smaller side number for faster inference.
def resize_img(
    input_image,
    max_side=1280,
    min_side=1024,
    size=None,
    pad_to_max_side=False,
    mode=Image.BILINEAR,
    base_pixel_number=64,
):
    w, h = input_image.size
    if size is not None:
        w_resize_new, h_resize_new = size
    else:
        # Scale the short side up to min_side, then cap the long side at max_side.
        ratio = min_side / min(h, w)
        w, h = round(ratio * w), round(ratio * h)
        ratio = max_side / max(h, w)
        input_image = input_image.resize([round(ratio * w), round(ratio * h)], mode)
        # Snap both sides down to multiples of base_pixel_number.
        w_resize_new = (round(ratio * w) // base_pixel_number) * base_pixel_number
        h_resize_new = (round(ratio * h) // base_pixel_number) * base_pixel_number
    input_image = input_image.resize([w_resize_new, h_resize_new], mode)

    if pad_to_max_side:
        # Center the result on a white max_side x max_side canvas.
        res = np.ones([max_side, max_side, 3], dtype=np.uint8) * 255
        offset_x = (max_side - w_resize_new) // 2
        offset_y = (max_side - h_resize_new) // 2
        res[offset_y : offset_y + h_resize_new, offset_x : offset_x + w_resize_new] = (
            np.array(input_image)
        )
        input_image = Image.fromarray(res)
    return input_image
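
# Minimal usage sketch for resize_img (a sketch only; the asset path is one of
# the example images above):
#   img = Image.open("./assets/zeichnung1.jpg")
#   img = resize_img(img, max_side=1024)  # both sides come out as multiples of 64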


# Expand example images for endpoints --> info to Johannes/Jascha on what to expect
examples = [
    [
        "./assets/zeichnung1.jpg",
        None,
        "3D model, cute monster, test prompt",
        1.0,
        0.0,
    ],
    [
        "./assets/zeichnung2.jpg",
        "./assets/guidance-target.jpg",
        "3D model, cute, kawai, monster, another test prompt",
        1.0,
        0.6,
    ],
]


def run_for_examples(style_image, source_image, prompt, scale, control_scale):
    return create_image(
        image_pil=style_image,
        input_image=source_image,
        prompt=prompt,
        n_prompt="text, watermark, low res, low quality, worst quality, deformed, blurry",
        scale=scale,
        control_scale=control_scale,
        guidance_scale=0.0,
        num_inference_steps=2,
        seed=42,
        target="Load only style blocks",
        neg_content_prompt="",
        neg_content_scale=0,
    )
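
# Note: the example runs pin guidance_scale=0.0 and num_inference_steps=2 to
# match the 2-step Lightning UNet, and fix the seed so cached example outputs
# stay reproducible.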


# Main function for image synthesis (input -> run_for_examples)
@spaces.GPU(enable_queue=True)
def create_image(
    image_pil,
    input_image,
    prompt,
    n_prompt,
    scale,
    control_scale,
    guidance_scale,
    num_inference_steps,
    seed,
    target="Load only style blocks",
    neg_content_prompt=None,
    neg_content_scale=0,
):
    seed = random.randint(0, MAX_SEED) if seed == -1 else seed
    if target == "Load original IP-Adapter":
        # target_blocks=["blocks"] for the original IP-Adapter
        ip_model = IPAdapterXL(
            pipe, image_encoder_path, ip_ckpt, device, target_blocks=["blocks"]
        )
    elif target == "Load only style blocks":
        # target_blocks=["up_blocks.0.attentions.1"] for style blocks only
        ip_model = IPAdapterXL(
            pipe,
            image_encoder_path,
            ip_ckpt,
            device,
            target_blocks=["up_blocks.0.attentions.1"],
        )
    elif target == "Load style+layout block":
        # target_blocks=["up_blocks.0.attentions.1", "down_blocks.2.attentions.1"] for style+layout blocks
        ip_model = IPAdapterXL(
            pipe,
            image_encoder_path,
            ip_ckpt,
            device,
            target_blocks=["up_blocks.0.attentions.1", "down_blocks.2.attentions.1"],
        )

    if input_image is not None:
        input_image = resize_img(input_image, max_side=1024)
        cv_input_image = pil_to_cv2(input_image)
        detected_map = cv2.Canny(cv_input_image, 50, 200)
        # cv2.Canny returns a single-channel map, so expand it to RGB.
        canny_map = Image.fromarray(cv2.cvtColor(detected_map, cv2.COLOR_GRAY2RGB))
    else:
        # No guidance image: use a blank canny map and disable the ControlNet.
        canny_map = Image.new("RGB", (1024, 1024), color=(255, 255, 255))
        control_scale = 0

    if float(control_scale) == 0:
        canny_map = canny_map.resize((1024, 1024))

    if neg_content_prompt and neg_content_scale != 0:
        images = ip_model.generate(
            pil_image=image_pil,
            prompt=prompt,
            negative_prompt=n_prompt,
            scale=scale,
            guidance_scale=guidance_scale,
            num_samples=1,
            num_inference_steps=num_inference_steps,
            seed=seed,
            image=canny_map,
            controlnet_conditioning_scale=float(control_scale),
            neg_content_prompt=neg_content_prompt,
            neg_content_scale=neg_content_scale,
        )
    else:
        images = ip_model.generate(
            pil_image=image_pil,
            prompt=prompt,
            negative_prompt=n_prompt,
            scale=scale,
            guidance_scale=guidance_scale,
            num_samples=1,
            num_inference_steps=num_inference_steps,
            seed=seed,
            image=canny_map,
            controlnet_conditioning_scale=float(control_scale),
        )
    image = images[0]
    with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmpfile:
        # Check what happens to the images when these JPEG settings change!!!
        image.save(tmpfile, "JPEG", quality=80, optimize=True, progressive=True)
    return Path(tmpfile.name)
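
# Endpoint-style usage sketch that bypasses the Gradio UI (a sketch only; the
# prompt and negative prompt here are placeholders, not part of the app):
#   out_path = create_image(
#       image_pil=Image.open("./assets/zeichnung1.jpg"),
#       input_image=None,        # no guidance image -> ControlNet disabled
#       prompt="3D model, cute monster, test prompt",
#       n_prompt="text, watermark, low res, blurry",
#       scale=1.0,
#       control_scale=0.0,
#       guidance_scale=0.0,
#       num_inference_steps=2,
#       seed=42,
#   )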


def pil_to_cv2(image_pil):
    image_np = np.array(image_pil)
    image_cv2 = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)
    return image_cv2
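
# OpenCV works in BGR channel order, hence the RGB -> BGR swap before the
# array is handed to cv2.Canny in create_image.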


# Gradio description & frontend stuff for the Space (remove this for the Endpoint)
title = r"""
<h1 align="center">MewMewMew: Simsalabim!</h1>
"""

description = r"""
<b>Let's test this! ARM <3 GoldExtra</b><br>
<b>SDXL-Lightning && IP-Adapter</b>
"""

article = r"""
Ask Hidéo if something breaks: <a href="mailto:hideo@artificialmuseum.com">Hidéo's Mail</a>
"""

block = gr.Blocks()
with block:
    # Description
    gr.Markdown(title)
    gr.Markdown(description)

    with gr.Tabs():
        with gr.Row():
            with gr.Column():
                with gr.Row():
                    with gr.Column():
                        image_pil = gr.Image(label="Style Image", type="pil")
                    with gr.Column():
                        prompt = gr.Textbox(
                            label="Prompt",
                            value="mewmewmew, kitty cats, unicorns, uWu",
                        )

                scale = gr.Slider(
                    minimum=0, maximum=2.0, step=0.01, value=1.0, label="Scale // scale"
                )
                with gr.Accordion(open=False, label="Expand for details!"):
                    target = gr.Radio(
                        [
                            "Load only style blocks",
                            "Load style+layout block",
                            "Load original IP-Adapter",
                        ],
                        value="Load only style blocks",
                        label="Select IP-Adapter mode",
                    )

                    with gr.Column():
                        src_image_pil = gr.Image(
                            label="Guidance Image (optional)", type="pil"
                        )
                        control_scale = gr.Slider(
                            minimum=0, maximum=1.0, step=0.1, value=0.5,
                            label="ControlNet strength // control_scale",
                        )
                    n_prompt = gr.Textbox(
                        label="Negative Prompt",
                        value="text, watermark, lowres, low quality, worst quality, deformed, glitch, low contrast, noisy, saturation, blurry",
                    )
                    neg_content_prompt = gr.Textbox(
                        label="Negative Content Prompt (optional)", value=""
                    )
                    neg_content_scale = gr.Slider(
                        minimum=0,
                        maximum=1.0,
                        step=0.1,
                        value=0.5,
                        label="Negative content strength // neg_content_scale",
                    )
                    guidance_scale = gr.Slider(
                        minimum=0,
                        maximum=10.0,
                        step=0.01,
                        value=0.0,
                        label="guidance-scale",
                    )
                    num_inference_steps = gr.Slider(
                        minimum=2,
                        maximum=50,
                        step=1,
                        value=2,
                        label="Number of inference steps (optional) // num_inference_steps",
                    )
                    seed = gr.Slider(
                        minimum=-1,
                        maximum=MAX_SEED,
                        value=-1,
                        step=1,
                        label="Seed Value // -1 = random // Seed-Proof=True",
                    )

                generate_button = gr.Button("Simsalabim")

            with gr.Column():
                generated_image = gr.Image(label="MewMewMagix uWu")

        inputs = [
            image_pil,
            src_image_pil,
            prompt,
            n_prompt,
            scale,
            control_scale,
            guidance_scale,
            num_inference_steps,
            seed,
            target,
            neg_content_prompt,
            neg_content_scale,
        ]
        outputs = [generated_image]

        gr.on(
            triggers=[
                prompt.input,
                generate_button.click,
                guidance_scale.input,
                scale.input,
                control_scale.input,
                seed.input,
            ],
            fn=create_image,
            inputs=inputs,
            outputs=outputs,
            show_progress="minimal",
            show_api=False,
            trigger_mode="always_last",
        )
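        # trigger_mode="always_last": if sliders or the prompt fire while a
        # generation is running, only the most recent event runs afterwards.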

    gr.Examples(
        examples=examples,
        inputs=[image_pil, src_image_pil, prompt, scale, control_scale],
        fn=run_for_examples,
        outputs=[generated_image],
        cache_examples=True,
    )
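    # cache_examples=True precomputes outputs for the example rows via
    # run_for_examples and serves them from cache instead of re-generating.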

    gr.Markdown(article)

block.queue(api_open=False)
block.launch(show_api=False)