radames committed
Commit 3e16ee5
1 parent: 93c0f43

model examples
app-controlnetlora.py CHANGED
@@ -23,9 +23,6 @@ import torch
 
 from canny_gpu import SobelOperator
 
-# from controlnet_aux import OpenposeDetector
-# import cv2
-
 try:
     import intel_extension_for_pytorch as ipex
 except:
@@ -44,12 +41,10 @@ MAX_QUEUE_SIZE = int(os.environ.get("MAX_QUEUE_SIZE", 0))
 TIMEOUT = float(os.environ.get("TIMEOUT", 0))
 SAFETY_CHECKER = os.environ.get("SAFETY_CHECKER", None)
 TORCH_COMPILE = os.environ.get("TORCH_COMPILE", None)
-HF_TOKEN = os.environ.get("HF_TOKEN", None)
 
 WIDTH = 512
 HEIGHT = 512
 
-
 # check if MPS is available OSX only M1/M2/M3 chips
 mps_available = hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
 xpu_available = hasattr(torch, "xpu") and torch.xpu.is_available()
@@ -76,37 +71,40 @@ controlnet_canny = ControlNetModel.from_pretrained(
 
 canny_torch = SobelOperator(device=device)
 
-model_id = "nitrosocke/mo-di-diffusion"
-lcm_lora_id = "lcm-sd/lcm-sd1.5-lora"
+models_id = [
+    "wavymulder/Analog-Diffusion",
+    "nitrosocke/Ghibli-Diffusion",
+    "nitrosocke/mo-di-diffusion",
+]
+lcm_lora_id = "latent-consistency/lcm-lora-sdv1-5"
 
 if SAFETY_CHECKER == "True":
-    pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
-        model_id,
-        controlnet=controlnet_canny,
-    )
+    pipes = {}
+    for model_id in models_id:
+        pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
+            model_id,
+            controlnet=controlnet_canny,
+        )
+        pipes[model_id] = pipe
 else:
-    pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
-        model_id,
-        safety_checker=None,
-        controlnet=controlnet_canny,
-    )
-
-pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
-pipe.set_progress_bar_config(disable=True)
-pipe.to(device=device, dtype=torch_dtype).to(device)
-pipe.unet.to(memory_format=torch.channels_last)
-
+    pipes = {}
+    for model_id in models_id:
+        pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
+            model_id,
+            safety_checker=None,
+            controlnet=controlnet_canny,
+        )
+        pipes[model_id] = pipe
+for pipe in pipes.values():
+    pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
+    pipe.set_progress_bar_config(disable=True)
+    pipe.to(device=device, dtype=torch_dtype).to(device)
 
-if psutil.virtual_memory().total < 64 * 1024**3:
-    pipe.enable_attention_slicing()
+    if psutil.virtual_memory().total < 64 * 1024**3:
+        pipe.enable_attention_slicing()
 
-# Load LCM LoRA
-pipe.load_lora_weights(
-    lcm_lora_id,
-    weight_name="lcm_sd_lora.safetensors",
-    adapter_name="lcm",
-    use_auth_token=HF_TOKEN,
-)
+    # Load LCM LoRA
+    pipe.load_lora_weights(lcm_lora_id, adapter_name="lcm")
 
 compel_proc = Compel(
     tokenizer=pipe.tokenizer,
@@ -142,16 +140,17 @@ class InputParams(BaseModel):
     canny_low_threshold: float = 0.31
     canny_high_threshold: float = 0.78
     debug_canny: bool = False
+    model_id: str = "nitrosocke/Ghibli-Diffusion"
 
 
-def predict(
-    input_image: Image.Image, params: InputParams, prompt_embeds: torch.Tensor = None
-):
+def predict(input_image: Image.Image, params: InputParams):
     generator = torch.manual_seed(params.seed)
 
     control_image = canny_torch(
         input_image, params.canny_low_threshold, params.canny_high_threshold
     )
+    prompt_embeds = compel_proc(params.prompt)
+    pipe = pipes[params.model_id]
     results = pipe(
         control_image=control_image,
         prompt_embeds=prompt_embeds,
@@ -245,23 +244,16 @@ async def stream(user_id: uuid.UUID):
 
     async def generate():
         last_prompt: str = None
-        prompt_embeds: torch.Tensor = None
         while True:
             data = await queue.get()
             input_image = data["image"]
             params = data["params"]
             if input_image is None:
                 continue
-            # avoid recalculate prompt embeds
-            if last_prompt != params.prompt:
-                print("new prompt")
-                prompt_embeds = compel_proc(params.prompt)
-                last_prompt = params.prompt
 
             image = predict(
                 input_image,
                 params,
-                prompt_embeds,
             )
             if image is None:
                 continue
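
Net effect of the app-controlnetlora.py change: instead of one hard-coded base model, the app keeps a ControlNet img2img pipeline per base model and dispatches on the new model_id request param. A minimal standalone sketch of that pattern, assuming diffusers 0.23 with peft installed; the canny ControlNet checkpoint id is an assumption (the hunk header truncates it) and device/dtype handling is omitted:

    # Sketch only: mirrors the loading pattern in the diff above.
    import torch
    from diffusers import (
        ControlNetModel,
        LCMScheduler,
        StableDiffusionControlNetImg2ImgPipeline,
    )

    controlnet_canny = ControlNetModel.from_pretrained(
        "lllyasviel/control_v11p_sd15_canny"  # assumed checkpoint id, not shown in the diff
    )

    models_id = [
        "wavymulder/Analog-Diffusion",
        "nitrosocke/Ghibli-Diffusion",
        "nitrosocke/mo-di-diffusion",
    ]

    # One pipeline per base model, all sharing the same ControlNet weights.
    pipes = {}
    for model_id in models_id:
        pipes[model_id] = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
            model_id, controlnet=controlnet_canny, safety_checker=None
        )

    for pipe in pipes.values():
        # Swap in the LCM scheduler and attach the LCM-LoRA adapter (needs peft).
        pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
        pipe.load_lora_weights("latent-consistency/lcm-lora-sdv1-5", adapter_name="lcm")

    # predict() then simply dispatches on the request's model_id:
    pipe = pipes["nitrosocke/Ghibli-Diffusion"]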
app-txt2imglora.py CHANGED
@@ -35,7 +35,6 @@ MAX_QUEUE_SIZE = int(os.environ.get("MAX_QUEUE_SIZE", 0))
 TIMEOUT = float(os.environ.get("TIMEOUT", 0))
 SAFETY_CHECKER = os.environ.get("SAFETY_CHECKER", None)
 TORCH_COMPILE = os.environ.get("TORCH_COMPILE", None)
-HF_TOKEN = os.environ.get("HF_TOKEN", None)
 
 WIDTH = 512
 HEIGHT = 512
@@ -61,7 +60,7 @@ if mps_available:
     torch_dtype = torch.float32
 
 model_id = "wavymulder/Analog-Diffusion"
-lcm_lora_id = "lcm-sd/lcm-sd1.5-lora"
+lcm_lora_id = "latent-consistency/lcm-lora-sdv1-5"
 
 if SAFETY_CHECKER == "True":
     pipe = DiffusionPipeline.from_pretrained(model_id)
@@ -83,13 +82,11 @@ if TORCH_COMPILE:
     pipe.vae = torch.compile(pipe.vae, mode="reduce-overhead", fullgraph=True)
 
     pipe(prompt="warmup", num_inference_steps=1, guidance_scale=8.0)
-
+
 # Load LCM LoRA
 pipe.load_lora_weights(
     lcm_lora_id,
-    weight_name="lcm_sd_lora.safetensors",
-    adapter_name="lcm",
-    use_auth_token=HF_TOKEN,
+    adapter_name="lcm"
 )
 
 compel_proc = Compel(
@@ -121,7 +118,6 @@ def predict(params: InputParams):
         guidance_scale=params.guidance_scale,
         width=params.width,
         height=params.height,
-        # original_inference_steps=params.lcm_steps,
         output_type="pil",
     )
     nsfw_content_detected = (
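
The txt2imglora change is mostly the LoRA swap: latent-consistency/lcm-lora-sdv1-5 is the public LCM-LoRA release for SD 1.5, so the HF_TOKEN plumbing and the weight_name pin can go. A minimal usage sketch, assuming the pipeline is switched to LCMScheduler as in the ControlNet app; the prompt and step count are illustrative, the app reads them from request params:

    import torch
    from diffusers import DiffusionPipeline, LCMScheduler

    pipe = DiffusionPipeline.from_pretrained("wavymulder/Analog-Diffusion")
    pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
    pipe.load_lora_weights("latent-consistency/lcm-lora-sdv1-5", adapter_name="lcm")

    # LCM-LoRA runs in very few steps with a low guidance scale
    # (the UI default is 0.8, hence the output-label fix in txt2imglora.html).
    image = pipe(
        prompt="analog style portrait of a person",  # illustrative prompt
        num_inference_steps=4,                       # illustrative step count
        guidance_scale=0.8,
        generator=torch.manual_seed(299792458),      # seed default from the UI
        output_type="pil",
    ).images[0]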
requirements.txt CHANGED
@@ -1,5 +1,4 @@
-# diffusers==0.22.2
-git+https://github.com/huggingface/diffusers.git@6110d7c95f630479cf01340cc8a8141c1e359f09
+diffusers==0.23.0
 transformers==4.34.1
 gradio==3.50.2
 --extra-index-url https://download.pytorch.org/whl/cu121
@@ -10,4 +9,5 @@ Pillow==10.1.0
 accelerate==0.24.0
 compel==2.0.2
 controlnet-aux==0.0.7
-peft==0.6.0
+peft==0.6.0
+xformers
static/controlnetlora.html CHANGED
@@ -3,7 +3,7 @@
 
 <head>
     <meta charset="UTF-8">
-    <title>Real-Time Latent Consistency Model ControlNet</title>
+    <title>Real-Time Latent Consistency Model ControlNet Lora</title>
     <meta name="viewport" content="width=device-width, initial-scale=1.0">
     <script
         src="https://cdnjs.cloudflare.com/ajax/libs/iframe-resizer/4.3.1/iframeResizer.contentWindow.min.js"></script>
@@ -93,8 +93,9 @@
         }
 
         async function videoTimeUpdateHandler() {
-            const dimension = getValue("input[name=dimension]:checked");
-            const [WIDTH, HEIGHT] = JSON.parse(dimension);
+            const model_id = getValue("input[name=base_model]:checked");
+
+            const [WIDTH, HEIGHT] = [512, 512];
 
             const canvas = new OffscreenCanvas(WIDTH, HEIGHT);
             const videoW = webcamVideo.videoWidth;
@@ -118,7 +119,8 @@
                 "controlnet_end": getValue("#controlnet_end"),
                 "canny_low_threshold": getValue("#canny_low_threshold"),
                 "canny_high_threshold": getValue("#canny_high_threshold"),
-                "debug_canny": getValue("#debug_canny")
+                "debug_canny": getValue("#debug_canny"),
+                "model_id": model_id
             }));
         }
         let mediaDevices = [];
@@ -220,8 +222,33 @@
                 console.log(err);
             }
         }
+        const models_id = {
+            "nitrosocke/Ghibli-Diffusion": "ghibli style",
+            "nitrosocke/mo-di-diffusion": "modern disney style",
+            "wavymulder/Analog-Diffusion": "analog style"
+        }
 
+        document.addEventListener("DOMContentLoaded", () => {
+            const models_options = document.querySelector("#models_options");
+            Object.entries(models_id).forEach(([model, activation], i) => {
+                const modelEl = document.createElement("div");
+                modelEl.innerHTML = `
+                <input type="radio" id="${model}" name="base_model" value="${model}" class="cursor-pointer" ${i === 0 ? "checked" : ""}>
+                <label for="${model}" class="text-sm cursor-pointer" title="Use the keyword on your prompt: ${activation}">${model}: <b>${activation}</b>
+                <a href="https://hf.co/${model}" title="Model link on Hugging Face" target="_blank" class="text-sm text-blue-500 underline hover:no-underline">⤴️</a></label>
+                `;
+                models_options.appendChild(modelEl);
+            })
+            models_options.addEventListener("change", () => {
+                const model = getValue("input[name=base_model]:checked");
+                const prompt = getValue("#prompt");
+                const activation = models_id[model];
+                if (prompt.includes(activation))
+                    return;
+                document.querySelector("#prompt").value = `${activation} portrait of a person`;
+            })
 
+        })
         const lcmLive = LCMLive(videoEl, imageEl);
         startBtn.addEventListener("click", async () => {
             try {
@@ -263,16 +290,18 @@
    <main class="container mx-auto px-4 py-4 max-w-4xl flex flex-col gap-4">
        <article class="text-center max-w-xl mx-auto">
            <h1 class="text-3xl font-bold">Real-Time Latent Consistency Model</h1>
-           <h2 class="text-2xl font-bold mb-4">ControlNet Lora</h2>
+           <h2 class="text-2xl font-bold mb-4">ControlNet LoRa</h2>
            <p class="text-sm">
                This demo showcases
-               <a href="https://huggingface.co/SimianLuo/LCM_Dreamshaper_v7" target="_blank"
-                   class="text-blue-500 underline hover:no-underline">LCM</a> Image to Image pipeline
-               using
-               <a href="https://github.com/huggingface/diffusers/tree/main/examples/community#latent-consistency-pipeline"
+               <a href="https://huggingface.co/blog/lcm_lora" target="_blank"
+                   class="text-blue-500 underline hover:no-underline">LCM LoRa</a> ControlNet pipeline
+               using <a
+                   href="https://huggingface.co/docs/diffusers/api/pipelines/latent_consistency_models#latent-consistency-models"
                    target="_blank" class="text-blue-500 underline hover:no-underline">Diffusers</a> with a MJPEG
-               stream server. Featuring <a href="https://huggingface.co/nitrosocke/mo-di-diffusion" target="_blank"
-                   class="text-blue-500 underline hover:no-underline">Nitrosocke Mo-Di Diffusion</a>Model.
+               stream server.
+           </p>
+           <p class="text-sm">
+               There are <span id="queue_size" class="font-bold">0</span> user(s) sharing the same GPU.
            </p>
        </article>
        <div>
@@ -285,9 +314,14 @@
            <div class="flex text-normal px-1 py-1 border border-gray-700 rounded-md items-center">
                <textarea type="text" id="prompt" class="font-light w-full px-3 py-2 mx-1 outline-none dark:text-black"
                    title="Prompt, this is an example, feel free to modify"
-                   placeholder="Add your prompt here...">a magical princess with golden hair, modern disney style</textarea>
+                   placeholder="Add your prompt here...">ghibli style portrait of a person</textarea>
            </div>
        </div>
+       <!-- -->
+       <label class="font-medium" for="base_model">Base Model</label>
+       <fieldset class="flex flex-col gap-2" id="models_options">
+       </fieldset>
+       <!-- -->
        <div class="">
            <details>
                <summary class="font-medium cursor-pointer">Advanced Options</summary>
@@ -310,7 +344,7 @@
                        0.3</output>
                    <!-- -->
                    <label class="text-sm font-medium" for="strength">Strength</label>
-                   <input type="range" id="strength" name="strength" min="0.1" max="1" step="0.001" value="0.50"
+                   <input type="range" id="strength" name="strength" min="0.1" max="1" step="0.0001" value="0.50"
                        oninput="this.nextElementSibling.value = Number(this.value).toFixed(2)">
                    <output class="text-xs w-[50px] text-center font-light px-1 py-1 border border-gray-700 rounded-md">
                        0.5</output>
@@ -357,7 +391,7 @@
                    </button>
                    <!-- -->
                    <!-- -->
-                   <label class="text-sm font-medium" for="dimension">Image Dimensions</label>
+                   <!-- <label class="text-sm font-medium" for="dimension">Image Dimensions</label>
                    <div class="col-span-2 flex gap-2">
                        <div class="flex gap-1">
                            <input type="radio" id="dimension512" name="dimension" value="[512,512]" checked
@@ -369,7 +403,7 @@
                                lass="cursor-pointer">
                            <label for="dimension768" class="text-sm cursor-pointer">768x768</label>
                        </div>
-                   </div>
+                   </div> -->
                    <!-- -->
                    <!-- -->
                    <label class="text-sm font-medium" for="debug_canny">Debug Canny</label>
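
On the wire, the new selector just adds a model_id field to the per-frame JSON the page already streams; the server-side InputParams.model_id (default "nitrosocke/Ghibli-Diffusion") then picks the pipeline. A hedged sketch of the payload, showing only the fields visible in this diff, with illustrative values:

    # Illustrative payload fragment; the page sends more slider values than shown.
    payload = {
        "controlnet_end": 1.0,         # illustrative value
        "canny_low_threshold": 0.31,   # defaults from InputParams in app-controlnetlora.py
        "canny_high_threshold": 0.78,
        "debug_canny": False,
        "model_id": "nitrosocke/Ghibli-Diffusion",  # new field from the base_model radio
    }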
static/txt2imglora.html CHANGED
@@ -212,6 +212,10 @@
                stream server. Featuring <a href="https://huggingface.co/wavymulder/Analog-Diffusion" target="_blank"
                    class="text-blue-500 underline hover:no-underline">Analog Diffusion</a> Model.
            </p>
+           <p class="text-sm">
+               There are <span id="queue_size" class="font-bold">0</span> user(s) sharing the same GPU, affecting
+               real-time performance.
+           </p>
        </article>
        <div>
            <h2 class="font-medium">Prompt</h2>
@@ -250,7 +254,7 @@
                    <input type="range" id="guidance-scale" name="guidance-scale" min="0" max="5" step="0.0001"
                        value="0.8" oninput="this.nextElementSibling.value = Number(this.value).toFixed(2)">
                    <output class="text-xs w-[50px] text-center font-light px-1 py-1 border border-gray-700 rounded-md">
-                       8.0</output>
+                       0.8</output>
                    <!-- -->
                    <label class="text-sm font-medium" for="seed">Seed</label>
                    <input type="number" id="seed" name="seed" value="299792458"