radames committed on
Commit
3e16ee5
1 Parent(s): 93c0f43

model examples

app-controlnetlora.py CHANGED
@@ -23,9 +23,6 @@ import torch
 
 from canny_gpu import SobelOperator
 
-# from controlnet_aux import OpenposeDetector
-# import cv2
-
 try:
     import intel_extension_for_pytorch as ipex
 except:
@@ -44,12 +41,10 @@ MAX_QUEUE_SIZE = int(os.environ.get("MAX_QUEUE_SIZE", 0))
 TIMEOUT = float(os.environ.get("TIMEOUT", 0))
 SAFETY_CHECKER = os.environ.get("SAFETY_CHECKER", None)
 TORCH_COMPILE = os.environ.get("TORCH_COMPILE", None)
-HF_TOKEN = os.environ.get("HF_TOKEN", None)
 
 WIDTH = 512
 HEIGHT = 512
 
-
 # check if MPS is available OSX only M1/M2/M3 chips
 mps_available = hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
 xpu_available = hasattr(torch, "xpu") and torch.xpu.is_available()
@@ -76,37 +71,40 @@ controlnet_canny = ControlNetModel.from_pretrained(
 
 canny_torch = SobelOperator(device=device)
 
-model_id = "nitrosocke/mo-di-diffusion"
-lcm_lora_id = "lcm-sd/lcm-sd1.5-lora"
+models_id = [
+    "wavymulder/Analog-Diffusion",
+    "nitrosocke/Ghibli-Diffusion",
+    "nitrosocke/mo-di-diffusion",
+]
+lcm_lora_id = "latent-consistency/lcm-lora-sdv1-5"
 
 if SAFETY_CHECKER == "True":
-    pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
-        model_id,
-        controlnet=controlnet_canny,
-    )
+    pipes = {}
+    for model_id in models_id:
+        pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
+            model_id,
+            controlnet=controlnet_canny,
+        )
+        pipes[model_id] = pipe
 else:
-    pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
-        model_id,
-        safety_checker=None,
-        controlnet=controlnet_canny,
-    )
-
-pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
-pipe.set_progress_bar_config(disable=True)
-pipe.to(device=device, dtype=torch_dtype).to(device)
-pipe.unet.to(memory_format=torch.channels_last)
-
+    pipes = {}
+    for model_id in models_id:
+        pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
+            model_id,
+            safety_checker=None,
+            controlnet=controlnet_canny,
+        )
+        pipes[model_id] = pipe
+for pipe in pipes.values():
+    pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
+    pipe.set_progress_bar_config(disable=True)
+    pipe.to(device=device, dtype=torch_dtype).to(device)
 
-if psutil.virtual_memory().total < 64 * 1024**3:
-    pipe.enable_attention_slicing()
+    if psutil.virtual_memory().total < 64 * 1024**3:
+        pipe.enable_attention_slicing()
 
-# Load LCM LoRA
-pipe.load_lora_weights(
-    lcm_lora_id,
-    weight_name="lcm_sd_lora.safetensors",
-    adapter_name="lcm",
-    use_auth_token=HF_TOKEN,
-)
+    # Load LCM LoRA
+    pipe.load_lora_weights(lcm_lora_id, adapter_name="lcm")
 
 compel_proc = Compel(
     tokenizer=pipe.tokenizer,
@@ -142,16 +140,17 @@ class InputParams(BaseModel):
     canny_low_threshold: float = 0.31
     canny_high_threshold: float = 0.78
     debug_canny: bool = False
+    model_id: str = "nitrosocke/Ghibli-Diffusion"
 
 
-def predict(
-    input_image: Image.Image, params: InputParams, prompt_embeds: torch.Tensor = None
-):
+def predict(input_image: Image.Image, params: InputParams):
     generator = torch.manual_seed(params.seed)
 
     control_image = canny_torch(
         input_image, params.canny_low_threshold, params.canny_high_threshold
     )
+    prompt_embeds = compel_proc(params.prompt)
+    pipe = pipes[params.model_id]
     results = pipe(
         control_image=control_image,
         prompt_embeds=prompt_embeds,
@@ -245,23 +244,16 @@ async def stream(user_id: uuid.UUID):
 
     async def generate():
         last_prompt: str = None
-        prompt_embeds: torch.Tensor = None
         while True:
             data = await queue.get()
             input_image = data["image"]
             params = data["params"]
             if input_image is None:
                 continue
-            # avoid recalculate prompt embeds
-            if last_prompt != params.prompt:
-                print("new prompt")
-                prompt_embeds = compel_proc(params.prompt)
-                last_prompt = params.prompt
 
             image = predict(
                 input_image,
                 params,
-                prompt_embeds,
            )
            if image is None:
                continue
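
Note: the sketch below condenses the pattern this commit introduces in app-controlnetlora.py: one ControlNet img2img pipeline per base model, all sharing a single canny ControlNet and the LCM LoRA, with the pipeline chosen per request via params.model_id. It is a minimal sketch, not the app verbatim; the canny ControlNet checkpoint id is outside the diff context, so the one used here is an assumption, and device/dtype handling is omitted.

# Hedged sketch of the multi-model registry; assumes diffusers>=0.23 with peft.
from diffusers import (
    ControlNetModel,
    LCMScheduler,
    StableDiffusionControlNetImg2ImgPipeline,
)

# Assumed checkpoint: the actual id the app loads is not shown in the diff.
controlnet_canny = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_canny")

models_id = [
    "wavymulder/Analog-Diffusion",
    "nitrosocke/Ghibli-Diffusion",
    "nitrosocke/mo-di-diffusion",
]

pipes = {}
for model_id in models_id:
    pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
        model_id,
        controlnet=controlnet_canny,  # one shared ControlNet instance
        safety_checker=None,
    )
    # Swap in the LCM scheduler and attach the LCM LoRA (adapter_name needs peft)
    pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
    pipe.load_lora_weights("latent-consistency/lcm-lora-sdv1-5", adapter_name="lcm")
    pipes[model_id] = pipe

# Per-request selection, as predict() now does:
pipe = pipes["nitrosocke/Ghibli-Diffusion"]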
app-txt2imglora.py CHANGED
@@ -35,7 +35,6 @@ MAX_QUEUE_SIZE = int(os.environ.get("MAX_QUEUE_SIZE", 0))
 TIMEOUT = float(os.environ.get("TIMEOUT", 0))
 SAFETY_CHECKER = os.environ.get("SAFETY_CHECKER", None)
 TORCH_COMPILE = os.environ.get("TORCH_COMPILE", None)
-HF_TOKEN = os.environ.get("HF_TOKEN", None)
 
 WIDTH = 512
 HEIGHT = 512
@@ -61,7 +60,7 @@ if mps_available:
     torch_dtype = torch.float32
 
 model_id = "wavymulder/Analog-Diffusion"
-lcm_lora_id = "lcm-sd/lcm-sd1.5-lora"
+lcm_lora_id = "latent-consistency/lcm-lora-sdv1-5"
 
 if SAFETY_CHECKER == "True":
     pipe = DiffusionPipeline.from_pretrained(model_id)
@@ -83,13 +82,11 @@ if TORCH_COMPILE:
     pipe.vae = torch.compile(pipe.vae, mode="reduce-overhead", fullgraph=True)
 
 pipe(prompt="warmup", num_inference_steps=1, guidance_scale=8.0)
-
+
 # Load LCM LoRA
 pipe.load_lora_weights(
     lcm_lora_id,
-    weight_name="lcm_sd_lora.safetensors",
-    adapter_name="lcm",
-    use_auth_token=HF_TOKEN,
+    adapter_name="lcm"
 )
 
 compel_proc = Compel(
@@ -121,7 +118,6 @@ def predict(params: InputParams):
         guidance_scale=params.guidance_scale,
         width=params.width,
         height=params.height,
-        # original_inference_steps=params.lcm_steps,
         output_type="pil",
     )
     nsfw_content_detected = (
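
Note: with the LCM LoRA attached and an LCM scheduler in place, the txt2img pipeline needs only a few inference steps and a low guidance scale, which is what makes the real-time demo feasible. A minimal standalone sketch, with illustrative parameter values rather than the app's exact ones:

# Hedged sketch: LCM-LoRA text-to-image, assuming diffusers>=0.23 and peft.
from diffusers import DiffusionPipeline, LCMScheduler

pipe = DiffusionPipeline.from_pretrained("wavymulder/Analog-Diffusion", safety_checker=None)
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
pipe.load_lora_weights("latent-consistency/lcm-lora-sdv1-5", adapter_name="lcm")
# pipe.to("cuda")  # optional: move to GPU for real-time speeds

image = pipe(
    prompt="analog style portrait of a person",
    num_inference_steps=4,  # LCM needs only a handful of steps
    guidance_scale=1.0,     # low CFG pairs well with the LCM LoRA
    width=512,
    height=512,
).images[0]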
requirements.txt CHANGED
@@ -1,5 +1,4 @@
-# diffusers==0.22.2
-git+https://github.com/huggingface/diffusers.git@6110d7c95f630479cf01340cc8a8141c1e359f09
+diffusers==0.23.0
 transformers==4.34.1
 gradio==3.50.2
 --extra-index-url https://download.pytorch.org/whl/cu121
@@ -10,4 +9,5 @@ Pillow==10.1.0
 accelerate==0.24.0
 compel==2.0.2
 controlnet-aux==0.0.7
-peft==0.6.0
+peft==0.6.0
+xformers
static/controlnetlora.html CHANGED
@@ -3,7 +3,7 @@
 
 <head>
     <meta charset="UTF-8">
-    <title>Real-Time Latent Consistency Model ControlNet</title>
+    <title>Real-Time Latent Consistency Model ControlNet Lora</title>
     <meta name="viewport" content="width=device-width, initial-scale=1.0">
     <script
         src="https://cdnjs.cloudflare.com/ajax/libs/iframe-resizer/4.3.1/iframeResizer.contentWindow.min.js"></script>
@@ -93,8 +93,9 @@
        }
 
        async function videoTimeUpdateHandler() {
-            const dimension = getValue("input[name=dimension]:checked");
-            const [WIDTH, HEIGHT] = JSON.parse(dimension);
+            const model_id = getValue("input[name=base_model]:checked");
+
+            const [WIDTH, HEIGHT] = [512, 512];
 
            const canvas = new OffscreenCanvas(WIDTH, HEIGHT);
            const videoW = webcamVideo.videoWidth;
@@ -118,7 +119,8 @@
                "controlnet_end": getValue("#controlnet_end"),
                "canny_low_threshold": getValue("#canny_low_threshold"),
                "canny_high_threshold": getValue("#canny_high_threshold"),
-                "debug_canny": getValue("#debug_canny")
+                "debug_canny": getValue("#debug_canny"),
+                "model_id": model_id
            }));
        }
        let mediaDevices = [];
@@ -220,8 +222,33 @@
            console.log(err);
        }
    }
+    const models_id = {
+        "nitrosocke/Ghibli-Diffusion": "ghibli style",
+        "nitrosocke/mo-di-diffusion": "modern disney style",
+        "wavymulder/Analog-Diffusion": "analog style"
+    }
 
+    document.addEventListener("DOMContentLoaded", () => {
+        const models_options = document.querySelector("#models_options");
+        Object.entries(models_id).forEach(([model, activation], i) => {
+            const modelEl = document.createElement("div");
+            modelEl.innerHTML = `
+            <input type="radio" id="${model}" name="base_model" value="${model}" class="cursor-pointer" ${i === 0 ? "checked" : ""}>
+            <label for="${model}" class="text-sm cursor-pointer" title="Use the keyword on your prompt: ${activation}">${model}: <b>${activation}</b>
+            <a href="https://hf.co/${model}" title="Model link on Hugging Face" target="_blank" class="text-sm text-blue-500 underline hover:no-underline">⤴️</a></label>
+            `;
+            models_options.appendChild(modelEl);
+        })
+        models_options.addEventListener("change", () => {
+            const model = getValue("input[name=base_model]:checked");
+            const prompt = getValue("#prompt");
+            const activation = models_id[model];
+            if (prompt.includes(activation))
+                return;
+            document.querySelector("#prompt").value = `${activation} portrait of a person`;
+        })
 
+    })
    const lcmLive = LCMLive(videoEl, imageEl);
    startBtn.addEventListener("click", async () => {
        try {
@@ -263,16 +290,18 @@
    <main class="container mx-auto px-4 py-4 max-w-4xl flex flex-col gap-4">
        <article class="text-center max-w-xl mx-auto">
            <h1 class="text-3xl font-bold">Real-Time Latent Consistency Model</h1>
-            <h2 class="text-2xl font-bold mb-4">ControlNet Lora</h2>
+            <h2 class="text-2xl font-bold mb-4">ControlNet LoRa</h2>
            <p class="text-sm">
                This demo showcases
-                <a href="https://huggingface.co/SimianLuo/LCM_Dreamshaper_v7" target="_blank"
-                    class="text-blue-500 underline hover:no-underline">LCM</a> Image to Image pipeline
-                using
-                <a href="https://github.com/huggingface/diffusers/tree/main/examples/community#latent-consistency-pipeline"
+                <a href="https://huggingface.co/blog/lcm_lora" target="_blank"
+                    class="text-blue-500 underline hover:no-underline">LCM LoRa</a> ControlNet pipeline
+                using <a
+                    href="https://huggingface.co/docs/diffusers/api/pipelines/latent_consistency_models#latent-consistency-models"
                    target="_blank" class="text-blue-500 underline hover:no-underline">Diffusers</a> with a MJPEG
-                stream server. Featuring <a href="https://huggingface.co/nitrosocke/mo-di-diffusion" target="_blank"
-                    class="text-blue-500 underline hover:no-underline">Nitrosocke Mo-Di Diffusion</a>Model.
+                stream server.
+            </p>
+            <p class="text-sm">
+                There are <span id="queue_size" class="font-bold">0</span> user(s) sharing the same GPU.
            </p>
        </article>
        <div>
@@ -285,9 +314,14 @@
            <div class="flex text-normal px-1 py-1 border border-gray-700 rounded-md items-center">
                <textarea type="text" id="prompt" class="font-light w-full px-3 py-2 mx-1 outline-none dark:text-black"
                    title="Prompt, this is an example, feel free to modify"
-                    placeholder="Add your prompt here...">a magical princess with golden hair, modern disney style</textarea>
+                    placeholder="Add your prompt here...">ghibli style portrait of a person</textarea>
            </div>
        </div>
+        <!-- -->
+        <label class="font-medium" for="base_model">Base Model</label>
+        <fieldset class="flex flex-col gap-2" id="models_options">
+        </fieldset>
+        <!-- -->
        <div class="">
            <details>
                <summary class="font-medium cursor-pointer">Advanced Options</summary>
@@ -310,7 +344,7 @@
                        0.3</output>
                    <!-- -->
                    <label class="text-sm font-medium" for="strength">Strength</label>
-                    <input type="range" id="strength" name="strength" min="0.1" max="1" step="0.001" value="0.50"
+                    <input type="range" id="strength" name="strength" min="0.1" max="1" step="0.0001" value="0.50"
                        oninput="this.nextElementSibling.value = Number(this.value).toFixed(2)">
                    <output class="text-xs w-[50px] text-center font-light px-1 py-1 border border-gray-700 rounded-md">
                        0.5</output>
@@ -357,7 +391,7 @@
                    </button>
                    <!-- -->
                    <!-- -->
-                    <label class="text-sm font-medium" for="dimension">Image Dimensions</label>
+                    <!-- <label class="text-sm font-medium" for="dimension">Image Dimensions</label>
                    <div class="col-span-2 flex gap-2">
                        <div class="flex gap-1">
                            <input type="radio" id="dimension512" name="dimension" value="[512,512]" checked
@@ -369,7 +403,7 @@
                                lass="cursor-pointer">
                            <label for="dimension768" class="text-sm cursor-pointer">768x768</label>
                        </div>
-                    </div>
+                    </div> -->
                    <!-- -->
                    <!-- -->
                    <label class="text-sm font-medium" for="debug_canny">Debug Canny</label>
static/txt2imglora.html CHANGED
@@ -212,6 +212,10 @@
                stream server. Featuring <a href="https://huggingface.co/wavymulder/Analog-Diffusion" target="_blank"
                    class="text-blue-500 underline hover:no-underline">Analog Diffusion</a> Model.
            </p>
+            <p class="text-sm">
+                There are <span id="queue_size" class="font-bold">0</span> user(s) sharing the same GPU, affecting
+                real-time performance.
+            </p>
        </article>
        <div>
            <h2 class="font-medium">Prompt</h2>
@@ -250,7 +254,7 @@
                    <input type="range" id="guidance-scale" name="guidance-scale" min="0" max="5" step="0.0001"
                        value="0.8" oninput="this.nextElementSibling.value = Number(this.value).toFixed(2)">
                    <output class="text-xs w-[50px] text-center font-light px-1 py-1 border border-gray-700 rounded-md">
-                        8.0</output>
+                        0.8</output>
                    <!-- -->
                    <label class="text-sm font-medium" for="seed">Seed</label>
                    <input type="number" id="seed" name="seed" value="299792458"