adamelliotfields committed
Commit 61ad3d2
1 Parent(s): c5cf566

Add IP-Adapter

Files changed (8):
  1. README.md +6 -5
  2. app.css +4 -0
  3. app.py +74 -31
  4. cli.py +4 -0
  5. lib/config.py +4 -5
  6. lib/inference.py +15 -2
  7. lib/loader.py +49 -11
  8. usage.md +15 -15
README.md CHANGED
@@ -16,6 +16,7 @@ license: apache-2.0
 models:
 - ai-forever/Real-ESRGAN
 - fluently/Fluently-v4
+- h94/IP-Adapter
 - Linaqruf/anything-v3-1
 - Lykon/dreamshaper-8
 - prompthero/openjourney-v4
@@ -28,6 +29,9 @@ preload_from_hub:
 - >-
   fluently/Fluently-v4
   text_encoder/model.fp16.safetensors,unet/diffusion_pytorch_model.fp16.safetensors,vae/diffusion_pytorch_model.fp16.safetensors
+- >-
+  h94/IP-Adapter
+  models/ip-adapter-full-face_sd15.safetensors,models/ip-adapter-plus_sd15.safetensors,models/image_encoder/model.safetensors
 - >-
   Linaqruf/anything-v3-1
   text_encoder/model.safetensors,unet/diffusion_pytorch_model.safetensors,vae/diffusion_pytorch_model.safetensors
@@ -48,9 +52,10 @@ preload_from_hub:
 # diffusion
 
 Gradio app for Stable Diffusion 1.5 including:
-* txt2img and img2img pipelines
+* txt2img and img2img pipelines with IP-Adapter
 * Curated models and TI embeddings
 * 100+ styles from sdxl_prompt_styler
+* 150+ prompts from StableStudio
 * Compel prompt weighting
 * Multiple samplers with Karras scheduling
 * DeepCache, FreeU, and Clip Skip available
@@ -80,7 +85,3 @@ python app.py --port 7860
 # cli
 python cli.py 'an astronaut riding a horse on mars'
 ```
-
-## TODO
-
-- [ ] IP-Adapter and T2I-Adapter
app.css CHANGED
@@ -47,6 +47,10 @@
   max-width: 42px;
 }
 
+.image-container {
+  max-height: 438px;
+}
+
 .popover {
   position: relative;
 }
app.py CHANGED
@@ -44,27 +44,32 @@ def random_fn():
     return gr.Textbox(value=random.choice(prompts))
 
 
-# can't toggle interactive in JS
-def gallery_fn(images, image):
-    if image is not None:
+def create_image_dropdown(images, locked=False):
+    if locked:
         return gr.Dropdown(
             choices=[("🔒", -2)],
             interactive=False,
             value=-2,
         )
-    return gr.Dropdown(
-        choices=[("None", -1)]
-        + [(str(i + 1), i) for i, _ in enumerate(images if images is not None else [])],
-        interactive=True,
-        value=-1,
+    else:
+        return gr.Dropdown(
+            choices=[("None", -1)] + [(str(i + 1), i) for i, _ in enumerate(images or [])],
+            interactive=True,
+            value=-1,
+        )
+
+
+def gallery_fn(images, image, ip_image):
+    return (
+        create_image_dropdown(images, locked=image is not None),
+        create_image_dropdown(images, locked=ip_image is not None),
     )
 
 
 def image_prompt_fn(images):
-    return gallery_fn(images, None)
+    return create_image_dropdown(images)
 
 
-# can't use image input in JS
 def image_select_fn(images, image, i):
     # -2 is the lock icon, -1 is None
     if i == -2:
@@ -278,29 +283,53 @@ with gr.Blocks(
         with gr.TabItem("🖼️ Image"):
             with gr.Row():
                 image_prompt = gr.Image(
+                    show_share_button=False,
                     show_label=False,
                     min_width=320,
                     format="png",
                     type="pil",
-                    scale=0,
                 )
-
-            with gr.Row():
-                image_select = gr.Dropdown(
-                    choices=[("None", -1)],
-                    label="Load from Gallery",
-                    interactive=True,
-                    filterable=False,
-                    value=-1,
-                )
-                denoising_strength = gr.Slider(
-                    value=Config.DENOISING_STRENGTH,
-                    label="Denoising Strength",
-                    minimum=0.0,
-                    maximum=1.0,
-                    step=0.1,
+                ip_image = gr.Image(
+                    show_share_button=False,
+                    label="IP-Adapter",
+                    min_width=320,
+                    format="png",
+                    type="pil",
                 )
 
+            with gr.Group():
+                with gr.Row():
+                    image_select = gr.Dropdown(
+                        choices=[("None", -1)],
+                        label="Gallery Image",
+                        interactive=True,
+                        filterable=False,
+                        value=-1,
+                    )
+                    ip_image_select = gr.Dropdown(
+                        choices=[("None", -1)],
+                        label="Gallery Image (IP-Adapter)",
+                        interactive=True,
+                        filterable=False,
+                        value=-1,
+                    )
+
+                with gr.Row():
+                    denoising_strength = gr.Slider(
+                        value=Config.DENOISING_STRENGTH,
+                        label="Denoising Strength",
+                        minimum=0.0,
+                        maximum=1.0,
+                        step=0.1,
+                    )
+
+                with gr.Row():
+                    ip_face = gr.Checkbox(
+                        elem_classes=["checkbox"],
+                        label="IP-Adapter Face",
+                        value=False,
+                    )
+
         with gr.TabItem("ℹ️ Usage"):
             gr.Markdown(read_file("usage.md"), elem_classes=["markdown"])
 
@@ -358,9 +387,9 @@ with gr.Blocks(
     seed.change(None, inputs=[seed], outputs=[], js=seed_js)
 
     file_format.change(
-        lambda f: (gr.Gallery(format=f), gr.Image(format=f)),
+        lambda f: (gr.Gallery(format=f), gr.Image(format=f), gr.Image(format=f)),
         inputs=[file_format],
-        outputs=[output_images, image_prompt],
+        outputs=[output_images, image_prompt, ip_image],
         show_api=False,
     )
 
@@ -372,11 +401,11 @@ with gr.Blocks(
         js=aspect_ratio_js,
     )
 
-    # lock the input image so you don't lose it when the gallery updates
+    # lock the input images so you don't lose them when the gallery updates
    output_images.change(
         gallery_fn,
-        inputs=[output_images, image_prompt],
-        outputs=[image_select],
+        inputs=[output_images, image_prompt, ip_image],
+        outputs=[image_select, ip_image_select],
         show_api=False,
     )
 
@@ -387,6 +416,12 @@ with gr.Blocks(
         outputs=[image_prompt],
         show_api=False,
     )
+    ip_image_select.change(
+        image_select_fn,
+        inputs=[output_images, ip_image, ip_image_select],
+        outputs=[ip_image],
+        show_api=False,
+    )
 
     # reset the dropdown on clear
     image_prompt.clear(
@@ -395,6 +430,12 @@ with gr.Blocks(
        outputs=[image_select],
         show_api=False,
     )
+    ip_image.clear(
+        image_prompt_fn,
+        inputs=[output_images],
+        outputs=[ip_image_select],
+        show_api=False,
+    )
 
     # show "Custom" aspect ratio when manually changing width or height
     gr.on(
@@ -415,6 +456,8 @@ with gr.Blocks(
             prompt,
             negative_prompt,
             image_prompt,
+            ip_image,
+            ip_face,
             embeddings,
             style,
             seed,
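
The dropdown "lock" exists because component interactivity can only be toggled from a Python callback, not from client-side JS (hence the removed comment). A standalone sketch of the same pattern with toy components, not the app's exact layout:

```
import gradio as gr


def lock_dropdown(image):
    # While an image is held, show a disabled 🔒 entry so a gallery refresh
    # can't silently swap the selection out from under the user
    if image is not None:
        return gr.Dropdown(choices=[("🔒", -2)], value=-2, interactive=False)
    return gr.Dropdown(choices=[("None", -1)], value=-1, interactive=True)


with gr.Blocks() as demo:
    image = gr.Image(type="pil")
    select = gr.Dropdown(choices=[("None", -1)], value=-1, filterable=False)
    image.change(lock_dropdown, inputs=[image], outputs=[select])

demo.launch()
```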
cli.py CHANGED
@@ -31,6 +31,8 @@ def main():
     parser.add_argument("--steps", type=int, metavar="INT", default=Config.INFERENCE_STEPS)
     parser.add_argument("--strength", type=float, metavar="FLOAT", default=Config.DENOISING_STRENGTH)
     parser.add_argument("--image", type=str, metavar="STR")
+    parser.add_argument("--ip-image", type=str, metavar="STR")
+    parser.add_argument("--ip-face", action="store_true")
     parser.add_argument("--taesd", action="store_true")
     parser.add_argument("--clip-skip", action="store_true")
     parser.add_argument("--truncate", action="store_true")
@@ -44,6 +46,8 @@ def main():
         args.prompt,
         args.negative,
         args.image,
+        args.ip_image,
+        args.ip_face,
         args.embedding,
         args.style,
         args.seed,
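
For reference, the new flags compose with the existing `--image` input; an example invocation (file names are hypothetical):

```
python cli.py 'portrait of an astronaut' --ip-image face.png --ip-face
```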
lib/config.py CHANGED
@@ -20,12 +20,11 @@ Config = SimpleNamespace(
     ],
     SCHEDULER="DEIS 2M",
     SCHEDULERS=[
+        "DDIM",
         "DEIS 2M",
         "DPM++ 2M",
-        "DPM2 a",
+        "Euler",
         "Euler a",
-        "Heun",
-        "LMS",
         "PNDM",
     ],
     EMBEDDING="fast_negative",
@@ -39,8 +38,8 @@ Config = SimpleNamespace(
     HEIGHT=576,
     NUM_IMAGES=1,
     SEED=-1,
-    GUIDANCE_SCALE=7,
-    INFERENCE_STEPS=30,
+    GUIDANCE_SCALE=6,
+    INFERENCE_STEPS=35,
     DENOISING_STRENGTH=0.6,
     DEEPCACHE_INTERVAL=2,
     SCALE=1,
lib/inference.py CHANGED
@@ -75,6 +75,8 @@ def generate(
     positive_prompt,
     negative_prompt="",
     image_prompt=None,
+    ip_image=None,
+    ip_face=False,
     embeddings=[],
     style=None,
     seed=None,
@@ -120,11 +122,17 @@ def generate(
 
     KIND = "img2img" if image_prompt is not None else "txt2img"
 
+    IP_ADAPTER = None
+
+    if ip_image:
+        IP_ADAPTER = "full-face" if ip_face else "plus"
+
     with torch.inference_mode():
         start = time.perf_counter()
         loader = Loader()
         pipe, upscaler = loader.load(
             KIND,
+            IP_ADAPTER,
             model,
             scheduler,
             karras,
@@ -146,10 +154,12 @@ def generate(
                     token=f"<{embedding}>",
                 )
                 negative_prompt = (
-                    f"{negative_prompt}, {embedding}" if negative_prompt else embedding
+                    f"{negative_prompt}, (<{embedding}>)1.1"
+                    if negative_prompt
+                    else f"(<{embedding}>)1.1"
                 )
             except (EnvironmentError, HFValidationError, RepositoryNotFoundError):
-                raise Error(f"Invalid embedding: {embedding}")
+                raise Error(f"Invalid embedding: <{embedding}>")
 
         # prompt embeds
         compel = Compel(
@@ -202,6 +212,9 @@ def generate(
         kwargs["strength"] = denoising_strength
         kwargs["image"] = prepare_image(image_prompt, (width, height))
 
+    if IP_ADAPTER:
+        kwargs["ip_adapter_image"] = prepare_image(ip_image, (width, height))
+
     try:
         image = pipe(**kwargs).images[0]
         if scale > 1:
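
For context, `ip_adapter_image` is the stock diffusers entry point that the loader's `load_ip_adapter` call enables. A minimal sketch of the same conditioning outside the app (checkpoint choice and image path are placeholders):

```
import torch
from diffusers import StableDiffusionPipeline
from diffusers.utils import load_image

# Any SD 1.5 checkpoint works; dreamshaper-8 is one of the app's models
pipe = StableDiffusionPipeline.from_pretrained(
    "Lykon/dreamshaper-8", torch_dtype=torch.float16
).to("cuda")

# Attach the "plus" adapter (or ip-adapter-full-face_sd15 when ip_face is set)
pipe.load_ip_adapter(
    "h94/IP-Adapter",
    subfolder="models",
    weight_name="ip-adapter-plus_sd15.safetensors",
)
pipe.set_ip_adapter_scale(0.5)

# The reference image conditions generation alongside the text prompt
image = pipe(
    "an astronaut riding a horse on mars",
    ip_adapter_image=load_image("reference.png"),  # placeholder path
    num_inference_steps=35,
    guidance_scale=6,
).images[0]
```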
lib/loader.py CHANGED
@@ -1,17 +1,17 @@
 import torch
 from DeepCache import DeepCacheSDHelper
 from diffusers import (
+    DDIMScheduler,
     DEISMultistepScheduler,
     DPMSolverMultistepScheduler,
     EulerAncestralDiscreteScheduler,
-    HeunDiscreteScheduler,
-    KDPM2AncestralDiscreteScheduler,
-    LMSDiscreteScheduler,
+    EulerDiscreteScheduler,
     PNDMScheduler,
     StableDiffusionImg2ImgPipeline,
     StableDiffusionPipeline,
 )
 from diffusers.models import AutoencoderKL, AutoencoderTiny
+from diffusers.models.attention_processor import AttnProcessor2_0, IPAdapterAttnProcessor2_0
 from torch._dynamo import OptimizedModule
 
 from .upscaler import RealESRGAN
@@ -29,6 +29,7 @@ class Loader:
             cls._instance = super(Loader, cls).__new__(cls)
             cls._instance.pipe = None
             cls._instance.upscaler = None
+            cls._instance.ip_adapter = None
         return cls._instance
 
     def _load_upscaler(self, device=None, scale=4):
@@ -61,7 +62,38 @@ class Loader:
         # https://github.com/ChenyangSi/FreeU
         self.pipe.enable_freeu(b1=1.5, b2=1.6, s1=0.9, s2=0.2)
 
-    def _load_vae(self, model_name=None, taesd=False, variant=None):
+    def _load_ip_adapter(self, ip_adapter=None):
+        if self.ip_adapter is None and self.ip_adapter != ip_adapter:
+            self.pipe.load_ip_adapter(
+                "h94/IP-Adapter",
+                subfolder="models",
+                weight_name=f"ip-adapter-{ip_adapter}_sd15.safetensors",
+            )
+            self.pipe.set_ip_adapter_scale(0.6 if ip_adapter == "full-face" else 0.5)
+            self.ip_adapter = ip_adapter
+
+        if self.ip_adapter is not None and ip_adapter is None:
+            if not isinstance(self.pipe, StableDiffusionImg2ImgPipeline):
+                self.pipe.image_encoder = None
+                self.pipe.register_to_config(image_encoder=[None, None])
+
+            self.pipe.feature_extractor = None
+            self.pipe.unet.encoder_hid_proj = None
+            self.pipe.unet.config.encoder_hid_dim_type = None
+            self.pipe.register_to_config(feature_extractor=[None, None])
+
+            attn_procs = {}
+            for name, value in self.pipe.unet.attn_processors.items():
+                attn_processor_class = AttnProcessor2_0()  # raises if not torch 2
+                attn_procs[name] = (
+                    attn_processor_class
+                    if isinstance(value, IPAdapterAttnProcessor2_0)
+                    else value.__class__()
+                )
+            self.pipe.unet.set_attn_processor(attn_procs)
+            self.pipe.ip_adapter = None
+
+    def _load_vae(self, taesd=False, model_name=None, variant=None):
         vae_type = type(self.pipe.vae)
         is_kl = issubclass(vae_type, (AutoencoderKL, OptimizedModule))
         is_tiny = issubclass(vae_type, AutoencoderTiny)
@@ -97,10 +129,12 @@ class Loader:
         self.pipe = pipelines[kind].from_pretrained(model, **kwargs).to(device, dtype)
         if not isinstance(self.pipe, pipelines[kind]):
             self.pipe = pipelines[kind].from_pipe(self.pipe).to(device, dtype)
+        self.ip_adapter = None
 
     def load(
         self,
         kind,
+        ip_adapter,
         model,
         scheduler,
         karras,
@@ -114,26 +148,29 @@ class Loader:
         model_lower = model.lower()
 
         schedulers = {
+            "DDIM": DDIMScheduler,
             "DEIS 2M": DEISMultistepScheduler,
             "DPM++ 2M": DPMSolverMultistepScheduler,
-            "DPM2 a": KDPM2AncestralDiscreteScheduler,
+            "Euler": EulerDiscreteScheduler,
             "Euler a": EulerAncestralDiscreteScheduler,
-            "Heun": HeunDiscreteScheduler,
-            "LMS": LMSDiscreteScheduler,
             "PNDM": PNDMScheduler,
         }
 
         scheduler_kwargs = {
             "beta_schedule": "scaled_linear",
             "timestep_spacing": "leading",
-            "use_karras_sigmas": karras,
             "beta_start": 0.00085,
             "beta_end": 0.012,
             "steps_offset": 1,
         }
 
-        if scheduler in ["Euler a", "PNDM"]:
-            del scheduler_kwargs["use_karras_sigmas"]
+        if scheduler not in ["DDIM", "Euler a", "PNDM"]:
+            scheduler_kwargs["use_karras_sigmas"] = karras
+
+        # https://github.com/huggingface/diffusers/blob/8a3f0c1/scripts/convert_original_stable_diffusion_to_diffusers.py#L939
+        if scheduler == "DDIM":
+            scheduler_kwargs["clip_sample"] = False
+            scheduler_kwargs["set_alpha_to_one"] = False
 
         # no fp16 variant
         if model_lower not in [
@@ -175,7 +212,8 @@ class Loader:
             self.pipe = None
             self._load_pipeline(kind, model_lower, device, dtype, **pipe_kwargs)
 
-        self._load_vae(model_lower, taesd, variant)
+        self._load_ip_adapter(ip_adapter)
+        self._load_vae(taesd, model_lower, variant)
         self._load_freeu(freeu)
         self._load_deepcache(deepcache)
         self._load_upscaler(device, scale)
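
The scheduler kwargs above are the SD 1.5 defaults from the linked conversion script. A sketch of what the mapping produces (real diffusers constructors; the result is then assigned to `pipe.scheduler`):

```
from diffusers import DDIMScheduler, DEISMultistepScheduler

scheduler_kwargs = {
    "beta_schedule": "scaled_linear",
    "timestep_spacing": "leading",
    "beta_start": 0.00085,
    "beta_end": 0.012,
    "steps_offset": 1,
}

# Multistep schedulers accept the optional Karras noise schedule...
deis = DEISMultistepScheduler(**scheduler_kwargs, use_karras_sigmas=True)

# ...while DDIM instead takes the conversion-script sampling flags
ddim = DDIMScheduler(**scheduler_kwargs, clip_sample=False, set_alpha_to_one=False)
```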
usage.md CHANGED
@@ -12,6 +12,8 @@ Positive and negative prompts are embedded by [Compel](https://github.com/damian
 
 Note that `++` is `1.1^2` (and so on). See [syntax features](https://github.com/damian0815/compel/blob/main/doc/syntax.md) to learn more and read [Civitai](https://civitai.com)'s guide on [prompting](https://education.civitai.com/civitais-prompt-crafting-guide-part-1-basics/) for best practices.
 
+You can also press the `🎲` button to generate a random prompt.
+
 #### Arrays
 
 Arrays allow you to generate different images from a single prompt. For example, `[[cat,corgi]]` will expand into 2 separate prompts. Make sure `Images` is set accordingly (e.g., 2). Only works for the positive prompt. Inspired by [Fooocus](https://github.com/lllyasviel/Fooocus/pull/1503).
@@ -30,7 +32,7 @@ Styles are prompt templates from twri's [sdxl_prompt_styler](https://github.com/
 
 ### Scale
 
-Rescale up to 4x using [Real-ESRGAN](https://github.com/xinntao/Real-ESRGAN).
+Rescale up to 4x using [Real-ESRGAN](https://github.com/xinntao/Real-ESRGAN) (Wang et al. 2021).
 
 ### Models
 
@@ -45,27 +47,25 @@ Each model checkpoint has a different aesthetic:
 
 ### Schedulers
 
-Optionally, the [Karras](https://arxiv.org/abs/2206.00364) noise schedule can be used:
-
-* [DEIS 2M](https://huggingface.co/docs/diffusers/en/api/schedulers/deis) (default)
-* [DPM++ 2M](https://huggingface.co/docs/diffusers/en/api/schedulers/multistep_dpm_solver)
-* [DPM2 a](https://huggingface.co/docs/diffusers/api/schedulers/dpm_discrete_ancestral)
-* [Euler a](https://huggingface.co/docs/diffusers/en/api/schedulers/euler_ancestral)
-* [Heun](https://huggingface.co/docs/diffusers/api/schedulers/heun)
-* [LMS](https://huggingface.co/docs/diffusers/api/schedulers/lms_discrete)
-* [PNDM](https://huggingface.co/docs/diffusers/api/schedulers/pndm)
+The default is [DEIS 2M](https://huggingface.co/docs/diffusers/en/api/schedulers/deis) with [Karras](https://arxiv.org/abs/2206.00364) enabled. The other multistep scheduler, [DPM++ 2M](https://huggingface.co/docs/diffusers/en/api/schedulers/multistep_dpm_solver), is also good. For realism, [DDIM](https://huggingface.co/docs/diffusers/en/api/schedulers/ddim) is recommended. [Euler a](https://huggingface.co/docs/diffusers/en/api/schedulers/euler_ancestral) is worth trying for a different look.
 
 ### Image-to-Image
 
-The `🖼️ Image` tab enables the image-to-image pipeline. Either use the image input or select a generation from the gallery and then adjust the denoising strength. To disable, simply clear the image input (the `x` overlay button).
+The `🖼️ Image` tab enables the image-to-image and IP-Adapter pipelines. Either use the image input or select a generation from the gallery. To disable, simply clear the image input (the `x` overlay button).
 
-Denoising strength is essentially how much the generation will differ from the input image. A value of `0` will be identical to the original, while `1` will be a completely new image. You may want to also increase the number of inference steps.
+Denoising strength is essentially how much the generation will differ from the input image. A value of `0` will be identical to the original, while `1` will be a completely new image. You may want to also increase the number of inference steps. Only applies to the image-to-image input.
+
+### IP-Adapter
+
+In an image-to-image pipeline, the input image is used as the initial latent. With [IP-Adapter](https://github.com/tencent-ailab/IP-Adapter) (Ye et al. 2023), the input image is processed by a separate image encoder and the encoded features are used as conditioning along with the text prompt.
+
+For capturing faces, enable `IP-Adapter Face` to use the full-face model. You should use an input image that is mostly a face along with the Realistic Vision model. The input image should also be the same aspect ratio as the output to avoid distortion.
 
 ### Advanced
 
 #### DeepCache
 
-[DeepCache](https://github.com/horseee/DeepCache) (Ma et al. 2023) caches lower U-Net layers and reuses them every `Interval` steps:
+[DeepCache](https://github.com/horseee/DeepCache) (Ma et al. 2023) caches lower UNet layers and reuses them every `Interval` steps:
 * `1`: no caching
 * `2`: more quality (default)
 * `3`: balanced
@@ -73,7 +73,7 @@ Denoising strength is essentially how much the generation will differ from the i
 
 #### FreeU
 
-[FreeU](https://github.com/ChenyangSi/FreeU) (Si et al. 2023) re-weights the contributions sourced from the U-Net’s skip connections and backbone feature maps to potentially improve image quality.
+[FreeU](https://github.com/ChenyangSi/FreeU) (Si et al. 2023) re-weights the contributions sourced from the UNet’s skip connections and backbone feature maps to potentially improve image quality.
 
 #### Clip Skip
 
@@ -81,7 +81,7 @@ When enabled, the last CLIP layer is skipped. This can sometimes improve image q
 
 #### Tiny VAE
 
-Enable [madebyollin/taesd](https://github.com/madebyollin/taesd) for almost instant latent decoding with a minor loss in detail. Useful for development.
+Enable [madebyollin/taesd](https://github.com/madebyollin/taesd) for near-instant latent decoding with a minor loss in detail. Useful for development.
 
 #### Prompt Truncation
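
As a footnote to the Compel weighting usage.md documents: the `++` and weighted-group syntax compiles to prompt embeddings before the pipeline runs. A sketch against the Compel API (assumes an SD 1.5 `pipe` with the `<fast_negative>` textual inversion already loaded):

```
from compel import Compel

# pipe is an already-loaded StableDiffusionPipeline (assumption);
# <fast_negative> must have been added via pipe.load_textual_inversion
compel = Compel(tokenizer=pipe.tokenizer, text_encoder=pipe.text_encoder)

# "++" scales attention by 1.1^2; "(<token>)1.1" is the weighted form
# inference.py now uses for negative embeddings
positive_embeds = compel("a corgi++ riding a horse")
negative_embeds = compel("(<fast_negative>)1.1")

image = pipe(
    prompt_embeds=positive_embeds,
    negative_prompt_embeds=negative_embeds,
).images[0]
```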