lokCX committed on
Commit
c6ee68c
1 Parent(s): da347d0

Upload sd_samplers_kdiffusion.py

Browse files
Files changed (1) hide show
  1. sd_samplers_kdiffusion.py +393 -0
sd_samplers_kdiffusion.py ADDED
@@ -0,0 +1,393 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from collections import deque
2
+ import torch
3
+ import inspect
4
+ import einops
5
+ import k_diffusion.sampling
6
+ from modules import prompt_parser, devices, sd_samplers_common
7
+
8
+ from modules.shared import opts, state
9
+ import modules.shared as shared
10
+ from modules.script_callbacks import CFGDenoiserParams, cfg_denoiser_callback
11
+ from modules.script_callbacks import CFGDenoisedParams, cfg_denoised_callback
12
+
13
# Table of k-diffusion samplers. Each entry is a 4-tuple:
#   (display label, function name looked up on k_diffusion.sampling, aliases, options)
# The options dict carries per-sampler flags read elsewhere in this file
# ('scheduler' and 'discard_next_to_last_sigma').
samplers_k_diffusion = [
    ('Euler a', 'sample_euler_ancestral', ['k_euler_a', 'k_euler_ancestral'], {}),
    ('Euler', 'sample_euler', ['k_euler'], {}),
    ('LMS', 'sample_lms', ['k_lms'], {}),
    ('Heun', 'sample_heun', ['k_heun'], {}),
    ('DPM2', 'sample_dpm_2', ['k_dpm_2'], {'discard_next_to_last_sigma': True}),
    ('DPM2 a', 'sample_dpm_2_ancestral', ['k_dpm_2_a'], {'discard_next_to_last_sigma': True}),
    ('DPM++ 2S a', 'sample_dpmpp_2s_ancestral', ['k_dpmpp_2s_a'], {}),
    ('DPM++ 2M', 'sample_dpmpp_2m', ['k_dpmpp_2m'], {}),
    ('DPM++ SDE', 'sample_dpmpp_sde', ['k_dpmpp_sde'], {}),
    ('DPM fast', 'sample_dpm_fast', ['k_dpm_fast'], {}),
    ('DPM adaptive', 'sample_dpm_adaptive', ['k_dpm_ad'], {}),
    # Karras-scheduled variants of the samplers above.
    ('LMS Karras', 'sample_lms', ['k_lms_ka'], {'scheduler': 'karras'}),
    (
        'DPM2 Karras',
        'sample_dpm_2',
        ['k_dpm_2_ka'],
        {'scheduler': 'karras', 'discard_next_to_last_sigma': True},
    ),
    (
        'DPM2 a Karras',
        'sample_dpm_2_ancestral',
        ['k_dpm_2_a_ka'],
        {'scheduler': 'karras', 'discard_next_to_last_sigma': True},
    ),
    ('DPM++ 2S a Karras', 'sample_dpmpp_2s_ancestral', ['k_dpmpp_2s_a_ka'], {'scheduler': 'karras'}),
    ('DPM++ 2M Karras', 'sample_dpmpp_2m', ['k_dpmpp_2m_ka'], {'scheduler': 'karras'}),
    ('DPM++ SDE Karras', 'sample_dpmpp_sde', ['k_dpmpp_sde_ka'], {'scheduler': 'karras'}),
]
32
+
33
# One SamplerData per table entry whose function exists on k_diffusion.sampling.
# The sampling-function name is bound as a lambda default so each constructor
# keeps its own name instead of the comprehension's final value.
samplers_data_k_diffusion = [
    sd_samplers_common.SamplerData(
        title,
        lambda model, funcname=fn_name: KDiffusionSampler(funcname, model),
        alias_names,
        sampler_options,
    )
    for title, fn_name, alias_names, sampler_options in samplers_k_diffusion
    if hasattr(k_diffusion.sampling, fn_name)
]
38
+ from tqdm.auto import trange
39
+
40
@torch.no_grad()
def sample_dpmpp_2m_alt(model, x, sigmas, extra_args=None, callback=None, disable=None):
    """DPM-Solver++(2M) variant that rescales the remembered previous estimate.

    Each step calls ``model(x, sigma * ones, **extra_args)`` and advances ``x``
    from ``sigmas[i]`` to ``sigmas[i + 1]``. The first step, and any step whose
    next sigma is zero, uses only the current denoised output; every other step
    combines it with the estimate stored by the previous step.

    Unlike the plain 2M update, the stored estimate is the denoised output
    multiplied by ``1 + 0.15 * (i / len(sigmas)) ** 2``.

    ``callback``, when given, receives a dict with keys ``x``, ``i``, ``sigma``,
    ``sigma_hat`` and ``denoised`` once per step. ``disable`` is forwarded to
    ``trange``. Returns the final ``x``.
    """
    if extra_args is None:
        extra_args = {}
    batch_ones = x.new_ones([x.shape[0]])

    def to_sigma(t):
        return t.neg().exp()

    def to_t(sigma):
        return sigma.log().neg()

    total_sigmas = len(sigmas)
    prev_estimate = None

    for i in trange(total_sigmas - 1, disable=disable):
        sigma_cur = sigmas[i]
        sigma_next = sigmas[i + 1]

        denoised = model(x, sigma_cur * batch_ones, **extra_args)
        if callback is not None:
            callback({'x': x, 'i': i, 'sigma': sigma_cur, 'sigma_hat': sigma_cur, 'denoised': denoised})

        t, t_next = to_t(sigma_cur), to_t(sigma_next)
        h = t_next - t

        if prev_estimate is None or sigma_next == 0:
            # No usable history (or final step): plain first-order update.
            target = denoised
        else:
            h_last = t - to_t(sigmas[i - 1])
            r = h_last / h
            target = (1 + 1 / (2 * r)) * denoised - (1 / (2 * r)) * prev_estimate
        x = (to_sigma(t_next) / to_sigma(t)) * x - (-h).expm1() * target

        # The "alt" tweak: scale the estimate kept for the next step by a factor
        # that grows quadratically with the step index.
        progress = i / total_sigmas
        prev_estimate = denoised * (1 + (0.15 * (progress * progress)))
    return x
66
+
67
# Attach the custom sampler to k_diffusion.sampling so it can be resolved by
# name with getattr(k_diffusion.sampling, funcname) like the built-in ones.
k_diffusion.sampling.sample_dpmpp_2m_alt = sample_dpmpp_2m_alt

# Register both variants (model-default schedule and Karras schedule) at fixed
# positions 9 and 10 of the sampler list.
samplers_data_k_diffusion.insert(
    9,
    sd_samplers_common.SamplerData(
        'DPM++ 2M alt',
        lambda model: KDiffusionSampler('sample_dpmpp_2m_alt', model),
        ['k_dpmpp_2m_alt'],
        {},
    ),
)
samplers_data_k_diffusion.insert(
    10,
    sd_samplers_common.SamplerData(
        'DPM++ 2M alt Karras',
        lambda model: KDiffusionSampler('sample_dpmpp_2m_alt', model),
        ['k_dpmpp_2m_alt_ka'],
        {'scheduler': 'karras'},
    ),
)
71
+
72
+
73
# Maps a sampling-function name to the attribute names that may be copied from
# the processing object into that function's keyword arguments. Each function
# gets its own list object.
sampler_extra_params = {
    sampling_funcname: ['s_churn', 's_tmin', 's_tmax', 's_noise']
    for sampling_funcname in ('sample_euler', 'sample_heun', 'sample_dpm_2')
}
78
+
79
+
80
class CFGDenoiser(torch.nn.Module):
    """
    Classifier free guidance denoiser. A wrapper for stable diffusion model (specifically for unet)
    that can take a noisy picture and produce a noise-free picture using two guidances (prompts)
    instead of one. Originally, the second prompt is just an empty string, but we use non-empty
    negative prompt.
    """

    def __init__(self, model):
        """Wrap ``model``; mask, latent and scale fields are filled in later by the sampler."""
        super().__init__()
        self.inner_model = model
        self.mask = None
        self.nmask = None
        self.init_latent = None
        self.step = 0
        self.image_cfg_scale = None

    def combine_denoised(self, x_out, conds_list, uncond, cond_scale):
        """Blend conditional outputs into the unconditional ones.

        The last ``uncond.shape[0]`` rows of ``x_out`` are taken as the unconditional
        outputs. For batch item ``i``, every ``(cond_index, weight)`` pair in
        ``conds_list[i]`` adds ``(x_out[cond_index] - uncond_out[i]) * weight * cond_scale``.
        ``x_out`` itself is not modified; a new tensor is returned.
        """
        denoised_uncond = x_out[-uncond.shape[0]:]
        denoised = torch.clone(denoised_uncond)

        for i, conds in enumerate(conds_list):
            for cond_index, weight in conds:
                denoised[i] += (x_out[cond_index] - denoised_uncond[i]) * (weight * cond_scale)

        return denoised

    def combine_denoised_for_edit_model(self, x_out, cond_scale):
        """Combine the three equal chunks of ``x_out`` (cond, image-cond, uncond) for the edit model.

        Text guidance is scaled by ``cond_scale`` and image guidance by ``self.image_cfg_scale``.
        """
        out_cond, out_img_cond, out_uncond = x_out.chunk(3)
        denoised = out_uncond + cond_scale * (out_cond - out_img_cond) + self.image_cfg_scale * (out_img_cond - out_uncond)

        return denoised

    def forward(self, x, sigma, uncond, cond, cond_scale, image_cond):
        """Run the inner model on conditional and unconditional inputs and combine the results.

        ``cond`` and ``uncond`` are resolved for the current ``self.step`` through
        ``prompt_parser``; ``image_cond`` is passed to the inner model as ``c_concat``.
        Registered cfg-denoiser callbacks may replace the inputs before the model runs.
        If ``self.mask`` is set, the result is blended with ``self.init_latent``.
        Increments ``self.step`` and returns the combined denoised tensor.

        Raises ``sd_samplers_common.InterruptedException`` when the shared state is
        interrupted or skipped.
        """
        if state.interrupted or state.skipped:
            raise sd_samplers_common.InterruptedException

        # at self.image_cfg_scale == 1.0 produced results for edit model are the same as with normal sampling,
        # so is_edit_model is set to False to support AND composition.
        is_edit_model = shared.sd_model.cond_stage_key == "edit" and self.image_cfg_scale is not None and self.image_cfg_scale != 1.0

        conds_list, tensor = prompt_parser.reconstruct_multicond_batch(cond, self.step)
        uncond = prompt_parser.reconstruct_cond_batch(uncond, self.step)

        assert not is_edit_model or all(len(conds) == 1 for conds in conds_list), "AND is not supported for InstructPix2Pix checkpoint (unless using Image CFG scale = 1.0)"

        batch_size = len(conds_list)
        repeats = [len(conds) for conds in conds_list]

        # Row layout of the model input: each batch item repeated once per condition,
        # followed by one plain copy of the batch for the unconditional pass
        # (two copies for the edit model, the last one paired with a zero image condition).
        if not is_edit_model:
            x_in = torch.cat([torch.stack([x[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [x])
            sigma_in = torch.cat([torch.stack([sigma[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [sigma])
            image_cond_in = torch.cat([torch.stack([image_cond[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [image_cond])
        else:
            x_in = torch.cat([torch.stack([x[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [x] + [x])
            sigma_in = torch.cat([torch.stack([sigma[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [sigma] + [sigma])
            image_cond_in = torch.cat([torch.stack([image_cond[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [image_cond] + [torch.zeros_like(self.init_latent)])

        denoiser_params = CFGDenoiserParams(x_in, image_cond_in, sigma_in, state.sampling_step, state.sampling_steps, tensor, uncond)
        cfg_denoiser_callback(denoiser_params)
        x_in = denoiser_params.x
        image_cond_in = denoiser_params.image_cond
        sigma_in = denoiser_params.sigma
        tensor = denoiser_params.text_cond
        uncond = denoiser_params.text_uncond

        if tensor.shape[1] == uncond.shape[1]:
            # Same token length: conditional and unconditional text can share one batch.
            if not is_edit_model:
                cond_in = torch.cat([tensor, uncond])
            else:
                cond_in = torch.cat([tensor, uncond, uncond])

            if shared.batch_cond_uncond:
                x_out = self.inner_model(x_in, sigma_in, cond={"c_crossattn": [cond_in], "c_concat": [image_cond_in]})
            else:
                x_out = torch.zeros_like(x_in)
                for batch_offset in range(0, x_out.shape[0], batch_size):
                    a = batch_offset
                    b = a + batch_size
                    x_out[a:b] = self.inner_model(x_in[a:b], sigma_in[a:b], cond={"c_crossattn": [cond_in[a:b]], "c_concat": [image_cond_in[a:b]]})
        else:
            # Token lengths differ, so conditional and unconditional text cannot be
            # concatenated; the two groups of rows are run through the model separately.
            x_out = torch.zeros_like(x_in)
            batch_size = batch_size*2 if shared.batch_cond_uncond else batch_size
            for batch_offset in range(0, tensor.shape[0], batch_size):
                a = batch_offset
                b = min(a + batch_size, tensor.shape[0])

                # These rows are the conditional ones in every mode, so their text
                # conditioning is always the matching slice of `tensor`. (The previous
                # edit-model path called torch.cat with `uncond` as the `dim` argument,
                # which raises TypeError.)
                x_out[a:b] = self.inner_model(x_in[a:b], sigma_in[a:b], cond={"c_crossattn": [tensor[a:b]], "c_concat": [image_cond_in[a:b]]})

            # All remaining rows use the unconditional text. The edit model has two such
            # groups (image-conditioned and zero-image), matching the
            # cat([tensor, uncond, uncond]) layout of the equal-length branch.
            uncond_in = torch.cat([uncond, uncond]) if is_edit_model else uncond
            tail = uncond_in.shape[0]
            x_out[-tail:] = self.inner_model(x_in[-tail:], sigma_in[-tail:], cond={"c_crossattn": [uncond_in], "c_concat": [image_cond_in[-tail:]]})

        denoised_params = CFGDenoisedParams(x_out, state.sampling_step, state.sampling_steps)
        cfg_denoised_callback(denoised_params)

        devices.test_for_nans(x_out, "unet")

        if opts.live_preview_content == "Prompt":
            sd_samplers_common.store_latent(x_out[0:uncond.shape[0]])
        elif opts.live_preview_content == "Negative prompt":
            sd_samplers_common.store_latent(x_out[-uncond.shape[0]:])

        if not is_edit_model:
            denoised = self.combine_denoised(x_out, conds_list, uncond, cond_scale)
        else:
            denoised = self.combine_denoised_for_edit_model(x_out, cond_scale)

        if self.mask is not None:
            denoised = self.init_latent * self.mask + self.nmask * denoised

        self.step += 1

        return denoised
197
+
198
+
199
class TorchHijack:
    """Proxy for the ``torch`` module whose ``randn_like`` serves queued noise tensors first.

    Any attribute other than ``randn_like`` is forwarded to the real ``torch`` module.
    """

    def __init__(self, sampler_noises):
        # Using a deque to efficiently receive the sampler_noises in the same order as the previous index-based
        # implementation.
        self.sampler_noises = deque(sampler_noises)

    def __getattr__(self, item):
        """Forward unknown attributes to ``torch``; raise AttributeError if torch lacks them too."""
        # __getattr__ only runs after normal lookup has failed, so `randn_like`
        # (a real method on this class) never reaches this point and needs no special case.
        if hasattr(torch, item):
            return getattr(torch, item)

        raise AttributeError(f"'{type(self).__name__}' object has no attribute '{item}'")

    def randn_like(self, x):
        """Return the next queued noise tensor if its shape matches ``x``, else fresh noise.

        A queued tensor whose shape does not match is still consumed and discarded.
        """
        if self.sampler_noises:
            noise = self.sampler_noises.popleft()
            if noise.shape == x.shape:
                return noise

        if x.device.type == 'mps':
            # On 'mps' devices the noise is generated on the CPU and then moved over.
            return torch.randn_like(x, device=devices.cpu).to(x.device)
        return torch.randn_like(x)
224
+
225
+
226
class KDiffusionSampler:
    """Drives one k-diffusion sampling function, wrapping the model in a CFGDenoiser."""

    def __init__(self, funcname, sd_model):
        """Look up ``funcname`` on ``k_diffusion.sampling`` and wrap ``sd_model`` for it."""
        # v-parameterized models use the V denoiser wrapper, everything else the regular one.
        if sd_model.parameterization == "v":
            denoiser = k_diffusion.external.CompVisVDenoiser
        else:
            denoiser = k_diffusion.external.CompVisDenoiser

        self.model_wrap = denoiser(sd_model, quantize=shared.opts.enable_quantization)
        self.funcname = funcname
        self.func = getattr(k_diffusion.sampling, self.funcname)
        self.extra_params = sampler_extra_params.get(funcname, [])
        self.model_wrap_cfg = CFGDenoiser(self.model_wrap)
        self.sampler_noises = None
        self.stop_at = None
        self.eta = None
        self.config = None
        self.last_latent = None

        self.conditioning_key = sd_model.model.conditioning_key

    def callback_state(self, d):
        """Per-step callback: record the latest latent, honour ``stop_at``, update progress."""
        current_step, denoised = d['i'], d["denoised"]

        if opts.live_preview_content == "Combined":
            sd_samplers_common.store_latent(denoised)
        self.last_latent = denoised

        past_stop = self.stop_at is not None and current_step > self.stop_at
        if past_stop:
            raise sd_samplers_common.InterruptedException

        state.sampling_step = current_step
        shared.total_tqdm.update()

    def launch_sampling(self, steps, func):
        """Reset shared step counters and run ``func``; on interruption return the last latent."""
        state.sampling_steps = steps
        state.sampling_step = 0

        try:
            result = func()
        except sd_samplers_common.InterruptedException:
            result = self.last_latent
        return result

    def number_of_needed_noises(self, p):
        """Return ``p.steps``."""
        return p.steps

    def initialize(self, p):
        """Prepare the CFG wrapper for a run and collect extra keyword arguments for the sampler.

        Also replaces ``k_diffusion.sampling.torch`` with a TorchHijack over the stored noises.
        Returns the dict of extra keyword arguments accepted by ``self.func``.
        """
        cfg = self.model_wrap_cfg
        cfg.mask = getattr(p, 'mask', None)
        cfg.nmask = getattr(p, 'nmask', None)
        cfg.step = 0
        cfg.image_cfg_scale = getattr(p, 'image_cfg_scale', None)
        self.eta = opts.eta_ancestral if p.eta is None else p.eta

        queued_noises = [] if self.sampler_noises is None else self.sampler_noises
        k_diffusion.sampling.torch = TorchHijack(queued_noises)

        accepted = inspect.signature(self.func).parameters

        # Copy only those extra parameters that p actually has and the sampler accepts.
        extra_params_kwargs = {
            name: getattr(p, name)
            for name in self.extra_params
            if hasattr(p, name) and name in accepted
        }

        if 'eta' in accepted:
            if self.eta != 1.0:
                p.extra_generation_params["Eta"] = self.eta

            extra_params_kwargs['eta'] = self.eta

        return extra_params_kwargs

    def get_sigmas(self, p, steps):
        """Build the sigma schedule for ``steps`` steps, honouring overrides and sampler options."""
        has_config = self.config is not None

        drop_penultimate = has_config and self.config.options.get('discard_next_to_last_sigma', False)
        if opts.always_discard_next_to_last_sigma and not drop_penultimate:
            drop_penultimate = True
            p.extra_generation_params["Discard penultimate sigma"] = True

        # One extra sigma is requested so that the count is unchanged after dropping one below.
        if drop_penultimate:
            steps += 1

        if p.sampler_noise_scheduler_override:
            sigmas = p.sampler_noise_scheduler_override(steps)
        elif has_config and self.config.options.get('scheduler', None) == 'karras':
            if opts.use_old_karras_scheduler_sigmas:
                sigma_min, sigma_max = (0.1, 10)
            else:
                sigma_min, sigma_max = (self.model_wrap.sigmas[0].item(), self.model_wrap.sigmas[-1].item())

            sigmas = k_diffusion.sampling.get_sigmas_karras(n=steps, sigma_min=sigma_min, sigma_max=sigma_max, device=shared.device)
        else:
            sigmas = self.model_wrap.get_sigmas(steps)

        if drop_penultimate:
            sigmas = torch.cat([sigmas[:-2], sigmas[-1:]])

        return sigmas

    def create_noise_sampler(self, x, sigmas, p):
        """For DPM++ SDE: manually create noise sampler to enable deterministic results across different batch sizes"""
        if shared.opts.no_dpmpp_sde_batch_determinism:
            return None

        from k_diffusion.sampling import BrownianTreeNoiseSampler

        positive_sigmas = sigmas[sigmas > 0]
        sigma_min = positive_sigmas.min()
        sigma_max = sigmas.max()

        # Seeds belonging to the batch of the current iteration.
        first_seed = p.iteration * p.batch_size
        last_seed = (p.iteration + 1) * p.batch_size
        current_iter_seeds = p.all_seeds[first_seed:last_seed]
        return BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=current_iter_seeds)

    def sample_img2img(self, p, x, noise, conditioning, unconditional_conditioning, steps=None, image_conditioning=None):
        """Noise ``x`` to the img2img starting sigma and run the sampler over the remaining schedule."""
        steps, t_enc = sd_samplers_common.setup_img2img_steps(p, steps)

        sigmas = self.get_sigmas(p, steps)

        sigma_sched = sigmas[steps - t_enc - 1:]
        xi = x + noise * sigma_sched[0]

        extra_params_kwargs = self.initialize(p)
        parameters = inspect.signature(self.func).parameters

        # Schedule-derived arguments, evaluated lazily and only for names the sampler accepts.
        schedule_args = {
            # last sigma is zero which isn't allowed by DPM Fast & Adaptive so taking value before last
            'sigma_min': lambda: sigma_sched[-2],
            'sigma_max': lambda: sigma_sched[0],
            'n': lambda: len(sigma_sched) - 1,
            'sigma_sched': lambda: sigma_sched,
            'sigmas': lambda: sigma_sched,
        }
        for arg_name, compute in schedule_args.items():
            if arg_name in parameters:
                extra_params_kwargs[arg_name] = compute()

        if self.funcname == 'sample_dpmpp_sde':
            extra_params_kwargs['noise_sampler'] = self.create_noise_sampler(x, sigmas, p)

        self.model_wrap_cfg.init_latent = x
        self.last_latent = x
        extra_args = {
            'cond': conditioning,
            'image_cond': image_conditioning,
            'uncond': unconditional_conditioning,
            'cond_scale': p.cfg_scale,
        }

        def run_sampler():
            return self.func(self.model_wrap_cfg, xi, extra_args=extra_args, disable=False, callback=self.callback_state, **extra_params_kwargs)

        return self.launch_sampling(t_enc + 1, run_sampler)

    def sample(self, p, x, conditioning, unconditional_conditioning, steps=None, image_conditioning=None):
        """Scale the initial noise ``x`` by the first sigma and run the sampler for ``steps`` steps."""
        steps = steps or p.steps

        sigmas = self.get_sigmas(p, steps)

        x = x * sigmas[0]

        extra_params_kwargs = self.initialize(p)
        parameters = inspect.signature(self.func).parameters

        if 'sigma_min' not in parameters:
            extra_params_kwargs['sigmas'] = sigmas
        else:
            # Samplers taking sigma_min/sigma_max get the model's sigma range instead of a schedule.
            model_sigmas = self.model_wrap.sigmas
            extra_params_kwargs['sigma_min'] = model_sigmas[0].item()
            extra_params_kwargs['sigma_max'] = model_sigmas[-1].item()
            if 'n' in parameters:
                extra_params_kwargs['n'] = steps

        if self.funcname == 'sample_dpmpp_sde':
            extra_params_kwargs['noise_sampler'] = self.create_noise_sampler(x, sigmas, p)

        self.last_latent = x
        extra_args = {
            'cond': conditioning,
            'image_cond': image_conditioning,
            'uncond': unconditional_conditioning,
            'cond_scale': p.cfg_scale
        }

        def run_sampler():
            return self.func(self.model_wrap_cfg, x, extra_args=extra_args, disable=False, callback=self.callback_state, **extra_params_kwargs)

        return self.launch_sampling(steps, run_sampler)
393
+