Spaces:

devesg
/

singing_voice_conversion

Running

App Files Files Community

singing_voice_conversion / modules /diffusion /karras /karras_diffusion.py

RMSnow

add backend inference and inferface output

0883aa1 9 months ago

raw

history blame

No virus

29.5 kB

	# Copyright (c) 2023 Amphion.
	#
	# This source code is licensed under the MIT license found in the
	# LICENSE file in the root directory of this source tree.

	"""
	Based on: https://github.com/crowsonkb/k-diffusion
	"""
	import random

	import numpy as np
	import torch as th
	import torch.nn as nn
	import torch.nn.functional as F

	# from piq import LPIPS
	from utils.ssim import SSIM

	from modules.diffusion.karras.random_utils import get_generator


	def mean_flat(tensor):
	    """Average a tensor over every dimension except the leading (batch) one.

	    The result keeps dimension 0 and reduces all remaining dimensions.
	    """
	    non_batch_axes = list(range(1, tensor.ndim))
	    return tensor.mean(dim=non_batch_axes)


	def append_dims(x, target_dims):
	    """Pad `x` with trailing singleton axes until it has `target_dims` dimensions.

	    Raises:
	        ValueError: if `x` already has more than `target_dims` dimensions.
	    """
	    missing = target_dims - x.ndim
	    if missing < 0:
	        raise ValueError(
	            f"input has {x.ndim} dims but target_dims is {target_dims}, which is less"
	        )
	    # Indexing with None inserts a new axis of size 1 for each missing dimension.
	    trailing_axes = (None,) * missing
	    return x[(..., *trailing_axes)]


	def append_zero(x):
	    """Return `x` concatenated with a single trailing zero (same dtype/device as `x`)."""
	    trailing_zero = x.new_zeros([1])
	    return th.cat((x, trailing_zero))


	def get_weightings(weight_schedule, snrs, sigma_data):
	    """Map signal-to-noise ratios to loss weights for the named schedule.

	    Supported schedules: "snr", "snr+1", "karras", "truncated-snr", "uniform".

	    Raises:
	        NotImplementedError: for any other schedule name.
	    """
	    if weight_schedule == "snr":
	        return snrs
	    if weight_schedule == "snr+1":
	        return snrs + 1
	    if weight_schedule == "karras":
	        return snrs + 1.0 / sigma_data**2
	    if weight_schedule == "truncated-snr":
	        # Weights never drop below 1.
	        return th.clamp(snrs, min=1.0)
	    if weight_schedule == "uniform":
	        return th.ones_like(snrs)
	    raise NotImplementedError()


	class KarrasDenoiser:
	    """Sigma-conditioned denoiser wrapper with diffusion and consistency losses.

	    The wrapped network is called as ``model(c_in * x_t, rescaled_t, condition)``
	    and its output is combined with the noisy input through the ``c_skip`` /
	    ``c_out`` scalings (see `denoise`).

	    Args:
	        sigma_data: data scale used in the scalings and the "karras" weighting.
	        sigma_max: largest noise level of the schedule.
	        sigma_min: smallest noise level of the schedule.
	        rho: exponent of the noise-level schedule used in `consistency_losses`.
	        weight_schedule: name passed to `get_weightings`.
	        distillation: if True, `denoise` uses the boundary-condition scalings.
	        loss_norm: "l1", "l2" or "ssim"; selects the consistency loss.
	    """

	    def __init__(
	        self,
	        sigma_data: float = 0.5,
	        sigma_max=80.0,
	        sigma_min=0.002,
	        rho=7.0,
	        weight_schedule="karras",
	        distillation=False,
	        loss_norm="l2",
	    ):
	        self.sigma_data = sigma_data
	        self.sigma_max = sigma_max
	        self.sigma_min = sigma_min
	        self.weight_schedule = weight_schedule
	        self.distillation = distillation
	        self.loss_norm = loss_norm
	        # The SSIM module is only instantiated when it is the selected norm.
	        if loss_norm == "ssim":
	            self.ssim_loss = SSIM()
	        self.rho = rho
	        self.num_timesteps = 40

	    def get_snr(self, sigmas):
	        """Return the signal-to-noise ratio ``sigma ** -2``."""
	        return sigmas**-2

	    def get_sigmas(self, sigmas):
	        """Identity mapping: the noise levels are used as given."""
	        return sigmas

	    def get_scalings(self, sigma):
	        """Return ``(c_skip, c_out, c_in)`` for noise level(s) `sigma`."""
	        variance = sigma**2 + self.sigma_data**2
	        c_skip = self.sigma_data**2 / variance
	        c_out = sigma * self.sigma_data / variance**0.5
	        c_in = 1 / variance**0.5
	        return c_skip, c_out, c_in

	    def get_scalings_for_boundary_condition(self, sigma):
	        """Return ``(c_skip, c_out, c_in)`` with the boundary condition at `sigma_min`.

	        At ``sigma == sigma_min`` this gives ``c_skip == 1`` and ``c_out == 0``,
	        so `denoise` returns its input unchanged at the smallest noise level.
	        """
	        shifted = sigma - self.sigma_min
	        c_skip = self.sigma_data**2 / (shifted**2 + self.sigma_data**2)
	        c_out = shifted * self.sigma_data / (sigma**2 + self.sigma_data**2) ** 0.5
	        c_in = 1 / (sigma**2 + self.sigma_data**2) ** 0.5
	        return c_skip, c_out, c_in

	    def training_losses(self, model, x_start, sigmas, condition=None, noise=None):
	        """Compute the weighted denoising loss for one batch.

	        Args:
	            model: network called through `denoise`.
	            x_start: clean targets.
	            sigmas: per-sample noise levels (one entry per batch element).
	            condition: conditioning forwarded to the model.
	            noise: optional pre-drawn noise; standard normal noise if None.

	        Returns:
	            dict with "mse" (per-sample weighted squared error) and "loss"
	            (the same tensor).
	        """
	        if noise is None:
	            noise = th.randn_like(x_start)

	        terms = {}

	        dims = x_start.ndim
	        x_t = x_start + noise * append_dims(sigmas, dims)
	        _, denoised = self.denoise(model, x_t, sigmas, condition)

	        snrs = self.get_snr(sigmas)
	        weights = append_dims(
	            get_weightings(self.weight_schedule, snrs, self.sigma_data), dims
	        )
	        terms["mse"] = mean_flat(weights * (denoised - x_start) ** 2)
	        terms["loss"] = terms["mse"]

	        return terms

	    def consistency_losses(
	        self,
	        model,
	        x_start,
	        num_scales,
	        condition=None,
	        target_model=None,
	        teacher_model=None,
	        teacher_diffusion=None,
	        noise=None,
	    ):
	        """Compute the consistency loss between adjacent noise levels.

	        A random schedule index is drawn per sample; the online `model` is
	        evaluated at noise level ``t`` and `target_model` at the next level
	        ``t2`` on a sample obtained by one ODE solver step from ``t`` to ``t2``.

	        Args:
	            model: online network being trained.
	            x_start: clean targets.
	            num_scales: number of discretisation points of the noise schedule.
	            condition: conditioning forwarded to every network call.
	            target_model: target network (required).
	            teacher_model: optional teacher used for the solver step; without
	                it, `x_start` itself is used as the denoised estimate.
	            teacher_diffusion: denoiser wrapper used to call `teacher_model`.
	            noise: optional pre-drawn noise; standard normal noise if None.

	        Returns:
	            dict with a single key "loss".

	        Raises:
	            NotImplementedError: if no target model is given.
	            ValueError: if `loss_norm` is not "l1", "l2" or "ssim".
	        """
	        if noise is None:
	            noise = th.randn_like(x_start)

	        dims = x_start.ndim

	        def denoise_fn(x, t):
	            return self.denoise(model, x, t, condition)[1]

	        if target_model is None:
	            raise NotImplementedError("Must have a target model")

	        @th.no_grad()
	        def target_denoise_fn(x, t):
	            return self.denoise(target_model, x, t, condition)[1]

	        # Use the same `is None` test everywhere so the teacher helper is
	        # always defined whenever the solvers below decide to call it.
	        has_teacher = teacher_model is not None

	        @th.no_grad()
	        def teacher_denoise_fn(x, t):
	            return teacher_diffusion.denoise(teacher_model, x, t, condition)[1]

	        def reference_denoised(x, t, x0):
	            # Without a teacher the clean sample stands in for the denoiser.
	            return teacher_denoise_fn(x, t) if has_teacher else x0

	        @th.no_grad()
	        def heun_solver(samples, t, next_t, x0):
	            x = samples
	            d = (x - reference_denoised(x, t, x0)) / append_dims(t, dims)
	            samples = x + d * append_dims(next_t - t, dims)

	            next_d = (
	                samples - reference_denoised(samples, next_t, x0)
	            ) / append_dims(next_t, dims)
	            samples = x + (d + next_d) * append_dims((next_t - t) / 2, dims)

	            return samples

	        @th.no_grad()
	        def euler_solver(samples, t, next_t, x0):
	            x = samples
	            d = (x - reference_denoised(x, t, x0)) / append_dims(t, dims)
	            samples = x + d * append_dims(next_t - t, dims)

	            return samples

	        # Upper bound is exclusive, so indices + 1 is still a valid schedule index.
	        indices = th.randint(
	            0, num_scales - 1, (x_start.shape[0],), device=x_start.device
	        )

	        # Noise levels at `indices` and `indices + 1`, interpolated in
	        # sigma ** (1 / rho) space from sigma_max down to sigma_min.
	        t = self.sigma_max ** (1 / self.rho) + indices / (num_scales - 1) * (
	            self.sigma_min ** (1 / self.rho) - self.sigma_max ** (1 / self.rho)
	        )
	        t = t**self.rho

	        t2 = self.sigma_max ** (1 / self.rho) + (indices + 1) / (num_scales - 1) * (
	            self.sigma_min ** (1 / self.rho) - self.sigma_max ** (1 / self.rho)
	        )
	        t2 = t2**self.rho

	        x_t = x_start + noise * append_dims(t, dims)

	        # The RNG state is restored before the target pass so both passes
	        # start from the same random state (presumably to share dropout masks;
	        # note th.get_rng_state only covers the CPU generator).
	        dropout_state = th.get_rng_state()
	        distiller = denoise_fn(x_t, t)

	        if has_teacher:
	            x_t2 = heun_solver(x_t, t, t2, x_start).detach()
	        else:
	            x_t2 = euler_solver(x_t, t, t2, x_start).detach()

	        th.set_rng_state(dropout_state)
	        distiller_target = target_denoise_fn(x_t2, t2)
	        distiller_target = distiller_target.detach()

	        snrs = self.get_snr(t)
	        weights = get_weightings(self.weight_schedule, snrs, self.sigma_data)
	        if self.loss_norm == "l1":
	            diffs = th.abs(distiller - distiller_target)
	            loss = mean_flat(diffs) * weights
	        elif self.loss_norm == "l2":
	            # Plain batch-mean MSE: `weights` is deliberately not applied
	            # here and the result is a scalar.
	            loss = F.mse_loss(distiller, distiller_target)
	        elif self.loss_norm == "ssim":
	            loss = self.ssim_loss(distiller, distiller_target) * weights
	        else:
	            raise ValueError(f"Unknown loss norm {self.loss_norm}")

	        terms = {}
	        terms["loss"] = loss

	        return terms

	    def denoise(self, model, x_t, sigmas, condition):
	        """Run `model` on a noisy input and form the denoised estimate.

	        Args:
	            model: network called as ``model(c_in * x_t, rescaled_t, condition)``.
	            x_t: noisy input.
	            sigmas: per-sample noise levels.
	            condition: conditioning forwarded to the model.

	        Returns:
	            ``(model_output, denoised)`` where
	            ``denoised = c_out * model_output + c_skip * x_t``.
	        """
	        if not self.distillation:
	            scalings = self.get_scalings(sigmas)
	        else:
	            scalings = self.get_scalings_for_boundary_condition(sigmas)
	        c_skip, c_out, c_in = [append_dims(s, x_t.ndim) for s in scalings]

	        # Log-sigma time embedding input; the tiny epsilon guards log(0).
	        rescaled_t = 1000 * 0.25 * th.log(sigmas + 1e-44)
	        model_output = model(c_in * x_t, rescaled_t, condition)
	        denoised = c_out * model_output + c_skip * x_t
	        return model_output, denoised


	def karras_sample(
	    diffusion,
	    model,
	    shape,
	    steps,
	    clip_denoised=True,
	    progress=True,
	    callback=None,
	    condition=None,
	    device=None,
	    sigma_min=0.002,
	    sigma_max=80,  # higher for highres?
	    rho=7.0,
	    sampler="heun",
	    s_churn=0.0,
	    s_tmin=0.0,
	    s_tmax=float("inf"),
	    s_noise=1.0,
	    generator=None,
	    ts=None,
	):
	    """Draw samples from `model` with the chosen sampler.

	    Args:
	        diffusion: object exposing ``denoise(model, x_t, sigma, condition)``
	            and ``rho`` (used by the "multistep" sampler).
	        model: network passed through to ``diffusion.denoise``.
	        shape: shape of the initial noise / of the returned sample.
	        steps: number of schedule points ("progdist" uses ``steps + 1``).
	        clip_denoised: clamp every denoised estimate to [-1, 1].
	        progress, callback: forwarded to the sampler.
	        condition: conditioning forwarded to the model.
	        device: device for the schedule and the initial noise.
	        sigma_min, sigma_max, rho: noise schedule parameters.
	        sampler: one of "heun", "dpm", "ancestral", "onestep", "progdist",
	            "euler", "multistep"; any other name raises KeyError.
	        s_churn, s_tmin, s_tmax, s_noise: stochasticity settings, used by
	            "heun" and "dpm" only.
	        generator: noise source; a "dummy" generator is created if None.
	        ts: schedule indices for the "multistep" sampler.

	    Returns:
	        The final sample, clamped to [-1, 1].
	    """
	    if generator is None:
	        generator = get_generator("dummy")

	    num_sigmas = steps + 1 if sampler == "progdist" else steps
	    sigmas = get_sigmas_karras(num_sigmas, sigma_min, sigma_max, rho, device=device)

	    # NOTE(review): this reseeds the *global* torch RNG on every call, which
	    # makes sampling repeatable but also affects unrelated code.
	    th.manual_seed(42)
	    # Initial sample: noise scaled to the largest noise level.
	    x_T = generator.randn(shape, device=device) * sigma_max
	    # Each sigmas[i] becomes a 1-element tensor after this.
	    sigmas = sigmas.unsqueeze(-1)
	    sample_fn = {
	        "heun": sample_heun,
	        "dpm": sample_dpm,
	        "ancestral": sample_euler_ancestral,
	        "onestep": sample_onestep,
	        "progdist": sample_progdist,
	        "euler": sample_euler,
	        "multistep": stochastic_iterative_sampler,
	    }[sampler]

	    if sampler in ["heun", "dpm"]:
	        sampler_args = dict(
	            s_churn=s_churn, s_tmin=s_tmin, s_tmax=s_tmax, s_noise=s_noise
	        )
	    elif sampler == "multistep":
	        sampler_args = dict(
	            ts=ts, t_min=sigma_min, t_max=sigma_max, rho=diffusion.rho, steps=steps
	        )
	    else:
	        sampler_args = {}

	    def denoiser(x_t, sigma):
	        _, denoised = diffusion.denoise(model, x_t, sigma, condition)
	        if clip_denoised:
	            denoised = denoised.clamp(-1, 1)
	        return denoised

	    x_0 = sample_fn(
	        denoiser,
	        x_T,
	        sigmas,
	        generator,
	        progress=progress,
	        callback=callback,
	        **sampler_args,
	    )
	    return x_0.clamp(-1, 1)


	def get_sigmas_karras(n, sigma_min, sigma_max, rho=7.0, device="cpu"):
	    """Build the Karras et al. (2022) noise schedule.

	    Returns `n` noise levels running from `sigma_max` down to `sigma_min`
	    (interpolated linearly in ``sigma ** (1 / rho)``), followed by a final 0,
	    moved to `device`.
	    """
	    lo = sigma_min ** (1 / rho)
	    hi = sigma_max ** (1 / rho)
	    fraction = th.linspace(0, 1, n)
	    schedule = (hi + fraction * (lo - hi)) ** rho
	    return append_zero(schedule).to(device)


	def to_d(x, sigma, denoised):
	    """Turn a denoiser output into the Karras ODE derivative ``(x - denoised) / sigma``."""
	    sigma_broadcast = append_dims(sigma, x.ndim)
	    residual = x - denoised
	    return residual / sigma_broadcast


	def get_ancestral_step(sigma_from, sigma_to):
	    """Split an ancestral sampling step into a deterministic and a noisy part.

	    Args:
	        sigma_from: current noise level.
	        sigma_to: target noise level.

	    Returns:
	        ``(sigma_down, sigma_up)``: the noise level to step down to and the
	        amount of fresh noise to add afterwards. They satisfy
	        ``sigma_down ** 2 + sigma_up ** 2 == sigma_to ** 2``.
	    """
	    sigma_up = (
	        sigma_to**2 * (sigma_from**2 - sigma_to**2) / sigma_from**2
	    ) ** 0.5
	    sigma_down = (sigma_to**2 - sigma_up**2) ** 0.5
	    return sigma_down, sigma_up


	@th.no_grad()
	def sample_euler_ancestral(model, x, sigmas, generator, progress=False, callback=None):
	    """Ancestral sampler: an Euler step down, then fresh noise, per schedule step.

	    `callback`, if given, receives a dict with keys "x", "i", "sigma",
	    "sigma_hat" and "denoised" once per step, before `x` is updated.
	    """
	    batch_ones = x.new_ones([x.shape[0]])
	    step_ids = range(len(sigmas) - 1)
	    if progress:
	        from tqdm.auto import tqdm

	        step_ids = tqdm(step_ids)

	    for i in step_ids:
	        sigma_cur = sigmas[i]
	        denoised = model(x, sigma_cur * batch_ones)
	        sigma_down, sigma_up = get_ancestral_step(sigma_cur, sigmas[i + 1])
	        if callback is not None:
	            info = {
	                "x": x,
	                "i": i,
	                "sigma": sigmas[i],
	                "sigma_hat": sigmas[i],
	                "denoised": denoised,
	            }
	            callback(info)
	        # Deterministic Euler step down to sigma_down ...
	        derivative = to_d(x, sigma_cur, denoised)
	        x = x + derivative * (sigma_down - sigma_cur)
	        # ... followed by re-noising by sigma_up.
	        x = x + generator.randn_like(x) * sigma_up
	    return x


	@th.no_grad()
	def sample_midpoint_ancestral(model, x, ts, generator, progress=False, callback=None):
	    """Integrate `model` over the times in `ts` with midpoint-method steps.

	    The step size is ``1 / len(ts)``. `generator` is accepted for interface
	    parity with the other samplers but is not used. `callback`, if given,
	    receives a dict with keys "x", "tn", "dn" and "dn_2" after each update.
	    """
	    batch_ones = x.new_ones([x.shape[0]])
	    step_size = 1 / len(ts)
	    if progress:
	        from tqdm.auto import tqdm

	        ts = tqdm(ts)

	    for tn in ts:
	        # Slope at the start of the step, then at the midpoint it predicts.
	        dn = model(x, tn * batch_ones)
	        x_mid = x + (step_size / 2) * dn
	        dn_2 = model(x_mid, (tn + step_size / 2) * batch_ones)
	        x = x + step_size * dn_2
	        if callback is not None:
	            callback({"x": x, "tn": tn, "dn": dn, "dn_2": dn_2})
	    return x


	@th.no_grad()
	def sample_heun(
	    denoiser,
	    x,
	    sigmas,
	    generator,
	    progress=False,
	    callback=None,
	    s_churn=0.0,
	    s_tmin=0.0,
	    s_tmax=float("inf"),
	    s_noise=1.0,
	):
	    """Implements Algorithm 2 (Heun steps) from Karras et al. (2022).

	    Args:
	        denoiser: callable ``denoiser(x, sigma)`` returning the denoised estimate.
	        x: initial noisy sample.
	        sigmas: decreasing noise schedule; one step is taken per adjacent pair.
	        generator: noise source exposing ``randn_like``.
	        progress: wrap the loop in a tqdm progress bar.
	        callback: optional callable receiving a dict with keys "x", "i",
	            "sigma", "sigma_hat" and "denoised" once per step.
	        s_churn, s_tmin, s_tmax, s_noise: stochasticity ("churn") settings;
	            noise is only injected while ``s_tmin <= sigma <= s_tmax``.

	    Returns:
	        The sample after the last step.
	    """
	    s_in = x.new_ones([x.shape[0]])
	    indices = range(len(sigmas) - 1)
	    if progress:
	        from tqdm.auto import tqdm

	        indices = tqdm(indices)

	    for i in indices:
	        gamma = (
	            min(s_churn / (len(sigmas) - 1), 2**0.5 - 1)
	            if s_tmin <= sigmas[i] <= s_tmax
	            else 0.0
	        )
	        # Noise is drawn on every step (even when unused) so the random
	        # stream does not depend on gamma.
	        eps = generator.randn_like(x) * s_noise
	        sigma_hat = sigmas[i] * (gamma + 1)
	        if gamma > 0:
	            # Raise the noise level from sigmas[i] to sigma_hat.
	            x = x + eps * (sigma_hat**2 - sigmas[i] ** 2) ** 0.5
	        denoised = denoiser(x, sigma_hat * s_in)
	        d = to_d(x, sigma_hat, denoised)
	        if callback is not None:
	            callback(
	                {
	                    "x": x,
	                    "i": i,
	                    "sigma": sigmas[i],
	                    "sigma_hat": sigma_hat,
	                    "denoised": denoised,
	                }
	            )
	        dt = sigmas[i + 1] - sigma_hat
	        if sigmas[i + 1] == 0:
	            # Euler method: the correction would divide by sigma == 0.
	            x = x + d * dt
	        else:
	            # Heun's method: average the slopes at both ends of the step.
	            x_2 = x + d * dt
	            denoised_2 = denoiser(x_2, sigmas[i + 1] * s_in)
	            d_2 = to_d(x_2, sigmas[i + 1], denoised_2)
	            d_prime = (d + d_2) / 2
	            x = x + d_prime * dt
	    return x


	@th.no_grad()
	def sample_euler(
	    denoiser,
	    x,
	    sigmas,
	    generator,
	    progress=False,
	    callback=None,
	):
	    """Deterministic sampler taking one plain Euler step per schedule interval.

	    `generator` is accepted for interface parity with the other samplers but
	    is not used. `callback`, if given, receives a dict with keys "x", "i",
	    "sigma" and "denoised" once per step, before `x` is updated.
	    """
	    batch_ones = x.new_ones([x.shape[0]])
	    step_ids = range(len(sigmas) - 1)
	    if progress:
	        from tqdm.auto import tqdm

	        step_ids = tqdm(step_ids)

	    for i in step_ids:
	        sigma_cur = sigmas[i]
	        denoised = denoiser(x, sigma_cur * batch_ones)
	        derivative = to_d(x, sigma_cur, denoised)
	        if callback is not None:
	            info = {
	                "x": x,
	                "i": i,
	                "sigma": sigmas[i],
	                "denoised": denoised,
	            }
	            callback(info)
	        x = x + derivative * (sigmas[i + 1] - sigma_cur)
	    return x


	@th.no_grad()
	def sample_dpm(
	    denoiser,
	    x,
	    sigmas,
	    generator,
	    progress=False,
	    callback=None,
	    s_churn=0.0,
	    s_tmin=0.0,
	    s_tmax=float("inf"),
	    s_noise=1.0,
	):
	    """A sampler inspired by DPM-Solver-2 and Algorithm 2 from Karras et al. (2022).

	    Args:
	        denoiser: callable ``denoiser(x, sigma)`` returning the denoised estimate.
	        x: initial noisy sample.
	        sigmas: decreasing noise schedule; one step is taken per adjacent pair.
	        generator: noise source exposing ``randn_like``.
	        progress: wrap the loop in a tqdm progress bar.
	        callback: optional callable receiving a dict with keys "x", "i",
	            "sigma", "sigma_hat" and "denoised" once per step.
	        s_churn, s_tmin, s_tmax, s_noise: stochasticity ("churn") settings;
	            noise is only injected while ``s_tmin <= sigma <= s_tmax``.

	    Returns:
	        The sample after the last step.
	    """
	    s_in = x.new_ones([x.shape[0]])
	    indices = range(len(sigmas) - 1)
	    if progress:
	        from tqdm.auto import tqdm

	        indices = tqdm(indices)

	    for i in indices:
	        gamma = (
	            min(s_churn / (len(sigmas) - 1), 2**0.5 - 1)
	            if s_tmin <= sigmas[i] <= s_tmax
	            else 0.0
	        )
	        eps = generator.randn_like(x) * s_noise
	        sigma_hat = sigmas[i] * (gamma + 1)
	        if gamma > 0:
	            # Raise the noise level from sigmas[i] to sigma_hat.
	            x = x + eps * (sigma_hat**2 - sigmas[i] ** 2) ** 0.5
	        denoised = denoiser(x, sigma_hat * s_in)
	        d = to_d(x, sigma_hat, denoised)
	        if callback is not None:
	            callback(
	                {
	                    "x": x,
	                    "i": i,
	                    "sigma": sigmas[i],
	                    "sigma_hat": sigma_hat,
	                    "denoised": denoised,
	                }
	            )
	        # Midpoint method, where the midpoint is chosen according to a rho=3 Karras schedule
	        sigma_mid = ((sigma_hat ** (1 / 3) + sigmas[i + 1] ** (1 / 3)) / 2) ** 3
	        dt_1 = sigma_mid - sigma_hat
	        dt_2 = sigmas[i + 1] - sigma_hat
	        x_2 = x + d * dt_1
	        denoised_2 = denoiser(x_2, sigma_mid * s_in)
	        d_2 = to_d(x_2, sigma_mid, denoised_2)
	        # Full step taken with the slope evaluated at the midpoint.
	        x = x + d_2 * dt_2
	    return x


	@th.no_grad()
	def sample_onestep(
	    distiller,
	    x,
	    sigmas,
	    generator=None,
	    progress=False,
	    callback=None,
	):
	    """Generate in a single call of a distilled model at the first noise level.

	    `generator`, `progress` and `callback` are accepted for interface parity
	    with the other samplers but are not used.
	    """
	    batch_ones = x.new_ones([x.shape[0]])
	    first_sigma = sigmas[0] * batch_ones
	    return distiller(x, first_sigma)


	@th.no_grad()
	def stochastic_iterative_sampler(
	    distiller,
	    x,
	    sigmas,
	    generator,
	    ts,
	    progress=False,
	    callback=None,
	    t_min=0.002,
	    t_max=80.0,
	    rho=7.0,
	    steps=40,
	):
	    """Multistep sampling: alternate denoising with the model and re-noising.

	    For each adjacent pair in `ts`, the sample is denoised at the noise level
	    of the first index and then re-noised to the level of the second.

	    Args:
	        distiller: callable ``distiller(x, sigma)`` returning a denoised sample.
	        x: initial noisy sample.
	        sigmas: unused; accepted for interface parity with the other samplers.
	        generator: noise source exposing ``randn_like``.
	        ts: sequence of schedule indices in ``[0, steps - 1]``.
	        progress, callback: unused.
	        t_min, t_max, rho, steps: parameters of the noise schedule that maps
	            an index to a noise level.

	    Returns:
	        The sample after the last re-noising step (`x` itself if `ts` has
	        fewer than two entries).
	    """
	    t_max_rho = t_max ** (1 / rho)
	    t_min_rho = t_min ** (1 / rho)
	    s_in = x.new_ones([x.shape[0]])

	    for i in range(len(ts) - 1):
	        t = (t_max_rho + ts[i] / (steps - 1) * (t_min_rho - t_max_rho)) ** rho
	        x0 = distiller(x, t * s_in)
	        next_t = (t_max_rho + ts[i + 1] / (steps - 1) * (t_min_rho - t_max_rho)) ** rho
	        # Clipping keeps the square root below real despite rounding error.
	        next_t = np.clip(next_t, t_min, t_max)
	        x = x0 + generator.randn_like(x) * np.sqrt(next_t**2 - t_min**2)

	    return x


	@th.no_grad()
	def sample_progdist(
	    denoiser,
	    x,
	    sigmas,
	    generator=None,
	    progress=False,
	    callback=None,
	):
	    """Euler sampler over the schedule with its final entry dropped.

	    `generator` is accepted for interface parity with the other samplers but
	    is not used. `callback`, if given, receives a dict with keys "x", "i",
	    "sigma" and "denoised" once per step, before `x` is updated.
	    """
	    batch_ones = x.new_ones([x.shape[0]])
	    sigmas = sigmas[:-1]  # skip the zero sigma

	    step_ids = range(len(sigmas) - 1)
	    if progress:
	        from tqdm.auto import tqdm

	        step_ids = tqdm(step_ids)

	    for i in step_ids:
	        sigma_cur = sigmas[i]
	        denoised = denoiser(x, sigma_cur * batch_ones)
	        derivative = to_d(x, sigma_cur, denoised)
	        if callback is not None:
	            info = {
	                "x": x,
	                "i": i,
	                "sigma": sigma_cur,
	                "denoised": denoised,
	            }
	            callback(info)
	        x = x + derivative * (sigmas[i + 1] - sigma_cur)

	    return x


	# @th.no_grad()
	# def iterative_colorization(
	# distiller,
	# images,
	# x,
	# ts,
	# t_min=0.002,
	# t_max=80.0,
	# rho=7.0,
	# steps=40,
	# generator=None,
	# ):
	# def obtain_orthogonal_matrix():
	# vector = np.asarray([0.2989, 0.5870, 0.1140])
	# vector = vector / np.linalg.norm(vector)
	# matrix = np.eye(3)
	# matrix[:, 0] = vector
	# matrix = np.linalg.qr(matrix)[0]
	# if np.sum(matrix[:, 0]) < 0:
	# matrix = -matrix
	# return matrix

	# Q = th.from_numpy(obtain_orthogonal_matrix()).to(dist_util.dev()).to(th.float32)
	# mask = th.zeros(*x.shape[1:], device=dist_util.dev())
	# mask[0, ...] = 1.0

	# def replacement(x0, x1):
	# x0 = th.einsum("bchw,cd->bdhw", x0, Q)
	# x1 = th.einsum("bchw,cd->bdhw", x1, Q)

	# x_mix = x0 * mask + x1 * (1.0 - mask)
	# x_mix = th.einsum("bdhw,cd->bchw", x_mix, Q)
	# return x_mix

	# t_max_rho = t_max ** (1 / rho)
	# t_min_rho = t_min ** (1 / rho)
	# s_in = x.new_ones([x.shape[0]])
	# images = replacement(images, th.zeros_like(images))

	# for i in range(len(ts) - 1):
	# t = (t_max_rho + ts[i] / (steps - 1) * (t_min_rho - t_max_rho)) ** rho
	# x0 = distiller(x, t * s_in)
	# x0 = th.clamp(x0, -1.0, 1.0)
	# x0 = replacement(images, x0)
	# next_t = (t_max_rho + ts[i + 1] / (steps - 1) * (t_min_rho - t_max_rho)) ** rho
	# next_t = np.clip(next_t, t_min, t_max)
	# x = x0 + generator.randn_like(x) * np.sqrt(next_t**2 - t_min**2)

	# return x, images


	# @th.no_grad()
	# def iterative_inpainting(
	# distiller,
	# images,
	# x,
	# ts,
	# t_min=0.002,
	# t_max=80.0,
	# rho=7.0,
	# steps=40,
	# generator=None,
	# ):
	# from PIL import Image, ImageDraw, ImageFont

	# image_size = x.shape[-1]

	# # create a blank image with a white background
	# img = Image.new("RGB", (image_size, image_size), color="white")

	# # get a drawing context for the image
	# draw = ImageDraw.Draw(img)

	# # load a font
	# font = ImageFont.truetype("arial.ttf", 250)

	# # draw the letter "C" in black
	# draw.text((50, 0), "S", font=font, fill=(0, 0, 0))

	# # convert the image to a numpy array
	# img_np = np.array(img)
	# img_np = img_np.transpose(2, 0, 1)
	# img_th = th.from_numpy(img_np).to(dist_util.dev())

	# mask = th.zeros(*x.shape, device=dist_util.dev())
	# mask = mask.reshape(-1, 7, 3, image_size, image_size)

	# mask[::2, :, img_th > 0.5] = 1.0
	# mask[1::2, :, img_th < 0.5] = 1.0
	# mask = mask.reshape(-1, 3, image_size, image_size)

	# def replacement(x0, x1):
	# x_mix = x0 * mask + x1 * (1 - mask)
	# return x_mix

	# t_max_rho = t_max ** (1 / rho)
	# t_min_rho = t_min ** (1 / rho)
	# s_in = x.new_ones([x.shape[0]])
	# images = replacement(images, -th.ones_like(images))

	# for i in range(len(ts) - 1):
	# t = (t_max_rho + ts[i] / (steps - 1) * (t_min_rho - t_max_rho)) ** rho
	# x0 = distiller(x, t * s_in)
	# x0 = th.clamp(x0, -1.0, 1.0)
	# x0 = replacement(images, x0)
	# next_t = (t_max_rho + ts[i + 1] / (steps - 1) * (t_min_rho - t_max_rho)) ** rho
	# next_t = np.clip(next_t, t_min, t_max)
	# x = x0 + generator.randn_like(x) * np.sqrt(next_t**2 - t_min**2)

	# return x, images


	# @th.no_grad()
	# def iterative_superres(
	# distiller,
	# images,
	# x,
	# ts,
	# t_min=0.002,
	# t_max=80.0,
	# rho=7.0,
	# steps=40,
	# generator=None,
	# ):
	# patch_size = 8

	# def obtain_orthogonal_matrix():
	# vector = np.asarray([1] * patch_size**2)
	# vector = vector / np.linalg.norm(vector)
	# matrix = np.eye(patch_size**2)
	# matrix[:, 0] = vector
	# matrix = np.linalg.qr(matrix)[0]
	# if np.sum(matrix[:, 0]) < 0:
	# matrix = -matrix
	# return matrix

	# Q = th.from_numpy(obtain_orthogonal_matrix()).to(dist_util.dev()).to(th.float32)

	# image_size = x.shape[-1]

	# def replacement(x0, x1):
	# x0_flatten = (
	# x0.reshape(-1, 3, image_size, image_size)
	# .reshape(
	# -1,
	# 3,
	# image_size // patch_size,
	# patch_size,
	# image_size // patch_size,
	# patch_size,
	# )
	# .permute(0, 1, 2, 4, 3, 5)
	# .reshape(-1, 3, image_size**2 // patch_size**2, patch_size**2)
	# )
	# x1_flatten = (
	# x1.reshape(-1, 3, image_size, image_size)
	# .reshape(
	# -1,
	# 3,
	# image_size // patch_size,
	# patch_size,
	# image_size // patch_size,
	# patch_size,
	# )
	# .permute(0, 1, 2, 4, 3, 5)
	# .reshape(-1, 3, image_size**2 // patch_size**2, patch_size**2)
	# )
	# x0 = th.einsum("bcnd,de->bcne", x0_flatten, Q)
	# x1 = th.einsum("bcnd,de->bcne", x1_flatten, Q)
	# x_mix = x0.new_zeros(x0.shape)
	# x_mix[..., 0] = x0[..., 0]
	# x_mix[..., 1:] = x1[..., 1:]
	# x_mix = th.einsum("bcne,de->bcnd", x_mix, Q)
	# x_mix = (
	# x_mix.reshape(
	# -1,
	# 3,
	# image_size // patch_size,
	# image_size // patch_size,
	# patch_size,
	# patch_size,
	# )
	# .permute(0, 1, 2, 4, 3, 5)
	# .reshape(-1, 3, image_size, image_size)
	# )
	# return x_mix

	# def average_image_patches(x):
	# x_flatten = (
	# x.reshape(-1, 3, image_size, image_size)
	# .reshape(
	# -1,
	# 3,
	# image_size // patch_size,
	# patch_size,
	# image_size // patch_size,
	# patch_size,
	# )
	# .permute(0, 1, 2, 4, 3, 5)
	# .reshape(-1, 3, image_size**2 // patch_size**2, patch_size**2)
	# )
	# x_flatten[..., :] = x_flatten.mean(dim=-1, keepdim=True)
	# return (
	# x_flatten.reshape(
	# -1,
	# 3,
	# image_size // patch_size,
	# image_size // patch_size,
	# patch_size,
	# patch_size,
	# )
	# .permute(0, 1, 2, 4, 3, 5)
	# .reshape(-1, 3, image_size, image_size)
	# )

	# t_max_rho = t_max ** (1 / rho)
	# t_min_rho = t_min ** (1 / rho)
	# s_in = x.new_ones([x.shape[0]])
	# images = average_image_patches(images)

	# for i in range(len(ts) - 1):
	# t = (t_max_rho + ts[i] / (steps - 1) * (t_min_rho - t_max_rho)) ** rho
	# x0 = distiller(x, t * s_in)
	# x0 = th.clamp(x0, -1.0, 1.0)
	# x0 = replacement(images, x0)
	# next_t = (t_max_rho + ts[i + 1] / (steps - 1) * (t_min_rho - t_max_rho)) ** rho
	# next_t = np.clip(next_t, t_min, t_max)
	# x = x0 + generator.randn_like(x) * np.sqrt(next_t**2 - t_min**2)

	# return x, images