Spaces:

TencentARC
/

NVComposer

Running on Zero

App Files Files Community

NVComposer / core /models /utils_diffusion.py

l-li

init(*): initialization.

0b23d5a 7 months ago

raw

history blame contribute delete

6.88 kB

	import math

	import numpy as np
	import torch
	from einops import repeat


	def timestep_embedding(time_steps, dim, max_period=10000, repeat_only=False):
	"""
	Create sinusoidal timestep embeddings.
	:param time_steps: a 1-D Tensor of N indices, one per batch element.
	These may be fractional.
	:param dim: the dimension of the output.
	:param max_period: controls the minimum frequency of the embeddings.
	:return: an [N x dim] Tensor of positional embeddings.
	"""
	if not repeat_only:
	half = dim // 2
	freqs = torch.exp(
	-math.log(max_period)
	* torch.arange(start=0, end=half, dtype=torch.float32)
	/ half
	).to(device=time_steps.device)
	args = time_steps[:, None].float() * freqs[None]
	embedding = torch.cat([torch.cos(args), torch.sin(args)], dim=-1)
	if dim % 2:
	embedding = torch.cat(
	[embedding, torch.zeros_like(embedding[:, :1])], dim=-1
	)
	else:
	embedding = repeat(time_steps, "b -> b d", d=dim)
	return embedding


	def make_beta_schedule(
	schedule, n_timestep, linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3
	):
	if schedule == "linear":
	betas = (
	torch.linspace(
	linear_start0.5, linear_end0.5, n_timestep, dtype=torch.float64
	)
	** 2
	)

	elif schedule == "cosine":
	time_steps = (
	torch.arange(n_timestep + 1, dtype=torch.float64) / n_timestep + cosine_s
	)
	alphas = time_steps / (1 + cosine_s) * np.pi / 2
	alphas = torch.cos(alphas).pow(2)
	alphas = alphas / alphas[0]
	betas = 1 - alphas[1:] / alphas[:-1]
	betas = np.clip(betas, a_min=0, a_max=0.999)

	elif schedule == "sqrt_linear":
	betas = torch.linspace(
	linear_start, linear_end, n_timestep, dtype=torch.float64
	)
	elif schedule == "sqrt":
	betas = (
	torch.linspace(linear_start, linear_end, n_timestep, dtype=torch.float64)
	** 0.5
	)
	else:
	raise ValueError(f"schedule '{schedule}' unknown.")
	return betas.numpy()


	def make_ddim_time_steps(
	ddim_discr_method, num_ddim_time_steps, num_ddpm_time_steps, verbose=True
	):
	if ddim_discr_method == "uniform":
	c = num_ddpm_time_steps // num_ddim_time_steps
	ddim_time_steps = np.asarray(list(range(0, num_ddpm_time_steps, c)))
	steps_out = ddim_time_steps + 1
	elif ddim_discr_method == "quad":
	ddim_time_steps = (
	(np.linspace(0, np.sqrt(num_ddpm_time_steps * 0.8), num_ddim_time_steps))
	** 2
	).astype(int)
	steps_out = ddim_time_steps + 1
	elif ddim_discr_method == "uniform_trailing":
	c = num_ddpm_time_steps / num_ddim_time_steps
	ddim_time_steps = np.flip(
	np.round(np.arange(num_ddpm_time_steps, 0, -c))
	).astype(np.int64)
	steps_out = ddim_time_steps - 1
	else:
	raise NotImplementedError(
	f'There is no ddim discretization method called "{ddim_discr_method}"'
	)

	# assert ddim_time_steps.shape[0] == num_ddim_time_steps
	# add one to get the final alpha values right (the ones from first scale to data during sampling)
	if verbose:
	print(f"Selected time_steps for ddim sampler: {steps_out}")
	return steps_out


	def make_ddim_sampling_parameters(alphacums, ddim_time_steps, eta, verbose=True):
	# select alphas for computing the variance schedule
	# print(f'ddim_time_steps={ddim_time_steps}, len_alphacums={len(alphacums)}')
	alphas = alphacums[ddim_time_steps]
	alphas_prev = np.asarray([alphacums[0]] + alphacums[ddim_time_steps[:-1]].tolist())

	# according the the formula provided in https://arxiv.org/abs/2010.02502
	sigmas = eta * np.sqrt(
	(1 - alphas_prev) / (1 - alphas) * (1 - alphas / alphas_prev)
	)
	if verbose:
	print(
	f"Selected alphas for ddim sampler: a_t: {alphas}; a_(t-1): {alphas_prev}"
	)
	print(
	f"For the chosen value of eta, which is {eta}, "
	f"this results in the following sigma_t schedule for ddim sampler {sigmas}"
	)
	return sigmas, alphas, alphas_prev


	def betas_for_alpha_bar(num_diffusion_time_steps, alpha_bar, max_beta=0.999):
	"""
	Create a beta schedule that discretizes the given alpha_t_bar function,
	which defines the cumulative product of (1-beta) over time from t = [0,1].
	:param num_diffusion_time_steps: the number of betas to produce.
	:param alpha_bar: a lambda that takes an argument t from 0 to 1 and
	produces the cumulative product of (1-beta) up to that
	part of the diffusion process.
	:param max_beta: the maximum beta to use; use values lower than 1 to
	prevent singularities.
	"""
	betas = []
	for i in range(num_diffusion_time_steps):
	t1 = i / num_diffusion_time_steps
	t2 = (i + 1) / num_diffusion_time_steps
	betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta))
	return np.array(betas)


	def rescale_zero_terminal_snr(betas):
	"""
	Rescales betas to have zero terminal SNR Based on https://arxiv.org/pdf/2305.08891.pdf (Algorithm 1)

	Args:
	betas (`numpy.ndarray`):
	the betas that the scheduler is being initialized with.

	Returns:
	`numpy.ndarray`: rescaled betas with zero terminal SNR
	"""
	# Convert betas to alphas_bar_sqrt
	alphas = 1.0 - betas
	alphas_cumprod = np.cumprod(alphas, axis=0)
	alphas_bar_sqrt = np.sqrt(alphas_cumprod)

	# Store old values.
	alphas_bar_sqrt_0 = alphas_bar_sqrt[0].copy()
	alphas_bar_sqrt_T = alphas_bar_sqrt[-1].copy()

	# Shift so the last timestep is zero.
	alphas_bar_sqrt -= alphas_bar_sqrt_T

	# Scale so the first timestep is back to the old value.
	alphas_bar_sqrt *= alphas_bar_sqrt_0 / (alphas_bar_sqrt_0 - alphas_bar_sqrt_T)

	# Convert alphas_bar_sqrt to betas
	alphas_bar = alphas_bar_sqrt**2 # Revert sqrt
	alphas = alphas_bar[1:] / alphas_bar[:-1] # Revert cumprod
	alphas = np.concatenate([alphas_bar[0:1], alphas])
	betas = 1 - alphas

	return betas


	def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
	"""
	Rescale `noise_cfg` according to `guidance_rescale`. Based on findings of [Common Diffusion Noise Schedules and
	Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf). See Section 3.4
	"""
	std_text = noise_pred_text.std(
	dim=list(range(1, noise_pred_text.ndim)), keepdim=True
	)
	std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True)
	factor = guidance_rescale * (std_text / std_cfg) + (1 - guidance_rescale)
	return noise_cfg * factor