Spaces:

haodongli
/

LucidDreamer

Runtime error

App Files Files Community

LucidDreamer / guidance /sd_step.py

haodongli

init

916b126 11 months ago

raw

history blame

12.1 kB

	from transformers import CLIPTextModel, CLIPTokenizer, logging
	from diffusers import StableDiffusionPipeline, DiffusionPipeline, DDPMScheduler, DDIMScheduler, EulerDiscreteScheduler, \
	EulerAncestralDiscreteScheduler, DPMSolverMultistepScheduler, ControlNetModel, \
	DDIMInverseScheduler
	from diffusers.utils import BaseOutput, deprecate

	import numpy as np
	import torch
	import torch.nn as nn
	import torch.nn.functional as F
	import torchvision.transforms as T

	from typing import List, Optional, Tuple, Union
	from dataclasses import dataclass

	from diffusers.utils import BaseOutput, randn_tensor


	@dataclass
	# Copied from diffusers.schedulers.scheduling_ddpm.DDPMSchedulerOutput with DDPM->DDIM
	class DDIMSchedulerOutput(BaseOutput):
	    """
	    Output class for the scheduler's `step` function output.

	    Args:
	        prev_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
	            Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model
	            input in the denoising loop.
	        pred_original_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
	            The predicted denoised sample `(x_{0})` based on the model output from the current timestep.
	            `pred_original_sample` can be used to preview progress or for guidance.
	    """

	    # Required: the sample handed to the next iteration of the denoising loop.
	    prev_sample: torch.FloatTensor
	    # Optional: the x_0 estimate; defaults to None when the caller does not supply it.
	    pred_original_sample: Optional[torch.FloatTensor] = None

	# Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.add_noise
	def ddim_add_noise(
	    self,
	    original_samples: torch.FloatTensor,
	    noise: torch.FloatTensor,
	    timesteps: torch.IntTensor,
	) -> torch.FloatTensor:
	    """
	    Forward-diffuse `original_samples` to the noise level of `timesteps`.

	    Computes `sqrt(alpha_bar_t) * original_samples + sqrt(1 - alpha_bar_t) * noise`, where `alpha_bar_t`
	    is looked up in `self.alphas_cumprod` at `timesteps`.

	    Args:
	        original_samples (`torch.FloatTensor`):
	            Clean samples. Their device and dtype are used for the computation.
	        noise (`torch.FloatTensor`):
	            Noise to mix in; must broadcast against `original_samples`.
	        timesteps (`torch.IntTensor`):
	            Indices into `self.alphas_cumprod`. After flattening, each entry is paired with the
	            corresponding leading-dimension entry of `original_samples` through broadcasting.

	    Returns:
	        `torch.FloatTensor`: the noised samples.
	    """
	    # Make sure alphas_cumprod and timestep have same device and dtype as original_samples
	    alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device, dtype=original_samples.dtype)
	    timesteps = timesteps.to(original_samples.device)

	    # Flatten to one coefficient per timestep, then append singleton axes until the rank matches the
	    # samples so the coefficient broadcasts over every non-leading dimension.
	    sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5
	    sqrt_alpha_prod = sqrt_alpha_prod.flatten()
	    while len(sqrt_alpha_prod.shape) < len(original_samples.shape):
	        sqrt_alpha_prod = sqrt_alpha_prod.unsqueeze(-1)

	    sqrt_one_minus_alpha_prod = (1 - alphas_cumprod[timesteps]) ** 0.5
	    sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.flatten()
	    while len(sqrt_one_minus_alpha_prod.shape) < len(original_samples.shape):
	        sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.unsqueeze(-1)

	    noisy_samples = sqrt_alpha_prod * original_samples + sqrt_one_minus_alpha_prod * noise
	    return noisy_samples

	# Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.step
	def ddim_step(
	    self,
	    model_output: torch.FloatTensor,
	    timestep: int,
	    sample: torch.FloatTensor,
	    delta_timestep: Optional[int] = None,
	    eta: float = 0.0,
	    use_clipped_model_output: bool = False,
	    generator=None,
	    variance_noise: Optional[torch.FloatTensor] = None,
	    return_dict: bool = True,
	    **kwargs
	) -> Union[DDIMSchedulerOutput, Tuple]:
	    """
	    Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion
	    process from the learned model outputs (most often the predicted noise).

	    Args:
	        model_output (`torch.FloatTensor`):
	            The direct output from learned diffusion model.
	        timestep (`int`):
	            The current discrete timestep in the diffusion chain; used to index `self.alphas_cumprod`.
	        sample (`torch.FloatTensor`):
	            A current instance of a sample created by the diffusion process.
	        delta_timestep (`int`, optional):
	            Number of training timesteps to subtract from `timestep` to obtain the target timestep. When
	            `None`, `self.config.num_train_timesteps // self.num_inference_steps` is used.
	        eta (`float`):
	            The weight of noise for added noise in diffusion step.
	        use_clipped_model_output (`bool`, defaults to `False`):
	            If `True`, computes "corrected" `model_output` from the clipped predicted original sample. Necessary
	            because predicted original sample is clipped to [-1, 1] when `self.config.clip_sample` is `True`. If
	            no clipping has happened, "corrected" `model_output` would coincide with the one provided as input
	            and `use_clipped_model_output` has no effect.
	        generator (`torch.Generator`, optional):
	            A random number generator.
	        variance_noise (`torch.FloatTensor`):
	            Alternative to generating noise with `generator` by directly providing the noise for the variance
	            itself. Useful for methods such as [`CycleDiffusion`].
	        return_dict (`bool`, optional, defaults to `True`):
	            Whether or not to return a [`~schedulers.scheduling_ddim.DDIMSchedulerOutput`] or `tuple`.
	        **kwargs:
	            Accepted for call-site compatibility and ignored.

	    Returns:
	        [`~schedulers.scheduling_utils.DDIMSchedulerOutput`] or `tuple`:
	            If return_dict is `True`, [`~schedulers.scheduling_ddim.DDIMSchedulerOutput`] is returned, otherwise
	            a tuple is returned where the first element is the sample tensor.

	    Raises:
	        ValueError: if `self.num_inference_steps` is `None`, if `self.config.prediction_type` is not one of
	            `epsilon`, `sample`, `v_prediction`, or if both `generator` and `variance_noise` are given
	            while `eta > 0`.
	    """
	    if self.num_inference_steps is None:
	        raise ValueError(
	            "Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler"
	        )

	    # See formulas (12) and (16) of DDIM paper https://arxiv.org/pdf/2010.02502.pdf
	    # Ideally, read DDIM paper in-detail understanding

	    # Notation (<variable name> -> <name in paper>
	    # - pred_noise_t -> e_theta(x_t, t)
	    # - pred_original_sample -> f_theta(x_t, t) or x_0
	    # - std_dev_t -> sigma_t
	    # - eta -> η
	    # - pred_sample_direction -> "direction pointing to x_t"
	    # - pred_prev_sample -> "x_t-1"

	    # 1. get previous step value (=t-1)
	    if delta_timestep is None:
	        prev_timestep = timestep - self.config.num_train_timesteps // self.num_inference_steps
	    else:
	        prev_timestep = timestep - delta_timestep

	    # 2. compute alphas, betas
	    alpha_prod_t = self.alphas_cumprod[timestep]
	    # Stepping past the start of the schedule falls back to the scheduler's final alpha.
	    alpha_prod_t_prev = self.alphas_cumprod[prev_timestep] if prev_timestep >= 0 else self.final_alpha_cumprod

	    beta_prod_t = 1 - alpha_prod_t

	    # 3. compute predicted original sample from predicted noise also called
	    # "predicted x_0" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf
	    if self.config.prediction_type == "epsilon":
	        pred_original_sample = (sample - beta_prod_t ** (0.5) * model_output) / alpha_prod_t ** (0.5)
	        pred_epsilon = model_output
	    elif self.config.prediction_type == "sample":
	        pred_original_sample = model_output
	        pred_epsilon = (sample - alpha_prod_t ** (0.5) * pred_original_sample) / beta_prod_t ** (0.5)
	    elif self.config.prediction_type == "v_prediction":
	        pred_original_sample = (alpha_prod_t**0.5) * sample - (beta_prod_t**0.5) * model_output
	        pred_epsilon = (alpha_prod_t**0.5) * model_output + (beta_prod_t**0.5) * sample
	    else:
	        raise ValueError(
	            f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, `sample`, or"
	            " `v_prediction`"
	        )

	    # 4. Clip or threshold "predicted x_0"
	    if self.config.thresholding:
	        pred_original_sample = self._threshold_sample(pred_original_sample)
	    elif self.config.clip_sample:
	        pred_original_sample = pred_original_sample.clamp(
	            -self.config.clip_sample_range, self.config.clip_sample_range
	        )

	    # 5. compute variance: "sigma_t(η)" -> see formula (16)
	    # σ_t = sqrt((1 − α_t−1)/(1 − α_t)) * sqrt(1 − α_t/α_t−1)
	    # abs() keeps the variance non-negative whichever direction the step goes.
	    variance = abs(self._get_variance(timestep, prev_timestep))

	    # Unlike the reference DDIM step, eta scales the variance itself, and the result is capped at
	    # (1 - alpha_prod_t_prev) / 2 before the square root so that the radicand in step 6 cannot go
	    # negative (assuming alpha_prod_t_prev <= 1).
	    std_dev_t = eta * variance
	    std_dev_t = min((1 - alpha_prod_t_prev) / 2, std_dev_t) ** 0.5

	    if use_clipped_model_output:
	        # the pred_epsilon is always re-derived from the clipped x_0 in Glide
	        pred_epsilon = (sample - alpha_prod_t ** (0.5) * pred_original_sample) / beta_prod_t ** (0.5)

	    # 6. compute "direction pointing to x_t" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf
	    pred_sample_direction = (1 - alpha_prod_t_prev - std_dev_t**2) ** (0.5) * pred_epsilon

	    # 7. compute x_t without "random noise" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf
	    prev_sample = alpha_prod_t_prev ** (0.5) * pred_original_sample + pred_sample_direction

	    if eta > 0:
	        if variance_noise is not None and generator is not None:
	            raise ValueError(
	                "Cannot pass both generator and variance_noise. Please make sure that either `generator` or"
	                " `variance_noise` stays `None`."
	            )

	        if variance_noise is None:
	            variance_noise = randn_tensor(
	                model_output.shape, generator=generator, device=model_output.device, dtype=model_output.dtype
	            )
	        variance = std_dev_t * variance_noise

	        prev_sample = prev_sample + variance

	    # Replace any NaN/inf produced above with finite values.
	    prev_sample = torch.nan_to_num(prev_sample)

	    if not return_dict:
	        return (prev_sample,)

	    return DDIMSchedulerOutput(prev_sample=prev_sample, pred_original_sample=pred_original_sample)

	def pred_original(
	    self,
	    model_output: torch.FloatTensor,
	    timesteps: torch.Tensor,
	    sample: torch.FloatTensor,
	):
	    """
	    Estimate the denoised sample `x_0` from a model output, dispatching on the scheduler type of `self`.

	    Args:
	        model_output (`torch.FloatTensor`):
	            The direct output of the diffusion model for `sample`.
	        timesteps (`torch.Tensor`):
	            Timestep(s) of `sample`. Must be a tensor (`.to()` is called on it). For DDPM/DDIM schedulers
	            it indexes `self.alphas_cumprod`; for Euler schedulers it must match exactly one entry of
	            `self.timesteps`.
	        sample (`torch.FloatTensor`):
	            The noisy sample the model output was computed for.

	    Returns:
	        The predicted original sample. For DDPM/DDIM schedulers it is thresholded or clipped according to
	        `self.config`.

	    Raises:
	        ValueError: if `self.config.prediction_type` is not recognised for the scheduler type.
	        NotImplementedError: for Euler schedulers with `prediction_type == "sample"`, or when `self` is
	            not one of the supported scheduler classes.
	    """
	    if isinstance(self, (DDPMScheduler, DDIMScheduler)):
	        # Make sure alphas_cumprod and timestep have same device and dtype as original_samples
	        alphas_cumprod = self.alphas_cumprod.to(device=sample.device, dtype=sample.dtype)
	        timesteps = timesteps.to(sample.device)

	        # 1. compute alphas, betas
	        alpha_prod_t = alphas_cumprod[timesteps]
	        # Append singleton axes so the coefficient broadcasts against `sample`.
	        while len(alpha_prod_t.shape) < len(sample.shape):
	            alpha_prod_t = alpha_prod_t.unsqueeze(-1)

	        beta_prod_t = 1 - alpha_prod_t

	        # 2. compute predicted original sample from predicted noise also called
	        # "predicted x_0" of formula (15) from https://arxiv.org/pdf/2006.11239.pdf
	        if self.config.prediction_type == "epsilon":
	            pred_original_sample = (sample - beta_prod_t ** (0.5) * model_output) / alpha_prod_t ** (0.5)
	        elif self.config.prediction_type == "sample":
	            pred_original_sample = model_output
	        elif self.config.prediction_type == "v_prediction":
	            pred_original_sample = (alpha_prod_t**0.5) * sample - (beta_prod_t**0.5) * model_output
	        else:
	            raise ValueError(
	                f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, `sample` or"
	                " `v_prediction` for the DDPMScheduler."
	            )

	        # 3. Clip or threshold "predicted x_0"
	        if self.config.thresholding:
	            pred_original_sample = self._threshold_sample(pred_original_sample)
	        elif self.config.clip_sample:
	            pred_original_sample = pred_original_sample.clamp(
	                -self.config.clip_sample_range, self.config.clip_sample_range
	            )
	    elif isinstance(self, (EulerAncestralDiscreteScheduler, EulerDiscreteScheduler)):
	        timestep = timesteps.to(self.timesteps.device)

	        # `.item()` requires exactly one schedule entry equal to `timestep`.
	        step_index = (self.timesteps == timestep).nonzero().item()
	        sigma = self.sigmas[step_index].to(device=sample.device, dtype=sample.dtype)

	        # 1. compute predicted original sample (x_0) from sigma-scaled predicted noise
	        if self.config.prediction_type == "epsilon":
	            pred_original_sample = sample - sigma * model_output
	        elif self.config.prediction_type == "v_prediction":
	            # * c_out + input * c_skip
	            pred_original_sample = model_output * (-sigma / (sigma**2 + 1) ** 0.5) + (sample / (sigma**2 + 1))
	        elif self.config.prediction_type == "sample":
	            raise NotImplementedError("prediction_type not implemented yet: sample")
	        else:
	            raise ValueError(
	                f"prediction_type given as {self.config.prediction_type} must be one of `epsilon`, or `v_prediction`"
	            )
	    else:
	        raise NotImplementedError

	    return pred_original_sample