Spaces:

Warvito
/

diffusion_brain

Build error

App Files Files Community

diffusion_brain / models /ddpm_v2_conditioned.py

Warvito

commit message

c9cd3be about 2 years ago

raw

history blame contribute delete

No virus

15.8 kB

	from functools import partial
	from inspect import isfunction

	import numpy as np
	import torch
	import torch.nn as nn
	import torch.nn.functional as F
	from tqdm import tqdm

	from models.unet_v2_conditioned import UNetModel


	def exists(x):
	    """Return ``True`` when ``x`` is not ``None``, ``False`` otherwise."""
	    return x is not None


	def default(val, d):
	    """Return ``val`` unless it is ``None``, otherwise fall back to ``d``.

	    :param val: the candidate value; any value other than ``None`` is returned as is
	        (falsy values such as ``0`` are kept).
	    :param d: the fallback. When it is a plain Python function (as reported by
	        ``inspect.isfunction``) it is called with no arguments and its result is
	        returned, so the fallback is only built when it is actually needed.
	    :return: ``val``, ``d()`` or ``d``.
	    """
	    if val is not None:
	        return val
	    return d() if isfunction(d) else d


	def noise_like(shape, device, repeat=False):
	    """Draw standard-normal noise of the given shape.

	    :param shape: full output shape; the first entry is treated as the batch size.
	    :param device: device on which the noise tensor is created.
	    :param repeat: if True, a single sample of shape ``(1, *shape[1:])`` is drawn and
	        tiled along the first dimension, so every batch entry gets identical noise.
	    :return: a tensor of shape ``shape``.
	    """
	    if repeat:
	        # One draw for the whole batch, then tile it along dim 0 only.
	        single = torch.randn((1, *shape[1:]), device=device)
	        return single.repeat(shape[0], *((1,) * (len(shape) - 1)))
	    return torch.randn(shape, device=device)


	def extract(a, t, x_shape):
	    """Gather per-sample coefficients from ``a`` and shape them for broadcasting.

	    :param a: tensor of coefficients, indexed along its last dimension.
	    :param t: index tensor whose first dimension is the batch size.
	    :param x_shape: shape of the tensor the result will be multiplied with; only its
	        length is used.
	    :return: ``a`` gathered at ``t``, reshaped to ``(batch, 1, ..., 1)`` with
	        ``len(x_shape) - 1`` trailing singleton dimensions.
	    """
	    batch = t.shape[0]
	    gathered = a.gather(-1, t)
	    return gathered.reshape(batch, *((1,) * (len(x_shape) - 1)))


	def make_beta_schedule(schedule, n_timestep, linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3):
	    """Build the per-timestep beta values of a diffusion noise schedule.

	    :param schedule: one of ``"linear"``, ``"cosine"``, ``"sqrt_linear"`` or ``"sqrt"``.
	    :param n_timestep: number of diffusion steps (length of the returned array).
	    :param linear_start: first beta value for the linspace-based schedules.
	    :param linear_end: last beta value for the linspace-based schedules.
	    :param cosine_s: offset added to the normalised timesteps of the cosine schedule.
	    :return: a float64 NumPy array with ``n_timestep`` entries.
	    :raises ValueError: if ``schedule`` is not one of the supported names.
	    """
	    if schedule == "linear":
	        # Interpolate linearly between the square roots of the end points, then square.
	        betas = torch.linspace(
	            linear_start ** 0.5, linear_end ** 0.5, n_timestep, dtype=torch.float64
	        ) ** 2
	    elif schedule == "cosine":
	        timesteps = torch.arange(n_timestep + 1, dtype=torch.float64) / n_timestep + cosine_s
	        alphas = timesteps / (1 + cosine_s) * np.pi / 2
	        alphas = torch.cos(alphas).pow(2)
	        alphas = alphas / alphas[0]
	        betas = 1 - alphas[1:] / alphas[:-1]
	        # Clamp with torch so ``betas`` stays a tensor for the final ``.numpy()`` call.
	        betas = torch.clamp(betas, min=0, max=0.999)
	    elif schedule == "sqrt_linear":
	        betas = torch.linspace(linear_start, linear_end, n_timestep, dtype=torch.float64)
	    elif schedule == "sqrt":
	        betas = torch.linspace(linear_start, linear_end, n_timestep, dtype=torch.float64) ** 0.5
	    else:
	        raise ValueError(f"schedule '{schedule}' unknown.")
	    return betas.numpy()


	class DDPM(nn.Module):
	    """Denoising diffusion probabilistic model with a fixed variance schedule.

	    The class wraps a UNet (through ``DiffusionWrapper``), registers the schedule
	    constants as buffers, and provides the forward diffusion ``q``, the learned reverse
	    step ``p``, an ancestral sampling loop and the training loss.
	    """

	    def __init__(
	        self,
	        unet_config,
	        timesteps: int = 1000,
	        beta_schedule="linear",
	        loss_type="l2",
	        log_every_t=100,
	        clip_denoised=False,
	        linear_start=1e-4,
	        linear_end=2e-2,
	        cosine_s=8e-3,
	        original_elbo_weight=0.,
	        v_posterior=0.,  # weight for choosing posterior variance as sigma = (1-v) * beta_tilde + v * beta
	        l_simple_weight=1.,
	        parameterization="eps",  # all assuming fixed variance schedules
	        learn_logvar=False,
	        logvar_init=0.,
	        conditioning_key=None,
	    ):
	        """
	        :param unet_config: configuration forwarded to ``DiffusionWrapper``.
	        :param timesteps: number of diffusion steps.
	        :param beta_schedule: schedule name passed to ``make_beta_schedule``.
	        :param loss_type: ``"l1"`` or ``"l2"``; see ``get_loss``.
	        :param log_every_t: interval at which ``p_sample_loop`` stores intermediates.
	        :param clip_denoised: whether ``p_sample_loop`` clamps the predicted x_0 to [-1, 1].
	        :param linear_start: schedule start value.
	        :param linear_end: schedule end value.
	        :param cosine_s: offset of the cosine schedule.
	        :param original_elbo_weight: weight of the variational-bound term in the loss.
	        :param v_posterior: mixing weight between beta_tilde and beta for the posterior variance.
	        :param l_simple_weight: weight of the simple loss term.
	        :param parameterization: ``"eps"`` (model predicts noise) or ``"x0"`` (model predicts x_0).
	        :param learn_logvar: if True, the per-timestep log-variance is a trainable parameter.
	        :param logvar_init: initial value of the per-timestep log-variance.
	        :param conditioning_key: conditioning mode; ``"unconditioned"`` is mapped to ``None``.
	        """
	        super().__init__()
	        assert parameterization in ["eps", "x0"], 'currently only supporting "eps" and "x0"'
	        self.parameterization = parameterization

	        if conditioning_key == "unconditioned":
	            conditioning_key = None
	        self.conditioning_key = conditioning_key
	        self.model = DiffusionWrapper(unet_config, conditioning_key)

	        self.clip_denoised = clip_denoised
	        self.log_every_t = log_every_t

	        # These must be set before register_schedule, which reads them.
	        self.v_posterior = v_posterior
	        self.original_elbo_weight = original_elbo_weight
	        self.l_simple_weight = l_simple_weight

	        self.loss_type = loss_type

	        self.register_schedule(
	            beta_schedule=beta_schedule,
	            timesteps=timesteps,
	            linear_start=linear_start,
	            linear_end=linear_end,
	            cosine_s=cosine_s,
	        )

	        self.learn_logvar = learn_logvar
	        # Plain tensor unless learn_logvar is set, so it does not follow ``module.to(...)``;
	        # p_losses moves it to the right device before indexing.
	        self.logvar = torch.full(fill_value=logvar_init, size=(self.num_timesteps,))
	        if self.learn_logvar:
	            self.logvar = nn.Parameter(self.logvar, requires_grad=True)

	    def register_schedule(
	        self,
	        beta_schedule="linear",
	        timesteps=1000,
	        linear_start=1e-4,
	        linear_end=2e-2,
	        cosine_s=8e-3,
	    ):
	        """Compute the noise schedule and register all derived constants as buffers.

	        Reads ``self.v_posterior`` and ``self.parameterization``, so both must be set
	        before this is called.
	        """
	        betas = make_beta_schedule(
	            beta_schedule,
	            timesteps,
	            linear_start=linear_start,
	            linear_end=linear_end,
	            cosine_s=cosine_s,
	        )
	        alphas = 1. - betas
	        alphas_cumprod = np.cumprod(alphas, axis=0)
	        alphas_cumprod_prev = np.append(1., alphas_cumprod[:-1])

	        timesteps, = betas.shape
	        self.num_timesteps = int(timesteps)
	        self.linear_start = linear_start
	        self.linear_end = linear_end

	        to_torch = partial(torch.tensor, dtype=torch.float32)

	        self.register_buffer('betas', to_torch(betas))
	        self.register_buffer('alphas_cumprod', to_torch(alphas_cumprod))
	        self.register_buffer('alphas_cumprod_prev', to_torch(alphas_cumprod_prev))

	        # calculations for diffusion q(x_t | x_{t-1}) and others
	        self.register_buffer('sqrt_alphas_cumprod', to_torch(np.sqrt(alphas_cumprod)))
	        self.register_buffer('sqrt_one_minus_alphas_cumprod', to_torch(np.sqrt(1. - alphas_cumprod)))
	        self.register_buffer('log_one_minus_alphas_cumprod', to_torch(np.log(1. - alphas_cumprod)))
	        self.register_buffer('sqrt_recip_alphas_cumprod', to_torch(np.sqrt(1. / alphas_cumprod)))
	        self.register_buffer('sqrt_recipm1_alphas_cumprod', to_torch(np.sqrt(1. / alphas_cumprod - 1)))

	        # calculations for posterior q(x_{t-1} | x_t, x_0)
	        posterior_variance = (
	            (1 - self.v_posterior) * betas * (1. - alphas_cumprod_prev) / (1. - alphas_cumprod)
	            + self.v_posterior * betas
	        )
	        # above: equal to 1. / (1. / (1. - alpha_cumprod_tm1) + alpha_t / beta_t)
	        self.register_buffer('posterior_variance', to_torch(posterior_variance))
	        # below: log calculation clipped because the posterior variance is 0 at the beginning of the diffusion chain
	        self.register_buffer(
	            'posterior_log_variance_clipped',
	            to_torch(np.log(np.maximum(posterior_variance, 1e-20))),
	        )
	        self.register_buffer(
	            'posterior_mean_coef1',
	            to_torch(betas * np.sqrt(alphas_cumprod_prev) / (1. - alphas_cumprod)),
	        )
	        self.register_buffer(
	            'posterior_mean_coef2',
	            to_torch((1. - alphas_cumprod_prev) * np.sqrt(alphas) / (1. - alphas_cumprod)),
	        )

	        if self.parameterization == "eps":
	            lvlb_weights = self.betas ** 2 / (
	                2 * self.posterior_variance * to_torch(alphas) * (1 - self.alphas_cumprod)
	            )
	        elif self.parameterization == "x0":
	            # NOTE(review): the denominator is kept as written (2 * 1 - alphas_cumprod);
	            # confirm whether 2 * (1 - alphas_cumprod) was intended.
	            alphas_cumprod_t = to_torch(alphas_cumprod)
	            lvlb_weights = 0.5 * torch.sqrt(alphas_cumprod_t) / (2. * 1 - alphas_cumprod_t)
	        else:
	            raise NotImplementedError("mu not supported")
	        # TODO how to choose this term
	        lvlb_weights[0] = lvlb_weights[1]
	        self.register_buffer('lvlb_weights', lvlb_weights, persistent=False)
	        # NOTE(review): this only fails when *every* weight is NaN; confirm whether
	        # ``.any()`` was intended.
	        assert not torch.isnan(self.lvlb_weights).all()

	    def q_mean_variance(self, x_start, t):
	        """
	        Get the distribution q(x_t | x_0).
	        :param x_start: the [N x C x ...] tensor of noiseless inputs.
	        :param t: the number of diffusion steps (minus 1). Here, 0 means one step.
	        :return: A tuple (mean, variance, log_variance); variance and log_variance
	            broadcast against x_start.
	        """
	        mean = extract(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start
	        variance = extract(1.0 - self.alphas_cumprod, t, x_start.shape)
	        log_variance = extract(self.log_one_minus_alphas_cumprod, t, x_start.shape)
	        return mean, variance, log_variance

	    def predict_start_from_noise(self, x_t, t, noise):
	        """Recover the x_0 estimate implied by a noisy sample ``x_t`` and a noise prediction."""
	        return (
	            extract(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t
	            - extract(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape) * noise
	        )

	    def q_posterior(self, x_start, x_t, t):
	        """
	        Compute the mean and variance of the diffusion posterior:
	        q(x_{t-1} | x_t, x_0)
	        :return: A tuple (posterior_mean, posterior_variance, posterior_log_variance_clipped).
	        """
	        posterior_mean = (
	            extract(self.posterior_mean_coef1, t, x_t.shape) * x_start
	            + extract(self.posterior_mean_coef2, t, x_t.shape) * x_t
	        )
	        posterior_variance = extract(self.posterior_variance, t, x_t.shape)
	        posterior_log_variance_clipped = extract(self.posterior_log_variance_clipped, t, x_t.shape)
	        return posterior_mean, posterior_variance, posterior_log_variance_clipped

	    def p_mean_variance(self, x, c, t, clip_denoised: bool, return_x0=False):
	        """
	        Apply the model to get p(x_{t-1} | x_t)
	        :param x: the [N x C x ...] tensor at time t.
	        :param c: the conditioning passed to ``apply_model``.
	        :param t: a 1-D Tensor of timesteps.
	        :param clip_denoised: if True, clip the denoised signal into [-1, 1].
	        :param return_x0: if True, the x_0 estimate is appended to the returned tuple.
	        :return: (model_mean, posterior_variance, posterior_log_variance[, x_recon]).
	        """
	        model_out = self.apply_model(x, t, c)
	        if self.parameterization == "eps":
	            x_recon = self.predict_start_from_noise(x, t=t, noise=model_out)
	        elif self.parameterization == "x0":
	            x_recon = model_out

	        if clip_denoised:
	            x_recon.clamp_(-1., 1.)

	        model_mean, posterior_variance, posterior_log_variance = self.q_posterior(
	            x_start=x_recon, x_t=x, t=t
	        )
	        if return_x0:
	            return model_mean, posterior_variance, posterior_log_variance, x_recon
	        return model_mean, posterior_variance, posterior_log_variance

	    @torch.no_grad()
	    def p_sample(
	        self,
	        x,
	        c,
	        t,
	        clip_denoised=True,
	        repeat_noise=False,
	        return_x0=False,
	        temperature=1.,
	        noise_dropout=0.,
	    ):
	        """
	        Sample x_{t-1} from the model at the given timestep.
	        :param x: the current tensor x_t.
	        :param c: the conditioning passed to the model.
	        :param t: the value of t, starting at 0 for the first diffusion step.
	        :param clip_denoised: if True, clip the x_start prediction to [-1, 1].
	        :param repeat_noise: if True, the same noise is used for every batch entry.
	        :param return_x0: if True, also return the x_0 estimate.
	        :param temperature: scale applied to the sampled noise.
	        :param noise_dropout: dropout probability applied to the noise when > 0.
	        :return: the sample, or ``(sample, x0)`` when ``return_x0`` is True.
	        """
	        b, device = x.shape[0], x.device
	        outputs = self.p_mean_variance(
	            x=x,
	            c=c,
	            t=t,
	            clip_denoised=clip_denoised,
	            return_x0=return_x0,
	        )
	        if return_x0:
	            model_mean, _, model_log_variance, x0 = outputs
	        else:
	            model_mean, _, model_log_variance = outputs

	        noise = noise_like(x.shape, device, repeat_noise) * temperature
	        if noise_dropout > 0.:
	            noise = torch.nn.functional.dropout(noise, p=noise_dropout)
	        # no noise when t == 0
	        nonzero_mask = (1 - (t == 0).float()).reshape(b, *((1,) * (len(x.shape) - 1)))
	        sample = model_mean + nonzero_mask * (0.5 * model_log_variance).exp() * noise
	        if return_x0:
	            return sample, x0
	        return sample

	    @torch.no_grad()
	    def p_sample_loop(self, cond, shape, return_intermediates=False):
	        """Run the full reverse chain starting from Gaussian noise.

	        :param cond: the conditioning passed to every ``p_sample`` call.
	        :param shape: shape of the tensor to generate; ``shape[0]`` is the batch size.
	        :param return_intermediates: if True, also return the list of stored intermediates.
	        :return: the final sample, or ``(sample, intermediates)``.
	        """
	        device = self.betas.device

	        b = shape[0]
	        img = torch.randn(shape, device=device)
	        intermediates = [img]

	        for i in tqdm(
	            reversed(range(0, self.num_timesteps)),
	            desc='sampling loop time step',
	            total=self.num_timesteps,
	        ):
	            img = self.p_sample(
	                img,
	                cond,
	                torch.full((b,), i, device=device, dtype=torch.long),
	                clip_denoised=self.clip_denoised,
	            )
	            if i % self.log_every_t == 0 or i == self.num_timesteps - 1:
	                intermediates.append(img)
	        if return_intermediates:
	            return img, intermediates
	        return img

	    @torch.no_grad()
	    def sample(self, batch_size=16, return_intermediates=False, cond=None, shape=None):
	        """Generate samples with ``p_sample_loop``.

	        :param batch_size: batch size used when ``shape`` is not given.
	        :param return_intermediates: forwarded to ``p_sample_loop``.
	        :param cond: the conditioning forwarded to ``p_sample_loop``.
	        :param shape: full shape (including the batch dimension) of the tensor to
	            generate. When omitted, ``(batch_size, channels, image_size, image_size)``
	            is built from instance attributes.
	        """
	        if shape is None:
	            # NOTE(review): ``channels`` and ``image_size`` are not assigned anywhere in
	            # this class; the caller has to set them on the instance or pass ``shape``.
	            shape = (batch_size, self.channels, self.image_size, self.image_size)
	        return self.p_sample_loop(cond, shape, return_intermediates=return_intermediates)

	    def q_sample(self, x_start, t, noise=None):
	        """
	        Diffuse the data for a given number of diffusion steps.
	        In other words, sample from q(x_t | x_0).
	        :param x_start: the initial data batch.
	        :param t: the number of diffusion steps (minus 1). Here, 0 means one step.
	        :param noise: if specified, the split-out normal noise.
	        :return: A noisy version of x_start.
	        """
	        noise = default(noise, lambda: torch.randn_like(x_start))

	        return (
	            extract(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start
	            + extract(self.sqrt_one_minus_alphas_cumprod, t, x_start.shape) * noise
	        )

	    def get_loss(self, pred, target, mean=True):
	        """Compute the L1 or L2 loss selected by ``self.loss_type``.

	        :param pred: the model prediction.
	        :param target: the regression target.
	        :param mean: if True return a scalar mean, otherwise the element-wise loss.
	        :raises NotImplementedError: if ``self.loss_type`` is neither ``'l1'`` nor ``'l2'``.
	        """
	        if self.loss_type == 'l1':
	            loss = (target - pred).abs()
	            if mean:
	                loss = loss.mean()
	        elif self.loss_type == 'l2':
	            reduction = 'mean' if mean else 'none'
	            loss = torch.nn.functional.mse_loss(target, pred, reduction=reduction)
	        else:
	            raise NotImplementedError(f"unknown loss type '{self.loss_type}'")

	        return loss

	    def p_losses(self, x_start, cond, t, noise=None):
	        """Compute the training loss for a batch at timesteps ``t``.

	        :param x_start: the clean data batch.
	        :param cond: the conditioning passed to ``apply_model``.
	        :param t: a 1-D long tensor of timesteps, one per batch entry.
	        :param noise: optional noise to diffuse with; drawn from N(0, I) when omitted.
	        :return: ``(loss, loss_dict)`` where ``loss_dict`` holds the individual terms.
	        """
	        noise = default(noise, lambda: torch.randn_like(x_start))
	        x_noisy = self.q_sample(x_start=x_start, t=t, noise=noise)
	        model_output = self.apply_model(x_noisy, t, cond)

	        loss_dict = {}
	        if self.parameterization == "eps":
	            target = noise
	        elif self.parameterization == "x0":
	            target = x_start
	        else:
	            raise NotImplementedError(f"Paramterization {self.parameterization} not yet supported")

	        # Average over every non-batch dimension, whatever the input rank is.
	        reduce_dims = list(range(1, model_output.ndim))
	        loss_simple = self.get_loss(model_output, target, mean=False).mean(dim=reduce_dims)
	        loss_dict.update({'loss_simple': loss_simple.mean()})

	        # logvar is a plain tensor unless learn_logvar is set, so move it to the index
	        # device before indexing to avoid a CPU/GPU mismatch.
	        logvar_t = self.logvar.to(t.device)[t].to(x_start.device)
	        loss = loss_simple / torch.exp(logvar_t) + logvar_t
	        if self.learn_logvar:
	            loss_dict.update({'loss_gamma': loss.mean()})
	            loss_dict.update({'logvar': self.logvar.data.mean()})

	        loss = self.l_simple_weight * loss.mean()

	        # The variational-bound term reuses the per-sample loss computed above.
	        loss_vlb = (self.lvlb_weights[t] * loss_simple).mean()
	        loss_dict.update({'loss_vlb': loss_vlb})
	        loss = loss + self.original_elbo_weight * loss_vlb
	        loss_dict.update({'loss': loss})

	        return loss, loss_dict

	    def forward(self, x, c, *args, **kwargs):
	        """Draw a random timestep per batch entry and return ``p_losses`` for it."""
	        t = torch.randint(0, self.num_timesteps, (x.shape[0],), device=x.device).long()
	        return self.p_losses(x, c, t, *args, **kwargs)

	    def configure_optimizers(self):
	        """Build an AdamW optimizer over the model (and ``logvar`` when it is learned).

	        NOTE(review): ``self.learning_rate`` is not assigned in this class; it has to be
	        set on the instance before this is called.
	        """
	        lr = self.learning_rate
	        params = list(self.model.parameters())
	        if self.learn_logvar:
	            print('Diffusion model optimizing logvar')
	            params.append(self.logvar)
	        opt = torch.optim.AdamW(params, lr=lr)
	        return opt

	    def apply_model(self, x_noisy, t, cond, return_ids=False):
	        """Run the wrapped model on a noisy input.

	        :param x_noisy: the noisy input tensor.
	        :param t: a 1-D tensor of timesteps.
	        :param cond: a dict of keyword arguments for the wrapped model (hybrid case),
	            a tensor or list of tensors (wrapped under ``'c_concat'`` or
	            ``'c_crossattn'`` depending on the conditioning key), or ``None`` to
	            forward no conditioning arguments.
	        :param return_ids: if False and the model returns a tuple, only its first
	            element is returned.
	        """
	        if cond is None:
	            cond = {}
	        elif not isinstance(cond, dict):
	            # A dict is the hybrid case and is passed through untouched.
	            if not isinstance(cond, list):
	                cond = [cond]
	            key = 'c_concat' if self.model.conditioning_key == 'concat' else 'c_crossattn'
	            cond = {key: cond}

	        x_recon = self.model(x_noisy, t, **cond)

	        if isinstance(x_recon, tuple) and not return_ids:
	            return x_recon[0]
	        return x_recon



	class DiffusionWrapper(nn.Module):
	    """Thin wrapper that feeds concatenation and cross-attention conditioning to the UNet."""

	    def __init__(self, unet_config, conditioning_key):
	        """
	        :param unet_config: mapping whose optional ``"params"`` entry holds the keyword
	            arguments for ``UNetModel``.
	        :param conditioning_key: conditioning mode; stored and read by callers, not
	            used inside ``forward``.
	        """
	        super().__init__()
	        self.diffusion_model = UNetModel(**unet_config.get("params", dict()))
	        self.conditioning_key = conditioning_key

	    def forward(self, x, t, c_concat: list = None, c_crossattn: list = None):
	        """Run the UNet on ``x`` with the given conditioning.

	        :param x: the input tensor.
	        :param t: the timesteps passed to the UNet.
	        :param c_concat: tensors concatenated to ``x`` along dim 1; skipped when
	            ``None`` or empty.
	        :param c_crossattn: tensors concatenated along dim 1 and passed as ``context``;
	            ``context=None`` is passed when ``None`` or empty.
	        :return: the UNet output.
	        """
	        # The declared defaults are None, so tolerate missing conditioning instead of
	        # failing on ``[x] + None`` / ``torch.cat(None)``.
	        xc = torch.cat([x] + list(c_concat), dim=1) if c_concat else x
	        # NOTE(review): assumes the UNet accepts ``context=None`` — confirm against UNetModel.
	        cc = torch.cat(c_crossattn, 1) if c_crossattn else None
	        return self.diffusion_model(xc, t, context=cc)