Michelangelo / michelangelo /models /modules /embedder.py

all files first commit

9c3a994 10 months ago

7.41 kB

	# -*- coding: utf-8 -*-

	import numpy as np
	import torch
	import torch.nn as nn
	import math

	# Embedding type names reported in get_embedder's "not valid" error message.
	# Of these, get_embedder only returns an embedder for "identity" and "fourier";
	# the remaining names currently end in a raised exception.
	VALID_EMBED_TYPES = ["identity", "fourier", "hashgrid", "sphere_harmonic", "triplane_fourier"]


	class FourierEmbedder(nn.Module):
	    """Sin/cosine positional embedding with fixed (non-learned) frequencies.

	    Given an input tensor `x` of shape [..., input_dim], `forward` returns the
	    concatenation along the last axis of:

	        x                                   # only if include_input is True
	        sin(f_j * x[..., i])  for all i, j  # feature-major: all frequencies of
	                                            # feature 0, then feature 1, ...
	        cos(f_j * x[..., i])  for all i, j  # same ordering as the sin block

	    Frequencies f_0 ... f_{num_freqs - 1}:
	        * logspace=True:  f_j = 2^j;
	        * logspace=False: linearly spaced between 1.0 and 2^(num_freqs - 1).
	    If include_pi is True every frequency is additionally multiplied by pi.

	    When num_freqs is 0 the module is a pass-through: `forward` returns `x`
	    unchanged and `out_dim == input_dim`, regardless of include_input.

	    Args:
	        num_freqs (int): number of frequencies (must be >= 0), default is 6;
	        logspace (bool): powers of two if True, linear spacing otherwise;
	        input_dim (int): size of the last axis of the input, default is 3;
	        include_input (bool): prepend the raw input to the embedding, default is True;
	        include_pi (bool): scale all frequencies by pi, default is True.

	    Attributes:
	        frequencies (torch.Tensor): float32 buffer of shape [num_freqs]; registered as
	            non-persistent, so it is not stored in the state dict;
	        out_dim (int): size of the last axis of the output, i.e.
	            input_dim * (num_freqs * 2 + 1) if include_input is True (or num_freqs is 0),
	            otherwise input_dim * num_freqs * 2.

	    Raises:
	        ValueError: if num_freqs is negative.
	    """

	    def __init__(self,
	                 num_freqs: int = 6,
	                 logspace: bool = True,
	                 input_dim: int = 3,
	                 include_input: bool = True,
	                 include_pi: bool = True) -> None:
	        """Build the frequency buffer and pre-compute the output size."""

	        super().__init__()

	        # Fail early with a readable message instead of an opaque error from
	        # torch.arange / torch.linspace.
	        if num_freqs < 0:
	            raise ValueError(f"num_freqs must be non-negative, got {num_freqs}")

	        if logspace:
	            # 1, 2, 4, ..., 2^(num_freqs - 1)
	            frequencies = 2.0 ** torch.arange(
	                num_freqs,
	                dtype=torch.float32
	            )
	        else:
	            frequencies = torch.linspace(
	                1.0,
	                2.0 ** (num_freqs - 1),
	                num_freqs,
	                dtype=torch.float32
	            )

	        if include_pi:
	            frequencies *= torch.pi

	        # Non-persistent buffer: follows the module across devices but is
	        # rebuilt from the constructor arguments rather than loaded from checkpoints.
	        self.register_buffer("frequencies", frequencies, persistent=False)
	        self.include_input = include_input
	        self.num_freqs = num_freqs

	        self.out_dim = self.get_dims(input_dim)

	    def get_dims(self, input_dim: int) -> int:
	        """Return the size of the last output axis for inputs with `input_dim` features."""
	        # With zero frequencies forward() returns x itself, so the input is
	        # always "included" in that case.
	        keeps_input = 1 if self.include_input or self.num_freqs == 0 else 0
	        return input_dim * (self.num_freqs * 2 + keeps_input)

	    def forward(self, x: torch.Tensor) -> torch.Tensor:
	        """Embed `x`.

	        Args:
	            x: tensor of shape [..., dim]

	        Returns:
	            embedding: tensor of shape [..., dim * (num_freqs * 2 + temp)], where temp
	                is 1 if include_input is True and 0 otherwise; laid out as
	                [x (optional), sin block, cos block]. If num_freqs is 0, `x` itself
	                is returned.
	        """

	        if self.num_freqs > 0:
	            # [..., dim, 1] * [num_freqs] -> [..., dim, num_freqs] -> [..., dim * num_freqs]
	            embed = (x[..., None].contiguous() * self.frequencies).view(*x.shape[:-1], -1)
	            if self.include_input:
	                return torch.cat((x, embed.sin(), embed.cos()), dim=-1)
	            return torch.cat((embed.sin(), embed.cos()), dim=-1)

	        return x


	class LearnedFourierEmbedder(nn.Module):
	    """Sinusoidal positional embedding with learned frequencies.

	    Follows @crowsonkb's lead with learned sinusoidal positional embeddings:
	    https://github.com/crowsonkb/v-diffusion-jax/blob/master/diffusion/models/danbooru_128.py#L8

	    One vector of `(dim // 2) // in_channels` learnable frequencies is shared by
	    all input channels. The output is `[x, sin(.), cos(.)]` concatenated on the
	    last axis.

	    Args:
	        in_channels: size of the last axis of the input.
	        dim: requested number of sin/cos features; must be even.

	    Attributes:
	        weights (nn.Parameter): learned frequencies, shape [(dim // 2) // in_channels],
	            initialised from a standard normal distribution.
	        out_dim (int): actual size of the last output axis,
	            in_channels + 2 * in_channels * weights.shape[0]. This equals
	            in_channels + dim only when dim // 2 is divisible by in_channels.
	    """

	    def __init__(self, in_channels, dim):
	        super().__init__()
	        assert (dim % 2) == 0
	        half_dim = dim // 2
	        # Floor division: any remainder of half_dim / in_channels is dropped.
	        per_channel_dim = half_dim // in_channels
	        self.weights = nn.Parameter(torch.randn(per_channel_dim))
	        self.out_dim = in_channels + 2 * in_channels * per_channel_dim

	    def forward(self, x):
	        """Embed `x`.

	        Args:
	            x (torch.FloatTensor): [..., c]

	        Returns:
	            x (torch.FloatTensor): [..., c + 2 * c * d], with d = weights.shape[0]
	        """

	        # [..., c, 1] * [1, d] = [..., c, d] -> [..., c * d]
	        # reshape (not view): the broadcast product is not guaranteed to be
	        # contiguous when `x` itself is a non-contiguous view.
	        freqs = (x[..., None] * self.weights[None] * 2 * np.pi).reshape(*x.shape[:-1], -1)
	        fouriered = torch.cat((x, freqs.sin(), freqs.cos()), dim=-1)
	        return fouriered


	class TriplaneLearnedFourierEmbedder(nn.Module):
	    """Sum of three independently parameterised learned Fourier embeddings.

	    Three `LearnedFourierEmbedder` instances (named after the yz, xz and xy
	    planes) are each applied to the same, full input `x`, and their outputs
	    are added element-wise.

	    Args:
	        in_channels: size of the last axis of the input.
	        dim: requested number of sin/cos features; must be even.

	    Attributes:
	        out_dim (int): size of the last output axis. It is derived from the
	            embedders' actual frequency count, so it stays correct when
	            dim // 2 is not divisible by in_channels (it equals
	            in_channels + dim only in the divisible case).
	    """

	    def __init__(self, in_channels, dim):
	        super().__init__()

	        self.yz_plane_embedder = LearnedFourierEmbedder(in_channels, dim)
	        self.xz_plane_embedder = LearnedFourierEmbedder(in_channels, dim)
	        self.xy_plane_embedder = LearnedFourierEmbedder(in_channels, dim)

	        # Each embedder outputs [x, sin, cos] with in_channels * n_freqs features
	        # per trig block; n_freqs is floored, so do not assume it is dim / 2 / in_channels.
	        n_freqs = self.yz_plane_embedder.weights.shape[0]
	        self.out_dim = in_channels + 2 * in_channels * n_freqs

	    def forward(self, x):
	        """Return the element-wise sum of the three embeddings of `x` ([..., c] -> [..., out_dim])."""

	        yz_embed = self.yz_plane_embedder(x)
	        xz_embed = self.xz_plane_embedder(x)
	        xy_embed = self.xy_plane_embedder(x)

	        # Note: the raw input appears in every embedding, so it is summed three times.
	        embed = yz_embed + xz_embed + xy_embed

	        return embed


	def sequential_pos_embed(num_len, embed_dim):
	    """Build fixed sinusoidal embeddings for positions 0 .. num_len - 1.

	    Args:
	        num_len: number of positions M.
	        embed_dim: embedding size D; must be even.

	    Returns:
	        float32 tensor of shape (M, D): the first D/2 columns hold sin(pos * w_k),
	        the last D/2 columns hold cos(pos * w_k), with w_k = 1 / 10000^(k / (D/2)).
	    """
	    assert embed_dim % 2 == 0

	    # Inverse frequencies, shape (D/2,)
	    exponents = torch.arange(embed_dim // 2, dtype=torch.float32)
	    exponents /= embed_dim / 2.
	    inv_freq = 1. / 10000 ** exponents

	    # Flat position vector, shape (M,)
	    positions = torch.arange(num_len, dtype=torch.float32).reshape(-1)

	    # Outer product of positions and inverse frequencies, shape (M, D/2)
	    angles = torch.einsum("m,d->md", positions, inv_freq)

	    sin_part = torch.sin(angles)
	    cos_part = torch.cos(angles)
	    return torch.cat([sin_part, cos_part], dim=1)  # (M, D)


	def timestep_embedding(timesteps, dim, max_period=10000):
	    """
	    Create sinusoidal timestep embeddings.

	    :param timesteps: a 1-D Tensor of N indices, one per batch element.
	                      These may be fractional.
	    :param dim: the dimension of the output.
	    :param max_period: controls the minimum frequency of the embeddings.
	    :return: an [N x dim] Tensor of positional embeddings, laid out as
	             [cos block, sin block] followed by one zero column when dim is odd.
	    """
	    half = dim // 2
	    # Geometric frequency ladder from 1 down towards 1 / max_period.
	    freqs = torch.exp(
	        -math.log(max_period) * torch.arange(start=0, end=half, dtype=torch.float32) / half
	    ).to(device=timesteps.device)
	    # [N, 1] * [1, half] -> [N, half]; type promotion handles integer timesteps.
	    args = timesteps[:, None] * freqs[None]
	    embedding = torch.cat([torch.cos(args), torch.sin(args)], dim=-1)
	    if dim % 2:
	        # Pad with an explicit [N, 1] zero column. Slicing `embedding[:, :1]` would
	        # yield an empty column when half == 0 (dim == 1) and drop the padding.
	        zero_col = embedding.new_zeros(embedding.shape[0], 1)
	        embedding = torch.cat([embedding, zero_col], dim=-1)
	    return embedding


	def get_embedder(embed_type="fourier", num_freqs=-1, input_dim=3, degree=4,
	                 num_levels=16, level_dim=2, per_level_scale=2, base_resolution=16,
	                 log2_hashmap_size=19, desired_resolution=None):
	    """Build a positional embedder and report its output dimension.

	    Args:
	        embed_type: one of the names in VALID_EMBED_TYPES.
	        num_freqs: number of Fourier frequencies; -1 together with
	            embed_type="fourier" selects the identity mapping.
	        input_dim: size of the last axis of the tensors to embed.
	        degree, num_levels, level_dim, per_level_scale, base_resolution,
	        log2_hashmap_size, desired_resolution: accepted for the embed types
	            that are not implemented here; currently unused.

	    Returns:
	        (embedder, out_dim): an nn.Module and the size of its last output axis.
	        "identity" (or "fourier" with num_freqs == -1) yields
	        (nn.Identity(), input_dim); "fourier" yields a FourierEmbedder built with
	        logspace=True and include_input=True.

	    Raises:
	        NotImplementedError: for "hashgrid", "sphere_harmonic" and
	            "triplane_fourier", which are declared but have no factory branch.
	        ValueError: for any other embed_type.
	    """
	    if embed_type == "identity" or (embed_type == "fourier" and num_freqs == -1):
	        return nn.Identity(), input_dim

	    if embed_type == "fourier":
	        embedder_obj = FourierEmbedder(num_freqs=num_freqs, input_dim=input_dim,
	                                       logspace=True, include_input=True)
	        return embedder_obj, embedder_obj.out_dim

	    # Known names without an implementation here. "triplane_fourier" used to fall
	    # through to the ValueError below, whose message lists it as supported.
	    if embed_type in ("hashgrid", "sphere_harmonic", "triplane_fourier"):
	        raise NotImplementedError(f"embed_type '{embed_type}' is not implemented")

	    raise ValueError(f"{embed_type} is not valid. Currently only supports {VALID_EMBED_TYPES}")