import torch
import torch.nn as nn


# Adapted from https://github.com/facebookresearch/DiT
class TimestepEmbedder(nn.Module):
    """
    Embeds scalar timesteps into vector representations.
    """

    def __init__(self, dim, frequency_embedding_size, max_period):
        super().__init__()
        self.mlp = nn.Sequential(
            nn.Linear(frequency_embedding_size, dim),
            nn.SiLU(),
            nn.Linear(dim, dim),
        )
        self.dim = dim
        self.max_period = max_period
        # The cos/sin halves are concatenated, so the sinusoidal embedding
        # only matches the first Linear layer's input width when
        # frequency_embedding_size is even.
        assert frequency_embedding_size % 2 == 0, \
            'frequency_embedding_size must be even.'
        # Build the frequency table in float32 even if the module is
        # constructed under autocast, so low frequencies are not rounded away.
        with torch.autocast('cuda', enabled=False):
            # Geometric frequency schedule with base 10000, uniformly rescaled
            # by 10000 / max_period; this is equivalent to embedding
            # t * (10000 / max_period) with the standard base-10000 schedule.
            freq_scale = 10000 / max_period
            self.freqs = nn.Buffer(
                freq_scale / (10000**(torch.arange(
                    0, frequency_embedding_size, 2,
                    dtype=torch.float32) / frequency_embedding_size)),
                persistent=False)

    def timestep_embedding(self, t):
        """
        Create sinusoidal timestep embeddings.
        :param t: a 1-D Tensor of N timesteps, one per batch element.
                  These may be fractional.
        :return: an (N, frequency_embedding_size) Tensor of positional
                 embeddings.
        """
        # https://github.com/openai/glide-text2im/blob/main/glide_text2im/nn.py
        args = t[:, None].float() * self.freqs[None]
        embedding = torch.cat([torch.cos(args), torch.sin(args)], dim=-1)
        return embedding

    def forward(self, t):
        # Compute the sinusoidal embedding in float32, then cast to the MLP's
        # parameter dtype. Casting to t.dtype instead would corrupt the
        # embedding whenever t is an integer tensor of step indices.
        t_freq = self.timestep_embedding(t).to(self.mlp[0].weight.dtype)
        t_emb = self.mlp(t_freq)
        return t_emb
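

# Minimal usage sketch. The hyperparameter values below (dim=256,
# frequency_embedding_size=256, max_period=10000) are illustrative
# assumptions, not values prescribed by this module.
if __name__ == '__main__':
    embedder = TimestepEmbedder(
        dim=256, frequency_embedding_size=256, max_period=10000)
    # Both integer diffusion-step indices and fractional timesteps are valid,
    # since the embedding computes t.float() internally.
    t_int = torch.randint(0, 1000, (4,))
    t_frac = torch.rand(4)
    print(embedder(t_int).shape)   # torch.Size([4, 256])
    print(embedder(t_frac).shape)  # torch.Size([4, 256])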