Spaces:
Running
Running
File size: 3,278 Bytes
d8530c7 6837c8b d8530c7 7d87cc1 d8530c7 7d87cc1 d8530c7 7d87cc1 d8530c7 6837c8b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 |
import torch
import torch.nn as nn
import numpy as np
import torch
from torch import nn
import torch
from typing import List, Dict, Optional
from torch import Tensor
class TimestepEmbedderMDM(nn.Module):
def __init__(self, latent_dim):
super().__init__()
self.latent_dim = latent_dim
time_embed_dim = self.latent_dim
self.sequence_pos_encoder = PositionalEncoding(d_model=self.latent_dim)
# TODO add time embedding learnable
self.time_embed = nn.Sequential(
nn.Linear(self.latent_dim, time_embed_dim),
nn.SiLU(),
nn.Linear(time_embed_dim, time_embed_dim),
).to('cuda')
def forward(self, timesteps):
return self.time_embed(self.sequence_pos_encoder.pe[timesteps]).permute(1, 0, 2)
class PositionalEncoding(nn.Module):
def __init__(self, d_model, dropout=0.1,
max_len=5000, batch_first=False, negative=False):
super().__init__()
self.batch_first = batch_first
self.dropout = nn.Dropout(p=dropout)
self.max_len = max_len
self.negative = negative
if negative:
pe = torch.zeros(2*max_len, d_model,device='cuda')
position = torch.arange(-max_len, max_len, dtype=torch.float).unsqueeze(1)
else:
pe = torch.zeros(max_len, d_model,device='cuda')
position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-np.log(10000.0) / d_model))
pe[:, 0::2] = torch.sin(position * div_term)
pe[:, 1::2] = torch.cos(position * div_term)
pe = pe.unsqueeze(0).transpose(0, 1)
self.register_buffer('pe', pe, persistent=False)
def forward(self, x, hist_frames=0):
if not self.negative:
center = 0
assert hist_frames == 0
first = 0
else:
center = self.max_len
first = center-hist_frames
if self.batch_first:
last = first + x.shape[1]
x = x + self.pe.permute(1, 0, 2)[:, first:last, :]
else:
last = first + x.shape[0]
x = x + self.pe[first:last, :]
return self.dropout(x)
def collate_tensor_with_padding(batch: List[Tensor]) -> Tensor:
dims = batch[0].dim()
max_size = [max([b.size(i) for b in batch]) for i in range(dims)]
size = (len(batch),) + tuple(max_size)
canvas = batch[0].new_zeros(size=size)
for i, b in enumerate(batch):
sub_tensor = canvas[i]
for d in range(dims):
sub_tensor = sub_tensor.narrow(d, 0, b.size(d))
sub_tensor.add_(b)
return canvas
def collate_x_dict(lst_x_dict: List, *, device: Optional[str] = 'cuda') -> Dict:
x = collate_tensor_with_padding([x_dict["x"] for x_dict in lst_x_dict])
if device is not None:
x = x.to(device)
length = [x_dict["length"] for x_dict in lst_x_dict]
if isinstance(length, list):
length = torch.tensor(length, device=device)
max_len = max(length)
mask = torch.arange(max_len, device=device).expand(
len(length), max_len
) < length.unsqueeze(1)
batch = {"x": x, "length": length, "mask": mask}
return batch |