|
import torch.nn as nn |
|
from models.vq.resnet import Resnet1D |
|
|
|
|
|
class Encoder(nn.Module):
    """1-D convolutional encoder built as a single ``nn.Sequential``.

    Layer layout, in order:

    1. Stem: ``Conv1d(input_emb_width -> width, kernel 3, stride 1, pad 1)``
       followed by ``ReLU``.
    2. ``down_t`` stages, each a strided ``Conv1d(width -> width)`` with
       kernel ``2 * stride_t``, stride ``stride_t`` and padding
       ``stride_t // 2``, followed by a ``Resnet1D`` stack.
    3. Head: ``Conv1d(width -> output_emb_width, kernel 3, stride 1, pad 1)``.

    Args:
        input_emb_width: Channel count of the input.
        output_emb_width: Channel count of the output.
        down_t: Number of strided (downsampling) stages.
        stride_t: Stride of each downsampling convolution.
        width: Channel count used by all hidden layers.
        depth: Forwarded to ``Resnet1D`` as its second positional argument.
        dilation_growth_rate: Forwarded to ``Resnet1D`` as its third
            positional argument.
        activation: Forwarded to ``Resnet1D`` only; the stem activation is
            always ``nn.ReLU``.
        norm: Forwarded to ``Resnet1D``.
    """

    def __init__(
        self,
        input_emb_width=3,
        output_emb_width=512,
        down_t=2,
        stride_t=2,
        width=512,
        depth=3,
        dilation_growth_rate=3,
        activation='relu',
        norm=None,
    ):
        super().__init__()

        # Kernel/padding of the strided convolutions are derived from the
        # stride; with the default stride_t=2 this gives kernel 4, padding 1.
        kernel_size = stride_t * 2
        padding = stride_t // 2

        # Stem: lift the input channels to the hidden width.
        layers = [
            nn.Conv1d(input_emb_width, width, 3, 1, 1),
            nn.ReLU(),
        ]

        # Downsampling stages. The generator is consumed immediately by
        # extend(), so modules are still created in stem -> stages -> head
        # order (this keeps seeded parameter initialisation reproducible).
        layers.extend(
            nn.Sequential(
                nn.Conv1d(width, width, kernel_size, stride_t, padding),
                Resnet1D(width, depth, dilation_growth_rate, activation=activation, norm=norm),
            )
            for _ in range(down_t)
        )

        # Head: project the hidden width to the requested output channels.
        layers.append(nn.Conv1d(width, output_emb_width, 3, 1, 1))

        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the result.

        ``x`` is fed straight to ``nn.Conv1d``, so it must be channels-first
        with ``input_emb_width`` channels, e.g. ``(batch, channels, length)``.
        """
        return self.model(x)
|
|
|
|
|
class Decoder(nn.Module):
    """1-D convolutional decoder built as a single ``nn.Sequential``.

    Note the parameter naming mirrors ``Encoder``: the decoder *consumes*
    ``output_emb_width`` channels and *produces* ``input_emb_width`` channels.

    Layer layout, in order:

    1. Stem: ``Conv1d(output_emb_width -> width, kernel 3, stride 1, pad 1)``
       followed by ``ReLU``.
    2. ``down_t`` stages, each a ``Resnet1D`` stack (built with
       ``reverse_dilation=True``), a nearest-neighbour ``Upsample`` by
       ``stride_t``, and a ``Conv1d(width -> width, kernel 3)``.
    3. Head: ``Conv1d(width -> width)``, ``ReLU``,
       ``Conv1d(width -> input_emb_width)``.

    Args:
        input_emb_width: Channel count of the decoder output.
        output_emb_width: Channel count of the decoder input.
        down_t: Number of upsampling stages.
        stride_t: Temporal upsampling factor of each stage. Previously this
            argument was ignored and a factor of 2 was hard-coded; the
            default of 2 keeps that behaviour.
        width: Channel count used by all hidden layers.
        depth: Forwarded to ``Resnet1D`` as its second positional argument.
        dilation_growth_rate: Forwarded to ``Resnet1D`` as its third
            positional argument.
        activation: Forwarded to ``Resnet1D`` only; the stand-alone
            activations in the stem and head are always ``nn.ReLU``.
        norm: Forwarded to ``Resnet1D``.
    """

    def __init__(
        self,
        input_emb_width=3,
        output_emb_width=512,
        down_t=2,
        stride_t=2,
        width=512,
        depth=3,
        dilation_growth_rate=3,
        activation='relu',
        norm=None,
    ):
        super().__init__()

        # Stem: lift the incoming channels to the hidden width.
        blocks = [
            nn.Conv1d(output_emb_width, width, 3, 1, 1),
            nn.ReLU(),
        ]

        # Upsampling stages. Modules are created in the same order as they
        # appear in the Sequential, so parameter names and seeded
        # initialisation are unchanged.
        for _ in range(down_t):
            blocks.append(
                nn.Sequential(
                    Resnet1D(width, depth, dilation_growth_rate, reverse_dilation=True, activation=activation, norm=norm),
                    # Use the configured stride rather than a literal 2 so the
                    # upsampling factor follows the ``stride_t`` argument.
                    nn.Upsample(scale_factor=stride_t, mode='nearest'),
                    nn.Conv1d(width, width, 3, 1, 1),
                )
            )

        # Head: refine at hidden width, then project to the output channels.
        blocks.append(nn.Conv1d(width, width, 3, 1, 1))
        blocks.append(nn.ReLU())
        blocks.append(nn.Conv1d(width, input_emb_width, 3, 1, 1))

        self.model = nn.Sequential(*blocks)

    def forward(self, x):
        """Decode ``x`` and return the result with its last two axes swapped.

        ``x`` is fed straight to ``nn.Conv1d``, so it must be channels-first
        with ``output_emb_width`` channels. The 3-D result of the layer stack
        is permuted with ``(0, 2, 1)``, i.e. a ``(batch, channels, length)``
        tensor is returned as ``(batch, length, channels)``.
        """
        x = self.model(x)
        return x.permute(0, 2, 1)