| |
| import torch.nn as nn |
| from models.encdec import Encoder, Decoder |
| from models.quantize_cnn import QuantizeEMAReset, Quantizer, QuantizeEMA, QuantizeReset |
|
|
|
|
class VQVAE_251(nn.Module):
    """VQ-VAE built from an ``Encoder``, a vector quantizer and a ``Decoder``.

    The encoder/decoder feature width is 251 when ``args.dataname == 'kit'``
    and 263 for every other dataset name. The quantizer implementation is
    selected by ``args.quantizer``.
    """

    # Values of ``args.quantizer`` that ``__init__`` knows how to build.
    _SUPPORTED_QUANTIZERS = ("ema_reset", "orig", "ema", "reset")

    def __init__(self,
                 args,
                 nb_code=1024,
                 code_dim=512,
                 output_emb_width=512,
                 down_t=3,
                 stride_t=2,
                 width=512,
                 depth=3,
                 dilation_growth_rate=3,
                 activation='relu',
                 norm=None):
        """Build the encoder, decoder and the requested quantizer.

        Args:
            args: Configuration object; must expose ``quantizer`` (one of
                ``"ema_reset"``, ``"orig"``, ``"ema"``, ``"reset"``) and
                ``dataname``. It is also handed to every quantizer except
                ``"orig"``.
            nb_code: Codebook size passed to the quantizer.
            code_dim: Code width passed to the quantizer; also used by
                ``forward_decoder`` to reshape dequantized codes.
            output_emb_width, down_t, stride_t, width, depth,
            dilation_growth_rate, activation, norm: Forwarded unchanged to
                both ``Encoder`` and ``Decoder``.

        Raises:
            ValueError: If ``args.quantizer`` is not a supported name.
        """
        super().__init__()

        # Fail fast: without this check an unknown name would leave
        # ``self.quantizer`` unset and only surface later as an
        # AttributeError inside forward()/encode()/forward_decoder().
        if args.quantizer not in self._SUPPORTED_QUANTIZERS:
            raise ValueError(
                f"Unsupported quantizer {args.quantizer!r}; "
                f"expected one of {self._SUPPORTED_QUANTIZERS}"
            )

        self.code_dim = code_dim
        self.num_code = nb_code
        self.quant = args.quantizer

        # Per-frame feature width: 251 for 'kit', 263 otherwise.
        input_dim = 251 if args.dataname == 'kit' else 263
        self.encoder = Encoder(input_dim, output_emb_width, down_t, stride_t,
                               width, depth, dilation_growth_rate,
                               activation=activation, norm=norm)
        self.decoder = Decoder(input_dim, output_emb_width, down_t, stride_t,
                               width, depth, dilation_growth_rate,
                               activation=activation, norm=norm)

        if args.quantizer == "ema_reset":
            self.quantizer = QuantizeEMAReset(nb_code, code_dim, args)
        elif args.quantizer == "orig":
            # NOTE(review): the literal 1.0 is presumably the commitment
            # weight of the plain quantizer -- confirm against Quantizer.
            self.quantizer = Quantizer(nb_code, code_dim, 1.0)
        elif args.quantizer == "ema":
            self.quantizer = QuantizeEMA(nb_code, code_dim, args)
        else:  # "reset" -- the only remaining validated name
            self.quantizer = QuantizeReset(nb_code, code_dim, args)

    def preprocess(self, x):
        """Swap the last two axes of a 3-D tensor and cast it to float."""
        x = x.permute(0, 2, 1).float()
        return x

    def postprocess(self, x):
        """Swap the last two axes of a 3-D tensor (inverse of the permute in ``preprocess``)."""
        x = x.permute(0, 2, 1)
        return x

    def encode(self, x):
        """Encode ``x`` and return codebook indices, one row per batch item.

        Args:
            x: 3-D tensor; the first axis is treated as the batch
                (presumably (batch, time, features) -- TODO confirm).

        Returns:
            The result of ``self.quantizer.quantize`` reshaped to
            ``(x.shape[0], -1)``.
        """
        # Unpacking doubles as a check that x is exactly 3-D.
        batch_size, _, _ = x.shape
        flat_latents = self.encode_x(x)
        code_idx = self.quantizer.quantize(flat_latents)
        return code_idx.view(batch_size, -1)

    def encode_x(self, x):
        """Run the encoder only and return its output flattened to 2-D.

        Args:
            x: 3-D tensor (same layout ``encode`` expects).

        Returns:
            Encoder output with its last two axes swapped and all leading
            axes merged, i.e. shape ``(-1, last_dim)``; not quantized.
        """
        # Unpacking doubles as a check that x is exactly 3-D.
        _, _, _ = x.shape
        x_in = self.preprocess(x)
        x_encoder = self.encoder(x_in)

        x_encoder = self.postprocess(x_encoder)
        # permute() yields a non-contiguous view; view() needs contiguous memory.
        x_encoder = x_encoder.contiguous().view(-1, x_encoder.shape[-1])
        return x_encoder

    def forward(self, x):
        """Encode, quantize and decode ``x``.

        Returns:
            ``(x_out, loss, perplexity)``: the decoder output with its last
            two axes swapped back, followed by the second and third values
            returned by the quantizer.
        """
        x_in = self.preprocess(x)
        x_encoder = self.encoder(x_in)

        # The quantizer consumes the encoder output in the encoder's own
        # axis order (no postprocess here, unlike encode()).
        x_quantized, loss, perplexity = self.quantizer(x_encoder)

        x_decoder = self.decoder(x_quantized)
        x_out = self.postprocess(x_decoder)
        return x_out, loss, perplexity

    def forward_decoder(self, x):
        """Decode codebook indices back through the decoder.

        Args:
            x: Indices accepted by ``self.quantizer.dequantize``.

        Returns:
            Decoder output with its last two axes swapped. The dequantized
            codes are reshaped to a batch of size 1, so every index in ``x``
            is treated as part of one single sequence.
        """
        x_d = self.quantizer.dequantize(x)
        # (1, num_codes, code_dim) -> (1, code_dim, num_codes) for the decoder.
        x_d = x_d.view(1, -1, self.code_dim).permute(0, 2, 1).contiguous()

        x_decoder = self.decoder(x_d)
        x_out = self.postprocess(x_decoder)
        return x_out
|
|
|
|
|
|
class HumanVQVAE(nn.Module):
    """Wrapper that owns a ``VQVAE_251`` and delegates every call to it.

    Also records ``nb_joints``: 21 when ``args.dataname == 'kit'``,
    22 for any other dataset name.
    """

    def __init__(self,
                 args,
                 nb_code=512,
                 code_dim=512,
                 output_emb_width=512,
                 down_t=3,
                 stride_t=2,
                 width=512,
                 depth=3,
                 dilation_growth_rate=3,
                 activation='relu',
                 norm=None):
        """Store the joint count and build the wrapped ``VQVAE_251``.

        Every argument is passed through unchanged to ``VQVAE_251``.
        """
        super().__init__()

        if args.dataname == 'kit':
            self.nb_joints = 21
        else:
            self.nb_joints = 22

        self.vqvae = VQVAE_251(
            args,
            nb_code,
            code_dim,
            output_emb_width,
            down_t,
            stride_t,
            width,
            depth,
            dilation_growth_rate,
            activation=activation,
            norm=norm,
        )

    def encode(self, x):
        """Return ``self.vqvae.encode(x)``; ``x`` must be 3-D."""
        # Unpacking into three names rejects anything that is not 3-D.
        _dim0, _dim1, _dim2 = x.size()
        return self.vqvae.encode(x)

    def encode_x(self, x):
        """Return ``self.vqvae.encode_x(x)``; ``x`` must be 3-D."""
        # Unpacking into three names rejects anything that is not 3-D.
        _dim0, _dim1, _dim2 = x.size()
        return self.vqvae.encode_x(x)

    def forward(self, x):
        """Call the wrapped model and return its three results as a tuple."""
        recon, vq_loss, ppl = self.vqvae(x)
        return recon, vq_loss, ppl

    def forward_decoder(self, x):
        """Return ``self.vqvae.forward_decoder(x)``."""
        return self.vqvae.forward_decoder(x)
| |