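# NOTE(review): the next three lines look like web-page residue captured with
# the file (apparently an avatar caption, a commit message and a short commit
# hash); kept as comments so the module parses. Confirm and remove if unneeded.
# vinthony's picture
# tts
# 416263d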
from tqdm import tqdm
import torch
from torch import nn
class Audio2Exp(nn.Module):
    """Wrap a generator network and run it over audio features in frame windows.

    The wrapped ``netG`` is moved to ``device`` at construction time and is
    invoked by :meth:`test` as ``netG(audio, ref, ratio)``.
    """

    def __init__(self, netG, cfg, device, prepare_training_loss=False):
        """Store the configuration and move the generator to ``device``.

        Args:
            netG: Generator module; called as ``netG(audio, ref, ratio)``.
            cfg: Configuration object; stored as ``self.cfg``, not read here.
            device: Device that ``netG`` is moved to; also stored as
                ``self.device``.
            prepare_training_loss: Unused by this class.
        """
        super(Audio2Exp, self).__init__()
        self.cfg = cfg
        self.device = device
        self.netG = netG.to(device)

    def test(self, batch):
        """Run ``netG`` over the batch in windows of 10 frames.

        Args:
            batch: Mapping that provides ``'indiv_mels'``, ``'ref'`` and
                ``'ratio_gt'``. All three are sliced along axis 1 with the
                same frame window; only the first 64 entries of the last
                axis of ``'ref'`` are used. The inline shape comments
                (``bs T 1 80 16`` etc.) are carried over from the original
                code.

        Returns:
            dict: ``{'exp_coeff_pred': tensor}``, where the tensor is the
            per-window outputs of ``netG`` concatenated along axis 1.
        """
        frames_per_step = 10

        mel_input = batch['indiv_mels']  # bs T 1 80 16
        ref_all = batch['ref'][:, :, :64]
        ratio_all = batch['ratio_gt']
        T = mel_input.shape[1]

        exp_coeff_pred = []
        for i in tqdm(range(0, T, frames_per_step), 'audio2exp:'):  # every 10 frames
            window = slice(i, i + frames_per_step)
            current_mel_input = mel_input[:, window]
            ref = ref_all[:, window]
            ratio = ratio_all[:, window]  # bs T

            # reshape() rather than view(): for a batch size above 1 the frame
            # slice is not contiguous across the first two axes, and view()
            # raises RuntimeError when asked to merge them.
            audiox = current_mel_input.reshape(-1, 1, 80, 16)  # bs*T 1 80 16

            curr_exp_coeff_pred = self.netG(audiox, ref, ratio)  # bs T 64
            exp_coeff_pred.append(curr_exp_coeff_pred)

        # BS x T x 64
        results_dict = {
            'exp_coeff_pred': torch.cat(exp_coeff_pred, dim=1),
        }
        return results_dict