In [None]:
# Clone the StarGANv2-VC repository (later cells import its `models` and `Utils`
# packages) and install the packages used alongside it.
!git clone https://github.com/yl4579/StarGANv2-VC
!pip install SoundFile torchaudio munch
# Clone the Paddle port and install its dependencies; paddlepaddle-gpu is pinned to 2.2.2.
# NOTE(review): the `cd` does not appear to affect a `pip install` of named packages — confirm it can be dropped.
!git clone https://github.com/HighCWu/starganv2vc-paddle
!cd starganv2vc-paddle && pip install paddlepaddle-gpu==2.2.2 paddleaudio munch pydub
# Copy the `starganv2vc_paddle` package into the StarGANv2-VC checkout so that both
# code bases are importable from that one directory.
!cp -r starganv2vc-paddle/starganv2vc_paddle StarGANv2-VC/

In [None]:
# Download a file from Google Drive by its id.
# NOTE(review): the next cell expects this to produce `Models.zip` — confirm the id still resolves to that archive.
!gdown https://drive.google.com/uc?id=1nzTyyl-9A1Hmqya2Q_f2bpZkUoRjbZsY

In [None]:
# Unpack the archive quietly, delete the zip, and move the extracted `Models`
# directory into the StarGANv2-VC checkout (the Python cells read `./Models/...`).
!unzip -qq Models.zip
!rm -rf Models.zip
!mv Models StarGANv2-VC/Models

In [None]:
# Work from inside the checkout so that the imports (`models`, `Utils`,
# `starganv2vc_paddle`) and the relative `./Models/...` paths used below resolve.
%cd StarGANv2-VC

In [None]:
import os
import yaml
import numpy as np
import torch
import warnings
warnings.simplefilter('ignore')

from munch import Munch

from models import build_model

from Utils.ASR.models import ASRCNN
from Utils.JDC.model import JDCNet

torch.backends.cudnn.benchmark = True # let cuDNN benchmark and pick convolution algorithms

def main(config_path, pretrained_path='Models/epoch_00150.pth'):
    """Load the pretrained PyTorch ASR, F0 and StarGANv2-VC EMA models.

    Args:
        config_path: Path to the YAML config. It must provide ``ASR_config``,
            ``ASR_path``, ``F0_path`` and ``model_params``; ``device`` is
            optional and defaults to ``'cpu'``.
        pretrained_path: Checkpoint file whose ``'model_ema'`` entry maps each
            sub-model name to its state dict. Defaults to the checkpoint this
            notebook has always used.

    Returns:
        Tuple ``(ASR_model, F0_model, model_ema)``. All models are moved to the
        configured device and switched to eval mode.

    Raises:
        KeyError: If a required config key is missing.
    """
    # NOTE: torch.load unpickles its input; only point this at trusted checkpoints.
    with open(config_path) as f:
        config = yaml.safe_load(f)

    device = config.get('device', 'cpu')

    # load pretrained ASR model
    # Index the required keys directly: a `.get(key, False)` fallback would hand
    # `False` to `open()`, which treats it as file descriptor 0 instead of failing.
    ASR_config_path = config['ASR_config']
    ASR_path = config['ASR_path']
    with open(ASR_config_path) as f:
        ASR_config = yaml.safe_load(f)
    ASR_model_config = ASR_config['model_params']
    ASR_model = ASRCNN(**ASR_model_config)
    params = torch.load(ASR_path, map_location='cpu')['model']
    ASR_model.load_state_dict(params)
    ASR_model.to(device)
    ASR_model.eval()

    # load pretrained F0 model
    F0_path = config['F0_path']
    F0_model = JDCNet(num_class=1, seq_len=192)
    params = torch.load(F0_path, map_location='cpu')['net']
    F0_model.load_state_dict(params)
    F0_model.to(device)
    # Put the F0 model in eval mode as well, matching the ASR model above.
    F0_model.eval()

    # build model
    _, model_ema = build_model(Munch(config['model_params']), F0_model, ASR_model)
    params = torch.load(pretrained_path, map_location='cpu')['model_ema']
    for key, state_dict in params.items():
        model_ema[key].load_state_dict(state_dict)
    for key in model_ema:
        model_ema[key].to(device)
        model_ema[key].eval()

    return ASR_model, F0_model, model_ema

# Load the PyTorch models once; the conversion cell below reads these three
# module-level names as the source of the weights.
ASR_model_torch, F0_model_torch, model_ema_torch = main('./Models/config.yml')


In [None]:
import os
import yaml
import numpy as np
import paddle
import warnings
warnings.simplefilter('ignore')

from munch import Munch

from starganv2vc_paddle.models import build_model

from starganv2vc_paddle.Utils.ASR.models import ASRCNN
from starganv2vc_paddle.Utils.JDC.model import JDCNet

@paddle.no_grad()
def convert_weights(torch_model, paddle_model):
    """Copy the weights of a PyTorch module into the matching Paddle layer.

    Both models are switched to eval mode first. Each Paddle parameter name is
    mapped to a key of the PyTorch state dict (``._mean`` -> ``.running_mean``,
    ``._variance`` -> ``.running_var``, ``.scale`` -> ``.weight``) and the
    value found there is written into the Paddle parameter in place.

    Args:
        torch_model: Source ``torch.nn.Module`` holding the trained weights.
        paddle_model: Destination Paddle layer; it is modified in place.

    Raises:
        KeyError: If a translated parameter name is absent from the PyTorch
            state dict.
    """
    torch_model.eval()
    paddle_model.eval()

    # Qualified names of every torch.nn.Linear sub-module; their 2-D weights
    # are transposed before being copied over.
    linear_names = {
        module_name
        for module_name, module in torch_model.named_modules()
        if isinstance(module, torch.nn.Linear)
    }

    source_state = torch_model.state_dict()
    for paddle_name, paddle_param in paddle_model.named_parameters():
        torch_name = (
            paddle_name
            .replace('._mean', '.running_mean')
            .replace('._variance', '.running_var')
            .replace('.scale', '.weight')
        )
        weights = source_state[torch_name].detach().cpu().numpy()

        # Everything before the last dot is the owning module's name.
        owner = torch_name.rpartition('.')[0]
        if owner in linear_names and len(paddle_param.shape) == 2:
            weights = weights.transpose((1, 0))

        paddle_param.set_value(paddle.to_tensor(weights))

@torch.no_grad()
@paddle.no_grad()
def main(config_path):
    """Convert the loaded PyTorch models to Paddle, check parity, and save.

    Reads the module-level ``ASR_model_torch``, ``F0_model_torch`` and
    ``model_ema_torch`` as the weight source. For each converted model the
    same random input is fed to both frameworks and the mean absolute
    difference of the outputs is printed. The Paddle state dicts are then
    written to ``ASR.pd``, ``F0.pd`` and ``VC.pd`` in the current directory.

    Args:
        config_path: Path to the YAML config; it must provide ``ASR_config``
            and ``model_params``.

    Returns:
        ``0`` once all three files have been saved.

    Raises:
        KeyError: If a required config key is missing.
    """
    def to_torch(paddle_tensor, torch_module):
        # Go through NumPy and land on whatever device the torch module lives on,
        # rather than assuming CUDA: the loader cell's device defaults to 'cpu'.
        target_device = next(torch_module.parameters()).device
        return torch.from_numpy(paddle_tensor.numpy()).to(target_device)

    def mean_abs_diff(torch_tensor, paddle_tensor):
        # Absolute value keeps positive and negative deviations from cancelling,
        # which a plain signed mean would allow.
        return np.abs(torch_tensor.cpu().numpy() - paddle_tensor.numpy()).mean()

    with open(config_path) as f:
        config = yaml.safe_load(f)

    # Index the key directly: a `.get(key, False)` fallback would hand `False`
    # to `open()`, which treats it as file descriptor 0 instead of failing.
    with open(config['ASR_config']) as f:
        ASR_config = yaml.safe_load(f)
    ASR_model_config = ASR_config['model_params']
    ASR_model = ASRCNN(**ASR_model_config)
    ASR_model.eval()
    convert_weights(ASR_model_torch, ASR_model)

    F0_model = JDCNet(num_class=1, seq_len=192)
    F0_model.eval()
    convert_weights(F0_model_torch, F0_model)

    # build model
    _, model_ema = build_model(Munch(config['model_params']), F0_model, ASR_model)

    # --- ASR parity check ---
    asr_input = paddle.randn([2, 80, 192])
    asr_output = ASR_model(asr_input)
    asr_output_torch = ASR_model_torch(to_torch(asr_input, ASR_model_torch))
    print('ASR model input:', asr_input.shape, 'output:', asr_output.shape)
    print('Error:', mean_abs_diff(asr_output_torch, asr_output))

    # --- F0 parity check (the model returns several tensors) ---
    mel_input = paddle.randn([2, 1, 192, 512])
    f0_output = F0_model(mel_input)
    f0_output_torch = F0_model_torch(to_torch(mel_input, F0_model_torch))
    print('F0 model input:', mel_input.shape, 'output:', [t.shape for t in f0_output])
    print('Error:', [mean_abs_diff(t1, t2) for t1, t2 in zip(f0_output_torch, f0_output)])

    # --- Convert every EMA sub-model, then check each one ---
    for k in model_ema.keys():
        convert_weights(model_ema_torch[k], model_ema[k])

    label = paddle.to_tensor([0,0], dtype=paddle.int64)
    latent_dim = model_ema.mapping_network.shared[0].weight.shape[0]
    latent_style = paddle.randn([2, latent_dim])
    ref = model_ema.mapping_network(latent_style, label)
    ref_torch = model_ema_torch.mapping_network(
        to_torch(latent_style, model_ema_torch.mapping_network),
        to_torch(label, model_ema_torch.mapping_network),
    )
    print('Error of mapping network:', mean_abs_diff(ref_torch, ref))

    mel_input2 = paddle.randn([2, 1, 192, 512])
    style_ref = model_ema.style_encoder(mel_input2, label)
    style_ref_torch = model_ema_torch.style_encoder(
        to_torch(mel_input2, model_ema_torch.style_encoder),
        to_torch(label, model_ema_torch.style_encoder),
    )
    print('StyleGANv2-VC encoder inputs:', mel_input2.shape, 'output:', style_ref.shape, 'should has the same shape as the ref:', ref.shape)
    print('Error of style encoder:', mean_abs_diff(style_ref_torch, style_ref))

    f0_feat = F0_model.get_feature_GAN(mel_input)
    f0_feat_torch = F0_model_torch.get_feature_GAN(to_torch(mel_input, F0_model_torch))
    print('Error of f0 feat:', mean_abs_diff(f0_feat_torch, f0_feat))

    # The torch generator is fed the Paddle-side style and F0 features, so the
    # printed error reflects the generator alone.
    out = model_ema.generator(mel_input, style_ref, F0=f0_feat)
    out_torch = model_ema_torch.generator(
        to_torch(mel_input, model_ema_torch.generator),
        to_torch(style_ref, model_ema_torch.generator),
        F0=to_torch(f0_feat, model_ema_torch.generator),
    )
    print('StyleGANv2-VC inputs:', label.shape, latent_style.shape, mel_input.shape, 'output:', out.shape)
    print('Error:', mean_abs_diff(out_torch, out))

    # --- Persist the converted weights ---
    paddle.save({'model': ASR_model.state_dict()}, 'ASR.pd')
    paddle.save({ 'net': F0_model.state_dict()}, 'F0.pd')
    model_ema_dict = {key: sub_model.state_dict() for key, sub_model in model_ema.items()}

    paddle.save({ 'model_ema': model_ema_dict }, 'VC.pd')

    return 0

# Run the conversion; it writes ASR.pd, F0.pd and VC.pd to the current directory.
# NOTE: this `main` is the one defined in this cell — it shadows the loader `main`
# defined in the earlier PyTorch cell.
main('./Models/config.yml')
