{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "starganv2_vc_weights_converter.ipynb",
      "private_outputs": true,
      "provenance": [],
      "collapsed_sections": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    },
    "accelerator": "GPU"
  },
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "CA5i7YAlagUA"
      },
      "outputs": [],
      "source": [
        "# Fetch the reference PyTorch implementation and its dependencies.\n",
        "!git clone https://github.com/yl4579/StarGANv2-VC\n",
        "# %pip (rather than !pip) installs into the environment of the running kernel.\n",
        "%pip install SoundFile torchaudio munch\n",
        "# Fetch the Paddle port and its dependencies.\n",
        "!git clone https://github.com/HighCWu/starganv2vc-paddle\n",
        "%pip install paddlepaddle-gpu==2.2.2 paddleaudio munch pydub\n",
        "# Copy the Paddle package into the PyTorch checkout so both can be imported from there.\n",
        "!cp -r starganv2vc-paddle/starganv2vc_paddle StarGANv2-VC/"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "!gdown https://drive.google.com/uc?id=1nzTyyl-9A1Hmqya2Q_f2bpZkUoRjbZsY"
      ],
      "metadata": {
        "id": "ac4g4L1Bbx1t"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "!unzip -qq Models.zip\n",
        "!rm -rf Models.zip\n",
        "!mv Models StarGANv2-VC/Models"
      ],
      "metadata": {
        "id": "EJ3vG_RvcOD8"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "%cd StarGANv2-VC"
      ],
      "metadata": {
        "id": "rKovh1Egi4mJ"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "import os\n",
        "import yaml\n",
        "import numpy as np\n",
        "import torch\n",
        "import warnings\n",
        "warnings.simplefilter('ignore')\n",
        "\n",
        "from munch import Munch\n",
        "\n",
        "from models import build_model\n",
        "\n",
        "from Utils.ASR.models import ASRCNN\n",
        "from Utils.JDC.model import JDCNet\n",
        "\n",
        "torch.backends.cudnn.benchmark = True #\n",
        "\n",
        "def main(config_path, pretrained_path='Models/epoch_00150.pth'):\n",
        "    \"\"\"Load the pretrained PyTorch ASR, F0 and StarGANv2-VC EMA models.\n",
        "\n",
        "    Args:\n",
        "        config_path: Path to the YAML config. It must provide `ASR_config`,\n",
        "            `ASR_path`, `F0_path` and `model_params`; `device` is optional\n",
        "            and defaults to 'cpu'.\n",
        "        pretrained_path: Checkpoint whose `model_ema` entry maps each\n",
        "            sub-model key to its state dict.\n",
        "\n",
        "    Returns:\n",
        "        Tuple `(ASR_model, F0_model, model_ema)`, each moved to the device.\n",
        "\n",
        "    Raises:\n",
        "        KeyError: If a required path entry is missing from the config.\n",
        "    \"\"\"\n",
        "    # Close the config file deterministically instead of leaking the handle.\n",
        "    with open(config_path) as f:\n",
        "        config = yaml.safe_load(f)\n",
        "\n",
        "    # A missing entry used to fall back to False, and open(False) opens file\n",
        "    # descriptor 0 (stdin) instead of failing, so reject it up front.\n",
        "    missing = [key for key in ('ASR_config', 'ASR_path', 'F0_path') if not config.get(key)]\n",
        "    if missing:\n",
        "        raise KeyError('missing config entries: ' + ', '.join(missing))\n",
        "\n",
        "    device = config.get('device', 'cpu')\n",
        "\n",
        "    # load pretrained ASR model\n",
        "    with open(config['ASR_config']) as f:\n",
        "        ASR_config = yaml.safe_load(f)\n",
        "    ASR_model = ASRCNN(**ASR_config['model_params'])\n",
        "    params = torch.load(config['ASR_path'], map_location='cpu')['model']\n",
        "    ASR_model.load_state_dict(params)\n",
        "    ASR_model.to(device)\n",
        "    ASR_model.eval()\n",
        "\n",
        "    # load pretrained F0 model\n",
        "    F0_model = JDCNet(num_class=1, seq_len=192)\n",
        "    params = torch.load(config['F0_path'], map_location='cpu')['net']\n",
        "    F0_model.load_state_dict(params)\n",
        "    F0_model.to(device)\n",
        "\n",
        "    # build model; only the EMA copy is kept\n",
        "    _, model_ema = build_model(Munch(config['model_params']), F0_model, ASR_model)\n",
        "    params = torch.load(pretrained_path, map_location='cpu')['model_ema']\n",
        "    for key, state_dict in params.items():\n",
        "        model_ema[key].load_state_dict(state_dict)\n",
        "    for key in model_ema:\n",
        "        model_ema[key].to(device)\n",
        "\n",
        "    return ASR_model, F0_model, model_ema\n",
        "\n",
        "ASR_model_torch, F0_model_torch, model_ema_torch = main('./Models/config.yml')\n"
      ],
      "metadata": {
        "id": "UpMuk5kni67B"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "import os\n",
        "import yaml\n",
        "import numpy as np\n",
        "import paddle\n",
        "import warnings\n",
        "warnings.simplefilter('ignore')\n",
        "\n",
        "from munch import Munch\n",
        "\n",
        "from starganv2vc_paddle.models import build_model\n",
        "\n",
        "from starganv2vc_paddle.Utils.ASR.models import ASRCNN\n",
        "from starganv2vc_paddle.Utils.JDC.model import JDCNet\n",
        "\n",
        "@paddle.no_grad()\n",
        "def convert_weights(torch_model, paddle_model):\n",
        "    \"\"\"Copy every parameter of `torch_model` into `paddle_model` in place.\n",
        "\n",
        "    Both models are switched to eval mode. Each Paddle parameter name is\n",
        "    mapped to a torch state-dict key (`._mean` -> `.running_mean`,\n",
        "    `._variance` -> `.running_var`, `.scale` -> `.weight`). A 2-D parameter\n",
        "    owned by a `torch.nn.Linear` module is transposed before assignment.\n",
        "\n",
        "    Raises:\n",
        "        KeyError: If a mapped name is absent from the torch state dict.\n",
        "    \"\"\"\n",
        "    torch_model.eval()\n",
        "    paddle_model.eval()\n",
        "\n",
        "    # Qualified names of all torch sub-modules that are Linear layers.\n",
        "    linear_names = [\n",
        "        module_name\n",
        "        for module_name, module in torch_model.named_modules()\n",
        "        if isinstance(module, torch.nn.Linear)\n",
        "    ]\n",
        "    source_weights = torch_model.state_dict()\n",
        "\n",
        "    # Applied in order: Paddle naming -> torch naming.\n",
        "    renames = (\n",
        "        ('._mean', '.running_mean'),\n",
        "        ('._variance', '.running_var'),\n",
        "        ('.scale', '.weight'),\n",
        "    )\n",
        "    for paddle_name, paddle_param in paddle_model.named_parameters():\n",
        "        torch_name = paddle_name\n",
        "        for old, new in renames:\n",
        "            torch_name = torch_name.replace(old, new)\n",
        "        value = source_weights[torch_name].detach().cpu().numpy()\n",
        "        # Everything before the last dot is the owning module's name.\n",
        "        owner = torch_name.rpartition('.')[0]\n",
        "        if owner in linear_names and len(paddle_param.shape) == 2:\n",
        "            # NOTE(review): presumably the two frameworks store Linear weights\n",
        "            # with opposite axis order - confirm against the Paddle docs.\n",
        "            value = value.transpose((1, 0))\n",
        "        paddle_param.set_value(paddle.to_tensor(value))\n",
        "\n",
        "@torch.no_grad()\n",
        "@paddle.no_grad()\n",
        "def main(config_path):\n",
        "    \"\"\"Convert the loaded PyTorch models to Paddle, compare outputs and save.\n",
        "\n",
        "    Relies on the globals `ASR_model_torch`, `F0_model_torch` and\n",
        "    `model_ema_torch` created by the previous cell. Random inputs are fed to\n",
        "    both implementations and the mean absolute difference of their outputs\n",
        "    is printed. The converted weights are written to `ASR.pd`, `F0.pd` and\n",
        "    `VC.pd` in the current working directory.\n",
        "\n",
        "    Args:\n",
        "        config_path: Path to the YAML config providing `ASR_config` and\n",
        "            `model_params`.\n",
        "\n",
        "    Returns:\n",
        "        0 on completion.\n",
        "\n",
        "    Raises:\n",
        "        KeyError: If `ASR_config` or `model_params` is missing from the config.\n",
        "    \"\"\"\n",
        "    def to_torch(tensor):\n",
        "        # Mirror a Paddle tensor onto the device the torch models live on,\n",
        "        # instead of assuming CUDA.\n",
        "        return torch.from_numpy(tensor.numpy()).to(torch_device)\n",
        "\n",
        "    def mean_abs_error(torch_out, paddle_out):\n",
        "        # Absolute difference: with a signed mean, positive and negative\n",
        "        # deviations cancel and a bad conversion can still report ~0.\n",
        "        return np.abs(torch_out.cpu().numpy() - paddle_out.numpy()).mean()\n",
        "\n",
        "    # Close the config file deterministically instead of leaking the handle.\n",
        "    with open(config_path) as f:\n",
        "        config = yaml.safe_load(f)\n",
        "\n",
        "    # All torch models were moved to one device by the loader cell.\n",
        "    torch_device = next(ASR_model_torch.parameters()).device\n",
        "\n",
        "    # Indexing (not .get(..., False)) so a missing entry raises KeyError\n",
        "    # rather than reaching open(False), which opens file descriptor 0.\n",
        "    with open(config['ASR_config']) as f:\n",
        "        ASR_config = yaml.safe_load(f)\n",
        "    ASR_model = ASRCNN(**ASR_config['model_params'])\n",
        "    ASR_model.eval()\n",
        "    convert_weights(ASR_model_torch, ASR_model)\n",
        "\n",
        "    F0_model = JDCNet(num_class=1, seq_len=192)\n",
        "    F0_model.eval()\n",
        "    convert_weights(F0_model_torch, F0_model)\n",
        "\n",
        "    # build model; only the EMA copy is converted and saved\n",
        "    _, model_ema = build_model(Munch(config['model_params']), F0_model, ASR_model)\n",
        "\n",
        "    # --- ASR model check ---\n",
        "    asr_input = paddle.randn([2, 80, 192])\n",
        "    asr_output = ASR_model(asr_input)\n",
        "    asr_output_torch = ASR_model_torch(to_torch(asr_input))\n",
        "    print('ASR model input:', asr_input.shape, 'output:', asr_output.shape)\n",
        "    print('Error:', mean_abs_error(asr_output_torch, asr_output))\n",
        "\n",
        "    # --- F0 model check ---\n",
        "    mel_input = paddle.randn([2, 1, 192, 512])\n",
        "    f0_output = F0_model(mel_input)\n",
        "    f0_output_torch = F0_model_torch(to_torch(mel_input))\n",
        "    print('F0 model input:', mel_input.shape, 'output:', [t.shape for t in f0_output])\n",
        "    print('Error:', [mean_abs_error(t1, t2) for t1, t2 in zip(f0_output_torch, f0_output)])\n",
        "\n",
        "    # --- voice-conversion sub-models ---\n",
        "    for k in model_ema.keys():\n",
        "        convert_weights(model_ema_torch[k], model_ema[k])\n",
        "    label = paddle.to_tensor([0,0], dtype=paddle.int64)\n",
        "    latent_dim = model_ema.mapping_network.shared[0].weight.shape[0]\n",
        "    latent_style = paddle.randn([2, latent_dim])\n",
        "    ref = model_ema.mapping_network(latent_style, label)\n",
        "    ref_torch = model_ema_torch.mapping_network(to_torch(latent_style), to_torch(label))\n",
        "    print('Error of mapping network:', mean_abs_error(ref_torch, ref))\n",
        "    mel_input2 = paddle.randn([2, 1, 192, 512])\n",
        "    style_ref = model_ema.style_encoder(mel_input2, label)\n",
        "    style_ref_torch = model_ema_torch.style_encoder(to_torch(mel_input2), to_torch(label))\n",
        "    print('StyleGANv2-VC encoder inputs:', mel_input2.shape, 'output:', style_ref.shape, 'should has the same shape as the ref:', ref.shape)\n",
        "    print('Error of style encoder:', mean_abs_error(style_ref_torch, style_ref))\n",
        "    f0_feat = F0_model.get_feature_GAN(mel_input)\n",
        "    f0_feat_torch = F0_model_torch.get_feature_GAN(to_torch(mel_input))\n",
        "    print('Error of f0 feat:', mean_abs_error(f0_feat_torch, f0_feat))\n",
        "    out = model_ema.generator(mel_input, style_ref, F0=f0_feat)\n",
        "    # The torch generator is fed the Paddle-side style and F0 features so only\n",
        "    # the generator itself is being compared.\n",
        "    out_torch = model_ema_torch.generator(to_torch(mel_input), to_torch(style_ref), F0=to_torch(f0_feat))\n",
        "    print('StyleGANv2-VC inputs:', label.shape, latent_style.shape, mel_input.shape, 'output:', out.shape)\n",
        "    print('Error:', mean_abs_error(out_torch, out))\n",
        "\n",
        "    # Save using the same top-level keys the loader reads from the torch checkpoints.\n",
        "    paddle.save({'model': ASR_model.state_dict()}, 'ASR.pd')\n",
        "    paddle.save({ 'net': F0_model.state_dict()}, 'F0.pd')\n",
        "    model_ema_dict = {key: sub_model.state_dict() for key, sub_model in model_ema.items()}\n",
        "\n",
        "    paddle.save({ 'model_ema': model_ema_dict }, 'VC.pd')\n",
        "\n",
        "    return 0\n",
        "\n",
        "main('./Models/config.yml')\n"
      ],
      "metadata": {
        "id": "PnuApVuyIIyd"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}