{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "Copyright 2020 The TensorFlowTTS Team. All Rights Reserved.\n", "Licensed under the Apache License, Version 2.0 (the \"License\");" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", "# you may not use this file except in compliance with the License.\n", "# You may obtain a copy of the License at\n", "#\n", "# https://www.apache.org/licenses/LICENSE-2.0\n", "#\n", "# Unless required by applicable law or agreed to in writing, software\n", "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", "# See the License for the specific language governing permissions and\n", "# limitations under the License." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!pip install parallel_wavegan" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/lap13548/anaconda3/envs/tensorflow-tts/lib/python3.7/site-packages/tensorflow_addons/utils/ensure_tf_install.py:68: UserWarning: Tensorflow Addons supports using Python ops for all Tensorflow versions above or equal to 2.2.0 and strictly below 2.3.0 (nightly versions are not supported). \n", " The versions of TensorFlow you are currently using is 2.3.0 and is not supported. \n", "Some things might work, some things might not.\n", "If you were to encounter a bug, do not file an issue.\n", "If you want to make sure you're using a tested and supported configuration, either change the TensorFlow version or the TensorFlow Addons's version. 
\n", "You can find the compatibility matrix in TensorFlow Addon's readme:\n", "https://github.com/tensorflow/addons\n", " UserWarning,\n" ] } ], "source": [ "import tensorflow as tf\n", "import torch\n", "\n", "from tensorflow_tts.models import TFParallelWaveGANGenerator\n", "from tensorflow_tts.configs import ParallelWaveGANGeneratorConfig\n", "\n", "from parallel_wavegan.models import ParallelWaveGANGenerator\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "tf_model = TFParallelWaveGANGenerator(config=ParallelWaveGANGeneratorConfig(), name=\"parallel_wavegan_generator\")" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "tf_model._build()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Model: \"parallel_wavegan_generator\"\n", "_________________________________________________________________\n", "Layer (type) Output Shape Param # \n", "=================================================================\n", "first_convolution (TFConv1d1 multiple 128 \n", "_________________________________________________________________\n", "tf_conv_in_upsample_net_work multiple 32036 \n", "_________________________________________________________________\n", "residual_block_._0 (TFResidu multiple 43264 \n", "_________________________________________________________________\n", "residual_block_._1 (TFResidu multiple 43264 \n", "_________________________________________________________________\n", "residual_block_._2 (TFResidu multiple 43264 \n", "_________________________________________________________________\n", "residual_block_._3 (TFResidu multiple 43264 \n", "_________________________________________________________________\n", "residual_block_._4 (TFResidu multiple 43264 \n", "_________________________________________________________________\n", "residual_block_._5 (TFResidu 
multiple 43264 \n", "_________________________________________________________________\n", "residual_block_._6 (TFResidu multiple 43264 \n", "_________________________________________________________________\n", "residual_block_._7 (TFResidu multiple 43264 \n", "_________________________________________________________________\n", "residual_block_._8 (TFResidu multiple 43264 \n", "_________________________________________________________________\n", "residual_block_._9 (TFResidu multiple 43264 \n", "_________________________________________________________________\n", "residual_block_._10 (TFResid multiple 43264 \n", "_________________________________________________________________\n", "residual_block_._11 (TFResid multiple 43264 \n", "_________________________________________________________________\n", "residual_block_._12 (TFResid multiple 43264 \n", "_________________________________________________________________\n", "residual_block_._13 (TFResid multiple 43264 \n", "_________________________________________________________________\n", "residual_block_._14 (TFResid multiple 43264 \n", "_________________________________________________________________\n", "residual_block_._15 (TFResid multiple 43264 \n", "_________________________________________________________________\n", "residual_block_._16 (TFResid multiple 43264 \n", "_________________________________________________________________\n", "residual_block_._17 (TFResid multiple 43264 \n", "_________________________________________________________________\n", "residual_block_._18 (TFResid multiple 43264 \n", "_________________________________________________________________\n", "residual_block_._19 (TFResid multiple 43264 \n", "_________________________________________________________________\n", "residual_block_._20 (TFResid multiple 43264 \n", "_________________________________________________________________\n", "residual_block_._21 (TFResid multiple 43264 \n", 
"_________________________________________________________________\n", "residual_block_._22 (TFResid multiple 43264 \n", "_________________________________________________________________\n", "residual_block_._23 (TFResid multiple 43264 \n", "_________________________________________________________________\n", "residual_block_._24 (TFResid multiple 43264 \n", "_________________________________________________________________\n", "residual_block_._25 (TFResid multiple 43264 \n", "_________________________________________________________________\n", "residual_block_._26 (TFResid multiple 43264 \n", "_________________________________________________________________\n", "residual_block_._27 (TFResid multiple 43264 \n", "_________________________________________________________________\n", "residual_block_._28 (TFResid multiple 43264 \n", "_________________________________________________________________\n", "residual_block_._29 (TFResid multiple 43264 \n", "_________________________________________________________________\n", "re_lu (ReLU) multiple 0 \n", "_________________________________________________________________\n", "tf_conv1d1x1 (TFConv1d1x1) multiple 4160 \n", "_________________________________________________________________\n", "re_lu_1 (ReLU) multiple 0 \n", "_________________________________________________________________\n", "tf_conv1d1x1_1 (TFConv1d1x1) multiple 65 \n", "_________________________________________________________________\n", "activation_12 (Activation) multiple 0 \n", "=================================================================\n", "Total params: 1,334,309\n", "Trainable params: 1,334,309\n", "Non-trainable params: 0\n", "_________________________________________________________________\n" ] } ], "source": [ "tf_model.summary()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "torch_checkpoints = torch.load(\"./checkpoint-400000steps.pkl\", map_location=torch.device('cpu'))" ] }, { 
"cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "torch_generator_weights = torch_checkpoints[\"model\"][\"generator\"]" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "torch_model = ParallelWaveGANGenerator()" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "torch_model.load_state_dict(torch_checkpoints[\"model\"][\"generator\"])" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "torch_model.remove_weight_norm()" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1334309" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model_parameters = filter(lambda p: p.requires_grad, torch_model.parameters())\n", "params = sum([np.prod(p.size()) for p in model_parameters])\n", "params" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "# in pytorch, in convolution layer, the order is bias -> weight, in tf it is weight -> bias. 
# In PyTorch a convolution layer's parameters are listed as bias -> weight,
# while in TensorFlow they are listed as weight -> bias, so they must be
# re-ordered. The kernels also use a different axis layout, which the function
# below converts.

def convert_weights_pytorch_to_tensorflow(weights_pytorch):
    """Convert one PyTorch convolution parameter to the TensorFlow axis layout.

    Args:
        weights_pytorch: numpy array holding a single parameter (bias or kernel).

    Returns:
        The parameter with its axes reversed (a transposed view, not a copy):
          * rank 1 (bias): returned unchanged.
          * rank 3 (conv1d kernel):
            (f_output, f_input, kernel_size) -> (kernel_size, f_input, f_output)
          * rank 4 (conv2d kernel):
            (f_output, f_input, kernel_size_h, kernel_size_w)
            -> (kernel_size_w, kernel_size_h, f_input, f_output)

    Raises:
        ValueError: if the parameter has any other rank. Previously such input
            fell through and silently produced ``None``.
    """
    rank = len(weights_pytorch.shape)

    if rank == 1:  # bias: same layout in both frameworks
        return weights_pytorch

    if rank == 3:  # conv1d kernel
        return np.transpose(weights_pytorch, (2, 1, 0))

    if rank == 4:  # conv2d kernel
        # NOTE(review): the result is (kernel_w, kernel_h, in, out), i.e. the two
        # spatial axes are swapped as well; presumably the TensorFlow model lays
        # out its 2D input with those axes swapped -- confirm.
        return np.transpose(weights_pytorch, (3, 2, 1, 0))

    raise ValueError(
        "Unsupported parameter rank {}: expected 1 (bias), 3 (conv1d kernel) "
        "or 4 (conv2d kernel).".format(rank)
    )
# %%
import matplotlib.pyplot as plt
from IPython.display import Audio

# %%
# Collect the PyTorch parameters in the order used for the positional copy
# below: within a module, PyTorch lists bias -> weight, whereas the copy needs
# weight -> bias.

def _to_tf_layout(tensor):
    """Detach a PyTorch tensor to numpy and convert it to the TensorFlow layout."""
    return convert_weights_pytorch_to_tensorflow(tensor.cpu().detach().numpy())


state_dict = torch_model.state_dict()
all_keys = list(state_dict.keys())
all_values = list(state_dict.values())

torch_weights = []
skip_next = False  # set when the following key was already emitted as part of a pair
for i, key in enumerate(all_keys):
    if skip_next:
        skip_next = False
        continue

    *module, kind = key.split(".")
    # The last key has no successor. It is still visited, so a trailing
    # stand-alone parameter is no longer dropped.
    following = all_keys[i + 1].split(".") if i + 1 != len(all_keys) else None

    if kind == "bias" and following == module + ["weight"]:
        # Same module, stored as bias -> weight: emit weight first, then bias.
        torch_weights.append(_to_tf_layout(all_values[i + 1]))
        torch_weights.append(_to_tf_layout(all_values[i]))
        skip_next = True
    else:
        # Stand-alone parameter, or a pair already in weight -> bias order:
        # keep the original position.
        torch_weights.append(_to_tf_layout(all_values[i]))

# %%
tf_var = tf_model.trainable_variables

# %%
# The copy is purely positional, so a count mismatch means the two models do
# not line up; fail loudly instead of silently ignoring surplus tensors.
if len(tf_var) != len(torch_weights):
    raise ValueError(
        "Variable count mismatch: TensorFlow model has {} trainable variables "
        "but {} PyTorch tensors were collected.".format(len(tf_var), len(torch_weights))
    )

for var, value in zip(tf_var, torch_weights):
    tf.keras.backend.set_value(var, value)

# %%
# Load the features and add a leading axis in one step so that re-running this
# cell does not keep adding axes.
feats = np.expand_dims(np.load("LJ001-0009-norm-feats.npy"), 0)

# %%
# Keep the first item and the first channel of the generator output.
audio = tf_model(feats)[0, :, 0]

# %%
plt.plot(audio);

# %%
Audio(audio, rate=22050)