{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "gpuType": "T4",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    },
    "accelerator": "GPU"
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "\"Open"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Install\n",
        "\n",
        "Clone the repository and install it into the kernel's environment. ",
        "Run these cells once, top to bottom, from `/content`; every later cell assumes the ",
        "working directory is `/content/deep-voice-cloning`."
      ],
      "metadata": {
        "id": "UsW1zD1cRqoX"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "_F_3LjI8zYiw",
        "outputId": "d8930d99-0d4a-4f7c-cbda-9774057672a2"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Cloning into 'deep-voice-cloning'...\n",
            "remote: Enumerating objects: 27, done.\u001b[K\n",
            "remote: Counting objects: 100% (27/27), done.\u001b[K\n",
            "remote: Compressing objects: 100% (24/24), done.\u001b[K\n",
            "remote: Total 27 (delta 0), reused 27 (delta 0), pack-reused 0\u001b[K\n",
            "Receiving objects: 100% (27/27), 1.36 MiB | 29.07 MiB/s, done.\n"
          ]
        }
      ],
      "source": [
        "!git clone https://github.com/konverner/deep-voice-cloning.git"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "%cd /content/deep-voice-cloning\n",
        "%pip install -q ."
      ],
      "metadata": {
        "id": "YctLbHO10ou9",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "5940018d-67bd-4d8d-ce84-0c7a5b92ff44"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Cloning Voice of Hank Hill\n",
        "\n",
        "Train on the sample clip `scripts/input/hank.mp3`, synthesise a sentence with the ",
        "saved model, then play the generated audio."
      ],
      "metadata": {
        "id": "caTAhpANGlnL"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "!python scripts/train.py --audio_path \"/content/deep-voice-cloning/scripts/input/hank.mp3\"\\\n",
        " --output_dir \"/content/deep-voice-cloning/models\""
      ],
      "metadata": {
        "id": "lYz6IM4ZGlSa"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "!python scripts/cloning_inference.py --model_path \"/content/deep-voice-cloning/models/microsoft_speecht5_tts_hank\"\\\n",
        " --input_text 'do the things, not because they are easy, but because they are hard'\\\n",
        " --output_path \"/content/deep-voice-cloning/scripts/output/hank_do_the_things.wav\""
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "_AFvKCrqOPsk",
        "outputId": "5f1b096c-04e5-4645-d13e-3878ef12a8f3"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "2023-07-22 11:29:53.541592: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "from IPython import display\n",
        "display.Audio(\"/content/deep-voice-cloning/scripts/output/hank_do_the_things.wav\")"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 75
        },
        "id": "QmboXL9LOSKD",
        "outputId": "f682399c-7111-4957-cc05-c854f9ab7d19"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              ""
            ],
            "text/html": [
              "\n",
              " \n",
              " "
            ]
          },
          "metadata": {},
          "execution_count": 14
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Cloning Voice of Homer Simpson\n",
        "\n",
        "Same workflow, using the sample clip `scripts/input/homer.mp3`."
      ],
      "metadata": {
        "id": "hNlD1Ocfjp1k"
      }
    },
    {
"cell_type": "code",
      "source": [
        "!python scripts/train.py --audio_path \"/content/deep-voice-cloning/scripts/input/homer.mp3\"\\\n",
        " --output_dir \"/content/deep-voice-cloning/models\""
      ],
      "metadata": {
        "id": "p15KMsOT0vKY"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "!python scripts/cloning_inference.py --model_path \"/content/deep-voice-cloning/models/microsoft_speecht5_tts_homer\"\\\n",
        " --input_text 'do the things, not because they are easy, but because they are hard'\\\n",
        " --output_path \"/content/deep-voice-cloning/scripts/output/homer_do_the_things.wav\""
      ],
      "metadata": {
        "id": "UHUi1maPI6jJ",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "45fefc02-4551-46fc-cc2e-2e43b589aa80"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "2023-07-21 22:24:34.290393: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "from IPython import display\n",
        "display.Audio(\"/content/deep-voice-cloning/scripts/output/homer_do_the_things.wav\")"
      ],
      "metadata": {
        "id": "Gl7sjDiKS4eI",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 75
        },
        "outputId": "c9d1b9af-9a1d-472b-804f-e17011d52135"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              ""
            ],
            "text/html": [
              "\n",
              " \n",
              " "
            ]
          },
          "metadata": {},
          "execution_count": 16
        }
      ]
    }
  ]
}