diff --git "a/notebooks/CLI_Example.ipynb" "b/notebooks/CLI_Example.ipynb"
new file mode 100644
--- /dev/null
+++ "b/notebooks/CLI_Example.ipynb"
@@ -0,0 +1,256 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": [],
+      "gpuType": "T4",
+      "include_colab_link": true
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    },
+    "accelerator": "GPU"
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "view-in-github",
+        "colab_type": "text"
+      },
+      "source": [
+        "<a href=\"https://colab.research.google.com/github/konverner/deep-voice-cloning/blob/main/notebooks/CLI_Example.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## Install\n",
+        "\n",
+        "Clone the repository and install it with pip. The `%cd` below uses an absolute path so that re-running the cell does not descend into a nested directory; pip's standard output is discarded via `/dev/null`."
+      ],
+      "metadata": {
+        "id": "UsW1zD1cRqoX"
+      }
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "_F_3LjI8zYiw",
+        "outputId": "d8930d99-0d4a-4f7c-cbda-9774057672a2"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Cloning into 'deep-voice-cloning'...\n",
+            "remote: Enumerating objects: 27, done.\u001b[K\n",
+            "remote: Counting objects: 100% (27/27), done.\u001b[K\n",
+            "remote: Compressing objects: 100% (24/24), done.\u001b[K\n",
+            "remote: Total 27 (delta 0), reused 27 (delta 0), pack-reused 0\u001b[K\n",
+            "Receiving objects: 100% (27/27), 1.36 MiB | 29.07 MiB/s, done.\n"
+          ]
+        }
+      ],
+      "source": [
+        "!git clone https://github.com/konverner/deep-voice-cloning.git"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "%cd /content/deep-voice-cloning\n",
+        "!pip install . > /dev/null"
+      ],
+      "metadata": {
+        "id": "YctLbHO10ou9",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "5940018d-67bd-4d8d-ce84-0c7a5b92ff44"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "/content/deep-voice-cloning\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## Cloning Voice of Hank Hill\n",
+        "\n",
+        "Run `scripts/train.py` on `scripts/input/hank.mp3`, synthesize a sentence with the saved model via `scripts/cloning_inference.py`, then play the generated audio."
+      ],
+      "metadata": {
+        "id": "caTAhpANGlnL"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!python scripts/train.py --audio_path \"/content/deep-voice-cloning/scripts/input/hank.mp3\"\\\n",
+        " --output_dir \"/content/deep-voice-cloning/models\""
+      ],
+      "metadata": {
+        "id": "lYz6IM4ZGlSa"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!python scripts/cloning_inference.py --model_path \"/content/deep-voice-cloning/models/microsoft_speecht5_tts_hank\"\\\n",
+        " --input_text 'do the things, not because they are easy, but because they are hard'\\\n",
+        " --output_path \"scripts/output/do_the_things.wav\""
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "_AFvKCrqOPsk",
+        "outputId": "5f1b096c-04e5-4645-d13e-3878ef12a8f3"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "2023-07-22 11:29:53.541592: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from IPython import display\n",
+        "display.Audio(\"/content/deep-voice-cloning/scripts/output/do_the_things.wav\")"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 75
+        },
+        "id": "QmboXL9LOSKD",
+        "outputId": "f682399c-7111-4957-cc05-c854f9ab7d19"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              ""
+            ],
+            "text/html": [
+              "\n",
+              " \n",
+              " "
+            ]
+          },
+          "metadata": {},
+          "execution_count": 14
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## Cloning Voice of Homer Simpson\n",
+        "\n",
+        "Same steps for `scripts/input/homer.mp3`. Inference passes `--model_path` explicitly so the model trained in this section is used; the directory name is assumed to follow the same `microsoft_speecht5_tts_<speaker>` pattern as the Hank run, so adjust it if `train.py` saves elsewhere. Note that the output file is the same `do_the_things.wav` and overwrites the Hank result."
+      ],
+      "metadata": {
+        "id": "hNlD1Ocfjp1k"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!python scripts/train.py --audio_path \"/content/deep-voice-cloning/scripts/input/homer.mp3\"\\\n",
+        " --output_dir \"/content/deep-voice-cloning/models\""
+      ],
+      "metadata": {
+        "id": "p15KMsOT0vKY"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!python scripts/cloning_inference.py --model_path \"/content/deep-voice-cloning/models/microsoft_speecht5_tts_homer\"\\\n",
+        " --input_text 'do the things, not because they are easy, but because they are hard'\\\n",
+        " --output_path \"scripts/output/do_the_things.wav\""
+      ],
+      "metadata": {
+        "id": "UHUi1maPI6jJ",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "45fefc02-4551-46fc-cc2e-2e43b589aa80"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "2023-07-21 22:24:34.290393: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from IPython import display\n",
+        "display.Audio(\"/content/deep-voice-cloning/scripts/output/do_the_things.wav\")"
+      ],
+      "metadata": {
+        "id": "Gl7sjDiKS4eI",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 75
+        },
+        "outputId": "c9d1b9af-9a1d-472b-804f-e17011d52135"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              ""
+            ],
+            "text/html": [
+              "\n",
+              " \n",
+              " "
+            ]
+          },
+          "metadata": {},
+          "execution_count": 16
+        }
+      ]
+    }
+  ]
+}
\ No newline at end of file