{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"gpuType": "T4",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "GPU"
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
""
]
},
{
"cell_type": "markdown",
"source": [
"## Install"
],
"metadata": {
"id": "UsW1zD1cRqoX"
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "_F_3LjI8zYiw",
"outputId": "d8930d99-0d4a-4f7c-cbda-9774057672a2"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Cloning into 'deep-voice-cloning'...\n",
"remote: Enumerating objects: 27, done.\u001b[K\n",
"remote: Counting objects: 100% (27/27), done.\u001b[K\n",
"remote: Compressing objects: 100% (24/24), done.\u001b[K\n",
"remote: Total 27 (delta 0), reused 27 (delta 0), pack-reused 0\u001b[K\n",
"Receiving objects: 100% (27/27), 1.36 MiB | 29.07 MiB/s, done.\n"
]
}
],
"source": [
"# Clone only when no checkout exists yet, so re-running this cell does not abort on an\n",
"# already-present directory. The target is absolute so the repository always lands at\n",
"# /content/deep-voice-cloning, the location the later cells' hard-coded paths point to.\n",
"![ -d /content/deep-voice-cloning/.git ] || git clone https://github.com/konverner/deep-voice-cloning.git /content/deep-voice-cloning"
]
},
{
"cell_type": "code",
"source": [
"# Absolute path keeps this cell idempotent: a relative `%cd` re-run from inside the\n",
"# checkout either fails or ends up in a nested deep-voice-cloning/deep-voice-cloning.\n",
"%cd /content/deep-voice-cloning\n",
"# `%pip` targets the running kernel's environment; `-q` trims the install log instead of\n",
"# redirecting stdout into a stray file literally named `null`.\n",
"%pip install -q ."
],
"metadata": {
"id": "YctLbHO10ou9",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "5940018d-67bd-4d8d-ce84-0c7a5b92ff44"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"/content/deep-voice-cloning/deep-voice-cloning\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"## Cloning Voice of Hank Hill"
],
"metadata": {
"id": "caTAhpANGlnL"
}
},
{
"cell_type": "code",
"source": [
"# Run the training script on the Hank Hill sample clip; one flag per line for readability.\n",
"!python scripts/train.py \\\n",
"    --audio_path \"/content/deep-voice-cloning/scripts/input/hank.mp3\" \\\n",
"    --output_dir \"/content/deep-voice-cloning/models\""
],
"metadata": {
"id": "lYz6IM4ZGlSa"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Run inference with the Hank model directory. The output path is absolute so it is the\n",
"# exact file the playback cell opens, regardless of the current working directory.\n",
"!python scripts/cloning_inference.py \\\n",
"    --model_path \"/content/deep-voice-cloning/models/microsoft_speecht5_tts_hank\" \\\n",
"    --input_text 'do the things, not because they are easy, but because they are hard' \\\n",
"    --output_path \"/content/deep-voice-cloning/scripts/output/do_the_things.wav\""
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "_AFvKCrqOPsk",
"outputId": "5f1b096c-04e5-4645-d13e-3878ef12a8f3"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"2023-07-22 11:29:53.541592: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"import IPython.display as display\n",
"\n",
"# Embed an audio player for the generated WAV file; the bare last expression renders it.\n",
"hank_wav = \"/content/deep-voice-cloning/scripts/output/do_the_things.wav\"\n",
"display.Audio(hank_wav)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 75
},
"id": "QmboXL9LOSKD",
"outputId": "f682399c-7111-4957-cc05-c854f9ab7d19"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
""
],
"text/html": [
"\n",
" \n",
" "
]
},
"metadata": {},
"execution_count": 14
}
]
},
{
"cell_type": "markdown",
"source": [
"## Cloning Voice of Homer Simpson"
],
"metadata": {
"id": "hNlD1Ocfjp1k"
}
},
{
"cell_type": "code",
"source": [
"# Run the training script on the Homer Simpson sample clip; one flag per line for readability.\n",
"!python scripts/train.py \\\n",
"    --audio_path \"/content/deep-voice-cloning/scripts/input/homer.mp3\" \\\n",
"    --output_dir \"/content/deep-voice-cloning/models\""
],
"metadata": {
"id": "p15KMsOT0vKY"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Pass the Homer model explicitly, mirroring the Hank inference cell, instead of relying on\n",
"# whatever the script uses when --model_path is omitted.\n",
"# NOTE(review): the directory name is assumed by analogy with the Hank run\n",
"# (microsoft_speecht5_tts_hank for hank.mp3) - confirm against what train.py actually writes.\n",
"# The output path is absolute so it is the exact file the playback cell opens.\n",
"!python scripts/cloning_inference.py \\\n",
"    --model_path \"/content/deep-voice-cloning/models/microsoft_speecht5_tts_homer\" \\\n",
"    --input_text 'do the things, not because they are easy, but because they are hard' \\\n",
"    --output_path \"/content/deep-voice-cloning/scripts/output/do_the_things.wav\""
],
"metadata": {
"id": "UHUi1maPI6jJ",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "45fefc02-4551-46fc-cc2e-2e43b589aa80"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"2023-07-21 22:24:34.290393: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"import IPython.display as display\n",
"\n",
"# Embed an audio player for the generated WAV file; the bare last expression renders it.\n",
"homer_wav = \"/content/deep-voice-cloning/scripts/output/do_the_things.wav\"\n",
"display.Audio(homer_wav)"
],
"metadata": {
"id": "Gl7sjDiKS4eI",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 75
},
"outputId": "c9d1b9af-9a1d-472b-804f-e17011d52135"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
""
],
"text/html": [
"\n",
" \n",
" "
]
},
"metadata": {},
"execution_count": 16
}
]
}
]
}