# %% [markdown]
# # (YouTube) video to audio

# %% [markdown]
# - Author: [Pierre Guillou](https://www.linkedin.com/in/pierreguillou/)
# - Date: 08/10/2023

# %%
# Dependencies (notebook magics / shell commands, kept as comments so this
# cell stays valid Python). The FFmpegExtractAudio post-processor and pydub's
# MP3 decoding both rely on an ffmpeg binary being available.
# %%capture
# #!apt-get install -y ffmpeg
# !python3 -m pip install -U yt-dlp
# !pip install unidecode
# !pip install gradio
# !pip install pydub

# %%
import os
import re

import gradio as gr
import numpy as np
import pydub
import yt_dlp
from unidecode import unidecode

# Folder (relative to the working directory) that receives the MP3 files.
AUDIO_DIR = "./audio/"


# no space, punctuation, accent in lower string
def cleanString(string):
    """Return `string` as a lowercase, accent-free, filename-friendly token.

    The text is first transliterated with ``unidecode``; every run of
    characters that are not letters, digits or "_" is then collapsed into a
    single "_" and the result is lower-cased.
    """
    ascii_text = unidecode(string)
    # Raw string: "\W" inside a regular literal is an invalid escape sequence.
    return re.sub(r'\W+', '_', ascii_text).lower()


# from audio file path to sample rate and numpy array
def read_audio(f, normalized=False):
    """Decode an MP3 into its sample rate and a numpy array of samples.

    Args:
        f: MP3 source, passed unchanged to ``pydub.AudioSegment.from_mp3``.
        normalized: when True, samples are returned as float32 divided by the
            full-scale amplitude of the segment's sample width (2**15 for
            16-bit audio); otherwise the raw integer samples are returned.

    Returns:
        A ``(frame_rate, samples)`` tuple. ``samples`` is 1-D for mono audio
        and has shape ``(n_frames, n_channels)`` for multi-channel audio.
    """
    segment = pydub.AudioSegment.from_mp3(f)
    samples = np.array(segment.get_array_of_samples())

    # Samples arrive interleaved; give every channel its own column.
    if segment.channels > 1:
        samples = samples.reshape((-1, segment.channels))

    if normalized:
        # sample_width is in bytes, samples are signed integers.
        full_scale = 2 ** (8 * segment.sample_width - 1)
        return segment.frame_rate, np.float32(samples) / full_scale
    return segment.frame_rate, samples


def _find_downloaded_mp3(info_dict, folder):
    """Locate the MP3 produced by yt-dlp for `info_dict`; return None if absent."""
    # Preferred: the final (post-processed) path that yt-dlp reports itself.
    for item in info_dict.get('requested_downloads') or []:
        candidate = item.get('filepath')
        if candidate and os.path.isfile(candidate):
            return candidate

    # Fallback: yt-dlp may alter characters of the title when building the
    # file name, so compare cleaned names rather than raw ones.
    if not os.path.isdir(folder):
        return None
    wanted = cleanString(info_dict['title'] + ".mp3")
    for filename in os.listdir(folder):
        if cleanString(filename) == wanted:
            return os.path.join(folder, filename)
    return None


# from YouTube url to audio file path and sample rate + numpy array
def download_audio(url):
    """Download the audio track of a video and convert it to MP3.

    The file is written to ``AUDIO_DIR`` and renamed to the cleaned video
    title (see ``cleanString``) plus ".mp3".

    Args:
        url: address of the video, as typed by the user.

    Returns:
        ``(file_path, (frame_rate, samples))`` where ``file_path`` is the path
        of the MP3 and the second item is the output of ``read_audio``.

    Raises:
        gr.Error: if the URL is blank, the download fails, or the converted
            file cannot be found afterwards (shown as a message in the UI).
    """
    if not url or not url.strip():
        raise gr.Error("Please enter a video URL.")

    ydl_opts = {
        'format': 'm4a/bestaudio/best',
        'outtmpl': f'{AUDIO_DIR}%(title)s',
        # A "watch?v=...&list=..." URL should yield the single video only.
        'noplaylist': True,
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
        }],
    }

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info_dict = ydl.extract_info(url.strip(), download=True)
    except yt_dlp.utils.DownloadError as exc:
        raise gr.Error(f"Download failed: {exc}") from exc

    video_title = info_dict['title']
    downloaded_path = _find_downloaded_mp3(info_dict, AUDIO_DIR)
    if downloaded_path is None:
        # Fail here with a clear message instead of letting the decoder
        # choke on a path that does not exist.
        raise gr.Error(f"Could not find the converted MP3 for '{video_title}'.")

    # Rename the audio file
    file_path = AUDIO_DIR + cleanString(video_title) + ".mp3"
    if os.path.abspath(downloaded_path) != os.path.abspath(file_path):
        # os.replace overwrites a previous download of the same video on
        # every platform (os.rename fails on Windows if the target exists).
        os.replace(downloaded_path, file_path)

    return file_path, read_audio(file_path)


# %%
# Gradio interface
iface = gr.Interface(
    fn=download_audio,
    inputs=gr.Textbox(label="YouTube Video URL"),
    outputs=[
        gr.File(label="Output Audio File"),
        gr.Audio(label="Play Audio", show_download_button=False, format="mp3"),
    ],
    allow_flagging="never",
    title="YouTube Video to Audio (mp3)",
    description="Just paste any YouTube video url and get its corresponding audio file in mp3.",
)
iface.launch()

# %% [markdown]
# # END