pierreguillou committed on
Commit
10a0c3e
1 Parent(s): 4ef19a6

Upload video_to_audio.ipynb

Browse files
Files changed (1) hide show
  1. docs/video_to_audio.ipynb +139 -0
docs/video_to_audio.ipynb ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "provenance": []
7
+ },
8
+ "kernelspec": {
9
+ "name": "python3",
10
+ "display_name": "Python 3"
11
+ },
12
+ "language_info": {
13
+ "name": "python"
14
+ }
15
+ },
16
+ "cells": [
17
+ {
18
+ "cell_type": "markdown",
19
+ "source": [
20
+ "# (YouTube) video to audio"
21
+ ],
22
+ "metadata": {
23
+ "id": "kNt1V_xZCYzb"
24
+ }
25
+ },
26
+ {
27
+ "cell_type": "markdown",
28
+ "source": [
29
+ "- Author: [Pierre Guillou](https://www.linkedin.com/in/pierreguillou/)\n",
30
+ "- Date: 08/10/2023"
31
+ ],
32
+ "metadata": {
33
+ "id": "Fa6V8oEynFe-"
34
+ }
35
+ },
36
+ {
37
+ "cell_type": "code",
38
+ "source": [
39
+ "%%capture\n",
40
+ "#!apt-get install -y ffmpeg\n",
41
+ "!python3 -m pip install -U yt-dlp\n",
42
+ "!pip install unidecode\n",
43
+ "!pip install gradio\n",
44
+ "!pip install pydub"
45
+ ],
46
+ "metadata": {
47
+ "id": "S4yB5r9RCdkH"
48
+ },
49
+ "execution_count": 1,
50
+ "outputs": []
51
+ },
52
+ {
53
+ "cell_type": "code",
54
+ "source": [
55
+ "import gradio as gr\n",
56
+ "import re, unidecode\n",
57
+ "from unidecode import unidecode\n",
58
+ "import yt_dlp\n",
59
+ "import os\n",
60
+ "import pydub\n",
61
+ "import numpy as np\n",
62
+ "\n",
# no space, punctuation, accent in lower string
def cleanString(string):
    """Turn arbitrary text into a lowercase, underscore-separated slug.

    Args:
        string: Text to normalise (for example a video title or file name).

    Returns:
        The text after ``unidecode`` transliteration, lowercased, with every
        run of characters other than letters, digits and ``_`` collapsed into
        a single ``_``.
    """
    # Transliterate first so accented characters become plain letters instead
    # of being replaced by underscores in the next step.
    ascii_text = unidecode(string)
    # Raw string: "\W" inside a normal literal is an invalid escape sequence.
    slug = re.sub(r'\W+', '_', ascii_text)
    return slug.lower()
68
+ "\n",
69
+ "# from audio file path to sample rate and numpy array\n",
70
+ "def read_audio(f, normalized=False):\n",
71
+ " \"\"\"MP3 to numpy array\"\"\"\n",
72
+ " a = pydub.AudioSegment.from_mp3(f)\n",
73
+ " y = np.array(a.get_array_of_samples())\n",
74
+ " if a.channels == 2:\n",
75
+ " y = y.reshape((-1, 2))\n",
76
+ " if normalized:\n",
77
+ " return a.frame_rate, np.float32(y) / 2**15\n",
78
+ " else:\n",
79
+ " return a.frame_rate, y\n",
80
+ "\n",
81
+ "# from YouTube url to audio file path and sample rate + numpy array\n",
82
+ "def download_audio(url):\n",
83
+ "\n",
84
+ " path_to_folder_audio_mp3 = \"./audio/\"\n",
85
+ " ydl_opts = {\n",
86
+ " 'format': 'm4a/bestaudio/best',\n",
87
+ " 'outtmpl': f'{path_to_folder_audio_mp3}%(title)s',\n",
88
+ " 'postprocessors': [{\n",
89
+ " 'key': 'FFmpegExtractAudio',\n",
90
+ " 'preferredcodec': 'mp3',\n",
91
+ " }]\n",
92
+ " }\n",
93
+ "\n",
94
+ " with yt_dlp.YoutubeDL(ydl_opts) as ydl:\n",
95
+ " info_dict = ydl.extract_info(url, download=True)\n",
96
+ " video_title = info_dict['title']\n",
97
+ "\n",
98
+ " # Rename the audio file\n",
99
+ " local_link = video_title + \".mp3\"\n",
100
+ " new_local_link = cleanString(video_title) + \".mp3\"\n",
101
+ " for filename in os.listdir(path_to_folder_audio_mp3):\n",
102
+ " if cleanString(local_link) == cleanString(filename):\n",
103
+ " os.rename(os.path.join(path_to_folder_audio_mp3, filename),os.path.join(path_to_folder_audio_mp3, new_local_link))\n",
104
+ "\n",
105
+ " # get audio file path\n",
106
+ " file_path = path_to_folder_audio_mp3 + new_local_link\n",
107
+ "\n",
108
+ " return file_path, read_audio(file_path)\n",
109
+ "\n",
110
+ "# Gradio interface\n",
111
+ "iface = gr.Interface(fn=download_audio,\n",
112
+ " inputs=gr.Textbox(label=\"YouTube Video URL\"),\n",
113
+ " outputs=[\n",
114
+ " gr.File(label=\"Output Audio File\"),\n",
115
+ " gr.Audio(label=\"Play Audio\", show_download_button=False, format=\"mp3\"),\n",
116
+ " ],\n",
117
+ " allow_flagging=\"never\",\n",
118
+ " title=\"YouTube Video to Audio (mp3)\",\n",
119
+ " description=\"Just paste any YouTube video url and get its corresponding audio file in mp3.\",\n",
120
+ " )\n",
121
+ "iface.launch()"
122
+ ],
123
+ "metadata": {
124
+ "id": "9YvB5hBloP1f"
125
+ },
126
+ "execution_count": null,
127
+ "outputs": []
128
+ },
129
+ {
130
+ "cell_type": "markdown",
131
+ "source": [
132
+ "# END"
133
+ ],
134
+ "metadata": {
135
+ "id": "u9QYxqjtnzCD"
136
+ }
137
+ }
138
+ ]
139
+ }