ruslanmv committed on
Commit
f692269
1 Parent(s): a216bdd

Add application file

Browse files
Files changed (5) hide show
  1. .gitattributes +31 -31
  2. README.md +14 -14
  3. app.py +228 -228
  4. requirements.txt +7 -7
  5. utils.py +36 -36
.gitattributes CHANGED
@@ -1,31 +1,31 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ftz filter=lfs diff=lfs merge=lfs -text
6
- *.gz filter=lfs diff=lfs merge=lfs -text
7
- *.h5 filter=lfs diff=lfs merge=lfs -text
8
- *.joblib filter=lfs diff=lfs merge=lfs -text
9
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
10
- *.model filter=lfs diff=lfs merge=lfs -text
11
- *.msgpack filter=lfs diff=lfs merge=lfs -text
12
- *.npy filter=lfs diff=lfs merge=lfs -text
13
- *.npz filter=lfs diff=lfs merge=lfs -text
14
- *.onnx filter=lfs diff=lfs merge=lfs -text
15
- *.ot filter=lfs diff=lfs merge=lfs -text
16
- *.parquet filter=lfs diff=lfs merge=lfs -text
17
- *.pickle filter=lfs diff=lfs merge=lfs -text
18
- *.pkl filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pt filter=lfs diff=lfs merge=lfs -text
21
- *.pth filter=lfs diff=lfs merge=lfs -text
22
- *.rar filter=lfs diff=lfs merge=lfs -text
23
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
24
- *.tar.* filter=lfs diff=lfs merge=lfs -text
25
- *.tflite filter=lfs diff=lfs merge=lfs -text
26
- *.tgz filter=lfs diff=lfs merge=lfs -text
27
- *.wasm filter=lfs diff=lfs merge=lfs -text
28
- *.xz filter=lfs diff=lfs merge=lfs -text
29
- *.zip filter=lfs diff=lfs merge=lfs -text
30
- *.zst filter=lfs diff=lfs merge=lfs -text
31
- *tfevents* filter=lfs diff=lfs merge=lfs -text
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ftz filter=lfs diff=lfs merge=lfs -text
6
+ *.gz filter=lfs diff=lfs merge=lfs -text
7
+ *.h5 filter=lfs diff=lfs merge=lfs -text
8
+ *.joblib filter=lfs diff=lfs merge=lfs -text
9
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
10
+ *.model filter=lfs diff=lfs merge=lfs -text
11
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
12
+ *.npy filter=lfs diff=lfs merge=lfs -text
13
+ *.npz filter=lfs diff=lfs merge=lfs -text
14
+ *.onnx filter=lfs diff=lfs merge=lfs -text
15
+ *.ot filter=lfs diff=lfs merge=lfs -text
16
+ *.parquet filter=lfs diff=lfs merge=lfs -text
17
+ *.pickle filter=lfs diff=lfs merge=lfs -text
18
+ *.pkl filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pt filter=lfs diff=lfs merge=lfs -text
21
+ *.pth filter=lfs diff=lfs merge=lfs -text
22
+ *.rar filter=lfs diff=lfs merge=lfs -text
23
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
24
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
25
+ *.tflite filter=lfs diff=lfs merge=lfs -text
26
+ *.tgz filter=lfs diff=lfs merge=lfs -text
27
+ *.wasm filter=lfs diff=lfs merge=lfs -text
28
+ *.xz filter=lfs diff=lfs merge=lfs -text
29
+ *.zip filter=lfs diff=lfs merge=lfs -text
30
+ *.zst filter=lfs diff=lfs merge=lfs -text
31
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,14 +1,14 @@
1
- ---
2
- title: Youtube Video Translator
3
- emoji: 🐨
4
- colorFrom: yellow
5
- colorTo: purple
6
- python_version: 3.8.9
7
- sdk: gradio
8
- sdk_version: 3.2
9
- app_file: app.py
10
- pinned: false
11
- license: cc0-1.0
12
- ---
13
-
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
1
+ ---
2
+ title: Youtube Video Translator
3
+ emoji: 🐨
4
+ colorFrom: yellow
5
+ colorTo: purple
6
+ python_version: 3.8.9
7
+ sdk: gradio
8
+ sdk_version: 3.2
9
+ app_file: app.py
10
+ pinned: false
11
+ license: cc0-1.0
12
+ ---
13
+
14
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -1,229 +1,229 @@
1
- # coding=utf8
2
- # Youtube Video Translator
3
- # Developed by Ruslan Magana Vsevolodovna
4
- # https://ruslanmv.com/
5
-
6
- # importing all necessary libraries
7
- import pathlib
8
- import sys, os
9
- from gtts import gTTS
10
- import gradio as gr
11
- import os
12
- import speech_recognition as sr
13
- from googletrans import Translator, constants
14
- from pprint import pprint
15
- from moviepy.editor import *
16
- from pytube import YouTube
17
- from youtube_transcript_api import YouTubeTranscriptApi
18
- from utils import *
19
-
20
- def download_video(url):
21
- print("Downloading...")
22
- local_file = (
23
- YouTube(url)
24
- .streams.filter(progressive=True, file_extension="mp4")
25
- .first()
26
- .download()
27
- )
28
- print("Downloaded")
29
- return local_file
30
-
31
- def validate_url(url):
32
- import validators
33
- if not validators.url(url):
34
- print("Hi there URL seems invalid ")
35
-
36
-
37
- def cleanup():
38
- import pathlib
39
- import glob
40
- types = ('*.mp4', '*.wav') # the tuple of file types
41
- #Finding mp4 and wave files
42
- junks = []
43
- for files in types:
44
- junks.extend(glob.glob(files))
45
- try:
46
- # Deleting those files
47
- for junk in junks:
48
- print("Deleting",junk)
49
- # Setting the path for the file to delete
50
- file = pathlib.Path(junk)
51
- # Calling the unlink method on the path
52
- file.unlink()
53
- except Exception:
54
- print("I cannot delete the file because it is being used by another process")
55
-
56
- def getSize(filename):
57
- st = os.stat(filename)
58
- return st.st_size
59
-
60
-
61
- def generate_transcript(url,lang_api):
62
- id = url[url.index("=")+1:]
63
- transcript = YouTubeTranscriptApi.get_transcript(id,languages=[lang_api])
64
- script = ""
65
- for text in transcript:
66
- t = text["text"]
67
- if t != '[Music]':
68
- script += t + " "
69
- return script
70
-
71
-
72
- def video_to_translate(url,initial_language,final_language):
73
-
74
- #Internal definitions
75
- if initial_language == "English":
76
- lang_in='en-US'
77
- lang_api='en'
78
- elif initial_language == "Italian":
79
- lang_in='it-IT'
80
- lang_api='it'
81
- elif initial_language == "Spanish":
82
- lang_in='es-MX'
83
- lang_api='es'
84
- elif initial_language == "Russian":
85
- lang_in='ru-RU'
86
- lang_api='rus'
87
- elif initial_language == "German":
88
- lang_in='de-DE'
89
- lang_api='de'
90
- elif initial_language == "Japanese":
91
- lang_in='ja-JP'
92
- lang_api='ja'
93
- if final_language == "English":
94
- lang='en'
95
- elif final_language == "Italian":
96
- lang='it'
97
- elif final_language == "Spanish":
98
- lang='es'
99
- elif final_language == "Russian":
100
- lang='ru'
101
- elif final_language == "German":
102
- lang='de'
103
- elif final_language == "Japanese":
104
- lang='ja'
105
-
106
- # Initial directory
107
- home_dir = os.getcwd()
108
- print('Initial directory:',home_dir)
109
- cleanup()
110
- # Temporal directory
111
- temp_dir=os.path.join(home_dir, "temp")
112
- print('Temporal directory:',temp_dir)
113
- #Create temp directory
114
- pathlib.Path(temp_dir).mkdir(parents=True, exist_ok=True)
115
- # Go to temp directory
116
- os.chdir(temp_dir)
117
- print('Changing temporal directory',os.getcwd())
118
- # Cleaning previous files
119
- cleanup()
120
- file_obj=download_video(url)
121
- print(file_obj)
122
- # Insert Local Video File Path
123
- videoclip = VideoFileClip(file_obj)
124
- try:
125
- # Trying to get transcripts
126
- text = generate_transcript(url,lang_api)
127
- print("Transcript Found")
128
- except Exception:
129
- print("No Transcript Found")
130
- # Trying to recognize audio
131
- # Insert Local Audio File Path
132
- videoclip.audio.write_audiofile("audio.wav",codec='pcm_s16le')
133
- # initialize the recognizer
134
- r = sr.Recognizer()
135
- # open the file
136
- with sr.AudioFile("audio.wav") as source:
137
- # listen for the data (load audio to memory)
138
- audio_data = r.record(source)
139
- # recognize (convert from speech to text)
140
- print("Recognize from ",lang_in)
141
- #There is a limit of 10 MB on all single requests sent to the API using local file
142
- size_wav=getSize("audio.wav")
143
- if size_wav > 50000000:
144
- print("The wav is too large")
145
- audio_chunks=split_audio_wav("audio.wav")
146
- text=""
147
- for chunk in audio_chunks:
148
- print("Converting audio to text",chunk)
149
- try:
150
- text_chunk= r.recognize_google(audio_data, language = lang_in)
151
- except Exception:
152
- print("This video cannot be recognized")
153
- cleanup()
154
- # Return back to main directory
155
- os.chdir(home_dir)
156
- return "./demo/tryagain.mp4"
157
- text=text+text_chunk+" "
158
- text=str(text)
159
- print(type(text))
160
-
161
- else:
162
- text = r.recognize_google(audio_data, language = lang_in)
163
- #print(text)
164
- print("Destination language ",lang)
165
-
166
- # init the Google API translator
167
- translator = Translator()
168
-
169
-
170
- try:
171
- translation = translator.translate(text, dest=lang)
172
- except Exception:
173
- print("This text cannot be translated")
174
- cleanup()
175
- # Return back to main directory
176
- os.chdir(home_dir)
177
- return "./demo/tryagain.mp4"
178
-
179
- #translation.text
180
- trans=translation.text
181
-
182
- myobj = gTTS(text=trans, lang=lang, slow=False)
183
- myobj.save("audio.wav")
184
- # loading audio file
185
- audioclip = AudioFileClip("audio.wav")
186
-
187
- # adding audio to the video clip
188
- new_audioclip = CompositeAudioClip([audioclip])
189
- videoclip.audio = new_audioclip
190
- new_video="video_translated_"+lang+".mp4"
191
-
192
- # Return back to main directory
193
- os.chdir(home_dir)
194
- print('Final directory',os.getcwd())
195
-
196
- videoclip.write_videofile(new_video)
197
-
198
- videoclip.close()
199
- del file_obj
200
-
201
- return new_video
202
-
203
- initial_language = gr.inputs.Dropdown(["English","Italian","Japanese","Russian","Spanish","German"])
204
- final_language = gr.inputs.Dropdown([ "Russian","Italian","Spanish","German","English","Japanese"])
205
- url =gr.inputs.Textbox(label = "Enter the YouTube URL below:")
206
-
207
-
208
- gr.Interface(fn = video_to_translate,
209
- inputs = [url,initial_language,final_language],
210
- outputs = 'video',
211
- verbose = True,
212
- title = 'Video Youtube Translator',
213
- description = 'A simple application that translates Youtube videos from English, Italian, Japanese, Russian, Spanish, and German to Italian, Spanish, Russian, English and Japanese. Wait one minute to process.',
214
- article =
215
- '''<div>
216
- <p style="text-align: center"> All you need to do is to paste the Youtube link and hit submit, then wait for compiling. After that click on Play/Pause for listing to the video. The video is saved in an mp4 format.
217
- For more information visit <a href="https://ruslanmv.com/">ruslanmv.com</a>
218
- </p>
219
- </div>''',
220
-
221
- examples = [
222
- ["https://www.youtube.com/watch?v=Cu3R5it4cQs&list", "English","Italian"],
223
- ["https://www.youtube.com/watch?v=fkGCLIQx1MI", "English","Spanish"],
224
- ["https://www.youtube.com/watch?v=fkGCLIQx1MI", "English","Russian"],
225
- ["https://www.youtube.com/watch?v=_5YeX8eCLgA&ab_channel=TheTelegraph", "Russian","English"],
226
- ["https://www.youtube.com/watch?v=qzzweIQoIOU", "Japanese","English"],
227
- ["https://www.youtube.com/watch?v=eo17uDr2_XA", "German","Spanish"]
228
- ]
229
  ).launch()
1
+ # coding=utf8
2
+ # Youtube Video Translator
3
+ # Developed by Ruslan Magana Vsevolodovna
4
+ # https://ruslanmv.com/
5
+
6
+ # importing all necessary libraries
7
+ import pathlib
8
+ import sys, os
9
+ from gtts import gTTS
10
+ import gradio as gr
11
+ import os
12
+ import speech_recognition as sr
13
+ from googletrans import Translator, constants
14
+ from pprint import pprint
15
+ from moviepy.editor import *
16
+ from pytube import YouTube
17
+ from youtube_transcript_api import YouTubeTranscriptApi
18
+ from utils import *
19
+
20
def download_video(url):
    """Download the first progressive MP4 stream of a YouTube video.

    Args:
        url: Address of the YouTube video to fetch.

    Returns:
        The path of the downloaded file, as returned by the stream's
        ``download()`` call (the file is written to the current directory).

    Raises:
        ValueError: If the video exposes no progressive MP4 stream.
    """
    print("Downloading...")
    stream = (
        YouTube(url)
        .streams.filter(progressive=True, file_extension="mp4")
        .first()
    )
    if stream is None:
        # An empty query makes first() return None; report that explicitly
        # instead of failing with an AttributeError on None.download().
        raise ValueError(
            "No progressive MP4 stream available for {0!r}".format(url)
        )
    local_file = stream.download()
    print("Downloaded")
    return local_file
30
+
31
def validate_url(url):
    """Report whether *url* looks like an absolute HTTP(S) URL.

    The check is purely syntactic and uses only the standard library, so it
    does not depend on a package missing from requirements.txt.

    Args:
        url: The candidate URL.

    Returns:
        True when *url* has an ``http``/``https`` scheme and a host part,
        False otherwise. A warning is printed for invalid URLs.
    """
    from urllib.parse import urlparse

    try:
        parts = urlparse(url)
    except (AttributeError, TypeError, ValueError):
        # Non-string input or a malformed authority section.
        parts = None

    is_valid = (
        parts is not None
        and parts.scheme in ("http", "https")
        and bool(parts.netloc)
    )
    if not is_valid:
        print("Hi there URL seems invalid ")
    return is_valid
35
+
36
+
37
def cleanup():
    """Delete every ``*.mp4`` and ``*.wav`` entry in the current directory.

    Deletion is best effort: an entry that cannot be removed is reported and
    skipped, and the remaining entries are still processed.
    """
    import glob
    import pathlib

    patterns = ('*.mp4', '*.wav')  # file types produced while translating
    junks = []
    for pattern in patterns:
        junks.extend(glob.glob(pattern))

    for junk in junks:
        print("Deleting", junk)
        try:
            pathlib.Path(junk).unlink()
        except OSError as err:
            # Handle each file on its own so one locked or undeletable entry
            # does not leave all the following ones behind.
            print("I cannot delete the file because it is being used by another process")
            print("Reason:", err)
55
+
56
def getSize(filename):
    """Return the size, in bytes, of the file at *filename*."""
    return os.path.getsize(filename)
59
+
60
+
61
def _video_id_from_url(url):
    """Extract the YouTube video id from a watch URL or a short link."""
    from urllib.parse import parse_qs, urlparse

    parts = urlparse(url)
    # Standard watch URLs carry the id in the "v" query parameter; parsing the
    # query keeps trailing parameters ("&list", "&ab_channel=...") out of it.
    ids = parse_qs(parts.query).get("v")
    if ids:
        return ids[0]
    # Short/embed style links (e.g. youtu.be/<id>) carry it as the last path
    # segment.
    tail = parts.path.rstrip("/").rsplit("/", 1)[-1]
    if tail and tail != "watch":
        return tail
    raise ValueError("Cannot find a video id in {0!r}".format(url))


def generate_transcript(url, lang_api):
    """Fetch the transcript of a YouTube video as one string.

    Args:
        url: Address of the YouTube video.
        lang_api: Language code of the transcript to request.

    Returns:
        The transcript entries joined in order, each followed by one space.
        Entries that are exactly ``[Music]`` are skipped.

    Raises:
        ValueError: If no video id can be found in *url*.
        Exception: Whatever ``YouTubeTranscriptApi.get_transcript`` raises
            when the transcript cannot be retrieved.
    """
    video_id = _video_id_from_url(url)
    transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=[lang_api])
    return "".join(
        entry["text"] + " "
        for entry in transcript
        if entry["text"] != '[Music]'
    )
70
+
71
+
72
# Source language name -> (speech-recognition locale, transcript language code).
_SOURCE_LANGUAGES = {
    "English": ("en-US", "en"),
    "Italian": ("it-IT", "it"),
    "Spanish": ("es-MX", "es"),
    "Russian": ("ru-RU", "ru"),
    "German": ("de-DE", "de"),
    "Japanese": ("ja-JP", "ja"),
}

# Target language name -> code used for translation and text-to-speech.
_TARGET_LANGUAGES = {
    "English": "en",
    "Italian": "it",
    "Spanish": "es",
    "Russian": "ru",
    "German": "de",
    "Japanese": "ja",
}

# Video returned to the UI when recognition or translation fails.
_FALLBACK_VIDEO = "./demo/tryagain.mp4"

# Above this WAV size the audio is recognized chunk by chunk.
# NOTE(review): the original comment cites a 10 MB limit per request, yet the
# cut-off has always been 50 MB - confirm which value is intended.
_MAX_SINGLE_REQUEST_BYTES = 50000000


def _recognize_wav(recognizer, wav_path, lang_in):
    """Transcribe a single WAV file with Google speech recognition."""
    with sr.AudioFile(wav_path) as source:
        # Load the whole file into memory for the request.
        audio_data = recognizer.record(source)
    return recognizer.recognize_google(audio_data, language=lang_in)


def _speech_to_text(videoclip, lang_in):
    """Transcribe the audio track of *videoclip*, chunking large recordings."""
    videoclip.audio.write_audiofile("audio.wav", codec='pcm_s16le')
    recognizer = sr.Recognizer()
    print("Recognize from ", lang_in)
    if getSize("audio.wav") <= _MAX_SINGLE_REQUEST_BYTES:
        return _recognize_wav(recognizer, "audio.wav", lang_in)

    print("The wav is too large")
    pieces = []
    for chunk in split_audio_wav("audio.wav"):
        print("Converting audio to text", chunk)
        # Recognize the chunk file itself rather than re-sending the complete
        # recording for every chunk.
        pieces.append(_recognize_wav(recognizer, chunk, lang_in))
    return "".join(piece + " " for piece in pieces)


def video_to_translate(url, initial_language, final_language):
    """Translate the speech of a YouTube video and dub it into a new video.

    The video is downloaded into a ``temp`` sub-directory. Its text comes from
    the YouTube transcript when one exists, otherwise from speech recognition
    on the audio track. The text is then translated and synthesized, and the
    synthesized speech replaces the original audio.

    Args:
        url: Address of the YouTube video.
        initial_language: Spoken language of the video, e.g. ``"English"``.
        final_language: Language to translate into, e.g. ``"Italian"``.

    Returns:
        Path of the dubbed video, relative to the directory that was current
        when the function was called, or ``"./demo/tryagain.mp4"`` when the
        audio cannot be recognized or the text cannot be translated.

    Raises:
        ValueError: If either language name is not supported.
    """
    try:
        lang_in, lang_api = _SOURCE_LANGUAGES[initial_language]
        lang = _TARGET_LANGUAGES[final_language]
    except KeyError as err:
        raise ValueError(
            "Unsupported language: {0!r}".format(err.args[0])
        ) from err

    # Initial directory
    home_dir = os.getcwd()
    print('Initial directory:', home_dir)
    cleanup()
    # Temporal directory
    temp_dir = os.path.join(home_dir, "temp")
    print('Temporal directory:', temp_dir)
    pathlib.Path(temp_dir).mkdir(parents=True, exist_ok=True)

    videoclip = None
    audioclip = None
    os.chdir(temp_dir)
    try:
        print('Changing temporal directory', os.getcwd())
        # Cleaning previous files
        cleanup()
        file_obj = download_video(url)
        print(file_obj)
        videoclip = VideoFileClip(file_obj)

        try:
            # Prefer the published transcript when there is one.
            text = generate_transcript(url, lang_api)
            print("Transcript Found")
        except Exception:
            print("No Transcript Found")
            try:
                text = _speech_to_text(videoclip, lang_in)
            except Exception as err:
                print("This video cannot be recognized")
                print("Reason:", err)
                cleanup()
                return _FALLBACK_VIDEO
        print("Destination language ", lang)

        # init the Google API translator
        translator = Translator()
        try:
            translation = translator.translate(text, dest=lang)
        except Exception as err:
            print("This text cannot be translated")
            print("Reason:", err)
            cleanup()
            return _FALLBACK_VIDEO

        # The name keeps its .wav suffix so cleanup() removes the file later.
        # The path is absolute so the clip does not depend on the working
        # directory once it changes below.
        speech_path = os.path.abspath("audio.wav")
        gTTS(text=translation.text, lang=lang, slow=False).save(speech_path)
        audioclip = AudioFileClip(speech_path)

        # Replace the original sound track with the synthesized speech.
        videoclip.audio = CompositeAudioClip([audioclip])
        new_video = "video_translated_" + lang + ".mp4"

        # The result is written into the initial directory.
        os.chdir(home_dir)
        print('Final directory', os.getcwd())
        videoclip.write_videofile(new_video)
        return new_video
    finally:
        # Release the media readers and restore the working directory on
        # every exit path, including unexpected exceptions.
        if audioclip is not None:
            audioclip.close()
        if videoclip is not None:
            videoclip.close()
        os.chdir(home_dir)
202
+
203
# Input widgets: source language, target language and the video address.
initial_language = gr.inputs.Dropdown(["English","Italian","Japanese","Russian","Spanish","German"])
final_language = gr.inputs.Dropdown([ "Russian","Italian","Spanish","German","English","Japanese"])
url =gr.inputs.Textbox(label = "Enter the YouTube URL below:")


# Build the web interface around video_to_translate and start serving it.
# The description lists every language offered by final_language.
gr.Interface(fn = video_to_translate,
            inputs = [url,initial_language,final_language],
            outputs = 'video',
            verbose = True,
            title = 'Video Youtube Translator',
            description = 'A simple application that translates Youtube videos from English, Italian, Japanese, Russian, Spanish, and German to Italian, Spanish, Russian, German, English and Japanese. Wait one minute to process.',
            article =
            '''<div>
                <p style="text-align: center"> All you need to do is to paste the Youtube link and hit submit, then wait for compiling. After that click on Play/Pause for listening to the video. The video is saved in an mp4 format.
                For more information visit <a href="https://ruslanmv.com/">ruslanmv.com</a>
                </p>
            </div>''',

            examples = [
                ["https://www.youtube.com/watch?v=Cu3R5it4cQs&list", "English","Italian"],
                ["https://www.youtube.com/watch?v=fkGCLIQx1MI", "English","Spanish"],
                ["https://www.youtube.com/watch?v=fkGCLIQx1MI", "English","Russian"],
                ["https://www.youtube.com/watch?v=_5YeX8eCLgA&ab_channel=TheTelegraph", "Russian","English"],
                ["https://www.youtube.com/watch?v=qzzweIQoIOU", "Japanese","English"],
                ["https://www.youtube.com/watch?v=eo17uDr2_XA", "German","Spanish"]
            ]
            ).launch()
requirements.txt CHANGED
@@ -1,8 +1,8 @@
1
- pip==22.2.2
2
- gradio==3.0.24
3
- googletrans==4.0.0rc1
4
- moviepy
5
- SpeechRecognition
6
- gTTS
7
- youtube_transcript_api
8
  pytube
1
+ pip==22.2.2
2
+ gradio==3.0.24
3
+ googletrans==4.0.0rc1
4
+ moviepy
5
+ SpeechRecognition
6
+ gTTS
7
+ youtube_transcript_api
8
  pytube
utils.py CHANGED
@@ -1,37 +1,37 @@
1
- from pydub import AudioSegment
2
- #from pydub.utils import mediainfo
3
- from pydub.utils import make_chunks
4
- import math
5
- #flac_audio = AudioSegment.from_file("sample.flac", "flac")
6
- #flac_audio.export("audio.wav", format="wav")
7
- def split_audio_wav(filename):
8
- myaudio = AudioSegment.from_file(filename , "wav")
9
- channel_count = myaudio.channels #Get channels
10
- sample_width = myaudio.sample_width #Get sample width
11
- duration_in_sec = len(myaudio) / 1000#Length of audio in sec
12
- sample_rate = myaudio.frame_rate
13
- print("sample_width=", sample_width)
14
- print("channel_count=", channel_count)
15
- print("duration_in_sec=", duration_in_sec)
16
- print("frame_rate=", sample_rate)
17
- bit_rate =16 #assumption , you can extract from mediainfo("test.wav") dynamically
18
- wav_file_size = (sample_rate * bit_rate * channel_count * duration_in_sec) / 8
19
- print("wav_file_size = ",wav_file_size)
20
- file_split_size = 40000000 # 40mb OR 40, 000, 000 bytes
21
- total_chunks = wav_file_size // file_split_size
22
- #Get chunk size by following method #There are more than one ofcourse
23
- #for duration_in_sec (X) --> wav_file_size (Y)
24
- #So whats duration in sec (K) --> for file size of 40Mb
25
- # K = X * 40Mb / Y
26
- chunk_length_in_sec = math.ceil((duration_in_sec * 40000000 ) /wav_file_size) #in sec
27
- chunk_length_ms = chunk_length_in_sec * 1000
28
- chunks = make_chunks(myaudio, chunk_length_ms)
29
- number_chunks=len(chunks)
30
- chunks_list=[]
31
- #Export all of the individual chunks as wav files
32
- for i, chunk in enumerate(chunks):
33
- chunk_name = "chunk{0}.wav".format(i)
34
- print("exporting", chunk_name)
35
- chunk.export(chunk_name, format="wav")
36
- chunks_list.append(chunk_name)
37
  return chunks_list
1
+ from pydub import AudioSegment
2
+ #from pydub.utils import mediainfo
3
+ from pydub.utils import make_chunks
4
+ import math
5
+ #flac_audio = AudioSegment.from_file("sample.flac", "flac")
6
+ #flac_audio.export("audio.wav", format="wav")
7
def split_audio_wav(filename, file_split_size=40000000):
    """Split a WAV file into consecutive chunk files of about *file_split_size* bytes.

    Each chunk is exported to the current directory as ``chunk<i>.wav``.

    Args:
        filename: Path of the WAV file to split.
        file_split_size: Target size of one chunk in bytes (40 MB by default).
            The chunk duration is rounded up to a whole second, so a chunk
            may exceed the target by up to one second of audio.

    Returns:
        The names of the exported chunk files, in playback order. The list is
        empty when the audio has no content.

    Raises:
        ValueError: If *file_split_size* is not positive.
    """
    if file_split_size <= 0:
        raise ValueError("file_split_size must be a positive number of bytes")

    myaudio = AudioSegment.from_file(filename, "wav")
    channel_count = myaudio.channels  # Get channels
    sample_width = myaudio.sample_width  # Get sample width (bytes per sample)
    duration_in_sec = len(myaudio) / 1000  # Length of audio in sec
    sample_rate = myaudio.frame_rate
    print("sample_width=", sample_width)
    print("channel_count=", channel_count)
    print("duration_in_sec=", duration_in_sec)
    print("frame_rate=", sample_rate)

    # Use the real sample width instead of assuming 16-bit samples.
    bytes_per_second = sample_rate * sample_width * channel_count
    wav_file_size = bytes_per_second * duration_in_sec
    print("wav_file_size = ", wav_file_size)

    # Seconds of audio that fit into one chunk. Deriving it from the byte
    # rate, not from the total size, avoids dividing by zero on empty audio.
    chunk_length_in_sec = math.ceil(file_split_size / bytes_per_second)
    chunk_length_ms = chunk_length_in_sec * 1000
    chunks = make_chunks(myaudio, chunk_length_ms)

    chunks_list = []
    # Export all of the individual chunks as wav files
    for i, chunk in enumerate(chunks):
        chunk_name = "chunk{0}.wav".format(i)
        print("exporting", chunk_name)
        chunk.export(chunk_name, format="wav")
        chunks_list.append(chunk_name)
    return chunks_list